fix: Repair data pipeline with StatCan CMHC rental data
- Add StatCan CMHC parser to fetch rental data from Statistics Canada API
- Create year spine (2014-2025) as time dimension driver instead of census
- Add CMA-level rental and income intermediate models
- Update mart_neighbourhood_overview to use rental years as base
- Fix neighbourhood_service queries to match dbt schema
- Add CMHC data loading to pipeline script

Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records.
Rent data is available for 2019-2025; crime data for 2014-2024.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -38,12 +38,16 @@ from portfolio_app.toronto.loaders import ( # noqa: E402
|
||||
load_census_data,
|
||||
load_crime_data,
|
||||
load_neighbourhoods,
|
||||
load_statcan_cmhc_data,
|
||||
load_time_dimension,
|
||||
)
|
||||
from portfolio_app.toronto.parsers import ( # noqa: E402
|
||||
TorontoOpenDataParser,
|
||||
TorontoPoliceParser,
|
||||
)
|
||||
from portfolio_app.toronto.parsers.statcan_cmhc import ( # noqa: E402
|
||||
fetch_toronto_rental_data,
|
||||
)
|
||||
from portfolio_app.toronto.schemas import Neighbourhood # noqa: E402
|
||||
|
||||
# Configure logging
|
||||
@@ -91,6 +95,9 @@ class DataPipeline:
|
||||
# 5. Load amenities
|
||||
self._load_amenities(session)
|
||||
|
||||
# 6. Load CMHC rental data from StatCan
|
||||
self._load_rentals(session)
|
||||
|
||||
session.commit()
|
||||
logger.info("All data committed to database")
|
||||
|
||||
@@ -241,6 +248,32 @@ class DataPipeline:
|
||||
|
||||
self.stats["amenities"] = total_count
|
||||
|
||||
def _load_rentals(self, session: Any) -> None:
    """Fetch CMHC rental records from StatCan and load them through *session*.

    In dry-run mode nothing is fetched or written; the intended action is
    only logged. On success the value returned by the loader is stored in
    ``self.stats["rentals"]``.

    This step is best-effort: any exception raised while fetching or
    loading is logged as a warning and not re-raised. When ``self.verbose``
    is set, the traceback is printed as well.

    Args:
        session: Database session handed to ``load_statcan_cmhc_data``.
    """
    logger.info("Fetching CMHC rental data from Statistics Canada...")

    if self.dry_run:
        logger.info(" [DRY RUN] Would fetch and load CMHC rental data")
        return

    try:
        # Rental series is requested from 2014 up to the present.
        records = fetch_toronto_rental_data(start_year=2014)

        if records:
            loaded = load_statcan_cmhc_data(records, session)
            self.stats["rentals"] = loaded
            logger.info(f" Loaded {loaded} CMHC rental records")
        else:
            # Nothing came back from the fetch; leave the stats untouched.
            logger.warning(" No rental records fetched")
    except Exception as exc:
        # Deliberately non-fatal: report the failure and carry on.
        logger.warning(f" Failed to load CMHC rental data: {exc}")
        if self.verbose:
            import traceback

            traceback.print_exc()
|
||||
|
||||
def run_dbt(self) -> bool:
|
||||
"""Run dbt to transform data.
|
||||
|
||||
|
||||
@@ -25,8 +25,10 @@ def main() -> int:
|
||||
engine = get_engine()
|
||||
|
||||
# Test connection
|
||||
from sqlalchemy import text
|
||||
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute("SELECT 1")
|
||||
result = conn.execute(text("SELECT 1"))
|
||||
result.fetchone()
|
||||
print("Database connection successful")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user