fix: Repair data pipeline with StatCan CMHC rental data

- Add StatCan CMHC parser to fetch rental data from Statistics Canada API
- Create year spine (2014-2025) as time dimension driver instead of census
- Add CMA-level rental and income intermediate models
- Update mart_neighbourhood_overview to use rental years as base
- Fix neighbourhood_service queries to match dbt schema
- Add CMHC data loading to pipeline script

Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records.
Rent data is available for 2019-2025, crime data for 2014-2024.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 15:38:31 -05:00
parent 4818c53fd2
commit d0f32edba7
21 changed files with 955 additions and 156 deletions
--- a/portfolio_app/pages/toronto/callbacks/chart_callbacks.py
+++ b/portfolio_app/pages/toronto/callbacks/chart_callbacks.py
@@ -1,6 +1,7 @@
 """Chart callbacks for supporting visualizations."""
 # mypy: disable-error-code="misc,no-untyped-def,arg-type"

+import pandas as pd
 import plotly.graph_objects as go
 from dash import Input, Output, callback

@@ -43,7 +44,24 @@ def update_overview_scatter(year: str) -> go.Figure:
    # Compute safety score (inverse of crime rate)
    if "total_crime_rate" in merged.columns:
        max_crime = merged["total_crime_rate"].max()
-        merged["safety_score"] = 100 - (merged["total_crime_rate"] / max_crime * 100)
+        if max_crime and max_crime > 0:
+            merged["safety_score"] = 100 - (
+                merged["total_crime_rate"] / max_crime * 100
+            )
+        else:
+            merged["safety_score"] = 50  # Default if no crime data
+
+    # Fill NULL population with median or default value for sizing
+    if "population" in merged.columns:
+        median_pop = merged["population"].median()
+        default_pop = median_pop if pd.notna(median_pop) else 10000
+        merged["population"] = merged["population"].fillna(default_pop)
+
+    # Filter rows with required data for scatter plot
+    merged = merged.dropna(subset=["median_household_income", "safety_score"])
+
+    if merged.empty:
+        return _empty_chart("Insufficient data for scatter plot")

    data = merged.to_dict("records")

@@ -76,12 +94,13 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
        return _empty_chart("No trend data available")

    # Placeholder for trend data - would be historical
+    base_rent = averages.get("avg_rent_2bed") or 2000
    data = [
-        {"year": "2019", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.85},
-        {"year": "2020", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.88},
-        {"year": "2021", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.92},
-        {"year": "2022", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.96},
-        {"year": "2023", "avg_rent": averages.get("avg_rent_2bed", 2000)},
+        {"year": "2019", "avg_rent": base_rent * 0.85},
+        {"year": "2020", "avg_rent": base_rent * 0.88},
+        {"year": "2021", "avg_rent": base_rent * 0.92},
+        {"year": "2022", "avg_rent": base_rent * 0.96},
+        {"year": "2023", "avg_rent": base_rent},
    ]

    fig = go.Figure()
@@ -330,10 +349,11 @@ def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure
    # Get city averages
    averages = get_city_averages(year_int)

+    amenity_score = averages.get("avg_amenity_score") or 50
    city_data = {
-        "parks_per_1000": averages.get("avg_amenity_score", 50) / 100 * 10,
-        "schools_per_1000": averages.get("avg_amenity_score", 50) / 100 * 5,
-        "childcare_per_1000": averages.get("avg_amenity_score", 50) / 100 * 3,
+        "parks_per_1000": amenity_score / 100 * 10,
+        "schools_per_1000": amenity_score / 100 * 5,
+        "childcare_per_1000": amenity_score / 100 * 3,
        "transit_access": 70,
    }