fix: Repair data pipeline with StatCan CMHC rental data

- Add StatCan CMHC parser to fetch rental data from Statistics Canada API
- Create a year spine (2014-2025) as the time-dimension driver instead of the census
- Add CMA-level rental and income intermediate models
- Update mart_neighbourhood_overview to use rental years as base
- Fix neighbourhood_service queries to match dbt schema
- Add CMHC data loading to pipeline script

Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records
Rent data is available for 2019-2025; crime data for 2014-2024

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-17 15:38:31 -05:00
parent 4818c53fd2
commit d0f32edba7
21 changed files with 955 additions and 156 deletions

View File

@@ -1,6 +1,7 @@
"""Chart callbacks for supporting visualizations."""
# mypy: disable-error-code="misc,no-untyped-def,arg-type"
import pandas as pd
import plotly.graph_objects as go
from dash import Input, Output, callback
@@ -43,7 +44,24 @@ def update_overview_scatter(year: str) -> go.Figure:
# Compute safety score (inverse of crime rate)
if "total_crime_rate" in merged.columns:
max_crime = merged["total_crime_rate"].max()
merged["safety_score"] = 100 - (merged["total_crime_rate"] / max_crime * 100)
if max_crime and max_crime > 0:
merged["safety_score"] = 100 - (
merged["total_crime_rate"] / max_crime * 100
)
else:
merged["safety_score"] = 50 # Default if no crime data
# Fill NULL population with median or default value for sizing
if "population" in merged.columns:
median_pop = merged["population"].median()
default_pop = median_pop if pd.notna(median_pop) else 10000
merged["population"] = merged["population"].fillna(default_pop)
# Filter rows with required data for scatter plot
merged = merged.dropna(subset=["median_household_income", "safety_score"])
if merged.empty:
return _empty_chart("Insufficient data for scatter plot")
data = merged.to_dict("records")
@@ -76,12 +94,13 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
return _empty_chart("No trend data available")
# Placeholder for trend data - would be historical
base_rent = averages.get("avg_rent_2bed") or 2000
data = [
{"year": "2019", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.85},
{"year": "2020", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.88},
{"year": "2021", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.92},
{"year": "2022", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.96},
{"year": "2023", "avg_rent": averages.get("avg_rent_2bed", 2000)},
{"year": "2019", "avg_rent": base_rent * 0.85},
{"year": "2020", "avg_rent": base_rent * 0.88},
{"year": "2021", "avg_rent": base_rent * 0.92},
{"year": "2022", "avg_rent": base_rent * 0.96},
{"year": "2023", "avg_rent": base_rent},
]
fig = go.Figure()
@@ -330,10 +349,11 @@ def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure
# Get city averages
averages = get_city_averages(year_int)
amenity_score = averages.get("avg_amenity_score") or 50
city_data = {
"parks_per_1000": averages.get("avg_amenity_score", 50) / 100 * 10,
"schools_per_1000": averages.get("avg_amenity_score", 50) / 100 * 5,
"childcare_per_1000": averages.get("avg_amenity_score", 50) / 100 * 3,
"parks_per_1000": amenity_score / 100 * 10,
"schools_per_1000": amenity_score / 100 * 5,
"childcare_per_1000": amenity_score / 100 * 3,
"transit_access": 70,
}