fix: Repair data pipeline with StatCan CMHC rental data
- Add StatCan CMHC parser to fetch rental data from Statistics Canada API
- Create year spine (2014-2025) as time dimension driver instead of census
- Add CMA-level rental and income intermediate models
- Update mart_neighbourhood_overview to use rental years as base
- Fix neighbourhood_service queries to match dbt schema
- Add CMHC data loading to pipeline script

Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records.
Rent data available 2019-2025, crime data 2014-2024.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Chart callbacks for supporting visualizations."""
|
||||
# mypy: disable-error-code="misc,no-untyped-def,arg-type"
|
||||
|
||||
import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
from dash import Input, Output, callback
|
||||
|
||||
@@ -43,7 +44,24 @@ def update_overview_scatter(year: str) -> go.Figure:
|
||||
# Compute safety score (inverse of crime rate)
|
||||
if "total_crime_rate" in merged.columns:
|
||||
max_crime = merged["total_crime_rate"].max()
|
||||
merged["safety_score"] = 100 - (merged["total_crime_rate"] / max_crime * 100)
|
||||
if max_crime and max_crime > 0:
|
||||
merged["safety_score"] = 100 - (
|
||||
merged["total_crime_rate"] / max_crime * 100
|
||||
)
|
||||
else:
|
||||
merged["safety_score"] = 50 # Default if no crime data
|
||||
|
||||
# Fill NULL population with median or default value for sizing
|
||||
if "population" in merged.columns:
|
||||
median_pop = merged["population"].median()
|
||||
default_pop = median_pop if pd.notna(median_pop) else 10000
|
||||
merged["population"] = merged["population"].fillna(default_pop)
|
||||
|
||||
# Filter rows with required data for scatter plot
|
||||
merged = merged.dropna(subset=["median_household_income", "safety_score"])
|
||||
|
||||
if merged.empty:
|
||||
return _empty_chart("Insufficient data for scatter plot")
|
||||
|
||||
data = merged.to_dict("records")
|
||||
|
||||
@@ -76,12 +94,13 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
|
||||
return _empty_chart("No trend data available")
|
||||
|
||||
# Placeholder for trend data - would be historical
|
||||
base_rent = averages.get("avg_rent_2bed") or 2000
|
||||
data = [
|
||||
{"year": "2019", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.85},
|
||||
{"year": "2020", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.88},
|
||||
{"year": "2021", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.92},
|
||||
{"year": "2022", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.96},
|
||||
{"year": "2023", "avg_rent": averages.get("avg_rent_2bed", 2000)},
|
||||
{"year": "2019", "avg_rent": base_rent * 0.85},
|
||||
{"year": "2020", "avg_rent": base_rent * 0.88},
|
||||
{"year": "2021", "avg_rent": base_rent * 0.92},
|
||||
{"year": "2022", "avg_rent": base_rent * 0.96},
|
||||
{"year": "2023", "avg_rent": base_rent},
|
||||
]
|
||||
|
||||
fig = go.Figure()
|
||||
@@ -330,10 +349,11 @@ def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure
|
||||
# Get city averages
|
||||
averages = get_city_averages(year_int)
|
||||
|
||||
amenity_score = averages.get("avg_amenity_score") or 50
|
||||
city_data = {
|
||||
"parks_per_1000": averages.get("avg_amenity_score", 50) / 100 * 10,
|
||||
"schools_per_1000": averages.get("avg_amenity_score", 50) / 100 * 5,
|
||||
"childcare_per_1000": averages.get("avg_amenity_score", 50) / 100 * 3,
|
||||
"parks_per_1000": amenity_score / 100 * 10,
|
||||
"schools_per_1000": amenity_score / 100 * 5,
|
||||
"childcare_per_1000": amenity_score / 100 * 3,
|
||||
"transit_access": 70,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user