feat: Complete Phase 5 dashboard implementation

Implement full 5-tab Toronto Neighbourhood Dashboard with real data
connectivity:

Dashboard Structure:
- Overview tab with livability scores and rankings
- Housing tab with affordability metrics
- Safety tab with crime statistics
- Demographics tab with population/income data
- Amenities tab with parks, schools, transit

Figure Factories (portfolio_app/figures/):
- bar_charts.py: ranking, stacked, horizontal bars
- scatter.py: scatter plots, bubble charts
- radar.py: spider/radar charts
- demographics.py: donut, age pyramid, income distribution

Service Layer (portfolio_app/toronto/services/):
- neighbourhood_service.py: queries dbt marts for all tab data
- geometry_service.py: generates GeoJSON from PostGIS
- Graceful error handling when database unavailable

Callbacks (portfolio_app/pages/toronto/callbacks/):
- map_callbacks.py: choropleth updates, map click handling
- chart_callbacks.py: supporting chart updates
- selection_callbacks.py: dropdown handlers, KPI updates

Data Pipeline (scripts/data/):
- load_toronto_data.py: orchestration script with CLI flags

Lessons Learned:
- Graceful error handling in service layers
- Modular callback structure for multi-tab dashboards
- Figure factory pattern for reusable charts

Closes: #64, #65, #66, #67, #68, #69, #70

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-17 11:46:18 -05:00
parent 3054441630
commit c9cf744d84
27 changed files with 4377 additions and 1770 deletions

View File

@@ -0,0 +1,33 @@
"""Data service layer for Toronto neighbourhood dashboard."""
from .geometry_service import (
get_cmhc_zones_geojson,
get_neighbourhoods_geojson,
)
from .neighbourhood_service import (
get_amenities_data,
get_city_averages,
get_demographics_data,
get_housing_data,
get_neighbourhood_details,
get_neighbourhood_list,
get_overview_data,
get_rankings,
get_safety_data,
)
# Public API of the services package: exactly the names re-exported by the
# imports above, so callers can import them from the package root.
# NOTE(review): geometry_service also defines get_neighbourhood_geometry, which
# is neither imported nor listed here — confirm whether that is intentional.
__all__ = [
    # Tabular neighbourhood queries (from neighbourhood_service)
    "get_overview_data",
    "get_housing_data",
    "get_safety_data",
    "get_demographics_data",
    "get_amenities_data",
    "get_neighbourhood_details",
    "get_neighbourhood_list",
    "get_rankings",
    "get_city_averages",
    # GeoJSON builders (from geometry_service)
    "get_neighbourhoods_geojson",
    "get_cmhc_zones_geojson",
]

View File

@@ -0,0 +1,176 @@
"""Service layer for generating GeoJSON from PostGIS geometry."""
import json
from functools import lru_cache
from typing import Any
import pandas as pd
from sqlalchemy import text
from portfolio_app.toronto.models import get_engine
def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
    """Run a SQL statement on the project engine and return its rows.

    Args:
        sql: SQL text; it is wrapped in ``sqlalchemy.text`` so named
            ``:placeholder`` parameters are bound from ``params``.
        params: Optional mapping of bind-parameter values.

    Returns:
        The DataFrame produced by ``pandas.read_sql``.

    Note:
        Nothing is caught here: connection and query errors propagate to
        the caller, which decides how to degrade.
    """
    statement = text(sql)
    with get_engine().connect() as connection:
        frame = pd.read_sql(statement, connection, params=params)
    return frame
class _NeighbourhoodGeoUnavailable(Exception):
    """Internal signal: neighbourhood geometries could not be loaded."""


@lru_cache(maxsize=8)
def _load_neighbourhoods_geojson(year: int) -> dict[str, Any]:
    """Query the overview mart and build the neighbourhood FeatureCollection.

    Failure is reported by raising instead of returning an empty
    collection: ``lru_cache`` only stores values that are returned, so a
    database outage or a not-yet-loaded mart is retried on the next call
    rather than being memoised for the life of the process.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        GeoJSON FeatureCollection dictionary with one feature per row that
        has a geometry.

    Raises:
        _NeighbourhoodGeoUnavailable: If the query fails or returns no rows.
    """
    # Query geometries with ST_AsGeoJSON
    sql = """
SELECT
neighbourhood_id,
neighbourhood_name,
ST_AsGeoJSON(geometry)::json as geom,
population,
livability_score
FROM mart_neighbourhood_overview
WHERE year = :year
AND geometry IS NOT NULL
"""
    try:
        df = _execute_query(sql, {"year": year})
    except Exception as exc:
        # Table might not exist or have data yet
        raise _NeighbourhoodGeoUnavailable(year) from exc
    if df.empty:
        raise _NeighbourhoodGeoUnavailable(year)

    # Build GeoJSON features
    features = []
    for _, row in df.iterrows():
        geom = row["geom"]
        if geom is None:
            continue
        # Handle geometry that might be a string or dict
        if isinstance(geom, str):
            geom = json.loads(geom)
        # Coerce once and reuse, so the feature id and the matching property
        # are always the same plain int.
        neighbourhood_id = int(row["neighbourhood_id"])
        population = row["population"]
        livability = row["livability_score"]
        features.append(
            {
                "type": "Feature",
                "id": neighbourhood_id,
                "properties": {
                    "neighbourhood_id": neighbourhood_id,
                    "neighbourhood_name": row["neighbourhood_name"],
                    "population": int(population) if pd.notna(population) else None,
                    "livability_score": float(livability)
                    if pd.notna(livability)
                    else None,
                },
                "geometry": geom,
            }
        )
    return {
        "type": "FeatureCollection",
        "features": features,
    }


def get_neighbourhoods_geojson(year: int = 2021) -> dict[str, Any]:
    """Get GeoJSON FeatureCollection for all neighbourhoods.

    Queries mart_neighbourhood_overview for geometries and basic properties
    (neighbourhood_id, neighbourhood_name, population, livability_score).
    Successful results are cached per year (up to 8 years); the empty
    fallback is never cached, so data that becomes available later is
    picked up without restarting the process.

    The cached dictionary is shared between callers; treat it as read-only.

    Args:
        year: Year to query for joining properties.

    Returns:
        GeoJSON FeatureCollection dictionary. An empty FeatureCollection is
        returned when the query fails or yields no rows.
    """
    try:
        return _load_neighbourhoods_geojson(year)
    except _NeighbourhoodGeoUnavailable:
        return _empty_geojson()


# Keep the cache-management hooks that the lru_cache-decorated original exposed.
get_neighbourhoods_geojson.cache_clear = _load_neighbourhoods_geojson.cache_clear  # type: ignore[attr-defined]
get_neighbourhoods_geojson.cache_info = _load_neighbourhoods_geojson.cache_info  # type: ignore[attr-defined]
class _CmhcZoneGeoUnavailable(Exception):
    """Internal signal: CMHC zone geometries could not be loaded."""


@lru_cache(maxsize=4)
def _load_cmhc_zones_geojson() -> dict[str, Any]:
    """Query dim_cmhc_zone and build the zone FeatureCollection.

    Failure is reported by raising instead of returning an empty
    collection: ``lru_cache`` only stores values that are returned, so a
    failed or empty load is retried on the next call rather than memoised.

    Returns:
        GeoJSON FeatureCollection dictionary with one feature per zone that
        has a geometry.

    Raises:
        _CmhcZoneGeoUnavailable: If the query fails or returns no rows.
    """
    sql = """
SELECT
zone_code,
zone_name,
ST_AsGeoJSON(geometry)::json as geom
FROM dim_cmhc_zone
WHERE geometry IS NOT NULL
"""
    try:
        df = _execute_query(sql, {})
    except Exception as exc:
        raise _CmhcZoneGeoUnavailable from exc
    if df.empty:
        raise _CmhcZoneGeoUnavailable

    features = []
    for _, row in df.iterrows():
        geom = row["geom"]
        if geom is None:
            continue
        # The driver may hand the json column back as text or as a dict.
        if isinstance(geom, str):
            geom = json.loads(geom)
        features.append(
            {
                "type": "Feature",
                "id": row["zone_code"],
                "properties": {
                    "zone_code": row["zone_code"],
                    "zone_name": row["zone_name"],
                },
                "geometry": geom,
            }
        )
    return {
        "type": "FeatureCollection",
        "features": features,
    }


def get_cmhc_zones_geojson() -> dict[str, Any]:
    """Get GeoJSON FeatureCollection for CMHC zones.

    Queries dim_cmhc_zone for zone geometries. A successful result is
    cached; the empty fallback is never cached, so zones loaded after a
    failed first call are picked up without restarting the process.

    The cached dictionary is shared between callers; treat it as read-only.

    Returns:
        GeoJSON FeatureCollection dictionary. An empty FeatureCollection is
        returned when the query fails or yields no rows.
    """
    try:
        return _load_cmhc_zones_geojson()
    except _CmhcZoneGeoUnavailable:
        return _empty_geojson()


# Keep the cache-management hooks that the lru_cache-decorated original exposed.
get_cmhc_zones_geojson.cache_clear = _load_cmhc_zones_geojson.cache_clear  # type: ignore[attr-defined]
get_cmhc_zones_geojson.cache_info = _load_cmhc_zones_geojson.cache_info  # type: ignore[attr-defined]
def get_neighbourhood_geometry(neighbourhood_id: int) -> dict[str, Any] | None:
    """Look up the GeoJSON geometry of one neighbourhood.

    Reads ``dim_neighbourhood`` and uses the first matching row whose
    geometry is not NULL.

    Args:
        neighbourhood_id: Identifier matched against ``neighbourhood_id``.

    Returns:
        The geometry as a dictionary, or ``None`` when the query fails, no
        row matches, or the stored geometry is missing.
    """
    sql = """
SELECT ST_AsGeoJSON(geometry)::json as geom
FROM dim_neighbourhood
WHERE neighbourhood_id = :neighbourhood_id
AND geometry IS NOT NULL
"""
    try:
        rows = _execute_query(sql, {"neighbourhood_id": neighbourhood_id})
    except Exception:
        return None

    if rows.empty:
        return None

    raw = rows.iloc[0]["geom"]
    if raw is None:
        return None
    if isinstance(raw, str):
        # Value came back as JSON text rather than an already-decoded mapping.
        parsed: dict[str, Any] = json.loads(raw)
        return parsed
    return dict(raw)
def _empty_geojson() -> dict[str, Any]:
    """Build a fresh FeatureCollection that contains no features.

    A new dictionary (with a new ``features`` list) is created on every
    call, so callers may mutate the result freely.
    """
    collection: dict[str, Any] = {"type": "FeatureCollection"}
    collection["features"] = []
    return collection

View File

@@ -0,0 +1,392 @@
"""Service layer for querying neighbourhood data from dbt marts."""
import logging
from functools import lru_cache
from typing import Any

import pandas as pd
from sqlalchemy import text

from portfolio_app.toronto.models import get_engine
def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
"""Execute SQL query and return DataFrame.
Args:
sql: SQL query string.
params: Query parameters.
Returns:
pandas DataFrame with results, or empty DataFrame on error.
"""
try:
engine = get_engine()
with engine.connect() as conn:
return pd.read_sql(text(sql), conn, params=params)
except Exception:
# Return empty DataFrame on connection or query error
return pd.DataFrame()
def get_overview_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the overview rows of every neighbourhood for one year.

    Reads ``mart_neighbourhood_overview`` and orders the rows by
    ``livability_score`` from highest to lowest, NULL scores last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with the columns neighbourhood_id, neighbourhood_name,
        year, population, median_household_income, livability_score,
        safety_score, affordability_score, amenity_score,
        crime_rate_per_100k, rent_to_income_pct, avg_rent_2bed and
        total_amenities_per_1000. Empty when ``_execute_query`` fails.
    """
    query = """
SELECT
neighbourhood_id,
neighbourhood_name,
year,
population,
median_household_income,
livability_score,
safety_score,
affordability_score,
amenity_score,
crime_rate_per_100k,
rent_to_income_pct,
avg_rent_2bed,
total_amenities_per_1000
FROM mart_neighbourhood_overview
WHERE year = :year
ORDER BY livability_score DESC NULLS LAST
"""
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
def get_housing_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the housing rows of every neighbourhood for one year.

    Reads ``mart_neighbourhood_housing`` and orders the rows by
    ``affordability_index`` from lowest to highest, NULL values last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with tenure shares (pct_owner_occupied,
        pct_renter_occupied), average_dwelling_value,
        median_household_income, average rents by unit size (bachelor,
        1/2/3 bedroom), vacancy_rate, total_rental_units,
        rent_to_income_pct, is_affordable, affordability_index,
        rent_yoy_change_pct and income_quintile, plus the neighbourhood id,
        name and year. Empty when ``_execute_query`` fails.
    """
    query = """
SELECT
neighbourhood_id,
neighbourhood_name,
year,
pct_owner_occupied,
pct_renter_occupied,
average_dwelling_value,
median_household_income,
avg_rent_bachelor,
avg_rent_1bed,
avg_rent_2bed,
avg_rent_3bed,
vacancy_rate,
total_rental_units,
rent_to_income_pct,
is_affordable,
affordability_index,
rent_yoy_change_pct,
income_quintile
FROM mart_neighbourhood_housing
WHERE year = :year
ORDER BY affordability_index ASC NULLS LAST
"""
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
def get_safety_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the crime statistics of every neighbourhood for one year.

    Reads ``mart_neighbourhood_safety``. The mart column
    ``crime_rate_per_100k`` is exposed under the name ``total_crime_rate``,
    and rows are ordered by that rate from lowest to highest, NULLs last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year,
        total_crimes, total_crime_rate, violent_crimes, violent_crime_rate,
        property_crimes, property_crime_rate, theft_crimes, theft_rate,
        crime_yoy_change_pct and crime_trend. Empty when
        ``_execute_query`` fails.
    """
    query = """
SELECT
neighbourhood_id,
neighbourhood_name,
year,
total_crimes,
crime_rate_per_100k as total_crime_rate,
violent_crimes,
violent_crime_rate,
property_crimes,
property_crime_rate,
theft_crimes,
theft_rate,
crime_yoy_change_pct,
crime_trend
FROM mart_neighbourhood_safety
WHERE year = :year
ORDER BY total_crime_rate ASC NULLS LAST
"""
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
def get_demographics_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the demographic rows of every neighbourhood for one census year.

    Reads ``mart_neighbourhood_demographics``. That mart keys its rows by
    ``census_year``; the column is returned under the name ``year``. Rows
    are ordered by population from largest to smallest, NULLs last.

    Args:
        year: Value matched against the mart's ``census_year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year,
        population, population_density, population_change_pct,
        median_household_income, average_household_income, income_quintile,
        median_age, pct_under_18, pct_18_to_64, pct_65_plus,
        pct_bachelors_or_higher, unemployment_rate and diversity_index.
        Empty when ``_execute_query`` fails.
    """
    query = """
SELECT
neighbourhood_id,
neighbourhood_name,
census_year as year,
population,
population_density,
population_change_pct,
median_household_income,
average_household_income,
income_quintile,
median_age,
pct_under_18,
pct_18_to_64,
pct_65_plus,
pct_bachelors_or_higher,
unemployment_rate,
diversity_index
FROM mart_neighbourhood_demographics
WHERE census_year = :year
ORDER BY population DESC NULLS LAST
"""
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
def get_amenities_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the amenity rows of every neighbourhood for one year.

    Reads ``mart_neighbourhood_amenities`` and orders the rows by
    ``amenity_score`` from highest to lowest, NULL scores last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year, counts
        and per-1000 rates for parks, schools and childcare (park_count,
        parks_per_1000, school_count, schools_per_1000, childcare_count,
        childcare_per_1000), total_amenities, total_amenities_per_1000,
        amenity_score and amenity_rank. Empty when ``_execute_query`` fails.
    """
    query = """
SELECT
neighbourhood_id,
neighbourhood_name,
year,
park_count,
parks_per_1000,
school_count,
schools_per_1000,
childcare_count,
childcare_per_1000,
total_amenities,
total_amenities_per_1000,
amenity_score,
amenity_rank
FROM mart_neighbourhood_amenities
WHERE year = :year
ORDER BY amenity_score DESC NULLS LAST
"""
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
def get_neighbourhood_details(
    neighbourhood_id: int, year: int = 2021
) -> dict[str, Any]:
    """Assemble a single neighbourhood's profile across all marts.

    Starts from ``mart_neighbourhood_overview`` and LEFT JOINs the safety,
    housing, demographics and amenities marts on neighbourhood id and year
    (the demographics mart is matched on its ``census_year`` column), so
    metrics missing from a joined mart come back as NULL rather than
    dropping the neighbourhood.

    Args:
        neighbourhood_id: Identifier of the neighbourhood to load.
        year: Value matched against the overview mart's ``year`` column.

    Returns:
        Mapping of column name to value taken from the first result row, or
        an empty dict when no row is returned (including when
        ``_execute_query`` fails).
    """
    query = """
SELECT
o.neighbourhood_id,
o.neighbourhood_name,
o.year,
o.population,
o.median_household_income,
o.livability_score,
o.safety_score,
o.affordability_score,
o.amenity_score,
s.total_crimes,
s.crime_rate_per_100k,
s.violent_crime_rate,
s.property_crime_rate,
h.avg_rent_2bed,
h.vacancy_rate,
h.rent_to_income_pct,
h.affordability_index,
h.pct_owner_occupied,
h.pct_renter_occupied,
d.median_age,
d.diversity_index,
d.unemployment_rate,
d.pct_bachelors_or_higher,
a.park_count,
a.school_count,
a.total_amenities
FROM mart_neighbourhood_overview o
LEFT JOIN mart_neighbourhood_safety s
ON o.neighbourhood_id = s.neighbourhood_id
AND o.year = s.year
LEFT JOIN mart_neighbourhood_housing h
ON o.neighbourhood_id = h.neighbourhood_id
AND o.year = h.year
LEFT JOIN mart_neighbourhood_demographics d
ON o.neighbourhood_id = d.neighbourhood_id
AND o.year = d.census_year
LEFT JOIN mart_neighbourhood_amenities a
ON o.neighbourhood_id = a.neighbourhood_id
AND o.year = a.year
WHERE o.neighbourhood_id = :neighbourhood_id
AND o.year = :year
"""
    bind_params = {"neighbourhood_id": neighbourhood_id, "year": year}
    matches = _execute_query(query, bind_params)
    if matches.empty:
        return {}

    # Only the first row is used; keys are forced to str for a stable mapping type.
    first_row = matches.iloc[0].to_dict()
    profile: dict[str, Any] = {}
    for column, value in first_row.items():
        profile[str(column)] = value
    return profile
class _NeighbourhoodListUnavailable(Exception):
    """Internal signal: the neighbourhood list query returned no rows."""


@lru_cache(maxsize=32)
def _load_neighbourhood_list(year: int) -> tuple[dict[str, Any], ...]:
    """Query the overview mart for the selector entries of ``year``.

    ``_execute_query`` yields an empty DataFrame both when the mart has no
    rows and when the database is unreachable. Raising in that case keeps
    the empty result out of the cache (``lru_cache`` only stores returned
    values), so the dropdown recovers once data becomes available.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        Immutable tuple of row dicts (neighbourhood_id, neighbourhood_name,
        population), ordered by neighbourhood name.

    Raises:
        _NeighbourhoodListUnavailable: If the query produced no rows.
    """
    sql = """
SELECT DISTINCT
neighbourhood_id,
neighbourhood_name,
population
FROM mart_neighbourhood_overview
WHERE year = :year
ORDER BY neighbourhood_name
"""
    df = _execute_query(sql, {"year": year})
    if df.empty:
        raise _NeighbourhoodListUnavailable(year)
    return tuple(
        {str(column): value for column, value in record.items()}
        for record in df.to_dict("records")
    )


def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
    """Get list of all neighbourhoods for dropdown selectors.

    Non-empty results are cached per year (up to 32 years). An empty result
    is never cached, so a transient database failure does not leave the
    selectors empty for the lifetime of the process.

    Args:
        year: Year to query.

    Returns:
        List of dicts with neighbourhood_id, neighbourhood_name and
        population, ordered by name; an empty list when nothing could be
        loaded. Each call returns fresh list and dict objects, so callers
        may mutate the result without corrupting the cache.
    """
    try:
        cached = _load_neighbourhood_list(year)
    except _NeighbourhoodListUnavailable:
        return []
    return [dict(record) for record in cached]


# Keep the cache-management hooks that the lru_cache-decorated original exposed.
get_neighbourhood_list.cache_clear = _load_neighbourhood_list.cache_clear  # type: ignore[attr-defined]
get_neighbourhood_list.cache_info = _load_neighbourhood_list.cache_info  # type: ignore[attr-defined]
def get_rankings(
    metric: str,
    year: int = 2021,
    top_n: int = 10,
    ascending: bool = True,
) -> pd.DataFrame:
    """Get top/bottom neighbourhoods for a specific metric.

    Two ``LIMIT top_n`` subqueries over the same table are concatenated
    with ``UNION ALL``. The first is ordered in the requested direction and
    tagged ``rank_group = 'bottom'``; the second is ordered the opposite way
    and tagged ``'top'``. With ``ascending=True`` the 'bottom' rows are
    therefore the lowest values and the 'top' rows the highest. Rows whose
    metric is NULL are excluded. A neighbourhood can appear in both groups
    when the table has fewer than ``2 * top_n`` rows.

    Args:
        metric: Column name to rank by. Metrics without an entry in the
            source-table map are looked up in mart_neighbourhood_overview.
            The value must be a plain ASCII identifier; anything else is
            rejected without touching the database.
        year: Year to query.
        top_n: Number of top and bottom records.
        ascending: If True, rank from lowest to highest (good for crime, rent).

    Returns:
        DataFrame with columns neighbourhood_id, neighbourhood_name, the
        metric, and rank_group. Empty when the metric is rejected or the
        query fails.
    """
    # Column names cannot be sent as bind parameters, so `metric` has to be
    # interpolated into the SQL text. Accept only a bare ASCII identifier so
    # a crafted value can never inject SQL. An invalid metric yields the
    # same empty result a failing query would.
    if not (isinstance(metric, str) and metric.isascii() and metric.isidentifier()):
        return pd.DataFrame()

    # Map metrics to their source tables
    # NOTE(review): get_safety_data selects `crime_rate_per_100k as
    # total_crime_rate`, which suggests mart_neighbourhood_safety may have no
    # physical `total_crime_rate` column — confirm this entry against the schema.
    table_map = {
        "livability_score": "mart_neighbourhood_overview",
        "safety_score": "mart_neighbourhood_overview",
        "affordability_score": "mart_neighbourhood_overview",
        "amenity_score": "mart_neighbourhood_overview",
        "crime_rate_per_100k": "mart_neighbourhood_safety",
        "total_crime_rate": "mart_neighbourhood_safety",
        "avg_rent_2bed": "mart_neighbourhood_housing",
        "affordability_index": "mart_neighbourhood_housing",
        "population": "mart_neighbourhood_demographics",
        "median_household_income": "mart_neighbourhood_demographics",
    }
    table = table_map.get(metric, "mart_neighbourhood_overview")
    # The demographics mart names its year column differently.
    year_col = "census_year" if "demographics" in table else "year"
    order = "ASC" if ascending else "DESC"
    reverse_order = "DESC" if ascending else "ASC"
    sql = f"""
(
SELECT neighbourhood_id, neighbourhood_name, {metric}, 'bottom' as rank_group
FROM {table}
WHERE {year_col} = :year AND {metric} IS NOT NULL
ORDER BY {metric} {order}
LIMIT :top_n
)
UNION ALL
(
SELECT neighbourhood_id, neighbourhood_name, {metric}, 'top' as rank_group
FROM {table}
WHERE {year_col} = :year AND {metric} IS NOT NULL
ORDER BY {metric} {reverse_order}
LIMIT :top_n
)
"""
    return _execute_query(sql, {"year": year, "top_n": top_n})
def get_city_averages(year: int = 2021) -> dict[str, Any]:
    """Compute city-wide aggregates over all neighbourhoods for one year.

    Aggregates ``mart_neighbourhood_overview`` in a single query: averages
    of the four scores, median household income, crime rate, two-bedroom
    rent and rent-to-income share, plus the summed population.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        Mapping with the keys avg_livability_score, avg_safety_score,
        avg_affordability_score, avg_amenity_score, total_population,
        avg_median_income, avg_crime_rate, avg_rent_2bed and
        avg_rent_to_income. Non-missing float values are rounded to two
        decimals; other values are passed through unchanged. An empty dict
        is returned when the query yields no DataFrame rows (including when
        ``_execute_query`` fails).
    """
    query = """
SELECT
AVG(livability_score) as avg_livability_score,
AVG(safety_score) as avg_safety_score,
AVG(affordability_score) as avg_affordability_score,
AVG(amenity_score) as avg_amenity_score,
SUM(population) as total_population,
AVG(median_household_income) as avg_median_income,
AVG(crime_rate_per_100k) as avg_crime_rate,
AVG(avg_rent_2bed) as avg_rent_2bed,
AVG(rent_to_income_pct) as avg_rent_to_income
FROM mart_neighbourhood_overview
WHERE year = :year
"""
    frame = _execute_query(query, {"year": year})
    if frame.empty:
        return {}

    averages: dict[str, Any] = {}
    for column, value in frame.iloc[0].to_dict().items():
        # Round numeric values; leave missing and non-float entries as they are.
        needs_rounding = pd.notna(value) and isinstance(value, float)
        averages[str(column)] = round(value, 2) if needs_rounding else value
    return averages