feat: Complete Phase 5 dashboard implementation
Implement full 5-tab Toronto Neighbourhood Dashboard with real data connectivity:

Dashboard Structure:
- Overview tab with livability scores and rankings
- Housing tab with affordability metrics
- Safety tab with crime statistics
- Demographics tab with population/income data
- Amenities tab with parks, schools, transit

Figure Factories (portfolio_app/figures/):
- bar_charts.py: ranking, stacked, horizontal bars
- scatter.py: scatter plots, bubble charts
- radar.py: spider/radar charts
- demographics.py: donut, age pyramid, income distribution

Service Layer (portfolio_app/toronto/services/):
- neighbourhood_service.py: queries dbt marts for all tab data
- geometry_service.py: generates GeoJSON from PostGIS
- Graceful error handling when database unavailable

Callbacks (portfolio_app/pages/toronto/callbacks/):
- map_callbacks.py: choropleth updates, map click handling
- chart_callbacks.py: supporting chart updates
- selection_callbacks.py: dropdown handlers, KPI updates

Data Pipeline (scripts/data/):
- load_toronto_data.py: orchestration script with CLI flags

Lessons Learned:
- Graceful error handling in service layers
- Modular callback structure for multi-tab dashboards
- Figure factory pattern for reusable charts

Closes: #64, #65, #66, #67, #68, #69, #70

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
33
portfolio_app/toronto/services/__init__.py
Normal file
33
portfolio_app/toronto/services/__init__.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Data service layer for Toronto neighbourhood dashboard."""
|
||||
|
||||
from .geometry_service import (
|
||||
get_cmhc_zones_geojson,
|
||||
get_neighbourhoods_geojson,
|
||||
)
|
||||
from .neighbourhood_service import (
|
||||
get_amenities_data,
|
||||
get_city_averages,
|
||||
get_demographics_data,
|
||||
get_housing_data,
|
||||
get_neighbourhood_details,
|
||||
get_neighbourhood_list,
|
||||
get_overview_data,
|
||||
get_rankings,
|
||||
get_safety_data,
|
||||
)
|
||||
|
||||
# Public API of the services package: every name imported above from the two
# service modules is re-exported here. Kept as a plain literal list so that
# linters and type checkers can read it statically.
__all__ = [
    # Neighbourhood data (imported from .neighbourhood_service)
    "get_overview_data",
    "get_housing_data",
    "get_safety_data",
    "get_demographics_data",
    "get_amenities_data",
    "get_neighbourhood_details",
    "get_neighbourhood_list",
    "get_rankings",
    "get_city_averages",
    # Geometry (imported from .geometry_service)
    "get_neighbourhoods_geojson",
    "get_cmhc_zones_geojson",
]
|
||||
176
portfolio_app/toronto/services/geometry_service.py
Normal file
176
portfolio_app/toronto/services/geometry_service.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Service layer for generating GeoJSON from PostGIS geometry."""
|
||||
|
||||
import json
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
from sqlalchemy import text
|
||||
|
||||
from portfolio_app.toronto.models import get_engine
|
||||
|
||||
|
||||
def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
    """Run ``sql`` against the project engine and return the rows as a DataFrame.

    Nothing is caught here: connection and query errors propagate to the
    caller, which decides what fallback to use.

    Args:
        sql: SQL text; it is wrapped with ``sqlalchemy.text`` before execution.
        params: Values for the ``:name`` placeholders in ``sql``, if any.

    Returns:
        The query result as produced by ``pandas.read_sql``.
    """
    db_engine = get_engine()
    with db_engine.connect() as connection:
        frame = pd.read_sql(text(sql), connection, params=params)
    return frame
|
||||
|
||||
|
||||
@lru_cache(maxsize=8)
def _load_neighbourhoods_geojson(year: int) -> dict[str, Any]:
    """Query mart_neighbourhood_overview and build the FeatureCollection.

    Failure is signalled by raising rather than by returning a placeholder:
    ``lru_cache`` only memoises calls that return, so a database outage or a
    table that has not been loaded yet is retried on the next call instead of
    being cached as a permanently empty map.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        GeoJSON FeatureCollection dictionary with one feature per row that
        has a geometry.

    Raises:
        LookupError: If the query returns no rows for ``year``.
        Exception: Anything ``_execute_query`` raises on database errors.
    """
    # Query geometries with ST_AsGeoJSON
    sql = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            ST_AsGeoJSON(geometry)::json as geom,
            population,
            livability_score
        FROM mart_neighbourhood_overview
        WHERE year = :year
        AND geometry IS NOT NULL
    """
    df = _execute_query(sql, {"year": year})
    if df.empty:
        raise LookupError(f"no neighbourhood geometries found for year {year}")

    # Build GeoJSON features
    features = []
    for row in df.to_dict("records"):
        geom = row["geom"]
        if geom is None:
            continue

        # Handle geometry that might be a string or dict
        if isinstance(geom, str):
            geom = json.loads(geom)

        # Cast once to a plain int so the feature id and the property agree
        # and neither carries a numpy scalar into JSON serialisation.
        neighbourhood_id = int(row["neighbourhood_id"])
        population = row["population"]
        livability_score = row["livability_score"]

        features.append(
            {
                "type": "Feature",
                "id": neighbourhood_id,
                "properties": {
                    "neighbourhood_id": neighbourhood_id,
                    "neighbourhood_name": row["neighbourhood_name"],
                    "population": int(population) if pd.notna(population) else None,
                    "livability_score": float(livability_score)
                    if pd.notna(livability_score)
                    else None,
                },
                "geometry": geom,
            }
        )

    return {
        "type": "FeatureCollection",
        "features": features,
    }


def get_neighbourhoods_geojson(year: int = 2021) -> dict[str, Any]:
    """Get GeoJSON FeatureCollection for all neighbourhoods.

    Queries mart_neighbourhood_overview for geometries and basic properties.
    Successful results are cached per year; failures and empty results are
    not cached, so they are retried on the next call.

    Args:
        year: Year to query for joining properties.

    Returns:
        GeoJSON FeatureCollection dictionary. An empty FeatureCollection is
        returned when the query fails or yields no rows. A successful result
        is the cached object itself and is shared between callers, so treat
        it as read-only.
    """
    import logging  # function-scope import: this module does not import logging

    try:
        return _load_neighbourhoods_geojson(year)
    except Exception:
        # Table might not exist or have data yet
        logging.getLogger(__name__).warning(
            "Could not load neighbourhood GeoJSON for year %s; returning empty collection",
            year,
            exc_info=True,
        )
        return _empty_geojson()


# Keep the lru_cache management hooks available on the public function for
# any caller that used them when the decorator sat directly on it.
get_neighbourhoods_geojson.cache_clear = _load_neighbourhoods_geojson.cache_clear  # type: ignore[attr-defined]
get_neighbourhoods_geojson.cache_info = _load_neighbourhoods_geojson.cache_info  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
@lru_cache(maxsize=4)
def _load_cmhc_zones_geojson() -> dict[str, Any]:
    """Query dim_cmhc_zone and build the FeatureCollection.

    Failure is signalled by raising rather than by returning a placeholder:
    ``lru_cache`` only memoises calls that return, so a database outage or an
    empty table is retried on the next call instead of being cached.

    Returns:
        GeoJSON FeatureCollection dictionary with one feature per zone that
        has a geometry.

    Raises:
        LookupError: If the query returns no rows.
        Exception: Anything ``_execute_query`` raises on database errors.
    """
    sql = """
        SELECT
            zone_code,
            zone_name,
            ST_AsGeoJSON(geometry)::json as geom
        FROM dim_cmhc_zone
        WHERE geometry IS NOT NULL
    """
    df = _execute_query(sql, {})
    if df.empty:
        raise LookupError("no CMHC zone geometries found")

    features = []
    for row in df.to_dict("records"):
        geom = row["geom"]
        if geom is None:
            continue

        # The geometry may arrive as JSON text or as an already-decoded dict.
        if isinstance(geom, str):
            geom = json.loads(geom)

        features.append(
            {
                "type": "Feature",
                "id": row["zone_code"],
                "properties": {
                    "zone_code": row["zone_code"],
                    "zone_name": row["zone_name"],
                },
                "geometry": geom,
            }
        )

    return {
        "type": "FeatureCollection",
        "features": features,
    }


def get_cmhc_zones_geojson() -> dict[str, Any]:
    """Get GeoJSON FeatureCollection for CMHC zones.

    Queries dim_cmhc_zone for zone geometries. A successful result is cached;
    failures and empty results are not, so they are retried on the next call.

    Returns:
        GeoJSON FeatureCollection dictionary. An empty FeatureCollection is
        returned when the query fails or yields no rows. A successful result
        is the cached object itself and is shared between callers, so treat
        it as read-only.
    """
    import logging  # function-scope import: this module does not import logging

    try:
        return _load_cmhc_zones_geojson()
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not load CMHC zone GeoJSON; returning empty collection",
            exc_info=True,
        )
        return _empty_geojson()


# Keep the lru_cache management hooks available on the public function for
# any caller that used them when the decorator sat directly on it.
get_cmhc_zones_geojson.cache_clear = _load_cmhc_zones_geojson.cache_clear  # type: ignore[attr-defined]
get_cmhc_zones_geojson.cache_info = _load_cmhc_zones_geojson.cache_info  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def get_neighbourhood_geometry(neighbourhood_id: int) -> dict[str, Any] | None:
    """Look up the GeoJSON geometry of one neighbourhood in dim_neighbourhood.

    Args:
        neighbourhood_id: The neighbourhood ID.

    Returns:
        The geometry as a dict, or None when the query fails, matches no row,
        or the stored geometry is null.
    """
    geometry_query = """
        SELECT ST_AsGeoJSON(geometry)::json as geom
        FROM dim_neighbourhood
        WHERE neighbourhood_id = :neighbourhood_id
        AND geometry IS NOT NULL
    """
    lookup = {"neighbourhood_id": neighbourhood_id}

    try:
        matches = _execute_query(geometry_query, lookup)
    except Exception:
        return None

    if matches.empty:
        return None

    raw_geometry = matches.iloc[0]["geom"]
    if raw_geometry is None:
        return None
    # The value is either JSON text that still needs decoding or a mapping.
    if isinstance(raw_geometry, str):
        decoded: dict[str, Any] = json.loads(raw_geometry)
        return decoded
    return dict(raw_geometry)
|
||||
|
||||
|
||||
def _empty_geojson() -> dict[str, Any]:
    """Build a fresh GeoJSON FeatureCollection that contains no features."""
    return dict(type="FeatureCollection", features=[])
|
||||
392
portfolio_app/toronto/services/neighbourhood_service.py
Normal file
392
portfolio_app/toronto/services/neighbourhood_service.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""Service layer for querying neighbourhood data from dbt marts."""
|
||||
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
from sqlalchemy import text
|
||||
|
||||
from portfolio_app.toronto.models import get_engine
|
||||
|
||||
|
||||
def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
|
||||
"""Execute SQL query and return DataFrame.
|
||||
|
||||
Args:
|
||||
sql: SQL query string.
|
||||
params: Query parameters.
|
||||
|
||||
Returns:
|
||||
pandas DataFrame with results, or empty DataFrame on error.
|
||||
"""
|
||||
try:
|
||||
engine = get_engine()
|
||||
with engine.connect() as conn:
|
||||
return pd.read_sql(text(sql), conn, params=params)
|
||||
except Exception:
|
||||
# Return empty DataFrame on connection or query error
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
def get_overview_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the overview row of every neighbourhood for one year.

    Reads mart_neighbourhood_overview, highest livability score first with
    missing scores last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year, population,
        median_household_income, livability_score, safety_score,
        affordability_score, amenity_score, crime_rate_per_100k,
        rent_to_income_pct, avg_rent_2bed and total_amenities_per_1000;
        empty when ``_execute_query`` hits an error.
    """
    query_params = {"year": year}
    overview_query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            population,
            median_household_income,
            livability_score,
            safety_score,
            affordability_score,
            amenity_score,
            crime_rate_per_100k,
            rent_to_income_pct,
            avg_rent_2bed,
            total_amenities_per_1000
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY livability_score DESC NULLS LAST
    """
    return _execute_query(overview_query, query_params)
|
||||
|
||||
|
||||
def get_housing_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the housing row of every neighbourhood for one year.

    Reads mart_neighbourhood_housing, lowest affordability_index first with
    missing values last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with tenure shares, dwelling value, household income,
        average rents by unit size, vacancy_rate, total_rental_units,
        rent_to_income_pct, is_affordable, affordability_index,
        rent_yoy_change_pct and income_quintile per neighbourhood; empty when
        ``_execute_query`` hits an error.
    """
    query_params = {"year": year}
    housing_query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            pct_owner_occupied,
            pct_renter_occupied,
            average_dwelling_value,
            median_household_income,
            avg_rent_bachelor,
            avg_rent_1bed,
            avg_rent_2bed,
            avg_rent_3bed,
            vacancy_rate,
            total_rental_units,
            rent_to_income_pct,
            is_affordable,
            affordability_index,
            rent_yoy_change_pct,
            income_quintile
        FROM mart_neighbourhood_housing
        WHERE year = :year
        ORDER BY affordability_index ASC NULLS LAST
    """
    return _execute_query(housing_query, query_params)
|
||||
|
||||
|
||||
def get_safety_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the crime statistics row of every neighbourhood for one year.

    Reads mart_neighbourhood_safety, lowest total crime rate first with
    missing rates last. The mart column ``crime_rate_per_100k`` is exposed
    under the name ``total_crime_rate``.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year,
        total_crimes, total_crime_rate, violent/property/theft counts and
        rates, crime_yoy_change_pct and crime_trend; empty when
        ``_execute_query`` hits an error.
    """
    query_params = {"year": year}
    safety_query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            total_crimes,
            crime_rate_per_100k as total_crime_rate,
            violent_crimes,
            violent_crime_rate,
            property_crimes,
            property_crime_rate,
            theft_crimes,
            theft_rate,
            crime_yoy_change_pct,
            crime_trend
        FROM mart_neighbourhood_safety
        WHERE year = :year
        ORDER BY total_crime_rate ASC NULLS LAST
    """
    return _execute_query(safety_query, query_params)
|
||||
|
||||
|
||||
def get_demographics_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the demographic row of every neighbourhood for one census year.

    Reads mart_neighbourhood_demographics, largest population first with
    missing populations last. The mart's ``census_year`` column is returned
    under the name ``year``.

    Args:
        year: Value matched against the mart's ``census_year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year, population,
        population_density, population_change_pct, median and average
        household income, income_quintile, median_age, age-band shares,
        pct_bachelors_or_higher, unemployment_rate and diversity_index; empty
        when ``_execute_query`` hits an error.
    """
    query_params = {"year": year}
    demographics_query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            census_year as year,
            population,
            population_density,
            population_change_pct,
            median_household_income,
            average_household_income,
            income_quintile,
            median_age,
            pct_under_18,
            pct_18_to_64,
            pct_65_plus,
            pct_bachelors_or_higher,
            unemployment_rate,
            diversity_index
        FROM mart_neighbourhood_demographics
        WHERE census_year = :year
        ORDER BY population DESC NULLS LAST
    """
    return _execute_query(demographics_query, query_params)
|
||||
|
||||
|
||||
def get_amenities_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the amenities row of every neighbourhood for one year.

    Reads mart_neighbourhood_amenities, highest amenity_score first with
    missing scores last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, year, counts and
        per-1000 rates for parks, schools and childcare, total_amenities,
        total_amenities_per_1000, amenity_score and amenity_rank; empty when
        ``_execute_query`` hits an error.
    """
    query_params = {"year": year}
    amenities_query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            park_count,
            parks_per_1000,
            school_count,
            schools_per_1000,
            childcare_count,
            childcare_per_1000,
            total_amenities,
            total_amenities_per_1000,
            amenity_score,
            amenity_rank
        FROM mart_neighbourhood_amenities
        WHERE year = :year
        ORDER BY amenity_score DESC NULLS LAST
    """
    return _execute_query(amenities_query, query_params)
|
||||
|
||||
|
||||
def get_neighbourhood_details(
    neighbourhood_id: int, year: int = 2021
) -> dict[str, Any]:
    """Build a single-neighbourhood profile from all five marts.

    The overview mart drives the query; safety, housing, demographics and
    amenities are LEFT JOINed on neighbourhood and year (``census_year`` for
    demographics), so their fields may be missing in the result.

    Args:
        neighbourhood_id: The neighbourhood ID.
        year: Year to query.

    Returns:
        Mapping of column name to value taken from the first matching row,
        or an empty dict when nothing matches or the query fails.
    """
    detail_query = """
        SELECT
            o.neighbourhood_id,
            o.neighbourhood_name,
            o.year,
            o.population,
            o.median_household_income,
            o.livability_score,
            o.safety_score,
            o.affordability_score,
            o.amenity_score,
            s.total_crimes,
            s.crime_rate_per_100k,
            s.violent_crime_rate,
            s.property_crime_rate,
            h.avg_rent_2bed,
            h.vacancy_rate,
            h.rent_to_income_pct,
            h.affordability_index,
            h.pct_owner_occupied,
            h.pct_renter_occupied,
            d.median_age,
            d.diversity_index,
            d.unemployment_rate,
            d.pct_bachelors_or_higher,
            a.park_count,
            a.school_count,
            a.total_amenities
        FROM mart_neighbourhood_overview o
        LEFT JOIN mart_neighbourhood_safety s
            ON o.neighbourhood_id = s.neighbourhood_id
            AND o.year = s.year
        LEFT JOIN mart_neighbourhood_housing h
            ON o.neighbourhood_id = h.neighbourhood_id
            AND o.year = h.year
        LEFT JOIN mart_neighbourhood_demographics d
            ON o.neighbourhood_id = d.neighbourhood_id
            AND o.year = d.census_year
        LEFT JOIN mart_neighbourhood_amenities a
            ON o.neighbourhood_id = a.neighbourhood_id
            AND o.year = a.year
        WHERE o.neighbourhood_id = :neighbourhood_id
        AND o.year = :year
    """
    lookup = {"neighbourhood_id": neighbourhood_id, "year": year}
    profile = _execute_query(detail_query, lookup)

    if profile.empty:
        return {}

    # Only the first row is used; keys are normalised to plain strings.
    first_row = profile.iloc[0].to_dict()
    return {str(column): value for column, value in first_row.items()}
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
def _load_neighbourhood_records(year: int) -> tuple[dict[str, Any], ...]:
    """Query the dropdown records for ``year`` from mart_neighbourhood_overview.

    ``_execute_query`` reports errors as an empty DataFrame, so an empty
    result cannot be told apart from an outage. It is signalled by raising:
    ``lru_cache`` only memoises calls that return, which keeps a transient
    failure from being cached as a permanently empty list.

    Raises:
        LookupError: If the query yields no rows.
    """
    sql = """
        SELECT DISTINCT
            neighbourhood_id,
            neighbourhood_name,
            population
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY neighbourhood_name
    """
    df = _execute_query(sql, {"year": year})
    if df.empty:
        raise LookupError(f"no neighbourhoods found for year {year}")
    return tuple(
        {str(column): value for column, value in record.items()}
        for record in df.to_dict("records")
    )


def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
    """Get list of all neighbourhoods for dropdown selectors.

    Successful results are cached per year; empty results are not, so they
    are retried on the next call.

    Args:
        year: Year to query.

    Returns:
        List of dicts with neighbourhood_id, neighbourhood_name and
        population, ordered by name. Empty list when there is no data or the
        query failed. Each call returns fresh list and dict objects, so
        callers may modify the result without affecting the cache.
    """
    try:
        cached_records = _load_neighbourhood_records(year)
    except LookupError:
        return []
    return [dict(record) for record in cached_records]


# Keep the lru_cache management hooks available on the public function for
# any caller that used them when the decorator sat directly on it.
get_neighbourhood_list.cache_clear = _load_neighbourhood_records.cache_clear  # type: ignore[attr-defined]
get_neighbourhood_list.cache_info = _load_neighbourhood_records.cache_info  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def get_rankings(
    metric: str,
    year: int = 2021,
    top_n: int = 10,
    ascending: bool = True,
) -> pd.DataFrame:
    """Get top/bottom neighbourhoods for a specific metric.

    Two result sets are concatenated with UNION ALL: rows labelled
    ``'bottom'`` are the first ``top_n`` in the requested sort order, and
    rows labelled ``'top'`` are the first ``top_n`` in the opposite order.
    Rows whose metric is NULL are excluded.

    Args:
        metric: Column name to rank by. Must be a plain ASCII identifier;
            names without a known source table are looked up in
            mart_neighbourhood_overview.
        year: Year to query.
        top_n: Number of top and bottom records.
        ascending: If True, rank from lowest to highest (good for crime, rent).

    Returns:
        DataFrame with neighbourhood_id, neighbourhood_name, the metric
        column and rank_group. Empty when ``metric`` is not a valid
        identifier or the query fails.
    """
    # Map metrics to their source tables
    table_map = {
        "livability_score": "mart_neighbourhood_overview",
        "safety_score": "mart_neighbourhood_overview",
        "affordability_score": "mart_neighbourhood_overview",
        "amenity_score": "mart_neighbourhood_overview",
        "crime_rate_per_100k": "mart_neighbourhood_safety",
        "total_crime_rate": "mart_neighbourhood_safety",
        "avg_rent_2bed": "mart_neighbourhood_housing",
        "affordability_index": "mart_neighbourhood_housing",
        "population": "mart_neighbourhood_demographics",
        "median_household_income": "mart_neighbourhood_demographics",
    }
    # Metric names that are aliases of a differently named mart column:
    # get_safety_data exposes crime_rate_per_100k as total_crime_rate.
    column_map = {
        "total_crime_rate": "crime_rate_per_100k",
    }

    # Identifiers cannot be sent as bound parameters, so `metric` ends up in
    # the SQL text. Accept plain identifiers only, which rules out SQL
    # injection. An invalid name could never have matched a column, so the
    # result is the same empty frame a failed query produces.
    if not (isinstance(metric, str) and metric.isascii() and metric.isidentifier()):
        return pd.DataFrame()

    table = table_map.get(metric, "mart_neighbourhood_overview")
    column = column_map.get(metric, metric)
    year_col = "census_year" if "demographics" in table else "year"

    order = "ASC" if ascending else "DESC"
    reverse_order = "DESC" if ascending else "ASC"

    sql = f"""
        (
            SELECT neighbourhood_id, neighbourhood_name, {column} AS {metric}, 'bottom' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {order}
            LIMIT :top_n
        )
        UNION ALL
        (
            SELECT neighbourhood_id, neighbourhood_name, {column} AS {metric}, 'top' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {reverse_order}
            LIMIT :top_n
        )
    """
    return _execute_query(sql, {"year": year, "top_n": top_n})
|
||||
|
||||
|
||||
def get_city_averages(year: int = 2021) -> dict[str, Any]:
    """Aggregate mart_neighbourhood_overview into city-wide figures for a year.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        Dict with the averages of the four scores, median household income,
        crime rate, two-bedroom rent and rent-to-income share, plus
        total_population as a sum. Non-missing float values are rounded to
        two decimals. Empty dict when the query returns nothing or fails.
    """
    averages_query = """
        SELECT
            AVG(livability_score) as avg_livability_score,
            AVG(safety_score) as avg_safety_score,
            AVG(affordability_score) as avg_affordability_score,
            AVG(amenity_score) as avg_amenity_score,
            SUM(population) as total_population,
            AVG(median_household_income) as avg_median_income,
            AVG(crime_rate_per_100k) as avg_crime_rate,
            AVG(avg_rent_2bed) as avg_rent_2bed,
            AVG(rent_to_income_pct) as avg_rent_to_income
        FROM mart_neighbourhood_overview
        WHERE year = :year
    """
    aggregates = _execute_query(averages_query, {"year": year})

    if aggregates.empty:
        return {}

    def _tidy(value: Any) -> Any:
        # Round numeric values; anything missing or non-float passes through.
        if pd.notna(value) and isinstance(value, float):
            return round(value, 2)
        return value

    first_row = aggregates.iloc[0].to_dict()
    return {str(name): _tidy(value) for name, value in first_row.items()}
|
||||
Reference in New Issue
Block a user