Implement full 5-tab Toronto Neighbourhood Dashboard with real data connectivity: Dashboard Structure: - Overview tab with livability scores and rankings - Housing tab with affordability metrics - Safety tab with crime statistics - Demographics tab with population/income data - Amenities tab with parks, schools, transit Figure Factories (portfolio_app/figures/): - bar_charts.py: ranking, stacked, horizontal bars - scatter.py: scatter plots, bubble charts - radar.py: spider/radar charts - demographics.py: donut, age pyramid, income distribution Service Layer (portfolio_app/toronto/services/): - neighbourhood_service.py: queries dbt marts for all tab data - geometry_service.py: generates GeoJSON from PostGIS - Graceful error handling when database unavailable Callbacks (portfolio_app/pages/toronto/callbacks/): - map_callbacks.py: choropleth updates, map click handling - chart_callbacks.py: supporting chart updates - selection_callbacks.py: dropdown handlers, KPI updates Data Pipeline (scripts/data/): - load_toronto_data.py: orchestration script with CLI flags Lessons Learned: - Graceful error handling in service layers - Modular callback structure for multi-tab dashboards - Figure factory pattern for reusable charts Closes: #64, #65, #66, #67, #68, #69, #70 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
393 lines
12 KiB
Python
393 lines
12 KiB
Python
"""Service layer for querying neighbourhood data from dbt marts."""
|
|
|
|
from functools import lru_cache
|
|
from typing import Any
|
|
|
|
import pandas as pd
|
|
from sqlalchemy import text
|
|
|
|
from portfolio_app.toronto.models import get_engine
|
|
|
|
|
|
def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
    """Execute a SQL query and return the result as a DataFrame.

    The call is deliberately best-effort: any failure while obtaining the
    engine, opening the connection, or running the query is logged and
    converted into an empty DataFrame, so callers can render an empty
    state instead of crashing.

    Args:
        sql: SQL query string. It is wrapped in ``sqlalchemy.text`` before
            execution.
        params: Bind parameters passed through to ``pandas.read_sql``.

    Returns:
        pandas DataFrame with the query results, or an empty DataFrame if
        anything went wrong.
    """
    try:
        engine = get_engine()
        with engine.connect() as conn:
            return pd.read_sql(text(sql), conn, params=params)
    except Exception:
        # Imported here so the block stays self-contained; the failure is
        # recorded with its traceback rather than silently discarded.
        import logging

        logging.getLogger(__name__).exception(
            "Neighbourhood query failed; returning empty DataFrame"
        )
        return pd.DataFrame()
|
|
|
|
|
|
def get_overview_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the overview rows for every neighbourhood in one year.

    Reads mart_neighbourhood_overview, which carries the livability score
    and its component scores.

    Args:
        year: Census year to query.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        population, median_household_income, livability_score,
        safety_score, affordability_score, amenity_score,
        crime_rate_per_100k, rent_to_income_pct, avg_rent_2bed and
        total_amenities_per_1000, ordered by livability_score (highest
        first, NULLs last).
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            population,
            median_household_income,
            livability_score,
            safety_score,
            affordability_score,
            amenity_score,
            crime_rate_per_100k,
            rent_to_income_pct,
            avg_rent_2bed,
            total_amenities_per_1000
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY livability_score DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
|
|
|
|
|
|
def get_housing_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the housing rows for every neighbourhood in one year.

    Reads mart_neighbourhood_housing, which carries the affordability
    metrics.

    Args:
        year: Year to query.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        pct_owner_occupied, pct_renter_occupied, average_dwelling_value,
        median_household_income, avg_rent_bachelor, avg_rent_1bed,
        avg_rent_2bed, avg_rent_3bed, vacancy_rate, total_rental_units,
        rent_to_income_pct, is_affordable, affordability_index,
        rent_yoy_change_pct and income_quintile, ordered by
        affordability_index (lowest first, NULLs last).
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            pct_owner_occupied,
            pct_renter_occupied,
            average_dwelling_value,
            median_household_income,
            avg_rent_bachelor,
            avg_rent_1bed,
            avg_rent_2bed,
            avg_rent_3bed,
            vacancy_rate,
            total_rental_units,
            rent_to_income_pct,
            is_affordable,
            affordability_index,
            rent_yoy_change_pct,
            income_quintile
        FROM mart_neighbourhood_housing
        WHERE year = :year
        ORDER BY affordability_index ASC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
|
|
|
|
|
|
def get_safety_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the safety/crime rows for every neighbourhood in one year.

    Reads mart_neighbourhood_safety. The mart's crime_rate_per_100k column
    is returned under the name total_crime_rate.

    Args:
        year: Year to query.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        total_crimes, total_crime_rate, violent_crimes, violent_crime_rate,
        property_crimes, property_crime_rate, theft_crimes, theft_rate,
        crime_yoy_change_pct and crime_trend, ordered by total_crime_rate
        (lowest first, NULLs last).
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            total_crimes,
            crime_rate_per_100k as total_crime_rate,
            violent_crimes,
            violent_crime_rate,
            property_crimes,
            property_crime_rate,
            theft_crimes,
            theft_rate,
            crime_yoy_change_pct,
            crime_trend
        FROM mart_neighbourhood_safety
        WHERE year = :year
        ORDER BY total_crime_rate ASC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
|
|
|
|
|
|
def get_demographics_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the demographic rows for every neighbourhood in one year.

    Reads mart_neighbourhood_demographics. That mart keys on census_year,
    which is returned under the name year.

    Args:
        year: Census year to query.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        population, population_density, population_change_pct,
        median_household_income, average_household_income,
        income_quintile, median_age, pct_under_18, pct_18_to_64,
        pct_65_plus, pct_bachelors_or_higher, unemployment_rate and
        diversity_index, ordered by population (largest first, NULLs last).
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            census_year as year,
            population,
            population_density,
            population_change_pct,
            median_household_income,
            average_household_income,
            income_quintile,
            median_age,
            pct_under_18,
            pct_18_to_64,
            pct_65_plus,
            pct_bachelors_or_higher,
            unemployment_rate,
            diversity_index
        FROM mart_neighbourhood_demographics
        WHERE census_year = :year
        ORDER BY population DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
|
|
|
|
|
|
def get_amenities_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the amenity rows for every neighbourhood in one year.

    Reads mart_neighbourhood_amenities for park, school and childcare
    counts plus the derived amenity score and rank.

    Args:
        year: Year to query.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        park_count, parks_per_1000, school_count, schools_per_1000,
        childcare_count, childcare_per_1000, total_amenities,
        total_amenities_per_1000, amenity_score and amenity_rank, ordered
        by amenity_score (highest first, NULLs last).
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            park_count,
            parks_per_1000,
            school_count,
            schools_per_1000,
            childcare_count,
            childcare_per_1000,
            total_amenities,
            total_amenities_per_1000,
            amenity_score,
            amenity_rank
        FROM mart_neighbourhood_amenities
        WHERE year = :year
        ORDER BY amenity_score DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)
|
|
|
|
|
|
def get_neighbourhood_details(
    neighbourhood_id: int, year: int = 2021
) -> dict[str, Any]:
    """Build a single-neighbourhood profile from all mart tables.

    The overview mart is the driving table; safety, housing, demographics
    and amenities are LEFT JOINed on neighbourhood id and year (the
    demographics mart is matched on its census_year column).

    Args:
        neighbourhood_id: The neighbourhood ID.
        year: Year to query.

    Returns:
        Dictionary mapping column name to value for the first matching
        row, or an empty dict when the query returns no rows.
    """
    query = """
        SELECT
            o.neighbourhood_id,
            o.neighbourhood_name,
            o.year,
            o.population,
            o.median_household_income,
            o.livability_score,
            o.safety_score,
            o.affordability_score,
            o.amenity_score,
            s.total_crimes,
            s.crime_rate_per_100k,
            s.violent_crime_rate,
            s.property_crime_rate,
            h.avg_rent_2bed,
            h.vacancy_rate,
            h.rent_to_income_pct,
            h.affordability_index,
            h.pct_owner_occupied,
            h.pct_renter_occupied,
            d.median_age,
            d.diversity_index,
            d.unemployment_rate,
            d.pct_bachelors_or_higher,
            a.park_count,
            a.school_count,
            a.total_amenities
        FROM mart_neighbourhood_overview o
        LEFT JOIN mart_neighbourhood_safety s
            ON o.neighbourhood_id = s.neighbourhood_id
            AND o.year = s.year
        LEFT JOIN mart_neighbourhood_housing h
            ON o.neighbourhood_id = h.neighbourhood_id
            AND o.year = h.year
        LEFT JOIN mart_neighbourhood_demographics d
            ON o.neighbourhood_id = d.neighbourhood_id
            AND o.year = d.census_year
        LEFT JOIN mart_neighbourhood_amenities a
            ON o.neighbourhood_id = a.neighbourhood_id
            AND o.year = a.year
        WHERE o.neighbourhood_id = :neighbourhood_id
            AND o.year = :year
    """
    bind_params = {"neighbourhood_id": neighbourhood_id, "year": year}
    frame = _execute_query(query, bind_params)

    if not frame.empty:
        first_row = frame.iloc[0].to_dict()
        # Column labels are coerced to str so the result is a plain
        # string-keyed mapping.
        return {str(column): value for column, value in first_row.items()}

    return {}
|
|
|
|
|
|
@lru_cache(maxsize=32)
def _load_neighbourhood_list(year: int) -> tuple[dict[str, Any], ...]:
    """Fetch and memoize the neighbourhood records for one year.

    Args:
        year: Year to query.

    Returns:
        Tuple of record dicts with neighbourhood_id, neighbourhood_name
        and population, ordered by neighbourhood_name.

    Raises:
        LookupError: If the query produced no rows. ``_execute_query``
            returns an empty DataFrame on any error, and ``lru_cache``
            does not store raised exceptions, so a failed lookup is
            retried on the next call instead of being cached.
    """
    sql = """
        SELECT DISTINCT
            neighbourhood_id,
            neighbourhood_name,
            population
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY neighbourhood_name
    """
    df = _execute_query(sql, {"year": year})
    if df.empty:
        raise LookupError(f"no neighbourhoods found for year {year}")
    return tuple(df.to_dict("records"))  # type: ignore[arg-type]


def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
    """Get list of all neighbourhoods for dropdown selectors.

    Non-empty results are cached per year. Empty results are never cached,
    so a temporary database outage does not leave the list empty for the
    lifetime of the process.

    Args:
        year: Year to query.

    Returns:
        List of dicts with neighbourhood_id, neighbourhood_name, and
        population; an empty list when no rows are available.
    """
    try:
        cached_records = _load_neighbourhood_list(year)
    except LookupError:
        return []
    # Hand out fresh list/dict objects so callers cannot mutate the cache.
    return [dict(record) for record in cached_records]


# Keep the cache-management hooks that the lru_cache-decorated function exposed.
get_neighbourhood_list.cache_clear = _load_neighbourhood_list.cache_clear  # type: ignore[attr-defined]
get_neighbourhood_list.cache_info = _load_neighbourhood_list.cache_info  # type: ignore[attr-defined]
|
|
|
|
|
|
def get_rankings(
    metric: str,
    year: int = 2021,
    top_n: int = 10,
    ascending: bool = True,
) -> pd.DataFrame:
    """Get top/bottom neighbourhoods for a specific metric.

    Two ranked slices of ``top_n`` rows each are combined with UNION ALL.
    The slice taken in the requested sort order is labelled ``'bottom'``
    in the ``rank_group`` column and the slice taken in the opposite order
    is labelled ``'top'``.

    Args:
        metric: Column name to rank by. It is interpolated into the SQL
            text, so it must be a plain ASCII identifier. Metrics without
            a known home table are looked up in
            mart_neighbourhood_overview.
        year: Year to query.
        top_n: Number of top and bottom records.
        ascending: If True, rank from lowest to highest (good for crime, rent).

    Returns:
        DataFrame with columns neighbourhood_id, neighbourhood_name,
        ``metric`` and rank_group. An empty DataFrame is returned when
        ``metric`` is not a valid identifier.
    """
    # Map metrics to their source tables
    table_map = {
        "livability_score": "mart_neighbourhood_overview",
        "safety_score": "mart_neighbourhood_overview",
        "affordability_score": "mart_neighbourhood_overview",
        "amenity_score": "mart_neighbourhood_overview",
        "crime_rate_per_100k": "mart_neighbourhood_safety",
        "total_crime_rate": "mart_neighbourhood_safety",
        "avg_rent_2bed": "mart_neighbourhood_housing",
        "affordability_index": "mart_neighbourhood_housing",
        "population": "mart_neighbourhood_demographics",
        "median_household_income": "mart_neighbourhood_demographics",
    }
    # get_safety_data reads this metric from the safety mart as
    # "crime_rate_per_100k as total_crime_rate", so rank on the underlying
    # column and expose it under the requested name.
    column_aliases = {"total_crime_rate": "crime_rate_per_100k"}

    # The metric name cannot be sent as a bind parameter, so refuse
    # anything that is not a bare identifier before building the SQL.
    if not (isinstance(metric, str) and metric.isascii() and metric.isidentifier()):
        return pd.DataFrame()

    table = table_map.get(metric, "mart_neighbourhood_overview")
    year_col = "census_year" if "demographics" in table else "year"
    column = column_aliases.get(metric, metric)

    order = "ASC" if ascending else "DESC"
    reverse_order = "DESC" if ascending else "ASC"

    sql = f"""
        (
            SELECT neighbourhood_id, neighbourhood_name, {column} AS {metric}, 'bottom' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {order}
            LIMIT :top_n
        )
        UNION ALL
        (
            SELECT neighbourhood_id, neighbourhood_name, {column} AS {metric}, 'top' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {reverse_order}
            LIMIT :top_n
        )
    """
    return _execute_query(sql, {"year": year, "top_n": top_n})
|
|
|
|
|
|
def get_city_averages(year: int = 2021) -> dict[str, Any]:
    """Compute city-wide aggregates from the overview mart.

    Args:
        year: Year to query.

    Returns:
        Dictionary keyed by aggregate name (avg_livability_score,
        avg_safety_score, avg_affordability_score, avg_amenity_score,
        total_population, avg_median_income, avg_crime_rate,
        avg_rent_2bed, avg_rent_to_income). Non-missing float values are
        rounded to two decimals. An empty dict is returned when the query
        yields no rows.
    """
    query = """
        SELECT
            AVG(livability_score) as avg_livability_score,
            AVG(safety_score) as avg_safety_score,
            AVG(affordability_score) as avg_affordability_score,
            AVG(amenity_score) as avg_amenity_score,
            SUM(population) as total_population,
            AVG(median_household_income) as avg_median_income,
            AVG(crime_rate_per_100k) as avg_crime_rate,
            AVG(avg_rent_2bed) as avg_rent_2bed,
            AVG(rent_to_income_pct) as avg_rent_to_income
        FROM mart_neighbourhood_overview
        WHERE year = :year
    """
    frame = _execute_query(query, {"year": year})

    if frame.empty:
        return {}

    aggregates = frame.iloc[0].to_dict()
    # Round non-missing floats only; everything else passes through as-is.
    return {
        str(name): (
            round(amount, 2)
            if pd.notna(amount) and isinstance(amount, float)
            else amount
        )
        for name, amount in aggregates.items()
    }
|