feat: Complete Phase 5 dashboard implementation

Implement full 5-tab Toronto Neighbourhood Dashboard with real data connectivity:

Dashboard Structure:
- Overview tab with livability scores and rankings
- Housing tab with affordability metrics
- Safety tab with crime statistics
- Demographics tab with population/income data
- Amenities tab with parks, schools, transit

Figure Factories (portfolio_app/figures/):
- bar_charts.py: ranking, stacked, horizontal bars
- scatter.py: scatter plots, bubble charts
- radar.py: spider/radar charts
- demographics.py: donut, age pyramid, income distribution

Service Layer (portfolio_app/toronto/services/):
- neighbourhood_service.py: queries dbt marts for all tab data
- geometry_service.py: generates GeoJSON from PostGIS
- Graceful error handling when database unavailable

Callbacks (portfolio_app/pages/toronto/callbacks/):
- map_callbacks.py: choropleth updates, map click handling
- chart_callbacks.py: supporting chart updates
- selection_callbacks.py: dropdown handlers, KPI updates

Data Pipeline (scripts/data/):
- load_toronto_data.py: orchestration script with CLI flags

Lessons Learned:
- Graceful error handling in service layers
- Modular callback structure for multi-tab dashboards
- Figure factory pattern for reusable charts

Closes: #64, #65, #66, #67, #68, #69, #70

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 11:46:18 -05:00
parent 3054441630
commit c9cf744d84
27 changed files with 4377 additions and 1770 deletions
--- a/portfolio_app/toronto/services/__init__.py
+++ b/portfolio_app/toronto/services/__init__.py
@@ -0,0 +1,33 @@
+"""Data service layer for Toronto neighbourhood dashboard."""
+
+from .geometry_service import (
+    get_cmhc_zones_geojson,
+    get_neighbourhoods_geojson,
+)
+from .neighbourhood_service import (
+    get_amenities_data,
+    get_city_averages,
+    get_demographics_data,
+    get_housing_data,
+    get_neighbourhood_details,
+    get_neighbourhood_list,
+    get_overview_data,
+    get_rankings,
+    get_safety_data,
+)
+
+# Public API of the services package: every name listed here is imported
+# above from neighbourhood_service or geometry_service.
+# NOTE(review): geometry_service.get_neighbourhood_geometry is a public
+# function but is not re-exported here - confirm the omission is intentional.
+__all__ = [
+    # Neighbourhood data
+    "get_overview_data",
+    "get_housing_data",
+    "get_safety_data",
+    "get_demographics_data",
+    "get_amenities_data",
+    "get_neighbourhood_details",
+    "get_neighbourhood_list",
+    "get_rankings",
+    "get_city_averages",
+    # Geometry
+    "get_neighbourhoods_geojson",
+    "get_cmhc_zones_geojson",
+]
--- a/portfolio_app/toronto/services/geometry_service.py
+++ b/portfolio_app/toronto/services/geometry_service.py
@@ -0,0 +1,176 @@
+"""Service layer for generating GeoJSON from PostGIS geometry."""
+
+import json
+from functools import lru_cache
+from typing import Any
+
+import pandas as pd
+from sqlalchemy import text
+
+from portfolio_app.toronto.models import get_engine
+
+
+def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
+    """Run ``sql`` on a fresh connection and return the rows as a DataFrame.
+
+    Errors are not handled here; they propagate to the caller.
+
+    Args:
+        sql: SQL query string, wrapped in ``text()`` before execution.
+        params: Bound parameters for the query, if any.
+
+    Returns:
+        pandas DataFrame with the query results.
+    """
+    with get_engine().connect() as connection:
+        frame = pd.read_sql(text(sql), connection, params=params)
+    return frame
+
+
+@lru_cache(maxsize=8)
+def _load_neighbourhoods_geojson(year: int) -> dict[str, Any]:
+    """Query PostGIS and build the neighbourhood FeatureCollection for one year.
+
+    Failures are raised rather than mapped to a fallback value: ``lru_cache``
+    stores return values only, so raising keeps failed or empty loads out of
+    the cache and lets the next call retry.
+
+    Args:
+        year: Year used to filter mart_neighbourhood_overview.
+
+    Returns:
+        GeoJSON FeatureCollection dictionary built from the returned rows.
+
+    Raises:
+        LookupError: If the query returns no rows for ``year``.
+    """
+    # Query geometries with ST_AsGeoJSON
+    sql = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            ST_AsGeoJSON(geometry)::json as geom,
+            population,
+            livability_score
+        FROM mart_neighbourhood_overview
+        WHERE year = :year
+          AND geometry IS NOT NULL
+    """
+    df = _execute_query(sql, {"year": year})
+    if df.empty:
+        raise LookupError(f"no neighbourhood geometry rows for year {year}")
+
+    # Build GeoJSON features
+    features = []
+    for _, row in df.iterrows():
+        geom = row["geom"]
+        if geom is None:
+            continue
+
+        # Handle geometry that might be a string or dict
+        if isinstance(geom, str):
+            geom = json.loads(geom)
+
+        # Use one plain int for both the feature id and the property so the
+        # two always agree in type.
+        neighbourhood_id = int(row["neighbourhood_id"])
+        feature = {
+            "type": "Feature",
+            "id": neighbourhood_id,
+            "properties": {
+                "neighbourhood_id": neighbourhood_id,
+                "neighbourhood_name": row["neighbourhood_name"],
+                "population": int(row["population"])
+                if pd.notna(row["population"])
+                else None,
+                "livability_score": float(row["livability_score"])
+                if pd.notna(row["livability_score"])
+                else None,
+            },
+            "geometry": geom,
+        }
+        features.append(feature)
+
+    return {
+        "type": "FeatureCollection",
+        "features": features,
+    }
+
+
+def get_neighbourhoods_geojson(year: int = 2021) -> dict[str, Any]:
+    """Get GeoJSON FeatureCollection for all neighbourhoods.
+
+    Queries mart_neighbourhood_overview for geometries and basic properties
+    (population, livability_score). Successful, non-empty results are cached
+    per year. Failures and empty results are not cached, so a later call
+    retries the query instead of replaying the fallback.
+
+    Args:
+        year: Year to query for joining properties.
+
+    Returns:
+        GeoJSON FeatureCollection dictionary. An empty collection is returned
+        when the query fails or yields no rows. A cached dictionary is shared
+        between callers and should be treated as read-only.
+    """
+    try:
+        return _load_neighbourhoods_geojson(year)
+    except LookupError:
+        # No rows for this year (table may not have data yet).
+        return _empty_geojson()
+    except Exception:
+        # Table might not exist or the database may be unreachable. Keep the
+        # graceful fallback, but leave a trace for whoever debugs it.
+        import logging
+
+        logging.getLogger(__name__).exception(
+            "Failed to load neighbourhood GeoJSON for year %s", year
+        )
+        return _empty_geojson()
+
+
+# Keep the cache helpers that the ``lru_cache`` decorator exposed on the
+# public name, so existing ``cache_clear()`` / ``cache_info()`` calls still work.
+get_neighbourhoods_geojson.cache_clear = (  # type: ignore[attr-defined]
+    _load_neighbourhoods_geojson.cache_clear
+)
+get_neighbourhoods_geojson.cache_info = (  # type: ignore[attr-defined]
+    _load_neighbourhoods_geojson.cache_info
+)
+
+
+@lru_cache(maxsize=4)
+def _load_cmhc_zones_geojson() -> dict[str, Any]:
+    """Query dim_cmhc_zone and build the CMHC zone FeatureCollection.
+
+    Failures are raised rather than mapped to a fallback value: ``lru_cache``
+    stores return values only, so raising keeps failed or empty loads out of
+    the cache and lets the next call retry.
+
+    Returns:
+        GeoJSON FeatureCollection dictionary built from the returned rows.
+
+    Raises:
+        LookupError: If the query returns no rows.
+    """
+    sql = """
+        SELECT
+            zone_code,
+            zone_name,
+            ST_AsGeoJSON(geometry)::json as geom
+        FROM dim_cmhc_zone
+        WHERE geometry IS NOT NULL
+    """
+    df = _execute_query(sql, {})
+    if df.empty:
+        raise LookupError("no CMHC zone geometry rows")
+
+    features = []
+    for _, row in df.iterrows():
+        geom = row["geom"]
+        if geom is None:
+            continue
+
+        # Geometry may arrive as a JSON string or an already-decoded dict.
+        if isinstance(geom, str):
+            geom = json.loads(geom)
+
+        feature = {
+            "type": "Feature",
+            "id": row["zone_code"],
+            "properties": {
+                "zone_code": row["zone_code"],
+                "zone_name": row["zone_name"],
+            },
+            "geometry": geom,
+        }
+        features.append(feature)
+
+    return {
+        "type": "FeatureCollection",
+        "features": features,
+    }
+
+
+def get_cmhc_zones_geojson() -> dict[str, Any]:
+    """Get GeoJSON FeatureCollection for CMHC zones.
+
+    Queries dim_cmhc_zone for zone geometries. A successful, non-empty result
+    is cached. Failures and empty results are not cached, so a later call
+    retries the query instead of replaying the fallback.
+
+    Returns:
+        GeoJSON FeatureCollection dictionary. An empty collection is returned
+        when the query fails or yields no rows. The cached dictionary is
+        shared between callers and should be treated as read-only.
+    """
+    try:
+        return _load_cmhc_zones_geojson()
+    except LookupError:
+        # No zone rows loaded yet.
+        return _empty_geojson()
+    except Exception:
+        # Keep the graceful fallback, but leave a trace for debugging.
+        import logging
+
+        logging.getLogger(__name__).exception("Failed to load CMHC zone GeoJSON")
+        return _empty_geojson()
+
+
+# Keep the cache helpers that the ``lru_cache`` decorator exposed on the
+# public name, so existing ``cache_clear()`` / ``cache_info()`` calls still work.
+get_cmhc_zones_geojson.cache_clear = (  # type: ignore[attr-defined]
+    _load_cmhc_zones_geojson.cache_clear
+)
+get_cmhc_zones_geojson.cache_info = (  # type: ignore[attr-defined]
+    _load_cmhc_zones_geojson.cache_info
+)
+
+
+def get_neighbourhood_geometry(neighbourhood_id: int) -> dict[str, Any] | None:
+    """Fetch the GeoJSON geometry of one neighbourhood from dim_neighbourhood.
+
+    Args:
+        neighbourhood_id: The neighbourhood ID.
+
+    Returns:
+        GeoJSON geometry dict, or None when the query fails, no row matches,
+        or the stored geometry is null.
+    """
+    query = """
+        SELECT ST_AsGeoJSON(geometry)::json as geom
+        FROM dim_neighbourhood
+        WHERE neighbourhood_id = :neighbourhood_id
+          AND geometry IS NOT NULL
+    """
+    bind_params = {"neighbourhood_id": neighbourhood_id}
+
+    try:
+        frame = _execute_query(query, bind_params)
+    except Exception:
+        return None
+
+    if frame.empty:
+        return None
+
+    raw = frame.iloc[0]["geom"]
+    if raw is None:
+        return None
+    if isinstance(raw, str):
+        # Geometry came back as JSON text rather than a decoded mapping.
+        parsed: dict[str, Any] = json.loads(raw)
+        return parsed
+    return dict(raw)
+
+
+def _empty_geojson() -> dict[str, Any]:
+    """Build a new GeoJSON FeatureCollection that contains no features."""
+    collection: dict[str, Any] = {"type": "FeatureCollection"}
+    collection["features"] = []
+    return collection
--- a/portfolio_app/toronto/services/neighbourhood_service.py
+++ b/portfolio_app/toronto/services/neighbourhood_service.py
@@ -0,0 +1,392 @@
+"""Service layer for querying neighbourhood data from dbt marts."""
+
+from functools import lru_cache
+from typing import Any
+
+import pandas as pd
+from sqlalchemy import text
+
+from portfolio_app.toronto.models import get_engine
+
+
+def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
+    """Execute SQL query and return DataFrame.
+
+    This is a deliberate best-effort helper: any failure (engine creation,
+    connection, or the query itself) is logged with its traceback and turned
+    into an empty DataFrame, so callers can render an empty state instead of
+    crashing.
+
+    Args:
+        sql: SQL query string.
+        params: Query parameters.
+
+    Returns:
+        pandas DataFrame with results, or empty DataFrame on error.
+    """
+    try:
+        engine = get_engine()
+        with engine.connect() as conn:
+            return pd.read_sql(text(sql), conn, params=params)
+    except Exception:
+        # Keep the empty-DataFrame fallback, but record why the query failed.
+        # A silent swallow makes an outage look the same as "no data".
+        import logging
+
+        logging.getLogger(__name__).exception(
+            "Neighbourhood query failed; returning empty DataFrame"
+        )
+        return pd.DataFrame()
+
+
+def get_overview_data(year: int = 2021) -> pd.DataFrame:
+    """Load the overview mart rows for every neighbourhood in one year.
+
+    Reads mart_neighbourhood_overview, ordered by livability_score from
+    highest to lowest with NULL scores last.
+
+    Args:
+        year: Census year to query.
+
+    Returns:
+        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
+        population, median_household_income, livability_score, safety_score,
+        affordability_score, amenity_score, crime_rate_per_100k,
+        rent_to_income_pct, avg_rent_2bed, total_amenities_per_1000.
+    """
+    query = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            year,
+            population,
+            median_household_income,
+            livability_score,
+            safety_score,
+            affordability_score,
+            amenity_score,
+            crime_rate_per_100k,
+            rent_to_income_pct,
+            avg_rent_2bed,
+            total_amenities_per_1000
+        FROM mart_neighbourhood_overview
+        WHERE year = :year
+        ORDER BY livability_score DESC NULLS LAST
+    """
+    bind_params = {"year": year}
+    return _execute_query(query, bind_params)
+
+
+def get_housing_data(year: int = 2021) -> pd.DataFrame:
+    """Load the housing mart rows for every neighbourhood in one year.
+
+    Reads mart_neighbourhood_housing, ordered by affordability_index from
+    lowest to highest with NULL values last.
+
+    Args:
+        year: Year to query.
+
+    Returns:
+        DataFrame with tenure shares (pct_owner_occupied,
+        pct_renter_occupied), average_dwelling_value,
+        median_household_income, average rents by unit size
+        (avg_rent_bachelor through avg_rent_3bed), vacancy_rate,
+        total_rental_units, rent_to_income_pct, is_affordable,
+        affordability_index, rent_yoy_change_pct and income_quintile,
+        keyed by neighbourhood_id, neighbourhood_name and year.
+    """
+    query = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            year,
+            pct_owner_occupied,
+            pct_renter_occupied,
+            average_dwelling_value,
+            median_household_income,
+            avg_rent_bachelor,
+            avg_rent_1bed,
+            avg_rent_2bed,
+            avg_rent_3bed,
+            vacancy_rate,
+            total_rental_units,
+            rent_to_income_pct,
+            is_affordable,
+            affordability_index,
+            rent_yoy_change_pct,
+            income_quintile
+        FROM mart_neighbourhood_housing
+        WHERE year = :year
+        ORDER BY affordability_index ASC NULLS LAST
+    """
+    bind_params = {"year": year}
+    return _execute_query(query, bind_params)
+
+
+def get_safety_data(year: int = 2021) -> pd.DataFrame:
+    """Load the safety mart rows for every neighbourhood in one year.
+
+    Reads mart_neighbourhood_safety, ordered by total crime rate from lowest
+    to highest with NULL values last.
+
+    Args:
+        year: Year to query.
+
+    Returns:
+        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
+        total_crimes, total_crime_rate (the mart's crime_rate_per_100k under
+        an alias), violent_crimes, violent_crime_rate, property_crimes,
+        property_crime_rate, theft_crimes, theft_rate, crime_yoy_change_pct,
+        crime_trend.
+    """
+    query = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            year,
+            total_crimes,
+            crime_rate_per_100k as total_crime_rate,
+            violent_crimes,
+            violent_crime_rate,
+            property_crimes,
+            property_crime_rate,
+            theft_crimes,
+            theft_rate,
+            crime_yoy_change_pct,
+            crime_trend
+        FROM mart_neighbourhood_safety
+        WHERE year = :year
+        ORDER BY total_crime_rate ASC NULLS LAST
+    """
+    bind_params = {"year": year}
+    return _execute_query(query, bind_params)
+
+
+def get_demographics_data(year: int = 2021) -> pd.DataFrame:
+    """Load the demographics mart rows for every neighbourhood in one year.
+
+    Reads mart_neighbourhood_demographics, which is keyed by census_year (the
+    column is returned as ``year``), ordered by population from largest to
+    smallest with NULL values last.
+
+    Args:
+        year: Census year to query.
+
+    Returns:
+        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
+        population, population_density, population_change_pct,
+        median_household_income, average_household_income, income_quintile,
+        median_age, pct_under_18, pct_18_to_64, pct_65_plus,
+        pct_bachelors_or_higher, unemployment_rate, diversity_index.
+    """
+    query = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            census_year as year,
+            population,
+            population_density,
+            population_change_pct,
+            median_household_income,
+            average_household_income,
+            income_quintile,
+            median_age,
+            pct_under_18,
+            pct_18_to_64,
+            pct_65_plus,
+            pct_bachelors_or_higher,
+            unemployment_rate,
+            diversity_index
+        FROM mart_neighbourhood_demographics
+        WHERE census_year = :year
+        ORDER BY population DESC NULLS LAST
+    """
+    bind_params = {"year": year}
+    return _execute_query(query, bind_params)
+
+
+def get_amenities_data(year: int = 2021) -> pd.DataFrame:
+    """Load the amenities mart rows for every neighbourhood in one year.
+
+    Reads mart_neighbourhood_amenities, ordered by amenity_score from highest
+    to lowest with NULL values last.
+
+    Args:
+        year: Year to query.
+
+    Returns:
+        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
+        park_count, parks_per_1000, school_count, schools_per_1000,
+        childcare_count, childcare_per_1000, total_amenities,
+        total_amenities_per_1000, amenity_score, amenity_rank.
+    """
+    query = """
+        SELECT
+            neighbourhood_id,
+            neighbourhood_name,
+            year,
+            park_count,
+            parks_per_1000,
+            school_count,
+            schools_per_1000,
+            childcare_count,
+            childcare_per_1000,
+            total_amenities,
+            total_amenities_per_1000,
+            amenity_score,
+            amenity_rank
+        FROM mart_neighbourhood_amenities
+        WHERE year = :year
+        ORDER BY amenity_score DESC NULLS LAST
+    """
+    bind_params = {"year": year}
+    return _execute_query(query, bind_params)
+
+
+def get_neighbourhood_details(
+    neighbourhood_id: int, year: int = 2021
+) -> dict[str, Any]:
+    """Build a single-neighbourhood profile spanning all five marts.
+
+    The overview mart drives the query; safety, housing, demographics and
+    amenities are LEFT JOINed on neighbourhood and year, so their columns are
+    null when a mart has no matching row.
+
+    Args:
+        neighbourhood_id: The neighbourhood ID.
+        year: Year to query.
+
+    Returns:
+        Dictionary mapping column name to value for the first matching row,
+        or an empty dict when nothing matches or the query fails.
+    """
+    query = """
+        SELECT
+            o.neighbourhood_id,
+            o.neighbourhood_name,
+            o.year,
+            o.population,
+            o.median_household_income,
+            o.livability_score,
+            o.safety_score,
+            o.affordability_score,
+            o.amenity_score,
+            s.total_crimes,
+            s.crime_rate_per_100k,
+            s.violent_crime_rate,
+            s.property_crime_rate,
+            h.avg_rent_2bed,
+            h.vacancy_rate,
+            h.rent_to_income_pct,
+            h.affordability_index,
+            h.pct_owner_occupied,
+            h.pct_renter_occupied,
+            d.median_age,
+            d.diversity_index,
+            d.unemployment_rate,
+            d.pct_bachelors_or_higher,
+            a.park_count,
+            a.school_count,
+            a.total_amenities
+        FROM mart_neighbourhood_overview o
+        LEFT JOIN mart_neighbourhood_safety s
+            ON o.neighbourhood_id = s.neighbourhood_id
+            AND o.year = s.year
+        LEFT JOIN mart_neighbourhood_housing h
+            ON o.neighbourhood_id = h.neighbourhood_id
+            AND o.year = h.year
+        LEFT JOIN mart_neighbourhood_demographics d
+            ON o.neighbourhood_id = d.neighbourhood_id
+            AND o.year = d.census_year
+        LEFT JOIN mart_neighbourhood_amenities a
+            ON o.neighbourhood_id = a.neighbourhood_id
+            AND o.year = a.year
+        WHERE o.neighbourhood_id = :neighbourhood_id
+          AND o.year = :year
+    """
+    bind_params = {"neighbourhood_id": neighbourhood_id, "year": year}
+    frame = _execute_query(query, bind_params)
+
+    if frame.empty:
+        return {}
+
+    first_row = frame.iloc[0].to_dict()
+    # Keys are coerced to str so the mapping matches the declared return type.
+    return {str(column): value for column, value in first_row.items()}
+
+
+@lru_cache(maxsize=32)
+def _load_neighbourhood_list(year: int) -> list[dict[str, Any]]:
+    """Query the neighbourhood records for ``year``; cached on success only.
+
+    ``_execute_query`` returns an empty DataFrame when the query fails, so an
+    empty result cannot be told apart from an outage. Raising here keeps it
+    out of ``lru_cache`` (which stores return values only) and lets the next
+    call retry.
+
+    Raises:
+        LookupError: If the query returns no rows for ``year``.
+    """
+    sql = """
+        SELECT DISTINCT
+            neighbourhood_id,
+            neighbourhood_name,
+            population
+        FROM mart_neighbourhood_overview
+        WHERE year = :year
+        ORDER BY neighbourhood_name
+    """
+    df = _execute_query(sql, {"year": year})
+    if df.empty:
+        raise LookupError(f"no neighbourhoods found for year {year}")
+    return list(df.to_dict("records"))  # type: ignore[arg-type]
+
+
+def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
+    """Get list of all neighbourhoods for dropdown selectors.
+
+    Non-empty results are cached per year. Empty results (no data, or a
+    failed query) are not cached, so the list recovers once the database is
+    reachable again.
+
+    Args:
+        year: Year to query.
+
+    Returns:
+        List of dicts with neighbourhood_id, neighbourhood_name, and
+        population, ordered by neighbourhood_name; empty if nothing could be
+        loaded. The list is a shallow copy, so adding or removing entries
+        does not alter the cached result.
+    """
+    try:
+        return list(_load_neighbourhood_list(year))
+    except LookupError:
+        return []
+
+
+# Keep the cache helpers that the ``lru_cache`` decorator exposed on the
+# public name, so existing ``cache_clear()`` / ``cache_info()`` calls still work.
+get_neighbourhood_list.cache_clear = (  # type: ignore[attr-defined]
+    _load_neighbourhood_list.cache_clear
+)
+get_neighbourhood_list.cache_info = (  # type: ignore[attr-defined]
+    _load_neighbourhood_list.cache_info
+)
+
+
+def get_rankings(
+    metric: str,
+    year: int = 2021,
+    top_n: int = 10,
+    ascending: bool = True,
+) -> pd.DataFrame:
+    """Get the neighbourhoods at both ends of the ranking for a metric.
+
+    Args:
+        metric: Column name to rank by. Must be a plain ASCII identifier.
+            Metrics missing from the table map are looked up in
+            mart_neighbourhood_overview.
+        year: Year to query.
+        top_n: Number of records to return from each end of the ranking.
+        ascending: If True, rank from lowest to highest (good for crime, rent).
+
+    Returns:
+        DataFrame with neighbourhood_id, neighbourhood_name, the metric and a
+        rank_group column. The first ``top_n`` rows in the requested sort
+        order are labelled 'bottom' and the ``top_n`` rows from the opposite
+        end are labelled 'top'. Empty if ``metric`` is not a valid identifier
+        or the query fails.
+    """
+    # Map metrics to their source tables
+    table_map = {
+        "livability_score": "mart_neighbourhood_overview",
+        "safety_score": "mart_neighbourhood_overview",
+        "affordability_score": "mart_neighbourhood_overview",
+        "amenity_score": "mart_neighbourhood_overview",
+        "crime_rate_per_100k": "mart_neighbourhood_safety",
+        "total_crime_rate": "mart_neighbourhood_safety",
+        "avg_rent_2bed": "mart_neighbourhood_housing",
+        "affordability_index": "mart_neighbourhood_housing",
+        "population": "mart_neighbourhood_demographics",
+        "median_household_income": "mart_neighbourhood_demographics",
+    }
+    # Metrics whose public name differs from the mart's physical column.
+    # The other safety queries in this module read crime_rate_per_100k and
+    # expose it under the total_crime_rate alias.
+    column_map = {
+        "total_crime_rate": "crime_rate_per_100k",
+    }
+
+    # Identifiers cannot be sent as bound parameters, so ``metric`` ends up in
+    # the SQL text itself. Reject anything that is not a bare ASCII identifier
+    # to rule out SQL injection; an empty frame matches the failed-query result.
+    if not (isinstance(metric, str) and metric.isascii() and metric.isidentifier()):
+        return pd.DataFrame()
+
+    table = table_map.get(metric, "mart_neighbourhood_overview")
+    year_col = "census_year" if "demographics" in table else "year"
+    column = column_map.get(metric, metric)
+    # Alias only when needed, so the result column always carries ``metric``.
+    select_expr = column if column == metric else f"{column} AS {metric}"
+
+    order = "ASC" if ascending else "DESC"
+    reverse_order = "DESC" if ascending else "ASC"
+
+    sql = f"""
+        (
+            SELECT neighbourhood_id, neighbourhood_name, {select_expr}, 'bottom' as rank_group
+            FROM {table}
+            WHERE {year_col} = :year AND {column} IS NOT NULL
+            ORDER BY {column} {order}
+            LIMIT :top_n
+        )
+        UNION ALL
+        (
+            SELECT neighbourhood_id, neighbourhood_name, {select_expr}, 'top' as rank_group
+            FROM {table}
+            WHERE {year_col} = :year AND {column} IS NOT NULL
+            ORDER BY {column} {reverse_order}
+            LIMIT :top_n
+        )
+    """
+    return _execute_query(sql, {"year": year, "top_n": top_n})
+
+
+def get_city_averages(year: int = 2021) -> dict[str, Any]:
+    """Aggregate mart_neighbourhood_overview into city-wide figures.
+
+    Args:
+        year: Year to query.
+
+    Returns:
+        Dictionary with the averaged scores, income, crime rate and rent
+        metrics plus total_population. Float values are rounded to two
+        decimals. Empty when the query returns nothing or fails.
+    """
+    query = """
+        SELECT
+            AVG(livability_score) as avg_livability_score,
+            AVG(safety_score) as avg_safety_score,
+            AVG(affordability_score) as avg_affordability_score,
+            AVG(amenity_score) as avg_amenity_score,
+            SUM(population) as total_population,
+            AVG(median_household_income) as avg_median_income,
+            AVG(crime_rate_per_100k) as avg_crime_rate,
+            AVG(avg_rent_2bed) as avg_rent_2bed,
+            AVG(rent_to_income_pct) as avg_rent_to_income
+        FROM mart_neighbourhood_overview
+        WHERE year = :year
+    """
+    frame = _execute_query(query, {"year": year})
+
+    if frame.empty:
+        return {}
+
+    averages: dict[str, Any] = {}
+    for column, value in frame.iloc[0].to_dict().items():
+        # Only non-missing floats are rounded; everything else passes through.
+        is_roundable = pd.notna(value) and isinstance(value, float)
+        averages[str(column)] = round(value, 2) if is_roundable else value
+
+    return averages