feat: Complete Phase 5 dashboard implementation

Implement full 5-tab Toronto Neighbourhood Dashboard with real data
connectivity:

Dashboard Structure:
- Overview tab with livability scores and rankings
- Housing tab with affordability metrics
- Safety tab with crime statistics
- Demographics tab with population/income data
- Amenities tab with parks, schools, transit

Figure Factories (portfolio_app/figures/):
- bar_charts.py: ranking, stacked, horizontal bars
- scatter.py: scatter plots, bubble charts
- radar.py: spider/radar charts
- demographics.py: donut, age pyramid, income distribution

Service Layer (portfolio_app/toronto/services/):
- neighbourhood_service.py: queries dbt marts for all tab data
- geometry_service.py: generates GeoJSON from PostGIS
- Graceful error handling when database unavailable

Callbacks (portfolio_app/pages/toronto/callbacks/):
- map_callbacks.py: choropleth updates, map click handling
- chart_callbacks.py: supporting chart updates
- selection_callbacks.py: dropdown handlers, KPI updates

Data Pipeline (scripts/data/):
- load_toronto_data.py: orchestration script with CLI flags

Lessons Learned:
- Graceful error handling in service layers
- Modular callback structure for multi-tab dashboards
- Figure factory pattern for reusable charts

Closes: #64, #65, #66, #67, #68, #69, #70

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-17 11:46:18 -05:00
parent 3054441630
commit c9cf744d84
27 changed files with 4377 additions and 1770 deletions

View File

@@ -57,6 +57,7 @@ class TorontoOpenDataParser:
self._cache_dir = cache_dir
self._timeout = timeout
self._client: httpx.Client | None = None
self._neighbourhood_name_map: dict[str, int] | None = None
@property
def client(self) -> httpx.Client:
@@ -75,6 +76,63 @@ class TorontoOpenDataParser:
self._client.close()
self._client = None
def _get_neighbourhood_name_map(self) -> dict[str, int]:
    """Build and cache a mapping of neighbourhood names to IDs.

    Every neighbourhood returned by ``get_neighbourhoods()`` is registered
    under its lower-cased, stripped ``area_name``. Alias spellings (the name
    with " neighbourhood", " area" or "-" removed) are registered as well,
    but an alias never replaces an existing entry, so a derived spelling of
    one neighbourhood cannot take over the exact name of another.

    The result is stored on ``self._neighbourhood_name_map`` only once it is
    fully built; later calls return the cached dictionary.

    Returns:
        Dictionary mapping normalized neighbourhood names to area_id.
    """
    if self._neighbourhood_name_map is not None:
        return self._neighbourhood_name_map

    name_map: dict[str, int] = {}
    aliases: list[tuple[str, int]] = []

    for n in self.get_neighbourhoods():
        name_lower = n.area_name.lower().strip()
        name_map[name_lower] = n.area_id

        # Collect alternative spellings for flexible matching; they are
        # applied after the loop so that exact names always take priority.
        for suffix in (" neighbourhood", " area", "-"):
            if suffix in name_lower:
                alt_name = name_lower.replace(suffix, "").strip()
                # An empty alias would be a prefix of every name and make
                # prefix-based lookups match everything, so skip it.
                if alt_name:
                    aliases.append((alt_name, n.area_id))

    for alt_name, area_id in aliases:
        name_map.setdefault(alt_name, area_id)

    # Assign the cache last so a failure above never leaves a partial map.
    self._neighbourhood_name_map = name_map

    logger.debug(
        f"Built neighbourhood name map with {len(self._neighbourhood_name_map)} entries"
    )
    return self._neighbourhood_name_map
def _match_neighbourhood_id(self, name: str) -> int | None:
"""Match a neighbourhood name to its ID.
Args:
name: Neighbourhood name from census data.
Returns:
Neighbourhood ID or None if not found.
"""
name_map = self._get_neighbourhood_name_map()
name_lower = name.lower().strip()
# Direct match
if name_lower in name_map:
return name_map[name_lower]
# Try removing parenthetical content
if "(" in name_lower:
base_name = name_lower.split("(")[0].strip()
if base_name in name_map:
return name_map[base_name]
# Try fuzzy matching with first few chars
for key, area_id in name_map.items():
if key.startswith(name_lower[:10]) or name_lower.startswith(key[:10]):
return area_id
return None
def __enter__(self) -> "TorontoOpenDataParser":
    """Enter the ``with`` block and hand back this same parser instance."""
    parser = self
    return parser
@@ -254,11 +312,30 @@ class TorontoOpenDataParser:
logger.info(f"Parsed {len(records)} neighbourhoods")
return records
# Mapping of indicator names to CensusRecord fields.
# Keys are partial matches (case-insensitive) found in the "Characteristic" column:
# get_census_profiles() lower-cases each row's characteristic text and tests
# every key with a substring (`in`) check, in the order the keys are listed here.
# The first key that matches a row claims that row, and for each field the first
# value stored for a neighbourhood is kept, so both key order and row order matter.
# Both "population, ..." keys feed the same "population" field.
# NOTE(review): "owner" and "renter" are very short patterns and match any row
# whose characteristic contains that word; confirm that the first such row in the
# dataset really is the percentage these pct_* fields expect.
CENSUS_INDICATOR_MAPPING: dict[str, str] = {
    "population, 2021": "population",
    "population, 2016": "population",
    "population density per square kilometre": "population_density",
    "median total income of household": "median_household_income",
    "average total income of household": "average_household_income",
    "unemployment rate": "unemployment_rate",
    "bachelor's degree or higher": "pct_bachelors_or_higher",
    "owner": "pct_owner_occupied",
    "renter": "pct_renter_occupied",
    "median age": "median_age",
    "average value of dwellings": "average_dwelling_value",
}
def get_census_profiles(self, year: int = 2021) -> list[CensusRecord]:
"""Fetch neighbourhood census profiles.
Note: Census profile data structure varies by year. This method
extracts key demographic indicators where available.
The Toronto Open Data neighbourhood profiles dataset is pivoted:
- Rows are demographic indicators (e.g., "Population", "Median Income")
- Columns are neighbourhoods (e.g., "Agincourt North", "Alderwood")
This method transposes the data to create one CensusRecord per neighbourhood.
Args:
year: Census year (2016 or 2021).
@@ -266,7 +343,6 @@ class TorontoOpenDataParser:
Returns:
List of validated CensusRecord objects.
"""
# Census profiles are typically in CSV/datastore format
try:
raw_records = self._fetch_csv_as_json(
self.DATASETS["neighbourhood_profiles"]
@@ -275,13 +351,119 @@ class TorontoOpenDataParser:
logger.warning(f"Could not fetch census profiles: {e}")
return []
# Census profiles are pivoted - rows are indicators, columns are neighbourhoods
# This requires special handling based on the actual data structure
if not raw_records:
logger.warning("Census profiles dataset is empty")
return []
logger.info(f"Fetched {len(raw_records)} census profile rows")
# For now, return empty list - actual implementation depends on data structure
# TODO: Implement census profile parsing based on actual data format
return []
# Find the characteristic/indicator column name
sample_row = raw_records[0]
char_col = None
for col in sample_row:
col_lower = col.lower()
if "characteristic" in col_lower or "category" in col_lower:
char_col = col
break
if not char_col:
# Try common column names
for candidate in ["Characteristic", "Category", "Topic", "_id"]:
if candidate in sample_row:
char_col = candidate
break
if not char_col:
logger.warning("Could not find characteristic column in census data")
return []
# Identify neighbourhood columns (exclude metadata columns)
exclude_cols = {
char_col,
"_id",
"Topic",
"Data Source",
"Characteristic",
"Category",
}
neighbourhood_cols = [col for col in sample_row if col not in exclude_cols]
logger.info(f"Found {len(neighbourhood_cols)} neighbourhood columns")
# Build a lookup: neighbourhood_name -> {field: value}
neighbourhood_data: dict[str, dict[str, Decimal | int | None]] = {
col: {} for col in neighbourhood_cols
}
# Process each row to extract indicator values
for row in raw_records:
characteristic = str(row.get(char_col, "")).lower().strip()
# Check if this row matches any indicator we care about
for indicator_pattern, field_name in self.CENSUS_INDICATOR_MAPPING.items():
if indicator_pattern in characteristic:
# Extract values for each neighbourhood
for col in neighbourhood_cols:
value = row.get(col)
if value is not None and value != "":
try:
# Clean and convert value
str_val = str(value).replace(",", "").replace("$", "")
str_val = str_val.replace("%", "").strip()
if str_val and str_val not in ("x", "X", "F", ".."):
numeric_val = Decimal(str_val)
# Only store if not already set (first match wins)
if field_name not in neighbourhood_data[col]:
neighbourhood_data[col][
field_name
] = numeric_val
except (ValueError, TypeError):
pass
break # Move to next row after matching
# Convert to CensusRecord objects
records = []
unmatched = []
for neighbourhood_name, data in neighbourhood_data.items():
if not data:
continue
# Match neighbourhood name to ID
neighbourhood_id = self._match_neighbourhood_id(neighbourhood_name)
if neighbourhood_id is None:
unmatched.append(neighbourhood_name)
continue
try:
pop_val = data.get("population")
population = int(pop_val) if pop_val is not None else None
record = CensusRecord(
neighbourhood_id=neighbourhood_id,
census_year=year,
population=population,
population_density=data.get("population_density"),
median_household_income=data.get("median_household_income"),
average_household_income=data.get("average_household_income"),
unemployment_rate=data.get("unemployment_rate"),
pct_bachelors_or_higher=data.get("pct_bachelors_or_higher"),
pct_owner_occupied=data.get("pct_owner_occupied"),
pct_renter_occupied=data.get("pct_renter_occupied"),
median_age=data.get("median_age"),
average_dwelling_value=data.get("average_dwelling_value"),
)
records.append(record)
except Exception as e:
logger.debug(f"Skipping neighbourhood {neighbourhood_name}: {e}")
if unmatched:
logger.warning(
f"Could not match {len(unmatched)} neighbourhoods: {unmatched[:5]}..."
)
logger.info(f"Parsed {len(records)} census records for year {year}")
return records
def get_parks(self) -> list[AmenityRecord]:
"""Fetch park locations.