feat: Complete Phase 5 dashboard implementation
Implement full 5-tab Toronto Neighbourhood Dashboard with real data connectivity: Dashboard Structure: - Overview tab with livability scores and rankings - Housing tab with affordability metrics - Safety tab with crime statistics - Demographics tab with population/income data - Amenities tab with parks, schools, transit Figure Factories (portfolio_app/figures/): - bar_charts.py: ranking, stacked, horizontal bars - scatter.py: scatter plots, bubble charts - radar.py: spider/radar charts - demographics.py: donut, age pyramid, income distribution Service Layer (portfolio_app/toronto/services/): - neighbourhood_service.py: queries dbt marts for all tab data - geometry_service.py: generates GeoJSON from PostGIS - Graceful error handling when database unavailable Callbacks (portfolio_app/pages/toronto/callbacks/): - map_callbacks.py: choropleth updates, map click handling - chart_callbacks.py: supporting chart updates - selection_callbacks.py: dropdown handlers, KPI updates Data Pipeline (scripts/data/): - load_toronto_data.py: orchestration script with CLI flags Lessons Learned: - Graceful error handling in service layers - Modular callback structure for multi-tab dashboards - Figure factory pattern for reusable charts Closes: #64, #65, #66, #67, #68, #69, #70 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,7 @@ class TorontoOpenDataParser:
|
||||
self._cache_dir = cache_dir
|
||||
self._timeout = timeout
|
||||
self._client: httpx.Client | None = None
|
||||
self._neighbourhood_name_map: dict[str, int] | None = None
|
||||
|
||||
@property
|
||||
def client(self) -> httpx.Client:
|
||||
@@ -75,6 +76,63 @@ class TorontoOpenDataParser:
|
||||
self._client.close()
|
||||
self._client = None
|
||||
|
||||
def _get_neighbourhood_name_map(self) -> dict[str, int]:
|
||||
"""Build and cache a mapping of neighbourhood names to IDs.
|
||||
|
||||
Returns:
|
||||
Dictionary mapping normalized neighbourhood names to area_id.
|
||||
"""
|
||||
if self._neighbourhood_name_map is not None:
|
||||
return self._neighbourhood_name_map
|
||||
|
||||
neighbourhoods = self.get_neighbourhoods()
|
||||
self._neighbourhood_name_map = {}
|
||||
|
||||
for n in neighbourhoods:
|
||||
# Add multiple variations of the name for flexible matching
|
||||
name_lower = n.area_name.lower().strip()
|
||||
self._neighbourhood_name_map[name_lower] = n.area_id
|
||||
|
||||
# Also add without common suffixes/prefixes
|
||||
for suffix in [" neighbourhood", " area", "-"]:
|
||||
if suffix in name_lower:
|
||||
alt_name = name_lower.replace(suffix, "").strip()
|
||||
self._neighbourhood_name_map[alt_name] = n.area_id
|
||||
|
||||
logger.debug(
|
||||
f"Built neighbourhood name map with {len(self._neighbourhood_name_map)} entries"
|
||||
)
|
||||
return self._neighbourhood_name_map
|
||||
|
||||
def _match_neighbourhood_id(self, name: str) -> int | None:
|
||||
"""Match a neighbourhood name to its ID.
|
||||
|
||||
Args:
|
||||
name: Neighbourhood name from census data.
|
||||
|
||||
Returns:
|
||||
Neighbourhood ID or None if not found.
|
||||
"""
|
||||
name_map = self._get_neighbourhood_name_map()
|
||||
name_lower = name.lower().strip()
|
||||
|
||||
# Direct match
|
||||
if name_lower in name_map:
|
||||
return name_map[name_lower]
|
||||
|
||||
# Try removing parenthetical content
|
||||
if "(" in name_lower:
|
||||
base_name = name_lower.split("(")[0].strip()
|
||||
if base_name in name_map:
|
||||
return name_map[base_name]
|
||||
|
||||
# Try fuzzy matching with first few chars
|
||||
for key, area_id in name_map.items():
|
||||
if key.startswith(name_lower[:10]) or name_lower.startswith(key[:10]):
|
||||
return area_id
|
||||
|
||||
return None
|
||||
|
||||
def __enter__(self) -> "TorontoOpenDataParser":
|
||||
return self
|
||||
|
||||
@@ -254,11 +312,30 @@ class TorontoOpenDataParser:
|
||||
logger.info(f"Parsed {len(records)} neighbourhoods")
|
||||
return records
|
||||
|
||||
# Mapping of indicator names to CensusRecord fields
|
||||
# Keys are partial matches (case-insensitive) found in the "Characteristic" column
|
||||
CENSUS_INDICATOR_MAPPING: dict[str, str] = {
|
||||
"population, 2021": "population",
|
||||
"population, 2016": "population",
|
||||
"population density per square kilometre": "population_density",
|
||||
"median total income of household": "median_household_income",
|
||||
"average total income of household": "average_household_income",
|
||||
"unemployment rate": "unemployment_rate",
|
||||
"bachelor's degree or higher": "pct_bachelors_or_higher",
|
||||
"owner": "pct_owner_occupied",
|
||||
"renter": "pct_renter_occupied",
|
||||
"median age": "median_age",
|
||||
"average value of dwellings": "average_dwelling_value",
|
||||
}
|
||||
|
||||
def get_census_profiles(self, year: int = 2021) -> list[CensusRecord]:
|
||||
"""Fetch neighbourhood census profiles.
|
||||
|
||||
Note: Census profile data structure varies by year. This method
|
||||
extracts key demographic indicators where available.
|
||||
The Toronto Open Data neighbourhood profiles dataset is pivoted:
|
||||
- Rows are demographic indicators (e.g., "Population", "Median Income")
|
||||
- Columns are neighbourhoods (e.g., "Agincourt North", "Alderwood")
|
||||
|
||||
This method transposes the data to create one CensusRecord per neighbourhood.
|
||||
|
||||
Args:
|
||||
year: Census year (2016 or 2021).
|
||||
@@ -266,7 +343,6 @@ class TorontoOpenDataParser:
|
||||
Returns:
|
||||
List of validated CensusRecord objects.
|
||||
"""
|
||||
# Census profiles are typically in CSV/datastore format
|
||||
try:
|
||||
raw_records = self._fetch_csv_as_json(
|
||||
self.DATASETS["neighbourhood_profiles"]
|
||||
@@ -275,13 +351,119 @@ class TorontoOpenDataParser:
|
||||
logger.warning(f"Could not fetch census profiles: {e}")
|
||||
return []
|
||||
|
||||
# Census profiles are pivoted - rows are indicators, columns are neighbourhoods
|
||||
# This requires special handling based on the actual data structure
|
||||
if not raw_records:
|
||||
logger.warning("Census profiles dataset is empty")
|
||||
return []
|
||||
|
||||
logger.info(f"Fetched {len(raw_records)} census profile rows")
|
||||
|
||||
# For now, return empty list - actual implementation depends on data structure
|
||||
# TODO: Implement census profile parsing based on actual data format
|
||||
return []
|
||||
# Find the characteristic/indicator column name
|
||||
sample_row = raw_records[0]
|
||||
char_col = None
|
||||
for col in sample_row:
|
||||
col_lower = col.lower()
|
||||
if "characteristic" in col_lower or "category" in col_lower:
|
||||
char_col = col
|
||||
break
|
||||
|
||||
if not char_col:
|
||||
# Try common column names
|
||||
for candidate in ["Characteristic", "Category", "Topic", "_id"]:
|
||||
if candidate in sample_row:
|
||||
char_col = candidate
|
||||
break
|
||||
|
||||
if not char_col:
|
||||
logger.warning("Could not find characteristic column in census data")
|
||||
return []
|
||||
|
||||
# Identify neighbourhood columns (exclude metadata columns)
|
||||
exclude_cols = {
|
||||
char_col,
|
||||
"_id",
|
||||
"Topic",
|
||||
"Data Source",
|
||||
"Characteristic",
|
||||
"Category",
|
||||
}
|
||||
neighbourhood_cols = [col for col in sample_row if col not in exclude_cols]
|
||||
|
||||
logger.info(f"Found {len(neighbourhood_cols)} neighbourhood columns")
|
||||
|
||||
# Build a lookup: neighbourhood_name -> {field: value}
|
||||
neighbourhood_data: dict[str, dict[str, Decimal | int | None]] = {
|
||||
col: {} for col in neighbourhood_cols
|
||||
}
|
||||
|
||||
# Process each row to extract indicator values
|
||||
for row in raw_records:
|
||||
characteristic = str(row.get(char_col, "")).lower().strip()
|
||||
|
||||
# Check if this row matches any indicator we care about
|
||||
for indicator_pattern, field_name in self.CENSUS_INDICATOR_MAPPING.items():
|
||||
if indicator_pattern in characteristic:
|
||||
# Extract values for each neighbourhood
|
||||
for col in neighbourhood_cols:
|
||||
value = row.get(col)
|
||||
if value is not None and value != "":
|
||||
try:
|
||||
# Clean and convert value
|
||||
str_val = str(value).replace(",", "").replace("$", "")
|
||||
str_val = str_val.replace("%", "").strip()
|
||||
if str_val and str_val not in ("x", "X", "F", ".."):
|
||||
numeric_val = Decimal(str_val)
|
||||
# Only store if not already set (first match wins)
|
||||
if field_name not in neighbourhood_data[col]:
|
||||
neighbourhood_data[col][
|
||||
field_name
|
||||
] = numeric_val
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
break # Move to next row after matching
|
||||
|
||||
# Convert to CensusRecord objects
|
||||
records = []
|
||||
unmatched = []
|
||||
|
||||
for neighbourhood_name, data in neighbourhood_data.items():
|
||||
if not data:
|
||||
continue
|
||||
|
||||
# Match neighbourhood name to ID
|
||||
neighbourhood_id = self._match_neighbourhood_id(neighbourhood_name)
|
||||
if neighbourhood_id is None:
|
||||
unmatched.append(neighbourhood_name)
|
||||
continue
|
||||
|
||||
try:
|
||||
pop_val = data.get("population")
|
||||
population = int(pop_val) if pop_val is not None else None
|
||||
|
||||
record = CensusRecord(
|
||||
neighbourhood_id=neighbourhood_id,
|
||||
census_year=year,
|
||||
population=population,
|
||||
population_density=data.get("population_density"),
|
||||
median_household_income=data.get("median_household_income"),
|
||||
average_household_income=data.get("average_household_income"),
|
||||
unemployment_rate=data.get("unemployment_rate"),
|
||||
pct_bachelors_or_higher=data.get("pct_bachelors_or_higher"),
|
||||
pct_owner_occupied=data.get("pct_owner_occupied"),
|
||||
pct_renter_occupied=data.get("pct_renter_occupied"),
|
||||
median_age=data.get("median_age"),
|
||||
average_dwelling_value=data.get("average_dwelling_value"),
|
||||
)
|
||||
records.append(record)
|
||||
except Exception as e:
|
||||
logger.debug(f"Skipping neighbourhood {neighbourhood_name}: {e}")
|
||||
|
||||
if unmatched:
|
||||
logger.warning(
|
||||
f"Could not match {len(unmatched)} neighbourhoods: {unmatched[:5]}..."
|
||||
)
|
||||
|
||||
logger.info(f"Parsed {len(records)} census records for year {year}")
|
||||
return records
|
||||
|
||||
def get_parks(self) -> list[AmenityRecord]:
|
||||
"""Fetch park locations.
|
||||
|
||||
Reference in New Issue
Block a user