feat: Complete Phase 5 dashboard implementation

Implement full 5-tab Toronto Neighbourhood Dashboard with real data connectivity: Dashboard Structure: - Overview tab with livability scores and rankings - Housing tab with affordability metrics - Safety tab with crime statistics - Demographics tab with population/income data - Amenities tab with parks, schools, transit Figure Factories (portfolio_app/figures/): - bar_charts.py: ranking, stacked, horizontal bars - scatter.py: scatter plots, bubble charts - radar.py: spider/radar charts - demographics.py: donut, age pyramid, income distribution Service Layer (portfolio_app/toronto/services/): - neighbourhood_service.py: queries dbt marts for all tab data - geometry_service.py: generates GeoJSON from PostGIS - Graceful error handling when database unavailable Callbacks (portfolio_app/pages/toronto/callbacks/): - map_callbacks.py: choropleth updates, map click handling - chart_callbacks.py: supporting chart updates - selection_callbacks.py: dropdown handlers, KPI updates Data Pipeline (scripts/data/): - load_toronto_data.py: orchestration script with CLI flags Lessons Learned: - Graceful error handling in service layers - Modular callback structure for multi-tab dashboards - Figure factory pattern for reusable charts Closes: #64, #65, #66, #67, #68, #69, #70 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 11:46:18 -05:00
parent 3054441630
commit c9cf744d84
27 changed files with 4377 additions and 1770 deletions
--- a/portfolio_app/toronto/parsers/toronto_open_data.py
+++ b/portfolio_app/toronto/parsers/toronto_open_data.py
@@ -57,6 +57,7 @@ class TorontoOpenDataParser:
        self._cache_dir = cache_dir
        self._timeout = timeout
        self._client: httpx.Client | None = None
+        self._neighbourhood_name_map: dict[str, int] | None = None

    @property
    def client(self) -> httpx.Client:
@@ -75,6 +76,63 @@ class TorontoOpenDataParser:
            self._client.close()
            self._client = None

+    def _get_neighbourhood_name_map(self) -> dict[str, int]:
+        """Build and cache a mapping of neighbourhood names to IDs.
+
+        Returns:
+            Dictionary mapping normalized neighbourhood names to area_id.
+        """
+        if self._neighbourhood_name_map is not None:
+            return self._neighbourhood_name_map
+
+        neighbourhoods = self.get_neighbourhoods()
+        self._neighbourhood_name_map = {}
+
+        for n in neighbourhoods:
+            # Add multiple variations of the name for flexible matching
+            name_lower = n.area_name.lower().strip()
+            self._neighbourhood_name_map[name_lower] = n.area_id
+
+            # Also add without common suffixes/prefixes
+            for suffix in [" neighbourhood", " area", "-"]:
+                if suffix in name_lower:
+                    alt_name = name_lower.replace(suffix, "").strip()
+                    self._neighbourhood_name_map[alt_name] = n.area_id
+
+        logger.debug(
+            f"Built neighbourhood name map with {len(self._neighbourhood_name_map)} entries"
+        )
+        return self._neighbourhood_name_map
+
+    def _match_neighbourhood_id(self, name: str) -> int | None:
+        """Match a neighbourhood name to its ID.
+
+        Args:
+            name: Neighbourhood name from census data.
+
+        Returns:
+            Neighbourhood ID or None if not found.
+        """
+        name_map = self._get_neighbourhood_name_map()
+        name_lower = name.lower().strip()
+
+        # Direct match
+        if name_lower in name_map:
+            return name_map[name_lower]
+
+        # Try removing parenthetical content
+        if "(" in name_lower:
+            base_name = name_lower.split("(")[0].strip()
+            if base_name in name_map:
+                return name_map[base_name]
+
+        # Try fuzzy matching with first few chars
+        for key, area_id in name_map.items():
+            if key.startswith(name_lower[:10]) or name_lower.startswith(key[:10]):
+                return area_id
+
+        return None
+
    def __enter__(self) -> "TorontoOpenDataParser":
        return self

@@ -254,11 +312,30 @@ class TorontoOpenDataParser:
        logger.info(f"Parsed {len(records)} neighbourhoods")
        return records

+    # Mapping of indicator names to CensusRecord fields
+    # Keys are partial matches (case-insensitive) found in the "Characteristic" column
+    CENSUS_INDICATOR_MAPPING: dict[str, str] = {
+        "population, 2021": "population",
+        "population, 2016": "population",
+        "population density per square kilometre": "population_density",
+        "median total income of household": "median_household_income",
+        "average total income of household": "average_household_income",
+        "unemployment rate": "unemployment_rate",
+        "bachelor's degree or higher": "pct_bachelors_or_higher",
+        "owner": "pct_owner_occupied",
+        "renter": "pct_renter_occupied",
+        "median age": "median_age",
+        "average value of dwellings": "average_dwelling_value",
+    }
+
    def get_census_profiles(self, year: int = 2021) -> list[CensusRecord]:
        """Fetch neighbourhood census profiles.

-        Note: Census profile data structure varies by year. This method
-        extracts key demographic indicators where available.
+        The Toronto Open Data neighbourhood profiles dataset is pivoted:
+        - Rows are demographic indicators (e.g., "Population", "Median Income")
+        - Columns are neighbourhoods (e.g., "Agincourt North", "Alderwood")
+
+        This method transposes the data to create one CensusRecord per neighbourhood.

        Args:
            year: Census year (2016 or 2021).
@@ -266,7 +343,6 @@ class TorontoOpenDataParser:
        Returns:
            List of validated CensusRecord objects.
        """
-        # Census profiles are typically in CSV/datastore format
        try:
            raw_records = self._fetch_csv_as_json(
                self.DATASETS["neighbourhood_profiles"]
@@ -275,13 +351,119 @@ class TorontoOpenDataParser:
            logger.warning(f"Could not fetch census profiles: {e}")
            return []

-        # Census profiles are pivoted - rows are indicators, columns are neighbourhoods
-        # This requires special handling based on the actual data structure
+        if not raw_records:
+            logger.warning("Census profiles dataset is empty")
+            return []
+
        logger.info(f"Fetched {len(raw_records)} census profile rows")

-        # For now, return empty list - actual implementation depends on data structure
-        # TODO: Implement census profile parsing based on actual data format
-        return []
+        # Find the characteristic/indicator column name
+        sample_row = raw_records[0]
+        char_col = None
+        for col in sample_row:
+            col_lower = col.lower()
+            if "characteristic" in col_lower or "category" in col_lower:
+                char_col = col
+                break
+
+        if not char_col:
+            # Try common column names
+            for candidate in ["Characteristic", "Category", "Topic", "_id"]:
+                if candidate in sample_row:
+                    char_col = candidate
+                    break
+
+        if not char_col:
+            logger.warning("Could not find characteristic column in census data")
+            return []
+
+        # Identify neighbourhood columns (exclude metadata columns)
+        exclude_cols = {
+            char_col,
+            "_id",
+            "Topic",
+            "Data Source",
+            "Characteristic",
+            "Category",
+        }
+        neighbourhood_cols = [col for col in sample_row if col not in exclude_cols]
+
+        logger.info(f"Found {len(neighbourhood_cols)} neighbourhood columns")
+
+        # Build a lookup: neighbourhood_name -> {field: value}
+        neighbourhood_data: dict[str, dict[str, Decimal | int | None]] = {
+            col: {} for col in neighbourhood_cols
+        }
+
+        # Process each row to extract indicator values
+        for row in raw_records:
+            characteristic = str(row.get(char_col, "")).lower().strip()
+
+            # Check if this row matches any indicator we care about
+            for indicator_pattern, field_name in self.CENSUS_INDICATOR_MAPPING.items():
+                if indicator_pattern in characteristic:
+                    # Extract values for each neighbourhood
+                    for col in neighbourhood_cols:
+                        value = row.get(col)
+                        if value is not None and value != "":
+                            try:
+                                # Clean and convert value
+                                str_val = str(value).replace(",", "").replace("$", "")
+                                str_val = str_val.replace("%", "").strip()
+                                if str_val and str_val not in ("x", "X", "F", ".."):
+                                    numeric_val = Decimal(str_val)
+                                    # Only store if not already set (first match wins)
+                                    if field_name not in neighbourhood_data[col]:
+                                        neighbourhood_data[col][
+                                            field_name
+                                        ] = numeric_val
+                            except (ValueError, TypeError):
+                                pass
+                    break  # Move to next row after matching
+
+        # Convert to CensusRecord objects
+        records = []
+        unmatched = []
+
+        for neighbourhood_name, data in neighbourhood_data.items():
+            if not data:
+                continue
+
+            # Match neighbourhood name to ID
+            neighbourhood_id = self._match_neighbourhood_id(neighbourhood_name)
+            if neighbourhood_id is None:
+                unmatched.append(neighbourhood_name)
+                continue
+
+            try:
+                pop_val = data.get("population")
+                population = int(pop_val) if pop_val is not None else None
+
+                record = CensusRecord(
+                    neighbourhood_id=neighbourhood_id,
+                    census_year=year,
+                    population=population,
+                    population_density=data.get("population_density"),
+                    median_household_income=data.get("median_household_income"),
+                    average_household_income=data.get("average_household_income"),
+                    unemployment_rate=data.get("unemployment_rate"),
+                    pct_bachelors_or_higher=data.get("pct_bachelors_or_higher"),
+                    pct_owner_occupied=data.get("pct_owner_occupied"),
+                    pct_renter_occupied=data.get("pct_renter_occupied"),
+                    median_age=data.get("median_age"),
+                    average_dwelling_value=data.get("average_dwelling_value"),
+                )
+                records.append(record)
+            except Exception as e:
+                logger.debug(f"Skipping neighbourhood {neighbourhood_name}: {e}")
+
+        if unmatched:
+            logger.warning(
+                f"Could not match {len(unmatched)} neighbourhoods: {unmatched[:5]}..."
+            )
+
+        logger.info(f"Parsed {len(records)} census records for year {year}")
+        return records

    def get_parks(self) -> list[AmenityRecord]:
        """Fetch park locations.