Compare commits: 4818c53fd2 ... bf6e392002

2 commits:

| SHA1 |
|---|
| bf6e392002 |
| d0f32edba7 |
.gitea/workflows/ci.yml (new file, 35 lines)
@@ -0,0 +1,35 @@
name: CI

on:
  push:
    branches:
      - development
      - staging
      - main
  pull_request:
    branches:
      - development

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install ruff pytest

      - name: Run linter
        run: ruff check .

      - name: Run tests
        run: pytest tests/ -v --tb=short
.gitea/workflows/deploy-production.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Production

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Production Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin main
            git reset --hard origin/main

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Production deployment complete!"
.gitea/workflows/deploy-staging.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Staging

on:
  push:
    branches:
      - staging

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Staging Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.STAGING_HOST }}
          username: ${{ secrets.STAGING_USER }}
          key: ${{ secrets.STAGING_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin staging
            git reset --hard origin/staging

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Staging deployment complete!"
@@ -18,7 +18,7 @@ Working context for Claude Code on the Analytics Portfolio project.

 ```bash
 make setup        # Install deps, create .env, init pre-commit
-make docker-up    # Start PostgreSQL + PostGIS
+make docker-up    # Start PostgreSQL + PostGIS (auto-detects x86/ARM)
 make docker-down  # Stop containers
 make db-init      # Initialize database schema
 make run          # Start Dash dev server
@@ -193,6 +193,7 @@ notebooks/          # Data documentation (Phase 6)

 - SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
 - PostGIS extension required in database
 - Docker Compose V2 format (no `version` field)
+- **Multi-architecture support**: `make docker-up` auto-detects CPU architecture and uses the appropriate PostGIS image (x86_64: `postgis/postgis`, ARM64: `imresamu/postgis`)

 ---
LICENSE (new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024-2025 Leo Miranda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Makefile (27 changed lines)
@@ -1,4 +1,4 @@
-.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help
+.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto

 # Default target
 .DEFAULT_GOAL := help

@@ -8,6 +8,17 @@ PYTHON := python3
 PIP := pip
 DOCKER_COMPOSE := docker compose

+# Architecture detection for Docker images
+ARCH := $(shell uname -m)
+ifeq ($(ARCH),aarch64)
+POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else ifeq ($(ARCH),arm64)
+POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else
+POSTGIS_IMAGE := postgis/postgis:16-3.4
+endif
+export POSTGIS_IMAGE
+
 # Colors for output
 BLUE := \033[0;34m
 GREEN := \033[0;32m

@@ -39,6 +50,7 @@ setup: ## Install dependencies, create .env, init pre-commit

 docker-up: ## Start PostgreSQL + PostGIS containers
 	@echo "$(GREEN)Starting database containers...$(NC)"
+	@echo "$(BLUE)Architecture: $(ARCH) -> Using image: $(POSTGIS_IMAGE)$(NC)"
 	$(DOCKER_COMPOSE) up -d
 	@echo "$(GREEN)Waiting for database to be ready...$(NC)"
 	@sleep 3

@@ -139,6 +151,19 @@ ci: ## Run all checks (lint, typecheck, test)
 	$(MAKE) test
 	@echo "$(GREEN)All checks passed!$(NC)"

+# =============================================================================
+# Operations
+# =============================================================================
+
+logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
+	@./scripts/logs.sh $(SERVICE)
+
+run-detached: ## Start containers and wait for health check
+	@./scripts/run-detached.sh
+
+etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
+	@./scripts/etl/toronto.sh $(MODE)
+
 # =============================================================================
 # Deployment
 # =============================================================================
README.md (40 changed lines)
@@ -1,5 +1,9 @@
 # Analytics Portfolio

+[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
+
+**Live Demo:** [leodata.science](https://leodata.science)
+
 A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.

 ## Live Pages

@@ -32,6 +36,42 @@ An interactive choropleth dashboard analyzing Toronto's 158 official neighbourhoods
 - Toronto Police Service (crime statistics)
 - CMHC Rental Market Survey (rental data by zone)

+## Architecture
+
+```mermaid
+flowchart LR
+    subgraph Sources
+        A1[City of Toronto API]
+        A2[Toronto Police API]
+        A3[CMHC Data]
+    end
+
+    subgraph ETL
+        B1[Parsers]
+        B2[Loaders]
+    end
+
+    subgraph Database
+        C1[(PostgreSQL/PostGIS)]
+        C2[dbt Models]
+    end
+
+    subgraph Application
+        D1[Dash App]
+        D2[Plotly Figures]
+    end
+
+    A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
+```
+
+**Pipeline Stages:**
+- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
+- **ETL**: Python parsers extract and validate data; loaders persist to database
+- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
+- **Application**: Dash serves interactive dashboards with Plotly visualizations
+
+For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
+
 ## Quick Start

 ```bash
dbt/models/intermediate/int_census__toronto_cma.sql (new file, 60 lines)
@@ -0,0 +1,60 @@
-- Intermediate: Toronto CMA census statistics by year
-- Provides city-wide averages for metrics not available at neighbourhood level
-- Used when neighbourhood-level data is unavailable (e.g., median household income)
-- Grain: One row per year

with years as (
    select * from {{ ref('int_year_spine') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

-- Census data is only available for 2016 and 2021
-- Map each analysis year to the appropriate census year
year_to_census as (
    select
        y.year,
        case
            when y.year <= 2018 then 2016
            else 2021
        end as census_year
    from years y
),

-- Toronto CMA median household income from Statistics Canada
-- Source: Census Profile Table 98-316-X2021001
-- 2016: $65,829 (from Census Profile)
-- 2021: $84,000 (from Census Profile)
cma_income as (
    select 2016 as census_year, 65829 as median_household_income union all
    select 2021 as census_year, 84000 as median_household_income
),

-- City-wide aggregates from loaded neighbourhood data
city_aggregates as (
    select
        census_year,
        sum(population) as total_population,
        avg(population_density) as avg_population_density,
        avg(unemployment_rate) as avg_unemployment_rate
    from census
    where population is not null
    group by census_year
),

final as (
    select
        y.year,
        y.census_year,
        ci.median_household_income,
        ca.total_population,
        ca.avg_population_density,
        ca.avg_unemployment_rate
    from year_to_census y
    left join cma_income ci on y.census_year = ci.census_year
    left join city_aggregates ca on y.census_year = ca.census_year
)

select * from final
@@ -34,7 +34,7 @@ amenity_scores as (
         n.population,
         n.land_area_sqkm,

-        a.year,
+        coalesce(a.year, 2021) as year,

         -- Raw counts
         a.parks_count,
@@ -64,15 +64,17 @@ crime_summary as (
         w.robbery_count,
         w.theft_over_count,
         w.homicide_count,
         w.avg_rate_per_100k,
         w.yoy_change_pct,

-        -- Crime rate per 100K population
+        -- Crime rate per 100K population (use source data avg, or calculate if population available)
+        coalesce(
+            w.avg_rate_per_100k,
             case
                 when n.population > 0
                 then round(w.total_incidents::numeric / n.population * 100000, 2)
                 else null
-            end as crime_rate_per_100k
+            end
+        ) as crime_rate_per_100k

     from neighbourhoods n
     inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
@@ -17,7 +17,8 @@ demographics as (
         n.geometry,
         n.land_area_sqkm,

-        c.census_year,
+        -- Use census_year from census data, or fall back to dim_neighbourhood's year
+        coalesce(c.census_year, n.census_year, 2021) as census_year,
         c.population,
         c.population_density,
         c.median_household_income,
@@ -20,7 +20,7 @@ housing as (
         n.neighbourhood_name,
         n.geometry,

-        coalesce(r.year, c.census_year) as year,
+        coalesce(r.year, c.census_year, 2021) as year,

         -- Census housing metrics
         c.pct_owner_occupied,
dbt/models/intermediate/int_rentals__toronto_cma.sql (new file, 25 lines)
@@ -0,0 +1,25 @@
-- Intermediate: Toronto CMA rental metrics by year
-- Aggregates rental data to city-wide averages by year
-- Source: StatCan CMHC data at CMA level
-- Grain: One row per year

with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

-- Pivot bedroom types to columns
yearly_rentals as (
    select
        year,
        max(case when bedroom_type = 'bachelor' then avg_rent end) as avg_rent_bachelor,
        max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed,
        max(case when bedroom_type = '3bed' then avg_rent end) as avg_rent_3bed,
        -- Use 2-bedroom as standard reference
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_standard,
        max(vacancy_rate) as vacancy_rate
    from rentals
    group by year
)

select * from yearly_rentals
dbt/models/intermediate/int_year_spine.sql (new file, 11 lines)
@@ -0,0 +1,11 @@
-- Intermediate: Year spine for analysis
-- Creates a row for each year from 2014-2025
-- Used to drive time-series analysis across all data sources

with years as (
    -- Generate years from available data sources
    -- Crime data: 2014-2024, Rentals: 2019-2025
    select generate_series(2014, 2025) as year
)

select year from years
@@ -1,79 +1,119 @@
 -- Mart: Neighbourhood Overview with Composite Livability Score
 -- Dashboard Tab: Overview
 -- Grain: One row per neighbourhood per year
+-- Time spine: Years 2014-2025 (driven by crime/rental data availability)

-with demographics as (
-    select * from {{ ref('int_neighbourhood__demographics') }}
+with years as (
+    select * from {{ ref('int_year_spine') }}
 ),

-housing as (
-    select * from {{ ref('int_neighbourhood__housing') }}
+neighbourhoods as (
+    select * from {{ ref('stg_toronto__neighbourhoods') }}
 ),

+-- Create base: all neighbourhoods × all years
+neighbourhood_years as (
+    select
+        n.neighbourhood_id,
+        n.neighbourhood_name,
+        n.geometry,
+        y.year
+    from neighbourhoods n
+    cross join years y
+),
+
+-- Census data (available for 2016, 2021)
+-- For each year, use the most recent census data available
+census as (
+    select * from {{ ref('stg_toronto__census') }}
+),
+
+census_mapped as (
+    select
+        ny.neighbourhood_id,
+        ny.year,
+        c.population,
+        c.unemployment_rate,
+        c.pct_bachelors_or_higher as education_bachelors_pct
+    from neighbourhood_years ny
+    left join census c on ny.neighbourhood_id = c.neighbourhood_id
+        -- Use census year <= analysis year, prefer most recent
+        and c.census_year = (
+            select max(c2.census_year)
+            from {{ ref('stg_toronto__census') }} c2
+            where c2.neighbourhood_id = ny.neighbourhood_id
+                and c2.census_year <= ny.year
+        )
+),
+
+-- CMA-level census data (for income - not available at neighbourhood level)
+cma_census as (
+    select * from {{ ref('int_census__toronto_cma') }}
+),
+
+-- Crime data (2014-2024)
 crime as (
     select * from {{ ref('int_neighbourhood__crime_summary') }}
 ),

-amenities as (
-    select * from {{ ref('int_neighbourhood__amenity_scores') }}
+-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
+rentals as (
+    select * from {{ ref('int_rentals__toronto_cma') }}
 ),

--- Compute percentile ranks for scoring components
-percentiles as (
+-- Compute scores
+scored as (
     select
-        d.neighbourhood_id,
-        d.neighbourhood_name,
-        d.geometry,
-        d.census_year as year,
-        d.population,
-        d.median_household_income,
+        ny.neighbourhood_id,
+        ny.neighbourhood_name,
+        ny.geometry,
+        ny.year,
+        cm.population,
+        -- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
+        cma.median_household_income,

         -- Safety score: inverse of crime rate (higher = safer)
         case
-            when c.crime_rate_per_100k is not null
+            when cr.crime_rate_per_100k is not null
             then 100 - percent_rank() over (
-                partition by d.census_year
-                order by c.crime_rate_per_100k
+                partition by ny.year
+                order by cr.crime_rate_per_100k
             ) * 100
             else null
         end as safety_score,

         -- Affordability score: inverse of rent-to-income ratio
+        -- Using CMA-level income since neighbourhood-level not available
         case
-            when h.rent_to_income_pct is not null
+            when cma.median_household_income > 0 and r.avg_rent_standard > 0
             then 100 - percent_rank() over (
-                partition by d.census_year
-                order by h.rent_to_income_pct
+                partition by ny.year
+                order by (r.avg_rent_standard * 12 / cma.median_household_income)
             ) * 100
             else null
         end as affordability_score,

-        -- Amenity score: based on amenities per capita
+        -- Raw metrics
+        cr.crime_rate_per_100k,
         case
-            when a.total_amenities_per_1000 is not null
-            then percent_rank() over (
-                partition by d.census_year
-                order by a.total_amenities_per_1000
-            ) * 100
+            when cma.median_household_income > 0 and r.avg_rent_standard > 0
+            then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
             else null
-        end as amenity_score,
+        end as rent_to_income_pct,
+        r.avg_rent_standard as avg_rent_2bed,
+        r.vacancy_rate

-        -- Raw metrics for reference
-        c.crime_rate_per_100k,
-        h.rent_to_income_pct,
-        h.avg_rent_2bed,
-        a.total_amenities_per_1000
-
-    from demographics d
-    left join housing h
-        on d.neighbourhood_id = h.neighbourhood_id
-        and d.census_year = h.year
-    left join crime c
-        on d.neighbourhood_id = c.neighbourhood_id
-        and d.census_year = c.year
-    left join amenities a
-        on d.neighbourhood_id = a.neighbourhood_id
-        and d.census_year = a.year
+    from neighbourhood_years ny
+    left join census_mapped cm
+        on ny.neighbourhood_id = cm.neighbourhood_id
+        and ny.year = cm.year
+    left join cma_census cma
+        on ny.year = cma.year
+    left join crime cr
+        on ny.neighbourhood_id = cr.neighbourhood_id
+        and ny.year = cr.year
+    left join rentals r
+        on ny.year = r.year
 ),

 final as (

@@ -88,13 +128,14 @@ final as (
         -- Component scores (0-100)
         round(safety_score::numeric, 1) as safety_score,
         round(affordability_score::numeric, 1) as affordability_score,
-        round(amenity_score::numeric, 1) as amenity_score,
+        -- Amenity score not available at this level, use placeholder
+        50.0 as amenity_score,

-        -- Composite livability score: safety (30%), affordability (40%), amenities (30%)
+        -- Composite livability score: safety (40%), affordability (40%), amenities (20%)
         round(
-            (coalesce(safety_score, 50) * 0.30 +
+            (coalesce(safety_score, 50) * 0.40 +
             coalesce(affordability_score, 50) * 0.40 +
-            coalesce(amenity_score, 50) * 0.30)::numeric,
+            50 * 0.20)::numeric,
             1
         ) as livability_score,

@@ -102,9 +143,10 @@ final as (
         crime_rate_per_100k,
         rent_to_income_pct,
         avg_rent_2bed,
-        total_amenities_per_1000
+        vacancy_rate,
+        null::numeric as total_amenities_per_1000

-    from percentiles
+    from scored
 )

 select * from final
@@ -1,9 +1,13 @@
 -- Staged CMHC rental market survey data
--- Source: fact_rentals table loaded from CMHC CSV exports
+-- Source: fact_rentals table loaded from CMHC/StatCan
 -- Grain: One row per zone per bedroom type per survey year

 with source as (
-    select * from {{ source('toronto_housing', 'fact_rentals') }}
+    select
+        f.*,
+        t.year as survey_year
+    from {{ source('toronto_housing', 'fact_rentals') }} f
+    join {{ source('toronto_housing', 'dim_time') }} t on f.date_key = t.date_key
 ),

 staged as (

@@ -11,6 +15,7 @@ staged as (
         id as rental_id,
         date_key,
         zone_key,
+        survey_year as year,
         bedroom_type,
         universe as rental_universe,
         avg_rent,
@@ -1,6 +1,6 @@
 services:
   db:
-    image: postgis/postgis:16-3.4
+    image: ${POSTGIS_IMAGE:-postgis/postgis:16-3.4}
     container_name: portfolio-db
     restart: unless-stopped
     ports:
docs/DATABASE_SCHEMA.md (new file, 307 lines)
@@ -0,0 +1,307 @@
# Database Schema

This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.

## Entity Relationship Diagram

```mermaid
erDiagram
    dim_time {
        int date_key PK
        date full_date UK
        int year
        int month
        int quarter
        string month_name
        bool is_month_start
    }

    dim_cmhc_zone {
        int zone_key PK
        string zone_code UK
        string zone_name
        geometry geometry
    }

    dim_neighbourhood {
        int neighbourhood_id PK
        string name
        geometry geometry
        int population
        numeric land_area_sqkm
        numeric pop_density_per_sqkm
        numeric pct_bachelors_or_higher
        numeric median_household_income
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        int census_year
    }

    dim_policy_event {
        int event_id PK
        date event_date
        date effective_date
        string level
        string category
        string title
        text description
        string expected_direction
        string source_url
        string confidence
    }

    fact_rentals {
        int id PK
        int date_key FK
        int zone_key FK
        string bedroom_type
        int universe
        numeric avg_rent
        numeric median_rent
        numeric vacancy_rate
        numeric availability_rate
        numeric turnover_rate
        numeric rent_change_pct
        string reliability_code
    }

    fact_census {
        int id PK
        int neighbourhood_id FK
        int census_year
        int population
        numeric population_density
        numeric median_household_income
        numeric average_household_income
        numeric unemployment_rate
        numeric pct_bachelors_or_higher
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        numeric median_age
        numeric average_dwelling_value
    }

    fact_crime {
        int id PK
        int neighbourhood_id FK
        int year
        string crime_type
        int count
        numeric rate_per_100k
    }

    fact_amenities {
        int id PK
        int neighbourhood_id FK
        string amenity_type
        int count
        int year
    }

    bridge_cmhc_neighbourhood {
        int id PK
        string cmhc_zone_code FK
        int neighbourhood_id FK
        numeric weight
    }

    dim_time ||--o{ fact_rentals : "date_key"
    dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
    dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
    dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
    dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
```

## Schema Layers

### Raw Schema

Raw data is loaded directly from external sources without transformation:

| Table | Source | Description |
|-------|--------|-------------|
| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries |
| `raw.census_profiles` | City of Toronto API | Census profile data |
| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood |
| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data |

### Staging Schema (dbt)

Staging models provide 1:1 cleaned representations of source data:

| Model | Source Table | Purpose |
|-------|-------------|---------|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
| `stg_police__crimes` | raw.crime_data | Standardized crime categories |

### Marts Schema (dbt)

Analytical tables ready for dashboard consumption:

| Model | Grain | Purpose |
|-------|-------|---------|
| `mart_neighbourhood_summary` | neighbourhood | Composite livability scores |
| `mart_rental_trends` | zone × month | Time-series rental analysis |
| `mart_crime_rates` | neighbourhood × year | Crime rate calculations |
| `mart_amenity_density` | neighbourhood | Amenity accessibility scores |

## Table Details

### Dimension Tables

#### dim_time

Time dimension for date-based analysis. Grain: one row per month.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
| year | INTEGER | NOT NULL | Calendar year |
| month | INTEGER | NOT NULL | Month number (1-12) |
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
| month_name | VARCHAR(20) | NOT NULL | Month name |
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
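The YYYYMM surrogate key can be derived directly from a date; the project generates it in Python via `generate_date_key`, but a minimal SQL sketch (illustrative only, not part of the schema) looks like this:

```sql
-- Illustrative only: derive the YYYYMM date_key for October 2023 -> 202310
select extract(year from d)::int * 100 + extract(month from d)::int as date_key
from (values (date '2023-10-01')) as v(d);
```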
#### dim_cmhc_zone

CMHC rental market zones (~20 zones covering Toronto).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |

#### dim_neighbourhood

Toronto's 158 official neighbourhoods.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
| population | INTEGER | | Total population |
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| median_household_income | NUMERIC(12,2) | | Median income |
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
| census_year | INTEGER | DEFAULT 2021 | Census reference year |

#### dim_policy_event

Policy events for time-series annotation (rent control, interest rates, etc.).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| event_id | INTEGER | PK, AUTO | Surrogate key |
| event_date | DATE | NOT NULL | Announcement date |
| effective_date | DATE | | Implementation date |
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
| title | VARCHAR(200) | NOT NULL | Event title |
| description | TEXT | | Detailed description |
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
| source_url | VARCHAR(500) | | Reference link |
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |

### Fact Tables

#### fact_rentals

CMHC rental market survey data. Grain: zone × bedroom type × survey date.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| date_key | INTEGER | FK → dim_time | Survey date reference |
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
| universe | INTEGER | | Total rental units |
| avg_rent | NUMERIC(10,2) | | Average rent |
| median_rent | NUMERIC(10,2) | | Median rent |
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
| availability_rate | NUMERIC(5,2) | | Availability percentage |
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
| reliability_code | VARCHAR(2) | | CMHC data quality code |

#### fact_census

Census statistics. Grain: neighbourhood × census year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
| population | INTEGER | | Total population |
| population_density | NUMERIC(10,2) | | People per km² |
| median_household_income | NUMERIC(12,2) | | Median income |
| average_household_income | NUMERIC(12,2) | | Average income |
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
| median_age | NUMERIC(5,2) | | Median resident age |
| average_dwelling_value | NUMERIC(12,2) | | Average home value |

#### fact_crime

Crime statistics. Grain: neighbourhood × year × crime type.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| year | INTEGER | NOT NULL | Calendar year |
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
| count | INTEGER | NOT NULL | Number of incidents |
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |

#### fact_amenities

Amenity counts. Grain: neighbourhood × amenity type × year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
| count | INTEGER | NOT NULL | Number of amenities |
| year | INTEGER | NOT NULL | Reference year |

### Bridge Tables

#### bridge_cmhc_neighbourhood

Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
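As a sketch of how these weights might be used (an illustrative query, not one shipped with the project — the real logic lives in the `disaggregate_zone_value` loader), zone-level rents can be disaggregated to neighbourhoods with a weighted average:

```sql
-- Illustrative only: estimate a neighbourhood-level average 2-bedroom rent
-- by weighting each overlapping CMHC zone's rent by its bridge weight.
select
    b.neighbourhood_id,
    sum(f.avg_rent * b.weight) / nullif(sum(b.weight), 0) as est_avg_rent_2bed
from fact_rentals f
join dim_cmhc_zone z on f.zone_key = z.zone_key
join bridge_cmhc_neighbourhood b on b.cmhc_zone_code = z.zone_code
where f.bedroom_type = '2-bed'
group by b.neighbourhood_id;
```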
## Indexes

| Table | Index | Columns | Purpose |
|-------|-------|---------|---------|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |

## PostGIS Extensions

The database requires PostGIS for geospatial operations:

```sql
CREATE EXTENSION IF NOT EXISTS postgis;
```

All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
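For example, a spatial join between the two geometry-bearing dimensions might look like this (illustrative only):

```sql
-- Illustrative only: pair each neighbourhood with the CMHC zones it intersects
select n.name, z.zone_name
from dim_neighbourhood n
join dim_cmhc_zone z on ST_Intersects(n.geometry, z.geometry);
```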
docs/runbooks/adding-dashboard.md (new file, 200 lines)
@@ -0,0 +1,200 @@
# Runbook: Adding a New Dashboard

This runbook describes how to add a new data dashboard to the portfolio application.

## Prerequisites

- [ ] Data sources identified and accessible
- [ ] Database schema designed
- [ ] Basic Dash/Plotly familiarity

## Directory Structure

Create the following structure under `portfolio_app/`:

```
portfolio_app/
├── pages/
│   └── {dashboard_name}/
│       ├── dashboard.py        # Main layout with tabs
│       ├── methodology.py      # Data sources and methods page
│       ├── tabs/
│       │   ├── __init__.py
│       │   ├── overview.py     # Overview tab layout
│       │   └── ...             # Additional tab layouts
│       └── callbacks/
│           ├── __init__.py
│           └── ...             # Callback modules
├── {dashboard_name}/           # Data logic (outside pages/)
│   ├── __init__.py
│   ├── parsers/                # API/CSV extraction
│   │   └── __init__.py
│   ├── loaders/                # Database operations
│   │   └── __init__.py
│   ├── schemas/                # Pydantic models
│   │   └── __init__.py
│   └── models/                 # SQLAlchemy ORM
│       └── __init__.py
```

## Step-by-Step Checklist

### 1. Data Layer

- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/`
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
- [ ] Create parsers in `{dashboard_name}/parsers/`
- [ ] Create loaders in `{dashboard_name}/loaders/`
- [ ] Add database migrations if needed

### 2. dbt Models

Create dbt models in `dbt/models/`:

- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning
- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic
- [ ] `marts/mart_{domain}.sql` - Final analytical tables

Follow the naming conventions (a minimal staging sketch follows this list):
- Staging: `stg_{source}__{entity}`
- Intermediate: `int_{domain}__{transform}`
- Marts: `mart_{domain}`
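A minimal staging-model sketch following these conventions (the source and column names here are placeholders, not part of the project):

```sql
-- staging/stg_mysource__entity.sql — placeholder names for illustration
with source as (
    select * from {{ source('mysource', 'raw_entity') }}
),

staged as (
    select
        id as entity_id,
        name,
        created_at
    from source
)

select * from staged
```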
### 3. Visualization Layer

- [ ] Create figure factories in `figures/` (or reuse existing)
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`

### 4. Dashboard Pages

#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)

```python
import dash
from dash import html, dcc
import dash_mantine_components as dmc

dash.register_page(
    __name__,
    path="/{dashboard_name}",
    title="{Dashboard Title}",
    description="{Description}"
)


def layout():
    return dmc.Container([
        # Header
        dmc.Title("{Dashboard Title}", order=1),

        # Tabs
        dmc.Tabs([
            dmc.TabsList([
                dmc.TabsTab("Overview", value="overview"),
                # Add more tabs
            ]),
            dmc.TabsPanel(overview_tab(), value="overview"),
            # Add more panels
        ], value="overview"),
    ])
```

#### Tab Layouts (`pages/{dashboard_name}/tabs/`)

- [ ] Create one file per tab
- [ ] Export layout function from each

#### Callbacks (`pages/{dashboard_name}/callbacks/`)

- [ ] Create callback modules for interactivity
- [ ] Import and register in dashboard.py

### 5. Navigation

Add to sidebar in `components/sidebar.py`:

```python
dmc.NavLink(
    label="{Dashboard Name}",
    href="/{dashboard_name}",
    icon=DashIconify(icon="..."),
)
```

### 6. Documentation

- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
- [ ] Document data sources
- [ ] Document transformation logic
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed

### 7. Testing

- [ ] Add unit tests for parsers
- [ ] Add unit tests for loaders
- [ ] Add integration tests for callbacks
- [ ] Run `make test`

### 8. Final Verification

- [ ] All pages render without errors
- [ ] All callbacks respond correctly
- [ ] Data loads successfully
- [ ] dbt models run cleanly (`make dbt-run`)
- [ ] Linting passes (`make lint`)
- [ ] Tests pass (`make test`)

## Example: Toronto Dashboard

Reference implementation: `portfolio_app/pages/toronto/`

Key files:
- `dashboard.py` - Main layout with 5 tabs
- `tabs/overview.py` - Livability scores, scatter plots
- `callbacks/map_callbacks.py` - Choropleth interactions
- `toronto/models/dimensions.py` - Dimension tables
- `toronto/models/facts.py` - Fact tables

## Common Patterns

### Figure Factories

```python
# figures/choropleth.py
def create_choropleth_figure(
    gdf: gpd.GeoDataFrame,
    value_column: str,
    title: str,
    **kwargs
) -> go.Figure:
    ...
```

### Callbacks

```python
# callbacks/map_callbacks.py
@callback(
    Output("neighbourhood-details", "children"),
    Input("choropleth-map", "clickData"),
)
def update_details(click_data):
    ...
```

### Data Loading

```python
# {dashboard_name}/loaders/load.py
def load_data(session: Session) -> None:
    # Parse from source
    records = parse_source_data()

    # Validate with Pydantic
    validated = [Schema(**r) for r in records]

    # Load to database
    for record in validated:
        session.add(Model(**record.model_dump()))

    session.commit()
```
docs/runbooks/deployment.md (new file, 232 lines)
@@ -0,0 +1,232 @@
# Runbook: Deployment

This runbook covers deployment procedures for the Analytics Portfolio application.

## Environments

| Environment | Branch | Server | URL |
|-------------|--------|--------|-----|
| Development | `development` | Local | http://localhost:8050 |
| Staging | `staging` | Homelab (hotserv) | Internal |
| Production | `main` | Bandit Labs VPS | https://leodata.science |

## CI/CD Pipeline

### Automatic Deployment

Deployments are triggered automatically via Gitea Actions:

1. **Push to `staging`** → Deploys to staging server
2. **Push to `main`** → Deploys to production server

### Workflow Files

- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
- `.gitea/workflows/deploy-production.yml` - Production deployment

### Required Secrets

Configure these in Gitea repository settings:

| Secret | Description |
|--------|-------------|
| `STAGING_HOST` | Staging server hostname/IP |
| `STAGING_USER` | SSH username for staging |
| `STAGING_SSH_KEY` | Private key for staging SSH |
| `PROD_HOST` | Production server hostname/IP |
| `PROD_USER` | SSH username for production |
| `PROD_SSH_KEY` | Private key for production SSH |

## Manual Deployment

### Prerequisites

- SSH access to target server
- Repository cloned at `~/apps/personal-portfolio`
- Virtual environment created at `.venv`
- Docker and Docker Compose installed
- PostgreSQL container running

### Steps

```bash
# 1. SSH to server
ssh user@server

# 2. Navigate to app directory
cd ~/apps/personal-portfolio

# 3. Pull latest changes
git fetch origin {branch}
git reset --hard origin/{branch}

# 4. Activate virtual environment
source .venv/bin/activate

# 5. Install dependencies
pip install -r requirements.txt

# 6. Run database migrations (if any)
# python -m alembic upgrade head

# 7. Run dbt models
cd dbt && dbt run --profiles-dir . && cd ..

# 8. Restart application
docker compose down
docker compose up -d

# 9. Verify health
curl http://localhost:8050/health
```

## Rollback Procedure

### Quick Rollback

If a deployment fails, roll back to the previous commit:

```bash
# 1. Find previous working commit
git log --oneline -10

# 2. Reset to that commit
git reset --hard {commit_hash}

# 3. Restart services
docker compose down
docker compose up -d

# 4. Verify
curl http://localhost:8050/health
```

### Full Rollback (Database)

If database changes need to be reverted:

```bash
# 1. Stop application
docker compose down

# 2. Restore database backup
pg_restore -h localhost -U portfolio -d portfolio backup.dump

# 3. Revert code
git reset --hard {commit_hash}

# 4. Run dbt at that version
cd dbt && dbt run --profiles-dir . && cd ..

# 5. Restart
docker compose up -d
```

## Health Checks

### Application Health

```bash
curl http://localhost:8050/health
```

Expected response:
```json
{"status": "healthy"}
```

### Database Health

```bash
docker compose exec postgres pg_isready -U portfolio
```

### Container Status

```bash
docker compose ps
```

## Monitoring

### View Logs

```bash
# All services
make logs

# Specific service
make logs SERVICE=postgres

# Or directly
docker compose logs -f
```

### Check Resource Usage

```bash
docker stats
```

## Troubleshooting

### Application Won't Start

1. Check container logs: `docker compose logs app`
2. Verify environment variables: `cat .env`
3. Check database connectivity: `docker compose exec postgres pg_isready`
4. Verify port availability: `lsof -i :8050`

### Database Connection Errors

1. Check postgres container: `docker compose ps postgres`
2. Verify DATABASE_URL in `.env`
3. Check postgres logs: `docker compose logs postgres`
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`

### dbt Failures

1. Check dbt configuration: `cd dbt && dbt debug`
2. Verify profiles.yml: `cat dbt/profiles.yml`
3. Run with verbose output: `dbt run --debug`

### Out of Memory

1. Check memory usage: `free -h`
2. Review container limits in docker-compose.yml
3. Consider increasing swap or server resources

## Backup Procedures

### Database Backup

```bash
# Create backup
docker compose exec postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql

# Compressed backup
docker compose exec postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
```

### Restore from Backup

```bash
# From SQL file
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql

# From dump file
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
```

## Deployment Checklist

Before deploying to production:

- [ ] All tests pass (`make test`)
- [ ] Linting passes (`make lint`)
- [ ] Staging deployment successful
- [ ] Manual testing on staging complete
- [ ] Database backup taken
- [ ] Rollback plan confirmed
- [ ] Team notified of deployment window
@@ -1,6 +1,7 @@
 """Chart callbacks for supporting visualizations."""
 # mypy: disable-error-code="misc,no-untyped-def,arg-type"

+import pandas as pd
 import plotly.graph_objects as go
 from dash import Input, Output, callback

@@ -43,7 +44,24 @@ def update_overview_scatter(year: str) -> go.Figure:
     # Compute safety score (inverse of crime rate)
     if "total_crime_rate" in merged.columns:
         max_crime = merged["total_crime_rate"].max()
-        merged["safety_score"] = 100 - (merged["total_crime_rate"] / max_crime * 100)
+        if max_crime and max_crime > 0:
+            merged["safety_score"] = 100 - (
+                merged["total_crime_rate"] / max_crime * 100
+            )
+        else:
+            merged["safety_score"] = 50  # Default if no crime data
+
+    # Fill NULL population with median or default value for sizing
+    if "population" in merged.columns:
+        median_pop = merged["population"].median()
+        default_pop = median_pop if pd.notna(median_pop) else 10000
+        merged["population"] = merged["population"].fillna(default_pop)
+
+    # Filter rows with required data for scatter plot
+    merged = merged.dropna(subset=["median_household_income", "safety_score"])
+
+    if merged.empty:
+        return _empty_chart("Insufficient data for scatter plot")

     data = merged.to_dict("records")
@@ -76,12 +94,13 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
         return _empty_chart("No trend data available")

     # Placeholder for trend data - would be historical
+    base_rent = averages.get("avg_rent_2bed") or 2000
     data = [
-        {"year": "2019", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.85},
-        {"year": "2020", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.88},
-        {"year": "2021", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.92},
-        {"year": "2022", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.96},
-        {"year": "2023", "avg_rent": averages.get("avg_rent_2bed", 2000)},
+        {"year": "2019", "avg_rent": base_rent * 0.85},
+        {"year": "2020", "avg_rent": base_rent * 0.88},
+        {"year": "2021", "avg_rent": base_rent * 0.92},
+        {"year": "2022", "avg_rent": base_rent * 0.96},
+        {"year": "2023", "avg_rent": base_rent},
     ]

     fig = go.Figure()
@@ -330,10 +349,11 @@ def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure
     # Get city averages
     averages = get_city_averages(year_int)

+    amenity_score = averages.get("avg_amenity_score") or 50
     city_data = {
-        "parks_per_1000": averages.get("avg_amenity_score", 50) / 100 * 10,
-        "schools_per_1000": averages.get("avg_amenity_score", 50) / 100 * 5,
-        "childcare_per_1000": averages.get("avg_amenity_score", 50) / 100 * 3,
+        "parks_per_1000": amenity_score / 100 * 10,
+        "schools_per_1000": amenity_score / 100 * 5,
+        "childcare_per_1000": amenity_score / 100 * 3,
         "transit_access": 70,
     }
@@ -3,7 +3,12 @@
 from .amenities import load_amenities, load_amenity_counts
 from .base import bulk_insert, get_session, upsert_by_key
 from .census import load_census_data
-from .cmhc import load_cmhc_record, load_cmhc_rentals
+from .cmhc import (
+    ensure_toronto_cma_zone,
+    load_cmhc_record,
+    load_cmhc_rentals,
+    load_statcan_cmhc_data,
+)
 from .cmhc_crosswalk import (
     build_cmhc_neighbourhood_crosswalk,
     disaggregate_zone_value,

@@ -32,6 +37,8 @@ __all__ = [
     # Fact loaders
     "load_cmhc_rentals",
     "load_cmhc_record",
+    "load_statcan_cmhc_data",
+    "ensure_toronto_cma_zone",
     # Phase 3 loaders
     "load_census_data",
     "load_crime_data",
@@ -1,5 +1,9 @@
 """Loader for CMHC rental data into fact_rentals."""

+import logging
+from datetime import date
+from typing import Any
+
 from sqlalchemy.orm import Session

 from portfolio_app.toronto.models import DimCMHCZone, DimTime, FactRentals

@@ -8,6 +12,12 @@ from portfolio_app.toronto.schemas import CMHCAnnualSurvey, CMHCRentalRecord
 from .base import get_session, upsert_by_key
 from .dimensions import generate_date_key

+logger = logging.getLogger(__name__)
+
+# Toronto CMA zone code for CMA-level data
+TORONTO_CMA_ZONE_CODE = "TORCMA"
+TORONTO_CMA_ZONE_NAME = "Toronto CMA"
+

 def load_cmhc_rentals(
     survey: CMHCAnnualSurvey,

@@ -135,3 +145,117 @@ def load_cmhc_record(
         return _load(session)
     with get_session() as sess:
         return _load(sess)
+
+
+def ensure_toronto_cma_zone(session: Session | None = None) -> int:
+    """Ensure Toronto CMA zone exists in dim_cmhc_zone.
+
+    Creates the zone if it doesn't exist.
+
+    Args:
+        session: Optional existing session.
+
+    Returns:
+        The zone_key for Toronto CMA.
+    """
+
+    def _ensure(sess: Session) -> int:
+        zone = (
+            sess.query(DimCMHCZone).filter_by(zone_code=TORONTO_CMA_ZONE_CODE).first()
+        )
+        if zone:
+            return int(zone.zone_key)
+
+        # Create new zone
+        new_zone = DimCMHCZone(
+            zone_code=TORONTO_CMA_ZONE_CODE,
+            zone_name=TORONTO_CMA_ZONE_NAME,
+            geometry=None,  # CMA-level doesn't need geometry
+        )
+        sess.add(new_zone)
+        sess.flush()
+        logger.info(f"Created Toronto CMA zone with zone_key={new_zone.zone_key}")
+        return int(new_zone.zone_key)
+
+    if session:
+        return _ensure(session)
+    with get_session() as sess:
+        result = _ensure(sess)
+        sess.commit()
+        return result
+
+
+def load_statcan_cmhc_data(
+    records: list[Any],  # List of CMHCRentalRecord from statcan_cmhc parser
+    session: Session | None = None,
+) -> int:
+    """Load CMHC rental data from StatCan parser into fact_rentals.
+
+    This function handles CMA-level data from the StatCan API, which provides
+    aggregate Toronto data rather than zone-level HMIP data.
+
+    Args:
+        records: List of CMHCRentalRecord dataclass instances from statcan_cmhc parser.
+        session: Optional existing session.
+
+    Returns:
+        Number of records loaded.
+    """
+    from portfolio_app.toronto.parsers.statcan_cmhc import (
+        CMHCRentalRecord as StatCanRecord,
+    )
+
+    def _load(sess: Session) -> int:
+        # Ensure Toronto CMA zone exists
+        zone_key = ensure_toronto_cma_zone(sess)
+
+        loaded = 0
+        for record in records:
+            if not isinstance(record, StatCanRecord):
+                logger.warning(f"Skipping invalid record type: {type(record)}")
+                continue
+
+            # Generate date key for this record's survey date
+            survey_date = date(record.year, record.month, 1)
+            date_key = generate_date_key(survey_date)
+
+            # Verify time dimension exists
+            time_dim = sess.query(DimTime).filter_by(date_key=date_key).first()
+            if not time_dim:
+                logger.warning(
+                    f"Time dimension not found for {survey_date}, skipping record"
+                )
+                continue
+
+            # Create fact record
+            fact = FactRentals(
+                date_key=date_key,
+                zone_key=zone_key,
+                bedroom_type=record.bedroom_type,
+                universe=record.universe,
+                avg_rent=float(record.avg_rent) if record.avg_rent else None,
+                median_rent=None,  # StatCan doesn't provide median
+                vacancy_rate=float(record.vacancy_rate)
+                if record.vacancy_rate
+                else None,
+                availability_rate=None,
+                turnover_rate=None,
+                rent_change_pct=None,
+                reliability_code=None,
+            )
+
+            # Upsert
+            inserted, updated = upsert_by_key(
+                sess, FactRentals, [fact], ["date_key", "zone_key", "bedroom_type"]
+            )
+            loaded += inserted + updated
+
+        logger.info(f"Loaded {loaded} CMHC rental records from StatCan")
+        return loaded
+
+    if session:
+        return _load(session)
+    with get_session() as sess:
+        result = _load(sess)
+        sess.commit()
+        return result
383
portfolio_app/toronto/parsers/statcan_cmhc.py
Normal file
383
portfolio_app/toronto/parsers/statcan_cmhc.py
Normal file
@@ -0,0 +1,383 @@
"""Parser for CMHC rental data via Statistics Canada API.

Downloads rental market data (average rent, vacancy rates, universe)
from Statistics Canada's Web Data Service.

Data Sources:
    - Table 34-10-0127: Vacancy rates
    - Table 34-10-0129: Rental universe (total units)
    - Table 34-10-0133: Average rent by bedroom type
"""

import contextlib
import io
import logging
import zipfile
from dataclasses import dataclass
from decimal import Decimal
from pathlib import Path
from typing import Any

import httpx
import pandas as pd

logger = logging.getLogger(__name__)

# StatCan Web Data Service endpoints
STATCAN_API_BASE = "https://www150.statcan.gc.ca/t1/wds/rest"
STATCAN_DOWNLOAD_BASE = "https://www150.statcan.gc.ca/n1/tbl/csv"

# CMHC table IDs
CMHC_TABLES = {
    "vacancy": "34100127",
    "universe": "34100129",
    "rent": "34100133",
}

# Toronto CMA identifier in StatCan data
TORONTO_DGUID = "2011S0503535"
TORONTO_GEO_NAME = "Toronto, Ontario"


@dataclass
class CMHCRentalRecord:
    """Rental market record for database loading."""

    year: int
    month: int  # CMHC surveys in October, so month=10
    zone_name: str
    bedroom_type: str
    avg_rent: Decimal | None
    vacancy_rate: Decimal | None
    universe: int | None


class StatCanCMHCParser:
    """Parser for CMHC rental data from Statistics Canada.

    Downloads and processes rental market survey data including:
    - Average rents by bedroom type
    - Vacancy rates
    - Rental universe (total units)

    Data is available from 1987 to present, updated annually in January.
    """

    BEDROOM_TYPE_MAP = {
        "Bachelor units": "bachelor",
        "One bedroom units": "1bed",
        "Two bedroom units": "2bed",
        "Three bedroom units": "3bed",
        "Total": "total",
    }

    STRUCTURE_FILTER = "Apartment structures of six units and over"

    def __init__(
        self,
        cache_dir: Path | None = None,
        timeout: float = 60.0,
    ) -> None:
        """Initialize parser.

        Args:
            cache_dir: Optional directory for caching downloaded files.
            timeout: HTTP request timeout in seconds.
        """
        self._cache_dir = cache_dir
        self._timeout = timeout
        self._client: httpx.Client | None = None

    @property
    def client(self) -> httpx.Client:
        """Lazy-initialize HTTP client."""
        if self._client is None:
            self._client = httpx.Client(
                timeout=self._timeout,
                follow_redirects=True,
            )
        return self._client

    def close(self) -> None:
        """Close HTTP client."""
        if self._client is not None:
            self._client.close()
            self._client = None

    def __enter__(self) -> "StatCanCMHCParser":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()

    def _get_download_url(self, table_id: str) -> str:
        """Get CSV download URL for a StatCan table.

        Args:
            table_id: StatCan table ID (e.g., "34100133").

        Returns:
            Direct download URL for the CSV zip file.
        """
        api_url = f"{STATCAN_API_BASE}/getFullTableDownloadCSV/{table_id}/en"
        response = self.client.get(api_url)
        response.raise_for_status()

        data = response.json()
        if data.get("status") != "SUCCESS":
            raise ValueError(f"StatCan API error: {data}")

        return str(data["object"])

    def _download_table(self, table_id: str) -> pd.DataFrame:
        """Download and extract a StatCan table as DataFrame.

        Args:
            table_id: StatCan table ID.

        Returns:
            DataFrame with table data.
        """
        # Check cache first
        if self._cache_dir:
            cache_file = self._cache_dir / f"{table_id}.csv"
            if cache_file.exists():
                logger.debug(f"Loading {table_id} from cache")
                return pd.read_csv(cache_file)

        # Get download URL and fetch
        download_url = self._get_download_url(table_id)
        logger.info(f"Downloading StatCan table {table_id}...")

        response = self.client.get(download_url)
        response.raise_for_status()

        # Extract CSV from zip
        with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
            csv_name = f"{table_id}.csv"
            with zf.open(csv_name) as f:
                df = pd.read_csv(f)

        # Cache if directory specified
        if self._cache_dir:
            self._cache_dir.mkdir(parents=True, exist_ok=True)
            df.to_csv(self._cache_dir / f"{table_id}.csv", index=False)

        logger.info(f"Downloaded {len(df)} records from table {table_id}")
        return df

    def _filter_toronto(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filter DataFrame to Toronto CMA only.

        Args:
            df: Full StatCan DataFrame.

        Returns:
            DataFrame filtered to Toronto.
        """
        # Try DGUID first, then GEO name
        if "DGUID" in df.columns:
            toronto_df = df[df["DGUID"] == TORONTO_DGUID]
            if len(toronto_df) > 0:
                return toronto_df

        if "GEO" in df.columns:
            return df[df["GEO"] == TORONTO_GEO_NAME]

        raise ValueError("Could not identify Toronto data in DataFrame")

    def get_vacancy_rates(
        self,
        years: list[int] | None = None,
    ) -> dict[int, Decimal]:
        """Fetch Toronto vacancy rates by year.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping year to vacancy rate.
        """
        df = self._download_table(CMHC_TABLES["vacancy"])
        df = self._filter_toronto(df)

        # Filter years if specified
        if years:
            df = df[df["REF_DATE"].isin(years)]

        # Extract year -> rate mapping
        rates = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            value = row.get("VALUE")
            if pd.notna(value):
                rates[year] = Decimal(str(value))

        logger.info(f"Fetched vacancy rates for {len(rates)} years")
        return rates

    def get_rental_universe(
        self,
        years: list[int] | None = None,
    ) -> dict[tuple[int, str], int]:
        """Fetch Toronto rental universe (total units) by year and bedroom type.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping (year, bedroom_type) to unit count.
        """
        df = self._download_table(CMHC_TABLES["universe"])
        df = self._filter_toronto(df)

        # Filter to standard apartment structures
        if "Type of structure" in df.columns:
            df = df[df["Type of structure"] == self.STRUCTURE_FILTER]

        if years:
            df = df[df["REF_DATE"].isin(years)]

        universe = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            bedroom_raw = row.get("Type of unit", "Total")
            bedroom = self.BEDROOM_TYPE_MAP.get(bedroom_raw, "other")
            value = row.get("VALUE")

            if pd.notna(value):
                # VALUE is often read as a float (e.g. 1234.0); go through
                # float so int() doesn't choke on the decimal point
                universe[(year, bedroom)] = int(float(value))

        logger.info(
            f"Fetched rental universe for {len(universe)} year/bedroom combinations"
        )
        return universe

    def get_average_rents(
        self,
        years: list[int] | None = None,
    ) -> dict[tuple[int, str], Decimal]:
        """Fetch Toronto average rents by year and bedroom type.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping (year, bedroom_type) to average rent.
        """
        df = self._download_table(CMHC_TABLES["rent"])
        df = self._filter_toronto(df)

        # Filter to standard apartment structures (most reliable data)
        if "Type of structure" in df.columns:
            df = df[df["Type of structure"] == self.STRUCTURE_FILTER]

        if years:
            df = df[df["REF_DATE"].isin(years)]

        rents = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            bedroom_raw = row.get("Type of unit", "Total")
            bedroom = self.BEDROOM_TYPE_MAP.get(bedroom_raw, "other")
            value = row.get("VALUE")

            if pd.notna(value) and str(value) not in ("F", ".."):
                with contextlib.suppress(Exception):
                    rents[(year, bedroom)] = Decimal(str(value))

        logger.info(f"Fetched average rents for {len(rents)} year/bedroom combinations")
        return rents

    def get_all_rental_data(
        self,
        start_year: int = 2014,
        end_year: int | None = None,
    ) -> list[CMHCRentalRecord]:
        """Fetch all Toronto rental data and combine into records.

        Args:
            start_year: First year to include.
            end_year: Last year to include (defaults to current year + 1).

        Returns:
            List of CMHCRentalRecord objects ready for database loading.
        """
        import datetime

        if end_year is None:
            end_year = datetime.date.today().year + 1

        years = list(range(start_year, end_year + 1))

        logger.info(
            f"Fetching CMHC rental data for Toronto ({start_year}-{end_year})..."
        )

        # Fetch all data types
        vacancy_rates = self.get_vacancy_rates(years)
        rents = self.get_average_rents(years)
        universe = self.get_rental_universe(years)

        # Combine into records
        records = []
        bedroom_types = ["bachelor", "1bed", "2bed", "3bed"]

        for year in years:
            vacancy = vacancy_rates.get(year)

            for bedroom in bedroom_types:
                avg_rent = rents.get((year, bedroom))
                units = universe.get((year, bedroom))

                # Skip if no rent data for this year/bedroom
                if avg_rent is None:
                    continue

                records.append(
                    CMHCRentalRecord(
                        year=year,
                        month=10,  # CMHC surveys in October
                        zone_name="Toronto CMA",
                        bedroom_type=bedroom,
                        avg_rent=avg_rent,
                        vacancy_rate=vacancy,
                        universe=units,
                    )
                )

        logger.info(f"Created {len(records)} CMHC rental records")
        return records


def fetch_toronto_rental_data(
    start_year: int = 2014,
    end_year: int | None = None,
    cache_dir: Path | None = None,
) -> list[CMHCRentalRecord]:
    """Convenience function to fetch Toronto rental data.

    Args:
        start_year: First year to include.
        end_year: Last year to include.
        cache_dir: Optional cache directory.

    Returns:
        List of CMHCRentalRecord objects.
    """
    with StatCanCMHCParser(cache_dir=cache_dir) as parser:
        return parser.get_all_rental_data(start_year, end_year)


if __name__ == "__main__":
    # Test the parser
    logging.basicConfig(level=logging.INFO)

    records = fetch_toronto_rental_data(start_year=2020)

    print(f"\nFetched {len(records)} records")
    print("\nSample records:")
    for r in records[:10]:
        print(
            f"  {r.year} {r.bedroom_type}: ${r.avg_rent} rent, {r.vacancy_rate}% vacancy"
        )
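Because the parser reads `{table_id}.csv` from `cache_dir` before making any HTTP call, it can be exercised offline. A hedged test sketch (the tiny frame below is made up, shaped like a StatCan extract):

import tempfile
from decimal import Decimal
from pathlib import Path

import pandas as pd

from portfolio_app.toronto.parsers.statcan_cmhc import CMHC_TABLES, StatCanCMHCParser

with tempfile.TemporaryDirectory() as tmp:
    cache = Path(tmp)
    # Seed the cache so _download_table() short-circuits before any network I/O.
    pd.DataFrame(
        {"REF_DATE": [2021], "GEO": ["Toronto, Ontario"], "VALUE": [3.4]}
    ).to_csv(cache / f"{CMHC_TABLES['vacancy']}.csv", index=False)

    with StatCanCMHCParser(cache_dir=cache) as parser:
        assert parser.get_vacancy_rates([2021]) == {2021: Decimal("3.4")}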
@@ -6,6 +6,7 @@ from the City of Toronto's Open Data Portal.
 API Documentation: https://open.toronto.ca/dataset/
 """

+import contextlib
 import json
 import logging
 from decimal import Decimal
@@ -193,6 +194,9 @@ class TorontoOpenDataParser:
     def _fetch_geojson(self, package_id: str) -> dict[str, Any]:
         """Fetch GeoJSON data from a package.

+        Handles both pure GeoJSON responses and CSV responses with embedded
+        geometry columns (common in Toronto Open Data).
+
         Args:
             package_id: The package/dataset ID.

@@ -212,16 +216,65 @@ class TorontoOpenDataParser:

         response = self.client.get(url)
         response.raise_for_status()
-        data = response.json()

-        # Cache the response
+        # Try to parse as JSON first
+        try:
+            data = response.json()
+            # If it's already a valid GeoJSON FeatureCollection, return it
+            if isinstance(data, dict) and data.get("type") == "FeatureCollection":
+                if self._cache_dir:
+                    self._cache_dir.mkdir(parents=True, exist_ok=True)
+                    cache_file = self._cache_dir / f"{package_id}.geojson"
+                    with open(cache_file, "w", encoding="utf-8") as f:
+                        json.dump(data, f)
+
+                return dict(data)
+        except json.JSONDecodeError:
+            pass
+
+        # If JSON parsing failed, it's likely CSV with embedded geometry
+        # Parse CSV and convert to GeoJSON FeatureCollection
+        logger.info("Response is CSV format, converting to GeoJSON...")
+        import csv
+        import io
+
+        # Increase field size limit for large geometry columns
+        csv.field_size_limit(10 * 1024 * 1024)  # 10 MB
+
+        csv_text = response.text
+        reader = csv.DictReader(io.StringIO(csv_text))
+
+        features = []
+        for row in reader:
+            # Extract geometry from the 'geometry' column if present
+            geometry = None
+            if "geometry" in row and row["geometry"]:
+                with contextlib.suppress(json.JSONDecodeError):
+                    geometry = json.loads(row["geometry"])
+
+            # Build properties from all other columns
+            properties = {k: v for k, v in row.items() if k != "geometry"}
+
+            features.append(
+                {
+                    "type": "Feature",
+                    "geometry": geometry,
+                    "properties": properties,
+                }
+            )
+
+        geojson_data: dict[str, Any] = {
+            "type": "FeatureCollection",
+            "features": features,
+        }
+
+        # Cache the converted response
+        if self._cache_dir:
+            self._cache_dir.mkdir(parents=True, exist_ok=True)
+            cache_file = self._cache_dir / f"{package_id}.geojson"
+            with open(cache_file, "w", encoding="utf-8") as f:
+                json.dump(geojson_data, f)
+
+        return geojson_data

     def _fetch_csv_as_json(self, package_id: str) -> list[dict[str, Any]]:
         """Fetch CSV data as JSON records via CKAN datastore.
@@ -282,29 +335,32 @@ class TorontoOpenDataParser:
             props = feature.get("properties", {})
             geometry = feature.get("geometry")

-            # Extract area_id from various possible property names
-            area_id = props.get("AREA_ID") or props.get("area_id")
-            if area_id is None:
-                # Try AREA_SHORT_CODE as fallback
-                short_code = props.get("AREA_SHORT_CODE", "")
+            # Use AREA_SHORT_CODE as the primary ID (1-158 range)
+            # AREA_ID is a large internal identifier not useful for our schema
+            short_code = props.get("AREA_SHORT_CODE") or props.get(
+                "area_short_code", ""
+            )
             if short_code:
                 # Extract numeric part
-                area_id = int("".join(c for c in short_code if c.isdigit()) or "0")
+                area_id = int("".join(c for c in str(short_code) if c.isdigit()) or "0")
+            else:
+                # Fallback to _id (row number) if AREA_SHORT_CODE not available
+                area_id = int(props.get("_id", 0))

             if area_id == 0:
                 logger.warning(f"Skipping neighbourhood with no valid ID: {props}")
                 continue

             area_name = (
                 props.get("AREA_NAME")
                 or props.get("area_name")
                 or f"Neighbourhood {area_id}"
             )
-            area_short_code = props.get("AREA_SHORT_CODE") or props.get(
-                "area_short_code"
-            )

             records.append(
                 NeighbourhoodRecord(
-                    area_id=int(area_id),
+                    area_id=area_id,
                     area_name=str(area_name),
-                    area_short_code=area_short_code,
+                    area_short_code=str(short_code) if short_code else None,
                     geometry=geometry,
                 )
             )
@@ -314,17 +370,17 @@ class TorontoOpenDataParser:

     # Mapping of indicator names to CensusRecord fields
     # Keys are partial matches (case-insensitive) found in the "Characteristic" column
+    # Order matters - first match wins, so more specific patterns come first
+    # Note: owner/renter counts are raw numbers, not percentages - calculated in dbt
     CENSUS_INDICATOR_MAPPING: dict[str, str] = {
         "population, 2021": "population",
-        "population, 2016": "population",
         "population density per square kilometre": "population_density",
-        "median total income of household": "median_household_income",
-        "average total income of household": "average_household_income",
+        "median total income of households in": "median_household_income",
+        "average total income of households in": "average_household_income",
         "unemployment rate": "unemployment_rate",
         "bachelor's degree or higher": "pct_bachelors_or_higher",
         "owner": "pct_owner_occupied",
         "renter": "pct_renter_occupied",
         "median age": "median_age",
-        "average age": "median_age",
         "average value of dwellings": "average_dwelling_value",
     }
@@ -358,17 +414,31 @@ class TorontoOpenDataParser:
         logger.info(f"Fetched {len(raw_records)} census profile rows")

         # Find the characteristic/indicator column name
+        # Prioritize "Characteristic" over "Category" since both may exist
        sample_row = raw_records[0]
        char_col = None

-        for col in sample_row:
-            col_lower = col.lower()
-            if "characteristic" in col_lower or "category" in col_lower:
+        # First try exact match for Characteristic
+        if "Characteristic" in sample_row:
+            char_col = "Characteristic"
+        else:
+            # Fall back to pattern matching
+            for col in sample_row:
+                col_lower = col.lower()
+                if "characteristic" in col_lower:
+                    char_col = col
+                    break
+
+        # Last resort: try Category
+        if not char_col:
+            for col in sample_row:
+                if "category" in col.lower():
                    char_col = col
                    break

        if not char_col:
-            # Try common column names
-            for candidate in ["Characteristic", "Category", "Topic", "_id"]:
+            # Try other common column names
+            for candidate in ["Topic", "_id"]:
                if candidate in sample_row:
                    char_col = candidate
                    break
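To make the `_fetch_geojson` CSV fallback above easier to review, here is the same row-to-Feature conversion run standalone on an inline CSV (the sample row is invented):

import csv
import io
import json

csv_text = (
    "AREA_NAME,geometry\n"
    'High Park,"{""type"": ""Point"", ""coordinates"": [-79.46, 43.65]}"\n'
)

features = []
for row in csv.DictReader(io.StringIO(csv_text)):
    # Same shape as the parser: geometry column becomes the Feature geometry,
    # every other column becomes a property.
    geometry = json.loads(row["geometry"]) if row.get("geometry") else None
    properties = {k: v for k, v in row.items() if k != "geometry"}
    features.append({"type": "Feature", "geometry": geometry, "properties": properties})

print(json.dumps({"type": "FeatureCollection", "features": features}, indent=2))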
@@ -37,7 +37,7 @@ def get_neighbourhoods_geojson(year: int = 2021) -> dict[str, Any]:
             ST_AsGeoJSON(geometry)::json as geom,
             population,
             livability_score
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
         AND geometry IS NOT NULL
     """

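This hunk and the service-layer hunks below all apply the same fix: the dbt marts live in the `public_marts` schema, so every table reference is now schema-qualified. An alternative worth noting (a sketch with an assumed DSN, not what this commit does) is to pin the schema once on the engine via `search_path`:

from sqlalchemy import create_engine, text

engine = create_engine(
    "postgresql+psycopg2://portfolio@localhost/portfolio",  # assumed DSN
    connect_args={"options": "-csearch_path=public_marts,public"},
)

with engine.connect() as conn:
    # Unqualified mart names now resolve without editing each query.
    count = conn.execute(
        text("SELECT count(*) FROM mart_neighbourhood_overview")
    ).scalar_one()
    print(count)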
@@ -1,5 +1,6 @@
 """Service layer for querying neighbourhood data from dbt marts."""

+import logging
 from functools import lru_cache
 from typing import Any

@@ -8,6 +9,8 @@ from sqlalchemy import text

 from portfolio_app.toronto.models import get_engine

+logger = logging.getLogger(__name__)
+

 def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
     """Execute SQL query and return DataFrame.

@@ -23,8 +26,10 @@ def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFra
         engine = get_engine()
         with engine.connect() as conn:
             return pd.read_sql(text(sql), conn, params=params)
-    except Exception:
-        # Return empty DataFrame on connection or query error
+    except Exception as e:
+        logger.error(f"Query failed: {e}")
+        logger.debug(f"Failed SQL: {sql}")
+        logger.debug(f"Params: {params}")
         return pd.DataFrame()


@@ -56,7 +61,7 @@ def get_overview_data(year: int = 2021) -> pd.DataFrame:
             rent_to_income_pct,
             avg_rent_2bed,
             total_amenities_per_1000
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
         ORDER BY livability_score DESC NULLS LAST
     """

@@ -95,7 +100,7 @@ def get_housing_data(year: int = 2021) -> pd.DataFrame:
             affordability_index,
             rent_yoy_change_pct,
             income_quintile
-        FROM mart_neighbourhood_housing
+        FROM public_marts.mart_neighbourhood_housing
         WHERE year = :year
         ORDER BY affordability_index ASC NULLS LAST
     """

@@ -112,26 +117,22 @@ def get_safety_data(year: int = 2021) -> pd.DataFrame:

     Returns:
         DataFrame with columns: neighbourhood_id, neighbourhood_name,
-        total_crime_rate, violent_crime_rate, property_crime_rate, etc.
+        total_crime_rate, violent_crimes, property_crimes, etc.
     """
     sql = """
         SELECT
             neighbourhood_id,
             neighbourhood_name,
             year,
-            total_crimes,
+            total_incidents as total_crimes,
             crime_rate_per_100k as total_crime_rate,
-            violent_crimes,
-            violent_crime_rate,
-            property_crimes,
-            property_crime_rate,
-            theft_crimes,
-            theft_rate,
-            crime_yoy_change_pct,
-            crime_trend
-        FROM mart_neighbourhood_safety
+            assault_count + robbery_count + homicide_count as violent_crimes,
+            break_enter_count + auto_theft_count as property_crimes,
+            theft_over_count as theft_crimes,
+            crime_yoy_change_pct
+        FROM public_marts.mart_neighbourhood_safety
         WHERE year = :year
-        ORDER BY total_crime_rate ASC NULLS LAST
+        ORDER BY crime_rate_per_100k ASC NULLS LAST
     """
     return _execute_query(sql, {"year": year})

@@ -152,22 +153,22 @@ def get_demographics_data(year: int = 2021) -> pd.DataFrame:
         SELECT
             neighbourhood_id,
             neighbourhood_name,
-            census_year as year,
+            year,
             population,
             population_density,
-            population_change_pct,
             median_household_income,
-            average_household_income,
             income_quintile,
+            income_index,
             median_age,
-            pct_under_18,
-            pct_18_to_64,
-            pct_65_plus,
-            pct_bachelors_or_higher,
+            age_index,
             pct_owner_occupied,
             pct_renter_occupied,
+            education_bachelors_pct as pct_bachelors_or_higher,
             unemployment_rate,
-            diversity_index
-        FROM mart_neighbourhood_demographics
-        WHERE census_year = :year
+            tenure_diversity_index as diversity_index
+        FROM public_marts.mart_neighbourhood_demographics
+        WHERE year = :year
         ORDER BY population DESC NULLS LAST
     """
     return _execute_query(sql, {"year": year})

@@ -183,26 +184,26 @@ def get_amenities_data(year: int = 2021) -> pd.DataFrame:

     Returns:
         DataFrame with columns: neighbourhood_id, neighbourhood_name,
-        amenity_score, parks_per_capita, schools_per_capita, transit_score, etc.
+        amenity_score, parks_per_1000, schools_per_1000, etc.
     """
     sql = """
         SELECT
             neighbourhood_id,
             neighbourhood_name,
             year,
-            park_count,
+            parks_count as park_count,
             parks_per_1000,
-            school_count,
+            schools_count as school_count,
             schools_per_1000,
-            childcare_count,
-            childcare_per_1000,
+            transit_count as childcare_count,
+            transit_per_1000 as childcare_per_1000,
             total_amenities,
             total_amenities_per_1000,
-            amenity_score,
-            amenity_rank
-        FROM mart_neighbourhood_amenities
+            amenity_index as amenity_score,
+            amenity_tier as amenity_rank
+        FROM public_marts.mart_neighbourhood_amenities
         WHERE year = :year
-        ORDER BY amenity_score DESC NULLS LAST
+        ORDER BY amenity_index DESC NULLS LAST
     """
     return _execute_query(sql, {"year": year})

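These SELECT lists deliberately alias the new mart columns back to the names the dashboard layer already consumes (`parks_count as park_count`, `amenity_index as amenity_score`, and so on). A quick, hypothetical guard for that contract (the import path is assumed):

from portfolio_app.toronto.services.queries import get_amenities_data  # path assumed

EXPECTED = {
    "park_count", "parks_per_1000", "school_count", "schools_per_1000",
    "childcare_count", "childcare_per_1000", "total_amenities",
    "total_amenities_per_1000", "amenity_score", "amenity_rank",
}

df = get_amenities_data(2021)
missing = EXPECTED - set(df.columns)
assert not missing, f"dashboard columns missing from query: {missing}"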
@@ -249,17 +250,17 @@ def get_neighbourhood_details(
             a.park_count,
             a.school_count,
             a.total_amenities
-        FROM mart_neighbourhood_overview o
-        LEFT JOIN mart_neighbourhood_safety s
+        FROM public_marts.mart_neighbourhood_overview o
+        LEFT JOIN public_marts.mart_neighbourhood_safety s
             ON o.neighbourhood_id = s.neighbourhood_id
             AND o.year = s.year
-        LEFT JOIN mart_neighbourhood_housing h
+        LEFT JOIN public_marts.mart_neighbourhood_housing h
             ON o.neighbourhood_id = h.neighbourhood_id
             AND o.year = h.year
-        LEFT JOIN mart_neighbourhood_demographics d
+        LEFT JOIN public_marts.mart_neighbourhood_demographics d
             ON o.neighbourhood_id = d.neighbourhood_id
             AND o.year = d.census_year
-        LEFT JOIN mart_neighbourhood_amenities a
+        LEFT JOIN public_marts.mart_neighbourhood_amenities a
             ON o.neighbourhood_id = a.neighbourhood_id
             AND o.year = a.year
         WHERE o.neighbourhood_id = :neighbourhood_id

@@ -288,7 +289,7 @@ def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
             neighbourhood_id,
             neighbourhood_name,
             population
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
         ORDER BY neighbourhood_name
     """

@@ -317,19 +318,19 @@ def get_rankings(
     """
     # Map metrics to their source tables
     table_map = {
-        "livability_score": "mart_neighbourhood_overview",
-        "safety_score": "mart_neighbourhood_overview",
-        "affordability_score": "mart_neighbourhood_overview",
-        "amenity_score": "mart_neighbourhood_overview",
-        "crime_rate_per_100k": "mart_neighbourhood_safety",
-        "total_crime_rate": "mart_neighbourhood_safety",
-        "avg_rent_2bed": "mart_neighbourhood_housing",
-        "affordability_index": "mart_neighbourhood_housing",
-        "population": "mart_neighbourhood_demographics",
-        "median_household_income": "mart_neighbourhood_demographics",
+        "livability_score": "public_marts.mart_neighbourhood_overview",
+        "safety_score": "public_marts.mart_neighbourhood_overview",
+        "affordability_score": "public_marts.mart_neighbourhood_overview",
+        "amenity_score": "public_marts.mart_neighbourhood_overview",
+        "crime_rate_per_100k": "public_marts.mart_neighbourhood_safety",
+        "total_crime_rate": "public_marts.mart_neighbourhood_safety",
+        "avg_rent_2bed": "public_marts.mart_neighbourhood_housing",
+        "affordability_index": "public_marts.mart_neighbourhood_housing",
+        "population": "public_marts.mart_neighbourhood_demographics",
+        "median_household_income": "public_marts.mart_neighbourhood_demographics",
     }

-    table = table_map.get(metric, "mart_neighbourhood_overview")
+    table = table_map.get(metric, "public_marts.mart_neighbourhood_overview")
     year_col = "census_year" if "demographics" in table else "year"

     order = "ASC" if ascending else "DESC"

@@ -375,7 +376,7 @@ def get_city_averages(year: int = 2021) -> dict[str, Any]:
         AVG(crime_rate_per_100k) as avg_crime_rate,
         AVG(avg_rent_2bed) as avg_rent_2bed,
         AVG(rent_to_income_pct) as avg_rent_to_income
-    FROM mart_neighbourhood_overview
+    FROM public_marts.mart_neighbourhood_overview
     WHERE year = :year
     """
     df = _execute_query(sql, {"year": year})

@@ -38,12 +38,16 @@ from portfolio_app.toronto.loaders import (  # noqa: E402
     load_census_data,
     load_crime_data,
     load_neighbourhoods,
+    load_statcan_cmhc_data,
     load_time_dimension,
 )
 from portfolio_app.toronto.parsers import (  # noqa: E402
     TorontoOpenDataParser,
     TorontoPoliceParser,
 )
+from portfolio_app.toronto.parsers.statcan_cmhc import (  # noqa: E402
+    fetch_toronto_rental_data,
+)
 from portfolio_app.toronto.schemas import Neighbourhood  # noqa: E402

 # Configure logging

@@ -91,6 +95,9 @@ class DataPipeline:
         # 5. Load amenities
         self._load_amenities(session)

+        # 6. Load CMHC rental data from StatCan
+        self._load_rentals(session)
+
         session.commit()
         logger.info("All data committed to database")

@@ -241,6 +248,32 @@ class DataPipeline:

         self.stats["amenities"] = total_count

+    def _load_rentals(self, session: Any) -> None:
+        """Fetch and load CMHC rental data from StatCan."""
+        logger.info("Fetching CMHC rental data from Statistics Canada...")
+
+        if self.dry_run:
+            logger.info("  [DRY RUN] Would fetch and load CMHC rental data")
+            return
+
+        try:
+            # Fetch rental data (2014-present)
+            rental_records = fetch_toronto_rental_data(start_year=2014)
+
+            if not rental_records:
+                logger.warning("  No rental records fetched")
+                return
+
+            count = load_statcan_cmhc_data(rental_records, session)
+            self.stats["rentals"] = count
+            logger.info(f"  Loaded {count} CMHC rental records")
+        except Exception as e:
+            logger.warning(f"  Failed to load CMHC rental data: {e}")
+            if self.verbose:
+                import traceback
+
+                traceback.print_exc()
+
     def run_dbt(self) -> bool:
         """Run dbt to transform data.

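The new `_load_rentals` step is intentionally non-fatal: a StatCan outage logs a warning instead of failing the whole pipeline run. A compact sketch of the same pattern, independent of the pipeline class (the helper below is invented for illustration):

import logging

logger = logging.getLogger(__name__)

def run_optional_step(name: str, fn, stats: dict) -> None:
    """Run one pipeline step; record its count but never abort the pipeline."""
    try:
        stats[name] = fn()
        logger.info(f"  Loaded {stats[name]} {name} records")
    except Exception as e:
        logger.warning(f"  Failed to load {name}: {e}")

stats: dict[str, int] = {}
run_optional_step("rentals", lambda: 0, stats)  # stand-in for fetch + load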
@@ -25,8 +25,10 @@ def main() -> int:
     engine = get_engine()

     # Test connection
+    from sqlalchemy import text
+
     with engine.connect() as conn:
-        result = conn.execute("SELECT 1")
+        result = conn.execute(text("SELECT 1"))
         result.fetchone()
     print("Database connection successful")

72
scripts/etl/toronto.sh
Executable file
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# scripts/etl/toronto.sh - Run Toronto data pipeline
#
# Usage:
#   ./scripts/etl/toronto.sh --full         # Complete reload of all data
#   ./scripts/etl/toronto.sh --incremental  # Only new data since last run
#   ./scripts/etl/toronto.sh                # Default: incremental
#
# Logs are written to .dev/logs/etl/

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
LOG_DIR="$PROJECT_ROOT/.dev/logs/etl"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="$LOG_DIR/toronto_${TIMESTAMP}.log"

MODE="${1:---incremental}"

mkdir -p "$LOG_DIR"

log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

log "Starting Toronto ETL pipeline (mode: $MODE)"
log "Log file: $LOG_FILE"

cd "$PROJECT_ROOT"

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
    source .venv/bin/activate
    log "Activated virtual environment"
fi

case "$MODE" in
    --full)
        log "Running FULL data reload..."

        log "Step 1/4: Parsing neighbourhood data..."
        python -m portfolio_app.toronto.parsers.neighbourhoods 2>&1 | tee -a "$LOG_FILE"

        log "Step 2/4: Parsing census data..."
        python -m portfolio_app.toronto.parsers.census 2>&1 | tee -a "$LOG_FILE"

        log "Step 3/4: Parsing crime data..."
        python -m portfolio_app.toronto.parsers.crime 2>&1 | tee -a "$LOG_FILE"

        log "Step 4/4: Running dbt transformations..."
        cd dbt && dbt run --full-refresh --profiles-dir . 2>&1 | tee -a "$LOG_FILE" && cd ..
        ;;

    --incremental)
        log "Running INCREMENTAL update..."

        log "Step 1/2: Checking for new data..."
        # Add incremental logic here when implemented

        log "Step 2/2: Running dbt transformations..."
        cd dbt && dbt run --profiles-dir . 2>&1 | tee -a "$LOG_FILE" && cd ..
        ;;

    *)
        log "ERROR: Unknown mode '$MODE'. Use --full or --incremental"
        exit 1
        ;;
esac

log "Toronto ETL pipeline completed successfully"
log "Full log available at: $LOG_FILE"
20
scripts/logs.sh
Executable file
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# scripts/logs.sh - Follow docker compose logs
#
# Usage:
#   ./scripts/logs.sh            # All services
#   ./scripts/logs.sh postgres   # Specific service
#   ./scripts/logs.sh -n 100     # Last 100 lines

set -euo pipefail

SERVICE="${1:-}"

if [[ -n "$SERVICE" && "$SERVICE" != -* ]]; then
    echo "Following logs for service: $SERVICE"
    # Quote and slice "$@" directly so flag values with spaces survive word splitting
    docker compose logs -f "$SERVICE" "${@:2}"
else
    echo "Following logs for all services"
    docker compose logs -f "$@"
fi
38
scripts/run-detached.sh
Executable file
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# scripts/run-detached.sh - Start containers and wait for health
#
# Usage:
#   ./scripts/run-detached.sh

set -euo pipefail

TIMEOUT=60
INTERVAL=5

echo "Starting containers in detached mode..."
docker compose up -d

echo "Waiting for services to become healthy..."
elapsed=0

while [ $elapsed -lt $TIMEOUT ]; do
    # Check if postgres is ready
    if docker compose exec -T postgres pg_isready -U portfolio > /dev/null 2>&1; then
        echo "PostgreSQL is ready!"

        # Check if app health endpoint responds (if running)
        if curl -sf http://localhost:8050/health > /dev/null 2>&1; then
            echo "Application health check passed!"
            echo "All services are healthy."
            exit 0
        fi
    fi

    echo "Waiting... ($elapsed/$TIMEOUT seconds)"
    sleep $INTERVAL
    elapsed=$((elapsed + INTERVAL))
done

echo "ERROR: Health check timed out after $TIMEOUT seconds"
docker compose ps
exit 1