development #95

Merged
lmiranda merged 89 commits from development into staging 2026-02-01 21:32:42 +00:00
12 changed files with 1067 additions and 1 deletion
Showing only changes of commit bf6e392002

.gitea/workflows/ci.yml Normal file

@@ -0,0 +1,35 @@
name: CI

on:
  push:
    branches:
      - development
      - staging
      - main
  pull_request:
    branches:
      - development

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install ruff pytest
      - name: Run linter
        run: ruff check .
      - name: Run tests
        run: pytest tests/ -v --tb=short

.gitea/workflows/deploy-production.yml Normal file

@@ -0,0 +1,44 @@
name: Deploy to Production

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Production Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            set -euo pipefail
            cd ~/apps/personal-portfolio
            echo "Pulling latest changes..."
            git fetch origin main
            git reset --hard origin/main
            echo "Activating virtual environment..."
            source .venv/bin/activate
            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet
            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..
            echo "Restarting application..."
            docker compose down
            docker compose up -d
            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1
            echo "Production deployment complete!"

.gitea/workflows/deploy-staging.yml Normal file

@@ -0,0 +1,44 @@
name: Deploy to Staging

on:
  push:
    branches:
      - staging

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Staging Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.STAGING_HOST }}
          username: ${{ secrets.STAGING_USER }}
          key: ${{ secrets.STAGING_SSH_KEY }}
          script: |
            set -euo pipefail
            cd ~/apps/personal-portfolio
            echo "Pulling latest changes..."
            git fetch origin staging
            git reset --hard origin/staging
            echo "Activating virtual environment..."
            source .venv/bin/activate
            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet
            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..
            echo "Restarting application..."
            docker compose down
            docker compose up -d
            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1
            echo "Staging deployment complete!"

LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024-2025 Leo Miranda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Makefile

@@ -1,4 +1,4 @@
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto

# Default target
.DEFAULT_GOAL := help

@@ -151,6 +151,19 @@ ci: ## Run all checks (lint, typecheck, test)
	$(MAKE) test
	@echo "$(GREEN)All checks passed!$(NC)"

# =============================================================================
# Operations
# =============================================================================
logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
	@./scripts/logs.sh $(SERVICE)

run-detached: ## Start containers and wait for health check
	@./scripts/run-detached.sh

etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
	@./scripts/etl/toronto.sh $(MODE)

# =============================================================================
# Deployment
# =============================================================================

README.md

@@ -1,5 +1,9 @@
# Analytics Portfolio
[![CI](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions/workflows/ci.yml/badge.svg)](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
**Live Demo:** [leodata.science](https://leodata.science)
A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
## Live Pages
@@ -32,6 +36,42 @@ An interactive choropleth dashboard analyzing Toronto's 158 official neighbourhoods
- Toronto Police Service (crime statistics)
- CMHC Rental Market Survey (rental data by zone)
## Architecture
```mermaid
flowchart LR
    subgraph Sources
        A1[City of Toronto API]
        A2[Toronto Police API]
        A3[CMHC Data]
    end
    subgraph ETL
        B1[Parsers]
        B2[Loaders]
    end
    subgraph Database
        C1[(PostgreSQL/PostGIS)]
        C2[dbt Models]
    end
    subgraph Application
        D1[Dash App]
        D2[Plotly Figures]
    end
    A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
```
**Pipeline Stages:**
- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
- **ETL**: Python parsers extract and validate data; loaders persist to database
- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
- **Application**: Dash serves interactive dashboards with Plotly visualizations
For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
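As a rough sketch of the parser → loader hand-off described above (all names here are hypothetical, not the project's actual modules):

```python
# Hypothetical parser/loader pair illustrating the ETL stage above.
from dataclasses import dataclass

@dataclass
class NeighbourhoodRecord:
    neighbourhood_id: int
    name: str

def parse(raw: list[dict]) -> list[NeighbourhoodRecord]:
    # Extract and validate one record per neighbourhood
    return [NeighbourhoodRecord(int(r["id"]), r["name"]) for r in raw]

def load(records: list[NeighbourhoodRecord]) -> None:
    # Persist to PostgreSQL (stubbed as a print here)
    for rec in records:
        print(f"upsert dim_neighbourhood: {rec.neighbourhood_id}, {rec.name}")

load(parse([{"id": "1", "name": "West Humber-Clairville"}]))
```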
## Quick Start
```bash

docs/DATABASE_SCHEMA.md Normal file

@@ -0,0 +1,307 @@
# Database Schema
This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.
## Entity Relationship Diagram
```mermaid
erDiagram
    dim_time {
        int date_key PK
        date full_date UK
        int year
        int month
        int quarter
        string month_name
        bool is_month_start
    }
    dim_cmhc_zone {
        int zone_key PK
        string zone_code UK
        string zone_name
        geometry geometry
    }
    dim_neighbourhood {
        int neighbourhood_id PK
        string name
        geometry geometry
        int population
        numeric land_area_sqkm
        numeric pop_density_per_sqkm
        numeric pct_bachelors_or_higher
        numeric median_household_income
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        int census_year
    }
    dim_policy_event {
        int event_id PK
        date event_date
        date effective_date
        string level
        string category
        string title
        text description
        string expected_direction
        string source_url
        string confidence
    }
    fact_rentals {
        int id PK
        int date_key FK
        int zone_key FK
        string bedroom_type
        int universe
        numeric avg_rent
        numeric median_rent
        numeric vacancy_rate
        numeric availability_rate
        numeric turnover_rate
        numeric rent_change_pct
        string reliability_code
    }
    fact_census {
        int id PK
        int neighbourhood_id FK
        int census_year
        int population
        numeric population_density
        numeric median_household_income
        numeric average_household_income
        numeric unemployment_rate
        numeric pct_bachelors_or_higher
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        numeric median_age
        numeric average_dwelling_value
    }
    fact_crime {
        int id PK
        int neighbourhood_id FK
        int year
        string crime_type
        int count
        numeric rate_per_100k
    }
    fact_amenities {
        int id PK
        int neighbourhood_id FK
        string amenity_type
        int count
        int year
    }
    bridge_cmhc_neighbourhood {
        int id PK
        string cmhc_zone_code FK
        int neighbourhood_id FK
        numeric weight
    }

    dim_time ||--o{ fact_rentals : "date_key"
    dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
    dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
    dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
    dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
```
## Schema Layers
### Raw Schema
Raw data is loaded directly from external sources without transformation:
| Table | Source | Description |
|-------|--------|-------------|
| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries |
| `raw.census_profiles` | City of Toronto API | Census profile data |
| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood |
| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data |
### Staging Schema (dbt)
Staging models provide 1:1 cleaned representations of source data:
| Model | Source Table | Purpose |
|-------|-------------|---------|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
| `stg_police__crimes` | raw.crime_data | Standardized crime categories |
### Marts Schema (dbt)
Analytical tables ready for dashboard consumption:
| Model | Grain | Purpose |
|-------|-------|---------|
| `mart_neighbourhood_summary` | neighbourhood | Composite livability scores |
| `mart_rental_trends` | zone × month | Time-series rental analysis |
| `mart_crime_rates` | neighbourhood × year | Crime rate calculations |
| `mart_amenity_density` | neighbourhood | Amenity accessibility scores |
## Table Details
### Dimension Tables
#### dim_time
Time dimension for date-based analysis. Grain: one row per month.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
| year | INTEGER | NOT NULL | Calendar year |
| month | INTEGER | NOT NULL | Month number (1-12) |
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
| month_name | VARCHAR(20) | NOT NULL | Month name |
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
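As a sketch, the monthly grain and the YYYYMM surrogate key can be generated like this (illustrative only, not the project's actual loader):

```python
# Illustrative generation of dim_time rows at monthly grain.
from datetime import date

def dim_time_rows(start_year: int, end_year: int) -> list[dict]:
    rows = []
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            rows.append({
                "date_key": year * 100 + month,  # e.g. 2024-01 -> 202401
                "full_date": date(year, month, 1),
                "year": year,
                "month": month,
                "quarter": (month - 1) // 3 + 1,
                "month_name": date(year, month, 1).strftime("%B"),
                "is_month_start": True,
            })
    return rows

print(dim_time_rows(2024, 2024)[0])  # {'date_key': 202401, ...}
```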
#### dim_cmhc_zone
CMHC rental market zones (~20 zones covering Toronto).
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |
#### dim_neighbourhood
Toronto's 158 official neighbourhoods.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
| population | INTEGER | | Total population |
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| median_household_income | NUMERIC(12,2) | | Median income |
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
| census_year | INTEGER | DEFAULT 2021 | Census reference year |
#### dim_policy_event
Policy events for time-series annotation (rent control, interest rates, etc.).
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| event_id | INTEGER | PK, AUTO | Surrogate key |
| event_date | DATE | NOT NULL | Announcement date |
| effective_date | DATE | | Implementation date |
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
| title | VARCHAR(200) | NOT NULL | Event title |
| description | TEXT | | Detailed description |
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
| source_url | VARCHAR(500) | | Reference link |
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |
### Fact Tables
#### fact_rentals
CMHC rental market survey data. Grain: zone × bedroom type × survey date.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| date_key | INTEGER | FK → dim_time | Survey date reference |
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
| universe | INTEGER | | Total rental units |
| avg_rent | NUMERIC(10,2) | | Average rent |
| median_rent | NUMERIC(10,2) | | Median rent |
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
| availability_rate | NUMERIC(5,2) | | Availability percentage |
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
| reliability_code | VARCHAR(2) | | CMHC data quality code |
#### fact_census
Census statistics. Grain: neighbourhood × census year.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
| population | INTEGER | | Total population |
| population_density | NUMERIC(10,2) | | People per km² |
| median_household_income | NUMERIC(12,2) | | Median income |
| average_household_income | NUMERIC(12,2) | | Average income |
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
| median_age | NUMERIC(5,2) | | Median resident age |
| average_dwelling_value | NUMERIC(12,2) | | Average home value |
#### fact_crime
Crime statistics. Grain: neighbourhood × year × crime type.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| year | INTEGER | NOT NULL | Calendar year |
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
| count | INTEGER | NOT NULL | Number of incidents |
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |
#### fact_amenities
Amenity counts. Grain: neighbourhood × amenity type × year.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
| count | INTEGER | NOT NULL | Number of amenities |
| year | INTEGER | NOT NULL | Reference year |
### Bridge Tables
#### bridge_cmhc_neighbourhood
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.
| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
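A minimal sketch of how these weights can disaggregate zone-level rent to neighbourhoods (hypothetical figures; the real transformation lives in the dbt models):

```python
# Hypothetical weight-based disaggregation of zone-level average rent.
zone_avg_rent = {"Z01": 1850.0, "Z02": 2100.0}

# (cmhc_zone_code, neighbourhood_id, weight) rows from bridge_cmhc_neighbourhood
bridge = [("Z01", 1, 0.75), ("Z02", 1, 0.25), ("Z02", 2, 1.0)]

weighted_sum: dict[int, float] = {}
weight_total: dict[int, float] = {}
for zone_code, nbhd_id, weight in bridge:
    weighted_sum[nbhd_id] = weighted_sum.get(nbhd_id, 0.0) + weight * zone_avg_rent[zone_code]
    weight_total[nbhd_id] = weight_total.get(nbhd_id, 0.0) + weight

for nbhd_id in weighted_sum:
    # Weighted-average rent estimate per neighbourhood
    print(nbhd_id, round(weighted_sum[nbhd_id] / weight_total[nbhd_id], 2))
```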
## Indexes
| Table | Index | Columns | Purpose |
|-------|-------|---------|---------|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
## PostGIS Extensions
The database requires PostGIS for geospatial operations:
```sql
CREATE EXTENSION IF NOT EXISTS postgis;
```
All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
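For example, source geometries can be reprojected to WGS84 with GeoPandas before loading (a sketch; the input filename is an assumption):

```python
# Sketch: reproject geometries to EPSG:4326 (WGS84) before loading into PostGIS.
import geopandas as gpd

gdf = gpd.read_file("neighbourhoods.geojson")  # hypothetical input file
if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
    gdf = gdf.to_crs(epsg=4326)  # reproject from the source CRS
print(gdf.crs)
```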


@@ -0,0 +1,200 @@
# Runbook: Adding a New Dashboard
This runbook describes how to add a new data dashboard to the portfolio application.
## Prerequisites
- [ ] Data sources identified and accessible
- [ ] Database schema designed
- [ ] Basic Dash/Plotly familiarity
## Directory Structure
Create the following structure under `portfolio_app/`:
```
portfolio_app/
├── pages/
│   └── {dashboard_name}/
│       ├── dashboard.py          # Main layout with tabs
│       ├── methodology.py        # Data sources and methods page
│       ├── tabs/
│       │   ├── __init__.py
│       │   ├── overview.py       # Overview tab layout
│       │   └── ...               # Additional tab layouts
│       └── callbacks/
│           ├── __init__.py
│           └── ...               # Callback modules
├── {dashboard_name}/             # Data logic (outside pages/)
│   ├── __init__.py
│   ├── parsers/                  # API/CSV extraction
│   │   └── __init__.py
│   ├── loaders/                  # Database operations
│   │   └── __init__.py
│   ├── schemas/                  # Pydantic models
│   │   └── __init__.py
│   └── models/                   # SQLAlchemy ORM
│       └── __init__.py
```
## Step-by-Step Checklist
### 1. Data Layer
- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/` (see the sketch after this checklist)
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
- [ ] Create parsers in `{dashboard_name}/parsers/`
- [ ] Create loaders in `{dashboard_name}/loaders/`
- [ ] Add database migrations if needed
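A minimal sketch of a paired Pydantic schema and SQLAlchemy model, as referenced above (the names `AmenitySchema` and `FactAmenity` are illustrative assumptions, not existing project code):

```python
# Illustrative Pydantic schema + SQLAlchemy 2.0 model pair.
from pydantic import BaseModel, Field
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass

class AmenitySchema(BaseModel):
    """Validates one parsed record before it touches the database."""
    neighbourhood_id: int
    amenity_type: str = Field(min_length=1, max_length=50)
    count: int = Field(ge=0)
    year: int

class FactAmenity(Base):
    """ORM row mirroring the validated schema."""
    __tablename__ = "fact_amenities"
    id: Mapped[int] = mapped_column(primary_key=True)
    neighbourhood_id: Mapped[int]
    amenity_type: Mapped[str]
    count: Mapped[int]
    year: Mapped[int]

record = AmenitySchema(neighbourhood_id=1, amenity_type="parks", count=12, year=2024)
row = FactAmenity(**record.model_dump())  # ready for session.add(row)
```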
### 2. dbt Models
Create dbt models in `dbt/models/`:
- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning
- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic
- [ ] `marts/mart_{domain}.sql` - Final analytical tables
Follow naming conventions:
- Staging: `stg_{source}__{entity}`
- Intermediate: `int_{domain}__{transform}`
- Marts: `mart_{domain}`
### 3. Visualization Layer
- [ ] Create figure factories in `figures/` (or reuse existing)
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`
### 4. Dashboard Pages
#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)
```python
import dash
from dash import html, dcc
import dash_mantine_components as dmc

# Assumed import: tabs/overview.py exports the overview tab layout
from .tabs.overview import overview_tab

dash.register_page(
    __name__,
    path="/{dashboard_name}",
    title="{Dashboard Title}",
    description="{Description}",
)

def layout():
    return dmc.Container([
        # Header
        dmc.Title("{Dashboard Title}", order=1),
        # Tabs
        dmc.Tabs([
            dmc.TabsList([
                dmc.TabsTab("Overview", value="overview"),
                # Add more tabs
            ]),
            dmc.TabsPanel(overview_tab(), value="overview"),
            # Add more panels
        ], value="overview"),
    ])
```
#### Tab Layouts (`pages/{dashboard_name}/tabs/`)
- [ ] Create one file per tab
- [ ] Export layout function from each
#### Callbacks (`pages/{dashboard_name}/callbacks/`)
- [ ] Create callback modules for interactivity
- [ ] Import and register in dashboard.py
### 5. Navigation
Add to sidebar in `components/sidebar.py`:
```python
dmc.NavLink(
    label="{Dashboard Name}",
    href="/{dashboard_name}",
    icon=DashIconify(icon="..."),
)
```
### 6. Documentation
- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
- [ ] Document data sources
- [ ] Document transformation logic
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed
### 7. Testing
- [ ] Add unit tests for parsers
- [ ] Add unit tests for loaders
- [ ] Add integration tests for callbacks
- [ ] Run `make test`
### 8. Final Verification
- [ ] All pages render without errors
- [ ] All callbacks respond correctly
- [ ] Data loads successfully
- [ ] dbt models run cleanly (`make dbt-run`)
- [ ] Linting passes (`make lint`)
- [ ] Tests pass (`make test`)
## Example: Toronto Dashboard
Reference implementation: `portfolio_app/pages/toronto/`
Key files:
- `dashboard.py` - Main layout with 5 tabs
- `tabs/overview.py` - Livability scores, scatter plots
- `callbacks/map_callbacks.py` - Choropleth interactions
- `toronto/models/dimensions.py` - Dimension tables
- `toronto/models/facts.py` - Fact tables
## Common Patterns
### Figure Factories
```python
# figures/choropleth.py
import geopandas as gpd
import plotly.graph_objects as go

def create_choropleth_figure(
    gdf: gpd.GeoDataFrame,
    value_column: str,
    title: str,
    **kwargs,
) -> go.Figure:
    ...
```
### Callbacks
```python
# callbacks/map_callbacks.py
from dash import Input, Output, callback

@callback(
    Output("neighbourhood-details", "children"),
    Input("choropleth-map", "clickData"),
)
def update_details(click_data):
    ...
```
### Data Loading
```python
# {dashboard_name}/loaders/load.py
from sqlalchemy.orm import Session

# Schema, Model, and parse_source_data are placeholders for the
# dashboard's own Pydantic schema, ORM model, and parser.
def load_data(session: Session) -> None:
    # Parse from source
    records = parse_source_data()
    # Validate with Pydantic
    validated = [Schema(**r) for r in records]
    # Load to database
    for record in validated:
        session.add(Model(**record.model_dump()))
    session.commit()
```
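Typical invocation might look like this (a sketch; the engine URL shape is an assumption, not the project's configured DSN):

```python
# Sketch: invoking the loader with a session bound to an engine.
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

# Assumed URL shape; the real DSN comes from the app's .env / DATABASE_URL
engine = create_engine("postgresql+psycopg2://portfolio:portfolio@localhost:5432/portfolio")

with Session(engine) as session:
    load_data(session)
```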

docs/runbooks/deployment.md Normal file

@@ -0,0 +1,232 @@
# Runbook: Deployment
This runbook covers deployment procedures for the Analytics Portfolio application.
## Environments
| Environment | Branch | Server | URL |
|-------------|--------|--------|-----|
| Development | `development` | Local | http://localhost:8050 |
| Staging | `staging` | Homelab (hotserv) | Internal |
| Production | `main` | Bandit Labs VPS | https://leodata.science |
## CI/CD Pipeline
### Automatic Deployment
Deployments are triggered automatically via Gitea Actions:
1. **Push to `staging`** → Deploys to staging server
2. **Push to `main`** → Deploys to production server
### Workflow Files
- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
- `.gitea/workflows/deploy-production.yml` - Production deployment
### Required Secrets
Configure these in Gitea repository settings:
| Secret | Description |
|--------|-------------|
| `STAGING_HOST` | Staging server hostname/IP |
| `STAGING_USER` | SSH username for staging |
| `STAGING_SSH_KEY` | Private key for staging SSH |
| `PROD_HOST` | Production server hostname/IP |
| `PROD_USER` | SSH username for production |
| `PROD_SSH_KEY` | Private key for production SSH |
## Manual Deployment
### Prerequisites
- SSH access to target server
- Repository cloned at `~/apps/personal-portfolio`
- Virtual environment created at `.venv`
- Docker and Docker Compose installed
- PostgreSQL container running
### Steps
```bash
# 1. SSH to server
ssh user@server

# 2. Navigate to app directory
cd ~/apps/personal-portfolio

# 3. Pull latest changes
git fetch origin {branch}
git reset --hard origin/{branch}

# 4. Activate virtual environment
source .venv/bin/activate

# 5. Install dependencies
pip install -r requirements.txt

# 6. Run database migrations (if any)
# python -m alembic upgrade head

# 7. Run dbt models
cd dbt && dbt run --profiles-dir . && cd ..

# 8. Restart application
docker compose down
docker compose up -d

# 9. Verify health
curl http://localhost:8050/health
```
## Rollback Procedure
### Quick Rollback
If deployment fails, rollback to previous commit:
```bash
# 1. Find previous working commit
git log --oneline -10

# 2. Reset to that commit
git reset --hard {commit_hash}

# 3. Restart services
docker compose down
docker compose up -d

# 4. Verify
curl http://localhost:8050/health
```
### Full Rollback (Database)
If database changes need to be reverted:
```bash
# 1. Stop application
docker compose down

# 2. Restore database backup
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump

# 3. Revert code
git reset --hard {commit_hash}

# 4. Run dbt at that version
cd dbt && dbt run --profiles-dir . && cd ..

# 5. Restart
docker compose up -d
```
## Health Checks
### Application Health
```bash
curl http://localhost:8050/health
```
Expected response:
```json
{"status": "healthy"}
```
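A Dash app typically serves such an endpoint from its underlying Flask server. A minimal sketch of that pattern (the project's actual route registration may differ):

```python
# Sketch: registering a /health route on the Flask server behind a Dash app.
import dash
from flask import jsonify

app = dash.Dash(__name__)

@app.server.route("/health")
def health():
    # Mirrors the expected response shown above
    return jsonify(status="healthy")

if __name__ == "__main__":
    app.run(port=8050)  # Dash 2.x development server
```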
### Database Health
```bash
docker compose exec postgres pg_isready -U portfolio
```
### Container Status
```bash
docker compose ps
```
## Monitoring
### View Logs
```bash
# All services
make logs

# Specific service
make logs SERVICE=postgres

# Or directly
docker compose logs -f
```
### Check Resource Usage
```bash
docker stats
```
## Troubleshooting
### Application Won't Start
1. Check container logs: `docker compose logs app`
2. Verify environment variables: `cat .env`
3. Check database connectivity: `docker compose exec postgres pg_isready`
4. Verify port availability: `lsof -i :8050`
### Database Connection Errors
1. Check postgres container: `docker compose ps postgres`
2. Verify DATABASE_URL in `.env`
3. Check postgres logs: `docker compose logs postgres`
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`
### dbt Failures
1. Check dbt configuration and connectivity: `cd dbt && dbt debug`
2. Verify profiles.yml: `cat dbt/profiles.yml`
3. Run with verbose output: `dbt run --debug`
### Out of Memory
1. Check memory usage: `free -h`
2. Review container limits in docker-compose.yml
3. Consider increasing swap or server resources
## Backup Procedures
### Database Backup
```bash
# Create backup
docker compose exec postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql

# Compressed backup
docker compose exec postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
```
### Restore from Backup
```bash
# From SQL file
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql

# From dump file
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
```
## Deployment Checklist
Before deploying to production:
- [ ] All tests pass (`make test`)
- [ ] Linting passes (`make lint`)
- [ ] Staging deployment successful
- [ ] Manual testing on staging complete
- [ ] Database backup taken
- [ ] Rollback plan confirmed
- [ ] Team notified of deployment window

scripts/etl/toronto.sh Executable file

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# scripts/etl/toronto.sh - Run Toronto data pipeline
#
# Usage:
#   ./scripts/etl/toronto.sh --full         # Complete reload of all data
#   ./scripts/etl/toronto.sh --incremental  # Only new data since last run
#   ./scripts/etl/toronto.sh                # Default: incremental
#
# Logs are written to .dev/logs/etl/

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
LOG_DIR="$PROJECT_ROOT/.dev/logs/etl"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="$LOG_DIR/toronto_${TIMESTAMP}.log"
MODE="${1:---incremental}"

mkdir -p "$LOG_DIR"

log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a "$LOG_FILE"
}

log "Starting Toronto ETL pipeline (mode: $MODE)"
log "Log file: $LOG_FILE"

cd "$PROJECT_ROOT"

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
    source .venv/bin/activate
    log "Activated virtual environment"
fi

case "$MODE" in
    --full)
        log "Running FULL data reload..."
        log "Step 1/4: Parsing neighbourhood data..."
        python -m portfolio_app.toronto.parsers.neighbourhoods 2>&1 | tee -a "$LOG_FILE"
        log "Step 2/4: Parsing census data..."
        python -m portfolio_app.toronto.parsers.census 2>&1 | tee -a "$LOG_FILE"
        log "Step 3/4: Parsing crime data..."
        python -m portfolio_app.toronto.parsers.crime 2>&1 | tee -a "$LOG_FILE"
        log "Step 4/4: Running dbt transformations..."
        cd dbt && dbt run --full-refresh --profiles-dir . 2>&1 | tee -a "$LOG_FILE" && cd ..
        ;;
    --incremental)
        log "Running INCREMENTAL update..."
        log "Step 1/2: Checking for new data..."
        # Add incremental logic here when implemented
        log "Step 2/2: Running dbt transformations..."
        cd dbt && dbt run --profiles-dir . 2>&1 | tee -a "$LOG_FILE" && cd ..
        ;;
    *)
        log "ERROR: Unknown mode '$MODE'. Use --full or --incremental"
        exit 1
        ;;
esac

log "Toronto ETL pipeline completed successfully"
log "Full log available at: $LOG_FILE"

scripts/logs.sh Executable file

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# scripts/logs.sh - Follow docker compose logs
#
# Usage:
#   ./scripts/logs.sh              # All services
#   ./scripts/logs.sh postgres     # Specific service
#   ./scripts/logs.sh -n 100       # Last 100 lines

set -euo pipefail

SERVICE="${1:-}"

if [[ -n "$SERVICE" && "$SERVICE" != -* ]]; then
    echo "Following logs for service: $SERVICE"
    shift
    # Pass any remaining flags through verbatim
    docker compose logs -f "$SERVICE" "$@"
else
    echo "Following logs for all services"
    docker compose logs -f "$@"
fi

scripts/run-detached.sh Executable file

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# scripts/run-detached.sh - Start containers and wait for health
#
# Usage:
#   ./scripts/run-detached.sh

set -euo pipefail

TIMEOUT=60
INTERVAL=5

echo "Starting containers in detached mode..."
docker compose up -d

echo "Waiting for services to become healthy..."
elapsed=0
while [ $elapsed -lt $TIMEOUT ]; do
    # Check if postgres is ready
    if docker compose exec -T postgres pg_isready -U portfolio > /dev/null 2>&1; then
        echo "PostgreSQL is ready!"
        # Check if app health endpoint responds (if running)
        if curl -sf http://localhost:8050/health > /dev/null 2>&1; then
            echo "Application health check passed!"
            echo "All services are healthy."
            exit 0
        fi
    fi
    echo "Waiting... ($elapsed/$TIMEOUT seconds)"
    sleep $INTERVAL
    elapsed=$((elapsed + INTERVAL))
done

echo "ERROR: Health check timed out after $TIMEOUT seconds"
docker compose ps
exit 1