Compare commits
72 Commits
e1135a77a8
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| a2c213be5d | |||
| 0455ec69a0 | |||
| 9e216962b1 | |||
| dfa5f92d8a | |||
| 0c9769fd27 | |||
| cb908a18c3 | |||
| 558022f26e | |||
| 9e27fb8011 | |||
| cda2a078d9 | |||
| dd8de9810d | |||
| 56bcc1bb1d | |||
| ee0a7ef7ad | |||
| fd9850778e | |||
| 01e98103c7 | |||
| 62d1a52eed | |||
| e37611673f | |||
| 33306a911b | |||
| a5d6866d63 | |||
| f58b2f70e2 | |||
| 263b52d5e4 | |||
| f345d41535 | |||
| 14701f334c | |||
| 92763a17c4 | |||
| 546ee1cc92 | |||
| 9cc2cf0e00 | |||
| 28f239e8cd | |||
| c3de98c4a5 | |||
| eee015efac | |||
| 941305e71c | |||
| 54665bac63 | |||
| 3eb32a4766 | |||
| 69c4216cd5 | |||
| 6e00a17c05 | |||
| 8f3c5554f9 | |||
| 5839eabf1e | |||
| ebe48304d7 | |||
| 2fc2a1bdb5 | |||
| 6872aa510b | |||
| 9a1fc81f79 | |||
| cf6e874961 | |||
| 451dc10a10 | |||
| 193b9289b9 | |||
| 7a16e6d121 | |||
| ecc50e5d98 | |||
| ae3742630e | |||
| e70965b429 | |||
| 25954f17bb | |||
| bffd44a5a5 | |||
| bf6e392002 | |||
| d0f32edba7 | |||
| 4818c53fd2 | |||
| 1a878313f8 | |||
| 1eba95d4d1 | |||
| c9cf744d84 | |||
| 3054441630 | |||
| b6d210ec6b | |||
| 053acf6436 | |||
| f69d0c15a7 | |||
| 81993b23a7 | |||
| 457efec77f | |||
| f5f2bf3706 | |||
| fcaefabce8 | |||
| cb877df9e1 | |||
| 48b4eeeb62 | |||
| d3ca4ad4eb | |||
| e7bc545f25 | |||
| c8f4cc6241 | |||
| 3cd2eada7c | |||
| 138e6fe497 | |||
| cd7b5ce154 | |||
| d64f90b3d3 | |||
| b3fb94c7cb |
35
.gitea/workflows/ci.yml
Normal file
35
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- development
|
||||||
|
- staging
|
||||||
|
- main
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- development
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
lint-and-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pip install ruff pytest
|
||||||
|
|
||||||
|
- name: Run linter
|
||||||
|
run: ruff check .
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
run: pytest tests/ -v --tb=short
|
||||||
44
.gitea/workflows/deploy-production.yml
Normal file
44
.gitea/workflows/deploy-production.yml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
name: Deploy to Production
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Deploy to Production Server
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.PROD_HOST }}
|
||||||
|
username: ${{ secrets.PROD_USER }}
|
||||||
|
key: ${{ secrets.PROD_SSH_KEY }}
|
||||||
|
script: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
echo "Pulling latest changes..."
|
||||||
|
git fetch origin main
|
||||||
|
git reset --hard origin/main
|
||||||
|
|
||||||
|
echo "Activating virtual environment..."
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
echo "Installing dependencies..."
|
||||||
|
pip install -r requirements.txt --quiet
|
||||||
|
|
||||||
|
echo "Running dbt models..."
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
echo "Restarting application..."
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
echo "Waiting for health check..."
|
||||||
|
sleep 10
|
||||||
|
curl -f http://localhost:8050/health || exit 1
|
||||||
|
|
||||||
|
echo "Production deployment complete!"
|
||||||
44
.gitea/workflows/deploy-staging.yml
Normal file
44
.gitea/workflows/deploy-staging.yml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
name: Deploy to Staging
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- staging
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Deploy to Staging Server
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.STAGING_HOST }}
|
||||||
|
username: ${{ secrets.STAGING_USER }}
|
||||||
|
key: ${{ secrets.STAGING_SSH_KEY }}
|
||||||
|
script: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
echo "Pulling latest changes..."
|
||||||
|
git fetch origin staging
|
||||||
|
git reset --hard origin/staging
|
||||||
|
|
||||||
|
echo "Activating virtual environment..."
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
echo "Installing dependencies..."
|
||||||
|
pip install -r requirements.txt --quiet
|
||||||
|
|
||||||
|
echo "Running dbt models..."
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
echo "Restarting application..."
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
echo "Waiting for health check..."
|
||||||
|
sleep 10
|
||||||
|
curl -f http://localhost:8050/health || exit 1
|
||||||
|
|
||||||
|
echo "Staging deployment complete!"
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -198,3 +198,4 @@ cython_debug/
|
|||||||
# PyPI configuration file
|
# PyPI configuration file
|
||||||
.pypirc
|
.pypirc
|
||||||
|
|
||||||
|
dbt/.user.yml
|
||||||
|
|||||||
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"python.defaultInterpreterPath": "/home/leomiranda/WorkDev/personal/personal-portfolio/.venv/bin/python"
|
||||||
|
}
|
||||||
358
CLAUDE.md
358
CLAUDE.md
@@ -1,13 +1,56 @@
|
|||||||
# CLAUDE.md
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## ⛔ MANDATORY BEHAVIOR RULES - READ FIRST
|
||||||
|
|
||||||
|
**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.**
|
||||||
|
|
||||||
|
### 1. WHEN USER ASKS YOU TO CHECK SOMETHING - CHECK EVERYTHING
|
||||||
|
- Search ALL locations, not just where you think it is
|
||||||
|
- Check cache directories: `~/.claude/plugins/cache/`
|
||||||
|
- Check installed: `~/.claude/plugins/marketplaces/`
|
||||||
|
- Check source directories
|
||||||
|
- **NEVER say "no" or "that's not the issue" without exhaustive verification**
|
||||||
|
|
||||||
|
### 2. WHEN USER SAYS SOMETHING IS WRONG - BELIEVE THEM
|
||||||
|
- The user knows their system better than you
|
||||||
|
- Investigate thoroughly before disagreeing
|
||||||
|
- **Your confidence is often wrong. User's instincts are often right.**
|
||||||
|
|
||||||
|
### 3. NEVER SAY "DONE" WITHOUT VERIFICATION
|
||||||
|
- Run the actual command/script to verify
|
||||||
|
- Show the output to the user
|
||||||
|
- **"Done" means VERIFIED WORKING, not "I made changes"**
|
||||||
|
|
||||||
|
### 4. SHOW EXACTLY WHAT USER ASKS FOR
|
||||||
|
- If user asks for messages, show the MESSAGES
|
||||||
|
- If user asks for code, show the CODE
|
||||||
|
- **Do not interpret or summarize unless asked**
|
||||||
|
|
||||||
|
**FAILURE TO FOLLOW THESE RULES = WASTED USER TIME = UNACCEPTABLE**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Mandatory Behavior Rules
|
||||||
|
|
||||||
|
**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.**
|
||||||
|
|
||||||
|
1. **CHECK EVERYTHING** - Search ALL locations before saying "no" (cache, installed, source directories)
|
||||||
|
2. **BELIEVE THE USER** - Investigate thoroughly before disagreeing; user instincts are often right
|
||||||
|
3. **VERIFY BEFORE "DONE"** - Run commands, show output; "done" means verified working
|
||||||
|
4. **SHOW EXACTLY WHAT'S ASKED** - Do not interpret or summarize unless requested
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
Working context for Claude Code on the Analytics Portfolio project.
|
Working context for Claude Code on the Analytics Portfolio project.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Project Status
|
## Project Status
|
||||||
|
|
||||||
**Current Sprint**: 1 (Project Bootstrap)
|
**Last Completed Sprint**: 9 (Neighbourhood Dashboard Transition)
|
||||||
**Phase**: 1 - Toronto Housing Dashboard
|
**Current State**: Ready for deployment sprint or new features
|
||||||
**Branch**: `development` (feature branches merge here)
|
**Branch**: `development` (feature branches merge here)
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -17,15 +60,33 @@ Working context for Claude Code on the Analytics Portfolio project.
|
|||||||
### Run Commands
|
### Run Commands
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Setup & Database
|
||||||
make setup # Install deps, create .env, init pre-commit
|
make setup # Install deps, create .env, init pre-commit
|
||||||
make docker-up # Start PostgreSQL + PostGIS
|
make docker-up # Start PostgreSQL + PostGIS (auto-detects x86/ARM)
|
||||||
make docker-down # Stop containers
|
make docker-down # Stop containers
|
||||||
make db-init # Initialize database schema
|
make db-init # Initialize database schema
|
||||||
|
make db-reset # Drop and recreate database (DESTRUCTIVE)
|
||||||
|
|
||||||
|
# Data Loading
|
||||||
|
make load-data # Load all project data (currently: Toronto)
|
||||||
|
make load-toronto # Load Toronto data from APIs
|
||||||
|
|
||||||
|
# Application
|
||||||
make run # Start Dash dev server
|
make run # Start Dash dev server
|
||||||
|
|
||||||
|
# Testing & Quality
|
||||||
make test # Run pytest
|
make test # Run pytest
|
||||||
make lint # Run ruff linter
|
make lint # Run ruff linter
|
||||||
make format # Run ruff formatter
|
make format # Run ruff formatter
|
||||||
make ci # Run all checks
|
make typecheck # Run mypy type checker
|
||||||
|
make ci # Run all checks (lint, typecheck, test)
|
||||||
|
|
||||||
|
# dbt
|
||||||
|
make dbt-run # Run dbt models
|
||||||
|
make dbt-test # Run dbt tests
|
||||||
|
make dbt-docs # Generate and serve dbt documentation
|
||||||
|
|
||||||
|
# Run `make help` for full target list
|
||||||
```
|
```
|
||||||
|
|
||||||
### Branch Workflow
|
### Branch Workflow
|
||||||
@@ -43,85 +104,50 @@ make ci # Run all checks
|
|||||||
|
|
||||||
| Context | Style | Example |
|
| Context | Style | Example |
|
||||||
|---------|-------|---------|
|
|---------|-------|---------|
|
||||||
| Same directory | Single dot | `from .trreb import TRREBParser` |
|
| Same directory | Single dot | `from .neighbourhood import NeighbourhoodRecord` |
|
||||||
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
|
| Sibling directory | Double dot | `from ..schemas.neighbourhood import CensusRecord` |
|
||||||
| External packages | Absolute | `import pandas as pd` |
|
| External packages | Absolute | `import pandas as pd` |
|
||||||
|
|
||||||
### Module Responsibilities
|
### Module Responsibilities
|
||||||
|
|
||||||
| Directory | Contains | Purpose |
|
| Directory | Purpose |
|
||||||
|-----------|----------|---------|
|
|-----------|---------|
|
||||||
| `schemas/` | Pydantic models | Data validation |
|
| `schemas/` | Pydantic models for data validation |
|
||||||
| `models/` | SQLAlchemy ORM | Database persistence |
|
| `models/` | SQLAlchemy ORM for database persistence |
|
||||||
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
|
| `parsers/` | API/CSV extraction for raw data ingestion |
|
||||||
| `loaders/` | Database operations | Data loading |
|
| `loaders/` | Database operations for data loading |
|
||||||
| `figures/` | Chart factories | Plotly figure generation |
|
| `services/` | Query functions for dbt mart queries |
|
||||||
| `callbacks/` | Dash callbacks | In `pages/{dashboard}/callbacks/` |
|
| `figures/` | Chart factories for Plotly figure generation |
|
||||||
| `errors/` | Exceptions + handlers | Error handling |
|
| `errors/` | Custom exception classes (see `errors/exceptions.py`) |
|
||||||
|
|
||||||
### Type Hints
|
|
||||||
|
|
||||||
Use Python 3.10+ style:
|
|
||||||
```python
|
|
||||||
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
### Error Handling
|
|
||||||
|
|
||||||
```python
|
|
||||||
# errors/exceptions.py
|
|
||||||
class PortfolioError(Exception):
|
|
||||||
"""Base exception."""
|
|
||||||
|
|
||||||
class ParseError(PortfolioError):
|
|
||||||
"""PDF/CSV parsing failed."""
|
|
||||||
|
|
||||||
class ValidationError(PortfolioError):
|
|
||||||
"""Pydantic or business rule validation failed."""
|
|
||||||
|
|
||||||
class LoadError(PortfolioError):
|
|
||||||
"""Database load operation failed."""
|
|
||||||
```
|
|
||||||
|
|
||||||
### Code Standards
|
### Code Standards
|
||||||
|
|
||||||
|
- Python 3.10+ type hints: `list[str]`, `dict[str, int] | None`
|
||||||
- Single responsibility functions with verb naming
|
- Single responsibility functions with verb naming
|
||||||
- Early returns over deep nesting
|
- Early returns over deep nesting
|
||||||
- Google-style docstrings only for non-obvious behavior
|
- Google-style docstrings only for non-obvious behavior
|
||||||
- Module-level constants for magic values
|
|
||||||
- Pydantic BaseSettings for runtime config
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Application Structure
|
## Application Structure
|
||||||
|
|
||||||
```
|
**Entry Point:** `portfolio_app/app.py` (Dash app factory with Pages routing)
|
||||||
portfolio_app/
|
|
||||||
├── app.py # Dash app factory with Pages routing
|
|
||||||
├── config.py # Pydantic BaseSettings
|
|
||||||
├── assets/ # CSS, images (auto-served)
|
|
||||||
├── pages/
|
|
||||||
│ ├── home.py # Bio landing page -> /
|
|
||||||
│ └── toronto/
|
|
||||||
│ ├── dashboard.py # Layout only -> /toronto
|
|
||||||
│ └── callbacks/ # Interaction logic
|
|
||||||
├── components/ # Shared UI (navbar, footer, cards)
|
|
||||||
├── figures/ # Shared chart factories
|
|
||||||
├── toronto/ # Toronto data logic
|
|
||||||
│ ├── parsers/
|
|
||||||
│ ├── loaders/
|
|
||||||
│ ├── schemas/ # Pydantic
|
|
||||||
│ └── models/ # SQLAlchemy
|
|
||||||
└── errors/
|
|
||||||
```
|
|
||||||
|
|
||||||
### URL Routing
|
| Directory | Purpose |
|
||||||
|
|-----------|---------|
|
||||||
|
| `pages/` | Dash Pages (file-based routing) |
|
||||||
|
| `pages/toronto/` | Toronto Dashboard (`tabs/` for layouts, `callbacks/` for interactions) |
|
||||||
|
| `components/` | Shared UI components |
|
||||||
|
| `figures/toronto/` | Toronto chart factories |
|
||||||
|
| `toronto/` | Toronto data logic (parsers, loaders, schemas, models) |
|
||||||
|
|
||||||
| URL | Page | Sprint |
|
**Key URLs:** `/` (home), `/toronto` (dashboard), `/blog` (listing), `/blog/{slug}` (articles), `/health` (status)
|
||||||
|-----|------|--------|
|
|
||||||
| `/` | Bio landing page | 2 |
|
### Multi-Dashboard Architecture
|
||||||
| `/toronto` | Toronto Housing Dashboard | 6 |
|
|
||||||
|
- **figures/**: Domain-namespaced (`figures/toronto/`, future: `figures/football/`)
|
||||||
|
- **dbt models**: Domain subdirectories (`staging/toronto/`, `marts/toronto/`)
|
||||||
|
- **Database schemas**: Domain-specific raw data (`raw_toronto`, future: `raw_football`)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -133,87 +159,45 @@ portfolio_app/
|
|||||||
| Validation | Pydantic | >=2.0 |
|
| Validation | Pydantic | >=2.0 |
|
||||||
| ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
|
| ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
|
||||||
| Transformation | dbt-postgres | >=1.7 |
|
| Transformation | dbt-postgres | >=1.7 |
|
||||||
| Data Processing | Pandas | >=2.1 |
|
| Visualization | Dash + Plotly + dash-mantine-components | >=2.14 |
|
||||||
| Geospatial | GeoPandas + Shapely | >=0.14 |
|
| Geospatial | GeoPandas + Shapely | >=0.14 |
|
||||||
| Visualization | Dash + Plotly | >=2.14 |
|
|
||||||
| UI Components | dash-mantine-components | Latest stable |
|
|
||||||
| Testing | pytest | >=7.0 |
|
|
||||||
| Python | 3.11+ | Via pyenv |
|
| Python | 3.11+ | Via pyenv |
|
||||||
|
|
||||||
**Notes**:
|
**Notes**: SQLAlchemy 2.0 + Pydantic 2.0 only. Docker Compose V2 format (no `version` field).
|
||||||
- SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
|
|
||||||
- PostGIS extension required in database
|
|
||||||
- Docker Compose V2 format (no `version` field)
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Data Model Overview
|
## Data Model Overview
|
||||||
|
|
||||||
### Geographic Reality (Toronto Housing)
|
### Database Schemas
|
||||||
|
|
||||||
```
|
| Schema | Purpose |
|
||||||
TRREB Districts (~35) - Purchase data (W01, C01, E01...)
|
|--------|---------|
|
||||||
CMHC Zones (~20) - Rental data (Census Tract aligned)
|
| `public` | Shared dimensions (dim_time) |
|
||||||
City Neighbourhoods (158) - Enrichment/overlay only
|
| `raw_toronto` | Toronto-specific raw/dimension tables |
|
||||||
```
|
| `stg_toronto` | Toronto dbt staging views |
|
||||||
|
| `int_toronto` | Toronto dbt intermediate views |
|
||||||
|
| `mart_toronto` | Toronto dbt mart tables |
|
||||||
|
|
||||||
**Critical**: These geographies do NOT align. Display as separate layers—do not force crosswalks.
|
### dbt Project: `portfolio`
|
||||||
|
|
||||||
### Star Schema
|
|
||||||
|
|
||||||
| Table | Type | Keys |
|
|
||||||
|-------|------|------|
|
|
||||||
| `fact_purchases` | Fact | -> dim_time, dim_trreb_district |
|
|
||||||
| `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone |
|
|
||||||
| `dim_time` | Dimension | date_key (PK) |
|
|
||||||
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
|
|
||||||
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
|
|
||||||
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
|
|
||||||
| `dim_policy_event` | Dimension | event_id (PK) |
|
|
||||||
|
|
||||||
**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
|
|
||||||
|
|
||||||
### dbt Layers
|
|
||||||
|
|
||||||
| Layer | Naming | Purpose |
|
| Layer | Naming | Purpose |
|
||||||
|-------|--------|---------|
|
|-------|--------|---------|
|
||||||
|
| Shared | `stg_dimensions__*` | Cross-domain dimensions |
|
||||||
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
||||||
| Intermediate | `int_{domain}__{transform}` | Business logic |
|
| Intermediate | `int_{domain}__{transform}` | Business logic |
|
||||||
| Marts | `mart_{domain}` | Final analytical tables |
|
| Marts | `mart_{domain}` | Final analytical tables |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## DO NOT BUILD (Phase 1)
|
## Deferred Features
|
||||||
|
|
||||||
**Stop and flag if a task seems to require these**:
|
**Stop and flag if a task requires these**:
|
||||||
|
|
||||||
| Feature | Reason |
|
| Feature | Reason |
|
||||||
|---------|--------|
|
|---------|--------|
|
||||||
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 |
|
|
||||||
| Crime data integration | Deferred to Phase 4 |
|
|
||||||
| Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
|
| Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
|
||||||
| ML prediction models | Energy project scope (Phase 3) |
|
| ML prediction models | Energy project scope (future phase) |
|
||||||
| Multi-project shared infrastructure | Build first, abstract second (Phase 2) |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Sprint 1 Deliverables
|
|
||||||
|
|
||||||
| Category | Tasks |
|
|
||||||
|----------|-------|
|
|
||||||
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
|
|
||||||
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
|
|
||||||
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
|
|
||||||
| **Tests** | tests/ directory, conftest.py, pytest config |
|
|
||||||
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
|
|
||||||
|
|
||||||
### Human Tasks (Cannot Automate)
|
|
||||||
|
|
||||||
| Task | Tool | Effort |
|
|
||||||
|------|------|--------|
|
|
||||||
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
|
|
||||||
| Research policy events (10-20) | Manual | 2-3 hours |
|
|
||||||
| Replace social link placeholders | Manual | 5 minutes |
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -233,25 +217,123 @@ LOG_LEVEL=INFO
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Script Standards
|
|
||||||
|
|
||||||
All scripts in `scripts/`:
|
|
||||||
- Include usage comments at top
|
|
||||||
- Idempotent where possible
|
|
||||||
- Exit codes: 0 = success, 1 = error
|
|
||||||
- Use `set -euo pipefail` for bash
|
|
||||||
- Log to stdout, errors to stderr
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Reference Documents
|
## Reference Documents
|
||||||
|
|
||||||
| Document | Location | Use When |
|
| Document | Location | Use When |
|
||||||
|----------|----------|----------|
|
|----------|----------|----------|
|
||||||
| Full specification | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
|
| Project reference | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
|
||||||
| Data schemas | `docs/toronto_housing_dashboard_spec_v5.md` | Parser/model tasks |
|
| Developer guide | `docs/CONTRIBUTING.md` | How to add pages, tabs |
|
||||||
| WBS details | `docs/wbs_sprint_plan_v4.md` | Sprint planning |
|
| Lessons learned | `docs/project-lessons-learned/INDEX.md` | Past issues and solutions |
|
||||||
|
| Deployment runbook | `docs/runbooks/deployment.md` | Deploying to environments |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*Last Updated: Sprint 1*
|
## Plugin Reference
|
||||||
|
|
||||||
|
### Sprint Management: projman
|
||||||
|
|
||||||
|
**CRITICAL: Always use projman for sprint and task management.**
|
||||||
|
|
||||||
|
| Skill | Trigger | Purpose |
|
||||||
|
|-------|---------|---------|
|
||||||
|
| `/projman:sprint-plan` | New sprint/feature | Architecture analysis + Gitea issue creation |
|
||||||
|
| `/projman:sprint-start` | Begin implementation | Load lessons learned, start execution |
|
||||||
|
| `/projman:sprint-status` | Check progress | Review blockers and completion |
|
||||||
|
| `/projman:sprint-close` | Sprint completion | Capture lessons learned |
|
||||||
|
|
||||||
|
**Default workflow**: `/projman:sprint-plan` before code -> create issues -> `/projman:sprint-start` -> track via Gitea -> `/projman:sprint-close`
|
||||||
|
|
||||||
|
**Gitea**: `personal-projects/personal-portfolio` at `gitea.hotserv.cloud`
|
||||||
|
|
||||||
|
### Data Platform: data-platform
|
||||||
|
|
||||||
|
Use for dbt, PostgreSQL, and PostGIS operations.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/data-platform:data-review` | Audit data integrity, schema validity, dbt compliance |
|
||||||
|
| `/data-platform:data-gate` | CI/CD data quality gate (pass/fail) |
|
||||||
|
|
||||||
|
**When to use:** Schema changes, dbt model development, data loading, before merging data PRs.
|
||||||
|
|
||||||
|
**MCP tools available:** `pg_connect`, `pg_query`, `pg_tables`, `pg_columns`, `pg_schemas`, `st_*` (PostGIS), `dbt_*` operations.
|
||||||
|
|
||||||
|
### Visualization: viz-platform
|
||||||
|
|
||||||
|
Use for Dash/Mantine component validation and chart creation.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/viz-platform:component` | Inspect DMC component props and validation |
|
||||||
|
| `/viz-platform:chart` | Create themed Plotly charts |
|
||||||
|
| `/viz-platform:theme` | Apply/validate themes |
|
||||||
|
| `/viz-platform:dashboard` | Create dashboard layouts |
|
||||||
|
|
||||||
|
**When to use:** Dashboard development, new visualizations, component prop lookup.
|
||||||
|
|
||||||
|
### Code Quality: code-sentinel
|
||||||
|
|
||||||
|
Use for security scanning and refactoring analysis.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/code-sentinel:security-scan` | Full security audit of codebase |
|
||||||
|
| `/code-sentinel:refactor` | Apply refactoring patterns |
|
||||||
|
| `/code-sentinel:refactor-dry` | Preview refactoring without applying |
|
||||||
|
|
||||||
|
**When to use:** Before major releases, after adding auth/data handling code, periodic audits.
|
||||||
|
|
||||||
|
### Documentation: doc-guardian
|
||||||
|
|
||||||
|
Use for documentation drift detection and synchronization.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/doc-guardian:doc-audit` | Scan project for documentation drift |
|
||||||
|
| `/doc-guardian:doc-sync` | Synchronize pending documentation updates |
|
||||||
|
|
||||||
|
**When to use:** After significant code changes, before releases.
|
||||||
|
|
||||||
|
### Pull Requests: pr-review
|
||||||
|
|
||||||
|
Use for comprehensive PR review with multiple analysis perspectives.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/pr-review:initial-setup` | Configure PR review for project |
|
||||||
|
| Triggered automatically | Security, performance, maintainability, test analysis |
|
||||||
|
|
||||||
|
**When to use:** Before merging significant PRs to `development` or `main`.
|
||||||
|
|
||||||
|
### Requirement Clarification: clarity-assist
|
||||||
|
|
||||||
|
Use when requirements are ambiguous or need decomposition.
|
||||||
|
|
||||||
|
**When to use:** Unclear specifications, complex feature requests, conflicting requirements.
|
||||||
|
|
||||||
|
### Contract Validation: contract-validator
|
||||||
|
|
||||||
|
Use for plugin interface validation.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/contract-validator:agent-check` | Quick agent definition validation |
|
||||||
|
| `/contract-validator:full-validation` | Full plugin contract validation |
|
||||||
|
|
||||||
|
**When to use:** When modifying plugin integrations or agent definitions.
|
||||||
|
|
||||||
|
### Git Workflow: git-flow
|
||||||
|
|
||||||
|
Use for standardized git operations.
|
||||||
|
|
||||||
|
| Skill | Purpose |
|
||||||
|
|-------|---------|
|
||||||
|
| `/git-flow:commit` | Auto-generated conventional commit |
|
||||||
|
| `/git-flow:branch-start` | Create feature/fix/chore branch |
|
||||||
|
| `/git-flow:git-status` | Comprehensive status with recommendations |
|
||||||
|
|
||||||
|
**When to use:** Complex merge scenarios, branch management, standardized commits.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Last Updated: February 2026*
|
||||||
|
|||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024-2025 Leo Miranda
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
65
Makefile
65
Makefile
@@ -1,13 +1,25 @@
|
|||||||
.PHONY: setup docker-up docker-down db-init run test dbt-run dbt-test lint format ci deploy clean help
|
.PHONY: setup docker-up docker-down db-init load-data load-all load-toronto load-toronto-only seed-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
|
||||||
|
|
||||||
# Default target
|
# Default target
|
||||||
.DEFAULT_GOAL := help
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
PYTHON := python3
|
VENV := .venv
|
||||||
PIP := pip
|
PYTHON := $(VENV)/bin/python3
|
||||||
|
PIP := $(VENV)/bin/pip
|
||||||
DOCKER_COMPOSE := docker compose
|
DOCKER_COMPOSE := docker compose
|
||||||
|
|
||||||
|
# Architecture detection for Docker images
|
||||||
|
ARCH := $(shell uname -m)
|
||||||
|
ifeq ($(ARCH),aarch64)
|
||||||
|
POSTGIS_IMAGE := imresamu/postgis:16-3.4
|
||||||
|
else ifeq ($(ARCH),arm64)
|
||||||
|
POSTGIS_IMAGE := imresamu/postgis:16-3.4
|
||||||
|
else
|
||||||
|
POSTGIS_IMAGE := postgis/postgis:16-3.4
|
||||||
|
endif
|
||||||
|
export POSTGIS_IMAGE
|
||||||
|
|
||||||
# Colors for output
|
# Colors for output
|
||||||
BLUE := \033[0;34m
|
BLUE := \033[0;34m
|
||||||
GREEN := \033[0;32m
|
GREEN := \033[0;32m
|
||||||
@@ -39,6 +51,7 @@ setup: ## Install dependencies, create .env, init pre-commit
|
|||||||
|
|
||||||
docker-up: ## Start PostgreSQL + PostGIS containers
|
docker-up: ## Start PostgreSQL + PostGIS containers
|
||||||
@echo "$(GREEN)Starting database containers...$(NC)"
|
@echo "$(GREEN)Starting database containers...$(NC)"
|
||||||
|
@echo "$(BLUE)Architecture: $(ARCH) -> Using image: $(POSTGIS_IMAGE)$(NC)"
|
||||||
$(DOCKER_COMPOSE) up -d
|
$(DOCKER_COMPOSE) up -d
|
||||||
@echo "$(GREEN)Waiting for database to be ready...$(NC)"
|
@echo "$(GREEN)Waiting for database to be ready...$(NC)"
|
||||||
@sleep 3
|
@sleep 3
|
||||||
@@ -57,11 +70,7 @@ docker-logs: ## View container logs
|
|||||||
|
|
||||||
db-init: ## Initialize database schema
|
db-init: ## Initialize database schema
|
||||||
@echo "$(GREEN)Initializing database schema...$(NC)"
|
@echo "$(GREEN)Initializing database schema...$(NC)"
|
||||||
@if [ -f scripts/db/init.sh ]; then \
|
$(PYTHON) scripts/db/init_schema.py
|
||||||
bash scripts/db/init.sh; \
|
|
||||||
else \
|
|
||||||
echo "$(YELLOW)scripts/db/init.sh not found - skipping$(NC)"; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
db-reset: ## Drop and recreate database (DESTRUCTIVE)
|
db-reset: ## Drop and recreate database (DESTRUCTIVE)
|
||||||
@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
|
@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
|
||||||
@@ -71,6 +80,27 @@ db-reset: ## Drop and recreate database (DESTRUCTIVE)
|
|||||||
@sleep 3
|
@sleep 3
|
||||||
$(MAKE) db-init
|
$(MAKE) db-init
|
||||||
|
|
||||||
|
# Domain-specific data loading
|
||||||
|
load-toronto: ## Load Toronto data from APIs
|
||||||
|
@echo "$(GREEN)Loading Toronto neighbourhood data...$(NC)"
|
||||||
|
$(PYTHON) scripts/data/load_toronto_data.py
|
||||||
|
@echo "$(GREEN)Seeding Toronto development data...$(NC)"
|
||||||
|
$(PYTHON) scripts/data/seed_amenity_data.py
|
||||||
|
|
||||||
|
load-toronto-only: ## Load Toronto data without running dbt or seeding
|
||||||
|
@echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)"
|
||||||
|
$(PYTHON) scripts/data/load_toronto_data.py --skip-dbt
|
||||||
|
|
||||||
|
# Aggregate data loading
|
||||||
|
load-data: load-toronto ## Load all project data (currently: Toronto)
|
||||||
|
@echo "$(GREEN)All data loaded!$(NC)"
|
||||||
|
|
||||||
|
load-all: load-data ## Alias for load-data
|
||||||
|
|
||||||
|
seed-data: ## Seed sample development data (amenities, median_age)
|
||||||
|
@echo "$(GREEN)Seeding development data...$(NC)"
|
||||||
|
$(PYTHON) scripts/data/seed_amenity_data.py
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Application
|
# Application
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -97,15 +127,15 @@ test-cov: ## Run pytest with coverage
|
|||||||
|
|
||||||
dbt-run: ## Run dbt models
|
dbt-run: ## Run dbt models
|
||||||
@echo "$(GREEN)Running dbt models...$(NC)"
|
@echo "$(GREEN)Running dbt models...$(NC)"
|
||||||
cd dbt && dbt run
|
@set -a && . ./.env && set +a && cd dbt && dbt run --profiles-dir .
|
||||||
|
|
||||||
dbt-test: ## Run dbt tests
|
dbt-test: ## Run dbt tests
|
||||||
@echo "$(GREEN)Running dbt tests...$(NC)"
|
@echo "$(GREEN)Running dbt tests...$(NC)"
|
||||||
cd dbt && dbt test
|
@set -a && . ./.env && set +a && cd dbt && dbt test --profiles-dir .
|
||||||
|
|
||||||
dbt-docs: ## Generate dbt documentation
|
dbt-docs: ## Generate dbt documentation
|
||||||
@echo "$(GREEN)Generating dbt docs...$(NC)"
|
@echo "$(GREEN)Generating dbt docs...$(NC)"
|
||||||
cd dbt && dbt docs generate && dbt docs serve
|
@set -a && . ./.env && set +a && cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir .
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Code Quality
|
# Code Quality
|
||||||
@@ -131,6 +161,19 @@ ci: ## Run all checks (lint, typecheck, test)
|
|||||||
$(MAKE) test
|
$(MAKE) test
|
||||||
@echo "$(GREEN)All checks passed!$(NC)"
|
@echo "$(GREEN)All checks passed!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Operations
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
|
||||||
|
@./scripts/logs.sh $(SERVICE)
|
||||||
|
|
||||||
|
run-detached: ## Start containers and wait for health check
|
||||||
|
@./scripts/run-detached.sh
|
||||||
|
|
||||||
|
etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
|
||||||
|
@./scripts/etl/toronto.sh $(MODE)
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Deployment
|
# Deployment
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
186
README.md
186
README.md
@@ -1,36 +1,82 @@
|
|||||||
# Analytics Portfolio
|
# Analytics Portfolio
|
||||||
|
|
||||||
A data analytics portfolio showcasing end-to-end data engineering, visualization, and analysis capabilities.
|
[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
|
||||||
|
|
||||||
## Projects
|
**Live Demo:** [leodata.science](https://leodata.science)
|
||||||
|
|
||||||
### Toronto Housing Dashboard
|
A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
|
||||||
|
|
||||||
An interactive choropleth dashboard analyzing Toronto's housing market using multi-source data integration.
|
## Live Pages
|
||||||
|
|
||||||
**Features:**
|
| Route | Page | Description |
|
||||||
- Purchase market analysis from TRREB monthly reports
|
|-------|------|-------------|
|
||||||
- Rental market analysis from CMHC annual surveys
|
| `/` | Home | Bio landing page |
|
||||||
- Interactive choropleth maps by district/zone
|
| `/about` | About | Background and experience |
|
||||||
- Time series visualization with policy event annotations
|
| `/projects` | Projects | Portfolio project showcase |
|
||||||
- Purchase/Rental mode toggle
|
| `/resume` | Resume | Professional CV |
|
||||||
|
| `/contact` | Contact | Contact form |
|
||||||
|
| `/blog` | Blog | Technical articles |
|
||||||
|
| `/blog/{slug}` | Article | Individual blog posts |
|
||||||
|
| `/toronto` | Toronto Dashboard | Neighbourhood analysis (5 tabs) |
|
||||||
|
| `/toronto/methodology` | Methodology | Dashboard data sources and methods |
|
||||||
|
| `/health` | Health | API health check endpoint |
|
||||||
|
|
||||||
**Data Sources:**
|
## Toronto Neighbourhood Dashboard
|
||||||
- [TRREB Market Watch](https://trreb.ca/market-data/market-watch/) - Monthly purchase statistics
|
|
||||||
- [CMHC Rental Market Survey](https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market) - Annual rental data
|
|
||||||
|
|
||||||
**Tech Stack:**
|
An interactive choropleth dashboard analyzing Toronto's 158 official neighbourhoods across five dimensions:
|
||||||
- Python 3.11+ / Dash / Plotly
|
|
||||||
- PostgreSQL + PostGIS
|
- **Overview**: Composite livability scores, income vs safety scatter
|
||||||
- dbt for data transformation
|
- **Housing**: Affordability index, rent trends, dwelling types
|
||||||
- Pydantic for validation
|
- **Safety**: Crime rates, breakdowns by type, trend analysis
|
||||||
- SQLAlchemy 2.0
|
- **Demographics**: Income distribution, age pyramids, population density
|
||||||
|
- **Amenities**: Parks, schools, transit accessibility
|
||||||
|
|
||||||
|
**Data Sources**:
|
||||||
|
- City of Toronto Open Data Portal (neighbourhoods, census profiles, amenities)
|
||||||
|
- Toronto Police Service (crime statistics)
|
||||||
|
- CMHC Rental Market Survey (rental data by zone)
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
subgraph Sources
|
||||||
|
A1[City of Toronto API]
|
||||||
|
A2[Toronto Police API]
|
||||||
|
A3[CMHC Data]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph ETL
|
||||||
|
B1[Parsers]
|
||||||
|
B2[Loaders]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Database
|
||||||
|
C1[(PostgreSQL/PostGIS)]
|
||||||
|
C2[dbt Models]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Application
|
||||||
|
D1[Dash App]
|
||||||
|
D2[Plotly Figures]
|
||||||
|
end
|
||||||
|
|
||||||
|
A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pipeline Stages:**
|
||||||
|
- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
|
||||||
|
- **ETL**: Python parsers extract and validate data; loaders persist to database
|
||||||
|
- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
|
||||||
|
- **Application**: Dash serves interactive dashboards with Plotly visualizations
|
||||||
|
|
||||||
|
For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Clone and setup
|
# Clone and setup
|
||||||
git clone https://github.com/lmiranda/personal-portfolio.git
|
git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
|
||||||
cd personal-portfolio
|
cd personal-portfolio
|
||||||
|
|
||||||
# Install dependencies and configure environment
|
# Install dependencies and configure environment
|
||||||
@@ -55,48 +101,75 @@ portfolio_app/
|
|||||||
├── app.py # Dash app factory
|
├── app.py # Dash app factory
|
||||||
├── config.py # Pydantic settings
|
├── config.py # Pydantic settings
|
||||||
├── pages/
|
├── pages/
|
||||||
│ ├── home.py # Bio landing page (/)
|
│ ├── home.py # Bio landing (/)
|
||||||
│ └── toronto/ # Toronto dashboard (/toronto)
|
│ ├── about.py # About page
|
||||||
|
│ ├── contact.py # Contact form
|
||||||
|
│ ├── projects.py # Project showcase
|
||||||
|
│ ├── resume.py # Resume/CV
|
||||||
|
│ ├── blog/ # Blog system
|
||||||
|
│ │ ├── index.py # Article listing
|
||||||
|
│ │ └── article.py # Article renderer
|
||||||
|
│ └── toronto/ # Toronto dashboard
|
||||||
|
│ ├── dashboard.py # Main layout with tabs
|
||||||
|
│ ├── methodology.py # Data documentation
|
||||||
|
│ ├── tabs/ # Tab layouts (5)
|
||||||
|
│ └── callbacks/ # Interaction logic
|
||||||
├── components/ # Shared UI components
|
├── components/ # Shared UI components
|
||||||
├── figures/ # Plotly figure factories
|
├── figures/
|
||||||
└── toronto/ # Toronto data logic
|
│ └── toronto/ # Toronto figure factories
|
||||||
├── parsers/ # PDF/CSV extraction
|
├── content/
|
||||||
├── loaders/ # Database operations
|
│ └── blog/ # Markdown blog articles
|
||||||
├── schemas/ # Pydantic models
|
├── toronto/ # Toronto data logic
|
||||||
└── models/ # SQLAlchemy ORM
|
│ ├── parsers/ # API data extraction
|
||||||
|
│ ├── loaders/ # Database operations
|
||||||
|
│ ├── schemas/ # Pydantic models
|
||||||
|
│ └── models/ # SQLAlchemy ORM (raw_toronto schema)
|
||||||
|
└── errors/ # Exception handling
|
||||||
|
|
||||||
dbt/
|
dbt/ # dbt project: portfolio
|
||||||
├── models/
|
├── models/
|
||||||
│ ├── staging/ # 1:1 source tables
|
│ ├── shared/ # Cross-domain dimensions
|
||||||
│ ├── intermediate/ # Business logic
|
│ ├── staging/toronto/ # Toronto staging models
|
||||||
│ └── marts/ # Analytical tables
|
│ ├── intermediate/toronto/ # Toronto intermediate models
|
||||||
|
│ └── marts/toronto/ # Toronto analytical tables
|
||||||
|
|
||||||
|
notebooks/
|
||||||
|
└── toronto/ # Toronto documentation (15 notebooks)
|
||||||
|
├── overview/ # Overview tab visualizations
|
||||||
|
├── housing/ # Housing tab visualizations
|
||||||
|
├── safety/ # Safety tab visualizations
|
||||||
|
├── demographics/ # Demographics tab visualizations
|
||||||
|
└── amenities/ # Amenities tab visualizations
|
||||||
|
|
||||||
|
docs/
|
||||||
|
├── PROJECT_REFERENCE.md # Architecture reference
|
||||||
|
├── CONTRIBUTING.md # Developer guide
|
||||||
|
└── project-lessons-learned/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Tech Stack
|
||||||
|
|
||||||
|
| Layer | Technology |
|
||||||
|
|-------|------------|
|
||||||
|
| Database | PostgreSQL 16 + PostGIS |
|
||||||
|
| Validation | Pydantic 2.x |
|
||||||
|
| ORM | SQLAlchemy 2.x |
|
||||||
|
| Transformation | dbt-postgres |
|
||||||
|
| Data Processing | Pandas, GeoPandas |
|
||||||
|
| Visualization | Dash + Plotly |
|
||||||
|
| UI Components | dash-mantine-components |
|
||||||
|
| Testing | pytest |
|
||||||
|
| Python | 3.11+ |
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make test # Run tests
|
make test # Run pytest
|
||||||
make lint # Run linter
|
make lint # Run ruff linter
|
||||||
make format # Format code
|
make format # Format code
|
||||||
make ci # Run all checks
|
make ci # Run all checks
|
||||||
```
|
make dbt-run # Run dbt models
|
||||||
|
make dbt-test # Run dbt tests
|
||||||
## Data Pipeline
|
|
||||||
|
|
||||||
```
|
|
||||||
Raw Files (PDF/Excel)
|
|
||||||
↓
|
|
||||||
Parsers (pdfplumber, pandas)
|
|
||||||
↓
|
|
||||||
Pydantic Validation
|
|
||||||
↓
|
|
||||||
SQLAlchemy Loaders
|
|
||||||
↓
|
|
||||||
PostgreSQL + PostGIS
|
|
||||||
↓
|
|
||||||
dbt Transformations
|
|
||||||
↓
|
|
||||||
Dash Visualization
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Environment Variables
|
## Environment Variables
|
||||||
@@ -109,12 +182,19 @@ POSTGRES_USER=portfolio
|
|||||||
POSTGRES_PASSWORD=<secure>
|
POSTGRES_PASSWORD=<secure>
|
||||||
POSTGRES_DB=portfolio
|
POSTGRES_DB=portfolio
|
||||||
DASH_DEBUG=true
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=<random>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- **For developers**: See `docs/CONTRIBUTING.md` for setup and contribution guidelines
|
||||||
|
- **For Claude Code**: See `CLAUDE.md` for AI assistant context
|
||||||
|
- **Architecture**: See `docs/PROJECT_REFERENCE.md` for technical details
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT
|
MIT
|
||||||
|
|
||||||
## Author
|
## Author
|
||||||
|
|
||||||
Leo Miranda - [GitHub](https://github.com/lmiranda) | [LinkedIn](https://linkedin.com/in/yourprofile)
|
Leo Miranda
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
name: 'toronto_housing'
|
name: 'portfolio'
|
||||||
version: '1.0.0'
|
|
||||||
config-version: 2
|
config-version: 2
|
||||||
|
|
||||||
profile: 'toronto_housing'
|
profile: 'portfolio'
|
||||||
|
|
||||||
model-paths: ["models"]
|
model-paths: ["models"]
|
||||||
analysis-paths: ["analyses"]
|
analysis-paths: ["analyses"]
|
||||||
@@ -16,13 +15,19 @@ clean-targets:
|
|||||||
- "dbt_packages"
|
- "dbt_packages"
|
||||||
|
|
||||||
models:
|
models:
|
||||||
toronto_housing:
|
portfolio:
|
||||||
|
shared:
|
||||||
|
+materialized: view
|
||||||
|
+schema: shared
|
||||||
staging:
|
staging:
|
||||||
+materialized: view
|
toronto:
|
||||||
+schema: staging
|
+materialized: view
|
||||||
|
+schema: stg_toronto
|
||||||
intermediate:
|
intermediate:
|
||||||
+materialized: view
|
toronto:
|
||||||
+schema: intermediate
|
+materialized: view
|
||||||
|
+schema: int_toronto
|
||||||
marts:
|
marts:
|
||||||
+materialized: table
|
toronto:
|
||||||
+schema: marts
|
+materialized: table
|
||||||
|
+schema: mart_toronto
|
||||||
|
|||||||
11
dbt/macros/generate_schema_name.sql
Normal file
11
dbt/macros/generate_schema_name.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
-- Override dbt default schema name generation.
|
||||||
|
-- Use the custom schema name directly instead of
|
||||||
|
-- concatenating with the target schema.
|
||||||
|
-- See: https://docs.getdbt.com/docs/build/custom-schemas
|
||||||
|
{% macro generate_schema_name(custom_schema_name, node) %}
|
||||||
|
{%- if custom_schema_name is none -%}
|
||||||
|
{{ target.schema }}
|
||||||
|
{%- else -%}
|
||||||
|
{{ custom_schema_name | trim }}
|
||||||
|
{%- endif -%}
|
||||||
|
{% endmacro %}
|
||||||
0
dbt/macros/toronto/.gitkeep
Normal file
0
dbt/macros/toronto/.gitkeep
Normal file
@@ -1,24 +0,0 @@
|
|||||||
version: 2
|
|
||||||
|
|
||||||
models:
|
|
||||||
- name: int_purchases__monthly
|
|
||||||
description: "Purchase data enriched with time and district dimensions"
|
|
||||||
columns:
|
|
||||||
- name: purchase_id
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: district_code
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: int_rentals__annual
|
|
||||||
description: "Rental data enriched with time and zone dimensions"
|
|
||||||
columns:
|
|
||||||
- name: rental_id
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: zone_code
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
-- Intermediate: Monthly purchase data enriched with dimensions
|
|
||||||
-- Joins purchases with time and district dimensions for analysis
|
|
||||||
|
|
||||||
with purchases as (
|
|
||||||
select * from {{ ref('stg_trreb__purchases') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
time_dim as (
|
|
||||||
select * from {{ ref('stg_dimensions__time') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
district_dim as (
|
|
||||||
select * from {{ ref('stg_dimensions__trreb_districts') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
enriched as (
|
|
||||||
select
|
|
||||||
p.purchase_id,
|
|
||||||
|
|
||||||
-- Time attributes
|
|
||||||
t.date_key,
|
|
||||||
t.full_date,
|
|
||||||
t.year,
|
|
||||||
t.month,
|
|
||||||
t.quarter,
|
|
||||||
t.month_name,
|
|
||||||
|
|
||||||
-- District attributes
|
|
||||||
d.district_key,
|
|
||||||
d.district_code,
|
|
||||||
d.district_name,
|
|
||||||
d.area_type,
|
|
||||||
|
|
||||||
-- Metrics
|
|
||||||
p.sales_count,
|
|
||||||
p.dollar_volume,
|
|
||||||
p.avg_price,
|
|
||||||
p.median_price,
|
|
||||||
p.new_listings,
|
|
||||||
p.active_listings,
|
|
||||||
p.days_on_market,
|
|
||||||
p.sale_to_list_ratio,
|
|
||||||
|
|
||||||
-- Calculated metrics
|
|
||||||
case
|
|
||||||
when p.active_listings > 0
|
|
||||||
then round(p.sales_count::numeric / p.active_listings, 3)
|
|
||||||
else null
|
|
||||||
end as absorption_rate,
|
|
||||||
|
|
||||||
case
|
|
||||||
when p.sales_count > 0
|
|
||||||
then round(p.active_listings::numeric / p.sales_count, 1)
|
|
||||||
else null
|
|
||||||
end as months_of_inventory
|
|
||||||
|
|
||||||
from purchases p
|
|
||||||
inner join time_dim t on p.date_key = t.date_key
|
|
||||||
inner join district_dim d on p.district_key = d.district_key
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from enriched
|
|
||||||
87
dbt/models/intermediate/toronto/_intermediate.yml
Normal file
87
dbt/models/intermediate/toronto/_intermediate.yml
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: int_rentals__annual
|
||||||
|
description: "Rental data enriched with time and zone dimensions"
|
||||||
|
columns:
|
||||||
|
- name: rental_id
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: zone_code
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: int_neighbourhood__demographics
|
||||||
|
description: "Combined census demographics with neighbourhood attributes"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: census_year
|
||||||
|
description: "Census year"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: income_quintile
|
||||||
|
description: "Income quintile (1-5, city-wide)"
|
||||||
|
|
||||||
|
- name: int_neighbourhood__housing
|
||||||
|
description: "Housing indicators combining census and rental data"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: year
|
||||||
|
description: "Reference year"
|
||||||
|
- name: rent_to_income_pct
|
||||||
|
description: "Rent as percentage of median income"
|
||||||
|
- name: is_affordable
|
||||||
|
description: "Boolean: rent <= 30% of income"
|
||||||
|
|
||||||
|
- name: int_neighbourhood__crime_summary
|
||||||
|
description: "Aggregated crime with year-over-year trends"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: year
|
||||||
|
description: "Statistics year"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: crime_rate_per_100k
|
||||||
|
description: "Total crime rate per 100K population"
|
||||||
|
- name: yoy_change_pct
|
||||||
|
description: "Year-over-year change percentage"
|
||||||
|
|
||||||
|
- name: int_neighbourhood__amenity_scores
|
||||||
|
description: "Normalized amenities per capita and per area"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: year
|
||||||
|
description: "Reference year"
|
||||||
|
- name: total_amenities_per_1000
|
||||||
|
description: "Total amenities per 1000 population"
|
||||||
|
- name: amenities_per_sqkm
|
||||||
|
description: "Total amenities per square km"
|
||||||
|
|
||||||
|
- name: int_rentals__neighbourhood_allocated
|
||||||
|
description: "CMHC rental data allocated to neighbourhoods via area weights"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: year
|
||||||
|
description: "Survey year"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: avg_rent_2bed
|
||||||
|
description: "Weighted average 2-bedroom rent"
|
||||||
|
- name: vacancy_rate
|
||||||
|
description: "Weighted average vacancy rate"
|
||||||
60
dbt/models/intermediate/toronto/int_census__toronto_cma.sql
Normal file
60
dbt/models/intermediate/toronto/int_census__toronto_cma.sql
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
-- Intermediate: Toronto CMA census statistics by year
|
||||||
|
-- Provides city-wide averages for metrics not available at neighbourhood level
|
||||||
|
-- Used when neighbourhood-level data is unavailable (e.g., median household income)
|
||||||
|
-- Grain: One row per year
|
||||||
|
|
||||||
|
with years as (
|
||||||
|
select * from {{ ref('int_year_spine') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
census as (
|
||||||
|
select * from {{ ref('stg_toronto__census') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Census data is only available for 2016 and 2021
|
||||||
|
-- Map each analysis year to the appropriate census year
|
||||||
|
year_to_census as (
|
||||||
|
select
|
||||||
|
y.year,
|
||||||
|
case
|
||||||
|
when y.year <= 2018 then 2016
|
||||||
|
else 2021
|
||||||
|
end as census_year
|
||||||
|
from years y
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Toronto CMA median household income from Statistics Canada
|
||||||
|
-- Source: Census Profile Table 98-316-X2021001
|
||||||
|
-- 2016: $65,829 (from Census Profile)
|
||||||
|
-- 2021: $84,000 (from Census Profile)
|
||||||
|
cma_income as (
|
||||||
|
select 2016 as census_year, 65829 as median_household_income union all
|
||||||
|
select 2021 as census_year, 84000 as median_household_income
|
||||||
|
),
|
||||||
|
|
||||||
|
-- City-wide aggregates from loaded neighbourhood data
|
||||||
|
city_aggregates as (
|
||||||
|
select
|
||||||
|
census_year,
|
||||||
|
sum(population) as total_population,
|
||||||
|
avg(population_density) as avg_population_density,
|
||||||
|
avg(unemployment_rate) as avg_unemployment_rate
|
||||||
|
from census
|
||||||
|
where population is not null
|
||||||
|
group by census_year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
y.year,
|
||||||
|
y.census_year,
|
||||||
|
ci.median_household_income,
|
||||||
|
ca.total_population,
|
||||||
|
ca.avg_population_density,
|
||||||
|
ca.avg_unemployment_rate
|
||||||
|
from year_to_census y
|
||||||
|
left join cma_income ci on y.census_year = ci.census_year
|
||||||
|
left join city_aggregates ca on y.census_year = ca.census_year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
-- Intermediate: Normalized amenities per 1000 population
|
||||||
|
-- Pivots amenity types and calculates per-capita metrics
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
amenities as (
|
||||||
|
select * from {{ ref('stg_toronto__amenities') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Aggregate amenity types
|
||||||
|
amenities_by_year as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
amenity_year as year,
|
||||||
|
sum(case when amenity_type = 'Parks' then amenity_count else 0 end) as parks_count,
|
||||||
|
sum(case when amenity_type = 'Schools' then amenity_count else 0 end) as schools_count,
|
||||||
|
sum(case when amenity_type = 'Transit Stops' then amenity_count else 0 end) as transit_count,
|
||||||
|
sum(case when amenity_type = 'Libraries' then amenity_count else 0 end) as libraries_count,
|
||||||
|
sum(case when amenity_type = 'Community Centres' then amenity_count else 0 end) as community_centres_count,
|
||||||
|
sum(case when amenity_type = 'Recreation' then amenity_count else 0 end) as recreation_count,
|
||||||
|
sum(amenity_count) as total_amenities
|
||||||
|
from amenities
|
||||||
|
group by neighbourhood_id, amenity_year
|
||||||
|
),
|
||||||
|
|
||||||
|
amenity_scores as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
n.population,
|
||||||
|
n.land_area_sqkm,
|
||||||
|
|
||||||
|
coalesce(a.year, 2021) as year,
|
||||||
|
|
||||||
|
-- Raw counts
|
||||||
|
a.parks_count,
|
||||||
|
a.schools_count,
|
||||||
|
a.transit_count,
|
||||||
|
a.libraries_count,
|
||||||
|
a.community_centres_count,
|
||||||
|
a.recreation_count,
|
||||||
|
a.total_amenities,
|
||||||
|
|
||||||
|
-- Per 1000 population
|
||||||
|
case when n.population > 0
|
||||||
|
then round(a.parks_count::numeric / n.population * 1000, 3)
|
||||||
|
else null
|
||||||
|
end as parks_per_1000,
|
||||||
|
|
||||||
|
case when n.population > 0
|
||||||
|
then round(a.schools_count::numeric / n.population * 1000, 3)
|
||||||
|
else null
|
||||||
|
end as schools_per_1000,
|
||||||
|
|
||||||
|
case when n.population > 0
|
||||||
|
then round(a.transit_count::numeric / n.population * 1000, 3)
|
||||||
|
else null
|
||||||
|
end as transit_per_1000,
|
||||||
|
|
||||||
|
case when n.population > 0
|
||||||
|
then round(a.total_amenities::numeric / n.population * 1000, 3)
|
||||||
|
else null
|
||||||
|
end as total_amenities_per_1000,
|
||||||
|
|
||||||
|
-- Per square km
|
||||||
|
case when n.land_area_sqkm > 0
|
||||||
|
then round(a.total_amenities::numeric / n.land_area_sqkm, 2)
|
||||||
|
else null
|
||||||
|
end as amenities_per_sqkm
|
||||||
|
|
||||||
|
from neighbourhoods n
|
||||||
|
left join amenities_by_year a on n.neighbourhood_id = a.neighbourhood_id
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from amenity_scores
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
-- Intermediate: Aggregated crime by neighbourhood with YoY change
|
||||||
|
-- Pivots crime types and calculates year-over-year trends
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
crime as (
|
||||||
|
select * from {{ ref('stg_toronto__crime') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Aggregate crime types
|
||||||
|
crime_by_year as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
crime_year as year,
|
||||||
|
sum(incident_count) as total_incidents,
|
||||||
|
sum(case when crime_type = 'assault' then incident_count else 0 end) as assault_count,
|
||||||
|
sum(case when crime_type = 'auto_theft' then incident_count else 0 end) as auto_theft_count,
|
||||||
|
sum(case when crime_type = 'break_and_enter' then incident_count else 0 end) as break_enter_count,
|
||||||
|
sum(case when crime_type = 'robbery' then incident_count else 0 end) as robbery_count,
|
||||||
|
sum(case when crime_type = 'theft_over' then incident_count else 0 end) as theft_over_count,
|
||||||
|
sum(case when crime_type = 'homicide' then incident_count else 0 end) as homicide_count,
|
||||||
|
avg(rate_per_100k) as avg_rate_per_100k
|
||||||
|
from crime
|
||||||
|
group by neighbourhood_id, crime_year
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Add year-over-year changes
|
||||||
|
with_yoy as (
|
||||||
|
select
|
||||||
|
c.*,
|
||||||
|
lag(c.total_incidents, 1) over (
|
||||||
|
partition by c.neighbourhood_id
|
||||||
|
order by c.year
|
||||||
|
) as prev_year_incidents,
|
||||||
|
round(
|
||||||
|
(c.total_incidents - lag(c.total_incidents, 1) over (
|
||||||
|
partition by c.neighbourhood_id
|
||||||
|
order by c.year
|
||||||
|
))::numeric /
|
||||||
|
nullif(lag(c.total_incidents, 1) over (
|
||||||
|
partition by c.neighbourhood_id
|
||||||
|
order by c.year
|
||||||
|
), 0) * 100,
|
||||||
|
2
|
||||||
|
) as yoy_change_pct
|
||||||
|
from crime_by_year c
|
||||||
|
),
|
||||||
|
|
||||||
|
crime_summary as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
n.population,
|
||||||
|
|
||||||
|
w.year,
|
||||||
|
w.total_incidents,
|
||||||
|
w.assault_count,
|
||||||
|
w.auto_theft_count,
|
||||||
|
w.break_enter_count,
|
||||||
|
w.robbery_count,
|
||||||
|
w.theft_over_count,
|
||||||
|
w.homicide_count,
|
||||||
|
w.yoy_change_pct,
|
||||||
|
|
||||||
|
-- Crime rate per 100K population (use source data avg, or calculate if population available)
|
||||||
|
coalesce(
|
||||||
|
w.avg_rate_per_100k,
|
||||||
|
case
|
||||||
|
when n.population > 0
|
||||||
|
then round(w.total_incidents::numeric / n.population * 100000, 2)
|
||||||
|
else null
|
||||||
|
end
|
||||||
|
) as crime_rate_per_100k
|
||||||
|
|
||||||
|
from neighbourhoods n
|
||||||
|
inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from crime_summary
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
-- Intermediate: Combined census demographics by neighbourhood
|
||||||
|
-- Joins neighbourhoods with census data for demographic analysis
|
||||||
|
-- Grain: One row per neighbourhood per census year
|
||||||
|
|
||||||
|
with neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
census as (
|
||||||
|
select * from {{ ref('stg_toronto__census') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
demographics as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
n.land_area_sqkm,
|
||||||
|
|
||||||
|
-- Use census_year from census data, or fall back to dim_neighbourhood's year
|
||||||
|
coalesce(c.census_year, n.census_year, 2021) as census_year,
|
||||||
|
c.population,
|
||||||
|
c.population_density,
|
||||||
|
c.median_household_income,
|
||||||
|
c.average_household_income,
|
||||||
|
c.median_age,
|
||||||
|
c.unemployment_rate,
|
||||||
|
c.pct_bachelors_or_higher as education_bachelors_pct,
|
||||||
|
c.average_dwelling_value,
|
||||||
|
|
||||||
|
-- Tenure mix
|
||||||
|
c.pct_owner_occupied,
|
||||||
|
c.pct_renter_occupied,
|
||||||
|
|
||||||
|
-- Income quintile (city-wide comparison)
|
||||||
|
ntile(5) over (
|
||||||
|
partition by c.census_year
|
||||||
|
order by c.median_household_income
|
||||||
|
) as income_quintile
|
||||||
|
|
||||||
|
from neighbourhoods n
|
||||||
|
left join census c on n.neighbourhood_id = c.neighbourhood_id
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from demographics
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
-- Intermediate: Housing indicators by neighbourhood
|
||||||
|
-- Combines census housing data with allocated CMHC rental data
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
census as (
|
||||||
|
select * from {{ ref('stg_toronto__census') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
allocated_rentals as (
|
||||||
|
select * from {{ ref('int_rentals__neighbourhood_allocated') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
housing as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
|
||||||
|
coalesce(r.year, c.census_year, 2021) as year,
|
||||||
|
|
||||||
|
-- Census housing metrics
|
||||||
|
c.pct_owner_occupied,
|
||||||
|
c.pct_renter_occupied,
|
||||||
|
c.average_dwelling_value,
|
||||||
|
c.median_household_income,
|
||||||
|
|
||||||
|
-- Allocated rental metrics (weighted average from CMHC zones)
|
||||||
|
r.avg_rent_2bed,
|
||||||
|
r.vacancy_rate,
|
||||||
|
|
||||||
|
-- Affordability calculations
|
||||||
|
case
|
||||||
|
when c.median_household_income > 0 and r.avg_rent_2bed > 0
|
||||||
|
then round((r.avg_rent_2bed * 12 / c.median_household_income) * 100, 2)
|
||||||
|
else null
|
||||||
|
end as rent_to_income_pct,
|
||||||
|
|
||||||
|
-- Affordability threshold (30% of income)
|
||||||
|
case
|
||||||
|
when c.median_household_income > 0 and r.avg_rent_2bed > 0
|
||||||
|
then r.avg_rent_2bed * 12 <= c.median_household_income * 0.30
|
||||||
|
else null
|
||||||
|
end as is_affordable
|
||||||
|
|
||||||
|
from neighbourhoods n
|
||||||
|
left join census c on n.neighbourhood_id = c.neighbourhood_id
|
||||||
|
left join allocated_rentals r
|
||||||
|
on n.neighbourhood_id = r.neighbourhood_id
|
||||||
|
and r.year = c.census_year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from housing
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
-- Intermediate: CMHC rentals allocated to neighbourhoods via area weights
|
||||||
|
-- Disaggregates zone-level rental data to neighbourhood level
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with crosswalk as (
|
||||||
|
select * from {{ ref('stg_cmhc__zone_crosswalk') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
rentals as (
|
||||||
|
select * from {{ ref('int_rentals__annual') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Allocate rental metrics to neighbourhoods using area weights
|
||||||
|
allocated as (
|
||||||
|
select
|
||||||
|
c.neighbourhood_id,
|
||||||
|
r.year,
|
||||||
|
r.bedroom_type,
|
||||||
|
|
||||||
|
-- Weighted average rent (using area weight)
|
||||||
|
sum(r.avg_rent * c.area_weight) as weighted_avg_rent,
|
||||||
|
sum(r.median_rent * c.area_weight) as weighted_median_rent,
|
||||||
|
sum(c.area_weight) as total_weight,
|
||||||
|
|
||||||
|
-- Weighted vacancy rate
|
||||||
|
sum(r.vacancy_rate * c.area_weight) / nullif(sum(c.area_weight), 0) as vacancy_rate,
|
||||||
|
|
||||||
|
-- Weighted rental universe
|
||||||
|
sum(r.rental_universe * c.area_weight) as rental_units_estimate
|
||||||
|
|
||||||
|
from crosswalk c
|
||||||
|
inner join rentals r on c.cmhc_zone_code = r.zone_code
|
||||||
|
group by c.neighbourhood_id, r.year, r.bedroom_type
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Pivot to get 2-bedroom as primary metric
|
||||||
|
pivoted as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
year,
|
||||||
|
max(case when bedroom_type = '2bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
|
||||||
|
max(case when bedroom_type = '1bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
|
||||||
|
max(case when bedroom_type = 'bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
|
||||||
|
max(case when bedroom_type = '3bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
|
||||||
|
avg(vacancy_rate) as vacancy_rate,
|
||||||
|
sum(rental_units_estimate) as total_rental_units
|
||||||
|
from allocated
|
||||||
|
group by neighbourhood_id, year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
|
||||||
|
p.year,
|
||||||
|
round(p.avg_rent_bachelor::numeric, 2) as avg_rent_bachelor,
|
||||||
|
round(p.avg_rent_1bed::numeric, 2) as avg_rent_1bed,
|
||||||
|
round(p.avg_rent_2bed::numeric, 2) as avg_rent_2bed,
|
||||||
|
round(p.avg_rent_3bed::numeric, 2) as avg_rent_3bed,
|
||||||
|
round(p.vacancy_rate::numeric, 2) as vacancy_rate,
|
||||||
|
round(p.total_rental_units::numeric, 0) as total_rental_units
|
||||||
|
|
||||||
|
from neighbourhoods n
|
||||||
|
inner join pivoted p on n.neighbourhood_id = p.neighbourhood_id
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
25
dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql
Normal file
25
dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Intermediate: Toronto CMA rental metrics by year
|
||||||
|
-- Aggregates rental data to city-wide averages by year
|
||||||
|
-- Source: StatCan CMHC data at CMA level
|
||||||
|
-- Grain: One row per year
|
||||||
|
|
||||||
|
with rentals as (
|
||||||
|
select * from {{ ref('stg_cmhc__rentals') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Pivot bedroom types to columns
|
||||||
|
yearly_rentals as (
|
||||||
|
select
|
||||||
|
year,
|
||||||
|
max(case when bedroom_type = 'bachelor' then avg_rent end) as avg_rent_bachelor,
|
||||||
|
max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,
|
||||||
|
max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed,
|
||||||
|
max(case when bedroom_type = '3bed' then avg_rent end) as avg_rent_3bed,
|
||||||
|
-- Use 2-bedroom as standard reference
|
||||||
|
max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_standard,
|
||||||
|
max(vacancy_rate) as vacancy_rate
|
||||||
|
from rentals
|
||||||
|
group by year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from yearly_rentals
|
||||||
11
dbt/models/intermediate/toronto/int_year_spine.sql
Normal file
11
dbt/models/intermediate/toronto/int_year_spine.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
-- Intermediate: Year spine for analysis
|
||||||
|
-- Creates a row for each year from 2014-2025
|
||||||
|
-- Used to drive time-series analysis across all data sources
|
||||||
|
|
||||||
|
with years as (
|
||||||
|
-- Generate years from available data sources
|
||||||
|
-- Crime data: 2014-2024, Rentals: 2019-2025
|
||||||
|
select generate_series(2014, 2025) as year
|
||||||
|
)
|
||||||
|
|
||||||
|
select year from years
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
version: 2
|
|
||||||
|
|
||||||
models:
|
|
||||||
- name: mart_toronto_purchases
|
|
||||||
description: "Final mart for Toronto purchase/sales analysis by district and time"
|
|
||||||
columns:
|
|
||||||
- name: purchase_id
|
|
||||||
description: "Unique purchase record identifier"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: mart_toronto_rentals
|
|
||||||
description: "Final mart for Toronto rental market analysis by zone and time"
|
|
||||||
columns:
|
|
||||||
- name: rental_id
|
|
||||||
description: "Unique rental record identifier"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: mart_toronto_market_summary
|
|
||||||
description: "Combined market summary aggregating purchases and rentals at Toronto level"
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
-- Mart: Toronto Market Summary
|
|
||||||
-- Aggregated view combining purchase and rental market indicators
|
|
||||||
-- Grain: One row per year-month
|
|
||||||
|
|
||||||
with purchases_agg as (
|
|
||||||
select
|
|
||||||
year,
|
|
||||||
month,
|
|
||||||
month_name,
|
|
||||||
quarter,
|
|
||||||
|
|
||||||
-- Aggregate purchase metrics across all districts
|
|
||||||
sum(sales_count) as total_sales,
|
|
||||||
sum(dollar_volume) as total_dollar_volume,
|
|
||||||
round(avg(avg_price), 0) as avg_price_all_districts,
|
|
||||||
round(avg(median_price), 0) as median_price_all_districts,
|
|
||||||
sum(new_listings) as total_new_listings,
|
|
||||||
sum(active_listings) as total_active_listings,
|
|
||||||
round(avg(days_on_market), 0) as avg_days_on_market,
|
|
||||||
round(avg(sale_to_list_ratio), 2) as avg_sale_to_list_ratio,
|
|
||||||
round(avg(absorption_rate), 3) as avg_absorption_rate,
|
|
||||||
round(avg(months_of_inventory), 1) as avg_months_of_inventory,
|
|
||||||
round(avg(avg_price_yoy_pct), 2) as avg_price_yoy_pct
|
|
||||||
|
|
||||||
from {{ ref('mart_toronto_purchases') }}
|
|
||||||
group by year, month, month_name, quarter
|
|
||||||
),
|
|
||||||
|
|
||||||
rentals_agg as (
|
|
||||||
select
|
|
||||||
year,
|
|
||||||
|
|
||||||
-- Aggregate rental metrics across all zones (all bedroom types)
|
|
||||||
round(avg(avg_rent), 0) as avg_rent_all_zones,
|
|
||||||
round(avg(vacancy_rate), 2) as avg_vacancy_rate,
|
|
||||||
round(avg(rent_change_pct), 2) as avg_rent_change_pct,
|
|
||||||
sum(rental_universe) as total_rental_universe
|
|
||||||
|
|
||||||
from {{ ref('mart_toronto_rentals') }}
|
|
||||||
group by year
|
|
||||||
),
|
|
||||||
|
|
||||||
final as (
|
|
||||||
select
|
|
||||||
p.year,
|
|
||||||
p.month,
|
|
||||||
p.month_name,
|
|
||||||
p.quarter,
|
|
||||||
|
|
||||||
-- Purchase market indicators
|
|
||||||
p.total_sales,
|
|
||||||
p.total_dollar_volume,
|
|
||||||
p.avg_price_all_districts,
|
|
||||||
p.median_price_all_districts,
|
|
||||||
p.total_new_listings,
|
|
||||||
p.total_active_listings,
|
|
||||||
p.avg_days_on_market,
|
|
||||||
p.avg_sale_to_list_ratio,
|
|
||||||
p.avg_absorption_rate,
|
|
||||||
p.avg_months_of_inventory,
|
|
||||||
p.avg_price_yoy_pct,
|
|
||||||
|
|
||||||
-- Rental market indicators (annual, so join on year)
|
|
||||||
r.avg_rent_all_zones,
|
|
||||||
r.avg_vacancy_rate,
|
|
||||||
r.avg_rent_change_pct,
|
|
||||||
r.total_rental_universe,
|
|
||||||
|
|
||||||
-- Affordability indicator (price to rent ratio)
|
|
||||||
case
|
|
||||||
when r.avg_rent_all_zones > 0
|
|
||||||
then round(p.avg_price_all_districts / (r.avg_rent_all_zones * 12), 1)
|
|
||||||
else null
|
|
||||||
end as price_to_annual_rent_ratio
|
|
||||||
|
|
||||||
from purchases_agg p
|
|
||||||
left join rentals_agg r on p.year = r.year
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from final
|
|
||||||
order by year desc, month desc
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
-- Mart: Toronto Purchase Market Analysis
|
|
||||||
-- Final analytical table for purchase/sales data visualization
|
|
||||||
-- Grain: One row per district per month
|
|
||||||
|
|
||||||
with purchases as (
|
|
||||||
select * from {{ ref('int_purchases__monthly') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
-- Add year-over-year calculations
|
|
||||||
with_yoy as (
|
|
||||||
select
|
|
||||||
p.*,
|
|
||||||
|
|
||||||
-- Previous year same month values
|
|
||||||
lag(p.avg_price, 12) over (
|
|
||||||
partition by p.district_code
|
|
||||||
order by p.date_key
|
|
||||||
) as avg_price_prev_year,
|
|
||||||
|
|
||||||
lag(p.sales_count, 12) over (
|
|
||||||
partition by p.district_code
|
|
||||||
order by p.date_key
|
|
||||||
) as sales_count_prev_year,
|
|
||||||
|
|
||||||
lag(p.median_price, 12) over (
|
|
||||||
partition by p.district_code
|
|
||||||
order by p.date_key
|
|
||||||
) as median_price_prev_year
|
|
||||||
|
|
||||||
from purchases p
|
|
||||||
),
|
|
||||||
|
|
||||||
final as (
|
|
||||||
select
|
|
||||||
purchase_id,
|
|
||||||
date_key,
|
|
||||||
full_date,
|
|
||||||
year,
|
|
||||||
month,
|
|
||||||
quarter,
|
|
||||||
month_name,
|
|
||||||
district_key,
|
|
||||||
district_code,
|
|
||||||
district_name,
|
|
||||||
area_type,
|
|
||||||
sales_count,
|
|
||||||
dollar_volume,
|
|
||||||
avg_price,
|
|
||||||
median_price,
|
|
||||||
new_listings,
|
|
||||||
active_listings,
|
|
||||||
days_on_market,
|
|
||||||
sale_to_list_ratio,
|
|
||||||
absorption_rate,
|
|
||||||
months_of_inventory,
|
|
||||||
|
|
||||||
-- Year-over-year changes
|
|
||||||
case
|
|
||||||
when avg_price_prev_year > 0
|
|
||||||
then round(((avg_price - avg_price_prev_year) / avg_price_prev_year) * 100, 2)
|
|
||||||
else null
|
|
||||||
end as avg_price_yoy_pct,
|
|
||||||
|
|
||||||
case
|
|
||||||
when sales_count_prev_year > 0
|
|
||||||
then round(((sales_count - sales_count_prev_year)::numeric / sales_count_prev_year) * 100, 2)
|
|
||||||
else null
|
|
||||||
end as sales_count_yoy_pct,
|
|
||||||
|
|
||||||
case
|
|
||||||
when median_price_prev_year > 0
|
|
||||||
then round(((median_price - median_price_prev_year) / median_price_prev_year) * 100, 2)
|
|
||||||
else null
|
|
||||||
end as median_price_yoy_pct
|
|
||||||
|
|
||||||
from with_yoy
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from final
|
|
||||||
135
dbt/models/marts/toronto/_marts.yml
Normal file
135
dbt/models/marts/toronto/_marts.yml
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: mart_toronto_rentals
|
||||||
|
description: "Final mart for Toronto rental market analysis by zone and time"
|
||||||
|
columns:
|
||||||
|
- name: rental_id
|
||||||
|
description: "Unique rental record identifier"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: mart_neighbourhood_overview
|
||||||
|
description: "Neighbourhood overview with composite livability score"
|
||||||
|
meta:
|
||||||
|
dashboard_tab: Overview
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry for mapping"
|
||||||
|
- name: livability_score
|
||||||
|
description: "Composite score: safety (30%), affordability (40%), amenities (30%)"
|
||||||
|
- name: safety_score
|
||||||
|
description: "Safety component score (0-100)"
|
||||||
|
- name: affordability_score
|
||||||
|
description: "Affordability component score (0-100)"
|
||||||
|
- name: amenity_score
|
||||||
|
description: "Amenity component score (0-100)"
|
||||||
|
|
||||||
|
- name: mart_neighbourhood_housing
|
||||||
|
description: "Housing and affordability metrics by neighbourhood"
|
||||||
|
meta:
|
||||||
|
dashboard_tab: Housing
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry for mapping"
|
||||||
|
- name: rent_to_income_pct
|
||||||
|
description: "Rent as percentage of median income"
|
||||||
|
- name: affordability_index
|
||||||
|
description: "100 = city average affordability"
|
||||||
|
- name: rent_yoy_change_pct
|
||||||
|
description: "Year-over-year rent change"
|
||||||
|
|
||||||
|
- name: mart_neighbourhood_safety
|
||||||
|
description: "Crime rates and safety metrics by neighbourhood"
|
||||||
|
meta:
|
||||||
|
dashboard_tab: Safety
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry for mapping"
|
||||||
|
- name: crime_rate_per_100k
|
||||||
|
description: "Total crime rate per 100K population"
|
||||||
|
- name: crime_index
|
||||||
|
description: "100 = city average crime rate"
|
||||||
|
- name: safety_tier
|
||||||
|
description: "Safety tier (1=safest, 5=highest crime)"
|
||||||
|
data_tests:
|
||||||
|
- accepted_values:
|
||||||
|
arguments:
|
||||||
|
values: [1, 2, 3, 4, 5]
|
||||||
|
|
||||||
|
- name: mart_neighbourhood_demographics
|
||||||
|
description: "Demographics and income metrics by neighbourhood"
|
||||||
|
meta:
|
||||||
|
dashboard_tab: Demographics
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry for mapping"
|
||||||
|
- name: median_household_income
|
||||||
|
description: "Median household income"
|
||||||
|
- name: income_index
|
||||||
|
description: "100 = city average income"
|
||||||
|
- name: income_quintile
|
||||||
|
description: "Income quintile (1-5)"
|
||||||
|
data_tests:
|
||||||
|
- accepted_values:
|
||||||
|
arguments:
|
||||||
|
values: [1, 2, 3, 4, 5]
|
||||||
|
|
||||||
|
- name: mart_neighbourhood_amenities
|
||||||
|
description: "Amenity access metrics by neighbourhood"
|
||||||
|
meta:
|
||||||
|
dashboard_tab: Amenities
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood identifier"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry for mapping"
|
||||||
|
- name: total_amenities_per_1000
|
||||||
|
description: "Total amenities per 1000 population"
|
||||||
|
- name: amenity_index
|
||||||
|
description: "100 = city average amenities"
|
||||||
|
- name: amenity_tier
|
||||||
|
description: "Amenity tier (1=best, 5=lowest)"
|
||||||
|
data_tests:
|
||||||
|
- accepted_values:
|
||||||
|
arguments:
|
||||||
|
values: [1, 2, 3, 4, 5]
|
||||||
89
dbt/models/marts/toronto/mart_neighbourhood_amenities.sql
Normal file
89
dbt/models/marts/toronto/mart_neighbourhood_amenities.sql
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
-- Mart: Neighbourhood Amenities Analysis
|
||||||
|
-- Dashboard Tab: Amenities
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with amenities as (
|
||||||
|
select * from {{ ref('int_neighbourhood__amenity_scores') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- City-wide averages for comparison
|
||||||
|
city_avg as (
|
||||||
|
select
|
||||||
|
year,
|
||||||
|
avg(parks_per_1000) as city_avg_parks,
|
||||||
|
avg(schools_per_1000) as city_avg_schools,
|
||||||
|
avg(transit_per_1000) as city_avg_transit,
|
||||||
|
avg(total_amenities_per_1000) as city_avg_total_amenities
|
||||||
|
from amenities
|
||||||
|
group by year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
a.neighbourhood_id,
|
||||||
|
a.neighbourhood_name,
|
||||||
|
a.geometry,
|
||||||
|
a.population,
|
||||||
|
a.land_area_sqkm,
|
||||||
|
a.year,
|
||||||
|
|
||||||
|
-- Raw counts
|
||||||
|
a.parks_count,
|
||||||
|
a.schools_count,
|
||||||
|
a.transit_count,
|
||||||
|
a.libraries_count,
|
||||||
|
a.community_centres_count,
|
||||||
|
a.recreation_count,
|
||||||
|
a.total_amenities,
|
||||||
|
|
||||||
|
-- Per 1000 population
|
||||||
|
a.parks_per_1000,
|
||||||
|
a.schools_per_1000,
|
||||||
|
a.transit_per_1000,
|
||||||
|
a.total_amenities_per_1000,
|
||||||
|
|
||||||
|
-- Per square km
|
||||||
|
a.amenities_per_sqkm,
|
||||||
|
|
||||||
|
-- City averages
|
||||||
|
round(ca.city_avg_parks::numeric, 3) as city_avg_parks_per_1000,
|
||||||
|
round(ca.city_avg_schools::numeric, 3) as city_avg_schools_per_1000,
|
||||||
|
round(ca.city_avg_transit::numeric, 3) as city_avg_transit_per_1000,
|
||||||
|
|
||||||
|
-- Amenity index (100 = city average)
|
||||||
|
case
|
||||||
|
when ca.city_avg_total_amenities > 0
|
||||||
|
then round(a.total_amenities_per_1000 / ca.city_avg_total_amenities * 100, 1)
|
||||||
|
else null
|
||||||
|
end as amenity_index,
|
||||||
|
|
||||||
|
-- Category indices
|
||||||
|
case
|
||||||
|
when ca.city_avg_parks > 0
|
||||||
|
then round(a.parks_per_1000 / ca.city_avg_parks * 100, 1)
|
||||||
|
else null
|
||||||
|
end as parks_index,
|
||||||
|
|
||||||
|
case
|
||||||
|
when ca.city_avg_schools > 0
|
||||||
|
then round(a.schools_per_1000 / ca.city_avg_schools * 100, 1)
|
||||||
|
else null
|
||||||
|
end as schools_index,
|
||||||
|
|
||||||
|
case
|
||||||
|
when ca.city_avg_transit > 0
|
||||||
|
then round(a.transit_per_1000 / ca.city_avg_transit * 100, 1)
|
||||||
|
else null
|
||||||
|
end as transit_index,
|
||||||
|
|
||||||
|
-- Amenity tier (1 = best, 5 = lowest)
|
||||||
|
ntile(5) over (
|
||||||
|
partition by a.year
|
||||||
|
order by a.total_amenities_per_1000 desc
|
||||||
|
) as amenity_tier
|
||||||
|
|
||||||
|
from amenities a
|
||||||
|
left join city_avg ca on a.year = ca.year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
81
dbt/models/marts/toronto/mart_neighbourhood_demographics.sql
Normal file
81
dbt/models/marts/toronto/mart_neighbourhood_demographics.sql
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
-- Mart: Neighbourhood Demographics Analysis
|
||||||
|
-- Dashboard Tab: Demographics
|
||||||
|
-- Grain: One row per neighbourhood per census year
|
||||||
|
|
||||||
|
with demographics as (
|
||||||
|
select * from {{ ref('int_neighbourhood__demographics') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- City-wide averages for comparison
|
||||||
|
city_avg as (
|
||||||
|
select
|
||||||
|
census_year,
|
||||||
|
avg(median_household_income) as city_avg_income,
|
||||||
|
avg(median_age) as city_avg_age,
|
||||||
|
avg(unemployment_rate) as city_avg_unemployment,
|
||||||
|
avg(education_bachelors_pct) as city_avg_education,
|
||||||
|
avg(population_density) as city_avg_density
|
||||||
|
from demographics
|
||||||
|
group by census_year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
d.neighbourhood_id,
|
||||||
|
d.neighbourhood_name,
|
||||||
|
d.geometry,
|
||||||
|
d.census_year as year,
|
||||||
|
|
||||||
|
-- Population
|
||||||
|
d.population,
|
||||||
|
d.land_area_sqkm,
|
||||||
|
d.population_density,
|
||||||
|
|
||||||
|
-- Income
|
||||||
|
d.median_household_income,
|
||||||
|
d.average_household_income,
|
||||||
|
d.income_quintile,
|
||||||
|
|
||||||
|
-- Income index (100 = city average)
|
||||||
|
case
|
||||||
|
when ca.city_avg_income > 0
|
||||||
|
then round(d.median_household_income / ca.city_avg_income * 100, 1)
|
||||||
|
else null
|
||||||
|
end as income_index,
|
||||||
|
|
||||||
|
-- Demographics
|
||||||
|
d.median_age,
|
||||||
|
d.unemployment_rate,
|
||||||
|
d.education_bachelors_pct,
|
||||||
|
|
||||||
|
-- Age index (100 = city average)
|
||||||
|
case
|
||||||
|
when ca.city_avg_age > 0
|
||||||
|
then round(d.median_age / ca.city_avg_age * 100, 1)
|
||||||
|
else null
|
||||||
|
end as age_index,
|
||||||
|
|
||||||
|
-- Housing tenure
|
||||||
|
d.pct_owner_occupied,
|
||||||
|
d.pct_renter_occupied,
|
||||||
|
d.average_dwelling_value,
|
||||||
|
|
||||||
|
-- Diversity index (using tenure mix as proxy - higher rental = more diverse typically)
|
||||||
|
round(
|
||||||
|
1 - (
|
||||||
|
power(d.pct_owner_occupied / 100, 2) +
|
||||||
|
power(d.pct_renter_occupied / 100, 2)
|
||||||
|
),
|
||||||
|
3
|
||||||
|
) * 100 as tenure_diversity_index,
|
||||||
|
|
||||||
|
-- City comparisons
|
||||||
|
round(ca.city_avg_income::numeric, 2) as city_avg_income,
|
||||||
|
round(ca.city_avg_age::numeric, 1) as city_avg_age,
|
||||||
|
round(ca.city_avg_unemployment::numeric, 2) as city_avg_unemployment
|
||||||
|
|
||||||
|
from demographics d
|
||||||
|
left join city_avg ca on d.census_year = ca.census_year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
93
dbt/models/marts/toronto/mart_neighbourhood_housing.sql
Normal file
93
dbt/models/marts/toronto/mart_neighbourhood_housing.sql
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
-- Mart: Neighbourhood Housing Analysis
|
||||||
|
-- Dashboard Tab: Housing
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with housing as (
|
||||||
|
select * from {{ ref('int_neighbourhood__housing') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
rentals as (
|
||||||
|
select * from {{ ref('int_rentals__neighbourhood_allocated') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
demographics as (
|
||||||
|
select * from {{ ref('int_neighbourhood__demographics') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Add year-over-year rent changes
|
||||||
|
with_yoy as (
|
||||||
|
select
|
||||||
|
h.*,
|
||||||
|
r.avg_rent_bachelor,
|
||||||
|
r.avg_rent_1bed,
|
||||||
|
r.avg_rent_3bed,
|
||||||
|
r.total_rental_units,
|
||||||
|
d.income_quintile,
|
||||||
|
|
||||||
|
-- Previous year rent for YoY calculation
|
||||||
|
lag(h.avg_rent_2bed, 1) over (
|
||||||
|
partition by h.neighbourhood_id
|
||||||
|
order by h.year
|
||||||
|
) as prev_year_rent_2bed
|
||||||
|
|
||||||
|
from housing h
|
||||||
|
left join rentals r
|
||||||
|
on h.neighbourhood_id = r.neighbourhood_id
|
||||||
|
and h.year = r.year
|
||||||
|
left join demographics d
|
||||||
|
on h.neighbourhood_id = d.neighbourhood_id
|
||||||
|
and h.year = d.census_year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
neighbourhood_name,
|
||||||
|
geometry,
|
||||||
|
year,
|
||||||
|
|
||||||
|
-- Tenure mix
|
||||||
|
pct_owner_occupied,
|
||||||
|
pct_renter_occupied,
|
||||||
|
|
||||||
|
-- Housing values
|
||||||
|
average_dwelling_value,
|
||||||
|
median_household_income,
|
||||||
|
|
||||||
|
-- Rental metrics
|
||||||
|
avg_rent_bachelor,
|
||||||
|
avg_rent_1bed,
|
||||||
|
avg_rent_2bed,
|
||||||
|
avg_rent_3bed,
|
||||||
|
vacancy_rate,
|
||||||
|
total_rental_units,
|
||||||
|
|
||||||
|
-- Affordability
|
||||||
|
rent_to_income_pct,
|
||||||
|
is_affordable,
|
||||||
|
|
||||||
|
-- Affordability index (100 = city average)
|
||||||
|
round(
|
||||||
|
rent_to_income_pct / nullif(
|
||||||
|
avg(rent_to_income_pct) over (partition by year),
|
||||||
|
0
|
||||||
|
) * 100,
|
||||||
|
1
|
||||||
|
) as affordability_index,
|
||||||
|
|
||||||
|
-- Year-over-year rent change
|
||||||
|
case
|
||||||
|
when prev_year_rent_2bed > 0
|
||||||
|
then round(
|
||||||
|
(avg_rent_2bed - prev_year_rent_2bed) / prev_year_rent_2bed * 100,
|
||||||
|
2
|
||||||
|
)
|
||||||
|
else null
|
||||||
|
end as rent_yoy_change_pct,
|
||||||
|
|
||||||
|
income_quintile
|
||||||
|
|
||||||
|
from with_yoy
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
153
dbt/models/marts/toronto/mart_neighbourhood_overview.sql
Normal file
153
dbt/models/marts/toronto/mart_neighbourhood_overview.sql
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
-- Mart: Neighbourhood Overview with Composite Livability Score
|
||||||
|
-- Dashboard Tab: Overview
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)
|
||||||
|
|
||||||
|
with years as (
|
||||||
|
select * from {{ ref('int_year_spine') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
neighbourhoods as (
|
||||||
|
select * from {{ ref('stg_toronto__neighbourhoods') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Create base: all neighbourhoods × all years
|
||||||
|
neighbourhood_years as (
|
||||||
|
select
|
||||||
|
n.neighbourhood_id,
|
||||||
|
n.neighbourhood_name,
|
||||||
|
n.geometry,
|
||||||
|
y.year
|
||||||
|
from neighbourhoods n
|
||||||
|
cross join years y
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Census data (available for 2016, 2021)
|
||||||
|
-- For each year, use the most recent census data available
|
||||||
|
census as (
|
||||||
|
select * from {{ ref('stg_toronto__census') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
census_mapped as (
|
||||||
|
select
|
||||||
|
ny.neighbourhood_id,
|
||||||
|
ny.year,
|
||||||
|
c.population,
|
||||||
|
c.unemployment_rate,
|
||||||
|
c.pct_bachelors_or_higher as education_bachelors_pct
|
||||||
|
from neighbourhood_years ny
|
||||||
|
left join census c on ny.neighbourhood_id = c.neighbourhood_id
|
||||||
|
-- Use census year <= analysis year, prefer most recent
|
||||||
|
and c.census_year = (
|
||||||
|
select max(c2.census_year)
|
||||||
|
from {{ ref('stg_toronto__census') }} c2
|
||||||
|
where c2.neighbourhood_id = ny.neighbourhood_id
|
||||||
|
and c2.census_year <= ny.year
|
||||||
|
)
|
||||||
|
),
|
||||||
|
|
||||||
|
-- CMA-level census data (for income - not available at neighbourhood level)
|
||||||
|
cma_census as (
|
||||||
|
select * from {{ ref('int_census__toronto_cma') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Crime data (2014-2024)
|
||||||
|
crime as (
|
||||||
|
select * from {{ ref('int_neighbourhood__crime_summary') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
|
||||||
|
rentals as (
|
||||||
|
select * from {{ ref('int_rentals__toronto_cma') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- Compute scores
|
||||||
|
scored as (
|
||||||
|
select
|
||||||
|
ny.neighbourhood_id,
|
||||||
|
ny.neighbourhood_name,
|
||||||
|
ny.geometry,
|
||||||
|
ny.year,
|
||||||
|
cm.population,
|
||||||
|
-- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
|
||||||
|
cma.median_household_income,
|
||||||
|
|
||||||
|
-- Safety score: inverse of crime rate (higher = safer)
|
||||||
|
case
|
||||||
|
when cr.crime_rate_per_100k is not null
|
||||||
|
then 100 - percent_rank() over (
|
||||||
|
partition by ny.year
|
||||||
|
order by cr.crime_rate_per_100k
|
||||||
|
) * 100
|
||||||
|
else null
|
||||||
|
end as safety_score,
|
||||||
|
|
||||||
|
-- Affordability score: inverse of rent-to-income ratio
|
||||||
|
-- Using CMA-level income since neighbourhood-level not available
|
||||||
|
case
|
||||||
|
when cma.median_household_income > 0 and r.avg_rent_standard > 0
|
||||||
|
then 100 - percent_rank() over (
|
||||||
|
partition by ny.year
|
||||||
|
order by (r.avg_rent_standard * 12 / cma.median_household_income)
|
||||||
|
) * 100
|
||||||
|
else null
|
||||||
|
end as affordability_score,
|
||||||
|
|
||||||
|
-- Raw metrics
|
||||||
|
cr.crime_rate_per_100k,
|
||||||
|
case
|
||||||
|
when cma.median_household_income > 0 and r.avg_rent_standard > 0
|
||||||
|
then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
|
||||||
|
else null
|
||||||
|
end as rent_to_income_pct,
|
||||||
|
r.avg_rent_standard as avg_rent_2bed,
|
||||||
|
r.vacancy_rate
|
||||||
|
|
||||||
|
from neighbourhood_years ny
|
||||||
|
left join census_mapped cm
|
||||||
|
on ny.neighbourhood_id = cm.neighbourhood_id
|
||||||
|
and ny.year = cm.year
|
||||||
|
left join cma_census cma
|
||||||
|
on ny.year = cma.year
|
||||||
|
left join crime cr
|
||||||
|
on ny.neighbourhood_id = cr.neighbourhood_id
|
||||||
|
and ny.year = cr.year
|
||||||
|
left join rentals r
|
||||||
|
on ny.year = r.year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
neighbourhood_name,
|
||||||
|
geometry,
|
||||||
|
year,
|
||||||
|
population,
|
||||||
|
median_household_income,
|
||||||
|
|
||||||
|
-- Component scores (0-100)
|
||||||
|
round(safety_score::numeric, 1) as safety_score,
|
||||||
|
round(affordability_score::numeric, 1) as affordability_score,
|
||||||
|
-- TODO: Replace with actual amenity score when fact_amenities is populated
|
||||||
|
-- Currently uses neutral placeholder (50.0) which affects livability_score accuracy
|
||||||
|
50.0 as amenity_score,
|
||||||
|
|
||||||
|
-- Composite livability score: safety (40%), affordability (40%), amenities (20%)
|
||||||
|
round(
|
||||||
|
(coalesce(safety_score, 50) * 0.40 +
|
||||||
|
coalesce(affordability_score, 50) * 0.40 +
|
||||||
|
50 * 0.20)::numeric,
|
||||||
|
1
|
||||||
|
) as livability_score,
|
||||||
|
|
||||||
|
-- Raw metrics
|
||||||
|
crime_rate_per_100k,
|
||||||
|
rent_to_income_pct,
|
||||||
|
avg_rent_2bed,
|
||||||
|
vacancy_rate,
|
||||||
|
null::numeric as total_amenities_per_1000
|
||||||
|
|
||||||
|
from scored
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
78
dbt/models/marts/toronto/mart_neighbourhood_safety.sql
Normal file
78
dbt/models/marts/toronto/mart_neighbourhood_safety.sql
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
-- Mart: Neighbourhood Safety Analysis
|
||||||
|
-- Dashboard Tab: Safety
|
||||||
|
-- Grain: One row per neighbourhood per year
|
||||||
|
|
||||||
|
with crime as (
|
||||||
|
select * from {{ ref('int_neighbourhood__crime_summary') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
-- City-wide averages for comparison
|
||||||
|
city_avg as (
|
||||||
|
select
|
||||||
|
year,
|
||||||
|
avg(crime_rate_per_100k) as city_avg_crime_rate,
|
||||||
|
avg(assault_count) as city_avg_assault,
|
||||||
|
avg(auto_theft_count) as city_avg_auto_theft,
|
||||||
|
avg(break_enter_count) as city_avg_break_enter
|
||||||
|
from crime
|
||||||
|
group by year
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
c.neighbourhood_id,
|
||||||
|
c.neighbourhood_name,
|
||||||
|
c.geometry,
|
||||||
|
c.population,
|
||||||
|
c.year,
|
||||||
|
|
||||||
|
-- Total crime
|
||||||
|
c.total_incidents,
|
||||||
|
c.crime_rate_per_100k,
|
||||||
|
c.yoy_change_pct as crime_yoy_change_pct,
|
||||||
|
|
||||||
|
-- Crime breakdown
|
||||||
|
c.assault_count,
|
||||||
|
c.auto_theft_count,
|
||||||
|
c.break_enter_count,
|
||||||
|
c.robbery_count,
|
||||||
|
c.theft_over_count,
|
||||||
|
c.homicide_count,
|
||||||
|
|
||||||
|
-- Per 100K rates by type
|
||||||
|
case when c.population > 0
|
||||||
|
then round(c.assault_count::numeric / c.population * 100000, 2)
|
||||||
|
else null
|
||||||
|
end as assault_rate_per_100k,
|
||||||
|
|
||||||
|
case when c.population > 0
|
||||||
|
then round(c.auto_theft_count::numeric / c.population * 100000, 2)
|
||||||
|
else null
|
||||||
|
end as auto_theft_rate_per_100k,
|
||||||
|
|
||||||
|
case when c.population > 0
|
||||||
|
then round(c.break_enter_count::numeric / c.population * 100000, 2)
|
||||||
|
else null
|
||||||
|
end as break_enter_rate_per_100k,
|
||||||
|
|
||||||
|
-- Comparison to city average
|
||||||
|
round(ca.city_avg_crime_rate::numeric, 2) as city_avg_crime_rate,
|
||||||
|
|
||||||
|
-- Crime index (100 = city average)
|
||||||
|
case
|
||||||
|
when ca.city_avg_crime_rate > 0
|
||||||
|
then round(c.crime_rate_per_100k / ca.city_avg_crime_rate * 100, 1)
|
||||||
|
else null
|
||||||
|
end as crime_index,
|
||||||
|
|
||||||
|
-- Safety tier based on crime rate percentile
|
||||||
|
ntile(5) over (
|
||||||
|
partition by c.year
|
||||||
|
order by c.crime_rate_per_100k desc
|
||||||
|
) as safety_tier
|
||||||
|
|
||||||
|
from crime c
|
||||||
|
left join city_avg ca on c.year = ca.year
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
33
dbt/models/shared/_shared.yml
Normal file
33
dbt/models/shared/_shared.yml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: stg_dimensions__time
|
||||||
|
description: "Staged time dimension - shared across all projects"
|
||||||
|
columns:
|
||||||
|
- name: date_key
|
||||||
|
description: "Primary key (YYYYMM format)"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: full_date
|
||||||
|
description: "First day of month"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: year
|
||||||
|
description: "Calendar year"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: month
|
||||||
|
description: "Month number (1-12)"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: quarter
|
||||||
|
description: "Quarter (1-4)"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: month_name
|
||||||
|
description: "Month name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: is_month_start
|
||||||
|
description: "Always true (monthly grain)"
|
||||||
25
dbt/models/shared/_sources.yml
Normal file
25
dbt/models/shared/_sources.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
sources:
|
||||||
|
- name: shared
|
||||||
|
description: "Shared dimension tables used across all dashboards"
|
||||||
|
database: portfolio
|
||||||
|
schema: public
|
||||||
|
tables:
|
||||||
|
- name: dim_time
|
||||||
|
description: "Time dimension (monthly grain) - shared across all projects"
|
||||||
|
columns:
|
||||||
|
- name: date_key
|
||||||
|
description: "Primary key (YYYYMM format)"
|
||||||
|
- name: full_date
|
||||||
|
description: "First day of month"
|
||||||
|
- name: year
|
||||||
|
description: "Calendar year"
|
||||||
|
- name: month
|
||||||
|
description: "Month number (1-12)"
|
||||||
|
- name: quarter
|
||||||
|
description: "Quarter (1-4)"
|
||||||
|
- name: month_name
|
||||||
|
description: "Month name"
|
||||||
|
- name: is_month_start
|
||||||
|
description: "Always true (monthly grain)"
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
-- Staged time dimension
|
-- Staged time dimension
|
||||||
-- Source: dim_time table
|
-- Source: shared.dim_time table
|
||||||
-- Grain: One row per month
|
-- Grain: One row per month
|
||||||
|
-- Note: Shared dimension used across all dashboard projects
|
||||||
|
|
||||||
with source as (
|
with source as (
|
||||||
select * from {{ source('toronto_housing', 'dim_time') }}
|
select * from {{ source('shared', 'dim_time') }}
|
||||||
),
|
),
|
||||||
|
|
||||||
staged as (
|
staged as (
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
version: 2
|
|
||||||
|
|
||||||
sources:
|
|
||||||
- name: toronto_housing
|
|
||||||
description: "Toronto housing data loaded from TRREB and CMHC sources"
|
|
||||||
database: portfolio
|
|
||||||
schema: public
|
|
||||||
tables:
|
|
||||||
- name: fact_purchases
|
|
||||||
description: "TRREB monthly purchase/sales statistics by district"
|
|
||||||
columns:
|
|
||||||
- name: id
|
|
||||||
description: "Primary key"
|
|
||||||
- name: date_key
|
|
||||||
description: "Foreign key to dim_time"
|
|
||||||
- name: district_key
|
|
||||||
description: "Foreign key to dim_trreb_district"
|
|
||||||
|
|
||||||
- name: fact_rentals
|
|
||||||
description: "CMHC annual rental survey data by zone and bedroom type"
|
|
||||||
columns:
|
|
||||||
- name: id
|
|
||||||
description: "Primary key"
|
|
||||||
- name: date_key
|
|
||||||
description: "Foreign key to dim_time"
|
|
||||||
- name: zone_key
|
|
||||||
description: "Foreign key to dim_cmhc_zone"
|
|
||||||
|
|
||||||
- name: dim_time
|
|
||||||
description: "Time dimension (monthly grain)"
|
|
||||||
columns:
|
|
||||||
- name: date_key
|
|
||||||
description: "Primary key (YYYYMMDD format)"
|
|
||||||
|
|
||||||
- name: dim_trreb_district
|
|
||||||
description: "TRREB district dimension with geometry"
|
|
||||||
columns:
|
|
||||||
- name: district_key
|
|
||||||
description: "Primary key"
|
|
||||||
- name: district_code
|
|
||||||
description: "TRREB district code"
|
|
||||||
|
|
||||||
- name: dim_cmhc_zone
|
|
||||||
description: "CMHC zone dimension with geometry"
|
|
||||||
columns:
|
|
||||||
- name: zone_key
|
|
||||||
description: "Primary key"
|
|
||||||
- name: zone_code
|
|
||||||
description: "CMHC zone code"
|
|
||||||
|
|
||||||
- name: dim_neighbourhood
|
|
||||||
description: "City of Toronto neighbourhoods (reference only)"
|
|
||||||
columns:
|
|
||||||
- name: neighbourhood_id
|
|
||||||
description: "Primary key"
|
|
||||||
|
|
||||||
- name: dim_policy_event
|
|
||||||
description: "Housing policy events for annotation"
|
|
||||||
columns:
|
|
||||||
- name: event_id
|
|
||||||
description: "Primary key"
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
version: 2
|
|
||||||
|
|
||||||
models:
|
|
||||||
- name: stg_trreb__purchases
|
|
||||||
description: "Staged TRREB purchase/sales data from fact_purchases"
|
|
||||||
columns:
|
|
||||||
- name: purchase_id
|
|
||||||
description: "Unique identifier for purchase record"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: date_key
|
|
||||||
description: "Date dimension key (YYYYMMDD)"
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
- name: district_key
|
|
||||||
description: "TRREB district dimension key"
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: stg_cmhc__rentals
|
|
||||||
description: "Staged CMHC rental market data from fact_rentals"
|
|
||||||
columns:
|
|
||||||
- name: rental_id
|
|
||||||
description: "Unique identifier for rental record"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: date_key
|
|
||||||
description: "Date dimension key (YYYYMMDD)"
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
- name: zone_key
|
|
||||||
description: "CMHC zone dimension key"
|
|
||||||
tests:
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: stg_dimensions__time
|
|
||||||
description: "Staged time dimension"
|
|
||||||
columns:
|
|
||||||
- name: date_key
|
|
||||||
description: "Date dimension key (YYYYMMDD)"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: stg_dimensions__trreb_districts
|
|
||||||
description: "Staged TRREB district dimension"
|
|
||||||
columns:
|
|
||||||
- name: district_key
|
|
||||||
description: "District dimension key"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: district_code
|
|
||||||
description: "TRREB district code (e.g., W01, C01)"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
|
|
||||||
- name: stg_dimensions__cmhc_zones
|
|
||||||
description: "Staged CMHC zone dimension"
|
|
||||||
columns:
|
|
||||||
- name: zone_key
|
|
||||||
description: "Zone dimension key"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
- name: zone_code
|
|
||||||
description: "CMHC zone code"
|
|
||||||
tests:
|
|
||||||
- unique
|
|
||||||
- not_null
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
-- Staged CMHC zone dimension
|
|
||||||
-- Source: dim_cmhc_zone table
|
|
||||||
-- Grain: One row per zone
|
|
||||||
|
|
||||||
with source as (
|
|
||||||
select * from {{ source('toronto_housing', 'dim_cmhc_zone') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
staged as (
|
|
||||||
select
|
|
||||||
zone_key,
|
|
||||||
zone_code,
|
|
||||||
zone_name,
|
|
||||||
geometry
|
|
||||||
from source
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from staged
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
-- Staged TRREB district dimension
|
|
||||||
-- Source: dim_trreb_district table
|
|
||||||
-- Grain: One row per district
|
|
||||||
|
|
||||||
with source as (
|
|
||||||
select * from {{ source('toronto_housing', 'dim_trreb_district') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
staged as (
|
|
||||||
select
|
|
||||||
district_key,
|
|
||||||
district_code,
|
|
||||||
district_name,
|
|
||||||
area_type,
|
|
||||||
geometry
|
|
||||||
from source
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from staged
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
-- Staged TRREB purchase/sales data
|
|
||||||
-- Source: fact_purchases table loaded from TRREB Market Watch PDFs
|
|
||||||
-- Grain: One row per district per month
|
|
||||||
|
|
||||||
with source as (
|
|
||||||
select * from {{ source('toronto_housing', 'fact_purchases') }}
|
|
||||||
),
|
|
||||||
|
|
||||||
staged as (
|
|
||||||
select
|
|
||||||
id as purchase_id,
|
|
||||||
date_key,
|
|
||||||
district_key,
|
|
||||||
sales_count,
|
|
||||||
dollar_volume,
|
|
||||||
avg_price,
|
|
||||||
median_price,
|
|
||||||
new_listings,
|
|
||||||
active_listings,
|
|
||||||
avg_dom as days_on_market,
|
|
||||||
avg_sp_lp as sale_to_list_ratio
|
|
||||||
from source
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from staged
|
|
||||||
93
dbt/models/staging/toronto/_sources.yml
Normal file
93
dbt/models/staging/toronto/_sources.yml
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
sources:
|
||||||
|
- name: toronto
|
||||||
|
description: "Toronto data loaded from CMHC and City of Toronto sources"
|
||||||
|
database: portfolio
|
||||||
|
schema: raw_toronto
|
||||||
|
tables:
|
||||||
|
- name: fact_rentals
|
||||||
|
description: "CMHC annual rental survey data by zone and bedroom type"
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: "Primary key"
|
||||||
|
- name: date_key
|
||||||
|
description: "Foreign key to dim_time"
|
||||||
|
- name: zone_key
|
||||||
|
description: "Foreign key to dim_cmhc_zone"
|
||||||
|
|
||||||
|
- name: dim_cmhc_zone
|
||||||
|
description: "CMHC zone dimension with geometry"
|
||||||
|
columns:
|
||||||
|
- name: zone_key
|
||||||
|
description: "Primary key"
|
||||||
|
- name: zone_code
|
||||||
|
description: "CMHC zone code"
|
||||||
|
|
||||||
|
- name: dim_neighbourhood
|
||||||
|
description: "City of Toronto neighbourhoods (158 official boundaries)"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Primary key"
|
||||||
|
|
||||||
|
- name: dim_policy_event
|
||||||
|
description: "Housing policy events for annotation"
|
||||||
|
columns:
|
||||||
|
- name: event_id
|
||||||
|
description: "Primary key"
|
||||||
|
|
||||||
|
- name: fact_census
|
||||||
|
description: "Census demographics by neighbourhood and year"
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: "Primary key"
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Foreign key to dim_neighbourhood"
|
||||||
|
- name: census_year
|
||||||
|
description: "Census year (2016, 2021, etc.)"
|
||||||
|
- name: population
|
||||||
|
description: "Total population"
|
||||||
|
- name: median_household_income
|
||||||
|
description: "Median household income"
|
||||||
|
|
||||||
|
- name: fact_crime
|
||||||
|
description: "Crime statistics by neighbourhood, year, and type"
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: "Primary key"
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Foreign key to dim_neighbourhood"
|
||||||
|
- name: year
|
||||||
|
description: "Statistics year"
|
||||||
|
- name: crime_type
|
||||||
|
description: "Type of crime"
|
||||||
|
- name: count
|
||||||
|
description: "Number of incidents"
|
||||||
|
- name: rate_per_100k
|
||||||
|
description: "Rate per 100,000 population"
|
||||||
|
|
||||||
|
- name: fact_amenities
|
||||||
|
description: "Amenity counts by neighbourhood and type"
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: "Primary key"
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Foreign key to dim_neighbourhood"
|
||||||
|
- name: amenity_type
|
||||||
|
description: "Type of amenity (parks, schools, transit)"
|
||||||
|
- name: count
|
||||||
|
description: "Number of amenities"
|
||||||
|
- name: year
|
||||||
|
description: "Reference year"
|
||||||
|
|
||||||
|
- name: bridge_cmhc_neighbourhood
|
||||||
|
description: "CMHC zone to neighbourhood mapping with area weights"
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: "Primary key"
|
||||||
|
- name: cmhc_zone_code
|
||||||
|
description: "CMHC zone code"
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood ID"
|
||||||
|
- name: weight
|
||||||
|
description: "Proportional area weight (0-1)"
|
||||||
120
dbt/models/staging/toronto/_staging.yml
Normal file
120
dbt/models/staging/toronto/_staging.yml
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: stg_cmhc__rentals
|
||||||
|
description: "Staged CMHC rental market data from fact_rentals"
|
||||||
|
columns:
|
||||||
|
- name: rental_id
|
||||||
|
description: "Unique identifier for rental record"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: date_key
|
||||||
|
description: "Date dimension key (YYYYMMDD)"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: zone_key
|
||||||
|
description: "CMHC zone dimension key"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: stg_dimensions__cmhc_zones
|
||||||
|
description: "Staged CMHC zone dimension"
|
||||||
|
columns:
|
||||||
|
- name: zone_key
|
||||||
|
description: "Zone dimension key"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: zone_code
|
||||||
|
description: "CMHC zone code"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: stg_toronto__neighbourhoods
|
||||||
|
description: "Staged Toronto neighbourhood dimension (158 official boundaries)"
|
||||||
|
columns:
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood primary key"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_name
|
||||||
|
description: "Official neighbourhood name"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: geometry
|
||||||
|
description: "PostGIS geometry (POLYGON)"
|
||||||
|
|
||||||
|
- name: stg_toronto__census
|
||||||
|
description: "Staged census demographics by neighbourhood"
|
||||||
|
columns:
|
||||||
|
- name: census_id
|
||||||
|
description: "Census record identifier"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood foreign key"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: census_year
|
||||||
|
description: "Census year (2016, 2021)"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: stg_toronto__crime
|
||||||
|
description: "Staged crime statistics by neighbourhood"
|
||||||
|
columns:
|
||||||
|
- name: crime_id
|
||||||
|
description: "Crime record identifier"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood foreign key"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: crime_type
|
||||||
|
description: "Type of crime"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: stg_toronto__amenities
|
||||||
|
description: "Staged amenity counts by neighbourhood"
|
||||||
|
columns:
|
||||||
|
- name: amenity_id
|
||||||
|
description: "Amenity record identifier"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood foreign key"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: amenity_type
|
||||||
|
description: "Type of amenity"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: stg_cmhc__zone_crosswalk
|
||||||
|
description: "Staged CMHC zone to neighbourhood crosswalk with area weights"
|
||||||
|
columns:
|
||||||
|
- name: crosswalk_id
|
||||||
|
description: "Crosswalk record identifier"
|
||||||
|
data_tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: cmhc_zone_code
|
||||||
|
description: "CMHC zone code"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: neighbourhood_id
|
||||||
|
description: "Neighbourhood foreign key"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
|
- name: area_weight
|
||||||
|
description: "Proportional area weight (0-1)"
|
||||||
|
data_tests:
|
||||||
|
- not_null
|
||||||
@@ -1,9 +1,13 @@
|
|||||||
-- Staged CMHC rental market survey data
|
-- Staged CMHC rental market survey data
|
||||||
-- Source: fact_rentals table loaded from CMHC CSV exports
|
-- Source: fact_rentals table loaded from CMHC/StatCan
|
||||||
-- Grain: One row per zone per bedroom type per survey year
|
-- Grain: One row per zone per bedroom type per survey year
|
||||||
|
|
||||||
with source as (
|
with source as (
|
||||||
select * from {{ source('toronto_housing', 'fact_rentals') }}
|
select
|
||||||
|
f.*,
|
||||||
|
t.year as survey_year
|
||||||
|
from {{ source('toronto', 'fact_rentals') }} f
|
||||||
|
join {{ source('shared', 'dim_time') }} t on f.date_key = t.date_key
|
||||||
),
|
),
|
||||||
|
|
||||||
staged as (
|
staged as (
|
||||||
@@ -11,6 +15,7 @@ staged as (
|
|||||||
id as rental_id,
|
id as rental_id,
|
||||||
date_key,
|
date_key,
|
||||||
zone_key,
|
zone_key,
|
||||||
|
survey_year as year,
|
||||||
bedroom_type,
|
bedroom_type,
|
||||||
universe as rental_universe,
|
universe as rental_universe,
|
||||||
avg_rent,
|
avg_rent,
|
||||||
18
dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql
Normal file
18
dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
-- Staged CMHC zone to neighbourhood crosswalk
|
||||||
|
-- Source: bridge_cmhc_neighbourhood table
|
||||||
|
-- Grain: One row per zone-neighbourhood intersection
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'bridge_cmhc_neighbourhood') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
id as crosswalk_id,
|
||||||
|
cmhc_zone_code,
|
||||||
|
neighbourhood_id,
|
||||||
|
weight as area_weight
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
19
dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql
Normal file
19
dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
-- Staged CMHC zone dimension
|
||||||
|
-- Source: dim_cmhc_zone table
|
||||||
|
-- Grain: One row per zone
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'dim_cmhc_zone') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
zone_key,
|
||||||
|
zone_code,
|
||||||
|
zone_name
|
||||||
|
-- geometry column excluded: CMHC does not provide zone boundaries
|
||||||
|
-- Spatial analysis uses dim_neighbourhood geometry instead
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
19
dbt/models/staging/toronto/stg_toronto__amenities.sql
Normal file
19
dbt/models/staging/toronto/stg_toronto__amenities.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
-- Staged amenity counts by neighbourhood
|
||||||
|
-- Source: fact_amenities table
|
||||||
|
-- Grain: One row per neighbourhood per amenity type per year
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'fact_amenities') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
id as amenity_id,
|
||||||
|
neighbourhood_id,
|
||||||
|
amenity_type,
|
||||||
|
count as amenity_count,
|
||||||
|
year as amenity_year
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
27
dbt/models/staging/toronto/stg_toronto__census.sql
Normal file
27
dbt/models/staging/toronto/stg_toronto__census.sql
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
-- Staged census demographics by neighbourhood
|
||||||
|
-- Source: fact_census table
|
||||||
|
-- Grain: One row per neighbourhood per census year
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'fact_census') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
id as census_id,
|
||||||
|
neighbourhood_id,
|
||||||
|
census_year,
|
||||||
|
population,
|
||||||
|
population_density,
|
||||||
|
median_household_income,
|
||||||
|
average_household_income,
|
||||||
|
unemployment_rate,
|
||||||
|
pct_bachelors_or_higher,
|
||||||
|
pct_owner_occupied,
|
||||||
|
pct_renter_occupied,
|
||||||
|
median_age,
|
||||||
|
average_dwelling_value
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
20
dbt/models/staging/toronto/stg_toronto__crime.sql
Normal file
20
dbt/models/staging/toronto/stg_toronto__crime.sql
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
-- Staged crime statistics by neighbourhood
|
||||||
|
-- Source: fact_crime table
|
||||||
|
-- Grain: One row per neighbourhood per year per crime type
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'fact_crime') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
id as crime_id,
|
||||||
|
neighbourhood_id,
|
||||||
|
year as crime_year,
|
||||||
|
crime_type,
|
||||||
|
count as incident_count,
|
||||||
|
rate_per_100k
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
25
dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql
Normal file
25
dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Staged Toronto neighbourhood dimension
|
||||||
|
-- Source: dim_neighbourhood table
|
||||||
|
-- Grain: One row per neighbourhood (158 total)
|
||||||
|
|
||||||
|
with source as (
|
||||||
|
select * from {{ source('toronto', 'dim_neighbourhood') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
staged as (
|
||||||
|
select
|
||||||
|
neighbourhood_id,
|
||||||
|
name as neighbourhood_name,
|
||||||
|
geometry,
|
||||||
|
population,
|
||||||
|
land_area_sqkm,
|
||||||
|
pop_density_per_sqkm,
|
||||||
|
pct_bachelors_or_higher,
|
||||||
|
median_household_income,
|
||||||
|
pct_owner_occupied,
|
||||||
|
pct_renter_occupied,
|
||||||
|
census_year
|
||||||
|
from source
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from staged
|
||||||
11
dbt/package-lock.yml
Normal file
11
dbt/package-lock.yml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
packages:
|
||||||
|
- name: dbt_utils
|
||||||
|
package: dbt-labs/dbt_utils
|
||||||
|
version: 1.3.3
|
||||||
|
- name: dbt_expectations
|
||||||
|
package: calogica/dbt_expectations
|
||||||
|
version: 0.10.4
|
||||||
|
- name: dbt_date
|
||||||
|
package: calogica/dbt_date
|
||||||
|
version: 0.10.1
|
||||||
|
sha1_hash: 51a51ab489f7b302c8745ae3c3781271816b01be
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
toronto_housing:
|
portfolio:
|
||||||
target: dev
|
target: dev
|
||||||
outputs:
|
outputs:
|
||||||
dev:
|
dev:
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
services:
|
services:
|
||||||
db:
|
db:
|
||||||
image: postgis/postgis:16-3.4
|
image: ${POSTGIS_IMAGE:-postgis/postgis:16-3.4}
|
||||||
container_name: portfolio-db
|
container_name: portfolio-db
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
500
docs/CONTRIBUTING.md
Normal file
500
docs/CONTRIBUTING.md
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
# Developer Guide
|
||||||
|
|
||||||
|
Instructions for contributing to the Analytics Portfolio project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
1. [Development Setup](#development-setup)
|
||||||
|
2. [Adding a Blog Post](#adding-a-blog-post)
|
||||||
|
3. [Adding a New Page](#adding-a-new-page)
|
||||||
|
4. [Adding a Dashboard Tab](#adding-a-dashboard-tab)
|
||||||
|
5. [Creating Figure Factories](#creating-figure-factories)
|
||||||
|
6. [Branch Workflow](#branch-workflow)
|
||||||
|
7. [Code Standards](#code-standards)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Development Setup
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Python 3.11+ (via pyenv)
|
||||||
|
- Docker and Docker Compose
|
||||||
|
- Git
|
||||||
|
|
||||||
|
### Initial Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone repository
|
||||||
|
git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
|
||||||
|
cd personal-portfolio
|
||||||
|
|
||||||
|
# Run setup (creates venv, installs deps, copies .env.example)
|
||||||
|
make setup
|
||||||
|
|
||||||
|
# Start PostgreSQL + PostGIS
|
||||||
|
make docker-up
|
||||||
|
|
||||||
|
# Initialize database
|
||||||
|
make db-init
|
||||||
|
|
||||||
|
# Start development server
|
||||||
|
make run
|
||||||
|
```
|
||||||
|
|
||||||
|
The app runs at `http://localhost:8050`.
|
||||||
|
|
||||||
|
### Useful Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make test # Run tests
|
||||||
|
make test-cov # Run tests with coverage
|
||||||
|
make lint # Check code style
|
||||||
|
make format # Auto-format code
|
||||||
|
make typecheck # Run mypy type checker
|
||||||
|
make ci # Run all checks (lint, typecheck, test)
|
||||||
|
make dbt-run # Run dbt transformations
|
||||||
|
make dbt-test # Run dbt tests
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding a Blog Post
|
||||||
|
|
||||||
|
Blog posts are Markdown files with YAML frontmatter, stored in `portfolio_app/content/blog/`.
|
||||||
|
|
||||||
|
### Step 1: Create the Markdown File
|
||||||
|
|
||||||
|
Create a new file in `portfolio_app/content/blog/`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
touch portfolio_app/content/blog/your-article-slug.md
|
||||||
|
```
|
||||||
|
|
||||||
|
The filename becomes the URL slug: `/blog/your-article-slug`
|
||||||
|
|
||||||
|
### Step 2: Add Frontmatter
|
||||||
|
|
||||||
|
Every blog post requires YAML frontmatter at the top:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
---
|
||||||
|
title: "Your Article Title"
|
||||||
|
date: "2026-01-17"
|
||||||
|
description: "A brief description for the article card (1-2 sentences)"
|
||||||
|
tags:
|
||||||
|
- data-engineering
|
||||||
|
- python
|
||||||
|
- lessons-learned
|
||||||
|
status: published
|
||||||
|
---
|
||||||
|
|
||||||
|
Your article content starts here...
|
||||||
|
```
|
||||||
|
|
||||||
|
**Required fields:**
|
||||||
|
|
||||||
|
| Field | Description |
|
||||||
|
|-------|-------------|
|
||||||
|
| `title` | Article title (displayed on cards and page) |
|
||||||
|
| `date` | Publication date in `YYYY-MM-DD` format |
|
||||||
|
| `description` | Short summary for article listing cards |
|
||||||
|
| `tags` | List of tags (displayed as badges) |
|
||||||
|
| `status` | `published` or `draft` (drafts are hidden from listing) |
|
||||||
|
|
||||||
|
### Step 3: Write Content
|
||||||
|
|
||||||
|
Use standard Markdown:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Section Heading
|
||||||
|
|
||||||
|
Regular paragraph text.
|
||||||
|
|
||||||
|
### Subsection
|
||||||
|
|
||||||
|
- Bullet points
|
||||||
|
- Another point
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Code blocks with syntax highlighting
|
||||||
|
def example():
|
||||||
|
return "Hello"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Bold text** and *italic text*.
|
||||||
|
|
||||||
|
> Blockquotes for callouts
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 4: Test Locally
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make run
|
||||||
|
```
|
||||||
|
|
||||||
|
Visit `http://localhost:8050/blog` to see the article listing.
|
||||||
|
Visit `http://localhost:8050/blog/your-article-slug` for the full article.
|
||||||
|
|
||||||
|
### Example: Complete Blog Post
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
---
|
||||||
|
title: "Building ETL Pipelines with Python"
|
||||||
|
date: "2026-01-17"
|
||||||
|
description: "Lessons from building production data pipelines at scale"
|
||||||
|
tags:
|
||||||
|
- python
|
||||||
|
- etl
|
||||||
|
- data-engineering
|
||||||
|
status: published
|
||||||
|
---
|
||||||
|
|
||||||
|
When I started building data pipelines, I made every mistake possible...
|
||||||
|
|
||||||
|
## The Problem
|
||||||
|
|
||||||
|
Most tutorials show toy examples. Real pipelines are different.
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
def safe_transform(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
try:
|
||||||
|
return df.apply(transform_row, axis=1)
|
||||||
|
except ValueError as e:
|
||||||
|
logger.error(f"Transform failed: {e}")
|
||||||
|
raise
|
||||||
|
```
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Ship something that works, then iterate.
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding a New Page
|
||||||
|
|
||||||
|
Pages use Dash's automatic routing based on file location in `portfolio_app/pages/`.
|
||||||
|
|
||||||
|
### Step 1: Create the Page File
|
||||||
|
|
||||||
|
```bash
|
||||||
|
touch portfolio_app/pages/your_page.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Register the Page
|
||||||
|
|
||||||
|
Every page must call `dash.register_page()`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
"""Your page description."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path="/your-page", # URL path
|
||||||
|
name="Your Page", # Display name (for nav)
|
||||||
|
title="Your Page Title" # Browser tab title
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def layout() -> dmc.Container:
|
||||||
|
"""Page layout function."""
|
||||||
|
return dmc.Container(
|
||||||
|
dmc.Stack(
|
||||||
|
[
|
||||||
|
dmc.Title("Your Page", order=1),
|
||||||
|
dmc.Text("Page content here."),
|
||||||
|
],
|
||||||
|
gap="lg",
|
||||||
|
),
|
||||||
|
size="md",
|
||||||
|
py="xl",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3: Page with Dynamic Content
|
||||||
|
|
||||||
|
For pages with URL parameters:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# pages/blog/article.py
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path_template="/blog/<slug>", # Dynamic parameter
|
||||||
|
name="Article",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def layout(slug: str = "") -> dmc.Container:
|
||||||
|
"""Layout receives URL parameters as arguments."""
|
||||||
|
article = get_article(slug)
|
||||||
|
if not article:
|
||||||
|
return dmc.Text("Article not found")
|
||||||
|
|
||||||
|
return dmc.Container(
|
||||||
|
dmc.Title(article["meta"]["title"]),
|
||||||
|
# ...
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 4: Add Navigation (Optional)
|
||||||
|
|
||||||
|
To add the page to the sidebar, edit `portfolio_app/components/sidebar.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# For main pages (Home, About, Blog, etc.)
|
||||||
|
NAV_ITEMS_MAIN = [
|
||||||
|
{"path": "/", "icon": "tabler:home", "label": "Home"},
|
||||||
|
{"path": "/your-page", "icon": "tabler:star", "label": "Your Page"},
|
||||||
|
# ...
|
||||||
|
]
|
||||||
|
|
||||||
|
# For project/dashboard pages
|
||||||
|
NAV_ITEMS_PROJECTS = [
|
||||||
|
{"path": "/projects", "icon": "tabler:folder", "label": "Projects"},
|
||||||
|
{"path": "/your-dashboard", "icon": "tabler:chart-bar", "label": "Your Dashboard"},
|
||||||
|
# ...
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
The sidebar uses icon buttons with tooltips. Each item needs `path`, `icon` (Tabler icon name), and `label` (tooltip text).
|
||||||
|
|
||||||
|
### URL Routing Summary
|
||||||
|
|
||||||
|
| File Location | URL |
|
||||||
|
|---------------|-----|
|
||||||
|
| `pages/home.py` | `/` (if `path="/"`) |
|
||||||
|
| `pages/about.py` | `/about` |
|
||||||
|
| `pages/blog/index.py` | `/blog` |
|
||||||
|
| `pages/blog/article.py` | `/blog/<slug>` |
|
||||||
|
| `pages/toronto/dashboard.py` | `/toronto` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding a Dashboard Tab
|
||||||
|
|
||||||
|
Dashboard tabs are in `portfolio_app/pages/toronto/tabs/`.
|
||||||
|
|
||||||
|
### Step 1: Create Tab Layout
|
||||||
|
|
||||||
|
```python
|
||||||
|
# pages/toronto/tabs/your_tab.py
|
||||||
|
"""Your tab description."""
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
from portfolio_app.figures.toronto.choropleth import create_choropleth
|
||||||
|
from portfolio_app.toronto.demo_data import get_demo_data
|
||||||
|
|
||||||
|
|
||||||
|
def create_your_tab_layout() -> dmc.Stack:
|
||||||
|
"""Create the tab layout."""
|
||||||
|
data = get_demo_data()
|
||||||
|
|
||||||
|
return dmc.Stack(
|
||||||
|
[
|
||||||
|
dmc.Grid(
|
||||||
|
[
|
||||||
|
dmc.GridCol(
|
||||||
|
# Map on left
|
||||||
|
create_choropleth(data, "your_metric"),
|
||||||
|
span=8,
|
||||||
|
),
|
||||||
|
dmc.GridCol(
|
||||||
|
# KPI cards on right
|
||||||
|
create_kpi_cards(data),
|
||||||
|
span=4,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
# Charts below
|
||||||
|
create_supporting_charts(data),
|
||||||
|
],
|
||||||
|
gap="lg",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Register in Dashboard
|
||||||
|
|
||||||
|
Edit `pages/toronto/dashboard.py` to add the tab:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from portfolio_app.pages.toronto.tabs.your_tab import create_your_tab_layout
|
||||||
|
|
||||||
|
# In the tabs list:
|
||||||
|
dmc.TabsTab("Your Tab", value="your-tab"),
|
||||||
|
|
||||||
|
# In the panels:
|
||||||
|
dmc.TabsPanel(create_your_tab_layout(), value="your-tab"),
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Creating Figure Factories
|
||||||
|
|
||||||
|
Figure factories are organized by dashboard domain under `portfolio_app/figures/{domain}/`.
|
||||||
|
|
||||||
|
### Pattern
|
||||||
|
|
||||||
|
```python
|
||||||
|
# figures/toronto/your_chart.py
|
||||||
|
"""Your chart type factory for Toronto dashboard."""
|
||||||
|
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def create_your_chart(
|
||||||
|
df: pd.DataFrame,
|
||||||
|
x_col: str,
|
||||||
|
y_col: str,
|
||||||
|
title: str = "",
|
||||||
|
) -> go.Figure:
|
||||||
|
"""Create a your_chart figure.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
df: DataFrame with data.
|
||||||
|
x_col: Column for x-axis.
|
||||||
|
y_col: Column for y-axis.
|
||||||
|
title: Optional chart title.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured Plotly figure.
|
||||||
|
"""
|
||||||
|
fig = px.bar(df, x=x_col, y=y_col, title=title)
|
||||||
|
|
||||||
|
fig.update_layout(
|
||||||
|
template="plotly_white",
|
||||||
|
margin=dict(l=40, r=40, t=40, b=40),
|
||||||
|
)
|
||||||
|
|
||||||
|
return fig
|
||||||
|
```
|
||||||
|
|
||||||
|
### Export from `__init__.py`
|
||||||
|
|
||||||
|
```python
|
||||||
|
# figures/toronto/__init__.py
|
||||||
|
from .your_chart import create_your_chart
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_your_chart",
|
||||||
|
# ...
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Importing Figure Factories
|
||||||
|
|
||||||
|
```python
|
||||||
|
# In callbacks or tabs
|
||||||
|
from portfolio_app.figures.toronto import create_choropleth
|
||||||
|
from portfolio_app.figures.toronto.bar_charts import create_ranking_bar
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Branch Workflow
|
||||||
|
|
||||||
|
```
|
||||||
|
main (production)
|
||||||
|
↑
|
||||||
|
staging (pre-production)
|
||||||
|
↑
|
||||||
|
development (integration)
|
||||||
|
↑
|
||||||
|
feature/XX-description (your work)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Creating a Feature Branch
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start from development
|
||||||
|
git checkout development
|
||||||
|
git pull origin development
|
||||||
|
|
||||||
|
# Create feature branch
|
||||||
|
git checkout -b feature/10-add-new-page
|
||||||
|
|
||||||
|
# Work, commit, push
|
||||||
|
git add .
|
||||||
|
git commit -m "feat: Add new page"
|
||||||
|
git push -u origin feature/10-add-new-page
|
||||||
|
```
|
||||||
|
|
||||||
|
### Merging
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Merge into development
|
||||||
|
git checkout development
|
||||||
|
git merge feature/10-add-new-page
|
||||||
|
git push origin development
|
||||||
|
|
||||||
|
# Delete feature branch
|
||||||
|
git branch -d feature/10-add-new-page
|
||||||
|
git push origin --delete feature/10-add-new-page
|
||||||
|
```
|
||||||
|
|
||||||
|
**Rules:**
|
||||||
|
- Never commit directly to `main` or `staging`
|
||||||
|
- Never delete `development`
|
||||||
|
- Feature branches are temporary
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code Standards
|
||||||
|
|
||||||
|
### Type Hints
|
||||||
|
|
||||||
|
Use Python 3.10+ style:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Imports
|
||||||
|
|
||||||
|
| Context | Style |
|
||||||
|
|---------|-------|
|
||||||
|
| Same directory | `from .module import X` |
|
||||||
|
| Sibling directory | `from ..schemas.model import Y` |
|
||||||
|
| External packages | `import pandas as pd` |
|
||||||
|
|
||||||
|
### Formatting
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make format # Runs ruff formatter
|
||||||
|
make lint # Checks style
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docstrings
|
||||||
|
|
||||||
|
Google style, only for non-obvious functions:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def calculate_score(values: list[float], weights: list[float]) -> float:
|
||||||
|
"""Calculate weighted score.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
values: Raw metric values.
|
||||||
|
weights: Weight for each metric.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Weighted average score.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Questions?
|
||||||
|
|
||||||
|
Check `CLAUDE.md` for AI assistant context and architectural decisions.
|
||||||
335
docs/DATABASE_SCHEMA.md
Normal file
335
docs/DATABASE_SCHEMA.md
Normal file
@@ -0,0 +1,335 @@
|
|||||||
|
# Database Schema
|
||||||
|
|
||||||
|
This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.
|
||||||
|
|
||||||
|
## Entity Relationship Diagram
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
dim_time {
|
||||||
|
int date_key PK
|
||||||
|
date full_date UK
|
||||||
|
int year
|
||||||
|
int month
|
||||||
|
int quarter
|
||||||
|
string month_name
|
||||||
|
bool is_month_start
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_cmhc_zone {
|
||||||
|
int zone_key PK
|
||||||
|
string zone_code UK
|
||||||
|
string zone_name
|
||||||
|
geometry geometry
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_neighbourhood {
|
||||||
|
int neighbourhood_id PK
|
||||||
|
string name
|
||||||
|
geometry geometry
|
||||||
|
int population
|
||||||
|
numeric land_area_sqkm
|
||||||
|
numeric pop_density_per_sqkm
|
||||||
|
numeric pct_bachelors_or_higher
|
||||||
|
numeric median_household_income
|
||||||
|
numeric pct_owner_occupied
|
||||||
|
numeric pct_renter_occupied
|
||||||
|
int census_year
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_policy_event {
|
||||||
|
int event_id PK
|
||||||
|
date event_date
|
||||||
|
date effective_date
|
||||||
|
string level
|
||||||
|
string category
|
||||||
|
string title
|
||||||
|
text description
|
||||||
|
string expected_direction
|
||||||
|
string source_url
|
||||||
|
string confidence
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_rentals {
|
||||||
|
int id PK
|
||||||
|
int date_key FK
|
||||||
|
int zone_key FK
|
||||||
|
string bedroom_type
|
||||||
|
int universe
|
||||||
|
numeric avg_rent
|
||||||
|
numeric median_rent
|
||||||
|
numeric vacancy_rate
|
||||||
|
numeric availability_rate
|
||||||
|
numeric turnover_rate
|
||||||
|
numeric rent_change_pct
|
||||||
|
string reliability_code
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_census {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
int census_year
|
||||||
|
int population
|
||||||
|
numeric population_density
|
||||||
|
numeric median_household_income
|
||||||
|
numeric average_household_income
|
||||||
|
numeric unemployment_rate
|
||||||
|
numeric pct_bachelors_or_higher
|
||||||
|
numeric pct_owner_occupied
|
||||||
|
numeric pct_renter_occupied
|
||||||
|
numeric median_age
|
||||||
|
numeric average_dwelling_value
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_crime {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
int year
|
||||||
|
string crime_type
|
||||||
|
int count
|
||||||
|
numeric rate_per_100k
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_amenities {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
string amenity_type
|
||||||
|
int count
|
||||||
|
int year
|
||||||
|
}
|
||||||
|
|
||||||
|
bridge_cmhc_neighbourhood {
|
||||||
|
int id PK
|
||||||
|
string cmhc_zone_code FK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
numeric weight
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_time ||--o{ fact_rentals : "date_key"
|
||||||
|
dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
|
||||||
|
dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
|
||||||
|
dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
|
||||||
|
dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
|
||||||
|
dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
|
||||||
|
dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Schema Layers
|
||||||
|
|
||||||
|
### Database Schemas
|
||||||
|
|
||||||
|
| Schema | Purpose | Managed By |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `public` | Shared dimensions (dim_time) | SQLAlchemy |
|
||||||
|
| `raw_toronto` | Toronto dimension and fact tables | SQLAlchemy |
|
||||||
|
| `stg_toronto` | Toronto staging models | dbt |
|
||||||
|
| `int_toronto` | Toronto intermediate models | dbt |
|
||||||
|
| `mart_toronto` | Toronto analytical tables | dbt |
|
||||||
|
|
||||||
|
### Raw Toronto Schema (raw_toronto)
|
||||||
|
|
||||||
|
Toronto-specific tables loaded by SQLAlchemy:
|
||||||
|
|
||||||
|
| Table | Source | Description |
|
||||||
|
|-------|--------|-------------|
|
||||||
|
| `dim_neighbourhood` | City of Toronto API | 158 neighbourhood boundaries |
|
||||||
|
| `dim_cmhc_zone` | CMHC | ~20 rental market zones |
|
||||||
|
| `dim_policy_event` | Manual | Policy events for annotation |
|
||||||
|
| `fact_census` | City of Toronto API | Census profile data |
|
||||||
|
| `fact_crime` | Toronto Police API | Crime statistics |
|
||||||
|
| `fact_amenities` | City of Toronto API | Amenity counts |
|
||||||
|
| `fact_rentals` | CMHC Data Files | Rental market survey data |
|
||||||
|
| `bridge_cmhc_neighbourhood` | Computed | Zone-neighbourhood mapping |
|
||||||
|
|
||||||
|
### Public Schema
|
||||||
|
|
||||||
|
Shared dimensions used across all projects:
|
||||||
|
|
||||||
|
| Table | Description |
|
||||||
|
|-------|-------------|
|
||||||
|
| `dim_time` | Time dimension (monthly grain) |
|
||||||
|
|
||||||
|
### Staging Schema - stg_toronto (dbt)
|
||||||
|
|
||||||
|
Staging models provide 1:1 cleaned representations of source data:
|
||||||
|
|
||||||
|
| Model | Source Table | Purpose |
|
||||||
|
|-------|-------------|---------|
|
||||||
|
| `stg_toronto__neighbourhoods` | raw_toronto.dim_neighbourhood | Cleaned boundaries with standardized names |
|
||||||
|
| `stg_toronto__census` | raw_toronto.fact_census | Typed census metrics |
|
||||||
|
| `stg_cmhc__rentals` | raw_toronto.fact_rentals | Validated rental data |
|
||||||
|
| `stg_toronto__crime` | raw_toronto.fact_crime | Standardized crime categories |
|
||||||
|
| `stg_toronto__amenities` | raw_toronto.fact_amenities | Typed amenity counts |
|
||||||
|
| `stg_dimensions__time` | generated | Time dimension |
|
||||||
|
| `stg_dimensions__cmhc_zones` | raw_toronto.dim_cmhc_zone | CMHC zone boundaries |
|
||||||
|
| `stg_cmhc__zone_crosswalk` | raw_toronto.bridge_cmhc_neighbourhood | Zone-neighbourhood mapping |
|
||||||
|
|
||||||
|
### Marts Schema - mart_toronto (dbt)
|
||||||
|
|
||||||
|
Analytical tables ready for dashboard consumption:
|
||||||
|
|
||||||
|
| Model | Grain | Purpose |
|
||||||
|
|-------|-------|---------|
|
||||||
|
| `mart_neighbourhood_overview` | neighbourhood | Composite livability scores |
|
||||||
|
| `mart_neighbourhood_housing` | neighbourhood | Housing and rent metrics |
|
||||||
|
| `mart_neighbourhood_safety` | neighbourhood × year | Crime rate calculations |
|
||||||
|
| `mart_neighbourhood_demographics` | neighbourhood | Income, age, population metrics |
|
||||||
|
| `mart_neighbourhood_amenities` | neighbourhood | Amenity accessibility scores |
|
||||||
|
| `mart_toronto_rentals` | zone × month | Time-series rental analysis |
|
||||||
|
|
||||||
|
## Table Details
|
||||||
|
|
||||||
|
### Dimension Tables
|
||||||
|
|
||||||
|
#### dim_time
|
||||||
|
Time dimension for date-based analysis. Grain: one row per month.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
|
||||||
|
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
|
||||||
|
| year | INTEGER | NOT NULL | Calendar year |
|
||||||
|
| month | INTEGER | NOT NULL | Month number (1-12) |
|
||||||
|
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
|
||||||
|
| month_name | VARCHAR(20) | NOT NULL | Month name |
|
||||||
|
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
|
||||||
|
|
||||||
|
#### dim_cmhc_zone
|
||||||
|
CMHC rental market zones (~20 zones covering Toronto).
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
|
||||||
|
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
|
||||||
|
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |
|
||||||
|
|
||||||
|
#### dim_neighbourhood
|
||||||
|
Toronto's 158 official neighbourhoods.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
|
||||||
|
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
|
||||||
|
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
|
||||||
|
| population | INTEGER | | Total population |
|
||||||
|
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
|
||||||
|
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
|
||||||
|
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||||
|
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||||
|
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
|
||||||
|
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
|
||||||
|
| census_year | INTEGER | DEFAULT 2021 | Census reference year |
|
||||||
|
|
||||||
|
#### dim_policy_event
|
||||||
|
Policy events for time-series annotation (rent control, interest rates, etc.).
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| event_id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| event_date | DATE | NOT NULL | Announcement date |
|
||||||
|
| effective_date | DATE | | Implementation date |
|
||||||
|
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
|
||||||
|
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
|
||||||
|
| title | VARCHAR(200) | NOT NULL | Event title |
|
||||||
|
| description | TEXT | | Detailed description |
|
||||||
|
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
|
||||||
|
| source_url | VARCHAR(500) | | Reference link |
|
||||||
|
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |
|
||||||
|
|
||||||
|
### Fact Tables
|
||||||
|
|
||||||
|
#### fact_rentals
|
||||||
|
CMHC rental market survey data. Grain: zone × bedroom type × survey date.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| date_key | INTEGER | FK → dim_time | Survey date reference |
|
||||||
|
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
|
||||||
|
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
|
||||||
|
| universe | INTEGER | | Total rental units |
|
||||||
|
| avg_rent | NUMERIC(10,2) | | Average rent |
|
||||||
|
| median_rent | NUMERIC(10,2) | | Median rent |
|
||||||
|
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
|
||||||
|
| availability_rate | NUMERIC(5,2) | | Availability percentage |
|
||||||
|
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
|
||||||
|
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
|
||||||
|
| reliability_code | VARCHAR(2) | | CMHC data quality code |
|
||||||
|
|
||||||
|
#### fact_census
|
||||||
|
Census statistics. Grain: neighbourhood × census year.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
|
||||||
|
| population | INTEGER | | Total population |
|
||||||
|
| population_density | NUMERIC(10,2) | | People per km² |
|
||||||
|
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||||
|
| average_household_income | NUMERIC(12,2) | | Average income |
|
||||||
|
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
|
||||||
|
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||||
|
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
|
||||||
|
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
|
||||||
|
| median_age | NUMERIC(5,2) | | Median resident age |
|
||||||
|
| average_dwelling_value | NUMERIC(12,2) | | Average home value |
|
||||||
|
|
||||||
|
#### fact_crime
|
||||||
|
Crime statistics. Grain: neighbourhood × year × crime type.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| year | INTEGER | NOT NULL | Calendar year |
|
||||||
|
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
|
||||||
|
| count | INTEGER | NOT NULL | Number of incidents |
|
||||||
|
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |
|
||||||
|
|
||||||
|
#### fact_amenities
|
||||||
|
Amenity counts. Grain: neighbourhood × amenity type × year.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
|
||||||
|
| count | INTEGER | NOT NULL | Number of amenities |
|
||||||
|
| year | INTEGER | NOT NULL | Reference year |
|
||||||
|
|
||||||
|
### Bridge Tables
|
||||||
|
|
||||||
|
#### bridge_cmhc_neighbourhood
|
||||||
|
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
|
||||||
|
|
||||||
|
## Indexes
|
||||||
|
|
||||||
|
| Table | Index | Columns | Purpose |
|
||||||
|
|-------|-------|---------|---------|
|
||||||
|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
|
||||||
|
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
|
||||||
|
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
|
||||||
|
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
|
||||||
|
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
|
||||||
|
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
|
||||||
|
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
|
||||||
|
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
|
||||||
|
|
||||||
|
## PostGIS Extensions
|
||||||
|
|
||||||
|
The database requires PostGIS for geospatial operations:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||||
|
```
|
||||||
|
|
||||||
|
All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
|
||||||
@@ -1,21 +1,193 @@
|
|||||||
# Portfolio Project Reference
|
# Portfolio Project Reference
|
||||||
|
|
||||||
**Project**: Analytics Portfolio
|
**Project**: Analytics Portfolio
|
||||||
**Owner**: Leo
|
**Owner**: Leo Miranda
|
||||||
**Status**: Ready for Sprint 1
|
**Status**: Sprint 9 Complete (Dashboard Implementation Done)
|
||||||
|
**Last Updated**: January 2026
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
Two-project analytics portfolio demonstrating end-to-end data engineering, visualization, and ML capabilities.
|
Personal portfolio website with an interactive Toronto Neighbourhood Dashboard demonstrating data engineering, visualization, and analytics capabilities.
|
||||||
|
|
||||||
| Project | Domain | Key Skills | Phase |
|
| Component | Description | Status |
|
||||||
|---------|--------|------------|-------|
|
|-----------|-------------|--------|
|
||||||
| **Toronto Housing Dashboard** | Real estate | ETL, dimensional modeling, geospatial, choropleth | Phase 1 (Active) |
|
| Portfolio Website | Bio, About, Projects, Resume, Contact, Blog | Complete |
|
||||||
| **Energy Pricing Analysis** | Utility markets | Time series, ML prediction, API integration | Phase 3 (Future) |
|
| Toronto Dashboard | 5-tab neighbourhood analysis | Complete |
|
||||||
|
| Data Pipeline | dbt models, figure factories | Complete |
|
||||||
|
| Deployment | Production deployment | Pending |
|
||||||
|
|
||||||
**Platform**: Monolithic Dash application on self-hosted VPS (bio landing page + dashboards).
|
---
|
||||||
|
|
||||||
|
## Completed Work
|
||||||
|
|
||||||
|
### Sprint 1-6: Foundation
|
||||||
|
- Repository setup, Docker, PostgreSQL + PostGIS
|
||||||
|
- Bio landing page implementation
|
||||||
|
- Initial data model design
|
||||||
|
|
||||||
|
### Sprint 7: Navigation & Theme
|
||||||
|
- Sidebar navigation
|
||||||
|
- Dark/light theme toggle
|
||||||
|
- dash-mantine-components integration
|
||||||
|
|
||||||
|
### Sprint 8: Portfolio Website
|
||||||
|
- About, Contact, Projects, Resume pages
|
||||||
|
- Blog system with Markdown/frontmatter
|
||||||
|
- Health endpoint
|
||||||
|
|
||||||
|
### Sprint 9: Neighbourhood Dashboard Transition
|
||||||
|
- Phase 1: Deleted legacy TRREB code
|
||||||
|
- Phase 2: Documentation cleanup
|
||||||
|
- Phase 3: New neighbourhood-centric data model
|
||||||
|
- Phase 4: dbt model restructuring
|
||||||
|
- Phase 5: 5-tab dashboard implementation
|
||||||
|
- Phase 6: 15 documentation notebooks
|
||||||
|
- Phase 7: Final documentation review
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Application Architecture
|
||||||
|
|
||||||
|
### URL Routes
|
||||||
|
|
||||||
|
| URL | Page | File |
|
||||||
|
|-----|------|------|
|
||||||
|
| `/` | Home | `pages/home.py` |
|
||||||
|
| `/about` | About | `pages/about.py` |
|
||||||
|
| `/contact` | Contact | `pages/contact.py` |
|
||||||
|
| `/projects` | Projects | `pages/projects.py` |
|
||||||
|
| `/resume` | Resume | `pages/resume.py` |
|
||||||
|
| `/blog` | Blog listing | `pages/blog/index.py` |
|
||||||
|
| `/blog/{slug}` | Article | `pages/blog/article.py` |
|
||||||
|
| `/toronto` | Dashboard | `pages/toronto/dashboard.py` |
|
||||||
|
| `/toronto/methodology` | Methodology | `pages/toronto/methodology.py` |
|
||||||
|
| `/health` | Health check | `pages/health.py` |
|
||||||
|
|
||||||
|
### Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── app.py # Dash app factory
|
||||||
|
├── config.py # Pydantic BaseSettings
|
||||||
|
├── assets/ # CSS, images
|
||||||
|
├── callbacks/ # Global callbacks (sidebar, theme)
|
||||||
|
├── components/ # Shared UI components
|
||||||
|
├── content/blog/ # Markdown blog articles
|
||||||
|
├── errors/ # Exception handling
|
||||||
|
├── figures/
|
||||||
|
│ └── toronto/ # Toronto figure factories
|
||||||
|
├── pages/
|
||||||
|
│ ├── home.py
|
||||||
|
│ ├── about.py
|
||||||
|
│ ├── contact.py
|
||||||
|
│ ├── projects.py
|
||||||
|
│ ├── resume.py
|
||||||
|
│ ├── health.py
|
||||||
|
│ ├── blog/
|
||||||
|
│ │ ├── index.py
|
||||||
|
│ │ └── article.py
|
||||||
|
│ └── toronto/
|
||||||
|
│ ├── dashboard.py
|
||||||
|
│ ├── methodology.py
|
||||||
|
│ ├── tabs/ # 5 tab layouts
|
||||||
|
│ └── callbacks/ # Dashboard interactions (map_callbacks, chart_callbacks, selection_callbacks)
|
||||||
|
├── toronto/ # Data logic
|
||||||
|
│ ├── parsers/ # API extraction (geo, toronto_open_data, toronto_police, cmhc)
|
||||||
|
│ ├── loaders/ # Database operations (base, cmhc, cmhc_crosswalk)
|
||||||
|
│ ├── schemas/ # Pydantic models
|
||||||
|
│ ├── models/ # SQLAlchemy ORM (raw_toronto schema)
|
||||||
|
│ ├── services/ # Query functions (neighbourhood_service, geometry_service)
|
||||||
|
│ └── demo_data.py # Sample data
|
||||||
|
└── utils/
|
||||||
|
└── markdown_loader.py # Blog article loading
|
||||||
|
|
||||||
|
dbt/ # dbt project: portfolio
|
||||||
|
├── models/
|
||||||
|
│ ├── shared/ # Cross-domain dimensions
|
||||||
|
│ ├── staging/toronto/ # Toronto staging models
|
||||||
|
│ ├── intermediate/toronto/ # Toronto intermediate models
|
||||||
|
│ └── marts/toronto/ # Toronto mart tables
|
||||||
|
|
||||||
|
notebooks/
|
||||||
|
└── toronto/ # Toronto documentation notebooks
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Toronto Dashboard
|
||||||
|
|
||||||
|
### Data Sources
|
||||||
|
|
||||||
|
| Source | Data | Format |
|
||||||
|
|--------|------|--------|
|
||||||
|
| City of Toronto Open Data | Neighbourhoods (158), Census profiles, Parks, Schools, Childcare, TTC | GeoJSON, CSV, API |
|
||||||
|
| Toronto Police Service | Crime rates, MCI, Shootings | CSV, API |
|
||||||
|
| CMHC | Rental Market Survey | CSV |
|
||||||
|
|
||||||
|
### Geographic Model
|
||||||
|
|
||||||
|
```
|
||||||
|
City of Toronto Neighbourhoods (158) ← Primary analysis unit
|
||||||
|
CMHC Zones (~20) ← Rental data (Census Tract aligned)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dashboard Tabs
|
||||||
|
|
||||||
|
| Tab | Choropleth Metric | Supporting Charts |
|
||||||
|
|-----|-------------------|-------------------|
|
||||||
|
| Overview | Livability score | Top/Bottom 10 bar, Income vs Safety scatter |
|
||||||
|
| Housing | Affordability index | Rent trend line, Tenure breakdown bar |
|
||||||
|
| Safety | Crime rate per 100K | Crime breakdown bar, Crime trend line |
|
||||||
|
| Demographics | Median income | Age distribution, Population density bar |
|
||||||
|
| Amenities | Amenity index | Amenity radar, Transit accessibility bar |
|
||||||
|
|
||||||
|
### Star Schema
|
||||||
|
|
||||||
|
| Table | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `dim_neighbourhood` | Dimension | 158 neighbourhoods with geometry |
|
||||||
|
| `dim_time` | Dimension | Date dimension |
|
||||||
|
| `dim_cmhc_zone` | Dimension | ~20 CMHC zones with geometry |
|
||||||
|
| `fact_census` | Fact | Census indicators by neighbourhood |
|
||||||
|
| `fact_crime` | Fact | Crime stats by neighbourhood |
|
||||||
|
| `fact_rentals` | Fact | Rental data by CMHC zone |
|
||||||
|
| `fact_amenities` | Fact | Amenity counts by neighbourhood |
|
||||||
|
|
||||||
|
### dbt Project: `portfolio`
|
||||||
|
|
||||||
|
**Model Structure:**
|
||||||
|
```
|
||||||
|
dbt/models/
|
||||||
|
├── shared/ # Cross-domain dimensions (stg_dimensions__time)
|
||||||
|
├── staging/toronto/ # Toronto staging models
|
||||||
|
├── intermediate/toronto/ # Toronto intermediate models
|
||||||
|
└── marts/toronto/ # Toronto mart tables
|
||||||
|
```
|
||||||
|
|
||||||
|
| Layer | Naming | Example |
|
||||||
|
|-------|--------|---------|
|
||||||
|
| Shared | `stg_dimensions__*` | `stg_dimensions__time` |
|
||||||
|
| Staging | `stg_{source}__{entity}` | `stg_toronto__neighbourhoods` |
|
||||||
|
| Intermediate | `int_{domain}__{transform}` | `int_neighbourhood__demographics` |
|
||||||
|
| Marts | `mart_{domain}` | `mart_neighbourhood_overview` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tech Stack
|
||||||
|
|
||||||
|
| Layer | Technology | Version |
|
||||||
|
|-------|------------|---------|
|
||||||
|
| Database | PostgreSQL + PostGIS | 16.x |
|
||||||
|
| Validation | Pydantic | 2.x |
|
||||||
|
| ORM | SQLAlchemy | 2.x |
|
||||||
|
| Transformation | dbt-postgres | 1.7+ |
|
||||||
|
| Data Processing | Pandas, GeoPandas | Latest |
|
||||||
|
| Visualization | Dash + Plotly | 2.14+ |
|
||||||
|
| UI Components | dash-mantine-components | Latest |
|
||||||
|
| Testing | pytest | 7.0+ |
|
||||||
|
| Python | 3.11+ | Via pyenv |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -23,325 +195,51 @@ Two-project analytics portfolio demonstrating end-to-end data engineering, visua
|
|||||||
|
|
||||||
| Branch | Purpose | Deploys To |
|
| Branch | Purpose | Deploys To |
|
||||||
|--------|---------|------------|
|
|--------|---------|------------|
|
||||||
| `main` | Production releases only | VPS (production) |
|
| `main` | Production releases | VPS (production) |
|
||||||
| `staging` | Pre-production testing | VPS (staging) |
|
| `staging` | Pre-production testing | VPS (staging) |
|
||||||
| `development` | Active development | Local only |
|
| `development` | Active development | Local only |
|
||||||
|
|
||||||
**Rules**:
|
**Rules:**
|
||||||
- All feature branches created FROM `development`
|
- Feature branches from `development`: `feature/{sprint}-{description}`
|
||||||
- All feature branches merge INTO `development`
|
- Merge into `development` when complete
|
||||||
- `development` → `staging` for testing
|
- `development` → `staging` → `main` for releases
|
||||||
- `staging` → `main` for release
|
- Never delete `development`
|
||||||
- Direct commits to `main` or `staging` are forbidden
|
|
||||||
- Branch naming: `feature/{sprint}-{description}` or `fix/{issue-id}`
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Tech Stack (Locked)
|
## Code Standards
|
||||||
|
|
||||||
| Layer | Technology | Version |
|
### Type Hints (Python 3.10+)
|
||||||
|-------|------------|---------|
|
|
||||||
| Database | PostgreSQL + PostGIS | 16.x |
|
|
||||||
| Validation | Pydantic | ≥2.0 |
|
|
||||||
| ORM | SQLAlchemy | ≥2.0 (2.0-style API only) |
|
|
||||||
| Transformation | dbt-postgres | ≥1.7 |
|
|
||||||
| Data Processing | Pandas | ≥2.1 |
|
|
||||||
| Geospatial | GeoPandas + Shapely | ≥0.14 |
|
|
||||||
| Visualization | Dash + Plotly | ≥2.14 |
|
|
||||||
| UI Components | dash-mantine-components | Latest stable |
|
|
||||||
| Testing | pytest | ≥7.0 |
|
|
||||||
| Python | 3.11+ | Via pyenv |
|
|
||||||
|
|
||||||
**Compatibility Notes**:
|
```python
|
||||||
- SQLAlchemy 2.0 + Pydantic 2.0 integrate well—never mix 1.x APIs
|
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
|
||||||
- PostGIS extension required—enable during db init
|
...
|
||||||
- Docker Compose V2 (no `version` field in compose files)
|
```
|
||||||
|
|
||||||
---
|
### Imports
|
||||||
|
|
||||||
## Code Conventions
|
| Context | Style |
|
||||||
|
|---------|-------|
|
||||||
### Import Style
|
| Same directory | `from .module import X` |
|
||||||
|
| Sibling directory | `from ..schemas.model import Y` |
|
||||||
| Context | Style | Example |
|
| External | `import pandas as pd` |
|
||||||
|---------|-------|---------|
|
|
||||||
| Same directory | Single dot | `from .trreb import TRREBParser` |
|
|
||||||
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
|
|
||||||
| External packages | Absolute | `import pandas as pd` |
|
|
||||||
|
|
||||||
### Module Separation
|
|
||||||
|
|
||||||
| Directory | Contains | Purpose |
|
|
||||||
|-----------|----------|---------|
|
|
||||||
| `schemas/` | Pydantic models | Data validation |
|
|
||||||
| `models/` | SQLAlchemy ORM | Database persistence |
|
|
||||||
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
|
|
||||||
| `loaders/` | Database operations | Data loading |
|
|
||||||
| `figures/` | Chart factories | Plotly figure generation |
|
|
||||||
| `callbacks/` | Dash callbacks | Per-dashboard, in `pages/{dashboard}/callbacks/` |
|
|
||||||
| `errors/` | Exceptions + handlers | Error handling |
|
|
||||||
|
|
||||||
### Code Standards
|
|
||||||
|
|
||||||
- **Type hints**: Mandatory, Python 3.10+ style (`list[str]`, `dict[str, int]`, `X | None`)
|
|
||||||
- **Functions**: Single responsibility, verb naming, early returns over nesting
|
|
||||||
- **Docstrings**: Google style, minimal—only for non-obvious behavior
|
|
||||||
- **Constants**: Module-level for magic values, Pydantic BaseSettings for runtime config
|
|
||||||
|
|
||||||
### Error Handling
|
### Error Handling
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# errors/exceptions.py
|
|
||||||
class PortfolioError(Exception):
|
class PortfolioError(Exception):
|
||||||
"""Base exception."""
|
"""Base exception."""
|
||||||
|
|
||||||
class ParseError(PortfolioError):
|
class ParseError(PortfolioError):
|
||||||
"""PDF/CSV parsing failed."""
|
"""Data parsing failed."""
|
||||||
|
|
||||||
class ValidationError(PortfolioError):
|
class ValidationError(PortfolioError):
|
||||||
"""Pydantic or business rule validation failed."""
|
"""Validation failed."""
|
||||||
|
|
||||||
class LoadError(PortfolioError):
|
class LoadError(PortfolioError):
|
||||||
"""Database load operation failed."""
|
"""Database load failed."""
|
||||||
```
|
```
|
||||||
|
|
||||||
- Decorators for infrastructure concerns (logging, retry, transactions)
|
|
||||||
- Explicit handling for domain logic (business rules, recovery strategies)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Application Architecture
|
|
||||||
|
|
||||||
### Dash Pages Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
portfolio_app/
|
|
||||||
├── app.py # Dash app factory with Pages routing
|
|
||||||
├── config.py # Pydantic BaseSettings
|
|
||||||
├── assets/ # CSS, images (auto-served by Dash)
|
|
||||||
├── pages/
|
|
||||||
│ ├── home.py # Bio landing page → /
|
|
||||||
│ ├── toronto/
|
|
||||||
│ │ ├── dashboard.py # Layout only → /toronto
|
|
||||||
│ │ └── callbacks/ # Interaction logic
|
|
||||||
│ └── energy/ # Phase 3
|
|
||||||
├── components/ # Shared UI (navbar, footer, cards)
|
|
||||||
├── figures/ # Shared chart factories
|
|
||||||
├── toronto/ # Toronto data logic
|
|
||||||
│ ├── parsers/
|
|
||||||
│ ├── loaders/
|
|
||||||
│ ├── schemas/ # Pydantic
|
|
||||||
│ └── models/ # SQLAlchemy
|
|
||||||
└── errors/
|
|
||||||
```
|
|
||||||
|
|
||||||
### URL Routing (Automatic)
|
|
||||||
|
|
||||||
| URL | Page | Status |
|
|
||||||
|-----|------|--------|
|
|
||||||
| `/` | Bio landing page | Sprint 2 |
|
|
||||||
| `/toronto` | Toronto Housing Dashboard | Sprint 6 |
|
|
||||||
| `/energy` | Energy Pricing Dashboard | Phase 3 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Phase 1: Toronto Housing Dashboard
|
|
||||||
|
|
||||||
### Data Sources
|
|
||||||
|
|
||||||
| Track | Source | Format | Geography | Frequency |
|
|
||||||
|-------|--------|--------|-----------|-----------|
|
|
||||||
| Purchases | TRREB Monthly Reports | PDF | ~35 Districts | Monthly |
|
|
||||||
| Rentals | CMHC Rental Market Survey | CSV | ~20 Zones | Annual |
|
|
||||||
| Enrichment | City of Toronto Open Data | GeoJSON/CSV | 158 Neighbourhoods | Census |
|
|
||||||
| Policy Events | Curated list | CSV | N/A | Event-based |
|
|
||||||
|
|
||||||
### Geographic Reality
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ City of Toronto Neighbourhoods (158) │ ← Enrichment only
|
|
||||||
├─────────────────────────────────────────────────────────────────┤
|
|
||||||
│ TRREB Districts (~35) — W01, C01, E01, etc. │ ← Purchase data
|
|
||||||
├─────────────────────────────────────────────────────────────────┤
|
|
||||||
│ CMHC Zones (~20) — Census Tract aligned │ ← Rental data
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**Critical**: These geographies do NOT align. Display as separate layers with toggle—do not force crosswalks.
|
|
||||||
|
|
||||||
### Data Model (Star Schema)
|
|
||||||
|
|
||||||
| Table | Type | Keys |
|
|
||||||
|-------|------|------|
|
|
||||||
| `fact_purchases` | Fact | → dim_time, dim_trreb_district |
|
|
||||||
| `fact_rentals` | Fact | → dim_time, dim_cmhc_zone |
|
|
||||||
| `dim_time` | Dimension | date_key (PK) |
|
|
||||||
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
|
|
||||||
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
|
|
||||||
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
|
|
||||||
| `dim_policy_event` | Dimension | event_id (PK) |
|
|
||||||
|
|
||||||
**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
|
|
||||||
|
|
||||||
### dbt Layer Structure
|
|
||||||
|
|
||||||
| Layer | Naming | Purpose |
|
|
||||||
|-------|--------|---------|
|
|
||||||
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
|
||||||
| Intermediate | `int_{domain}__{transform}` | Business logic, filtering |
|
|
||||||
| Marts | `mart_{domain}` | Final analytical tables |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Sprint Overview
|
|
||||||
|
|
||||||
| Sprint | Focus | Milestone |
|
|
||||||
|--------|-------|-----------|
|
|
||||||
| 1 | Project bootstrap, start TRREB digitization | — |
|
|
||||||
| 2 | Bio page, data acquisition | **Launch 1: Bio Live** |
|
|
||||||
| 3 | Parsers, schemas, models | — |
|
|
||||||
| 4 | Loaders, dbt | — |
|
|
||||||
| 5 | Visualization | — |
|
|
||||||
| 6 | Polish, deploy dashboard | **Launch 2: Dashboard Live** |
|
|
||||||
| 7 | Buffer | — |
|
|
||||||
|
|
||||||
### Sprint 1 Deliverables
|
|
||||||
|
|
||||||
| Category | Tasks |
|
|
||||||
|----------|-------|
|
|
||||||
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
|
|
||||||
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
|
|
||||||
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
|
|
||||||
| **Tests** | tests/ directory, conftest.py, pytest config |
|
|
||||||
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
|
|
||||||
|
|
||||||
### Human Tasks (Cannot Automate)
|
|
||||||
|
|
||||||
| Task | Tool | Effort |
|
|
||||||
|------|------|--------|
|
|
||||||
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
|
|
||||||
| Research policy events (10-20) | Manual research | 2-3 hours |
|
|
||||||
| Replace social link placeholders | Manual | 5 minutes |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Scope Boundaries
|
|
||||||
|
|
||||||
### Phase 1 — Build These
|
|
||||||
|
|
||||||
- Bio landing page with content from bio_content_v2.md
|
|
||||||
- TRREB PDF parser
|
|
||||||
- CMHC CSV processor
|
|
||||||
- PostgreSQL + PostGIS database layer
|
|
||||||
- Star schema (facts + dimensions)
|
|
||||||
- dbt models with tests
|
|
||||||
- Choropleth visualization (Dash)
|
|
||||||
- Policy event annotation layer
|
|
||||||
- Neighbourhood overlay (toggle-able)
|
|
||||||
|
|
||||||
### Phase 1 — Do NOT Build
|
|
||||||
|
|
||||||
| Feature | Reason | When |
|
|
||||||
|---------|--------|------|
|
|
||||||
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 | After Energy project |
|
|
||||||
| Crime data integration | Deferred scope | Phase 4 |
|
|
||||||
| Historical boundary reconciliation (140→158) | 2021+ data only for V1 | Phase 4 |
|
|
||||||
| ML prediction models | Energy project scope | Phase 3 |
|
|
||||||
| Multi-project shared infrastructure | Build first, abstract second | Phase 2 |
|
|
||||||
|
|
||||||
If a task seems to require Phase 3/4 features, **stop and flag it**.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## File Structure
|
|
||||||
|
|
||||||
### Root-Level Files (Allowed)
|
|
||||||
|
|
||||||
| File | Purpose |
|
|
||||||
|------|---------|
|
|
||||||
| `README.md` | Project overview |
|
|
||||||
| `CLAUDE.md` | AI assistant context |
|
|
||||||
| `pyproject.toml` | Python packaging |
|
|
||||||
| `.gitignore` | Git ignore rules |
|
|
||||||
| `.env.example` | Environment template |
|
|
||||||
| `.python-version` | pyenv version |
|
|
||||||
| `.pre-commit-config.yaml` | Pre-commit hooks |
|
|
||||||
| `docker-compose.yml` | Container orchestration |
|
|
||||||
| `Makefile` | Task automation |
|
|
||||||
|
|
||||||
### Directory Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
portfolio/
|
|
||||||
├── portfolio_app/ # Monolithic Dash application
|
|
||||||
│ ├── app.py
|
|
||||||
│ ├── config.py
|
|
||||||
│ ├── assets/
|
|
||||||
│ ├── pages/
|
|
||||||
│ ├── components/
|
|
||||||
│ ├── figures/
|
|
||||||
│ ├── toronto/
|
|
||||||
│ └── errors/
|
|
||||||
├── tests/
|
|
||||||
├── dbt/
|
|
||||||
├── data/
|
|
||||||
│ └── toronto/
|
|
||||||
│ ├── raw/
|
|
||||||
│ ├── processed/ # gitignored
|
|
||||||
│ └── reference/
|
|
||||||
├── scripts/
|
|
||||||
│ ├── db/
|
|
||||||
│ ├── docker/
|
|
||||||
│ ├── deploy/
|
|
||||||
│ ├── dbt/
|
|
||||||
│ └── dev/
|
|
||||||
├── docs/
|
|
||||||
├── notebooks/
|
|
||||||
├── backups/ # gitignored
|
|
||||||
└── reports/ # gitignored
|
|
||||||
```
|
|
||||||
|
|
||||||
### Gitignored Directories
|
|
||||||
|
|
||||||
- `data/*/processed/`
|
|
||||||
- `reports/`
|
|
||||||
- `backups/`
|
|
||||||
- `notebooks/*.html`
|
|
||||||
- `.env`
|
|
||||||
- `__pycache__/`
|
|
||||||
- `.venv/`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Makefile Targets
|
|
||||||
|
|
||||||
| Target | Purpose |
|
|
||||||
|--------|---------|
|
|
||||||
| `setup` | Install deps, create .env, init pre-commit |
|
|
||||||
| `docker-up` | Start PostgreSQL + PostGIS |
|
|
||||||
| `docker-down` | Stop containers |
|
|
||||||
| `db-init` | Initialize database schema |
|
|
||||||
| `run` | Start Dash dev server |
|
|
||||||
| `test` | Run pytest |
|
|
||||||
| `dbt-run` | Run dbt models |
|
|
||||||
| `dbt-test` | Run dbt tests |
|
|
||||||
| `lint` | Run ruff linter |
|
|
||||||
| `format` | Run ruff formatter |
|
|
||||||
| `ci` | Run all checks |
|
|
||||||
| `deploy` | Deploy to production |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Script Standards
|
|
||||||
|
|
||||||
All scripts in `scripts/`:
|
|
||||||
- Include usage comments at top
|
|
||||||
- Idempotent where possible
|
|
||||||
- Exit codes: 0 = success, 1 = error
|
|
||||||
- Use `set -euo pipefail` for bash
|
|
||||||
- Log to stdout, errors to stderr
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Environment Variables
|
## Environment Variables
|
||||||
@@ -360,37 +258,61 @@ LOG_LEVEL=INFO
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Success Criteria
|
## Makefile Targets
|
||||||
|
|
||||||
### Launch 1 (Sprint 2)
|
| Target | Purpose |
|
||||||
- [ ] Bio page accessible via HTTPS
|
|--------|---------|
|
||||||
- [ ] All bio content rendered (from bio_content_v2.md)
|
| `setup` | Install deps, create .env, init pre-commit |
|
||||||
- [ ] No placeholder text visible
|
| `docker-up` | Start PostgreSQL + PostGIS (auto-detects x86/ARM) |
|
||||||
- [ ] Mobile responsive
|
| `docker-down` | Stop containers |
|
||||||
- [ ] Social links functional
|
| `docker-logs` | View container logs |
|
||||||
|
| `db-init` | Initialize database schema |
|
||||||
### Launch 2 (Sprint 6)
|
| `db-reset` | Drop and recreate database (DESTRUCTIVE) |
|
||||||
- [ ] Choropleth renders TRREB districts and CMHC zones
|
| `load-data` | Load Toronto data from APIs, seed dev data |
|
||||||
- [ ] Purchase/rental mode toggle works
|
| `load-toronto-only` | Load Toronto data without dbt or seeding |
|
||||||
- [ ] Time navigation works
|
| `seed-data` | Seed sample development data |
|
||||||
- [ ] Policy event markers visible
|
| `run` | Start Dash dev server |
|
||||||
- [ ] Neighbourhood overlay toggleable
|
| `test` | Run pytest |
|
||||||
- [ ] Methodology documentation published
|
| `test-cov` | Run pytest with coverage |
|
||||||
- [ ] Data sources cited
|
| `lint` | Run ruff linter |
|
||||||
|
| `format` | Run ruff formatter |
|
||||||
|
| `typecheck` | Run mypy type checker |
|
||||||
|
| `ci` | Run all checks (lint, typecheck, test) |
|
||||||
|
| `dbt-run` | Run dbt models |
|
||||||
|
| `dbt-test` | Run dbt tests |
|
||||||
|
| `dbt-docs` | Generate and serve dbt documentation |
|
||||||
|
| `clean` | Remove build artifacts and caches |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Reference Documents
|
## Next Steps
|
||||||
|
|
||||||
For detailed specifications, see:
|
### Deployment (Sprint 10+)
|
||||||
|
- [ ] Production Docker configuration
|
||||||
|
- [ ] CI/CD pipeline
|
||||||
|
- [ ] HTTPS/SSL setup
|
||||||
|
- [ ] Domain configuration
|
||||||
|
|
||||||
| Document | Location | Use When |
|
### Data Enhancement
|
||||||
|----------|----------|----------|
|
- [ ] Connect to live APIs (currently using demo data)
|
||||||
| Data schemas | `docs/toronto_housing_spec.md` | Parser/model tasks |
|
- [ ] Data refresh automation
|
||||||
| WBS details | `docs/wbs.md` | Sprint planning |
|
- [ ] Historical data loading
|
||||||
| Bio content | `docs/bio_content.md` | Building home.py |
|
|
||||||
|
### Future Projects
|
||||||
|
- Energy Pricing Analysis dashboard (planned)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*Reference Version: 1.0*
|
## Related Documents
|
||||||
*Created: January 2026*
|
|
||||||
|
| Document | Purpose |
|
||||||
|
|----------|---------|
|
||||||
|
| `README.md` | Quick start guide |
|
||||||
|
| `CLAUDE.md` | AI assistant context |
|
||||||
|
| `docs/CONTRIBUTING.md` | Developer guide |
|
||||||
|
| `notebooks/README.md` | Notebook documentation |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Reference Version: 3.0*
|
||||||
|
*Updated: January 2026*
|
||||||
|
|||||||
@@ -1,134 +0,0 @@
|
|||||||
# Portfolio Bio Content
|
|
||||||
|
|
||||||
**Version**: 2.0
|
|
||||||
**Last Updated**: January 2026
|
|
||||||
**Purpose**: Content source for `portfolio_app/pages/home.py`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Document Context
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Parent Document** | `portfolio_project_plan_v5.md` |
|
|
||||||
| **Role** | Bio content and social links for landing page |
|
|
||||||
| **Consumed By** | `portfolio_app/pages/home.py` |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Headline
|
|
||||||
|
|
||||||
**Primary**: Leo | Data Engineer & Analytics Developer
|
|
||||||
|
|
||||||
**Tagline**: I build data infrastructure that actually gets used.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Professional Summary
|
|
||||||
|
|
||||||
Over the past 5 years, I've designed and evolved an enterprise analytics platform from scratch—now processing 1B+ rows across 21 tables with Python-based ETL pipelines and dbt-style SQL transformations. The result: 40% efficiency gains, 30% reduction in call abandon rates, and dashboards that executives actually open.
|
|
||||||
|
|
||||||
My approach: dimensional modeling (star schema), layered transformations (staging → intermediate → marts), and automation that eliminates manual work. I've built everything from self-service analytics portals to OCR-powered receipt processing systems.
|
|
||||||
|
|
||||||
Currently at Summitt Energy supporting multi-market operations across Canada and 8 US states. Previously cut my teeth on IT infrastructure projects at Petrobras (Fortune 500) and the Project Management Institute.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Tech Stack
|
|
||||||
|
|
||||||
| Category | Technologies |
|
|
||||||
|----------|--------------|
|
|
||||||
| **Languages** | Python, SQL |
|
|
||||||
| **Data Processing** | Pandas, SQLAlchemy, FastAPI |
|
|
||||||
| **Databases** | PostgreSQL, MSSQL |
|
|
||||||
| **Visualization** | Power BI, Plotly, Dash |
|
|
||||||
| **Patterns** | dbt, dimensional modeling, star schema |
|
|
||||||
| **Other** | Genesys Cloud |
|
|
||||||
|
|
||||||
**Display Format** (for landing page):
|
|
||||||
```
|
|
||||||
Python (Pandas, SQLAlchemy, FastAPI) • SQL (MSSQL, PostgreSQL) • Power BI • Plotly/Dash • Genesys Cloud • dbt patterns
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Side Project
|
|
||||||
|
|
||||||
**Bandit Labs** — Building automation and AI tooling for small businesses.
|
|
||||||
|
|
||||||
*Note: Keep this brief on portfolio; link only if separate landing page exists.*
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Social Links
|
|
||||||
|
|
||||||
| Platform | URL | Icon |
|
|
||||||
|----------|-----|------|
|
|
||||||
| **LinkedIn** | `https://linkedin.com/in/[USERNAME]` | `lucide-react: Linkedin` |
|
|
||||||
| **GitHub** | `https://github.com/[USERNAME]` | `lucide-react: Github` |
|
|
||||||
|
|
||||||
> **TODO**: Replace `[USERNAME]` placeholders with actual URLs before bio page launch.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Availability Statement
|
|
||||||
|
|
||||||
Open to **Senior Data Analyst**, **Analytics Engineer**, and **BI Developer** opportunities in Toronto or remote.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Portfolio Projects Section
|
|
||||||
|
|
||||||
*Dynamically populated based on deployed projects.*
|
|
||||||
|
|
||||||
| Project | Status | Link |
|
|
||||||
|---------|--------|------|
|
|
||||||
| Toronto Housing Dashboard | In Development | `/toronto` |
|
|
||||||
| Energy Pricing Analysis | Planned | `/energy` |
|
|
||||||
|
|
||||||
**Display Logic**:
|
|
||||||
- Show only projects with `status = deployed`
|
|
||||||
- "In Development" projects can show as coming soon or be hidden (user preference)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Implementation Notes
|
|
||||||
|
|
||||||
### Content Hierarchy for `home.py`
|
|
||||||
|
|
||||||
```
|
|
||||||
1. Name + Tagline (hero section)
|
|
||||||
2. Professional Summary (2-3 paragraphs)
|
|
||||||
3. Tech Stack (horizontal chips or inline list)
|
|
||||||
4. Portfolio Projects (cards linking to dashboards)
|
|
||||||
5. Social Links (icon buttons)
|
|
||||||
6. Availability statement (subtle, bottom)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Styling Recommendations
|
|
||||||
|
|
||||||
- Clean, minimal — let the projects speak
|
|
||||||
- Dark/light mode support via dash-mantine-components theme
|
|
||||||
- No headshot required (optional)
|
|
||||||
- Mobile-responsive layout
|
|
||||||
|
|
||||||
### Content Updates
|
|
||||||
|
|
||||||
When updating bio content:
|
|
||||||
1. Edit this document
|
|
||||||
2. Update `home.py` to reflect changes
|
|
||||||
3. Redeploy
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Related Documents
|
|
||||||
|
|
||||||
| Document | Relationship |
|
|
||||||
|----------|--------------|
|
|
||||||
| `portfolio_project_plan_v5.md` | Parent — references this for bio content |
|
|
||||||
| `portfolio_app/pages/home.py` | Consumer — implements this content |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Document Version: 2.0*
|
|
||||||
*Updated: January 2026*
|
|
||||||
56
docs/project-lessons-learned/INDEX.md
Normal file
56
docs/project-lessons-learned/INDEX.md
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# Project Lessons Learned
|
||||||
|
|
||||||
|
This folder contains lessons learned from sprints and development work. These lessons help prevent repeating mistakes and capture valuable insights.
|
||||||
|
|
||||||
|
**Note:** This is a temporary local backup while Wiki.js integration is being configured. Once Wiki.js is ready, lessons will be migrated there for better searchability.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Lessons Index
|
||||||
|
|
||||||
|
| Date | Sprint/Phase | Title | Tags |
|------|--------------|-------|------|
| 2026-02-01 | Sprint 10 | [Formspree Integration with Dash Callbacks](./sprint-10-formspree-dash-integration.md) | formspree, dash, callbacks, forms, spam-protection, honeypot, ajax |
| 2026-01-17 | Sprint 9 | [Gitea Labels API Requires Org Context](./sprint-9-gitea-labels-user-repos.md) | gitea, mcp, api, labels, projman, configuration |
| 2026-01-17 | Sprint 9 | [Always Read CLAUDE.md Before Asking Questions](./sprint-9-read-claude-md-first.md) | projman, claude-code, context, documentation, workflow |
| 2026-01-17 | Sprint 9-10 | [Graceful Error Handling in Service Layers](./sprint-9-10-graceful-error-handling.md) | python, postgresql, error-handling, dash, graceful-degradation, arm64 |
| 2026-01-17 | Sprint 9-10 | [Modular Callback Structure](./sprint-9-10-modular-callback-structure.md) | dash, callbacks, architecture, python, code-organization |
| 2026-01-17 | Sprint 9-10 | [Figure Factory Pattern](./sprint-9-10-figure-factory-pattern.md) | plotly, dash, design-patterns, python, visualization |
| 2026-01-16 | Phase 4 | [dbt Test Syntax Deprecation](./phase-4-dbt-test-syntax.md) | dbt, testing, yaml, deprecation |

|
||||||
|
---
|
||||||
|
|
||||||
|
## How to Use
|
||||||
|
|
||||||
|
### When Starting a Sprint
|
||||||
|
1. Review relevant lessons in this folder before implementation
|
||||||
|
2. Search by tags or keywords to find applicable insights
|
||||||
|
3. Apply prevention strategies from past lessons
|
||||||
|
|
||||||
|
### When Closing a Sprint
|
||||||
|
1. Document any significant lessons learned
|
||||||
|
2. Use the template below
|
||||||
|
3. Add entry to the index table above
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Lesson Template
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
# [Sprint/Phase] - [Lesson Title]
|
||||||
|
|
||||||
|
## Context
|
||||||
|
[What were you trying to do?]
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
[What went wrong or what insight emerged?]
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
[How did you solve it?]
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
[How can this be avoided in future sprints?]
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
[Comma-separated tags for search]
|
||||||
|
```
|
||||||
38
docs/project-lessons-learned/phase-4-dbt-test-syntax.md
Normal file
38
docs/project-lessons-learned/phase-4-dbt-test-syntax.md
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Phase 4 - dbt Test Syntax Deprecation
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Implementing dbt mart models with `accepted_values` tests for tier columns (safety_tier, income_quintile, amenity_tier) that should only contain values 1-5.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
dbt 1.9+ introduced a deprecation warning for generic test arguments. The old syntax:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
tests:
|
||||||
|
- accepted_values:
|
||||||
|
values: [1, 2, 3, 4, 5]
|
||||||
|
```
|
||||||
|
|
||||||
|
Produces deprecation warnings:
|
||||||
|
```
|
||||||
|
MissingArgumentsPropertyInGenericTestDeprecation: Arguments to generic tests should be nested under the `arguments` property.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Nest test arguments under the `arguments` property:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
tests:
|
||||||
|
- accepted_values:
|
||||||
|
arguments:
|
||||||
|
values: [1, 2, 3, 4, 5]
|
||||||
|
```
|
||||||
|
|
||||||
|
This applies to all generic tests with arguments, not just `accepted_values`.
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- When writing dbt schema YAML files, always use the `arguments:` nesting for generic tests
|
||||||
|
- Run `dbt parse --no-partial-parse` to catch all deprecation warnings before they become errors
|
||||||
|
- Check dbt changelog when upgrading versions for breaking changes to test syntax
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
dbt, testing, yaml, deprecation, syntax, schema
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
# Sprint 10 - Formspree Integration with Dash Callbacks
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Implementing a contact form on a Dash portfolio site that submits to Formspree, a third-party form handling service.
|
||||||
|
|
||||||
|
## Insights
|
||||||
|
|
||||||
|
### Formspree AJAX Submission
|
||||||
|
Formspree supports AJAX submissions (no page redirect) when you:
|
||||||
|
1. POST with `Content-Type: application/json`
|
||||||
|
2. Include `Accept: application/json` header
|
||||||
|
3. Send form data as JSON body
|
||||||
|
|
||||||
|
This returns a JSON response instead of redirecting to a thank-you page, which is ideal for single-page Dash applications.
|
||||||
|
|
||||||
|
### Dash Multi-Output Callbacks for Forms
|
||||||
|
When handling form submission with validation and feedback, use a multi-output callback pattern:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@callback(
|
||||||
|
Output("feedback-container", "children"), # Success/error alert
|
||||||
|
Output("submit-button", "loading"), # Button loading state
|
||||||
|
Output("field-1", "value"), # Clear on success
|
||||||
|
Output("field-2", "value"), # Clear on success
|
||||||
|
Output("field-1", "error"), # Field-level errors
|
||||||
|
Output("field-2", "error"), # Field-level errors
|
||||||
|
Input("submit-button", "n_clicks"),
|
||||||
|
State("field-1", "value"),
|
||||||
|
State("field-2", "value"),
|
||||||
|
prevent_initial_call=True,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `no_update` for outputs you don't want to change (e.g., keep form values on validation error, only clear on success).
|
||||||
|
|
||||||
|
### Honeypot Spam Protection
|
||||||
|
Simple and effective bot protection without CAPTCHA:
|
||||||
|
1. Add a hidden text input field (CSS: `position: absolute; left: -9999px`)
|
||||||
|
2. Set `tabIndex=-1` and `autoComplete="off"` to prevent accidental filling
|
||||||
|
3. In callback, check if honeypot has value - if yes, it's a bot
|
||||||
|
4. For bots: return fake success (don't reveal detection)
|
||||||
|
5. For humans: proceed with real submission
|
||||||
|
|
||||||
|
Formspree also accepts `_gotcha` as a honeypot field name in the JSON payload.
|
||||||
|
|
||||||
|
## Code Pattern
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Honeypot check - bots fill hidden fields
|
||||||
|
if honeypot_value:
|
||||||
|
# Fake success - don't let bots know they were caught
|
||||||
|
return (_create_success_alert(), False, "", "", None, None)
|
||||||
|
|
||||||
|
# Real submission for humans
|
||||||
|
response = requests.post(
|
||||||
|
FORMSPREE_ENDPOINT,
|
||||||
|
json=form_data,
|
||||||
|
headers={"Accept": "application/json", "Content-Type": "application/json"},
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prevention/Best Practices
|
||||||
|
- Always use `timeout` parameter with `requests.post()` to avoid hanging
|
||||||
|
- Wrap external API calls in try/except for network errors
|
||||||
|
- Return user-friendly error messages, not technical details
|
||||||
|
- Use DMC's `required=True` and `error` props for form validation feedback
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
formspree, dash, callbacks, forms, spam-protection, honeypot, ajax, python, requests, validation
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
# Sprint 9-10 - Figure Factory Pattern for Reusable Charts
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Creating multiple chart types across 5 dashboard tabs, with consistent styling and behavior needed across all visualizations.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
Without a standardized approach, each callback would create figures inline with:
|
||||||
|
- Duplicated styling code (colors, fonts, backgrounds)
|
||||||
|
- Inconsistent hover templates
|
||||||
|
- Hard-to-maintain figure creation logic
|
||||||
|
- No reuse between tabs
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Created a `figures/` module with factory functions:
|
||||||
|
|
||||||
|
```
|
||||||
|
figures/
|
||||||
|
├── __init__.py # Exports all factories
|
||||||
|
├── choropleth.py # Map visualizations
|
||||||
|
├── bar_charts.py # ranking_bar, stacked_bar, horizontal_bar
|
||||||
|
├── scatter.py # scatter_figure, bubble_chart
|
||||||
|
├── radar.py # radar_figure, comparison_radar
|
||||||
|
└── demographics.py # age_pyramid, donut_chart
|
||||||
|
```
|
||||||
|
|
||||||
|
Factory pattern benefits:
|
||||||
|
1. **Consistent styling** - dark theme applied once
|
||||||
|
2. **Type-safe interfaces** - clear parameters for each chart type
|
||||||
|
3. **Easy testing** - factories can be unit tested with sample data
|
||||||
|
4. **Reusability** - same factory used across multiple tabs
|
||||||
|
|
||||||
|
Example factory signature:
|
||||||
|
```python
|
||||||
|
def create_ranking_bar(
|
||||||
|
data: list[dict],
|
||||||
|
name_column: str,
|
||||||
|
value_column: str,
|
||||||
|
title: str = "",
|
||||||
|
top_n: int = 5,
|
||||||
|
bottom_n: int = 5,
|
||||||
|
top_color: str = "#4CAF50",
|
||||||
|
bottom_color: str = "#F44336",
|
||||||
|
) -> go.Figure:
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- **Create factories early** - before implementing callbacks
|
||||||
|
- **Design generic interfaces** - factories should work with any data matching the schema
|
||||||
|
- **Apply styling in one place** - use constants for colors, fonts
|
||||||
|
- **Test factories independently** - with synthetic data before integration
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
plotly, dash, design-patterns, python, visualization, reusability, code-organization
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
# Sprint 9-10 - Graceful Error Handling in Service Layers
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Building the Toronto Neighbourhood Dashboard with a service layer that queries PostgreSQL/PostGIS dbt marts to provide data to Dash callbacks.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
Initial service layer implementation let database connection errors propagate as unhandled exceptions. When the PostGIS Docker container was unavailable (common on ARM64 systems where the x86_64 image fails), the entire dashboard would crash instead of gracefully degrading.
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Wrapped database queries in try/except blocks to return empty DataFrames/lists/dicts when the database is unavailable:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def _execute_query(sql: str, params: dict | None = None) -> pd.DataFrame:
|
||||||
|
try:
|
||||||
|
engine = get_engine()
|
||||||
|
with engine.connect() as conn:
|
||||||
|
return pd.read_sql(text(sql), conn, params=params)
|
||||||
|
except Exception:
|
||||||
|
return pd.DataFrame()
|
||||||
|
```
|
||||||
|
|
||||||
|
This allows:
|
||||||
|
1. Dashboard to load and display empty states
|
||||||
|
2. Development/testing without running database
|
||||||
|
3. Graceful degradation in production
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- **Always design service layers with graceful degradation** - assume external dependencies can fail
|
||||||
|
- **Return empty collections, not exceptions** - let UI components handle empty states
|
||||||
|
- **Test without database** - verify the app doesn't crash when DB is unavailable
|
||||||
|
- **Consider ARM64 compatibility** - PostGIS images may not support all platforms
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
python, postgresql, service-layer, error-handling, dash, graceful-degradation, arm64
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
# Sprint 9-10 - Modular Callback Structure for Multi-Tab Dashboards
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Implementing a 5-tab Toronto Neighbourhood Dashboard with multiple callbacks per tab (map updates, chart updates, KPI updates, selection handling).
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
Initial callback implementation approach would have placed all callbacks in a single file, leading to:
|
||||||
|
- A monolithic file with 500+ lines
|
||||||
|
- Difficult-to-navigate code
|
||||||
|
- Callbacks for different tabs interleaved
|
||||||
|
- Testing difficulties
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Organized callbacks into three focused modules:
|
||||||
|
|
||||||
|
```
|
||||||
|
callbacks/
|
||||||
|
├── __init__.py # Imports all modules to register callbacks
|
||||||
|
├── map_callbacks.py # Choropleth updates, map click handling
|
||||||
|
├── chart_callbacks.py # Supporting chart updates (scatter, trend, donut)
|
||||||
|
└── selection_callbacks.py # Dropdown population, KPI updates
|
||||||
|
```
|
||||||
|
|
||||||
|
Key patterns:
|
||||||
|
1. **Group by responsibility**, not by tab - all map-related callbacks together
|
||||||
|
2. **Use noqa comments** for imports that register callbacks as side effects
|
||||||
|
3. **Share helper functions** (like `_empty_chart()`) within modules
|
||||||
|
|
||||||
|
```python
|
||||||
|
# callbacks/__init__.py
|
||||||
|
from . import (
|
||||||
|
chart_callbacks, # noqa: F401
|
||||||
|
map_callbacks, # noqa: F401
|
||||||
|
selection_callbacks, # noqa: F401
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- **Plan callback organization before implementation** - sketch which callbacks go where
|
||||||
|
- **Group by function, not by feature** - keeps related logic together
|
||||||
|
- **Keep modules under 400 lines** - split if exceeding
|
||||||
|
- **Test imports early** - verify callbacks register correctly
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
dash, callbacks, architecture, python, code-organization, maintainability
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# Sprint 9 - Gitea Labels API Requires Org Context
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Creating Gitea issues with labels via MCP tools during Sprint 9 planning for the personal-portfolio project.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
When calling `create_issue` with a `labels` parameter, received:
|
||||||
|
```
|
||||||
|
404 Client Error: Not Found for url: https://gitea.hotserv.cloud/api/v1/orgs/lmiranda/labels
|
||||||
|
```
|
||||||
|
|
||||||
|
The API attempted to fetch labels from an **organization** endpoint, but `lmiranda` is a **user account**, not an organization.
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Created issues without the `labels` parameter and documented intended labels in the issue body instead:
|
||||||
|
```markdown
|
||||||
|
**Labels:** Type/Feature, Priority/Medium, Complexity/Simple, Efforts/XS, Component/Docs, Tech/Python
|
||||||
|
```
|
||||||
|
|
||||||
|
This provides visibility into intended categorization while avoiding the API error.
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- When working with user-owned repos (not org repos), avoid using the `labels` parameter in `create_issue`
|
||||||
|
- Document labels in issue body as a workaround
|
||||||
|
- Consider creating a repo-level label set for user repos (Gitea supports this)
|
||||||
|
- Update projman plugin to handle user vs org repos differently
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
gitea, mcp, api, labels, projman, configuration
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Sprint 9 - Always Read CLAUDE.md Before Asking Questions
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Starting Sprint 9 planning session with `/projman:sprint-plan` command.
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
Asked the user "what should I do?" when all the necessary context was already documented in CLAUDE.md:
|
||||||
|
- Current sprint number and phase
|
||||||
|
- Implementation plan location
|
||||||
|
- Remaining phases to complete
|
||||||
|
- Project conventions and workflows
|
||||||
|
|
||||||
|
This caused user frustration: "why are you asking what to do? cant you see this yourself"
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
Before asking any questions about what to do:
|
||||||
|
1. Read `CLAUDE.md` in the project root
|
||||||
|
2. Check "Project Status" section for current sprint/phase
|
||||||
|
3. Follow references to implementation plans
|
||||||
|
4. Review "Projman Plugin Workflow" section for expected behavior
|
||||||
|
|
||||||
|
## Prevention
|
||||||
|
- **ALWAYS** read CLAUDE.md at the start of any sprint-related command
|
||||||
|
- Look for "Current Sprint" and "Phase" indicators
|
||||||
|
- Check for implementation plan references in `docs/changes/`
|
||||||
|
- Only ask questions if information is genuinely missing from documentation
|
||||||
|
- The projman plugin expects autonomous behavior based on documented context
|
||||||
|
|
||||||
|
## Tags
|
||||||
|
projman, claude-code, context, documentation, workflow, sprint-planning
|
||||||
265
docs/runbooks/adding-dashboard.md
Normal file
265
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
# Runbook: Adding a New Dashboard
|
||||||
|
|
||||||
|
This runbook describes how to add a new data dashboard to the portfolio application.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- [ ] Data sources identified and accessible
|
||||||
|
- [ ] Database schema designed
|
||||||
|
- [ ] Basic Dash/Plotly familiarity
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
|
||||||
|
Create the following structure:
|
||||||
|
|
||||||
|
### Application Code (`portfolio_app/`)
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── pages/
|
||||||
|
│ └── {dashboard_name}/
|
||||||
|
│ ├── dashboard.py # Main layout with tabs
|
||||||
|
│ ├── methodology.py # Data sources and methods page
|
||||||
|
│ ├── tabs/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── overview.py # Overview tab layout
|
||||||
|
│ │ └── ... # Additional tab layouts
|
||||||
|
│ └── callbacks/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ └── ... # Callback modules
|
||||||
|
├── {dashboard_name}/ # Data logic (outside pages/)
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── parsers/ # API/CSV extraction
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ ├── loaders/ # Database operations
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ ├── schemas/ # Pydantic models
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ └── models/ # SQLAlchemy ORM (schema: raw_{dashboard_name})
|
||||||
|
│ └── __init__.py
|
||||||
|
└── figures/
|
||||||
|
└── {dashboard_name}/ # Figure factories for this dashboard
|
||||||
|
├── __init__.py
|
||||||
|
└── ... # Chart modules
|
||||||
|
```
|
||||||
|
|
||||||
|
### dbt Models (`dbt/models/`)
|
||||||
|
|
||||||
|
```
|
||||||
|
dbt/models/
|
||||||
|
├── staging/
|
||||||
|
│ └── {dashboard_name}/ # Staging models
|
||||||
|
│ ├── _sources.yml # Source definitions (schema: raw_{dashboard_name})
|
||||||
|
│ ├── _staging.yml # Model tests/docs
|
||||||
|
│ └── stg_*.sql # Staging models
|
||||||
|
├── intermediate/
|
||||||
|
│ └── {dashboard_name}/ # Intermediate models
|
||||||
|
│ ├── _intermediate.yml
|
||||||
|
│ └── int_*.sql
|
||||||
|
└── marts/
|
||||||
|
└── {dashboard_name}/ # Mart tables
|
||||||
|
├── _marts.yml
|
||||||
|
└── mart_*.sql
|
||||||
|
```
|
||||||
|
|
||||||
|
### Documentation (`notebooks/`)
|
||||||
|
|
||||||
|
```
|
||||||
|
notebooks/
|
||||||
|
└── {dashboard_name}/ # Domain subdirectories
|
||||||
|
├── overview/
|
||||||
|
    └── ...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step-by-Step Checklist
|
||||||
|
|
||||||
|
### 1. Data Layer
|
||||||
|
|
||||||
|
- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/`
|
||||||
|
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
|
||||||
|
- [ ] Create parsers in `{dashboard_name}/parsers/`
|
||||||
|
- [ ] Create loaders in `{dashboard_name}/loaders/`
|
||||||
|
- [ ] Add database migrations if needed
|
||||||
|
|
||||||
|
### 2. Database Schema
|
||||||
|
|
||||||
|
- [ ] Define schema constant in models (e.g., `RAW_FOOTBALL_SCHEMA = "raw_football"`)
|
||||||
|
- [ ] Add `__table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}` to all models
|
||||||
|
- [ ] Update `scripts/db/init_schema.py` to create the new schema
|
||||||
|
|
||||||
|
### 3. dbt Models
|
||||||
|
|
||||||
|
Create dbt models in `dbt/models/`:
|
||||||
|
|
||||||
|
- [ ] `staging/{dashboard_name}/_sources.yml` - Source definitions pointing to `raw_{dashboard_name}` schema
|
||||||
|
- [ ] `staging/{dashboard_name}/stg_{source}__{entity}.sql` - Raw data cleaning
|
||||||
|
- [ ] `intermediate/{dashboard_name}/int_{domain}__{transform}.sql` - Business logic
|
||||||
|
- [ ] `marts/{dashboard_name}/mart_{domain}.sql` - Final analytical tables
|
||||||
|
|
||||||
|
Update `dbt/dbt_project.yml` with new subdirectory config:
|
||||||
|
```yaml
|
||||||
|
models:
|
||||||
|
portfolio:
|
||||||
|
staging:
|
||||||
|
{dashboard_name}:
|
||||||
|
+materialized: view
|
||||||
|
+schema: stg_{dashboard_name}
|
||||||
|
intermediate:
|
||||||
|
{dashboard_name}:
|
||||||
|
+materialized: view
|
||||||
|
+schema: int_{dashboard_name}
|
||||||
|
marts:
|
||||||
|
{dashboard_name}:
|
||||||
|
+materialized: table
|
||||||
|
+schema: mart_{dashboard_name}
|
||||||
|
```
|
||||||
|
|
||||||
|
Follow naming conventions:
|
||||||
|
- Staging: `stg_{source}__{entity}`
|
||||||
|
- Intermediate: `int_{domain}__{transform}`
|
||||||
|
- Marts: `mart_{domain}`
|
||||||
|
|
||||||
|
### 4. Visualization Layer
|
||||||
|
|
||||||
|
- [ ] Create figure factories in `figures/{dashboard_name}/`
|
||||||
|
- [ ] Create `figures/{dashboard_name}/__init__.py` with exports
|
||||||
|
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`
|
||||||
|
|
||||||
|
Import pattern:
|
||||||
|
```python
|
||||||
|
from portfolio_app.figures.{dashboard_name} import create_choropleth_figure
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Dashboard Pages
|
||||||
|
|
||||||
|
#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)
|
||||||
|
|
||||||
|
```python
|
||||||
|
import dash
|
||||||
|
from dash import html, dcc
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path="/{dashboard_name}",
|
||||||
|
title="{Dashboard Title}",
|
||||||
|
description="{Description}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def layout():
|
||||||
|
return dmc.Container([
|
||||||
|
# Header
|
||||||
|
dmc.Title("{Dashboard Title}", order=1),
|
||||||
|
|
||||||
|
# Tabs
|
||||||
|
dmc.Tabs([
|
||||||
|
dmc.TabsList([
|
||||||
|
dmc.TabsTab("Overview", value="overview"),
|
||||||
|
# Add more tabs
|
||||||
|
]),
|
||||||
|
dmc.TabsPanel(overview_tab(), value="overview"),
|
||||||
|
# Add more panels
|
||||||
|
], value="overview"),
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Tab Layouts (`pages/{dashboard_name}/tabs/`)
|
||||||
|
|
||||||
|
- [ ] Create one file per tab
|
||||||
|
- [ ] Export layout function from each
|
||||||
|
|
||||||
|
#### Callbacks (`pages/{dashboard_name}/callbacks/`)
|
||||||
|
|
||||||
|
- [ ] Create callback modules for interactivity
|
||||||
|
- [ ] Import and register in dashboard.py
|
||||||
|
|
||||||
|
### 6. Navigation
|
||||||
|
|
||||||
|
Add to sidebar in `components/sidebar.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
dmc.NavLink(
|
||||||
|
label="{Dashboard Name}",
|
||||||
|
href="/{dashboard_name}",
|
||||||
|
icon=DashIconify(icon="..."),
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Documentation
|
||||||
|
|
||||||
|
- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
|
||||||
|
- [ ] Document data sources
|
||||||
|
- [ ] Document transformation logic
|
||||||
|
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed
|
||||||
|
|
||||||
|
### 8. Testing
|
||||||
|
|
||||||
|
- [ ] Add unit tests for parsers
|
||||||
|
- [ ] Add unit tests for loaders
|
||||||
|
- [ ] Add integration tests for callbacks
|
||||||
|
- [ ] Run `make test`
|
||||||
|
|
||||||
|
### 9. Final Verification
|
||||||
|
|
||||||
|
- [ ] All pages render without errors
|
||||||
|
- [ ] All callbacks respond correctly
|
||||||
|
- [ ] Data loads successfully
|
||||||
|
- [ ] dbt models run cleanly (`make dbt-run`)
|
||||||
|
- [ ] Linting passes (`make lint`)
|
||||||
|
- [ ] Tests pass (`make test`)
|
||||||
|
|
||||||
|
## Example: Toronto Dashboard
|
||||||
|
|
||||||
|
Reference implementation: `portfolio_app/pages/toronto/`
|
||||||
|
|
||||||
|
Key files:
|
||||||
|
- `dashboard.py` - Main layout with 5 tabs
|
||||||
|
- `tabs/overview.py` - Livability scores, scatter plots
|
||||||
|
- `callbacks/map_callbacks.py` - Choropleth interactions
|
||||||
|
- `toronto/models/dimensions.py` - Dimension tables
|
||||||
|
- `toronto/models/facts.py` - Fact tables
|
||||||
|
|
||||||
|
## Common Patterns
|
||||||
|
|
||||||
|
### Figure Factories
|
||||||
|
|
||||||
|
```python
|
||||||
|
# figures/choropleth.py
|
||||||
|
def create_choropleth_figure(
|
||||||
|
gdf: gpd.GeoDataFrame,
|
||||||
|
value_column: str,
|
||||||
|
title: str,
|
||||||
|
**kwargs
|
||||||
|
) -> go.Figure:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Callbacks
|
||||||
|
|
||||||
|
```python
|
||||||
|
# callbacks/map_callbacks.py
|
||||||
|
@callback(
|
||||||
|
Output("neighbourhood-details", "children"),
|
||||||
|
Input("choropleth-map", "clickData"),
|
||||||
|
)
|
||||||
|
def update_details(click_data):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Loading
|
||||||
|
|
||||||
|
```python
|
||||||
|
# {dashboard_name}/loaders/load.py
|
||||||
|
def load_data(session: Session) -> None:
|
||||||
|
# Parse from source
|
||||||
|
records = parse_source_data()
|
||||||
|
|
||||||
|
# Validate with Pydantic
|
||||||
|
validated = [Schema(**r) for r in records]
|
||||||
|
|
||||||
|
# Load to database
|
||||||
|
for record in validated:
|
||||||
|
session.add(Model(**record.model_dump()))
|
||||||
|
|
||||||
|
session.commit()
|
||||||
|
```
|
||||||
232
docs/runbooks/deployment.md
Normal file
232
docs/runbooks/deployment.md
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
# Runbook: Deployment
|
||||||
|
|
||||||
|
This runbook covers deployment procedures for the Analytics Portfolio application.
|
||||||
|
|
||||||
|
## Environments
|
||||||
|
|
||||||
|
| Environment | Branch | Server | URL |
|
||||||
|
|-------------|--------|--------|-----|
|
||||||
|
| Development | `development` | Local | http://localhost:8050 |
|
||||||
|
| Staging | `staging` | Homelab (hotserv) | Internal |
|
||||||
|
| Production | `main` | Bandit Labs VPS | https://leodata.science |
|
||||||
|
|
||||||
|
## CI/CD Pipeline
|
||||||
|
|
||||||
|
### Automatic Deployment
|
||||||
|
|
||||||
|
Deployments are triggered automatically via Gitea Actions:
|
||||||
|
|
||||||
|
1. **Push to `staging`** → Deploys to staging server
|
||||||
|
2. **Push to `main`** → Deploys to production server
|
||||||
|
|
||||||
|
### Workflow Files
|
||||||
|
|
||||||
|
- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
|
||||||
|
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
|
||||||
|
- `.gitea/workflows/deploy-production.yml` - Production deployment
|
||||||
|
|
||||||
|
### Required Secrets
|
||||||
|
|
||||||
|
Configure these in Gitea repository settings:
|
||||||
|
|
||||||
|
| Secret | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `STAGING_HOST` | Staging server hostname/IP |
|
||||||
|
| `STAGING_USER` | SSH username for staging |
|
||||||
|
| `STAGING_SSH_KEY` | Private key for staging SSH |
|
||||||
|
| `PROD_HOST` | Production server hostname/IP |
|
||||||
|
| `PROD_USER` | SSH username for production |
|
||||||
|
| `PROD_SSH_KEY` | Private key for production SSH |
|
||||||
|
|
||||||
|
## Manual Deployment
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- SSH access to target server
|
||||||
|
- Repository cloned at `~/apps/personal-portfolio`
|
||||||
|
- Virtual environment created at `.venv`
|
||||||
|
- Docker and Docker Compose installed
|
||||||
|
- PostgreSQL container running
|
||||||
|
|
||||||
|
### Steps
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. SSH to server
|
||||||
|
ssh user@server
|
||||||
|
|
||||||
|
# 2. Navigate to app directory
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
# 3. Pull latest changes
|
||||||
|
git fetch origin {branch}
|
||||||
|
git reset --hard origin/{branch}
|
||||||
|
|
||||||
|
# 4. Activate virtual environment
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
# 5. Install dependencies
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 6. Run database migrations (if any)
|
||||||
|
# python -m alembic upgrade head
|
||||||
|
|
||||||
|
# 7. Run dbt models
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
# 8. Restart application
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 9. Verify health
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rollback Procedure
|
||||||
|
|
||||||
|
### Quick Rollback
|
||||||
|
|
||||||
|
If deployment fails, rollback to previous commit:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Find previous working commit
|
||||||
|
git log --oneline -10
|
||||||
|
|
||||||
|
# 2. Reset to that commit
|
||||||
|
git reset --hard {commit_hash}
|
||||||
|
|
||||||
|
# 3. Restart services
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 4. Verify
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
### Full Rollback (Database)
|
||||||
|
|
||||||
|
If database changes need to be reverted:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Stop application
|
||||||
|
docker compose down
|
||||||
|
|
||||||
|
# 2. Restore database backup
|
||||||
|
pg_restore -h localhost -U portfolio -d portfolio backup.dump
|
||||||
|
|
||||||
|
# 3. Revert code
|
||||||
|
git reset --hard {commit_hash}
|
||||||
|
|
||||||
|
# 4. Run dbt at that version
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
# 5. Restart
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Health Checks
|
||||||
|
|
||||||
|
### Application Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response:
|
||||||
|
```json
|
||||||
|
{"status": "healthy"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec postgres pg_isready -U portfolio
|
||||||
|
```
|
||||||
|
|
||||||
|
### Container Status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose ps
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
### View Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# All services
|
||||||
|
make logs
|
||||||
|
|
||||||
|
# Specific service
|
||||||
|
make logs SERVICE=postgres
|
||||||
|
|
||||||
|
# Or directly
|
||||||
|
docker compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Resource Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker stats
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Application Won't Start
|
||||||
|
|
||||||
|
1. Check container logs: `docker compose logs app`
|
||||||
|
2. Verify environment variables: `cat .env`
|
||||||
|
3. Check database connectivity: `docker compose exec postgres pg_isready`
|
||||||
|
4. Verify port availability: `lsof -i :8050`
|
||||||
|
|
||||||
|
### Database Connection Errors
|
||||||
|
|
||||||
|
1. Check postgres container: `docker compose ps postgres`
|
||||||
|
2. Verify DATABASE_URL in `.env`
|
||||||
|
3. Check postgres logs: `docker compose logs postgres`
|
||||||
|
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`
|
||||||
|
|
||||||
|
### dbt Failures
|
||||||
|
|
||||||
|
1. Check dbt logs: `cd dbt && dbt debug`
|
||||||
|
2. Verify profiles.yml: `cat dbt/profiles.yml`
|
||||||
|
3. Run with verbose output: `dbt run --debug`
|
||||||
|
|
||||||
|
### Out of Memory
|
||||||
|
|
||||||
|
1. Check memory usage: `free -h`
|
||||||
|
2. Review container limits in docker-compose.yml
|
||||||
|
3. Consider increasing swap or server resources
|
||||||
|
|
||||||
|
## Backup Procedures
|
||||||
|
|
||||||
|
### Database Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create backup
|
||||||
|
docker compose exec postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql
|
||||||
|
|
||||||
|
# Compressed backup
|
||||||
|
docker compose exec postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore from Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From SQL file
|
||||||
|
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql
|
||||||
|
|
||||||
|
# From dump file
|
||||||
|
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Checklist
|
||||||
|
|
||||||
|
Before deploying to production:
|
||||||
|
|
||||||
|
- [ ] All tests pass (`make test`)
|
||||||
|
- [ ] Linting passes (`make lint`)
|
||||||
|
- [ ] Staging deployment successful
|
||||||
|
- [ ] Manual testing on staging complete
|
||||||
|
- [ ] Database backup taken
|
||||||
|
- [ ] Rollback plan confirmed
|
||||||
|
- [ ] Team notified of deployment window
|
||||||
@@ -1,809 +0,0 @@
|
|||||||
# Toronto Housing Price Dashboard
|
|
||||||
## Portfolio Project — Data Specification & Architecture
|
|
||||||
|
|
||||||
**Version**: 5.1
|
|
||||||
**Last Updated**: January 2026
|
|
||||||
**Status**: Specification Complete
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Document Context
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Parent Document** | `portfolio_project_plan_v5.md` |
|
|
||||||
| **Role** | Detailed specification for Toronto Housing Dashboard |
|
|
||||||
| **Scope** | Data schemas, source URLs, geographic boundaries, V1/V2 decisions |
|
|
||||||
|
|
||||||
**Rule**: For overall project scope, phasing, tech stack, and deployment architecture, see `portfolio_project_plan_v5.md`. This document provides implementation-level detail for the Toronto Housing project specifically.
|
|
||||||
|
|
||||||
**Terminology Note**: This document uses **Stages 1–4** to describe Toronto Housing implementation steps. These are distinct from the **Phases 1–5** in `portfolio_project_plan_v5.md`, which describe the overall portfolio project lifecycle.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
A dashboard analyzing housing price variations across Toronto neighbourhoods over time, with dual analysis tracks:
|
|
||||||
|
|
||||||
| Track | Data Domain | Primary Source | Geographic Unit |
|
|
||||||
|-------|-------------|----------------|-----------------|
|
|
||||||
| **Purchases** | Sales transactions | TRREB Monthly Reports | ~35 Districts |
|
|
||||||
| **Rentals** | Rental market stats | CMHC Rental Market Survey | ~20 Zones |
|
|
||||||
|
|
||||||
**Core Visualization**: Interactive choropleth map of Toronto with toggle between rental/purchase analysis, time-series exploration by month/year.
|
|
||||||
|
|
||||||
**Enrichment Layer** (V1: overlay only): Neighbourhood-level demographic and socioeconomic context including population density, education attainment, and income. Crime data deferred to Phase 4 of the portfolio project (post-Energy project).
|
|
||||||
|
|
||||||
**Tech Stack & Deployment**: See `portfolio_project_plan_v5.md` → Tech Stack, Deployment Architecture
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Geographic Layers
|
|
||||||
|
|
||||||
### Layer Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ City of Toronto Official Neighbourhoods (158) │ ← Reference overlay + Enrichment data
|
|
||||||
├─────────────────────────────────────────────────────────────────┤
|
|
||||||
│ TRREB Districts (~35) — W01, C01, E01, etc. │ ← Purchase data
|
|
||||||
├─────────────────────────────────────────────────────────────────┤
|
|
||||||
│ CMHC Survey Zones (~20) — Census Tract aligned │ ← Rental data
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Boundary Files
|
|
||||||
|
|
||||||
| Layer | Zones | Format | Source | Status |
|
|
||||||
|-------|-------|--------|--------|--------|
|
|
||||||
| **City Neighbourhoods** | 158 | GeoJSON, Shapefile | [GitHub - jasonicarter/toronto-geojson](https://github.com/jasonicarter/toronto-geojson) | ✅ Ready to use |
|
|
||||||
| **TRREB Districts** | ~35 | PDF only | [TRREB Toronto Map PDF](https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf) | ⚠ Requires manual digitization |
|
|
||||||
| **CMHC Zones** | ~20 | R package | R `cmhc` package via `get_cmhc_geography()` | ✅ Available (see note) |
|
|
||||||
|
|
||||||
### Digitization Task: TRREB Districts
|
|
||||||
|
|
||||||
**Input**: TRREB Toronto PDF map
|
|
||||||
**Output**: GeoJSON with district codes (W01-W10, C01-C15, E01-E11)
|
|
||||||
**Tool**: QGIS
|
|
||||||
|
|
||||||
**Process**:
|
|
||||||
1. Import PDF as raster layer in QGIS
|
|
||||||
2. Create vector layer with polygon features
|
|
||||||
3. Trace district boundaries
|
|
||||||
4. Add attributes: `district_code`, `district_name`, `area_type` (West/Central/East)
|
|
||||||
5. Export as GeoJSON (WGS84 / EPSG:4326)
|
|
||||||
|
|
||||||
### CMHC Zone Boundaries
|
|
||||||
|
|
||||||
**Source**: The R `cmhc` package provides CMHC survey geography via the `get_cmhc_geography()` function.
|
|
||||||
|
|
||||||
**Extraction Process**:
|
|
||||||
```r
|
|
||||||
# In R
|
|
||||||
library(cmhc)
|
|
||||||
library(sf)
|
|
||||||
|
|
||||||
# Get Toronto CMA zones
|
|
||||||
toronto_zones <- get_cmhc_geography(
|
|
||||||
geography_type = "ZONE",
|
|
||||||
cma = "Toronto"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Export to GeoJSON for Python/PostGIS
|
|
||||||
st_write(toronto_zones, "cmhc_zones.geojson", driver = "GeoJSON")
|
|
||||||
```
|
|
||||||
|
|
||||||
**Output**: `data/toronto/raw/geo/cmhc_zones.geojson`
|
|
||||||
|
|
||||||
**Why R?**: CMHC zone boundaries are not published as standalone files. The `cmhc` R package is the only reliable programmatic source. One-time extraction, then use GeoJSON in Python stack.
|
|
||||||
|
|
||||||
### ⚠ Neighbourhood Boundary Change (140 → 158)
|
|
||||||
|
|
||||||
The City of Toronto updated from 140 to 158 social planning neighbourhoods in **April 2021**. This affects data alignment:
|
|
||||||
|
|
||||||
| Data Source | Pre-2021 | Post-2021 | Handling |
|
|
||||||
|-------------|----------|-----------|----------|
|
|
||||||
| Census (2016 and earlier) | 140 neighbourhoods | N/A | Use 140-model files |
|
|
||||||
| Census (2021+) | N/A | 158 neighbourhoods | Use 158-model files |
|
|
||||||
|
|
||||||
**V1 Strategy**: Use 2021 Census on 158 boundaries only. Defer historical trend analysis to portfolio Phase 4.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Source #1: TRREB Monthly Market Reports
|
|
||||||
|
|
||||||
### Source Details
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Provider** | Toronto Regional Real Estate Board |
|
|
||||||
| **URL** | [TRREB Market Watch](https://trreb.ca/index.php/market-news/market-watch) |
|
|
||||||
| **Format** | PDF (monthly reports) |
|
|
||||||
| **Update Frequency** | Monthly |
|
|
||||||
| **Historical Availability** | 2007–Present |
|
|
||||||
| **Access** | Public (aggregate data in PDFs) |
|
|
||||||
| **Extraction Method** | PDF parsing (`pdfplumber` or `camelot-py`) |
|
|
||||||
|
|
||||||
### Available Tables
|
|
||||||
|
|
||||||
#### Table: `trreb_monthly_summary`
|
|
||||||
**Location in PDF**: Pages 3-4 (Summary by Area)
|
|
||||||
|
|
||||||
| Column | Data Type | Description |
|
|
||||||
|--------|-----------|-------------|
|
|
||||||
| `report_date` | DATE | First of month (YYYY-MM-01) |
|
|
||||||
| `area_code` | VARCHAR(3) | District code (W01, C01, E01, etc.) |
|
|
||||||
| `area_name` | VARCHAR(100) | District name |
|
|
||||||
| `area_type` | VARCHAR(10) | West / Central / East / North |
|
|
||||||
| `sales` | INTEGER | Number of transactions |
|
|
||||||
| `dollar_volume` | DECIMAL | Total sales volume ($) |
|
|
||||||
| `avg_price` | DECIMAL | Average sale price ($) |
|
|
||||||
| `median_price` | DECIMAL | Median sale price ($) |
|
|
||||||
| `new_listings` | INTEGER | New listings count |
|
|
||||||
| `active_listings` | INTEGER | Active listings at month end |
|
|
||||||
| `avg_sp_lp` | DECIMAL | Avg sale price / list price ratio (%) |
|
|
||||||
| `avg_dom` | INTEGER | Average days on market |
|
|
||||||
|
|
||||||
### Dimensions
|
|
||||||
|
|
||||||
| Dimension | Granularity | Values |
|
|
||||||
|-----------|-------------|--------|
|
|
||||||
| **Time** | Monthly | 2007-01 to present |
|
|
||||||
| **Geography** | District | ~35 TRREB districts |
|
|
||||||
| **Property Type** | Aggregate | All residential (no breakdown in summary) |
|
|
||||||
|
|
||||||
### Metrics Available
|
|
||||||
|
|
||||||
| Metric | Aggregation | Use Case |
|
|
||||||
|--------|-------------|----------|
|
|
||||||
| `avg_price` | Pre-calculated monthly avg | Primary price indicator |
|
|
||||||
| `median_price` | Pre-calculated monthly median | Robust price indicator |
|
|
||||||
| `sales` | Count | Market activity volume |
|
|
||||||
| `avg_dom` | Average | Market velocity |
|
|
||||||
| `avg_sp_lp` | Ratio | Buyer/seller market indicator |
|
|
||||||
| `new_listings` | Count | Supply indicator |
|
|
||||||
| `active_listings` | Snapshot | Inventory level |
|
|
||||||
|
|
||||||
### ⚠ Limitations
|
|
||||||
|
|
||||||
- No transaction-level data (aggregates only)
|
|
||||||
- Property type breakdown requires parsing additional tables
|
|
||||||
- PDF structure may vary slightly across years
|
|
||||||
- District boundaries have been stable since 2011; reports published before 2011 may use older boundary definitions and require careful alignment
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Source #2: CMHC Rental Market Survey
|
|
||||||
|
|
||||||
### Source Details
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Provider** | Canada Mortgage and Housing Corporation |
|
|
||||||
| **URL** | [CMHC Housing Market Information Portal](https://www03.cmhc-schl.gc.ca/hmip-pimh/) |
|
|
||||||
| **Format** | CSV export, API |
|
|
||||||
| **Update Frequency** | Annual (October survey) |
|
|
||||||
| **Historical Availability** | 1990–Present |
|
|
||||||
| **Access** | Public, free registration for bulk downloads |
|
|
||||||
| **Geographic Levels** | CMA → Zone → Neighbourhood → Census Tract |
|
|
||||||
|
|
||||||
### Available Tables
|
|
||||||
|
|
||||||
#### Table: `cmhc_rental_summary`
|
|
||||||
**Portal Path**: Toronto → Primary Rental Market → Summary Statistics
|
|
||||||
|
|
||||||
| Column | Data Type | Description |
|
|
||||||
|--------|-----------|-------------|
|
|
||||||
| `survey_year` | INTEGER | Survey year (October) |
|
|
||||||
| `zone_code` | VARCHAR(10) | CMHC zone identifier |
|
|
||||||
| `zone_name` | VARCHAR(100) | Zone name |
|
|
||||||
| `bedroom_type` | VARCHAR(20) | Bachelor / 1-Bed / 2-Bed / 3-Bed+ / Total |
|
|
||||||
| `universe` | INTEGER | Total rental units in zone |
|
|
||||||
| `vacancy_rate` | DECIMAL | Vacancy rate (%) |
|
|
||||||
| `vacancy_rate_reliability` | VARCHAR(1) | Reliability code (a/b/c/d) |
|
|
||||||
| `availability_rate` | DECIMAL | Availability rate (%) |
|
|
||||||
| `average_rent` | DECIMAL | Average monthly rent ($) |
|
|
||||||
| `average_rent_reliability` | VARCHAR(1) | Reliability code |
|
|
||||||
| `median_rent` | DECIMAL | Median monthly rent ($) |
|
|
||||||
| `rent_change_pct` | DECIMAL | YoY rent change (%) |
|
|
||||||
| `turnover_rate` | DECIMAL | Unit turnover rate (%) |
|
|
||||||
|
|
||||||
### Dimensions
|
|
||||||
|
|
||||||
| Dimension | Granularity | Values |
|
|
||||||
|-----------|-------------|--------|
|
|
||||||
| **Time** | Annual | 1990 to present (October snapshot) |
|
|
||||||
| **Geography** | Zone | ~20 CMHC zones in Toronto CMA |
|
|
||||||
| **Bedroom Type** | Category | Bachelor, 1-Bed, 2-Bed, 3-Bed+, Total |
|
|
||||||
| **Structure Type** | Category | Row, Apartment (available in detailed tables) |
|
|
||||||
|
|
||||||
### Metrics Available
|
|
||||||
|
|
||||||
| Metric | Aggregation | Use Case |
|
|
||||||
|--------|-------------|----------|
|
|
||||||
| `average_rent` | Pre-calculated avg | Primary rent indicator |
|
|
||||||
| `median_rent` | Pre-calculated median | Robust rent indicator |
|
|
||||||
| `vacancy_rate` | Percentage | Market tightness |
|
|
||||||
| `availability_rate` | Percentage | Supply accessibility |
|
|
||||||
| `turnover_rate` | Percentage | Tenant mobility |
|
|
||||||
| `rent_change_pct` | YoY % | Rent growth tracking |
|
|
||||||
| `universe` | Count | Market size |
|
|
||||||
|
|
||||||
### Reliability Codes
|
|
||||||
|
|
||||||
| Code | Meaning | Coefficient of Variation |
|
|
||||||
|------|---------|-------------------------|
|
|
||||||
| `a` | Excellent | CV ≤ 2.5% |
|
|
||||||
| `b` | Good | 2.5% < CV ≤ 5% |
|
|
||||||
| `c` | Fair | 5% < CV ≤ 10% |
|
|
||||||
| `d` | Poor (use with caution) | CV > 10% |
|
|
||||||
| `**` | Data suppressed | Sample too small |
|
|
||||||
|
|
||||||
### ⚠ Limitations
|
|
||||||
|
|
||||||
- Annual only (no monthly granularity)
|
|
||||||
- October snapshot (point-in-time)
|
|
||||||
- Zones are larger than TRREB districts
|
|
||||||
- Purpose-built rental only (excludes condo rentals in base survey)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Source #3: City of Toronto Open Data
|
|
||||||
|
|
||||||
### Source Details
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Provider** | City of Toronto |
|
|
||||||
| **URL** | [Toronto Open Data Portal](https://open.toronto.ca/) |
|
|
||||||
| **Format** | GeoJSON, Shapefile, CSV |
|
|
||||||
| **Use Case** | Reference layer, demographic enrichment |
|
|
||||||
|
|
||||||
### Relevant Datasets
|
|
||||||
|
|
||||||
#### Dataset: `neighbourhoods`
|
|
||||||
|
|
||||||
| Column | Data Type | Description |
|
|
||||||
|--------|-----------|-------------|
|
|
||||||
| `area_id` | INTEGER | Neighbourhood ID (1-158) |
|
|
||||||
| `area_name` | VARCHAR(100) | Official neighbourhood name |
|
|
||||||
| `geometry` | POLYGON | Boundary geometry |
|
|
||||||
|
|
||||||
#### Dataset: `neighbourhood_profiles` (Census-linked)
|
|
||||||
|
|
||||||
| Column | Data Type | Description |
|
|
||||||
|--------|-----------|-------------|
|
|
||||||
| `neighbourhood_id` | INTEGER | Links to neighbourhoods |
|
|
||||||
| `population` | INTEGER | Total population |
|
|
||||||
| `avg_household_income` | DECIMAL | Average household income |
|
|
||||||
| `dwelling_count` | INTEGER | Total dwellings |
|
|
||||||
| `owner_pct` | DECIMAL | % owner-occupied |
|
|
||||||
| `renter_pct` | DECIMAL | % renter-occupied |
|
|
||||||
|
|
||||||
### Enrichment Potential
|
|
||||||
|
|
||||||
Can overlay demographic context on housing data:
|
|
||||||
- Income brackets by neighbourhood
|
|
||||||
- Ownership vs rental ratios
|
|
||||||
- Population density
|
|
||||||
- Dwelling type distribution
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Source #4: Enrichment Data (Density, Education)
|
|
||||||
|
|
||||||
### Purpose
|
|
||||||
|
|
||||||
Provide socioeconomic context to housing price analysis. Enables questions like:
|
|
||||||
- Do neighbourhoods with higher education attainment have higher prices?
|
|
||||||
- How does population density correlate with price per square foot?
|
|
||||||
|
|
||||||
### Geographic Alignment Reality
|
|
||||||
|
|
||||||
**Critical constraint**: Enrichment data is available at the **158-neighbourhood** level, while core housing data sits at **TRREB districts (~35)** and **CMHC zones (~20)**. These do not align cleanly.
|
|
||||||
|
|
||||||
```
|
|
||||||
158 Neighbourhoods (fine) → Enrichment data lives here
|
|
||||||
(no clean crosswalk)
|
|
||||||
~35 TRREB Districts (coarse) → Purchase data lives here
|
|
||||||
~20 CMHC Zones (coarse) → Rental data lives here
|
|
||||||
```
|
|
||||||
|
|
||||||
### Available Enrichment Datasets
|
|
||||||
|
|
||||||
#### Dataset: Neighbourhood Profiles (Census)
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Provider** | City of Toronto (via Statistics Canada Census) |
|
|
||||||
| **URL** | [Toronto Open Data - Neighbourhood Profiles](https://open.toronto.ca/dataset/neighbourhood-profiles/) |
|
|
||||||
| **Format** | CSV, JSON, XML, XLSX |
|
|
||||||
| **Update Frequency** | Every 5 years (Census cycle) |
|
|
||||||
| **Available Years** | 2001, 2006, 2011, 2016, 2021 |
|
|
||||||
| **Geographic Unit** | 158 neighbourhoods (140 pre-2021) |
|
|
||||||
|
|
||||||
**Key Variables**:
|
|
||||||
|
|
||||||
| Variable | Description | Use Case |
|
|
||||||
|----------|-------------|----------|
|
|
||||||
| `population` | Total population | Density calculation |
|
|
||||||
| `land_area_sqkm` | Area in square kilometers | Density calculation |
|
|
||||||
| `pop_density_per_sqkm` | Population per km² | Density metric |
|
|
||||||
| `pct_bachelors_or_higher` | % age 25-64 with bachelor's+ | Education proxy |
|
|
||||||
| `median_household_income` | Median total household income | Income metric |
|
|
||||||
| `avg_household_income` | Average total household income | Income metric |
|
|
||||||
| `pct_owner_occupied` | % owner-occupied dwellings | Tenure split |
|
|
||||||
| `pct_renter_occupied` | % renter-occupied dwellings | Tenure split |
|
|
||||||
|
|
||||||
**Download URL (2021, 158 neighbourhoods)**:
|
|
||||||
```
|
|
||||||
https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx
|
|
||||||
```
|
|
||||||
|
|
||||||
### Crime Data — Deferred to Portfolio Phase 4
|
|
||||||
|
|
||||||
Crime data (TPS Neighbourhood Crime Rates) is **not included in V1 scope**. It will be added in portfolio Phase 4 after the Energy Pricing project is complete.
|
|
||||||
|
|
||||||
**Rationale**:
|
|
||||||
- Crime data is socially/politically sensitive and requires careful methodology documentation
|
|
||||||
- V1 focuses on core housing metrics and policy events
|
|
||||||
- Deferral reduces scope creep risk
|
|
||||||
|
|
||||||
**Future Reference** (Portfolio Phase 4):
|
|
||||||
- Source: [TPS Public Safety Data Portal](https://data.torontopolice.on.ca/)
|
|
||||||
- Dataset: Neighbourhood Crime Rates (Major Crime Indicators)
|
|
||||||
- Geographic Unit: 158 neighbourhoods
|
|
||||||
|
|
||||||
### V1 Enrichment Data Summary
|
|
||||||
|
|
||||||
| Measure | Source | Geography | Frequency | Format | Status |
|
|
||||||
|---------|--------|-----------|-----------|--------|--------|
|
|
||||||
| **Population Density** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
|
||||||
| **Education Attainment** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
|
||||||
| **Median Income** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
|
||||||
| **Crime Rates (MCI)** | TPS Data Portal | 158 neighbourhoods | Annual | GeoJSON/CSV | Deferred to Portfolio Phase 4 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Source #5: Policy Events
|
|
||||||
|
|
||||||
### Purpose
|
|
||||||
|
|
||||||
Provide temporal context for housing price movements. Display as annotation markers on time series charts. **No causation claims** — correlation/context only.
|
|
||||||
|
|
||||||
### Event Schema
|
|
||||||
|
|
||||||
#### Table: `dim_policy_event`
|
|
||||||
|
|
||||||
| Column | Data Type | Description |
|
|
||||||
|--------|-----------|-------------|
|
|
||||||
| `event_id` | INTEGER (PK) | Auto-increment primary key |
|
|
||||||
| `event_date` | DATE | Date event was announced/occurred |
|
|
||||||
| `effective_date` | DATE | Date policy took effect (if different) |
|
|
||||||
| `level` | VARCHAR(20) | `federal` / `provincial` / `municipal` |
|
|
||||||
| `category` | VARCHAR(20) | `monetary` / `tax` / `regulatory` / `supply` / `economic` |
|
|
||||||
| `title` | VARCHAR(200) | Short event title for display |
|
|
||||||
| `description` | TEXT | Longer description for tooltip |
|
|
||||||
| `expected_direction` | VARCHAR(10) | `bearish` / `bullish` / `neutral` |
|
|
||||||
| `source_url` | VARCHAR(500) | Link to official announcement/documentation |
|
|
||||||
| `confidence` | VARCHAR(10) | `high` / `medium` / `low` |
|
|
||||||
| `created_at` | TIMESTAMP | Record creation timestamp |
|
|
||||||
|
|
||||||
### Event Tiers
|
|
||||||
|
|
||||||
| Tier | Level | Category Examples | Inclusion Criteria |
|
|
||||||
|------|-------|-------------------|-------------------|
|
|
||||||
| **1** | Federal | BoC rate decisions, OSFI stress tests | Always include; objective, documented |
|
|
||||||
| **1** | Provincial | Fair Housing Plan, foreign buyer tax, rent control | Always include; legislative record |
|
|
||||||
| **2** | Municipal | Zoning reforms, development charges | Include if material impact expected |
|
|
||||||
| **2** | Economic | COVID measures, major employer closures | Include if Toronto-specific impact |
|
|
||||||
| **3** | Market | Major project announcements | Strict criteria; must be verifiable |
|
|
||||||
|
|
||||||
### Expected Direction Values
|
|
||||||
|
|
||||||
| Value | Meaning | Example |
|
|
||||||
|-------|---------|---------|
|
|
||||||
| `bullish` | Expected to increase prices | Rate cut, supply restriction |
|
|
||||||
| `bearish` | Expected to decrease prices | Rate hike, foreign buyer tax |
|
|
||||||
| `neutral` | Uncertain or mixed impact | Regulatory clarification |
|
|
||||||
|
|
||||||
### ⚠ Caveats
|
|
||||||
|
|
||||||
- **No causation claims**: Events are context, not explanation
|
|
||||||
- **Lag effects**: Policy impact may not be immediate
|
|
||||||
- **Confounding factors**: Multiple simultaneous influences
|
|
||||||
- **Display only**: No statistical analysis in V1
|
|
||||||
|
|
||||||
### Sample Events (Tier 1)
|
|
||||||
|
|
||||||
| Date | Level | Category | Title | Direction |
|
|
||||||
|------|-------|----------|-------|-----------|
|
|
||||||
| 2017-04-20 | provincial | tax | Ontario Fair Housing Plan | bearish |
|
|
||||||
| 2018-01-01 | federal | regulatory | OSFI B-20 Stress Test | bearish |
|
|
||||||
| 2020-03-27 | federal | monetary | BoC Emergency Rate Cut (0.25%) | bullish |
|
|
||||||
| 2022-03-02 | federal | monetary | BoC Rate Hike Cycle Begins | bearish |
|
|
||||||
| 2023-01-01 | federal | regulatory | Federal 2-Year Foreign Buyer Ban | bearish |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Integration Strategy
|
|
||||||
|
|
||||||
### Temporal Alignment
|
|
||||||
|
|
||||||
| Source | Native Frequency | Alignment Strategy |
|
|
||||||
|--------|------------------|---------------------|
|
|
||||||
| TRREB | Monthly | Use as-is |
|
|
||||||
| CMHC | Annual (October) | Spread to monthly OR display annual overlay |
|
|
||||||
| Census/Enrichment | 5-year | Static snapshot; display as reference |
|
|
||||||
| Policy Events | Event-based | Display as vertical markers on time axis |
|
|
||||||
|
|
||||||
**Recommendation**: Keep separate time axes. TRREB monthly for purchases, CMHC annual for rentals. Don't force artificial monthly rental data.
|
|
||||||
|
|
||||||
### Geographic Alignment
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ VISUALIZATION APPROACH │
|
|
||||||
├─────────────────────────────────────────────────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ Purchase Mode Rental Mode │
|
|
||||||
│ ───────────────── ────────────── │
|
|
||||||
│ Map: TRREB Districts Map: CMHC Zones │
|
|
||||||
│ Time: Monthly slider Time: Annual selector │
|
|
||||||
│ Metrics: Price, Sales Metrics: Rent, Vacancy │
|
|
||||||
│ │
|
|
||||||
│ ┌───────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ City Neighbourhoods Overlay │ │
|
|
||||||
│ │ (158 boundaries as reference layer) │ │
|
|
||||||
│ │ + Enrichment data (density, education, income) │ │
|
|
||||||
│  └───────────────────────────────────────────────────────┘    │
|
|
||||||
│ │
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Enrichment Integration Strategy (Phased)
|
|
||||||
|
|
||||||
#### V1: Reference Overlay (Current Scope)
|
|
||||||
|
|
||||||
**Approach**: Display neighbourhood enrichment as a separate toggle-able layer. No joins to housing data.
|
|
||||||
|
|
||||||
**UX**:
|
|
||||||
- User hovers over TRREB district → tooltip shows "This district contains neighbourhoods: Annex, Casa Loma, Yorkville..."
|
|
||||||
- User toggles "Show Enrichment" → choropleth switches to neighbourhood-level density/education/income
|
|
||||||
- Enrichment and housing metrics displayed side-by-side, not merged
|
|
||||||
|
|
||||||
**Pros**:
|
|
||||||
- No imputation or dodgy aggregations
|
|
||||||
- Honest about geographic mismatch
|
|
||||||
- Ships faster
|
|
||||||
|
|
||||||
**Cons**:
|
|
||||||
- Can't do correlation analysis (price vs. enrichment) directly in dashboard
|
|
||||||
|
|
||||||
**Implementation**:
|
|
||||||
- `dim_neighbourhood` as standalone dimension (no FK to fact tables)
|
|
||||||
- Spatial lookup on hover (point-in-polygon)
|
|
||||||
|
|
||||||
#### V2/Portfolio Phase 4: Area-Weighted Aggregation (Future Scope)
|
|
||||||
|
|
||||||
**Approach**: Pre-compute area-weighted averages of neighbourhood metrics for each TRREB district and CMHC zone.
|
|
||||||
|
|
||||||
**Process**:
|
|
||||||
1. Spatial join: intersect neighbourhood polygons with TRREB/CMHC polygons
|
|
||||||
2. Compute overlap area for each neighbourhood-district pair
|
|
||||||
3. Weight neighbourhood metrics by overlap area proportion
|
|
||||||
4. User selects aggregation method in UI
|
|
||||||
|
|
||||||
**Aggregation Methods to Expose**:
|
|
||||||
|
|
||||||
| Method | Description | Best For |
|
|
||||||
|--------|-------------|----------|
|
|
||||||
| **Area-weighted mean** | Weight by % overlap area | Continuous metrics (density) |
|
|
||||||
| **Population-weighted mean** | Weight by population in overlap | Per-capita metrics (education) |
|
|
||||||
| **Majority assignment** | Assign neighbourhood to district with >50% overlap | Categorical data |
|
|
||||||
| **Max overlap** | Assign to single district with largest overlap | 1:1 mapping needs |
|
|
||||||
|
|
||||||
**Default**: Population-weighted (more defensible for per-capita metrics). Hide selector behind "Advanced" toggle.
|
|
||||||
|
|
||||||
### V1 Future-Proofing (Do Now)
|
|
||||||
|
|
||||||
| Action | Why |
|
|
||||||
|--------|-----|
|
|
||||||
| Store neighbourhood boundaries in same CRS as TRREB/CMHC (WGS84) | Avoids reprojection headaches |
|
|
||||||
| Keep `dim_neighbourhood` normalized (not denormalized into district tables) | Clean separation for V2 join |
|
|
||||||
| Document Census year for each metric | Ready for 2026 Census |
|
|
||||||
| Include `census_year` column in dim_neighbourhood | Enables SCD tracking |
|
|
||||||
|
|
||||||
### V1 Defer (Don't Do Yet)
|
|
||||||
|
|
||||||
| Action | Why Not |
|
|
||||||
|--------|---------|
|
|
||||||
| Pre-compute area-weighted crosswalk | Don't need for V1 |
|
|
||||||
| Build aggregation method selector UI | No backend to support it |
|
|
||||||
| Crime data integration | Deferred to Portfolio Phase 4 |
|
|
||||||
| Historical neighbourhood boundary reconciliation (140→158) | Use 2021+ data only for V1 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Proposed Data Model
|
|
||||||
|
|
||||||
### Star Schema
|
|
||||||
|
|
||||||
```
|
|
||||||
┌──────────────────┐
|
|
||||||
│ dim_time │
|
|
||||||
├──────────────────┤
|
|
||||||
│ date_key (PK) │
|
|
||||||
│ year │
|
|
||||||
│ month │
|
|
||||||
│ quarter │
|
|
||||||
│ month_name │
|
|
||||||
└──────────────────┘
|
|
||||||
│
|
|
||||||
┌─────────────────────────────────────────────┐
|
|
||||||
│ │ │
|
|
||||||
│
|
|
||||||
┌──────────────────┐ │ ┌──────────────────┐
|
|
||||||
│ dim_trreb_district│ │ │ dim_cmhc_zone │
|
|
||||||
├──────────────────┤ │ ├──────────────────┤
|
|
||||||
│ district_key (PK)│ │ │ zone_key (PK) │
|
|
||||||
│ district_code │ │ │ zone_code │
|
|
||||||
│ district_name │ │ │ zone_name │
|
|
||||||
│ area_type │ │ │ geometry │
|
|
||||||
│ geometry │
|
|
||||||
└──────────────────┘       │       └──────────────────┘
|
|
||||||
│ │ │
|
|
||||||
│
|
|
||||||
┌──────────────────┐ │ ┌──────────────────┐
|
|
||||||
│ fact_purchases │ │ │ fact_rentals │
|
|
||||||
├──────────────────┤ │ ├──────────────────┤
|
|
||||||
│ date_key (FK) │ │ │ date_key (FK) │
|
|
||||||
│ district_key (FK)│ │ │ zone_key (FK) │
|
|
||||||
│ sales_count │ │ │ bedroom_type │
|
|
||||||
│ avg_price │ │ │ avg_rent │
|
|
||||||
│ median_price │ │ │ median_rent │
|
|
||||||
│ new_listings │ │ │ vacancy_rate │
|
|
||||||
│ active_listings │ │ │ universe │
|
|
||||||
│ avg_dom │ │ │ turnover_rate │
|
|
||||||
│ avg_sp_lp │ │ │ reliability_code │
|
|
||||||
└──────────────────┘         │         └──────────────────┘
|
|
||||||
│
|
|
||||||
|
|
||||||
┌───────────────────────────┐
|
|
||||||
│ dim_neighbourhood │
|
|
||||||
├───────────────────────────┤
|
|
||||||
│ neighbourhood_id (PK) │
|
|
||||||
│ name │
|
|
||||||
│ geometry │
|
|
||||||
│ population │
|
|
||||||
│ land_area_sqkm │
|
|
||||||
│ pop_density_per_sqkm │
|
|
||||||
│ pct_bachelors_or_higher │
|
|
||||||
│ median_household_income │
|
|
||||||
│ pct_owner_occupied │
|
|
||||||
│ pct_renter_occupied │
|
|
||||||
│ census_year │ ← For SCD tracking
|
|
||||||
└───────────────────────────┘
|
|
||||||
|
|
||||||
┌───────────────────────────┐
|
|
||||||
│ dim_policy_event │
|
|
||||||
├───────────────────────────┤
|
|
||||||
│ event_id (PK) │
|
|
||||||
│ event_date │
|
|
||||||
│ effective_date │
|
|
||||||
│ level │ ← federal/provincial/municipal
|
|
||||||
│ category │ ← monetary/tax/regulatory/supply/economic
|
|
||||||
│ title │
|
|
||||||
│ description │
|
|
||||||
│ expected_direction │ ← bearish/bullish/neutral
|
|
||||||
│ source_url │
|
|
||||||
│ confidence │ ← high/medium/low
|
|
||||||
│ created_at │
|
|
||||||
└───────────────────────────┘
|
|
||||||
|
|
||||||
┌───────────────────────────┐
|
|
||||||
│ bridge_district_neighbourhood │ ← Portfolio Phase 4 ONLY
|
|
||||||
├───────────────────────────┤
|
|
||||||
│ district_key (FK) │
|
|
||||||
│ neighbourhood_id (FK) │
|
|
||||||
│ area_overlap_pct │
|
|
||||||
│ population_overlap │ ← For pop-weighted agg
|
|
||||||
└───────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**Notes**:
|
|
||||||
- `dim_neighbourhood` has no FK relationship to fact tables in V1
|
|
||||||
- `dim_policy_event` is standalone (no FK to facts); used for time-series annotation
|
|
||||||
- `bridge_district_neighbourhood` is Portfolio Phase 4 scope only
|
|
||||||
- Similar bridge table needed for CMHC zones in Portfolio Phase 4
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## File Structure
|
|
||||||
|
|
||||||
> **Note**: Toronto Housing data logic lives in `portfolio_app/toronto/`. See `portfolio_project_plan_v5.md` for full project structure.
|
|
||||||
|
|
||||||
### Data Directory Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
data/
|
|
||||||
└── toronto/
|
|
||||||
├── raw/
|
|
||||||
│ ├── trreb/
|
|
||||||
│ │ └── market_watch_YYYY_MM.pdf
|
|
||||||
│ ├── cmhc/
|
|
||||||
│ │ └── rental_survey_YYYY.csv
|
|
||||||
│ ├── enrichment/
|
|
||||||
│ │ └── neighbourhood_profiles_2021.xlsx
|
|
||||||
│ └── geo/
|
|
||||||
│ ├── toronto_neighbourhoods.geojson
|
|
||||||
│ ├── trreb_districts.geojson ← (to be created via QGIS)
|
|
||||||
│ └── cmhc_zones.geojson ← (from R cmhc package)
|
|
||||||
│
|
|
||||||
├── processed/ ← gitignored
|
|
||||||
│ ├── fact_purchases.parquet
|
|
||||||
│ ├── fact_rentals.parquet
|
|
||||||
│ ├── dim_time.parquet
|
|
||||||
│ ├── dim_trreb_district.parquet
|
|
||||||
│ ├── dim_cmhc_zone.parquet
|
|
||||||
│ ├── dim_neighbourhood.parquet
|
|
||||||
│ └── dim_policy_event.parquet
|
|
||||||
│
|
|
||||||
└── reference/
|
|
||||||
├── policy_events.csv ← Curated event list
|
|
||||||
└── neighbourhood_boundary_changelog.md ← 140→158 notes
|
|
||||||
```
|
|
||||||
|
|
||||||
### Code Module Structure
|
|
||||||
|
|
||||||
```
portfolio_app/toronto/
├── __init__.py
├── parsers/
│   ├── __init__.py
│   ├── trreb.py        # PDF extraction
│   └── cmhc.py         # CSV processing
├── loaders/
│   ├── __init__.py
│   └── database.py     # DB operations
├── schemas/            # Pydantic models
│   ├── __init__.py
│   ├── trreb.py
│   ├── cmhc.py
│   ├── enrichment.py
│   └── policy_event.py
├── models/             # SQLAlchemy ORM
│   ├── __init__.py
│   ├── base.py         # DeclarativeBase, engine
│   ├── dimensions.py   # dim_time, dim_trreb_district, dim_policy_event, etc.
│   └── facts.py        # fact_purchases, fact_rentals
└── transforms/
    └── __init__.py
```
|
|
||||||
|
|
||||||
### Notebooks
|
|
||||||
|
|
||||||
```
notebooks/
├── 01_trreb_pdf_extraction.ipynb
├── 02_cmhc_data_prep.ipynb
├── 03_geo_layer_prep.ipynb
├── 04_enrichment_data_prep.ipynb
├── 05_policy_events_curation.ipynb
└── 06_spatial_crosswalk.ipynb   ← Portfolio Phase 4 only
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## ✅ Implementation Checklist
|
|
||||||
|
|
||||||
> **Note**: These are **Stages** within the Toronto Housing project (Portfolio Phase 1). They are distinct from the overall portfolio **Phases** defined in `portfolio_project_plan_v5.md`.
|
|
||||||
|
|
||||||
### Stage 1: Data Acquisition
|
|
||||||
- [ ] Download TRREB monthly PDFs (2020-present as MVP)
|
|
||||||
- [ ] Register for CMHC portal and export Toronto rental data
|
|
||||||
- [ ] Extract CMHC zone boundaries via R `cmhc` package
|
|
||||||
- [ ] Download City of Toronto neighbourhood GeoJSON (158 boundaries)
|
|
||||||
- [ ] Digitize TRREB district boundaries in QGIS
|
|
||||||
- [ ] Download Neighbourhood Profiles (2021 Census, 158-model)
|
|
||||||
|
|
||||||
### Stage 2: Data Processing
|
|
||||||
- [ ] Build TRREB PDF parser (`portfolio_app/toronto/parsers/trreb.py`)
|
|
||||||
- [ ] Build Pydantic schemas (`portfolio_app/toronto/schemas/`)
|
|
||||||
- [ ] Build SQLAlchemy models (`portfolio_app/toronto/models/`)
|
|
||||||
- [ ] Extract and validate TRREB monthly summaries
|
|
||||||
- [ ] Clean and structure CMHC rental data
|
|
||||||
- [ ] Process Neighbourhood Profiles into `dim_neighbourhood`
|
|
||||||
- [ ] Curate and load policy events into `dim_policy_event`
|
|
||||||
- [ ] Create dimension tables
|
|
||||||
- [ ] Build fact tables
|
|
||||||
- [ ] Validate all geospatial layers use same CRS (WGS84/EPSG:4326)
|
|
||||||
|
|
||||||
### Stage 3: Visualization (V1)
|
|
||||||
- [ ] Create dashboard page (`portfolio_app/pages/toronto/dashboard.py`)
|
|
||||||
- [ ] Build choropleth figures (`portfolio_app/figures/choropleth.py`)
|
|
||||||
- [ ] Build time series figures (`portfolio_app/figures/time_series.py`)
|
|
||||||
- [ ] Design dashboard layout (purchase/rental toggle)
|
|
||||||
- [ ] Implement choropleth map with layer switching
|
|
||||||
- [ ] Add time slider/selector
|
|
||||||
- [ ] Build neighbourhood overlay (toggle-able)
|
|
||||||
- [ ] Add enrichment layer toggle (density/education/income choropleth)
|
|
||||||
- [ ] Add policy event markers on time series
|
|
||||||
- [ ] Add tooltips with cross-reference info ("This district contains...")
|
|
||||||
- [ ] Add tooltips showing enrichment metrics on hover
|
|
||||||
|
|
||||||
### Stage 4: Polish (V1)
|
|
||||||
- [ ] Add data source citations
|
|
||||||
- [ ] Document methodology (especially geographic limitations)
|
|
||||||
- [ ] Write docs (`docs/methodology.md`, `docs/data_sources.md`)
|
|
||||||
- [ ] Deploy to portfolio
|
|
||||||
|
|
||||||
### Future Enhancements (Portfolio Phase 4 — Post-Energy Project)
|
|
||||||
- [ ] Add crime data to dim_neighbourhood
|
|
||||||
- [ ] Build spatial crosswalk (neighbourhood ↔ district/zone intersections)
|
|
||||||
- [ ] Compute area-weighted and population-weighted aggregations
|
|
||||||
- [ ] Add aggregation method selector to UI
|
|
||||||
- [ ] Enable correlation analysis (price vs. enrichment metrics)
|
|
||||||
- [ ] Add historical neighbourhood boundary support (140→158)
|
|
||||||
|
|
||||||
**Deployment & dbt Architecture**: See `portfolio_project_plan_v5.md` for:
|
|
||||||
- dbt layer structure and testing strategy
|
|
||||||
- Deployment architecture
|
|
||||||
- Data quality framework
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## References & Links
|
|
||||||
|
|
||||||
### Core Housing Data
|
|
||||||
|
|
||||||
| Resource | URL |
|
|
||||||
|----------|-----|
|
|
||||||
| TRREB Market Watch | https://trreb.ca/index.php/market-news/market-watch |
|
|
||||||
| CMHC Housing Portal | https://www03.cmhc-schl.gc.ca/hmip-pimh/ |
|
|
||||||
|
|
||||||
### Geographic Boundaries
|
|
||||||
|
|
||||||
| Resource | URL |
|
|
||||||
|----------|-----|
|
|
||||||
| Toronto Neighbourhoods GeoJSON | https://github.com/jasonicarter/toronto-geojson |
|
|
||||||
| TRREB District Map (PDF) | https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf |
|
|
||||||
| Statistics Canada Census Tracts | https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/index-eng.cfm |
|
|
||||||
| R `cmhc` package (CRAN) | https://cran.r-project.org/package=cmhc |
|
|
||||||
|
|
||||||
### Enrichment Data
|
|
||||||
|
|
||||||
| Resource | URL |
|
|
||||||
|----------|-----|
|
|
||||||
| Toronto Open Data Portal | https://open.toronto.ca/ |
|
|
||||||
| Neighbourhood Profiles (CKAN) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/neighbourhood-profiles |
|
|
||||||
| Neighbourhood Profiles 2021 (Direct Download) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx |
|
|
||||||
|
|
||||||
### Policy Events Research
|
|
||||||
|
|
||||||
| Resource | URL |
|
|
||||||
|----------|-----|
|
|
||||||
| Bank of Canada Interest Rates | https://www.bankofcanada.ca/rates/interest-rates/ |
|
|
||||||
| OSFI (Stress Test Rules) | https://www.osfi-bsif.gc.ca/ |
|
|
||||||
| Ontario Legislature (Bills) | https://www.ola.org/ |
|
|
||||||
|
|
||||||
### Reference Documentation
|
|
||||||
|
|
||||||
| Resource | URL |
|
|
||||||
|----------|-----|
|
|
||||||
| Statistics Canada 2021 Census Reference | https://www12.statcan.gc.ca/census-recensement/2021/ref/index-eng.cfm |
|
|
||||||
| City of Toronto Neighbourhood Profiles Overview | https://www.toronto.ca/city-government/data-research-maps/neighbourhoods-communities/neighbourhood-profiles/ |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Related Documents
|
|
||||||
|
|
||||||
| Document | Relationship | Use For |
|
|
||||||
|----------|--------------|---------|
|
|
||||||
| `portfolio_project_plan_v5.md` | Parent document | Overall scope, phasing, tech stack, deployment, dbt architecture, data quality framework |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Document Version: 5.1*
|
|
||||||
*Updated: January 2026*
|
|
||||||
*Project: Toronto Housing Price Dashboard — Portfolio Piece*
|
|
||||||
---
# Work Breakdown Structure & Sprint Plan
|
|
||||||
|
|
||||||
**Project**: Toronto Housing Dashboard (Portfolio Phase 1)
|
|
||||||
**Version**: 4.1
|
|
||||||
**Date**: January 2026
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Document Context
|
|
||||||
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **Parent Documents** | `portfolio_project_plan_v5.md`, `toronto_housing_dashboard_spec_v5.md` |
|
|
||||||
| **Content Source** | `bio_content_v2.md` |
|
|
||||||
| **Role** | Executable sprint plan for Phase 1 delivery |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Milestones
|
|
||||||
|
|
||||||
| Milestone | Deliverable | Target Sprint |
|
|
||||||
|-----------|-------------|---------------|
|
|
||||||
| **Launch 1** | Bio Landing Page | Sprint 2 |
|
|
||||||
| **Launch 2** | Toronto Housing Dashboard | Sprint 6 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## WBS Structure
|
|
||||||
|
|
||||||
```
1.0 Launch 1: Bio Landing Page
├── 1.1 Project Bootstrap
├── 1.2 Infrastructure
├── 1.3 Application Foundation
├── 1.4 Bio Page
└── 1.5 Deployment

2.0 Launch 2: Toronto Housing Dashboard
├── 2.1 Data Acquisition
├── 2.2 Data Processing
├── 2.3 Database Layer
├── 2.4 dbt Transformation
├── 2.5 Visualization
├── 2.6 Documentation
└── 2.7 Operations
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Launch 1: Bio Landing Page
|
|
||||||
|
|
||||||
### 1.1 Project Bootstrap
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 1.1.1 | Git repository initialization | — | Low | Low |
|
|
||||||
| 1.1.2 | Create `.gitignore` | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.3 | Create `pyproject.toml` | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.4 | Create `.python-version` (3.11+) | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.5 | Create `.env.example` | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.6 | Create `README.md` (initial) | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.7 | Create `CLAUDE.md` | 1.1.1 | Low | Low |
|
|
||||||
| 1.1.8 | Create `Makefile` with all targets | 1.1.3 | Low | Medium |
|
|
||||||
|
|
||||||
### 1.2 Infrastructure
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 1.2.1 | Python env setup (pyenv, venv, deps) | 1.1.3, 1.1.4 | Low | Low |
|
|
||||||
| 1.2.2 | Create `.pre-commit-config.yaml` | 1.2.1 | Low | Low |
|
|
||||||
| 1.2.3 | Install pre-commit hooks | 1.2.2 | Low | Low |
|
|
||||||
| 1.2.4 | Create `docker-compose.yml` (PostgreSQL + PostGIS) | 1.1.5 | Low | Low |
|
|
||||||
| 1.2.5 | Create `scripts/` directory structure | 1.1.1 | Low | Low |
|
|
||||||
| 1.2.6 | Create `scripts/docker/up.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.7 | Create `scripts/docker/down.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.8 | Create `scripts/docker/logs.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.9 | Create `scripts/docker/rebuild.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.10 | Create `scripts/db/init.sh` (PostGIS extension) | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.11 | Create `scripts/dev/setup.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.2.12 | Verify Docker + PostGIS working | 1.2.4, 1.2.10 | Low | Low |
|
|
||||||
|
|
||||||
### 1.3 Application Foundation
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 1.3.1 | Create `portfolio_app/` directory structure (full tree) | 1.2.1 | Low | Low |
|
|
||||||
| 1.3.2 | Create `portfolio_app/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 1.3.3 | Create `portfolio_app/config.py` (Pydantic BaseSettings) | 1.3.1 | Low | Medium |
|
|
||||||
| 1.3.4 | Create `portfolio_app/errors/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 1.3.5 | Create `portfolio_app/errors/exceptions.py` | 1.3.4 | Low | Low |
|
|
||||||
| 1.3.6 | Create `portfolio_app/errors/handlers.py` | 1.3.5 | Low | Medium |
|
|
||||||
| 1.3.7 | Create `portfolio_app/app.py` (Dash + Pages routing) | 1.3.3 | Low | Medium |
|
|
||||||
| 1.3.8 | Configure dash-mantine-components theme | 1.3.7 | Low | Low |
|
|
||||||
| 1.3.9 | Create `portfolio_app/assets/` directory | 1.3.1 | Low | Low |
|
|
||||||
| 1.3.10 | Create `portfolio_app/assets/styles.css` | 1.3.9 | Low | Medium |
|
|
||||||
| 1.3.11 | Create `portfolio_app/assets/variables.css` | 1.3.9 | Low | Low |
|
|
||||||
| 1.3.12 | Add `portfolio_app/assets/favicon.ico` | 1.3.9 | Low | Low |
|
|
||||||
| 1.3.13 | Create `portfolio_app/assets/images/` directory | 1.3.9 | Low | Low |
|
|
||||||
| 1.3.14 | Create `tests/` directory structure | 1.2.1 | Low | Low |
|
|
||||||
| 1.3.15 | Create `tests/__init__.py` | 1.3.14 | Low | Low |
|
|
||||||
| 1.3.16 | Create `tests/conftest.py` | 1.3.14 | Low | Medium |
|
|
||||||
| 1.3.17 | Configure pytest in `pyproject.toml` | 1.1.3, 1.3.14 | Low | Low |
|
|
||||||
|
|
||||||
### 1.4 Bio Page
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 1.4.1 | Create `portfolio_app/components/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 1.4.2 | Create `portfolio_app/components/navbar.py` | 1.4.1, 1.3.8 | Low | Low |
|
|
||||||
| 1.4.3 | Create `portfolio_app/components/footer.py` | 1.4.1, 1.3.8 | Low | Low |
|
|
||||||
| 1.4.4 | Create `portfolio_app/components/cards.py` | 1.4.1, 1.3.8 | Low | Low |
|
|
||||||
| 1.4.5 | Create `portfolio_app/pages/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 1.4.6 | Create `portfolio_app/pages/home.py` (layout) | 1.4.5, 1.4.2, 1.4.3 | Low | Low |
|
|
||||||
| 1.4.7 | Integrate bio content from `bio_content_v2.md` | 1.4.6 | Low | Low |
|
|
||||||
| 1.4.8 | Replace social link placeholders with real URLs | 1.4.7 | Low | Low |
|
|
||||||
| 1.4.9 | Implement project cards (deployed/in-dev logic) | 1.4.4, 1.4.6 | Low | Low |
|
|
||||||
| 1.4.10 | Test bio page renders locally | 1.4.9 | Low | Low |
|
|
||||||
|
|
||||||
### 1.5 Deployment
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 1.5.1 | Install PostgreSQL + PostGIS on VPS | — | Low | Low |
|
|
||||||
| 1.5.2 | Configure firewall (ufw: SSH, HTTP, HTTPS) | 1.5.1 | Low | Low |
|
|
||||||
| 1.5.3 | Create application database user | 1.5.1 | Low | Low |
|
|
||||||
| 1.5.4 | Create Gunicorn systemd service file | 1.4.10 | Low | Low |
|
|
||||||
| 1.5.5 | Configure Nginx reverse proxy | 1.5.4 | Low | Low |
|
|
||||||
| 1.5.6 | Configure SSL (certbot) | 1.5.5 | Low | Low |
|
|
||||||
| 1.5.7 | Create `scripts/deploy/deploy.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.5.8 | Create `scripts/deploy/health-check.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 1.5.9 | Deploy bio page | 1.5.6, 1.5.7 | Low | Low |
|
|
||||||
| 1.5.10 | Verify HTTPS access | 1.5.9 | Low | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Launch 2: Toronto Housing Dashboard
|
|
||||||
|
|
||||||
### 2.1 Data Acquisition
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.1.1 | Define TRREB year scope + download PDFs | — | Low | Low |
|
|
||||||
| 2.1.2 | **HUMAN**: Digitize TRREB district boundaries (QGIS) | 2.1.1 | High | High |
|
|
||||||
| 2.1.3 | Register for CMHC portal | — | Low | Low |
|
|
||||||
| 2.1.4 | Export CMHC Toronto rental CSVs | 2.1.3 | Low | Low |
|
|
||||||
| 2.1.5 | Extract CMHC zone boundaries (R cmhc package) | 2.1.3 | Low | Medium |
|
|
||||||
| 2.1.6 | Download neighbourhoods GeoJSON (158 boundaries) | — | Low | Low |
|
|
||||||
| 2.1.7 | Download Neighbourhood Profiles 2021 (xlsx) | — | Low | Low |
|
|
||||||
| 2.1.8 | Validate CRS alignment (all geo files WGS84) | 2.1.2, 2.1.5, 2.1.6 | Low | Medium |
|
|
||||||
| 2.1.9 | Research Tier 1 policy events (10–20 events) | — | Mid | Medium |
|
|
||||||
| 2.1.10 | Create `data/toronto/reference/policy_events.csv` | 2.1.9 | Low | Low |
|
|
||||||
| 2.1.11 | Create `data/` directory structure | 1.3.1 | Low | Low |
|
|
||||||
| 2.1.12 | Organize raw files into `data/toronto/raw/` | 2.1.11 | Low | Low |
|
|
||||||
| 2.1.13 | Test TRREB parser across year boundaries | 2.2.3 | Low | Medium |
|
|
||||||
|
|
||||||
### 2.2 Data Processing
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.2.1 | Create `portfolio_app/toronto/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 2.2.2 | Create `portfolio_app/toronto/parsers/__init__.py` | 2.2.1 | Low | Low |
|
|
||||||
| 2.2.3 | Build TRREB PDF parser (`parsers/trreb.py`) | 2.2.2, 2.1.1 | Mid | High |
|
|
||||||
| 2.2.4 | TRREB data cleaning/normalization | 2.2.3 | Low | Medium |
|
|
||||||
| 2.2.5 | TRREB parser unit tests | 2.2.4 | Low | Low |
|
|
||||||
| 2.2.6 | Build CMHC CSV processor (`parsers/cmhc.py`) | 2.2.2, 2.1.4 | Low | Low |
|
|
||||||
| 2.2.7 | CMHC reliability code handling | 2.2.6 | Low | Low |
|
|
||||||
| 2.2.8 | CMHC processor unit tests | 2.2.7 | Low | Low |
|
|
||||||
| 2.2.9 | Build Neighbourhood Profiles parser | 2.2.1, 2.1.7 | Low | Low |
|
|
||||||
| 2.2.10 | Policy events CSV loader | 2.2.1, 2.1.10 | Low | Low |
|
|
||||||
|
|
||||||
### 2.3 Database Layer
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.3.1 | Create `portfolio_app/toronto/schemas/__init__.py` | 2.2.1 | Low | Low |
|
|
||||||
| 2.3.2 | Create TRREB Pydantic schemas (`schemas/trreb.py`) | 2.3.1 | Low | Medium |
|
|
||||||
| 2.3.3 | Create CMHC Pydantic schemas (`schemas/cmhc.py`) | 2.3.1 | Low | Medium |
|
|
||||||
| 2.3.4 | Create enrichment Pydantic schemas (`schemas/enrichment.py`) | 2.3.1 | Low | Low |
|
|
||||||
| 2.3.5 | Create policy event Pydantic schema (`schemas/policy_event.py`) | 2.3.1 | Low | Low |
|
|
||||||
| 2.3.6 | Create `portfolio_app/toronto/models/__init__.py` | 2.2.1 | Low | Low |
|
|
||||||
| 2.3.7 | Create SQLAlchemy base (`models/base.py`) | 2.3.6, 1.3.3 | Low | Medium |
|
|
||||||
| 2.3.8 | Create dimension models (`models/dimensions.py`) | 2.3.7 | Low | Medium |
|
|
||||||
| 2.3.9 | Create fact models (`models/facts.py`) | 2.3.8 | Low | Medium |
|
|
||||||
| 2.3.10 | Create `portfolio_app/toronto/loaders/__init__.py` | 2.2.1 | Low | Low |
|
|
||||||
| 2.3.11 | Create dimension loaders (`loaders/database.py`) | 2.3.10, 2.3.8 | Low | Medium |
|
|
||||||
| 2.3.12 | Create fact loaders | 2.3.11, 2.3.9, 2.2.4, 2.2.7 | Mid | Medium |
|
|
||||||
| 2.3.13 | Loader integration tests | 2.3.12 | Low | Medium |
|
|
||||||
| 2.3.14 | Create SQL views for dashboard queries | 2.3.12 | Low | Medium |
|
|
||||||
|
|
||||||
### 2.4 dbt Transformation
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.4.1 | Create `dbt/` directory structure | 1.3.1 | Low | Low |
|
|
||||||
| 2.4.2 | Create `dbt/dbt_project.yml` | 2.4.1 | Low | Low |
|
|
||||||
| 2.4.3 | Create `dbt/profiles.yml` | 2.4.1, 1.3.3 | Low | Low |
|
|
||||||
| 2.4.4 | Create `scripts/dbt/run.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.4.5 | Create `scripts/dbt/test.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.4.6 | Create `scripts/dbt/docs.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.4.7 | Create `scripts/dbt/fresh.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.4.8 | Create staging models (`stg_trreb__monthly`, `stg_cmhc__rental`) | 2.4.3, 2.3.12 | Low | Medium |
|
|
||||||
| 2.4.9 | Create intermediate models | 2.4.8 | Low | Medium |
|
|
||||||
| 2.4.10 | Create mart models | 2.4.9 | Low | Medium |
|
|
||||||
| 2.4.11 | Create dbt schema tests (unique, not_null, relationships) | 2.4.10 | Low | Medium |
|
|
||||||
| 2.4.12 | Create custom dbt tests (anomaly detection) | 2.4.11 | Low | Medium |
|
|
||||||
| 2.4.13 | Create dbt documentation (schema.yml) | 2.4.10 | Low | Low |
|
|
||||||
|
|
||||||
### 2.5 Visualization
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.5.1 | Create `portfolio_app/figures/__init__.py` | 1.3.1 | Low | Low |
|
|
||||||
| 2.5.2 | Build choropleth factory (`figures/choropleth.py`) | 2.5.1, 2.1.8 | Mid | Medium |
|
|
||||||
| 2.5.3 | Build time series factory (`figures/time_series.py`) | 2.5.1 | Low | Medium |
|
|
||||||
| 2.5.4 | Build YoY change chart factory (`figures/statistical.py`) | 2.5.1 | Low | Medium |
|
|
||||||
| 2.5.5 | Build seasonality decomposition chart | 2.5.4 | Low | Medium |
|
|
||||||
| 2.5.6 | Build district correlation matrix chart | 2.5.4 | Low | Medium |
|
|
||||||
| 2.5.7 | Create `portfolio_app/pages/toronto/__init__.py` | 1.4.5 | Low | Low |
|
|
||||||
| 2.5.8 | Create `portfolio_app/pages/toronto/dashboard.py` (layout only) | 2.5.7, 1.4.2, 1.4.3 | Mid | High |
|
|
||||||
| 2.5.9 | Implement purchase/rental mode toggle | 2.5.8 | Low | Low |
|
|
||||||
| 2.5.10 | Implement monthly time slider | 2.5.8 | Low | Medium |
|
|
||||||
| 2.5.11 | Implement annual time selector (CMHC) | 2.5.8 | Low | Low |
|
|
||||||
| 2.5.12 | Implement layer toggles (districts/zones/neighbourhoods) | 2.5.8 | Low | Medium |
|
|
||||||
| 2.5.13 | Create `portfolio_app/pages/toronto/callbacks/__init__.py` | 2.5.7 | Low | Low |
|
|
||||||
| 2.5.14 | Create `callbacks/map_callbacks.py` | 2.5.13, 2.5.2 | Mid | Medium |
|
|
||||||
| 2.5.15 | Create `callbacks/filter_callbacks.py` | 2.5.13 | Low | Medium |
|
|
||||||
| 2.5.16 | Create `callbacks/timeseries_callbacks.py` | 2.5.13, 2.5.3 | Low | Medium |
|
|
||||||
| 2.5.17 | Implement district/zone tooltips | 2.5.14 | Low | Low |
|
|
||||||
| 2.5.18 | Implement neighbourhood overlay | 2.5.14, 2.1.6 | Low | Medium |
|
|
||||||
| 2.5.19 | Implement enrichment layer toggle | 2.5.18 | Low | Medium |
|
|
||||||
| 2.5.20 | Implement policy event markers on time series | 2.5.16, 2.2.10 | Low | Medium |
|
|
||||||
| 2.5.21 | Implement "district contains neighbourhoods" tooltip | 2.5.17 | Low | Low |
|
|
||||||
| 2.5.22 | Test dashboard renders with sample data | 2.5.20 | Low | Medium |
|
|
||||||
|
|
||||||
### 2.6 Documentation
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.6.1 | Create `docs/` directory | 1.3.1 | Low | Low |
|
|
||||||
| 2.6.2 | Write `docs/methodology.md` (geographic limitations) | 2.5.22 | Low | Medium |
|
|
||||||
| 2.6.3 | Write `docs/data_sources.md` (citations) | 2.5.22 | Low | Low |
|
|
||||||
| 2.6.4 | Write `docs/user_guide.md` | 2.5.22 | Low | Low |
|
|
||||||
| 2.6.5 | Update `README.md` (final) | 2.6.2, 2.6.3 | Low | Low |
|
|
||||||
| 2.6.6 | Update `CLAUDE.md` (final) | 2.6.5 | Low | Low |
|
|
||||||
|
|
||||||
### 2.7 Operations
|
|
||||||
|
|
||||||
| ID | Task | Depends On | Effort | Complexity |
|
|
||||||
|----|------|------------|--------|------------|
|
|
||||||
| 2.7.1 | Create `scripts/db/backup.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.7.2 | Create `scripts/db/restore.sh` | 1.2.5 | Low | Low |
|
|
||||||
| 2.7.3 | Create `scripts/db/reset.sh` (dev only) | 1.2.5 | Low | Low |
|
|
||||||
| 2.7.4 | Create `scripts/deploy/rollback.sh` | 1.2.5 | Low | Medium |
|
|
||||||
| 2.7.5 | Implement backup retention policy | 2.7.1 | Low | Low |
|
|
||||||
| 2.7.6 | Add `/health` endpoint | 2.5.8 | Low | Low |
|
|
||||||
| 2.7.7 | Configure uptime monitoring (external) | 2.7.6 | Low | Low |
|
|
||||||
| 2.7.8 | Deploy Toronto dashboard | 1.5.9, 2.5.22 | Low | Low |
|
|
||||||
| 2.7.9 | Verify production deployment | 2.7.8 | Low | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## L3 Task Details
|
|
||||||
|
|
||||||
### 1.1 Project Bootstrap
|
|
||||||
|
|
||||||
#### 1.1.1 Git repository initialization
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Initialize git repo with main branch |
|
|
||||||
| **How** | `git init`, initial commit |
|
|
||||||
| **Inputs** | — |
|
|
||||||
| **Outputs** | `.git/` directory |
|
|
||||||
| **Why** | Version control foundation |
|
|
||||||
|
|
||||||
#### 1.1.2 Create `.gitignore`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Git ignore rules per project plan |
|
|
||||||
| **How** | Create file with patterns for: `.env`, `data/*/processed/`, `reports/`, `backups/`, `notebooks/*.html`, `__pycache__/`, `.venv/` |
|
|
||||||
| **Inputs** | Project plan → Directory Rules |
|
|
||||||
| **Outputs** | `.gitignore` |
|
|
||||||
|
|
||||||
#### 1.1.3 Create `pyproject.toml`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Python packaging config |
|
|
||||||
| **How** | Define project metadata, dependencies, tool configs (ruff, mypy, pytest) |
|
|
||||||
| **Inputs** | Tech stack versions from project plan |
|
|
||||||
| **Outputs** | `pyproject.toml` |
|
|
||||||
| **Dependencies** | PostgreSQL 16.x, Pydantic ≥2.0, SQLAlchemy ≥2.0, dbt-postgres ≥1.7, Pandas ≥2.1, GeoPandas ≥0.14, Dash ≥2.14, dash-mantine-components (latest), pytest ≥7.0 |
|
|
||||||
|
|
||||||
#### 1.1.4 Create `.python-version`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | pyenv version file |
|
|
||||||
| **How** | Single line: `3.11` or specific patch version |
|
|
||||||
| **Outputs** | `.python-version` |
|
|
||||||
|
|
||||||
#### 1.1.5 Create `.env.example`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Environment variable template |
|
|
||||||
| **How** | Template with: DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |
|
|
||||||
| **Inputs** | Project plan → Environment Setup |
|
|
||||||
| **Outputs** | `.env.example` |
|
|
||||||
|
|
||||||
#### 1.1.6 Create `README.md` (initial)
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Project overview stub |
|
|
||||||
| **How** | Title, brief description, "Setup coming soon" |
|
|
||||||
| **Outputs** | `README.md` |
|
|
||||||
|
|
||||||
#### 1.1.7 Create `CLAUDE.md`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | AI assistant context file |
|
|
||||||
| **How** | Project context, architecture decisions, patterns, conventions |
|
|
||||||
| **Inputs** | Project plan → Code Architecture |
|
|
||||||
| **Outputs** | `CLAUDE.md` |
|
|
||||||
| **Why** | Claude Code effectiveness from day 1 |
|
|
||||||
|
|
||||||
#### 1.1.8 Create `Makefile`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | All make targets from project plan |
|
|
||||||
| **How** | Implement targets: setup, venv, clean, docker-up/down/logs/rebuild, db-init/backup/restore/reset, run, run-prod, dbt-run/test/docs/fresh, test, test-cov, lint, format, typecheck, ci, deploy, rollback |
|
|
||||||
| **Inputs** | Project plan → Makefile Targets |
|
|
||||||
| **Outputs** | `Makefile` |
|
|
||||||
|
|
||||||
### 1.2 Infrastructure
|
|
||||||
|
|
||||||
#### 1.2.4 Create `docker-compose.yml`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Docker Compose V2 for PostgreSQL 16 + PostGIS |
|
|
||||||
| **How** | Service definition, volume mounts, port 5432, env vars from `.env` |
|
|
||||||
| **Inputs** | `.env.example` |
|
|
||||||
| **Outputs** | `docker-compose.yml` |
|
|
||||||
| **Note** | No `version` field (Docker Compose V2) |
|
|
||||||
|
|
||||||
#### 1.2.5 Create `scripts/` directory structure
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Full scripts tree per project plan |
|
|
||||||
| **How** | `mkdir -p scripts/{db,docker,deploy,dbt,dev}` |
|
|
||||||
| **Outputs** | `scripts/db/`, `scripts/docker/`, `scripts/deploy/`, `scripts/dbt/`, `scripts/dev/` |
|
|
||||||
|
|
||||||
#### 1.2.10 Create `scripts/db/init.sh`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Database initialization with PostGIS |
|
|
||||||
| **How** | `CREATE DATABASE`, `CREATE EXTENSION postgis`, schema creation |
|
|
||||||
| **Standard** | `set -euo pipefail`, usage comment, idempotent |
|
|
||||||
| **Outputs** | `scripts/db/init.sh` |
|
|
||||||
|
|
||||||
### 1.3 Application Foundation
|
|
||||||
|
|
||||||
#### 1.3.1 Create `portfolio_app/` directory structure
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Full application tree per project plan |
|
|
||||||
| **Directories** | `portfolio_app/`, `portfolio_app/assets/`, `portfolio_app/assets/images/`, `portfolio_app/pages/`, `portfolio_app/pages/toronto/`, `portfolio_app/pages/toronto/callbacks/`, `portfolio_app/components/`, `portfolio_app/figures/`, `portfolio_app/toronto/`, `portfolio_app/toronto/parsers/`, `portfolio_app/toronto/loaders/`, `portfolio_app/toronto/schemas/`, `portfolio_app/toronto/models/`, `portfolio_app/toronto/transforms/`, `portfolio_app/errors/` |
|
|
||||||
| **Pattern** | Callbacks in `pages/{dashboard}/callbacks/` per project plan |
|
|
||||||
|
|
||||||
#### 1.3.3 Create `config.py`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Pydantic BaseSettings for config |
|
|
||||||
| **How** | Settings class loading from `.env` |
|
|
||||||
| **Fields** | DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |
|
|
||||||
|
|
||||||
#### 1.3.5 Create `exceptions.py`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Exception hierarchy per project plan |
|
|
||||||
| **Classes** | `PortfolioError` (base), `ParseError`, `ValidationError`, `LoadError` |
|
|
||||||
|
|
||||||
#### 1.3.6 Create `handlers.py`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Error handling decorators |
|
|
||||||
| **How** | Decorators for: logging/re-raise, retry logic, transaction boundaries, timing |
|
|
||||||
| **Pattern** | Infrastructure concerns only; domain logic uses explicit handling |
|
|
||||||
|
|
||||||
#### 1.3.7 Create `app.py`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Dash app factory with Pages routing |
|
|
||||||
| **How** | `Dash(__name__, use_pages=True)`, MantineProvider wrapper |
|
|
||||||
| **Imports** | External: absolute; Internal: relative (dot notation) |
|
|
||||||
|
|
||||||
#### 1.3.16 Create `conftest.py`
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | pytest fixtures |
|
|
||||||
| **How** | Test database fixture, sample data fixtures, app client fixture |
|
|
||||||
|
|
||||||
### 1.4 Bio Page
|
|
||||||
|
|
||||||
#### 1.4.7 Integrate bio content
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Content from `bio_content_v2.md` |
|
|
||||||
| **Sections** | Headline, Professional Summary, Tech Stack, Side Project, Availability |
|
|
||||||
| **Layout** | Hero → Summary → Tech Stack → Project Cards → Social Links → Availability |
|
|
||||||
|
|
||||||
#### 1.4.8 Replace social link placeholders
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Replace `[USERNAME]` in LinkedIn/GitHub URLs |
|
|
||||||
| **Source** | `bio_content_v2.md` → Social Links |
|
|
||||||
| **Acceptance** | No placeholder text in production |
|
|
||||||
|
|
||||||
#### 1.4.9 Implement project cards
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Dynamic project card display |
|
|
||||||
| **Logic** | Show deployed projects with links; show "In Development" for in-progress; hide or grey out planned |
|
|
||||||
| **Source** | `bio_content_v2.md` → Portfolio Projects Section |
|
|
||||||
|
|
||||||
### 2.1 Data Acquisition
|
|
||||||
|
|
||||||
#### 2.1.1 Define TRREB year scope + download PDFs
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Decide which years to parse for V1, download PDFs |
|
|
||||||
| **Decision** | 2020–present for V1 (manageable scope, consistent PDF format). Expand to 2007+ in future if needed. |
|
|
||||||
| **Output** | `data/toronto/raw/trreb/market_watch_YYYY_MM.pdf` |
|
|
||||||
| **Note** | PDF format may vary pre-2018; test before committing to older years |
|
|
||||||
|
|
||||||
#### 2.1.2 Digitize TRREB district boundaries
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | GeoJSON with ~35 district polygons |
|
|
||||||
| **Tool** | QGIS |
|
|
||||||
| **Process** | Import PDF as raster → create vector layer → trace polygons → add attributes (district_code, district_name, area_type) → export GeoJSON (WGS84/EPSG:4326) |
|
|
||||||
| **Input** | TRREB Toronto.pdf map |
|
|
||||||
| **Output** | `data/toronto/raw/geo/trreb_districts.geojson` |
|
|
||||||
| **Effort** | High |
|
|
||||||
| **Complexity** | High |
|
|
||||||
| **Note** | HUMAN TASK — not automatable |
|
|
||||||
|
|
||||||
#### 2.1.5 Extract CMHC zone boundaries
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | GeoJSON with ~20 zone polygons |
|
|
||||||
| **Tool** | R with cmhc and sf packages |
|
|
||||||
| **Process** | `get_cmhc_geography(geography_type="ZONE", cma="Toronto")` → `st_write()` to GeoJSON |
|
|
||||||
| **Output** | `data/toronto/raw/geo/cmhc_zones.geojson` |
|
|
||||||
|
|
||||||
#### 2.1.9 Research Tier 1 policy events
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Federal/provincial policy events with dates, descriptions, expected direction |
|
|
||||||
| **Sources** | Bank of Canada, OSFI, Ontario Legislature |
|
|
||||||
| **Schema** | event_date, effective_date, level, category, title, description, expected_direction, source_url, confidence |
|
|
||||||
| **Acceptance** | Minimum 10 events, maximum 20 |
|
|
||||||
| **Examples** | BoC rate decisions, OSFI B-20, Ontario Fair Housing Plan, foreign buyer tax |
|
|
||||||
|
|
||||||
#### 2.1.13 Test TRREB parser across year boundaries
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Verify parser handles PDFs from different years |
|
|
||||||
| **Test Cases** | 2020 Q1, 2022 Q1, 2024 Q1 (minimum) |
|
|
||||||
| **Check For** | Table structure changes, column naming variations, page number shifts |
|
|
||||||
| **Output** | Documented format variations, parser fallbacks if needed |
|
|
||||||
|
|
||||||
### 2.2 Data Processing
|
|
||||||
|
|
||||||
#### 2.2.3 Build TRREB PDF parser
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Extract summary tables from TRREB PDFs |
|
|
||||||
| **Tool** | pdfplumber or camelot-py |
|
|
||||||
| **Location** | Pages 3-4 (Summary by Area) |
|
|
||||||
| **Fields** | report_date, area_code, area_name, area_type, sales, dollar_volume, avg_price, median_price, new_listings, active_listings, avg_sp_lp, avg_dom |
|
|
||||||
| **Output** | `portfolio_app/toronto/parsers/trreb.py` |
|
|
||||||
|
|
||||||
#### 2.2.7 CMHC reliability code handling
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Parse reliability codes, handle suppression |
|
|
||||||
| **Codes** | a (excellent), b (good), c (fair), d (poor/caution), ** (suppressed → NULL) |
|
|
||||||
| **Implementation** | Pydantic validators, enum type |
|
|
||||||
|
|
||||||
### 2.3 Database Layer
|
|
||||||
|
|
||||||
#### 2.3.8 Create dimension models
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | SQLAlchemy 2.0 models for dimensions |
|
|
||||||
| **Tables** | `dim_time`, `dim_trreb_district`, `dim_cmhc_zone`, `dim_neighbourhood`, `dim_policy_event` |
|
|
||||||
| **Geometry** | PostGIS geometry columns for districts, zones, neighbourhoods |
|
|
||||||
| **Note** | `dim_neighbourhood` has no FK to facts in V1 |
|
|
||||||
|
|
||||||
#### 2.3.9 Create fact models
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | SQLAlchemy 2.0 models for facts |
|
|
||||||
| **Tables** | `fact_purchases`, `fact_rentals` |
|
|
||||||
| **FKs** | fact_purchases → dim_time, dim_trreb_district; fact_rentals → dim_time, dim_cmhc_zone |
|
|
||||||
|
|
||||||
### 2.4 dbt Transformation
|
|
||||||
|
|
||||||
#### 2.4.8 Create staging models
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | 1:1 source mapping, cleaned and typed |
|
|
||||||
| **Models** | `stg_trreb__monthly`, `stg_cmhc__rental` |
|
|
||||||
| **Naming** | `stg_{source}__{entity}` |
|
|
||||||
|
|
||||||
#### 2.4.11 Create dbt schema tests
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Data quality tests |
|
|
||||||
| **Tests** | `unique` (PKs), `not_null` (required), `accepted_values` (reliability codes, area_type), `relationships` (FK integrity) |
|
|
||||||
|
|
||||||
#### 2.4.12 Create custom dbt tests
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Anomaly detection rules |
|
|
||||||
| **Rules** | Price MoM change >30% → flag; missing districts → fail; duplicate records → fail |
|
|
||||||
|
|
||||||
### 2.5 Visualization
|
|
||||||
|
|
||||||
#### 2.5.2 Build choropleth factory
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Reusable choropleth_mapbox figure generator |
|
|
||||||
| **Inputs** | GeoDataFrame, metric column, color config |
|
|
||||||
| **Output** | Plotly figure |
|
|
||||||
| **Location** | `portfolio_app/figures/choropleth.py` |
|
|
||||||
|
|
||||||
#### 2.5.4—2.5.6 Statistical chart factories
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Statistical analysis visualizations |
|
|
||||||
| **Charts** | YoY change with variance bands, seasonality decomposition, district correlation matrix |
|
|
||||||
| **Location** | `portfolio_app/figures/statistical.py` |
|
|
||||||
| **Why** | Required skill demonstration per project plan |
|
|
||||||
|
|
||||||
#### 2.5.8 Create dashboard layout
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Toronto dashboard page structure |
|
|
||||||
| **File** | `portfolio_app/pages/toronto/dashboard.py` |
|
|
||||||
| **Pattern** | Layout only — no callbacks in this file |
|
|
||||||
| **Components** | Navbar, choropleth map, time controls, layer toggles, time series panel, statistics panel, footer |
|
|
||||||
|
|
||||||
#### 2.5.13—2.5.16 Create callbacks
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Dashboard interaction logic |
|
|
||||||
| **Location** | `portfolio_app/pages/toronto/callbacks/` |
|
|
||||||
| **Files** | `__init__.py`, `map_callbacks.py`, `filter_callbacks.py`, `timeseries_callbacks.py` |
|
|
||||||
| **Pattern** | Separate from layout per project plan callback separation pattern |
|
|
||||||
| **Registration** | Import callback modules in `callbacks/__init__.py`; import that package in `dashboard.py`. Dash Pages auto-discovers callbacks when module is imported. |
|
|
||||||
|
|
||||||
#### 2.5.22 Test dashboard renders with sample data
|
|
||||||
| Attribute | Value |
|
|
||||||
|-----------|-------|
|
|
||||||
| **What** | Verify dashboard works end-to-end |
|
|
||||||
| **Sample Data** | Use output from task 2.3.12 (fact loaders). Run loaders with subset of parsed data before this task. |
|
|
||||||
| **Verify** | Choropleth renders, time controls work, tooltips display, no console errors |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Sprint Plan
|
|
||||||
|
|
||||||
### Sprint 1: Project Bootstrap + Start TRREB Digitization
|
|
||||||
|
|
||||||
**Goal**: Dev environment working, repo initialized, TRREB digitization started
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 1.1.1 | Git repo init | Low |
|
|
||||||
| 1.1.2 | .gitignore | Low |
|
|
||||||
| 1.1.3 | pyproject.toml | Low |
|
|
||||||
| 1.1.4 | .python-version | Low |
|
|
||||||
| 1.1.5 | .env.example | Low |
|
|
||||||
| 1.1.6 | README.md (initial) | Low |
|
|
||||||
| 1.1.7 | CLAUDE.md | Low |
|
|
||||||
| 1.1.8 | Makefile | Low |
|
|
||||||
| 1.2.1 | Python env setup | Low |
|
|
||||||
| 1.2.2 | .pre-commit-config.yaml | Low |
|
|
||||||
| 1.2.3 | Install pre-commit | Low |
|
|
||||||
| 1.2.4 | docker-compose.yml | Low |
|
|
||||||
| 1.2.5 | scripts/ directory structure | Low |
|
|
||||||
| 1.2.6—1.2.9 | Docker scripts | Low |
|
|
||||||
| 1.2.10 | scripts/db/init.sh | Low |
|
|
||||||
| 1.2.11 | scripts/dev/setup.sh | Low |
|
|
||||||
| 1.2.12 | Verify Docker + PostGIS | Low |
|
|
||||||
| 1.3.1 | portfolio_app/ directory structure | Low |
|
|
||||||
| 1.3.2—1.3.6 | App foundation files | Low |
|
|
||||||
| 1.3.14—1.3.17 | Test infrastructure | Low |
|
|
||||||
| 2.1.1 | Download TRREB PDFs | Low |
|
|
||||||
| 2.1.2 | **START** TRREB boundaries (HUMAN) | High |
|
|
||||||
| 2.1.9 | **START** Policy events research | Mid |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 2: Bio Page + Data Acquisition
|
|
||||||
|
|
||||||
**Goal**: Bio live, all raw data downloaded
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 1.3.7 | app.py with Pages | Low |
|
|
||||||
| 1.3.8 | Theme config | Low |
|
|
||||||
| 1.3.9—1.3.13 | Assets directory + files | Low |
|
|
||||||
| 1.4.1—1.4.4 | Components | Low |
|
|
||||||
| 1.4.5—1.4.10 | Bio page | Low |
|
|
||||||
| 1.5.1—1.5.3 | VPS setup | Low |
|
|
||||||
| 1.5.4—1.5.6 | Gunicorn/Nginx/SSL | Low |
|
|
||||||
| 1.5.7—1.5.8 | Deploy scripts | Low |
|
|
||||||
| 1.5.9—1.5.10 | Deploy + verify | Low |
|
|
||||||
| 2.1.2 | **CONTINUE** TRREB boundaries | High |
|
|
||||||
| 2.1.3—2.1.4 | CMHC registration + export | Low |
|
|
||||||
| 2.1.5 | CMHC zone boundaries (R) | Low |
|
|
||||||
| 2.1.6 | Neighbourhoods GeoJSON | Low |
|
|
||||||
| 2.1.7 | Neighbourhood Profiles download | Low |
|
|
||||||
| 2.1.9 | **CONTINUE** Policy events research | Mid |
|
|
||||||
| 2.1.10 | policy_events.csv | Low |
|
|
||||||
| 2.1.11—2.1.12 | data/ directory + organize | Low |
|
|
||||||
|
|
||||||
**Milestone**: **Launch 1 — Bio Live**
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 3: Parsers + Schemas + Models
|
|
||||||
|
|
||||||
**Goal**: ETL pipeline working, database layer complete
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 2.1.2 | **COMPLETE** TRREB boundaries | High |
|
|
||||||
| 2.1.8 | CRS validation | Low |
|
|
||||||
| 2.2.1—2.2.2 | Toronto module init | Low |
|
|
||||||
| 2.2.3—2.2.5 | TRREB parser + tests | Mid |
|
|
||||||
| 2.2.6—2.2.8 | CMHC processor + tests | Low |
|
|
||||||
| 2.2.9 | Neighbourhood Profiles parser | Low |
|
|
||||||
| 2.2.10 | Policy events loader | Low |
|
|
||||||
| 2.3.1—2.3.5 | Pydantic schemas | Low |
|
|
||||||
| 2.3.6—2.3.9 | SQLAlchemy models | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 4: Loaders + dbt
|
|
||||||
|
|
||||||
**Goal**: Data loaded, transformation layer ready
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 2.3.10—2.3.13 | Loaders + tests | Mid |
|
|
||||||
| 2.3.14 | SQL views | Low |
|
|
||||||
| 2.4.1—2.4.7 | dbt setup + scripts | Low |
|
|
||||||
| 2.4.8—2.4.10 | dbt models | Low |
|
|
||||||
| 2.4.11—2.4.12 | dbt tests | Low |
|
|
||||||
| 2.4.13 | dbt documentation | Low |
|
|
||||||
| 2.7.1—2.7.3 | DB backup/restore scripts | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 5: Visualization
|
|
||||||
|
|
||||||
**Goal**: Dashboard functional
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 2.5.1—2.5.6 | Figure factories | Mid |
|
|
||||||
| 2.5.7—2.5.12 | Dashboard layout + controls | Mid |
|
|
||||||
| 2.5.13—2.5.16 | Callbacks | Mid |
|
|
||||||
| 2.5.17—2.5.21 | Tooltips + overlays + markers | Low |
|
|
||||||
| 2.5.22 | Test dashboard | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 6: Polish + Launch 2
|
|
||||||
|
|
||||||
**Goal**: Dashboard deployed
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| 2.6.1—2.6.6 | Documentation | Low |
|
|
||||||
| 2.7.4—2.7.5 | Rollback script + retention | Low |
|
|
||||||
| 2.7.6—2.7.7 | Health endpoint + monitoring | Low |
|
|
||||||
| 2.7.8—2.7.9 | Deploy + verify | Low |
|
|
||||||
|
|
||||||
**Milestone**: **Launch 2 — Toronto Dashboard Live**
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Sprint 7: Buffer
|
|
||||||
|
|
||||||
**Goal**: Contingency for slippage, bug fixes
|
|
||||||
|
|
||||||
| Task ID | Task | Effort |
|
|
||||||
|---------|------|--------|
|
|
||||||
| — | Overflow from previous sprints | Varies |
|
|
||||||
| — | Bug fixes | Varies |
|
|
||||||
| — | UX polish | Low |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Sprint Summary
|
|
||||||
|
|
||||||
| Sprint | Focus | Key Risk | Milestone |
|
|
||||||
|--------|-------|----------|-----------|
|
|
||||||
| 1 | Bootstrap + start boundaries | — | — |
|
|
||||||
| 2 | Bio + data acquisition | TRREB digitization | Launch 1 |
|
|
||||||
| 3 | Parsers + DB layer | PDF parser, boundaries | — |
|
|
||||||
| 4 | Loaders + dbt | — | — |
|
|
||||||
| 5 | Visualization | Choropleth complexity | — |
|
|
||||||
| 6 | Polish + deploy | — | Launch 2 |
|
|
||||||
| 7 | Buffer | — | — |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Dependency Graph
|
|
||||||
|
|
||||||
### Launch 1 Critical Path
|
|
||||||
```
|
|
||||||
1.1.1 → 1.1.3 → 1.2.1 → 1.3.1 → 1.3.7 → 1.4.6 → 1.4.10 → 1.5.9 → 1.5.10
|
|
||||||
```
|
|
||||||
|
|
||||||
### Launch 2 Critical Path
|
|
||||||
```
|
|
||||||
2.1.2 (TRREB boundaries) ─┬→ 2.1.8 (CRS) → 2.5.2 (choropleth) → 2.5.8 (layout) → 2.5.22 (test) → 2.7.8 (deploy)
|
|
||||||
│
|
|
||||||
2.1.1 → 2.2.3 (parser) → 2.2.4 → 2.3.12 (loaders) → 2.4.8 (dbt) ─┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Parallel Tracks (can run simultaneously)
|
|
||||||
|
|
||||||
| Track | Tasks | Can Start |
|
|
||||||
|-------|-------|-----------|
|
|
||||||
| **A: TRREB Boundaries** | 2.1.1 → 2.1.2 | Sprint 1 |
|
|
||||||
| **B: TRREB Parser** | 2.2.3—2.2.5 | Sprint 2 (after PDFs) |
|
|
||||||
| **C: CMHC** | 2.1.3—2.1.5 → 2.2.6—2.2.8 | Sprint 2 |
|
|
||||||
| **D: Enrichment** | 2.1.6—2.1.7 → 2.2.9 | Sprint 2 |
|
|
||||||
| **E: Policy Events** | 2.1.9—2.1.10 → 2.2.10 | Sprint 1—2 |
|
|
||||||
| **F: Schemas/Models** | 2.3.1—2.3.9 | Sprint 3 (after parsers) |
|
|
||||||
| **G: dbt** | 2.4.* | Sprint 4 (after loaders) |
|
|
||||||
| **H: Ops Scripts** | 2.7.1—2.7.5 | Sprint 4 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Risk Register
|
|
||||||
|
|
||||||
| Risk | Likelihood | Impact | Mitigation |
|
|
||||||
|------|------------|--------|------------|
|
|
||||||
| TRREB digitization slips | Medium | High | Start Sprint 1; timebox; accept lower precision initially |
|
|
||||||
| PDF parser breaks on older years | Medium | Medium | Test multiple years early; build fallbacks |
|
|
||||||
| PostGIS geometry issues | Low | Medium | Validate CRS before load (2.1.8) |
|
|
||||||
| Choropleth performance | Low | Medium | Pre-aggregate; simplify geometries |
|
|
||||||
| Policy events research takes too long | Medium | Low | Timebox the research; ship with the 10-event minimum and expand toward the 20-event maximum post-launch |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Acceptance Criteria
|
|
||||||
|
|
||||||
### Launch 1
|
|
||||||
- [ ] Bio page accessible via HTTPS
|
|
||||||
- [ ] All content from `bio_content_v2.md` rendered
|
|
||||||
- [ ] No placeholder text (`[USERNAME]`) visible
|
|
||||||
- [ ] Mobile responsive
|
|
||||||
- [ ] Social links functional
|
|
||||||
|
|
||||||
### Launch 2
|
|
||||||
- [ ] Choropleth renders TRREB districts
|
|
||||||
- [ ] Choropleth renders CMHC zones
|
|
||||||
- [ ] Purchase/rental mode toggle works
|
|
||||||
- [ ] Time navigation works (monthly for TRREB, annual for CMHC)
|
|
||||||
- [ ] Policy event markers visible on time series
|
|
||||||
- [ ] Neighbourhood overlay toggleable
|
|
||||||
- [ ] Methodology documentation published
|
|
||||||
- [ ] Data sources cited
|
|
||||||
- [ ] Health endpoint responds
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Effort Legend
|
|
||||||
|
|
||||||
| Level | Meaning |
|
|
||||||
|-------|---------|
|
|
||||||
| **Low** | Straightforward; minimal iteration expected |
|
|
||||||
| **Mid** | Requires debugging or multi-step coordination |
|
|
||||||
| **High** | Complex logic, external tools, or human intervention required |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
*Document Version: 4.1*
|
|
||||||
*Created: January 2026*
|
|
||||||
70
notebooks/README.md
Normal file
70
notebooks/README.md
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# Dashboard Documentation Notebooks
|
||||||
|
|
||||||
|
Documentation notebooks organized by dashboard project. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern.
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
notebooks/
|
||||||
|
├── README.md # This file
|
||||||
|
└── toronto/ # Toronto Neighbourhood Dashboard
|
||||||
|
├── overview/ # Overview tab visualizations
|
||||||
|
├── housing/ # Housing tab visualizations
|
||||||
|
├── safety/ # Safety tab visualizations
|
||||||
|
├── demographics/ # Demographics tab visualizations
|
||||||
|
└── amenities/ # Amenities tab visualizations
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notebook Template
|
||||||
|
|
||||||
|
Each notebook follows a standard two-section structure:
|
||||||
|
|
||||||
|
### Section 1: Data Reference
|
||||||
|
|
||||||
|
Documents the data pipeline:
|
||||||
|
- **Source Tables**: List of dbt marts/tables used
|
||||||
|
- **SQL Query**: The exact query to fetch data
|
||||||
|
- **Transformation Steps**: Any pandas/python transformations
|
||||||
|
- **Sample Output**: First 10 rows of the result
|
||||||
|
|
||||||
|
### Section 2: Data Visualization
|
||||||
|
|
||||||
|
Documents the figure creation:
|
||||||
|
- **Figure Factory**: Import from `portfolio_app.figures`
|
||||||
|
- **Parameters**: Key configuration options
|
||||||
|
- **Rendered Output**: The actual visualization
|
||||||
|
|
||||||
|
## Available Figure Factories
|
||||||
|
|
||||||
|
| Factory | Module | Use Case |
|
||||||
|
|---------|--------|----------|
|
||||||
|
| `create_choropleth` | `figures.choropleth` | Map visualizations |
|
||||||
|
| `create_ranking_bar` | `figures.bar_charts` | Top/bottom N rankings |
|
||||||
|
| `create_stacked_bar` | `figures.bar_charts` | Category breakdowns |
|
||||||
|
| `create_scatter` | `figures.scatter` | Correlation plots |
|
||||||
|
| `create_radar` | `figures.radar` | Multi-metric comparisons |
|
||||||
|
| `create_age_pyramid` | `figures.demographics` | Age distributions |
|
||||||
|
| `create_time_series` | `figures.time_series` | Trend lines |
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Start Jupyter from project root:
|
||||||
|
```bash
|
||||||
|
jupyter notebook notebooks/
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Ensure database is running:
|
||||||
|
```bash
|
||||||
|
make docker-up
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Each notebook is self-contained — run all cells top to bottom.
|
||||||
|
|
||||||
|
## Notebook Naming Convention
|
||||||
|
|
||||||
|
`{metric}_{chart_type}.ipynb`
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
- `livability_choropleth.ipynb`
|
||||||
|
- `crime_trend_line.ipynb`
|
||||||
|
- `age_pyramid.ipynb`
|
||||||
0
notebooks/toronto/amenities/.gitkeep
Normal file
0
notebooks/toronto/amenities/.gitkeep
Normal file
182
notebooks/toronto/amenities/amenity_index_choropleth.ipynb
Normal file
182
notebooks/toronto/amenities/amenity_index_choropleth.ipynb
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Amenity Index Choropleth Map\n",
|
||||||
|
"\n",
|
||||||
|
"Displays total amenities per 1,000 residents across Toronto's 158 neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_amenities` | neighbourhood × year | amenity_index, total_amenities_per_1000, amenity_tier, geometry |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_id,\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" geometry,\n",
|
||||||
|
" year,\n",
|
||||||
|
" total_amenities_per_1000,\n",
|
||||||
|
" amenity_index,\n",
|
||||||
|
" amenity_tier,\n",
|
||||||
|
" parks_per_1000,\n",
|
||||||
|
" schools_per_1000,\n",
|
||||||
|
" transit_per_1000,\n",
|
||||||
|
" total_amenities,\n",
|
||||||
|
" population\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_amenities\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
|
||||||
|
"ORDER BY total_amenities_per_1000 DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent year\n",
|
||||||
|
"2. Convert geometry to GeoJSON"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"import geopandas as gpd\n",
|
||||||
|
"\n",
|
||||||
|
"gdf = gpd.GeoDataFrame(\n",
|
||||||
|
" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"geojson = json.loads(gdf.to_json())\n",
|
||||||
|
"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\"neighbourhood_name\", \"total_amenities_per_1000\", \"amenity_index\", \"amenity_tier\"]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_choropleth_figure(\n",
|
||||||
|
" geojson=geojson,\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" location_key=\"neighbourhood_id\",\n",
|
||||||
|
" color_column=\"total_amenities_per_1000\",\n",
|
||||||
|
" hover_data=[\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"amenity_index\",\n",
|
||||||
|
" \"parks_per_1000\",\n",
|
||||||
|
" \"schools_per_1000\",\n",
|
||||||
|
" ],\n",
|
||||||
|
" color_scale=\"Greens\",\n",
|
||||||
|
" title=\"Toronto Amenities per 1,000 Population\",\n",
|
||||||
|
" zoom=10,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Amenity Tier Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"| Tier | Meaning |\n",
|
||||||
|
"|------|--------|\n",
|
||||||
|
"| 1 | Best served (top 20%) |\n",
|
||||||
|
"| 2-4 | Middle tiers |\n",
|
||||||
|
"| 5 | Underserved (bottom 20%) |"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
191
notebooks/toronto/amenities/amenity_radar.ipynb
Normal file
191
notebooks/toronto/amenities/amenity_radar.ipynb
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Amenity Radar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Spider/radar chart comparing amenity categories for selected neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_amenities` | neighbourhood × year | parks_index, schools_index, transit_index |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" parks_index,\n",
|
||||||
|
" schools_index,\n",
|
||||||
|
" transit_index,\n",
|
||||||
|
" amenity_index,\n",
|
||||||
|
" amenity_tier\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_amenities\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
|
||||||
|
"ORDER BY amenity_index DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Select top 5 and bottom 5 neighbourhoods by amenity index\n",
|
||||||
|
"2. Reshape for radar chart format"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Select representative neighbourhoods\n",
|
||||||
|
"top_5 = df.head(5)\n",
|
||||||
|
"bottom_5 = df.tail(5)\n",
|
||||||
|
"\n",
|
||||||
|
"# Prepare radar data\n",
|
||||||
|
"categories = [\"Parks\", \"Schools\", \"Transit\"]\n",
|
||||||
|
"index_columns = [\"parks_index\", \"schools_index\", \"transit_index\"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Top 5 Amenity-Rich Neighbourhoods:\")\n",
|
||||||
|
"display(\n",
|
||||||
|
" top_5[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"parks_index\",\n",
|
||||||
|
" \"schools_index\",\n",
|
||||||
|
" \"transit_index\",\n",
|
||||||
|
" \"amenity_index\",\n",
|
||||||
|
" ]\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
|
"print(\"\\nBottom 5 Underserved Neighbourhoods:\")\n",
|
||||||
|
"display(\n",
|
||||||
|
" bottom_5[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"parks_index\",\n",
|
||||||
|
" \"schools_index\",\n",
|
||||||
|
" \"transit_index\",\n",
|
||||||
|
" \"amenity_index\",\n",
|
||||||
|
" ]\n",
|
||||||
|
" ]\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_radar` from `portfolio_app.figures.toronto.radar`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.radar import create_comparison_radar\n",
|
||||||
|
"\n",
|
||||||
|
"# Compare top neighbourhood vs city average (100)\n",
|
||||||
|
"top_hood = top_5.iloc[0]\n",
|
||||||
|
"metrics = [\"parks_index\", \"schools_index\", \"transit_index\"]\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_comparison_radar(\n",
|
||||||
|
" selected_data=top_hood.to_dict(),\n",
|
||||||
|
" average_data={\"parks_index\": 100, \"schools_index\": 100, \"transit_index\": 100},\n",
|
||||||
|
" metrics=metrics,\n",
|
||||||
|
" selected_name=top_hood[\"neighbourhood_name\"],\n",
|
||||||
|
" average_name=\"City Average\",\n",
|
||||||
|
" title=f\"Amenity Profile: {top_hood['neighbourhood_name']} vs City Average\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Index Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"| Value | Meaning |\n",
|
||||||
|
"|-------|--------|\n",
|
||||||
|
"| < 100 | Below city average |\n",
|
||||||
|
"| = 100 | City average |\n",
|
||||||
|
"| > 100 | Above city average |"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
169
notebooks/toronto/amenities/transit_accessibility_bar.ipynb
Normal file
169
notebooks/toronto/amenities/transit_accessibility_bar.ipynb
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Transit Accessibility Bar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Shows transit stops per 1,000 residents across Toronto neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_amenities` | neighbourhood × year | transit_per_1000, transit_index, transit_count |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" transit_per_1000,\n",
|
||||||
|
" transit_index,\n",
|
||||||
|
" transit_count,\n",
|
||||||
|
" population,\n",
|
||||||
|
" amenity_tier\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_amenities\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
|
||||||
|
" AND transit_per_1000 IS NOT NULL\n",
|
||||||
|
"ORDER BY transit_per_1000 DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Sort by transit accessibility\n",
|
||||||
|
"2. Select top 20 for visualization"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = df.head(20).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[[\"neighbourhood_name\", \"transit_per_1000\", \"transit_index\", \"transit_count\"]].head(\n",
|
||||||
|
" 10\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_horizontal_bar(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" name_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"transit_per_1000\",\n",
|
||||||
|
" title=\"Top 20 Neighbourhoods by Transit Accessibility\",\n",
|
||||||
|
" color=\"#00BCD4\",\n",
|
||||||
|
" value_format=\".2f\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transit Statistics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"City-wide Transit Statistics:\")\n",
|
||||||
|
"print(f\" Total Transit Stops: {df['transit_count'].sum():,.0f}\")\n",
|
||||||
|
"print(f\" Average per 1,000 pop: {df['transit_per_1000'].mean():.2f}\")\n",
|
||||||
|
"print(f\" Median per 1,000 pop: {df['transit_per_1000'].median():.2f}\")\n",
|
||||||
|
"print(f\" Best Access: {df['transit_per_1000'].max():.2f} per 1,000\")\n",
|
||||||
|
"print(f\" Worst Access: {df['transit_per_1000'].min():.2f} per 1,000\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
0
notebooks/toronto/demographics/.gitkeep
Normal file
0
notebooks/toronto/demographics/.gitkeep
Normal file
183
notebooks/toronto/demographics/age_distribution.ipynb
Normal file
183
notebooks/toronto/demographics/age_distribution.ipynb
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Age Distribution Analysis\n",
|
||||||
|
"\n",
|
||||||
|
"Compares median age and age index across Toronto neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_demographics` | neighbourhood × year | median_age, age_index, city_avg_age |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" median_age,\n",
|
||||||
|
" age_index,\n",
|
||||||
|
" city_avg_age,\n",
|
||||||
|
" population,\n",
|
||||||
|
" income_quintile,\n",
|
||||||
|
" pct_renter_occupied\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_demographics\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
|
||||||
|
" AND median_age IS NOT NULL\n",
|
||||||
|
"ORDER BY median_age DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods with age data\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent census year\n",
|
||||||
|
"2. Calculate deviation from city average\n",
|
||||||
|
"3. Classify as younger/older than average"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"city_avg = df[\"city_avg_age\"].iloc[0]\n",
|
||||||
|
"df[\"age_category\"] = df[\"median_age\"].apply(\n",
|
||||||
|
" lambda x: \"Younger\" if x < city_avg else \"Older\"\n",
|
||||||
|
")\n",
|
||||||
|
"df[\"age_deviation\"] = df[\"median_age\"] - city_avg\n",
|
||||||
|
"\n",
|
||||||
|
"data = df.to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(f\"City Average Age: {city_avg:.1f}\")\n",
|
||||||
|
"print(\"\\nYoungest Neighbourhoods:\")\n",
|
||||||
|
"display(\n",
|
||||||
|
" df.tail(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n",
|
||||||
|
")\n",
|
||||||
|
"print(\"\\nOldest Neighbourhoods:\")\n",
|
||||||
|
"display(\n",
|
||||||
|
" df.head(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_ranking_bar(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" name_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"median_age\",\n",
|
||||||
|
" title=\"Youngest & Oldest Neighbourhoods (Median Age)\",\n",
|
||||||
|
" top_n=10,\n",
|
||||||
|
" bottom_n=10,\n",
|
||||||
|
" color_top=\"#FF9800\", # Orange for older\n",
|
||||||
|
" color_bottom=\"#2196F3\", # Blue for younger\n",
|
||||||
|
" value_format=\".1f\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Age vs Income Correlation"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Age by income quintile\n",
|
||||||
|
"print(\"Median Age by Income Quintile:\")\n",
|
||||||
|
"df.groupby(\"income_quintile\")[\"median_age\"].mean().round(1)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
182
notebooks/toronto/demographics/income_choropleth.ipynb
Normal file
182
notebooks/toronto/demographics/income_choropleth.ipynb
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Median Income Choropleth Map\n",
|
||||||
|
"\n",
|
||||||
|
"Displays median household income across Toronto's 158 neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_demographics` | neighbourhood × year | median_household_income, income_index, income_quintile, geometry |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_id,\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" geometry,\n",
|
||||||
|
" year,\n",
|
||||||
|
" median_household_income,\n",
|
||||||
|
" income_index,\n",
|
||||||
|
" income_quintile,\n",
|
||||||
|
" population,\n",
|
||||||
|
" unemployment_rate\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_demographics\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
|
||||||
|
"ORDER BY median_household_income DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent census year\n",
|
||||||
|
"2. Convert geometry to GeoJSON\n",
|
||||||
|
"3. Scale income to thousands for readability"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"import geopandas as gpd\n",
|
||||||
|
"\n",
|
||||||
|
"df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n",
|
||||||
|
"\n",
|
||||||
|
"gdf = gpd.GeoDataFrame(\n",
|
||||||
|
" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"geojson = json.loads(gdf.to_json())\n",
|
||||||
|
"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\"neighbourhood_name\", \"median_household_income\", \"income_index\", \"income_quintile\"]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_choropleth_figure(\n",
|
||||||
|
" geojson=geojson,\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" location_key=\"neighbourhood_id\",\n",
|
||||||
|
" color_column=\"median_household_income\",\n",
|
||||||
|
" hover_data=[\"neighbourhood_name\", \"income_index\", \"income_quintile\"],\n",
|
||||||
|
" color_scale=\"Viridis\",\n",
|
||||||
|
" title=\"Toronto Median Household Income by Neighbourhood\",\n",
|
||||||
|
" zoom=10,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Income Quintile Distribution"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df.groupby(\"income_quintile\")[\"median_household_income\"].agg(\n",
|
||||||
|
" [\"count\", \"mean\", \"min\", \"max\"]\n",
|
||||||
|
").round(0)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
169
notebooks/toronto/demographics/population_density_bar.ipynb
Normal file
169
notebooks/toronto/demographics/population_density_bar.ipynb
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Population Density Bar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Shows population density (people per sq km) across Toronto neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_demographics` | neighbourhood × year | population_density, population, land_area_sqkm |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" population_density,\n",
|
||||||
|
" population,\n",
|
||||||
|
" land_area_sqkm,\n",
|
||||||
|
" median_household_income,\n",
|
||||||
|
" pct_renter_occupied\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_demographics\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
|
||||||
|
" AND population_density IS NOT NULL\n",
|
||||||
|
"ORDER BY population_density DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Sort by population density\n",
|
||||||
|
"2. Select top 20 most dense neighbourhoods"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = df.head(20).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[[\"neighbourhood_name\", \"population_density\", \"population\", \"land_area_sqkm\"]].head(\n",
|
||||||
|
" 10\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_horizontal_bar(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" name_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"population_density\",\n",
|
||||||
|
" title=\"Top 20 Most Dense Neighbourhoods\",\n",
|
||||||
|
" color=\"#9C27B0\",\n",
|
||||||
|
" value_format=\",.0f\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Density Statistics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"City-wide Statistics:\")\n",
|
||||||
|
"print(f\" Total Population: {df['population'].sum():,.0f}\")\n",
|
||||||
|
"print(f\" Total Area: {df['land_area_sqkm'].sum():,.1f} sq km\")\n",
|
||||||
|
"print(f\" Average Density: {df['population_density'].mean():,.0f} per sq km\")\n",
|
||||||
|
"print(f\" Max Density: {df['population_density'].max():,.0f} per sq km\")\n",
|
||||||
|
"print(f\" Min Density: {df['population_density'].min():,.0f} per sq km\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
187
notebooks/toronto/housing/affordability_choropleth.ipynb
Normal file
187
notebooks/toronto/housing/affordability_choropleth.ipynb
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Affordability Index Choropleth Map\n",
|
||||||
|
"\n",
|
||||||
|
"Displays housing affordability across Toronto's 158 neighbourhoods. Index of 100 = city average."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_housing` | neighbourhood × year | affordability_index, rent_to_income_pct, avg_rent_2bed, geometry |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_id,\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" geometry,\n",
|
||||||
|
" year,\n",
|
||||||
|
" affordability_index,\n",
|
||||||
|
" rent_to_income_pct,\n",
|
||||||
|
" avg_rent_2bed,\n",
|
||||||
|
" median_household_income,\n",
|
||||||
|
" is_affordable\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_housing\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_housing)\n",
|
||||||
|
"ORDER BY affordability_index ASC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent year\n",
|
||||||
|
"2. Convert geometry to GeoJSON\n",
|
||||||
|
"3. Lower index = more affordable (inverted for visualization clarity)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"import geopandas as gpd\n",
|
||||||
|
"\n",
|
||||||
|
"gdf = gpd.GeoDataFrame(\n",
|
||||||
|
" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"geojson = json.loads(gdf.to_json())\n",
|
||||||
|
"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"affordability_index\",\n",
|
||||||
|
" \"rent_to_income_pct\",\n",
|
||||||
|
" \"avg_rent_2bed\",\n",
|
||||||
|
" \"is_affordable\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `color_column`: 'affordability_index'\n",
|
||||||
|
"- `color_scale`: 'RdYlGn_r' (reversed: green=affordable, red=expensive)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_choropleth_figure(\n",
|
||||||
|
" geojson=geojson,\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" location_key=\"neighbourhood_id\",\n",
|
||||||
|
" color_column=\"affordability_index\",\n",
|
||||||
|
" hover_data=[\"neighbourhood_name\", \"rent_to_income_pct\", \"avg_rent_2bed\"],\n",
|
||||||
|
" color_scale=\"RdYlGn_r\", # Reversed: lower index (affordable) = green\n",
|
||||||
|
" title=\"Toronto Housing Affordability Index\",\n",
|
||||||
|
" zoom=10,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Index Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"| Index | Meaning |\n",
|
||||||
|
"|-------|--------|\n",
|
||||||
|
"| < 100 | More affordable than city average |\n",
|
||||||
|
"| = 100 | City average affordability |\n",
|
||||||
|
"| > 100 | Less affordable than city average |\n",
|
||||||
|
"\n",
|
||||||
|
"Affordability calculated as: `rent_to_income_pct / city_avg_rent_to_income * 100`"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
200
notebooks/toronto/housing/rent_trend_line.ipynb
Normal file
200
notebooks/toronto/housing/rent_trend_line.ipynb
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Rent Trend Line Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Shows 5-year rental price trends across Toronto neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_housing` | neighbourhood × year | year, avg_rent_2bed, rent_yoy_change_pct |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"# City-wide average rent by year\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" year,\n",
|
||||||
|
" AVG(avg_rent_bachelor) as avg_rent_bachelor,\n",
|
||||||
|
" AVG(avg_rent_1bed) as avg_rent_1bed,\n",
|
||||||
|
" AVG(avg_rent_2bed) as avg_rent_2bed,\n",
|
||||||
|
" AVG(avg_rent_3bed) as avg_rent_3bed,\n",
|
||||||
|
" AVG(rent_yoy_change_pct) as avg_yoy_change\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_housing\n",
|
||||||
|
"WHERE year >= (SELECT MAX(year) - 5 FROM public_marts.mart_neighbourhood_housing)\n",
|
||||||
|
"GROUP BY year\n",
|
||||||
|
"ORDER BY year\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} years of rent data\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Aggregate rent by year (city-wide average)\n",
|
||||||
|
"2. Convert year to datetime for proper x-axis\n",
|
||||||
|
"3. Reshape for multi-line chart by bedroom type"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Create date column from year\n",
|
||||||
|
"df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Melt for multi-line chart\n",
|
||||||
|
"df_melted = df.melt(\n",
|
||||||
|
" id_vars=[\"year\", \"date\"],\n",
|
||||||
|
" value_vars=[\"avg_rent_bachelor\", \"avg_rent_1bed\", \"avg_rent_2bed\", \"avg_rent_3bed\"],\n",
|
||||||
|
" var_name=\"bedroom_type\",\n",
|
||||||
|
" value_name=\"avg_rent\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Clean labels\n",
|
||||||
|
"df_melted[\"bedroom_type\"] = df_melted[\"bedroom_type\"].map(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"avg_rent_bachelor\": \"Bachelor\",\n",
|
||||||
|
" \"avg_rent_1bed\": \"1 Bedroom\",\n",
|
||||||
|
" \"avg_rent_2bed\": \"2 Bedroom\",\n",
|
||||||
|
" \"avg_rent_3bed\": \"3 Bedroom\",\n",
|
||||||
|
" }\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"year\",\n",
|
||||||
|
" \"avg_rent_bachelor\",\n",
|
||||||
|
" \"avg_rent_1bed\",\n",
|
||||||
|
" \"avg_rent_2bed\",\n",
|
||||||
|
" \"avg_rent_3bed\",\n",
|
||||||
|
" \"avg_yoy_change\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_price_time_series` from `portfolio_app.figures.toronto.time_series`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `date_column`: 'date'\n",
|
||||||
|
"- `price_column`: 'avg_rent'\n",
|
||||||
|
"- `group_column`: 'bedroom_type' (for multi-line)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.time_series import create_price_time_series\n",
|
||||||
|
"\n",
|
||||||
|
"data = df_melted.to_dict(\"records\")\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_price_time_series(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" date_column=\"date\",\n",
|
||||||
|
" price_column=\"avg_rent\",\n",
|
||||||
|
" group_column=\"bedroom_type\",\n",
|
||||||
|
" title=\"Toronto Average Rent Trend (5 Years)\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### YoY Change Analysis"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Show year-over-year changes\n",
|
||||||
|
"print(\"Year-over-Year Rent Change (%)\")\n",
|
||||||
|
"df[[\"year\", \"avg_yoy_change\"]].dropna()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
202
notebooks/toronto/housing/tenure_breakdown_bar.ipynb
Normal file
202
notebooks/toronto/housing/tenure_breakdown_bar.ipynb
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Housing Tenure Breakdown Bar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Shows the distribution of owner-occupied vs renter-occupied dwellings across neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_housing` | neighbourhood × year | pct_owner_occupied, pct_renter_occupied, income_quintile |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" pct_owner_occupied,\n",
|
||||||
|
" pct_renter_occupied,\n",
|
||||||
|
" income_quintile,\n",
|
||||||
|
" total_rental_units,\n",
|
||||||
|
" average_dwelling_value\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_housing\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_housing)\n",
|
||||||
|
" AND pct_owner_occupied IS NOT NULL\n",
|
||||||
|
"ORDER BY pct_renter_occupied DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods with tenure data\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent year with tenure data\n",
|
||||||
|
"2. Melt owner/renter columns for stacked bar\n",
|
||||||
|
"3. Sort by renter percentage (highest first)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Prepare for stacked bar\n",
|
||||||
|
"df_stacked = df.melt(\n",
|
||||||
|
" id_vars=[\"neighbourhood_name\", \"income_quintile\"],\n",
|
||||||
|
" value_vars=[\"pct_owner_occupied\", \"pct_renter_occupied\"],\n",
|
||||||
|
" var_name=\"tenure_type\",\n",
|
||||||
|
" value_name=\"percentage\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"df_stacked[\"tenure_type\"] = df_stacked[\"tenure_type\"].map(\n",
|
||||||
|
" {\"pct_owner_occupied\": \"Owner\", \"pct_renter_occupied\": \"Renter\"}\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"data = df_stacked.to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Highest Renter Neighbourhoods:\")\n",
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"pct_renter_occupied\",\n",
|
||||||
|
" \"pct_owner_occupied\",\n",
|
||||||
|
" \"income_quintile\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `x_column`: 'neighbourhood_name'\n",
|
||||||
|
"- `value_column`: 'percentage'\n",
|
||||||
|
"- `category_column`: 'tenure_type'\n",
|
||||||
|
"- `show_percentages`: True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n",
|
||||||
|
"\n",
|
||||||
|
"# Show top 20 by renter percentage\n",
|
||||||
|
"top_20_names = df.head(20)[\"neighbourhood_name\"].tolist()\n",
|
||||||
|
"data_filtered = [d for d in data if d[\"neighbourhood_name\"] in top_20_names]\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_stacked_bar(\n",
|
||||||
|
" data=data_filtered,\n",
|
||||||
|
" x_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"percentage\",\n",
|
||||||
|
" category_column=\"tenure_type\",\n",
|
||||||
|
" title=\"Housing Tenure Mix - Top 20 Renter Neighbourhoods\",\n",
|
||||||
|
" color_map={\"Owner\": \"#4CAF50\", \"Renter\": \"#2196F3\"},\n",
|
||||||
|
" show_percentages=True,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### City-Wide Distribution"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# City-wide averages\n",
|
||||||
|
"print(f\"City Average Owner-Occupied: {df['pct_owner_occupied'].mean():.1f}%\")\n",
|
||||||
|
"print(f\"City Average Renter-Occupied: {df['pct_renter_occupied'].mean():.1f}%\")\n",
|
||||||
|
"\n",
|
||||||
|
"# By income quintile\n",
|
||||||
|
"print(\"\\nTenure by Income Quintile:\")\n",
|
||||||
|
"df.groupby(\"income_quintile\")[\n",
|
||||||
|
" [\"pct_owner_occupied\", \"pct_renter_occupied\"]\n",
|
||||||
|
"].mean().round(1)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
196
notebooks/toronto/overview/income_safety_scatter.ipynb
Normal file
196
notebooks/toronto/overview/income_safety_scatter.ipynb
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Income vs Safety Scatter Plot\n",
|
||||||
|
"\n",
|
||||||
|
"Explores the correlation between median household income and safety score across Toronto neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, median_household_income, safety_score, population |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" median_household_income,\n",
|
||||||
|
" safety_score,\n",
|
||||||
|
" population,\n",
|
||||||
|
" livability_score,\n",
|
||||||
|
" crime_rate_per_100k\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_overview\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
|
||||||
|
" AND median_household_income IS NOT NULL\n",
|
||||||
|
" AND safety_score IS NOT NULL\n",
|
||||||
|
"ORDER BY median_household_income DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods with income and safety data\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter out null values for income and safety\n",
|
||||||
|
"2. Optionally scale income to thousands for readability\n",
|
||||||
|
"3. Pass to scatter figure factory with optional trendline"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Scale income to thousands for better axis readability\n",
|
||||||
|
"df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n",
|
||||||
|
"\n",
|
||||||
|
"# Prepare data for figure factory\n",
|
||||||
|
"data = df.to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"median_household_income\",\n",
|
||||||
|
" \"safety_score\",\n",
|
||||||
|
" \"crime_rate_per_100k\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_scatter_figure` from `portfolio_app.figures.toronto.scatter`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `x_column`: 'income_thousands' (median household income in $K)\n",
|
||||||
|
"- `y_column`: 'safety_score' (0-100 percentile rank)\n",
|
||||||
|
"- `name_column`: 'neighbourhood_name' (hover label)\n",
|
||||||
|
"- `size_column`: 'population' (optional, bubble size)\n",
|
||||||
|
"- `trendline`: True (adds OLS regression line)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.scatter import create_scatter_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_scatter_figure(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" x_column=\"income_thousands\",\n",
|
||||||
|
" y_column=\"safety_score\",\n",
|
||||||
|
" name_column=\"neighbourhood_name\",\n",
|
||||||
|
" size_column=\"population\",\n",
|
||||||
|
" title=\"Income vs Safety by Neighbourhood\",\n",
|
||||||
|
" x_title=\"Median Household Income ($K)\",\n",
|
||||||
|
" y_title=\"Safety Score (0-100)\",\n",
|
||||||
|
" trendline=True,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"This scatter plot reveals the relationship between income and safety:\n",
|
||||||
|
"\n",
|
||||||
|
"- **Positive correlation**: Higher income neighbourhoods tend to have higher safety scores\n",
|
||||||
|
"- **Bubble size**: Represents population (larger = more people)\n",
|
||||||
|
"- **Trendline**: Orange dashed line shows the overall trend\n",
|
||||||
|
"- **Outliers**: Neighbourhoods far from the trendline are interesting cases\n",
|
||||||
|
" - Above line: Safer than income would predict\n",
|
||||||
|
" - Below line: Less safe than income would predict"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Calculate correlation coefficient\n",
|
||||||
|
"correlation = df[\"median_household_income\"].corr(df[\"safety_score\"])\n",
|
||||||
|
"print(f\"Correlation coefficient (Income vs Safety): {correlation:.3f}\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
201
notebooks/toronto/overview/livability_choropleth.ipynb
Normal file
201
notebooks/toronto/overview/livability_choropleth.ipynb
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Livability Score Choropleth Map\n",
|
||||||
|
"\n",
|
||||||
|
"Displays neighbourhood livability scores on an interactive map of Toronto's 158 neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_overview` | neighbourhood × year | livability_score, safety_score, affordability_score, amenity_score, geometry |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_id,\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" geometry,\n",
|
||||||
|
" year,\n",
|
||||||
|
" livability_score,\n",
|
||||||
|
" safety_score,\n",
|
||||||
|
" affordability_score,\n",
|
||||||
|
" amenity_score,\n",
|
||||||
|
" population,\n",
|
||||||
|
" median_household_income\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_overview\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
|
||||||
|
"ORDER BY livability_score DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent year of data\n",
|
||||||
|
"2. Extract GeoJSON from PostGIS geometry column\n",
|
||||||
|
"3. Pass to choropleth figure factory"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Transform geometry to GeoJSON\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"import geopandas as gpd\n",
|
||||||
|
"\n",
|
||||||
|
"# Convert WKB geometry to GeoDataFrame\n",
|
||||||
|
"gdf = gpd.GeoDataFrame(\n",
|
||||||
|
" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Create GeoJSON FeatureCollection\n",
|
||||||
|
"geojson = json.loads(gdf.to_json())\n",
|
||||||
|
"\n",
|
||||||
|
"# Prepare data for figure factory\n",
|
||||||
|
"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"livability_score\",\n",
|
||||||
|
" \"safety_score\",\n",
|
||||||
|
" \"affordability_score\",\n",
|
||||||
|
" \"amenity_score\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `geojson`: GeoJSON FeatureCollection with neighbourhood boundaries\n",
|
||||||
|
"- `data`: List of dicts with neighbourhood_id and scores\n",
|
||||||
|
"- `location_key`: 'neighbourhood_id'\n",
|
||||||
|
"- `color_column`: 'livability_score' (or safety_score, etc.)\n",
|
||||||
|
"- `color_scale`: 'RdYlGn' (red=low, yellow=mid, green=high)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_choropleth_figure(\n",
|
||||||
|
" geojson=geojson,\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" location_key=\"neighbourhood_id\",\n",
|
||||||
|
" color_column=\"livability_score\",\n",
|
||||||
|
" hover_data=[\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"safety_score\",\n",
|
||||||
|
" \"affordability_score\",\n",
|
||||||
|
" \"amenity_score\",\n",
|
||||||
|
" ],\n",
|
||||||
|
" color_scale=\"RdYlGn\",\n",
|
||||||
|
" title=\"Toronto Neighbourhood Livability Score\",\n",
|
||||||
|
" zoom=10,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Score Components\n",
|
||||||
|
"\n",
|
||||||
|
"The livability score is a weighted composite:\n",
|
||||||
|
"\n",
|
||||||
|
"| Component | Weight | Source |\n",
|
||||||
|
"|-----------|--------|--------|\n",
|
||||||
|
"| Safety | 30% | Inverse of crime rate per 100K |\n",
|
||||||
|
"| Affordability | 40% | Inverse of rent-to-income ratio |\n",
|
||||||
|
"| Amenities | 30% | Amenities per 1,000 residents |"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
173
notebooks/toronto/overview/top_bottom_10_bar.ipynb
Normal file
173
notebooks/toronto/overview/top_bottom_10_bar.ipynb
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Top & Bottom 10 Neighbourhoods Bar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Horizontal bar chart showing the highest and lowest scoring neighbourhoods by livability."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, livability_score |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" livability_score,\n",
|
||||||
|
" safety_score,\n",
|
||||||
|
" affordability_score,\n",
|
||||||
|
" amenity_score\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_overview\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
|
||||||
|
" AND livability_score IS NOT NULL\n",
|
||||||
|
"ORDER BY livability_score DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods with scores\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Sort by livability_score descending\n",
|
||||||
|
"2. Take top 10 and bottom 10\n",
|
||||||
|
"3. Pass to ranking bar figure factory"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# The figure factory handles top/bottom selection internally\n",
|
||||||
|
"# Just prepare as list of dicts\n",
|
||||||
|
"data = df.to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Top 5:\")\n",
|
||||||
|
"display(df.head(5))\n",
|
||||||
|
"print(\"\\nBottom 5:\")\n",
|
||||||
|
"display(df.tail(5))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `data`: List of dicts with all neighbourhoods\n",
|
||||||
|
"- `name_column`: 'neighbourhood_name'\n",
|
||||||
|
"- `value_column`: 'livability_score'\n",
|
||||||
|
"- `top_n`: 10 (green bars)\n",
|
||||||
|
"- `bottom_n`: 10 (red bars)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_ranking_bar(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" name_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"livability_score\",\n",
|
||||||
|
" title=\"Top & Bottom 10 Neighbourhoods by Livability\",\n",
|
||||||
|
" top_n=10,\n",
|
||||||
|
" bottom_n=10,\n",
|
||||||
|
" color_top=\"#4CAF50\", # Green for top performers\n",
|
||||||
|
" color_bottom=\"#F44336\", # Red for bottom performers\n",
|
||||||
|
" value_format=\".1f\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"- **Green bars**: Highest livability scores (best combination of safety, affordability, and amenities)\n",
|
||||||
|
"- **Red bars**: Lowest livability scores (areas that may need targeted investment)\n",
|
||||||
|
"\n",
|
||||||
|
"The ranking bar chart provides quick context for which neighbourhoods stand out at either extreme."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
0
notebooks/toronto/safety/.gitkeep
Normal file
0
notebooks/toronto/safety/.gitkeep
Normal file
200
notebooks/toronto/safety/crime_breakdown_bar.ipynb
Normal file
200
notebooks/toronto/safety/crime_breakdown_bar.ipynb
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Crime Type Breakdown Bar Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Stacked bar chart showing crime composition by Major Crime Indicator (MCI) categories."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_safety` | neighbourhood × year | assault_count, auto_theft_count, break_enter_count, robbery_count, etc. |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" assault_count,\n",
|
||||||
|
" auto_theft_count,\n",
|
||||||
|
" break_enter_count,\n",
|
||||||
|
" robbery_count,\n",
|
||||||
|
" theft_over_count,\n",
|
||||||
|
" homicide_count,\n",
|
||||||
|
" total_incidents,\n",
|
||||||
|
" crime_rate_per_100k\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_safety\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_safety)\n",
|
||||||
|
"ORDER BY total_incidents DESC\n",
|
||||||
|
"LIMIT 15\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded top {len(df)} neighbourhoods by crime volume\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Select top 15 neighbourhoods by total incidents\n",
|
||||||
|
"2. Melt crime type columns into rows\n",
|
||||||
|
"3. Pass to stacked bar figure factory"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df_melted = df.melt(\n",
|
||||||
|
" id_vars=[\"neighbourhood_name\", \"total_incidents\"],\n",
|
||||||
|
" value_vars=[\n",
|
||||||
|
" \"assault_count\",\n",
|
||||||
|
" \"auto_theft_count\",\n",
|
||||||
|
" \"break_enter_count\",\n",
|
||||||
|
" \"robbery_count\",\n",
|
||||||
|
" \"theft_over_count\",\n",
|
||||||
|
" \"homicide_count\",\n",
|
||||||
|
" ],\n",
|
||||||
|
" var_name=\"crime_type\",\n",
|
||||||
|
" value_name=\"count\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Clean labels\n",
|
||||||
|
"df_melted[\"crime_type\"] = (\n",
|
||||||
|
" df_melted[\"crime_type\"].str.replace(\"_count\", \"\").str.replace(\"_\", \" \").str.title()\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"data = df_melted.to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"assault_count\",\n",
|
||||||
|
" \"auto_theft_count\",\n",
|
||||||
|
" \"break_enter_count\",\n",
|
||||||
|
" \"total_incidents\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_stacked_bar(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" x_column=\"neighbourhood_name\",\n",
|
||||||
|
" value_column=\"count\",\n",
|
||||||
|
" category_column=\"crime_type\",\n",
|
||||||
|
" title=\"Crime Type Breakdown - Top 15 Neighbourhoods\",\n",
|
||||||
|
" color_map={\n",
|
||||||
|
" \"Assault\": \"#d62728\",\n",
|
||||||
|
" \"Auto Theft\": \"#ff7f0e\",\n",
|
||||||
|
" \"Break Enter\": \"#9467bd\",\n",
|
||||||
|
" \"Robbery\": \"#8c564b\",\n",
|
||||||
|
" \"Theft Over\": \"#e377c2\",\n",
|
||||||
|
" \"Homicide\": \"#1f77b4\",\n",
|
||||||
|
" },\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### MCI Categories\n",
|
||||||
|
"\n",
|
||||||
|
"| Category | Description |\n",
|
||||||
|
"|----------|------------|\n",
|
||||||
|
"| Assault | Physical attacks |\n",
|
||||||
|
"| Auto Theft | Vehicle theft |\n",
|
||||||
|
"| Break & Enter | Burglary |\n",
|
||||||
|
"| Robbery | Theft with force/threat |\n",
|
||||||
|
"| Theft Over | Theft > $5,000 |\n",
|
||||||
|
"| Homicide | Murder/manslaughter |"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
185
notebooks/toronto/safety/crime_rate_choropleth.ipynb
Normal file
185
notebooks/toronto/safety/crime_rate_choropleth.ipynb
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Crime Rate Choropleth Map\n",
|
||||||
|
"\n",
|
||||||
|
"Displays crime rates per 100,000 population across Toronto's 158 neighbourhoods."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_safety` | neighbourhood × year | crime_rate_per_100k, crime_index, safety_tier, geometry |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" neighbourhood_id,\n",
|
||||||
|
" neighbourhood_name,\n",
|
||||||
|
" geometry,\n",
|
||||||
|
" year,\n",
|
||||||
|
" crime_rate_per_100k,\n",
|
||||||
|
" crime_index,\n",
|
||||||
|
" safety_tier,\n",
|
||||||
|
" total_incidents,\n",
|
||||||
|
" population\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_safety\n",
|
||||||
|
"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_safety)\n",
|
||||||
|
"ORDER BY crime_rate_per_100k DESC\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} neighbourhoods\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Filter to most recent year\n",
|
||||||
|
"2. Convert geometry to GeoJSON\n",
|
||||||
|
"3. Use reversed color scale (green=low crime, red=high crime)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"import geopandas as gpd\n",
|
||||||
|
"\n",
|
||||||
|
"gdf = gpd.GeoDataFrame(\n",
|
||||||
|
" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"geojson = json.loads(gdf.to_json())\n",
|
||||||
|
"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\n",
|
||||||
|
" [\n",
|
||||||
|
" \"neighbourhood_name\",\n",
|
||||||
|
" \"crime_rate_per_100k\",\n",
|
||||||
|
" \"crime_index\",\n",
|
||||||
|
" \"safety_tier\",\n",
|
||||||
|
" \"total_incidents\",\n",
|
||||||
|
" ]\n",
|
||||||
|
"].head(10)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
|
||||||
|
"\n",
|
||||||
|
"**Key Parameters:**\n",
|
||||||
|
"- `color_column`: 'crime_rate_per_100k'\n",
|
||||||
|
"- `color_scale`: 'RdYlGn_r' (red=high crime, green=low crime)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_choropleth_figure(\n",
|
||||||
|
" geojson=geojson,\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" location_key=\"neighbourhood_id\",\n",
|
||||||
|
" color_column=\"crime_rate_per_100k\",\n",
|
||||||
|
" hover_data=[\"neighbourhood_name\", \"crime_index\", \"total_incidents\"],\n",
|
||||||
|
" color_scale=\"RdYlGn_r\",\n",
|
||||||
|
" title=\"Toronto Crime Rate per 100,000 Population\",\n",
|
||||||
|
" zoom=10,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Safety Tier Interpretation\n",
|
||||||
|
"\n",
|
||||||
|
"| Tier | Meaning |\n",
|
||||||
|
"|------|--------|\n",
|
||||||
|
"| 1 | Highest crime (top 20%) |\n",
|
||||||
|
"| 2-4 | Middle tiers |\n",
|
||||||
|
"| 5 | Lowest crime (bottom 20%) |"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
198
notebooks/toronto/safety/crime_trend_line.ipynb
Normal file
198
notebooks/toronto/safety/crime_trend_line.ipynb
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Crime Trend Line Chart\n",
|
||||||
|
"\n",
|
||||||
|
"Shows 5-year crime rate trends across Toronto."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Data Reference\n",
|
||||||
|
"\n",
|
||||||
|
"### Source Tables\n",
|
||||||
|
"\n",
|
||||||
|
"| Table | Grain | Key Columns |\n",
|
||||||
|
"|-------|-------|-------------|\n",
|
||||||
|
"| `mart_neighbourhood_safety` | neighbourhood × year | year, crime_rate_per_100k, crime_yoy_change_pct |\n",
|
||||||
|
"\n",
|
||||||
|
"### SQL Query"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
"SELECT\n",
|
||||||
|
" year,\n",
|
||||||
|
" AVG(crime_rate_per_100k) as avg_crime_rate,\n",
|
||||||
|
" AVG(assault_rate_per_100k) as avg_assault_rate,\n",
|
||||||
|
" AVG(auto_theft_rate_per_100k) as avg_auto_theft_rate,\n",
|
||||||
|
" AVG(break_enter_rate_per_100k) as avg_break_enter_rate,\n",
|
||||||
|
" SUM(total_incidents) as total_city_incidents,\n",
|
||||||
|
" AVG(crime_yoy_change_pct) as avg_yoy_change\n",
|
||||||
|
"FROM public_marts.mart_neighbourhood_safety\n",
|
||||||
|
"WHERE year >= (SELECT MAX(year) - 5 FROM public_marts.mart_neighbourhood_safety)\n",
|
||||||
|
"GROUP BY year\n",
|
||||||
|
"ORDER BY year\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"df = pd.read_sql(query, engine)\n",
|
||||||
|
"print(f\"Loaded {len(df)} years of crime data\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Transformation Steps\n",
|
||||||
|
"\n",
|
||||||
|
"1. Aggregate by year (city-wide)\n",
|
||||||
|
"2. Convert year to datetime\n",
|
||||||
|
"3. Melt for multi-line by crime type"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Melt for multi-line\n",
|
||||||
|
"df_melted = df.melt(\n",
|
||||||
|
" id_vars=[\"year\", \"date\"],\n",
|
||||||
|
" value_vars=[\"avg_assault_rate\", \"avg_auto_theft_rate\", \"avg_break_enter_rate\"],\n",
|
||||||
|
" var_name=\"crime_type\",\n",
|
||||||
|
" value_name=\"rate_per_100k\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"df_melted[\"crime_type\"] = df_melted[\"crime_type\"].map(\n",
|
||||||
|
" {\n",
|
||||||
|
" \"avg_assault_rate\": \"Assault\",\n",
|
||||||
|
" \"avg_auto_theft_rate\": \"Auto Theft\",\n",
|
||||||
|
" \"avg_break_enter_rate\": \"Break & Enter\",\n",
|
||||||
|
" }\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Sample Output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df[[\"year\", \"avg_crime_rate\", \"total_city_incidents\", \"avg_yoy_change\"]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Data Visualization\n",
|
||||||
|
"\n",
|
||||||
|
"### Figure Factory\n",
|
||||||
|
"\n",
|
||||||
|
"Uses `create_price_time_series` (reused for any numeric trend)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
|
"sys.path.insert(0, \"../..\")\n",
|
||||||
|
"\n",
|
||||||
|
"from portfolio_app.figures.toronto.time_series import create_price_time_series\n",
|
||||||
|
"\n",
|
||||||
|
"data = df_melted.to_dict(\"records\")\n",
|
||||||
|
"\n",
|
||||||
|
"fig = create_price_time_series(\n",
|
||||||
|
" data=data,\n",
|
||||||
|
" date_column=\"date\",\n",
|
||||||
|
" price_column=\"rate_per_100k\",\n",
|
||||||
|
" group_column=\"crime_type\",\n",
|
||||||
|
" title=\"Toronto Crime Trends by Type (5 Years)\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Remove dollar sign formatting since this is rate data\n",
|
||||||
|
"fig.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n",
|
||||||
|
"\n",
|
||||||
|
"fig.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Overall Trend"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Total crime rate trend\n",
|
||||||
|
"total_data = (\n",
|
||||||
|
" df[[\"date\", \"avg_crime_rate\"]]\n",
|
||||||
|
" .rename(columns={\"avg_crime_rate\": \"total_rate\"})\n",
|
||||||
|
" .to_dict(\"records\")\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"fig2 = create_price_time_series(\n",
|
||||||
|
" data=total_data,\n",
|
||||||
|
" date_column=\"date\",\n",
|
||||||
|
" price_column=\"total_rate\",\n",
|
||||||
|
" title=\"Toronto Overall Crime Rate Trend\",\n",
|
||||||
|
")\n",
|
||||||
|
"fig2.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n",
|
||||||
|
"fig2.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
||||||
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
import dash
|
import dash
|
||||||
import dash_mantine_components as dmc
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
|
||||||
|
from .components import create_sidebar
|
||||||
from .config import get_settings
|
from .config import get_settings
|
||||||
|
|
||||||
|
|
||||||
@@ -17,14 +19,31 @@ def create_app() -> dash.Dash:
|
|||||||
)
|
)
|
||||||
|
|
||||||
app.layout = dmc.MantineProvider(
|
app.layout = dmc.MantineProvider(
|
||||||
dash.page_container,
|
id="mantine-provider",
|
||||||
|
children=[
|
||||||
|
dcc.Location(id="url", refresh=False),
|
||||||
|
dcc.Store(id="theme-store", storage_type="local", data="dark"),
|
||||||
|
dcc.Store(id="theme-init-dummy"), # Dummy store for theme init callback
|
||||||
|
html.Div(
|
||||||
|
[
|
||||||
|
create_sidebar(),
|
||||||
|
html.Div(
|
||||||
|
dash.page_container,
|
||||||
|
className="page-content-wrapper",
|
||||||
|
),
|
||||||
|
],
|
||||||
|
),
|
||||||
|
],
|
||||||
theme={
|
theme={
|
||||||
"primaryColor": "blue",
|
"primaryColor": "blue",
|
||||||
"fontFamily": "'Inter', sans-serif",
|
"fontFamily": "'Inter', sans-serif",
|
||||||
},
|
},
|
||||||
forceColorScheme="light",
|
defaultColorScheme="dark",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Import callbacks to register them
|
||||||
|
from . import callbacks # noqa: F401
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
139
portfolio_app/assets/sidebar.css
Normal file
139
portfolio_app/assets/sidebar.css
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
/* Floating sidebar navigation styles */
|
||||||
|
|
||||||
|
/* Sidebar container */
|
||||||
|
.floating-sidebar {
|
||||||
|
position: fixed;
|
||||||
|
left: 16px;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
width: 60px;
|
||||||
|
padding: 16px 8px;
|
||||||
|
border-radius: 32px;
|
||||||
|
z-index: 1000;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
||||||
|
transition: background-color 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Page content offset to prevent sidebar overlap */
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 92px; /* sidebar width (60px) + left margin (16px) + gap (16px) */
|
||||||
|
min-height: 100vh;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dark theme (default) */
|
||||||
|
[data-mantine-color-scheme="dark"] .floating-sidebar {
|
||||||
|
background-color: #141414;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-mantine-color-scheme="dark"] body {
|
||||||
|
background-color: #000000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Light theme */
|
||||||
|
[data-mantine-color-scheme="light"] .floating-sidebar {
|
||||||
|
background-color: #f0f0f0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-mantine-color-scheme="light"] body {
|
||||||
|
background-color: #ffffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Brand initials styling */
|
||||||
|
.sidebar-brand {
|
||||||
|
width: 40px;
|
||||||
|
height: 40px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
border-radius: 50%;
|
||||||
|
background-color: var(--mantine-color-blue-filled);
|
||||||
|
margin-bottom: 4px;
|
||||||
|
transition: transform 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand:hover {
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand-link {
|
||||||
|
font-weight: 700;
|
||||||
|
font-size: 16px;
|
||||||
|
color: white;
|
||||||
|
text-decoration: none;
|
||||||
|
line-height: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Divider between sections */
|
||||||
|
.sidebar-divider {
|
||||||
|
width: 32px;
|
||||||
|
height: 1px;
|
||||||
|
background-color: var(--mantine-color-dimmed);
|
||||||
|
margin: 4px 0;
|
||||||
|
opacity: 0.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Active nav icon indicator */
|
||||||
|
.nav-icon-active {
|
||||||
|
background-color: var(--mantine-color-blue-filled) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Navigation icon hover effects */
|
||||||
|
.floating-sidebar .mantine-ActionIcon-root {
|
||||||
|
transition: transform 0.15s ease, background-color 0.15s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.floating-sidebar .mantine-ActionIcon-root:hover {
|
||||||
|
transform: scale(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure links don't have underlines */
|
||||||
|
.floating-sidebar a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Theme toggle specific styling */
|
||||||
|
#theme-toggle {
|
||||||
|
transition: transform 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
#theme-toggle:hover {
|
||||||
|
transform: rotate(15deg) scale(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive adjustments for smaller screens */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.floating-sidebar {
|
||||||
|
left: 8px;
|
||||||
|
width: 50px;
|
||||||
|
padding: 12px 6px;
|
||||||
|
border-radius: 25px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 70px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand {
|
||||||
|
width: 34px;
|
||||||
|
height: 34px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand-link {
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Very small screens - hide sidebar, show minimal navigation */
|
||||||
|
@media (max-width: 480px) {
|
||||||
|
.floating-sidebar {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
5
portfolio_app/callbacks/__init__.py
Normal file
5
portfolio_app/callbacks/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Application-level callbacks for the portfolio app."""
|
||||||
|
|
||||||
|
from . import contact, sidebar, theme
|
||||||
|
|
||||||
|
__all__ = ["contact", "sidebar", "theme"]
|
||||||
214
portfolio_app/callbacks/contact.py
Normal file
214
portfolio_app/callbacks/contact.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
"""Contact form submission callback with Formspree integration."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
import requests
|
||||||
|
from dash import Input, Output, State, callback, no_update
|
||||||
|
from dash_iconify import DashIconify
|
||||||
|
|
||||||
|
# Formspree form endpoint; POSTs here are forwarded to the site owner's inbox.
FORMSPREE_ENDPOINT = "https://formspree.io/f/mqelqzpd"
# Lightweight email shape check (local@domain.tld); intentionally not a full
# RFC 5322 validator — Formspree re-validates server-side.
EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_form(
    name: str | None, email: str | None, message: str | None
) -> str | None:
    """Return a user-facing error for the first invalid field, or None if all pass.

    Checks run in display order: name, email presence, email format, message.
    """
    stripped_name = (name or "").strip()
    stripped_email = (email or "").strip()
    stripped_message = (message or "").strip()

    if not stripped_name:
        return "Please enter your name."
    if not stripped_email:
        return "Please enter your email address."
    if not EMAIL_REGEX.match(stripped_email):
        return "Please enter a valid email address."
    if not stripped_message:
        return "Please enter a message."
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _create_success_alert() -> dmc.Alert:
    """Build the green confirmation alert shown after a successful submission."""
    check_icon = DashIconify(icon="tabler:check", width=20)
    return dmc.Alert(
        "Thank you for your message! I'll get back to you soon.",
        title="Message Sent",
        color="green",
        variant="light",
        icon=check_icon,
        withCloseButton=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _create_error_alert(message: str) -> dmc.Alert:
    """Build a red error alert wrapping *message* for the contact-form feedback area."""
    warning_icon = DashIconify(icon="tabler:alert-circle", width=20)
    return dmc.Alert(
        message,
        title="Error",
        color="red",
        variant="light",
        icon=warning_icon,
        withCloseButton=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@callback(  # type: ignore[misc]
    Output("contact-feedback", "children"),
    Output("contact-submit", "loading"),
    Output("contact-name", "value"),
    Output("contact-email", "value"),
    Output("contact-subject", "value"),
    Output("contact-message", "value"),
    Output("contact-name", "error"),
    Output("contact-email", "error"),
    Output("contact-message", "error"),
    Input("contact-submit", "n_clicks"),
    State("contact-name", "value"),
    State("contact-email", "value"),
    State("contact-subject", "value"),
    State("contact-message", "value"),
    State("contact-gotcha", "value"),
    prevent_initial_call=True,
)
def submit_contact_form(
    n_clicks: int | None,
    name: str | None,
    email: str | None,
    subject: str | None,
    message: str | None,
    gotcha: str | None,
) -> tuple[Any, ...]:
    """Submit contact form to Formspree.

    Args:
        n_clicks: Button click count.
        name: User's name.
        email: User's email address.
        subject: Message subject (optional).
        message: Message content.
        gotcha: Honeypot field value (should be empty for real users).

    Returns:
        Tuple of (feedback, loading, name, email, subject, message,
        name_error, email_error, message_error).
    """

    def _success_response() -> tuple[Any, ...]:
        # Success clears every field and resets all per-field error states.
        return (_create_success_alert(), False, "", "", None, "", None, None, None)

    def _failure_response(
        alert_message: str,
        name_error: str | None = None,
        email_error: str | None = None,
        message_error: str | None = None,
    ) -> tuple[Any, ...]:
        # Failure preserves the user's input (no_update) so nothing typed is lost.
        return (
            _create_error_alert(alert_message),
            False,
            no_update,
            no_update,
            no_update,
            no_update,
            name_error,
            email_error,
            message_error,
        )

    if not n_clicks:
        return (no_update,) * 9

    # Honeypot: bots fill the hidden field; silently "succeed" so they stop retrying.
    if gotcha:
        return _success_response()

    validation_error = _validate_form(name, email, message)
    if validation_error:
        # Attach a short inline error to each field that failed.
        name_error = "Required" if not name or not name.strip() else None
        message_error = "Required" if not message or not message.strip() else None
        if not email or not email.strip():
            email_error: str | None = "Required"
        elif not EMAIL_REGEX.match(email.strip()):
            email_error = "Invalid email format"
        else:
            email_error = None
        return _failure_response(
            validation_error, name_error, email_error, message_error
        )

    # Validation passed, so the required fields are non-empty strings here.
    # (Narrowing via `or ""` instead of `assert`, which is stripped under -O.)
    form_data = {
        "name": (name or "").strip(),
        "email": (email or "").strip(),
        "subject": subject or "General Inquiry",
        "message": (message or "").strip(),
        "_gotcha": "",  # Formspree server-side honeypot field
    }

    try:
        response = requests.post(
            FORMSPREE_ENDPOINT,
            json=form_data,
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
            timeout=10,
        )
    except requests.exceptions.Timeout:
        return _failure_response("Request timed out. Please try again.")
    except requests.exceptions.RequestException:
        return _failure_response(
            "Network error. Please check your connection and try again."
        )

    if response.status_code == 200:
        return _success_response()
    # Formspree returned a non-200 status.
    return _failure_response(
        "Failed to send message. Please try again or use direct contact."
    )
|
||||||
25
portfolio_app/callbacks/sidebar.py
Normal file
25
portfolio_app/callbacks/sidebar.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""Sidebar navigation callbacks for active state updates."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from dash import Input, Output, callback
|
||||||
|
|
||||||
|
from portfolio_app.components.sidebar import create_sidebar_content
|
||||||
|
|
||||||
|
|
||||||
|
@callback( # type: ignore[misc]
|
||||||
|
Output("floating-sidebar", "children"),
|
||||||
|
Input("url", "pathname"),
|
||||||
|
prevent_initial_call=False,
|
||||||
|
)
|
||||||
|
def update_sidebar_active_state(pathname: str) -> list[Any]:
|
||||||
|
"""Update sidebar to highlight the current page.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pathname: Current URL pathname from dcc.Location.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Updated sidebar content with correct active state.
|
||||||
|
"""
|
||||||
|
current_path = pathname or "/"
|
||||||
|
return create_sidebar_content(current_path=current_path)
|
||||||
38
portfolio_app/callbacks/theme.py
Normal file
38
portfolio_app/callbacks/theme.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""Theme toggle callbacks using clientside JavaScript."""
|
||||||
|
|
||||||
|
from dash import Input, Output, State, clientside_callback
|
||||||
|
|
||||||
|
# Toggle theme on button click
|
||||||
|
# Stores new theme value and updates the DOM attribute
|
||||||
|
clientside_callback(
|
||||||
|
"""
|
||||||
|
function(n_clicks, currentTheme) {
|
||||||
|
if (n_clicks === undefined || n_clicks === null) {
|
||||||
|
return window.dash_clientside.no_update;
|
||||||
|
}
|
||||||
|
const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
|
||||||
|
document.documentElement.setAttribute('data-mantine-color-scheme', newTheme);
|
||||||
|
return newTheme;
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
Output("theme-store", "data"),
|
||||||
|
Input("theme-toggle", "n_clicks"),
|
||||||
|
State("theme-store", "data"),
|
||||||
|
prevent_initial_call=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize theme from localStorage on page load
|
||||||
|
# Uses a dummy output since we only need the side effect of setting the DOM attribute
|
||||||
|
clientside_callback(
|
||||||
|
"""
|
||||||
|
function(theme) {
|
||||||
|
if (theme) {
|
||||||
|
document.documentElement.setAttribute('data-mantine-color-scheme', theme);
|
||||||
|
}
|
||||||
|
return theme;
|
||||||
|
}
|
||||||
|
""",
|
||||||
|
Output("theme-init-dummy", "data"),
|
||||||
|
Input("theme-store", "data"),
|
||||||
|
prevent_initial_call=False,
|
||||||
|
)
|
||||||
@@ -2,11 +2,13 @@
|
|||||||
|
|
||||||
from .map_controls import create_map_controls, create_metric_selector
|
from .map_controls import create_map_controls, create_metric_selector
|
||||||
from .metric_card import MetricCard, create_metric_cards_row
|
from .metric_card import MetricCard, create_metric_cards_row
|
||||||
|
from .sidebar import create_sidebar
|
||||||
from .time_slider import create_time_slider, create_year_selector
|
from .time_slider import create_time_slider, create_year_selector
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"create_map_controls",
|
"create_map_controls",
|
||||||
"create_metric_selector",
|
"create_metric_selector",
|
||||||
|
"create_sidebar",
|
||||||
"create_time_slider",
|
"create_time_slider",
|
||||||
"create_year_selector",
|
"create_year_selector",
|
||||||
"MetricCard",
|
"MetricCard",
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ def create_metric_selector(
|
|||||||
label=label,
|
label=label,
|
||||||
data=options,
|
data=options,
|
||||||
value=default_value or (options[0]["value"] if options else None),
|
value=default_value or (options[0]["value"] if options else None),
|
||||||
style={"width": "200px"},
|
w=200,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -64,7 +64,7 @@ def create_map_controls(
|
|||||||
id=f"{id_prefix}-layer-toggle",
|
id=f"{id_prefix}-layer-toggle",
|
||||||
label="Show Boundaries",
|
label="Show Boundaries",
|
||||||
checked=True,
|
checked=True,
|
||||||
style={"marginTop": "10px"},
|
mt="sm",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import Any
|
|||||||
import dash_mantine_components as dmc
|
import dash_mantine_components as dmc
|
||||||
from dash import dcc
|
from dash import dcc
|
||||||
|
|
||||||
from portfolio_app.figures.summary_cards import create_metric_card_figure
|
from portfolio_app.figures.toronto.summary_cards import create_metric_card_figure
|
||||||
|
|
||||||
|
|
||||||
class MetricCard:
|
class MetricCard:
|
||||||
|
|||||||
214
portfolio_app/components/sidebar.py
Normal file
214
portfolio_app/components/sidebar.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
"""Floating sidebar navigation component."""
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
from dash_iconify import DashIconify
|
||||||
|
|
||||||
|
# Navigation items configuration - main pages
|
||||||
|
NAV_ITEMS_MAIN = [
|
||||||
|
{"path": "/", "icon": "tabler:home", "label": "Home"},
|
||||||
|
{"path": "/about", "icon": "tabler:user", "label": "About"},
|
||||||
|
{"path": "/blog", "icon": "tabler:article", "label": "Blog"},
|
||||||
|
{"path": "/resume", "icon": "tabler:file-text", "label": "Resume"},
|
||||||
|
{"path": "/contact", "icon": "tabler:mail", "label": "Contact"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Navigation items configuration - projects/dashboards (separated)
|
||||||
|
NAV_ITEMS_PROJECTS = [
|
||||||
|
{"path": "/projects", "icon": "tabler:folder", "label": "Projects"},
|
||||||
|
{"path": "/toronto", "icon": "tabler:map-2", "label": "Toronto Housing"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# External links configuration
|
||||||
|
EXTERNAL_LINKS = [
|
||||||
|
{
|
||||||
|
"url": "https://github.com/leomiranda",
|
||||||
|
"icon": "tabler:brand-github",
|
||||||
|
"label": "GitHub",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://linkedin.com/in/leobmiranda",
|
||||||
|
"icon": "tabler:brand-linkedin",
|
||||||
|
"label": "LinkedIn",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_brand_logo() -> html.Div:
|
||||||
|
"""Create the brand initials logo."""
|
||||||
|
return html.Div(
|
||||||
|
dcc.Link(
|
||||||
|
"LM",
|
||||||
|
href="/",
|
||||||
|
className="sidebar-brand-link",
|
||||||
|
),
|
||||||
|
className="sidebar-brand",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_nav_icon(
|
||||||
|
icon: str,
|
||||||
|
label: str,
|
||||||
|
path: str,
|
||||||
|
current_path: str,
|
||||||
|
) -> dmc.Tooltip:
|
||||||
|
"""Create a navigation icon with tooltip.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
icon: Iconify icon string.
|
||||||
|
label: Tooltip label.
|
||||||
|
path: Navigation path.
|
||||||
|
current_path: Current page path for active state.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tooltip-wrapped navigation icon.
|
||||||
|
"""
|
||||||
|
is_active = current_path == path or (path != "/" and current_path.startswith(path))
|
||||||
|
|
||||||
|
return dmc.Tooltip(
|
||||||
|
dcc.Link(
|
||||||
|
dmc.ActionIcon(
|
||||||
|
DashIconify(icon=icon, width=20),
|
||||||
|
variant="subtle" if not is_active else "filled",
|
||||||
|
size="lg",
|
||||||
|
radius="xl",
|
||||||
|
color="blue" if is_active else "gray",
|
||||||
|
className="nav-icon-active" if is_active else "",
|
||||||
|
),
|
||||||
|
href=path,
|
||||||
|
),
|
||||||
|
label=label,
|
||||||
|
position="right",
|
||||||
|
withArrow=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_theme_toggle(current_theme: str = "dark") -> dmc.Tooltip:
|
||||||
|
"""Create the theme toggle button.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current_theme: Current theme ('dark' or 'light').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tooltip-wrapped theme toggle icon.
|
||||||
|
"""
|
||||||
|
icon = "tabler:sun" if current_theme == "dark" else "tabler:moon"
|
||||||
|
label = "Switch to light mode" if current_theme == "dark" else "Switch to dark mode"
|
||||||
|
|
||||||
|
return dmc.Tooltip(
|
||||||
|
dmc.ActionIcon(
|
||||||
|
DashIconify(icon=icon, width=20, id="theme-toggle-icon"),
|
||||||
|
id="theme-toggle",
|
||||||
|
variant="subtle",
|
||||||
|
size="lg",
|
||||||
|
radius="xl",
|
||||||
|
color="gray",
|
||||||
|
),
|
||||||
|
label=label,
|
||||||
|
position="right",
|
||||||
|
withArrow=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_external_link(url: str, icon: str, label: str) -> dmc.Tooltip:
|
||||||
|
"""Create an external link icon with tooltip.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: External URL.
|
||||||
|
icon: Iconify icon string.
|
||||||
|
label: Tooltip label.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tooltip-wrapped external link icon.
|
||||||
|
"""
|
||||||
|
return dmc.Tooltip(
|
||||||
|
dmc.Anchor(
|
||||||
|
dmc.ActionIcon(
|
||||||
|
DashIconify(icon=icon, width=20),
|
||||||
|
variant="subtle",
|
||||||
|
size="lg",
|
||||||
|
radius="xl",
|
||||||
|
color="gray",
|
||||||
|
),
|
||||||
|
href=url,
|
||||||
|
target="_blank",
|
||||||
|
),
|
||||||
|
label=label,
|
||||||
|
position="right",
|
||||||
|
withArrow=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def create_sidebar_divider() -> html.Div:
|
||||||
|
"""Create a horizontal divider for the sidebar."""
|
||||||
|
return html.Div(className="sidebar-divider")
|
||||||
|
|
||||||
|
|
||||||
|
def create_sidebar_content(
|
||||||
|
current_path: str = "/", current_theme: str = "dark"
|
||||||
|
) -> list[dmc.Tooltip | html.Div]:
|
||||||
|
"""Create the sidebar content list.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current_path: Current page path for active state highlighting.
|
||||||
|
current_theme: Current theme for toggle icon state.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of sidebar components.
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
# Brand logo
|
||||||
|
create_brand_logo(),
|
||||||
|
create_sidebar_divider(),
|
||||||
|
# Main navigation icons
|
||||||
|
*[
|
||||||
|
create_nav_icon(
|
||||||
|
icon=item["icon"],
|
||||||
|
label=item["label"],
|
||||||
|
path=item["path"],
|
||||||
|
current_path=current_path,
|
||||||
|
)
|
||||||
|
for item in NAV_ITEMS_MAIN
|
||||||
|
],
|
||||||
|
create_sidebar_divider(),
|
||||||
|
# Dashboard/Project links
|
||||||
|
*[
|
||||||
|
create_nav_icon(
|
||||||
|
icon=item["icon"],
|
||||||
|
label=item["label"],
|
||||||
|
path=item["path"],
|
||||||
|
current_path=current_path,
|
||||||
|
)
|
||||||
|
for item in NAV_ITEMS_PROJECTS
|
||||||
|
],
|
||||||
|
create_sidebar_divider(),
|
||||||
|
# Theme toggle
|
||||||
|
create_theme_toggle(current_theme),
|
||||||
|
create_sidebar_divider(),
|
||||||
|
# External links
|
||||||
|
*[
|
||||||
|
create_external_link(
|
||||||
|
url=link["url"],
|
||||||
|
icon=link["icon"],
|
||||||
|
label=link["label"],
|
||||||
|
)
|
||||||
|
for link in EXTERNAL_LINKS
|
||||||
|
],
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html.Div:
|
||||||
|
"""Create the floating sidebar navigation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current_path: Current page path for active state highlighting.
|
||||||
|
current_theme: Current theme for toggle icon state.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Complete sidebar component.
|
||||||
|
"""
|
||||||
|
return html.Div(
|
||||||
|
id="floating-sidebar",
|
||||||
|
className="floating-sidebar",
|
||||||
|
children=create_sidebar_content(current_path, current_theme),
|
||||||
|
)
|
||||||
@@ -38,7 +38,7 @@ def create_year_selector(
|
|||||||
label=label,
|
label=label,
|
||||||
data=options,
|
data=options,
|
||||||
value=str(default_year),
|
value=str(default_year),
|
||||||
style={"width": "120px"},
|
w=120,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -83,7 +83,8 @@ def create_time_slider(
|
|||||||
marks=marks,
|
marks=marks,
|
||||||
step=1,
|
step=1,
|
||||||
minRange=1,
|
minRange=1,
|
||||||
style={"marginTop": "20px", "marginBottom": "10px"},
|
mt="md",
|
||||||
|
mb="sm",
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
p="md",
|
p="md",
|
||||||
@@ -131,5 +132,5 @@ def create_month_selector(
|
|||||||
label=label,
|
label=label,
|
||||||
data=options,
|
data=options,
|
||||||
value=str(default_month),
|
value=str(default_month),
|
||||||
style={"width": "140px"},
|
w=140,
|
||||||
)
|
)
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user