Compare commits
34 Commits
28f703f96b
...
sprint-7-c
| Author | SHA1 | Date | |
|---|---|---|---|
| d64f90b3d3 | |||
| b3fb94c7cb | |||
| 1e0ea9cca2 | |||
| 9dfa24fb76 | |||
| 8701a12b41 | |||
| 6ef5460ad0 | |||
| 19ffc04573 | |||
| 08aa61f85e | |||
| 2a6db2a252 | |||
| 140d3085bf | |||
| ad6ee3d37f | |||
| 077e426d34 | |||
| b7907e68e4 | |||
| 457bb49395 | |||
| 88e23674a8 | |||
| 1c42533834 | |||
| 802efab8b8 | |||
| ead6d91a28 | |||
| 549e1fcbaf | |||
| 3ee4c20f5e | |||
| 68cc5bbe66 | |||
| 58f2c692e3 | |||
| 8200bbaa99 | |||
| 15da8a97ce | |||
| eb01ad1101 | |||
| 8453f78e31 | |||
| ff0f5a9b51 | |||
| 10f46f7cf1 | |||
| 160dc90308 | |||
| ff58e0a3ea | |||
| 38e4a0354b | |||
| c7e9b88adb | |||
| 01a0984333 | |||
| 8ed220e014 |
15
.env.example
Normal file
15
.env.example
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Database Configuration
|
||||||
|
DATABASE_URL=postgresql://portfolio:portfolio_dev@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=portfolio_dev
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
|
||||||
|
# Application Settings
|
||||||
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=change-me-in-production
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# Optional: dbt profile (defaults to profiles.yml)
|
||||||
|
# DBT_PROFILES_DIR=.
|
||||||
26
.gitignore
vendored
26
.gitignore
vendored
@@ -1,4 +1,28 @@
|
|||||||
# ---> Python
|
# ====================
|
||||||
|
# Project-Specific
|
||||||
|
# ====================
|
||||||
|
|
||||||
|
# Processed data (generated, not source)
|
||||||
|
data/*/processed/
|
||||||
|
|
||||||
|
# Reports (generated)
|
||||||
|
reports/
|
||||||
|
|
||||||
|
# Backups
|
||||||
|
backups/
|
||||||
|
|
||||||
|
# Notebook exports
|
||||||
|
notebooks/*.html
|
||||||
|
|
||||||
|
# dbt
|
||||||
|
dbt/target/
|
||||||
|
dbt/dbt_packages/
|
||||||
|
dbt/logs/
|
||||||
|
|
||||||
|
# ====================
|
||||||
|
# Python
|
||||||
|
# ====================
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|||||||
33
.pre-commit-config.yaml
Normal file
33
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.5.0
|
||||||
|
hooks:
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: ['--maxkb=1000']
|
||||||
|
exclude: ^data/(raw/|toronto/raw/geo/)
|
||||||
|
- id: check-merge-conflict
|
||||||
|
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.1.9
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args: [--fix, --exit-non-zero-on-fix]
|
||||||
|
- id: ruff-format
|
||||||
|
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
rev: v1.8.0
|
||||||
|
hooks:
|
||||||
|
- id: mypy
|
||||||
|
additional_dependencies:
|
||||||
|
- pydantic>=2.0
|
||||||
|
- pandas-stubs
|
||||||
|
- types-requests
|
||||||
|
args: [--ignore-missing-imports]
|
||||||
|
exclude: ^(tests/|dbt/)
|
||||||
|
|
||||||
|
ci:
|
||||||
|
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
|
||||||
|
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.11
|
||||||
257
CLAUDE.md
Normal file
257
CLAUDE.md
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
Working context for Claude Code on the Analytics Portfolio project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Status
|
||||||
|
|
||||||
|
**Current Sprint**: 7 (Navigation & Theme Modernization)
|
||||||
|
**Phase**: 1 - Toronto Housing Dashboard
|
||||||
|
**Branch**: `development` (feature branches merge here)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
### Run Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make setup # Install deps, create .env, init pre-commit
|
||||||
|
make docker-up # Start PostgreSQL + PostGIS
|
||||||
|
make docker-down # Stop containers
|
||||||
|
make db-init # Initialize database schema
|
||||||
|
make run # Start Dash dev server
|
||||||
|
make test # Run pytest
|
||||||
|
make lint # Run ruff linter
|
||||||
|
make format # Run ruff formatter
|
||||||
|
make ci # Run all checks
|
||||||
|
```
|
||||||
|
|
||||||
|
### Branch Workflow
|
||||||
|
|
||||||
|
1. Create feature branch FROM `development`: `git checkout -b feature/{sprint}-{description}`
|
||||||
|
2. Work and commit on feature branch
|
||||||
|
3. Merge INTO `development` when complete
|
||||||
|
4. `development` -> `staging` -> `main` for releases
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code Conventions
|
||||||
|
|
||||||
|
### Import Style
|
||||||
|
|
||||||
|
| Context | Style | Example |
|
||||||
|
|---------|-------|---------|
|
||||||
|
| Same directory | Single dot | `from .trreb import TRREBParser` |
|
||||||
|
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
|
||||||
|
| External packages | Absolute | `import pandas as pd` |
|
||||||
|
|
||||||
|
### Module Responsibilities
|
||||||
|
|
||||||
|
| Directory | Contains | Purpose |
|
||||||
|
|-----------|----------|---------|
|
||||||
|
| `schemas/` | Pydantic models | Data validation |
|
||||||
|
| `models/` | SQLAlchemy ORM | Database persistence |
|
||||||
|
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
|
||||||
|
| `loaders/` | Database operations | Data loading |
|
||||||
|
| `figures/` | Chart factories | Plotly figure generation |
|
||||||
|
| `callbacks/` | Dash callbacks | In `pages/{dashboard}/callbacks/` |
|
||||||
|
| `errors/` | Exceptions + handlers | Error handling |
|
||||||
|
|
||||||
|
### Type Hints
|
||||||
|
|
||||||
|
Use Python 3.10+ style:
|
||||||
|
```python
|
||||||
|
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
# errors/exceptions.py
|
||||||
|
class PortfolioError(Exception):
|
||||||
|
"""Base exception."""
|
||||||
|
|
||||||
|
class ParseError(PortfolioError):
|
||||||
|
"""PDF/CSV parsing failed."""
|
||||||
|
|
||||||
|
class ValidationError(PortfolioError):
|
||||||
|
"""Pydantic or business rule validation failed."""
|
||||||
|
|
||||||
|
class LoadError(PortfolioError):
|
||||||
|
"""Database load operation failed."""
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Standards
|
||||||
|
|
||||||
|
- Single responsibility functions with verb naming
|
||||||
|
- Early returns over deep nesting
|
||||||
|
- Google-style docstrings only for non-obvious behavior
|
||||||
|
- Module-level constants for magic values
|
||||||
|
- Pydantic BaseSettings for runtime config
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Application Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── app.py # Dash app factory with Pages routing
|
||||||
|
├── config.py # Pydantic BaseSettings
|
||||||
|
├── assets/ # CSS, images (auto-served)
|
||||||
|
├── pages/
|
||||||
|
│ ├── home.py # Bio landing page -> /
|
||||||
|
│ └── toronto/
|
||||||
|
│ ├── dashboard.py # Layout only -> /toronto
|
||||||
|
│ └── callbacks/ # Interaction logic
|
||||||
|
├── components/ # Shared UI (navbar, footer, cards)
|
||||||
|
├── figures/ # Shared chart factories
|
||||||
|
├── toronto/ # Toronto data logic
|
||||||
|
│ ├── parsers/
|
||||||
|
│ ├── loaders/
|
||||||
|
│ ├── schemas/ # Pydantic
|
||||||
|
│ └── models/ # SQLAlchemy
|
||||||
|
└── errors/
|
||||||
|
```
|
||||||
|
|
||||||
|
### URL Routing
|
||||||
|
|
||||||
|
| URL | Page | Sprint |
|
||||||
|
|-----|------|--------|
|
||||||
|
| `/` | Bio landing page | 2 |
|
||||||
|
| `/toronto` | Toronto Housing Dashboard | 6 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tech Stack (Locked)
|
||||||
|
|
||||||
|
| Layer | Technology | Version |
|
||||||
|
|-------|------------|---------|
|
||||||
|
| Database | PostgreSQL + PostGIS | 16.x |
|
||||||
|
| Validation | Pydantic | >=2.0 |
|
||||||
|
| ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
|
||||||
|
| Transformation | dbt-postgres | >=1.7 |
|
||||||
|
| Data Processing | Pandas | >=2.1 |
|
||||||
|
| Geospatial | GeoPandas + Shapely | >=0.14 |
|
||||||
|
| Visualization | Dash + Plotly | >=2.14 |
|
||||||
|
| UI Components | dash-mantine-components | Latest stable |
|
||||||
|
| Testing | pytest | >=7.0 |
|
||||||
|
| Python | 3.11+ | Via pyenv |
|
||||||
|
|
||||||
|
**Notes**:
|
||||||
|
- SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
|
||||||
|
- PostGIS extension required in database
|
||||||
|
- Docker Compose V2 format (no `version` field)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Model Overview
|
||||||
|
|
||||||
|
### Geographic Reality (Toronto Housing)
|
||||||
|
|
||||||
|
```
|
||||||
|
TRREB Districts (~35) - Purchase data (W01, C01, E01...)
|
||||||
|
CMHC Zones (~20) - Rental data (Census Tract aligned)
|
||||||
|
City Neighbourhoods (158) - Enrichment/overlay only
|
||||||
|
```
|
||||||
|
|
||||||
|
**Critical**: These geographies do NOT align. Display as separate layers—do not force crosswalks.
|
||||||
|
|
||||||
|
### Star Schema
|
||||||
|
|
||||||
|
| Table | Type | Keys |
|
||||||
|
|-------|------|------|
|
||||||
|
| `fact_purchases` | Fact | -> dim_time, dim_trreb_district |
|
||||||
|
| `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone |
|
||||||
|
| `dim_time` | Dimension | date_key (PK) |
|
||||||
|
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
|
||||||
|
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
|
||||||
|
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
|
||||||
|
| `dim_policy_event` | Dimension | event_id (PK) |
|
||||||
|
|
||||||
|
**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
|
||||||
|
|
||||||
|
### dbt Layers
|
||||||
|
|
||||||
|
| Layer | Naming | Purpose |
|
||||||
|
|-------|--------|---------|
|
||||||
|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
||||||
|
| Intermediate | `int_{domain}__{transform}` | Business logic |
|
||||||
|
| Marts | `mart_{domain}` | Final analytical tables |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## DO NOT BUILD (Phase 1)
|
||||||
|
|
||||||
|
**Stop and flag if a task seems to require these**:
|
||||||
|
|
||||||
|
| Feature | Reason |
|
||||||
|
|---------|--------|
|
||||||
|
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 |
|
||||||
|
| Crime data integration | Deferred to Phase 4 |
|
||||||
|
| Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
|
||||||
|
| ML prediction models | Energy project scope (Phase 3) |
|
||||||
|
| Multi-project shared infrastructure | Build first, abstract second (Phase 2) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sprint 1 Deliverables
|
||||||
|
|
||||||
|
| Category | Tasks |
|
||||||
|
|----------|-------|
|
||||||
|
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
|
||||||
|
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
|
||||||
|
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
|
||||||
|
| **Tests** | tests/ directory, conftest.py, pytest config |
|
||||||
|
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
|
||||||
|
|
||||||
|
### Human Tasks (Cannot Automate)
|
||||||
|
|
||||||
|
| Task | Tool | Effort |
|
||||||
|
|------|------|--------|
|
||||||
|
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
|
||||||
|
| Research policy events (10-20) | Manual | 2-3 hours |
|
||||||
|
| Replace social link placeholders | Manual | 5 minutes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Required in `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=<secure>
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=<random>
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Script Standards
|
||||||
|
|
||||||
|
All scripts in `scripts/`:
|
||||||
|
- Include usage comments at top
|
||||||
|
- Idempotent where possible
|
||||||
|
- Exit codes: 0 = success, 1 = error
|
||||||
|
- Use `set -euo pipefail` for bash
|
||||||
|
- Log to stdout, errors to stderr
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Reference Documents
|
||||||
|
|
||||||
|
| Document | Location | Use When |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| Full specification | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
|
||||||
|
| Data schemas | `docs/toronto_housing_dashboard_spec_v5.md` | Parser/model tasks |
|
||||||
|
| WBS details | `docs/wbs_sprint_plan_v4.md` | Sprint planning |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Last Updated: Sprint 7*
|
||||||
157
Makefile
Normal file
157
Makefile
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
# Every target in this file is a command, not a file to build. Declare them all
# phony so a same-named file or directory can never make a target look up to date.
.PHONY: setup docker-up docker-down docker-logs db-init db-reset run test test-cov dbt-run dbt-test dbt-docs lint format typecheck ci deploy clean help
|
||||||
|
|
||||||
|
# Default target
|
||||||
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
PYTHON := python3
|
||||||
|
PIP := pip
|
||||||
|
DOCKER_COMPOSE := docker compose
|
||||||
|
|
||||||
|
# Colors for output
|
||||||
|
BLUE := \033[0;34m
|
||||||
|
GREEN := \033[0;32m
|
||||||
|
YELLOW := \033[0;33m
|
||||||
|
NC := \033[0m
|
||||||
|
|
||||||
|
help: ## Show this help message
|
||||||
|
@echo "Usage: make [target]"
|
||||||
|
@echo ""
|
||||||
|
@echo "Targets:"
|
||||||
|
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " $(BLUE)%-15s$(NC) %s\n", $$1, $$2}'
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Setup
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
setup: ## Install dependencies, create .env, init pre-commit
|
||||||
|
@echo "$(GREEN)Installing dependencies...$(NC)"
|
||||||
|
$(PIP) install -e ".[dev,dbt]"
|
||||||
|
@echo "$(GREEN)Setting up environment...$(NC)"
|
||||||
|
@if [ ! -f .env ]; then cp .env.example .env; echo "$(YELLOW)Created .env from .env.example - please update values$(NC)"; fi
|
||||||
|
@echo "$(GREEN)Installing pre-commit hooks...$(NC)"
|
||||||
|
pre-commit install
|
||||||
|
@echo "$(GREEN)Setup complete!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Docker
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
docker-up: ## Start PostgreSQL + PostGIS containers
|
||||||
|
@echo "$(GREEN)Starting database containers...$(NC)"
|
||||||
|
$(DOCKER_COMPOSE) up -d
|
||||||
|
@echo "$(GREEN)Waiting for database to be ready...$(NC)"
|
||||||
|
@sleep 3
|
||||||
|
@echo "$(GREEN)Database containers started!$(NC)"
|
||||||
|
|
||||||
|
docker-down: ## Stop containers
|
||||||
|
@echo "$(YELLOW)Stopping containers...$(NC)"
|
||||||
|
$(DOCKER_COMPOSE) down
|
||||||
|
|
||||||
|
docker-logs: ## View container logs
|
||||||
|
$(DOCKER_COMPOSE) logs -f
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Database
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
db-init: ## Initialize database schema
|
||||||
|
@echo "$(GREEN)Initializing database schema...$(NC)"
|
||||||
|
@if [ -f scripts/db/init.sh ]; then \
|
||||||
|
bash scripts/db/init.sh; \
|
||||||
|
else \
|
||||||
|
echo "$(YELLOW)scripts/db/init.sh not found - skipping$(NC)"; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
db-reset: ## Drop and recreate database (DESTRUCTIVE)
|
||||||
|
@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
|
||||||
|
# POSIX-portable confirmation prompt: `read -p` is a bash extension and fails when make runs the recipe with a /bin/sh that is not bash (e.g. dash).
@printf "Are you sure? [y/N] " && read confirm && [ "$$confirm" = "y" ] || exit 1
|
||||||
|
$(DOCKER_COMPOSE) down -v
|
||||||
|
$(DOCKER_COMPOSE) up -d
|
||||||
|
@sleep 3
|
||||||
|
$(MAKE) db-init
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Application
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
run: ## Start Dash development server
|
||||||
|
@echo "$(GREEN)Starting Dash server...$(NC)"
|
||||||
|
$(PYTHON) -m portfolio_app.app
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Testing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
test: ## Run pytest
|
||||||
|
@echo "$(GREEN)Running tests...$(NC)"
|
||||||
|
pytest
|
||||||
|
|
||||||
|
test-cov: ## Run pytest with coverage
|
||||||
|
@echo "$(GREEN)Running tests with coverage...$(NC)"
|
||||||
|
pytest --cov=portfolio_app --cov-report=html --cov-report=term
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# dbt
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
dbt-run: ## Run dbt models
|
||||||
|
@echo "$(GREEN)Running dbt models...$(NC)"
|
||||||
|
cd dbt && dbt run
|
||||||
|
|
||||||
|
dbt-test: ## Run dbt tests
|
||||||
|
@echo "$(GREEN)Running dbt tests...$(NC)"
|
||||||
|
cd dbt && dbt test
|
||||||
|
|
||||||
|
dbt-docs: ## Generate dbt documentation
|
||||||
|
@echo "$(GREEN)Generating dbt docs...$(NC)"
|
||||||
|
cd dbt && dbt docs generate && dbt docs serve
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Code Quality
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
lint: ## Run ruff linter
|
||||||
|
@echo "$(GREEN)Running linter...$(NC)"
|
||||||
|
ruff check .
|
||||||
|
|
||||||
|
format: ## Run ruff formatter
|
||||||
|
@echo "$(GREEN)Formatting code...$(NC)"
|
||||||
|
ruff format .
|
||||||
|
ruff check --fix .
|
||||||
|
|
||||||
|
typecheck: ## Run mypy type checker
|
||||||
|
@echo "$(GREEN)Running type checker...$(NC)"
|
||||||
|
mypy portfolio_app
|
||||||
|
|
||||||
|
ci: ## Run all checks (lint, typecheck, test)
|
||||||
|
@echo "$(GREEN)Running CI checks...$(NC)"
|
||||||
|
$(MAKE) lint
|
||||||
|
$(MAKE) typecheck
|
||||||
|
$(MAKE) test
|
||||||
|
@echo "$(GREEN)All checks passed!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Deployment
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
deploy: ## Deploy to production
|
||||||
|
@echo "$(YELLOW)Deployment not yet configured$(NC)"
|
||||||
|
@echo "TODO: Add deployment script"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Cleanup
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
clean: ## Remove build artifacts and caches
|
||||||
|
@echo "$(YELLOW)Cleaning up...$(NC)"
|
||||||
|
rm -rf build/
|
||||||
|
rm -rf dist/
|
||||||
|
rm -rf *.egg-info/
|
||||||
|
rm -rf .pytest_cache/
|
||||||
|
rm -rf .ruff_cache/
|
||||||
|
rm -rf .mypy_cache/
|
||||||
|
rm -rf htmlcov/
|
||||||
|
rm -rf .coverage
|
||||||
|
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
||||||
|
@echo "$(GREEN)Clean complete!$(NC)"
|
||||||
120
README.md
120
README.md
@@ -1,2 +1,120 @@
|
|||||||
# personal-portfolio
|
# Analytics Portfolio
|
||||||
|
|
||||||
|
A data analytics portfolio showcasing end-to-end data engineering, visualization, and analysis capabilities.
|
||||||
|
|
||||||
|
## Projects
|
||||||
|
|
||||||
|
### Toronto Housing Dashboard
|
||||||
|
|
||||||
|
An interactive choropleth dashboard analyzing Toronto's housing market using multi-source data integration.
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Purchase market analysis from TRREB monthly reports
|
||||||
|
- Rental market analysis from CMHC annual surveys
|
||||||
|
- Interactive choropleth maps by district/zone
|
||||||
|
- Time series visualization with policy event annotations
|
||||||
|
- Purchase/Rental mode toggle
|
||||||
|
|
||||||
|
**Data Sources:**
|
||||||
|
- [TRREB Market Watch](https://trreb.ca/market-data/market-watch/) - Monthly purchase statistics
|
||||||
|
- [CMHC Rental Market Survey](https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market) - Annual rental data
|
||||||
|
|
||||||
|
**Tech Stack:**
|
||||||
|
- Python 3.11+ / Dash / Plotly
|
||||||
|
- PostgreSQL + PostGIS
|
||||||
|
- dbt for data transformation
|
||||||
|
- Pydantic for validation
|
||||||
|
- SQLAlchemy 2.0
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone and setup
|
||||||
|
git clone https://github.com/lmiranda/personal-portfolio.git
|
||||||
|
cd personal-portfolio
|
||||||
|
|
||||||
|
# Install dependencies and configure environment
|
||||||
|
make setup
|
||||||
|
|
||||||
|
# Start database
|
||||||
|
make docker-up
|
||||||
|
|
||||||
|
# Initialize database schema
|
||||||
|
make db-init
|
||||||
|
|
||||||
|
# Run development server
|
||||||
|
make run
|
||||||
|
```
|
||||||
|
|
||||||
|
Visit `http://localhost:8050` to view the portfolio.
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── app.py # Dash app factory
|
||||||
|
├── config.py # Pydantic settings
|
||||||
|
├── pages/
|
||||||
|
│ ├── home.py # Bio landing page (/)
|
||||||
|
│ └── toronto/ # Toronto dashboard (/toronto)
|
||||||
|
├── components/ # Shared UI components
|
||||||
|
├── figures/ # Plotly figure factories
|
||||||
|
└── toronto/ # Toronto data logic
|
||||||
|
├── parsers/ # PDF/CSV extraction
|
||||||
|
├── loaders/ # Database operations
|
||||||
|
├── schemas/ # Pydantic models
|
||||||
|
└── models/ # SQLAlchemy ORM
|
||||||
|
|
||||||
|
dbt/
|
||||||
|
├── models/
|
||||||
|
│ ├── staging/ # 1:1 source tables
|
||||||
|
│ ├── intermediate/ # Business logic
|
||||||
|
│ └── marts/ # Analytical tables
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make test # Run tests
|
||||||
|
make lint # Run linter
|
||||||
|
make format # Format code
|
||||||
|
make ci # Run all checks
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Pipeline
|
||||||
|
|
||||||
|
```
|
||||||
|
Raw Files (PDF/Excel)
|
||||||
|
↓
|
||||||
|
Parsers (pdfplumber, pandas)
|
||||||
|
↓
|
||||||
|
Pydantic Validation
|
||||||
|
↓
|
||||||
|
SQLAlchemy Loaders
|
||||||
|
↓
|
||||||
|
PostgreSQL + PostGIS
|
||||||
|
↓
|
||||||
|
dbt Transformations
|
||||||
|
↓
|
||||||
|
Dash Visualization
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env` (`make setup` does this automatically when `.env` does not exist yet) and configure:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=<secure>
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
DASH_DEBUG=true
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
Leo Miranda - [GitHub](https://github.com/lmiranda) | [LinkedIn](https://linkedin.com/in/yourprofile)
|
||||||
|
|||||||
BIN
data/raw/cmhc/rmr-toronto-2021-en.xlsx
Normal file
BIN
data/raw/cmhc/rmr-toronto-2021-en.xlsx
Normal file
Binary file not shown.
BIN
data/raw/cmhc/rmr-toronto-2022-en.xlsx
Normal file
BIN
data/raw/cmhc/rmr-toronto-2022-en.xlsx
Normal file
Binary file not shown.
BIN
data/raw/cmhc/rmr-toronto-2023-en.xlsx
Normal file
BIN
data/raw/cmhc/rmr-toronto-2023-en.xlsx
Normal file
Binary file not shown.
BIN
data/raw/cmhc/rmr-toronto-2024-en.xlsx
Normal file
BIN
data/raw/cmhc/rmr-toronto-2024-en.xlsx
Normal file
Binary file not shown.
BIN
data/raw/cmhc/rmr-toronto-2025-en.xlsx
Normal file
BIN
data/raw/cmhc/rmr-toronto-2025-en.xlsx
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2401.pdf
Normal file
BIN
data/raw/trreb/mw2401.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2402.pdf
Normal file
BIN
data/raw/trreb/mw2402.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2403.pdf
Normal file
BIN
data/raw/trreb/mw2403.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2404.pdf
Normal file
BIN
data/raw/trreb/mw2404.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2405.pdf
Normal file
BIN
data/raw/trreb/mw2405.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2406.pdf
Normal file
BIN
data/raw/trreb/mw2406.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2407.pdf
Normal file
BIN
data/raw/trreb/mw2407.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2408.pdf
Normal file
BIN
data/raw/trreb/mw2408.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2409.pdf
Normal file
BIN
data/raw/trreb/mw2409.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2410.pdf
Normal file
BIN
data/raw/trreb/mw2410.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2411.pdf
Normal file
BIN
data/raw/trreb/mw2411.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2412.pdf
Normal file
BIN
data/raw/trreb/mw2412.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2501.pdf
Normal file
BIN
data/raw/trreb/mw2501.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2502.pdf
Normal file
BIN
data/raw/trreb/mw2502.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2503.pdf
Normal file
BIN
data/raw/trreb/mw2503.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2504.pdf
Normal file
BIN
data/raw/trreb/mw2504.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2505.pdf
Normal file
BIN
data/raw/trreb/mw2505.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2506.pdf
Normal file
BIN
data/raw/trreb/mw2506.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2507.pdf
Normal file
BIN
data/raw/trreb/mw2507.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2508.pdf
Normal file
BIN
data/raw/trreb/mw2508.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2509.pdf
Normal file
BIN
data/raw/trreb/mw2509.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2510.pdf
Normal file
BIN
data/raw/trreb/mw2510.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2511.pdf
Normal file
BIN
data/raw/trreb/mw2511.pdf
Normal file
Binary file not shown.
BIN
data/raw/trreb/mw2512.pdf
Normal file
BIN
data/raw/trreb/mw2512.pdf
Normal file
Binary file not shown.
0
data/toronto/raw/.gitkeep
Normal file
0
data/toronto/raw/.gitkeep
Normal file
0
data/toronto/raw/geo/.gitkeep
Normal file
0
data/toronto/raw/geo/.gitkeep
Normal file
38
data/toronto/raw/geo/cmhc_zones.geojson
Normal file
38
data/toronto/raw/geo/cmhc_zones.geojson
Normal file
File diff suppressed because one or more lines are too long
1
data/toronto/raw/geo/toronto_neighbourhoods.geojson
Normal file
1
data/toronto/raw/geo/toronto_neighbourhoods.geojson
Normal file
File diff suppressed because one or more lines are too long
0
data/toronto/reference/.gitkeep
Normal file
0
data/toronto/reference/.gitkeep
Normal file
28
dbt/dbt_project.yml
Normal file
28
dbt/dbt_project.yml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
name: 'toronto_housing'
|
||||||
|
version: '1.0.0'
|
||||||
|
config-version: 2
|
||||||
|
|
||||||
|
profile: 'toronto_housing'
|
||||||
|
|
||||||
|
model-paths: ["models"]
|
||||||
|
analysis-paths: ["analyses"]
|
||||||
|
test-paths: ["tests"]
|
||||||
|
seed-paths: ["seeds"]
|
||||||
|
macro-paths: ["macros"]
|
||||||
|
snapshot-paths: ["snapshots"]
|
||||||
|
|
||||||
|
clean-targets:
|
||||||
|
- "target"
|
||||||
|
- "dbt_packages"
|
||||||
|
|
||||||
|
models:
|
||||||
|
toronto_housing:
|
||||||
|
staging:
|
||||||
|
+materialized: view
|
||||||
|
+schema: staging
|
||||||
|
intermediate:
|
||||||
|
+materialized: view
|
||||||
|
+schema: intermediate
|
||||||
|
marts:
|
||||||
|
+materialized: table
|
||||||
|
+schema: marts
|
||||||
0
dbt/macros/.gitkeep
Normal file
0
dbt/macros/.gitkeep
Normal file
0
dbt/models/intermediate/.gitkeep
Normal file
0
dbt/models/intermediate/.gitkeep
Normal file
24
dbt/models/intermediate/_intermediate.yml
Normal file
24
dbt/models/intermediate/_intermediate.yml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: int_purchases__monthly
|
||||||
|
description: "Purchase data enriched with time and district dimensions"
|
||||||
|
columns:
|
||||||
|
- name: purchase_id
|
||||||
|
tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: district_code
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: int_rentals__annual
|
||||||
|
description: "Rental data enriched with time and zone dimensions"
|
||||||
|
columns:
|
||||||
|
- name: rental_id
|
||||||
|
tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- name: zone_code
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
62
dbt/models/intermediate/int_purchases__monthly.sql
Normal file
62
dbt/models/intermediate/int_purchases__monthly.sql
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
-- Intermediate: Monthly purchase data enriched with dimensions
|
||||||
|
-- Joins purchases with time and district dimensions for analysis
|
||||||
|
|
||||||
|
with purchases as (
|
||||||
|
select * from {{ ref('stg_trreb__purchases') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
time_dim as (
|
||||||
|
select * from {{ ref('stg_dimensions__time') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
district_dim as (
|
||||||
|
select * from {{ ref('stg_dimensions__trreb_districts') }}
|
||||||
|
),
|
||||||
|
|
||||||
|
enriched as (
|
||||||
|
select
|
||||||
|
p.purchase_id,
|
||||||
|
|
||||||
|
-- Time attributes
|
||||||
|
t.date_key,
|
||||||
|
t.full_date,
|
||||||
|
t.year,
|
||||||
|
t.month,
|
||||||
|
t.quarter,
|
||||||
|
t.month_name,
|
||||||
|
|
||||||
|
-- District attributes
|
||||||
|
d.district_key,
|
||||||
|
d.district_code,
|
||||||
|
d.district_name,
|
||||||
|
d.area_type,
|
||||||
|
|
||||||
|
-- Metrics
|
||||||
|
p.sales_count,
|
||||||
|
p.dollar_volume,
|
||||||
|
p.avg_price,
|
||||||
|
p.median_price,
|
||||||
|
p.new_listings,
|
||||||
|
p.active_listings,
|
||||||
|
p.days_on_market,
|
||||||
|
p.sale_to_list_ratio,
|
||||||
|
|
||||||
|
-- Calculated metrics
|
||||||
|
case
|
||||||
|
when p.active_listings > 0
|
||||||
|
then round(p.sales_count::numeric / p.active_listings, 3)
|
||||||
|
else null
|
||||||
|
end as absorption_rate,
|
||||||
|
|
||||||
|
case
|
||||||
|
when p.sales_count > 0
|
||||||
|
then round(p.active_listings::numeric / p.sales_count, 1)
|
||||||
|
else null
|
||||||
|
end as months_of_inventory
|
||||||
|
|
||||||
|
from purchases p
|
||||||
|
inner join time_dim t on p.date_key = t.date_key
|
||||||
|
inner join district_dim d on p.district_key = d.district_key
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from enriched
|
||||||
57
dbt/models/intermediate/int_rentals__annual.sql
Normal file
57
dbt/models/intermediate/int_rentals__annual.sql
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
-- Intermediate model: annual rental facts enriched with dimensions.
-- Attaches time attributes and CMHC zone attributes to each staged rental row
-- and derives an estimated count of vacant units. Rental rows whose date_key
-- or zone_key has no match in the dimension models are dropped by the joins.

with rental_facts as (

    select * from {{ ref('stg_cmhc__rentals') }}

),

time_attrs as (

    select * from {{ ref('stg_dimensions__time') }}

),

zone_attrs as (

    select * from {{ ref('stg_dimensions__cmhc_zones') }}

)

select
    rental_facts.rental_id,

    -- Time attributes
    time_attrs.date_key,
    time_attrs.full_date,
    time_attrs.year,
    time_attrs.month,
    time_attrs.quarter,

    -- Zone attributes
    zone_attrs.zone_key,
    zone_attrs.zone_code,
    zone_attrs.zone_name,

    -- Bedroom type
    rental_facts.bedroom_type,

    -- Metrics passed through from staging
    rental_facts.rental_universe,
    rental_facts.avg_rent,
    rental_facts.median_rent,
    rental_facts.vacancy_rate,
    rental_facts.availability_rate,
    rental_facts.turnover_rate,
    rental_facts.year_over_year_rent_change,
    rental_facts.reliability_code,

    -- Derived metric: the vacancy rate is divided by 100 before being applied
    -- to the universe. A CASE with no ELSE yields NULL when the guard fails.
    case
        when rental_facts.rental_universe > 0
            and rental_facts.vacancy_rate is not null
            then round(
                rental_facts.rental_universe * (rental_facts.vacancy_rate / 100),
                0
            )
    end as vacant_units_estimate

from rental_facts
join time_attrs
    on rental_facts.date_key = time_attrs.date_key
join zone_attrs
    on rental_facts.zone_key = zone_attrs.zone_key
|
||||||
0
dbt/models/marts/.gitkeep
Normal file
0
dbt/models/marts/.gitkeep
Normal file
23
dbt/models/marts/_marts.yml
Normal file
23
dbt/models/marts/_marts.yml
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
version: 2

# Schema and tests for the mart layer. Each mart's surrogate id or documented
# grain is tested so that a fan-out in an upstream join is caught at build time.

models:
  - name: mart_toronto_purchases
    description: "Final mart for Toronto purchase/sales analysis by district and time"
    columns:
      - name: purchase_id
        description: "Unique purchase record identifier"
        tests:
          - unique
          - not_null

  - name: mart_toronto_rentals
    description: "Final mart for Toronto rental market analysis by zone and time"
    columns:
      - name: rental_id
        description: "Unique rental record identifier"
        tests:
          - unique
          - not_null

  - name: mart_toronto_market_summary
    description: "Combined market summary aggregating purchases and rentals at Toronto level"
    # The model documents its grain as one row per year-month; enforce it.
    tests:
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns:
            - year
            - month
    columns:
      - name: year
        description: "Year of the summarised month"
        tests:
          - not_null
      - name: month
        description: "Month of the summarised period"
        tests:
          - not_null
|
||||||
81
dbt/models/marts/mart_toronto_market_summary.sql
Normal file
81
dbt/models/marts/mart_toronto_market_summary.sql
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
-- Mart: Toronto Market Summary
-- Purchase indicators rolled up across districts per month, with the rental
-- indicators of the same year attached alongside.
-- Grain: One row per year-month

with monthly_purchases as (

    select
        year,
        month,
        month_name,
        quarter,

        -- Roll the per-district rows up to a single row per month.
        -- NOTE(review): the avg(...) columns are plain means of district-level
        -- values, not sales-weighted figures -- confirm this is intended.
        sum(sales_count) as total_sales,
        sum(dollar_volume) as total_dollar_volume,
        round(avg(avg_price), 0) as avg_price_all_districts,
        round(avg(median_price), 0) as median_price_all_districts,
        sum(new_listings) as total_new_listings,
        sum(active_listings) as total_active_listings,
        round(avg(days_on_market), 0) as avg_days_on_market,
        round(avg(sale_to_list_ratio), 2) as avg_sale_to_list_ratio,
        round(avg(absorption_rate), 3) as avg_absorption_rate,
        round(avg(months_of_inventory), 1) as avg_months_of_inventory,
        round(avg(avg_price_yoy_pct), 2) as avg_price_yoy_pct

    from {{ ref('mart_toronto_purchases') }}
    group by year, month, month_name, quarter

),

annual_rentals as (

    select
        year,

        -- Roll every zone and bedroom-type row up to a single row per year.
        round(avg(avg_rent), 0) as avg_rent_all_zones,
        round(avg(vacancy_rate), 2) as avg_vacancy_rate,
        round(avg(rent_change_pct), 2) as avg_rent_change_pct,
        sum(rental_universe) as total_rental_universe

    from {{ ref('mart_toronto_rentals') }}
    group by year

)

select
    pur.year,
    pur.month,
    pur.month_name,
    pur.quarter,

    -- Purchase market indicators
    pur.total_sales,
    pur.total_dollar_volume,
    pur.avg_price_all_districts,
    pur.median_price_all_districts,
    pur.total_new_listings,
    pur.total_active_listings,
    pur.avg_days_on_market,
    pur.avg_sale_to_list_ratio,
    pur.avg_absorption_rate,
    pur.avg_months_of_inventory,
    pur.avg_price_yoy_pct,

    -- Rental market indicators: the rental data is annual, so every month of
    -- a year receives the same values; they are NULL when the year has no
    -- rental rows (left join).
    ren.avg_rent_all_zones,
    ren.avg_vacancy_rate,
    ren.avg_rent_change_pct,
    ren.total_rental_universe,

    -- Affordability indicator: price divided by twelve months of rent.
    -- NULL when the average rent is missing or not positive.
    case
        when ren.avg_rent_all_zones > 0
            then round(pur.avg_price_all_districts / (ren.avg_rent_all_zones * 12), 1)
    end as price_to_annual_rent_ratio

from monthly_purchases as pur
left join annual_rentals as ren
    on pur.year = ren.year
order by pur.year desc, pur.month desc
|
||||||
79
dbt/models/marts/mart_toronto_purchases.sql
Normal file
79
dbt/models/marts/mart_toronto_purchases.sql
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
-- Mart: Toronto Purchase Market Analysis
-- Final analytical table for purchase/sales data visualization
-- Grain: One row per district per month

with purchases as (

    select * from {{ ref('int_purchases__monthly') }}

),

-- Add prior-year values for year-over-year calculations.
-- The prior-year row is matched on calendar position (same district, same
-- month, year - 1) rather than with lag(..., 12): lag counts rows, so a single
-- missing month in a district would silently shift every later comparison to
-- the wrong month. With the join, a missing prior-year month yields NULLs.
-- NOTE(review): assumes year and month are numeric and that there is at most
-- one row per district per month (the documented grain); a duplicate would
-- fan out here and be caught by the unique test on purchase_id.
with_yoy as (

    select
        p.*,
        prev.avg_price as avg_price_prev_year,
        prev.sales_count as sales_count_prev_year,
        prev.median_price as median_price_prev_year

    from purchases p
    left join purchases prev
        on prev.district_code = p.district_code
        and prev.year = p.year - 1
        and prev.month = p.month

),

final as (

    select
        purchase_id,
        date_key,
        full_date,
        year,
        month,
        quarter,
        month_name,
        district_key,
        district_code,
        district_name,
        area_type,
        sales_count,
        dollar_volume,
        avg_price,
        median_price,
        new_listings,
        active_listings,
        days_on_market,
        sale_to_list_ratio,
        absorption_rate,
        months_of_inventory,

        -- Year-over-year changes, in percent. NULL when there is no prior-year
        -- row or the prior-year value is not positive. The delta is cast to
        -- numeric in all three branches so the division is never integer
        -- division and round(value, 2) always receives a numeric argument.
        case
            when avg_price_prev_year > 0
            then round(((avg_price - avg_price_prev_year)::numeric / avg_price_prev_year) * 100, 2)
            else null
        end as avg_price_yoy_pct,

        case
            when sales_count_prev_year > 0
            then round(((sales_count - sales_count_prev_year)::numeric / sales_count_prev_year) * 100, 2)
            else null
        end as sales_count_yoy_pct,

        case
            when median_price_prev_year > 0
            then round(((median_price - median_price_prev_year)::numeric / median_price_prev_year) * 100, 2)
            else null
        end as median_price_yoy_pct

    from with_yoy

)

select * from final
|
||||||
64
dbt/models/marts/mart_toronto_rentals.sql
Normal file
64
dbt/models/marts/mart_toronto_rentals.sql
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
-- Mart: Toronto Rental Market Analysis
-- Final analytical table for rental market visualization
-- Grain: One row per zone per bedroom type per survey year

with rentals as (

    select * from {{ ref('int_rentals__annual') }}

),

-- Add previous-year values for year-over-year calculations.
-- lag(..., 1) returns the previous *available* row for the zone/bedroom type,
-- which is only the previous year when no survey year is missing. Each lagged
-- value is therefore kept only when the lagged row's year is exactly
-- year - 1; otherwise it is NULL and no misleading multi-year "YoY" figure is
-- produced.
-- NOTE(review): assumes year is numeric -- confirm against dim_time.
with_yoy as (

    select
        r.*,

        case
            when lag(r.year, 1) over zone_bedroom_by_year = r.year - 1
            then lag(r.avg_rent, 1) over zone_bedroom_by_year
            else null
        end as avg_rent_prev_year,

        case
            when lag(r.year, 1) over zone_bedroom_by_year = r.year - 1
            then lag(r.vacancy_rate, 1) over zone_bedroom_by_year
            else null
        end as vacancy_rate_prev_year

    from rentals r
    window zone_bedroom_by_year as (
        partition by r.zone_code, r.bedroom_type
        order by r.year
    )

),

final as (

    select
        rental_id,
        date_key,
        full_date,
        year,
        quarter,
        zone_key,
        zone_code,
        zone_name,
        bedroom_type,
        rental_universe,
        avg_rent,
        median_rent,
        vacancy_rate,
        availability_rate,
        turnover_rate,
        year_over_year_rent_change,
        reliability_code,
        vacant_units_estimate,

        -- Rent change in percent: the source-provided figure wins; the
        -- calculated value is only a fallback when the source has none.
        coalesce(
            year_over_year_rent_change,
            case
                when avg_rent_prev_year > 0
                then round(((avg_rent - avg_rent_prev_year) / avg_rent_prev_year) * 100, 2)
                else null
            end
        ) as rent_change_pct,

        -- Difference between the two rates; NULL when either side is missing.
        vacancy_rate - vacancy_rate_prev_year as vacancy_rate_change

    from with_yoy

)

select * from final
|
||||||
0
dbt/models/staging/.gitkeep
Normal file
0
dbt/models/staging/.gitkeep
Normal file
61
dbt/models/staging/_sources.yml
Normal file
61
dbt/models/staging/_sources.yml
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
version: 2

# Raw star-schema tables that the staging models read through source().

sources:
  - name: toronto_housing
    description: Toronto housing data loaded from TRREB and CMHC sources
    database: portfolio
    schema: public
    tables:
      # ---- Fact tables ----
      - name: fact_purchases
        description: TRREB monthly purchase/sales statistics by district
        columns:
          - name: id
            description: Primary key
          - name: date_key
            description: Foreign key to dim_time
          - name: district_key
            description: Foreign key to dim_trreb_district

      - name: fact_rentals
        description: CMHC annual rental survey data by zone and bedroom type
        columns:
          - name: id
            description: Primary key
          - name: date_key
            description: Foreign key to dim_time
          - name: zone_key
            description: Foreign key to dim_cmhc_zone

      # ---- Dimension tables ----
      - name: dim_time
        description: Time dimension (monthly grain)
        columns:
          - name: date_key
            description: Primary key (YYYYMMDD format)

      - name: dim_trreb_district
        description: TRREB district dimension with geometry
        columns:
          - name: district_key
            description: Primary key
          - name: district_code
            description: TRREB district code

      - name: dim_cmhc_zone
        description: CMHC zone dimension with geometry
        columns:
          - name: zone_key
            description: Primary key
          - name: zone_code
            description: CMHC zone code

      - name: dim_neighbourhood
        description: City of Toronto neighbourhoods (reference only)
        columns:
          - name: neighbourhood_id
            description: Primary key

      - name: dim_policy_event
        description: Housing policy events for annotation
        columns:
          - name: event_id
            description: Primary key
|
||||||
73
dbt/models/staging/_staging.yml
Normal file
73
dbt/models/staging/_staging.yml
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
version: 2

# Schema and tests for the staging layer: identifiers and dimension keys are
# tested for uniqueness and/or presence before downstream models join on them.

models:
  # ---- Facts ----
  - name: stg_trreb__purchases
    description: Staged TRREB purchase/sales data from fact_purchases
    columns:
      - name: purchase_id
        description: Unique identifier for purchase record
        tests: [unique, not_null]
      - name: date_key
        description: Date dimension key (YYYYMMDD)
        tests: [not_null]
      - name: district_key
        description: TRREB district dimension key
        tests: [not_null]

  - name: stg_cmhc__rentals
    description: Staged CMHC rental market data from fact_rentals
    columns:
      - name: rental_id
        description: Unique identifier for rental record
        tests: [unique, not_null]
      - name: date_key
        description: Date dimension key (YYYYMMDD)
        tests: [not_null]
      - name: zone_key
        description: CMHC zone dimension key
        tests: [not_null]

  # ---- Dimensions ----
  - name: stg_dimensions__time
    description: Staged time dimension
    columns:
      - name: date_key
        description: Date dimension key (YYYYMMDD)
        tests: [unique, not_null]

  - name: stg_dimensions__trreb_districts
    description: Staged TRREB district dimension
    columns:
      - name: district_key
        description: District dimension key
        tests: [unique, not_null]
      - name: district_code
        description: TRREB district code (e.g., W01, C01)
        tests: [unique, not_null]

  - name: stg_dimensions__cmhc_zones
    description: Staged CMHC zone dimension
    columns:
      - name: zone_key
        description: Zone dimension key
        tests: [unique, not_null]
      - name: zone_code
        description: CMHC zone code
        tests: [unique, not_null]
|
||||||
26
dbt/models/staging/stg_cmhc__rentals.sql
Normal file
26
dbt/models/staging/stg_cmhc__rentals.sql
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
-- Staging model for the CMHC rental market survey.
-- Reads the fact_rentals source table (loaded from CMHC CSV exports) and
-- exposes it under the column names used by downstream models.
-- Grain: One row per zone per bedroom type per survey year

with raw_rentals as (

    select * from {{ source('toronto_housing', 'fact_rentals') }}

)

select
    -- Renamed: id -> rental_id
    id as rental_id,
    date_key,
    zone_key,
    bedroom_type,
    -- Renamed: universe -> rental_universe
    universe as rental_universe,
    avg_rent,
    median_rent,
    vacancy_rate,
    availability_rate,
    turnover_rate,
    -- Renamed: rent_change_pct -> year_over_year_rent_change
    rent_change_pct as year_over_year_rent_change,
    reliability_code

from raw_rentals
|
||||||
18
dbt/models/staging/stg_dimensions__cmhc_zones.sql
Normal file
18
dbt/models/staging/stg_dimensions__cmhc_zones.sql
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
-- Staging model for the CMHC zone dimension.
-- Reads the dim_cmhc_zone source table and passes its columns through
-- unchanged.
-- Grain: One row per zone

with raw_zones as (

    select * from {{ source('toronto_housing', 'dim_cmhc_zone') }}

)

select
    zone_key,
    zone_code,
    zone_name,
    geometry

from raw_zones
|
||||||
21
dbt/models/staging/stg_dimensions__time.sql
Normal file
21
dbt/models/staging/stg_dimensions__time.sql
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
-- Staging model for the time dimension.
-- Reads the dim_time source table and passes its columns through unchanged.
-- Grain: One row per month

with raw_time as (

    select * from {{ source('toronto_housing', 'dim_time') }}

)

select
    date_key,
    full_date,
    year,
    month,
    quarter,
    month_name,
    is_month_start

from raw_time
|
||||||
19
dbt/models/staging/stg_dimensions__trreb_districts.sql
Normal file
19
dbt/models/staging/stg_dimensions__trreb_districts.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
-- Staging model for the TRREB district dimension.
-- Reads the dim_trreb_district source table and passes its columns through
-- unchanged.
-- Grain: One row per district

with raw_districts as (

    select * from {{ source('toronto_housing', 'dim_trreb_district') }}

)

select
    district_key,
    district_code,
    district_name,
    area_type,
    geometry

from raw_districts
|
||||||
25
dbt/models/staging/stg_trreb__purchases.sql
Normal file
25
dbt/models/staging/stg_trreb__purchases.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Staging model for TRREB purchase/sales data.
-- Reads the fact_purchases source table (loaded from TRREB Market Watch PDFs)
-- and exposes it under the column names used by downstream models.
-- Grain: One row per district per month

with raw_purchases as (

    select * from {{ source('toronto_housing', 'fact_purchases') }}

)

select
    -- Renamed: id -> purchase_id
    id as purchase_id,
    date_key,
    district_key,
    sales_count,
    dollar_volume,
    avg_price,
    median_price,
    new_listings,
    active_listings,
    -- Renamed: avg_dom -> days_on_market
    avg_dom as days_on_market,
    -- Renamed: avg_sp_lp -> sale_to_list_ratio
    avg_sp_lp as sale_to_list_ratio

from raw_purchases
|
||||||
5
dbt/packages.yml
Normal file
5
dbt/packages.yml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# dbt package dependencies.
# Each range has an upper bound so `dbt deps` cannot silently pull in a
# release with breaking changes; widen the bound deliberately when upgrading.
packages:
  - package: dbt-labs/dbt_utils
    version: [">=1.0.0", "<2.0.0"]
  - package: calogica/dbt_expectations
    # Pre-1.0 package: stay within the 0.10.x line.
    version: [">=0.10.0", "<0.11.0"]
|
||||||
21
dbt/profiles.yml.example
Normal file
21
dbt/profiles.yml.example
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# Example dbt profile. Copy to profiles.yml and adjust as needed.
toronto_housing:
  target: dev
  outputs:
    # Local development. Credentials come from the same POSTGRES_* variables
    # that docker-compose.yml reads, with the same fallbacks, so the profile
    # works when nothing is exported and follows any override of them.
    dev:
      type: postgres
      host: localhost
      user: "{{ env_var('POSTGRES_USER', 'portfolio') }}"
      password: "{{ env_var('POSTGRES_PASSWORD', 'portfolio_dev') }}"
      port: 5432
      dbname: "{{ env_var('POSTGRES_DB', 'portfolio') }}"
      schema: public
      threads: 4
    # Production. No fallbacks on purpose: a missing variable must fail loudly
    # rather than fall back to development credentials.
    prod:
      type: postgres
      host: "{{ env_var('POSTGRES_HOST') }}"
      user: "{{ env_var('POSTGRES_USER') }}"
      password: "{{ env_var('POSTGRES_PASSWORD') }}"
      port: 5432
      dbname: portfolio
      schema: public
      threads: 4
|
||||||
0
dbt/tests/.gitkeep
Normal file
0
dbt/tests/.gitkeep
Normal file
22
docker-compose.yml
Normal file
22
docker-compose.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# PostgreSQL + PostGIS database used by the application and by dbt.
services:
  db:
    image: postgis/postgis:16-3.4
    container_name: portfolio-db
    restart: unless-stopped
    ports:
      # Published on the loopback interface only: the credentials below fall
      # back to development defaults, so the port must not be reachable from
      # other hosts. Local clients connecting to localhost:5432 are unaffected.
      - "127.0.0.1:5432:5432"
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-portfolio}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-portfolio_dev}
      POSTGRES_DB: ${POSTGRES_DB:-portfolio}
    volumes:
      # Named volume keeps the data across container re-creation.
      - postgres_data:/var/lib/postgresql/data
      # Init script mounted read-only into the image's init directory.
      - ./scripts/db/init-postgis.sql:/docker-entrypoint-initdb.d/init-postgis.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-portfolio} -d ${POSTGRES_DB:-portfolio}"]
      interval: 10s
      timeout: 5s
      retries: 5

volumes:
  postgres_data:
|
||||||
396
docs/PROJECT_REFERENCE.md
Normal file
396
docs/PROJECT_REFERENCE.md
Normal file
@@ -0,0 +1,396 @@
|
|||||||
|
# Portfolio Project Reference
|
||||||
|
|
||||||
|
**Project**: Analytics Portfolio
|
||||||
|
**Owner**: Leo
|
||||||
|
**Status**: Ready for Sprint 1
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
Two-project analytics portfolio demonstrating end-to-end data engineering, visualization, and ML capabilities.
|
||||||
|
|
||||||
|
| Project | Domain | Key Skills | Phase |
|
||||||
|
|---------|--------|------------|-------|
|
||||||
|
| **Toronto Housing Dashboard** | Real estate | ETL, dimensional modeling, geospatial, choropleth | Phase 1 (Active) |
|
||||||
|
| **Energy Pricing Analysis** | Utility markets | Time series, ML prediction, API integration | Phase 3 (Future) |
|
||||||
|
|
||||||
|
**Platform**: Monolithic Dash application on self-hosted VPS (bio landing page + dashboards).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Branching Strategy
|
||||||
|
|
||||||
|
| Branch | Purpose | Deploys To |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `main` | Production releases only | VPS (production) |
|
||||||
|
| `staging` | Pre-production testing | VPS (staging) |
|
||||||
|
| `development` | Active development | Local only |
|
||||||
|
|
||||||
|
**Rules**:
|
||||||
|
- All feature branches created FROM `development`
|
||||||
|
- All feature branches merge INTO `development`
|
||||||
|
- `development` → `staging` for testing
|
||||||
|
- `staging` → `main` for release
|
||||||
|
- Direct commits to `main` or `staging` are forbidden
|
||||||
|
- Branch naming: `feature/{sprint}-{description}` or `fix/{issue-id}`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tech Stack (Locked)
|
||||||
|
|
||||||
|
| Layer | Technology | Version |
|
||||||
|
|-------|------------|---------|
|
||||||
|
| Database | PostgreSQL + PostGIS | 16.x |
|
||||||
|
| Validation | Pydantic | ≥2.0 |
|
||||||
|
| ORM | SQLAlchemy | ≥2.0 (2.0-style API only) |
|
||||||
|
| Transformation | dbt-postgres | ≥1.7 |
|
||||||
|
| Data Processing | Pandas | ≥2.1 |
|
||||||
|
| Geospatial | GeoPandas + Shapely | ≥0.14 |
|
||||||
|
| Visualization | Dash + Plotly | ≥2.14 |
|
||||||
|
| UI Components | dash-mantine-components | Latest stable |
|
||||||
|
| Testing | pytest | ≥7.0 |
|
||||||
|
| Python | 3.11+ | Via pyenv |
|
||||||
|
|
||||||
|
**Compatibility Notes**:
|
||||||
|
- SQLAlchemy 2.0 + Pydantic 2.0 integrate well—never mix 1.x APIs
|
||||||
|
- PostGIS extension required—enable during db init
|
||||||
|
- Docker Compose V2 (no `version` field in compose files)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code Conventions
|
||||||
|
|
||||||
|
### Import Style
|
||||||
|
|
||||||
|
| Context | Style | Example |
|
||||||
|
|---------|-------|---------|
|
||||||
|
| Same directory | Single dot | `from .trreb import TRREBParser` |
|
||||||
|
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
|
||||||
|
| External packages | Absolute | `import pandas as pd` |
|
||||||
|
|
||||||
|
### Module Separation
|
||||||
|
|
||||||
|
| Directory | Contains | Purpose |
|
||||||
|
|-----------|----------|---------|
|
||||||
|
| `schemas/` | Pydantic models | Data validation |
|
||||||
|
| `models/` | SQLAlchemy ORM | Database persistence |
|
||||||
|
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
|
||||||
|
| `loaders/` | Database operations | Data loading |
|
||||||
|
| `figures/` | Chart factories | Plotly figure generation |
|
||||||
|
| `callbacks/` | Dash callbacks | Per-dashboard, in `pages/{dashboard}/callbacks/` |
|
||||||
|
| `errors/` | Exceptions + handlers | Error handling |
|
||||||
|
|
||||||
|
### Code Standards
|
||||||
|
|
||||||
|
- **Type hints**: Mandatory, Python 3.10+ style (`list[str]`, `dict[str, int]`, `X | None`)
|
||||||
|
- **Functions**: Single responsibility, verb naming, early returns over nesting
|
||||||
|
- **Docstrings**: Google style, minimal—only for non-obvious behavior
|
||||||
|
- **Constants**: Module-level for magic values, Pydantic BaseSettings for runtime config
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
# errors/exceptions.py
|
||||||
|
class PortfolioError(Exception):
|
||||||
|
"""Base exception."""
|
||||||
|
|
||||||
|
class ParseError(PortfolioError):
|
||||||
|
"""PDF/CSV parsing failed."""
|
||||||
|
|
||||||
|
class ValidationError(PortfolioError):
|
||||||
|
"""Pydantic or business rule validation failed."""
|
||||||
|
|
||||||
|
class LoadError(PortfolioError):
|
||||||
|
"""Database load operation failed."""
|
||||||
|
```
|
||||||
|
|
||||||
|
- Decorators for infrastructure concerns (logging, retry, transactions)
|
||||||
|
- Explicit handling for domain logic (business rules, recovery strategies)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Application Architecture
|
||||||
|
|
||||||
|
### Dash Pages Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── app.py # Dash app factory with Pages routing
|
||||||
|
├── config.py # Pydantic BaseSettings
|
||||||
|
├── assets/ # CSS, images (auto-served by Dash)
|
||||||
|
├── pages/
|
||||||
|
│ ├── home.py # Bio landing page → /
|
||||||
|
│ ├── toronto/
|
||||||
|
│ │ ├── dashboard.py # Layout only → /toronto
|
||||||
|
│ │ └── callbacks/ # Interaction logic
|
||||||
|
│ └── energy/ # Phase 3
|
||||||
|
├── components/ # Shared UI (navbar, footer, cards)
|
||||||
|
├── figures/ # Shared chart factories
|
||||||
|
├── toronto/ # Toronto data logic
|
||||||
|
│ ├── parsers/
|
||||||
|
│ ├── loaders/
|
||||||
|
│ ├── schemas/ # Pydantic
|
||||||
|
│ └── models/ # SQLAlchemy
|
||||||
|
└── errors/
|
||||||
|
```
|
||||||
|
|
||||||
|
### URL Routing (Automatic)
|
||||||
|
|
||||||
|
| URL | Page | Status |
|
||||||
|
|-----|------|--------|
|
||||||
|
| `/` | Bio landing page | Sprint 2 |
|
||||||
|
| `/toronto` | Toronto Housing Dashboard | Sprint 6 |
|
||||||
|
| `/energy` | Energy Pricing Dashboard | Phase 3 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Toronto Housing Dashboard
|
||||||
|
|
||||||
|
### Data Sources
|
||||||
|
|
||||||
|
| Track | Source | Format | Geography | Frequency |
|
||||||
|
|-------|--------|--------|-----------|-----------|
|
||||||
|
| Purchases | TRREB Monthly Reports | PDF | ~35 Districts | Monthly |
|
||||||
|
| Rentals | CMHC Rental Market Survey | CSV | ~20 Zones | Annual |
|
||||||
|
| Enrichment | City of Toronto Open Data | GeoJSON/CSV | 158 Neighbourhoods | Census |
|
||||||
|
| Policy Events | Curated list | CSV | N/A | Event-based |
|
||||||
|
|
||||||
|
### Geographic Reality
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ City of Toronto Neighbourhoods (158) │ ← Enrichment only
|
||||||
|
├─────────────────────────────────────────────────────────────────┤
|
||||||
|
│ TRREB Districts (~35) — W01, C01, E01, etc. │ ← Purchase data
|
||||||
|
├─────────────────────────────────────────────────────────────────┤
|
||||||
|
│ CMHC Zones (~20) — Census Tract aligned │ ← Rental data
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Critical**: These geographies do NOT align. Display as separate layers with toggle—do not force crosswalks.
|
||||||
|
|
||||||
|
### Data Model (Star Schema)
|
||||||
|
|
||||||
|
| Table | Type | Keys |
|
||||||
|
|-------|------|------|
|
||||||
|
| `fact_purchases` | Fact | → dim_time, dim_trreb_district |
|
||||||
|
| `fact_rentals` | Fact | → dim_time, dim_cmhc_zone |
|
||||||
|
| `dim_time` | Dimension | date_key (PK) |
|
||||||
|
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
|
||||||
|
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
|
||||||
|
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
|
||||||
|
| `dim_policy_event` | Dimension | event_id (PK) |
|
||||||
|
|
||||||
|
**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
|
||||||
|
|
||||||
|
### dbt Layer Structure
|
||||||
|
|
||||||
|
| Layer | Naming | Purpose |
|
||||||
|
|-------|--------|---------|
|
||||||
|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
||||||
|
| Intermediate | `int_{domain}__{transform}` | Business logic, filtering |
|
||||||
|
| Marts | `mart_{domain}` | Final analytical tables |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sprint Overview
|
||||||
|
|
||||||
|
| Sprint | Focus | Milestone |
|
||||||
|
|--------|-------|-----------|
|
||||||
|
| 1 | Project bootstrap, start TRREB digitization | — |
|
||||||
|
| 2 | Bio page, data acquisition | **Launch 1: Bio Live** |
|
||||||
|
| 3 | Parsers, schemas, models | — |
|
||||||
|
| 4 | Loaders, dbt | — |
|
||||||
|
| 5 | Visualization | — |
|
||||||
|
| 6 | Polish, deploy dashboard | **Launch 2: Dashboard Live** |
|
||||||
|
| 7 | Buffer | — |
|
||||||
|
|
||||||
|
### Sprint 1 Deliverables
|
||||||
|
|
||||||
|
| Category | Tasks |
|
||||||
|
|----------|-------|
|
||||||
|
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
|
||||||
|
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
|
||||||
|
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
|
||||||
|
| **Tests** | tests/ directory, conftest.py, pytest config |
|
||||||
|
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
|
||||||
|
|
||||||
|
### Human Tasks (Cannot Automate)
|
||||||
|
|
||||||
|
| Task | Tool | Effort |
|
||||||
|
|------|------|--------|
|
||||||
|
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
|
||||||
|
| Research policy events (10-20) | Manual research | 2-3 hours |
|
||||||
|
| Replace social link placeholders | Manual | 5 minutes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Phase 1 — Build These
|
||||||
|
|
||||||
|
- Bio landing page with content from bio_content_v2.md
|
||||||
|
- TRREB PDF parser
|
||||||
|
- CMHC CSV processor
|
||||||
|
- PostgreSQL + PostGIS database layer
|
||||||
|
- Star schema (facts + dimensions)
|
||||||
|
- dbt models with tests
|
||||||
|
- Choropleth visualization (Dash)
|
||||||
|
- Policy event annotation layer
|
||||||
|
- Neighbourhood overlay (toggle-able)
|
||||||
|
|
||||||
|
### Phase 1 — Do NOT Build
|
||||||
|
|
||||||
|
| Feature | Reason | When |
|
||||||
|
|---------|--------|------|
|
||||||
|
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 | After Energy project |
|
||||||
|
| Crime data integration | Deferred scope | Phase 4 |
|
||||||
|
| Historical boundary reconciliation (140→158) | 2021+ data only for V1 | Phase 4 |
|
||||||
|
| ML prediction models | Energy project scope | Phase 3 |
|
||||||
|
| Multi-project shared infrastructure | Build first, abstract second | Phase 2 |
|
||||||
|
|
||||||
|
If a task seems to require any of the deferred features above (Phase 2–4), **stop and flag it**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
### Root-Level Files (Allowed)
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `README.md` | Project overview |
|
||||||
|
| `CLAUDE.md` | AI assistant context |
|
||||||
|
| `pyproject.toml` | Python packaging |
|
||||||
|
| `.gitignore` | Git ignore rules |
|
||||||
|
| `.env.example` | Environment template |
|
||||||
|
| `.python-version` | pyenv version |
|
||||||
|
| `.pre-commit-config.yaml` | Pre-commit hooks |
|
||||||
|
| `docker-compose.yml` | Container orchestration |
|
||||||
|
| `Makefile` | Task automation |
|
||||||
|
|
||||||
|
### Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio/
|
||||||
|
├── portfolio_app/ # Monolithic Dash application
|
||||||
|
│ ├── app.py
|
||||||
|
│ ├── config.py
|
||||||
|
│ ├── assets/
|
||||||
|
│ ├── pages/
|
||||||
|
│ ├── components/
|
||||||
|
│ ├── figures/
|
||||||
|
│ ├── toronto/
|
||||||
|
│ └── errors/
|
||||||
|
├── tests/
|
||||||
|
├── dbt/
|
||||||
|
├── data/
|
||||||
|
│ └── toronto/
|
||||||
|
│ ├── raw/
|
||||||
|
│ ├── processed/ # gitignored
|
||||||
|
│ └── reference/
|
||||||
|
├── scripts/
|
||||||
|
│ ├── db/
|
||||||
|
│ ├── docker/
|
||||||
|
│ ├── deploy/
|
||||||
|
│ ├── dbt/
|
||||||
|
│ └── dev/
|
||||||
|
├── docs/
|
||||||
|
├── notebooks/
|
||||||
|
├── backups/ # gitignored
|
||||||
|
└── reports/ # gitignored
|
||||||
|
```
|
||||||
|
|
||||||
|
### Gitignored Directories
|
||||||
|
|
||||||
|
- `data/*/processed/`
|
||||||
|
- `reports/`
|
||||||
|
- `backups/`
|
||||||
|
- `notebooks/*.html`
|
||||||
|
- `.env`
|
||||||
|
- `__pycache__/`
|
||||||
|
- `.venv/`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Makefile Targets
|
||||||
|
|
||||||
|
| Target | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `setup` | Install deps, create .env, init pre-commit |
|
||||||
|
| `docker-up` | Start PostgreSQL + PostGIS |
|
||||||
|
| `docker-down` | Stop containers |
|
||||||
|
| `db-init` | Initialize database schema |
|
||||||
|
| `run` | Start Dash dev server |
|
||||||
|
| `test` | Run pytest |
|
||||||
|
| `dbt-run` | Run dbt models |
|
||||||
|
| `dbt-test` | Run dbt tests |
|
||||||
|
| `lint` | Run ruff linter |
|
||||||
|
| `format` | Run ruff formatter |
|
||||||
|
| `ci` | Run all checks |
|
||||||
|
| `deploy` | Deploy to production |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Script Standards
|
||||||
|
|
||||||
|
All scripts in `scripts/`:
|
||||||
|
- Include usage comments at top
|
||||||
|
- Idempotent where possible
|
||||||
|
- Exit codes: 0 = success, 1 = error
|
||||||
|
- Use `set -euo pipefail` for bash
|
||||||
|
- Log to stdout, errors to stderr
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Required in `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=<secure>
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=<random>
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
|
||||||
|
### Launch 1 (Sprint 2)
|
||||||
|
- [ ] Bio page accessible via HTTPS
|
||||||
|
- [ ] All bio content rendered (from bio_content_v2.md)
|
||||||
|
- [ ] No placeholder text visible
|
||||||
|
- [ ] Mobile responsive
|
||||||
|
- [ ] Social links functional
|
||||||
|
|
||||||
|
### Launch 2 (Sprint 6)
|
||||||
|
- [ ] Choropleth renders TRREB districts and CMHC zones
|
||||||
|
- [ ] Purchase/rental mode toggle works
|
||||||
|
- [ ] Time navigation works
|
||||||
|
- [ ] Policy event markers visible
|
||||||
|
- [ ] Neighbourhood overlay toggleable
|
||||||
|
- [ ] Methodology documentation published
|
||||||
|
- [ ] Data sources cited
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Reference Documents
|
||||||
|
|
||||||
|
For detailed specifications, see:
|
||||||
|
|
||||||
|
| Document | Location | Use When |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| Data schemas | `docs/toronto_housing_dashboard_spec_v5.md` | Parser/model tasks |
|
||||||
|
| WBS details | `docs/wbs.md` | Sprint planning |
|
||||||
|
| Bio content | `docs/bio_content_v2.md` | Building home.py |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Reference Version: 1.0*
|
||||||
|
*Created: January 2026*
|
||||||
134
docs/bio_content_v2.md
Normal file
134
docs/bio_content_v2.md
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
# Portfolio Bio Content
|
||||||
|
|
||||||
|
**Version**: 2.0
|
||||||
|
**Last Updated**: January 2026
|
||||||
|
**Purpose**: Content source for `portfolio_app/pages/home.py`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Document Context
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Parent Document** | `portfolio_project_plan_v5.md` |
|
||||||
|
| **Role** | Bio content and social links for landing page |
|
||||||
|
| **Consumed By** | `portfolio_app/pages/home.py` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Headline
|
||||||
|
|
||||||
|
**Primary**: Leo | Data Engineer & Analytics Developer
|
||||||
|
|
||||||
|
**Tagline**: I build data infrastructure that actually gets used.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Professional Summary
|
||||||
|
|
||||||
|
Over the past 5 years, I've designed and evolved an enterprise analytics platform from scratch—now processing 1B+ rows across 21 tables with Python-based ETL pipelines and dbt-style SQL transformations. The result: 40% efficiency gains, 30% reduction in call abandon rates, and dashboards that executives actually open.
|
||||||
|
|
||||||
|
My approach: dimensional modeling (star schema), layered transformations (staging → intermediate → marts), and automation that eliminates manual work. I've built everything from self-service analytics portals to OCR-powered receipt processing systems.
|
||||||
|
|
||||||
|
Currently at Summitt Energy supporting multi-market operations across Canada and 8 US states. Previously cut my teeth on IT infrastructure projects at Petrobras (Fortune 500) and the Project Management Institute.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tech Stack
|
||||||
|
|
||||||
|
| Category | Technologies |
|
||||||
|
|----------|--------------|
|
||||||
|
| **Languages** | Python, SQL |
|
||||||
|
| **Data Processing** | Pandas, SQLAlchemy, FastAPI |
|
||||||
|
| **Databases** | PostgreSQL, MSSQL |
|
||||||
|
| **Visualization** | Power BI, Plotly, Dash |
|
||||||
|
| **Patterns** | dbt, dimensional modeling, star schema |
|
||||||
|
| **Other** | Genesys Cloud |
|
||||||
|
|
||||||
|
**Display Format** (for landing page):
|
||||||
|
```
|
||||||
|
Python (Pandas, SQLAlchemy, FastAPI) • SQL (MSSQL, PostgreSQL) • Power BI • Plotly/Dash • Genesys Cloud • dbt patterns
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Side Project
|
||||||
|
|
||||||
|
**Bandit Labs** — Building automation and AI tooling for small businesses.
|
||||||
|
|
||||||
|
*Note: Keep this brief on portfolio; link only if separate landing page exists.*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Social Links
|
||||||
|
|
||||||
|
| Platform | URL | Icon |
|
||||||
|
|----------|-----|------|
|
||||||
|
| **LinkedIn** | `https://linkedin.com/in/[USERNAME]` | `lucide-react: Linkedin` |
|
||||||
|
| **GitHub** | `https://github.com/[USERNAME]` | `lucide-react: Github` |
|
||||||
|
|
||||||
|
> **TODO**: Replace `[USERNAME]` placeholders with actual URLs before bio page launch.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Availability Statement
|
||||||
|
|
||||||
|
Open to **Senior Data Analyst**, **Analytics Engineer**, and **BI Developer** opportunities in Toronto or remote.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Portfolio Projects Section
|
||||||
|
|
||||||
|
*Dynamically populated based on deployed projects.*
|
||||||
|
|
||||||
|
| Project | Status | Link |
|
||||||
|
|---------|--------|------|
|
||||||
|
| Toronto Housing Dashboard | In Development | `/toronto` |
|
||||||
|
| Energy Pricing Analysis | Planned | `/energy` |
|
||||||
|
|
||||||
|
**Display Logic**:
|
||||||
|
- Show only projects with `status = deployed`
|
||||||
|
- Exception: "In Development" projects may either be shown as a non-linked "coming soon" card or be hidden (user preference)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
### Content Hierarchy for `home.py`
|
||||||
|
|
||||||
|
```
|
||||||
|
1. Name + Tagline (hero section)
|
||||||
|
2. Professional Summary (2-3 paragraphs)
|
||||||
|
3. Tech Stack (horizontal chips or inline list)
|
||||||
|
4. Portfolio Projects (cards linking to dashboards)
|
||||||
|
5. Social Links (icon buttons)
|
||||||
|
6. Availability statement (subtle, bottom)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Styling Recommendations
|
||||||
|
|
||||||
|
- Clean, minimal — let the projects speak
|
||||||
|
- Dark/light mode support via dash-mantine-components theme
|
||||||
|
- No headshot required (optional)
|
||||||
|
- Mobile-responsive layout
|
||||||
|
|
||||||
|
### Content Updates
|
||||||
|
|
||||||
|
When updating bio content:
|
||||||
|
1. Edit this document
|
||||||
|
2. Update `home.py` to reflect changes
|
||||||
|
3. Redeploy
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Related Documents
|
||||||
|
|
||||||
|
| Document | Relationship |
|
||||||
|
|----------|--------------|
|
||||||
|
| `portfolio_project_plan_v5.md` | Parent — references this for bio content |
|
||||||
|
| `portfolio_app/pages/home.py` | Consumer — implements this content |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Document Version: 2.0*
|
||||||
|
*Updated: January 2026*
|
||||||
809
docs/toronto_housing_dashboard_spec_v5.md
Normal file
809
docs/toronto_housing_dashboard_spec_v5.md
Normal file
@@ -0,0 +1,809 @@
|
|||||||
|
# Toronto Housing Price Dashboard
|
||||||
|
## Portfolio Project — Data Specification & Architecture
|
||||||
|
|
||||||
|
**Version**: 5.1
|
||||||
|
**Last Updated**: January 2026
|
||||||
|
**Status**: Specification Complete
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Document Context
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Parent Document** | `portfolio_project_plan_v5.md` |
|
||||||
|
| **Role** | Detailed specification for Toronto Housing Dashboard |
|
||||||
|
| **Scope** | Data schemas, source URLs, geographic boundaries, V1/V2 decisions |
|
||||||
|
|
||||||
|
**Rule**: For overall project scope, phasing, tech stack, and deployment architecture, see `portfolio_project_plan_v5.md`. This document provides implementation-level detail for the Toronto Housing project specifically.
|
||||||
|
|
||||||
|
**Terminology Note**: This document uses **Stages 1–4** to describe Toronto Housing implementation steps. These are distinct from the **Phases 1–5** in `portfolio_project_plan_v5.md`, which describe the overall portfolio project lifecycle.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
A dashboard analyzing housing price variations across Toronto neighbourhoods over time, with dual analysis tracks:
|
||||||
|
|
||||||
|
| Track | Data Domain | Primary Source | Geographic Unit |
|
||||||
|
|-------|-------------|----------------|-----------------|
|
||||||
|
| **Purchases** | Sales transactions | TRREB Monthly Reports | ~35 Districts |
|
||||||
|
| **Rentals** | Rental market stats | CMHC Rental Market Survey | ~20 Zones |
|
||||||
|
|
||||||
|
**Core Visualization**: Interactive choropleth map of Toronto with toggle between rental/purchase analysis, time-series exploration by month/year.
|
||||||
|
|
||||||
|
**Enrichment Layer** (V1: overlay only): Neighbourhood-level demographic and socioeconomic context including population density, education attainment, and income. Crime data deferred to Phase 4 of the portfolio project (post-Energy project).
|
||||||
|
|
||||||
|
**Tech Stack & Deployment**: See `portfolio_project_plan_v5.md` → Tech Stack, Deployment Architecture
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Geographic Layers
|
||||||
|
|
||||||
|
### Layer Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ City of Toronto Official Neighbourhoods (158) │ ← Reference overlay + Enrichment data
|
||||||
|
├─────────────────────────────────────────────────────────────────┤
|
||||||
|
│ TRREB Districts (~35) — W01, C01, E01, etc. │ ← Purchase data
|
||||||
|
├─────────────────────────────────────────────────────────────────┤
|
||||||
|
│ CMHC Survey Zones (~20) — Census Tract aligned │ ← Rental data
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Boundary Files
|
||||||
|
|
||||||
|
| Layer | Zones | Format | Source | Status |
|
||||||
|
|-------|-------|--------|--------|--------|
|
||||||
|
| **City Neighbourhoods** | 158 | GeoJSON, Shapefile | [GitHub - jasonicarter/toronto-geojson](https://github.com/jasonicarter/toronto-geojson) | ✅ Ready to use |
|
||||||
|
| **TRREB Districts** | ~35 | PDF only | [TRREB Toronto Map PDF](https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf) | ⚠ Requires manual digitization |
|
||||||
|
| **CMHC Zones** | ~20 | R package | R `cmhc` package via `get_cmhc_geography()` | ✅ Available (see note) |
|
||||||
|
|
||||||
|
### Digitization Task: TRREB Districts
|
||||||
|
|
||||||
|
**Input**: TRREB Toronto PDF map
|
||||||
|
**Output**: GeoJSON with district codes (W01-W10, C01-C15, E01-E11)
|
||||||
|
**Tool**: QGIS
|
||||||
|
|
||||||
|
**Process**:
|
||||||
|
1. Import PDF as raster layer in QGIS
|
||||||
|
2. Create vector layer with polygon features
|
||||||
|
3. Trace district boundaries
|
||||||
|
4. Add attributes: `district_code`, `district_name`, `area_type` (West/Central/East)
|
||||||
|
5. Export as GeoJSON (WGS84 / EPSG:4326)
|
||||||
|
|
||||||
|
### CMHC Zone Boundaries
|
||||||
|
|
||||||
|
**Source**: The R `cmhc` package provides CMHC survey geography via the `get_cmhc_geography()` function.
|
||||||
|
|
||||||
|
**Extraction Process**:
|
||||||
|
```r
|
||||||
|
# In R
|
||||||
|
library(cmhc)
|
||||||
|
library(sf)
|
||||||
|
|
||||||
|
# Get Toronto CMA zones
|
||||||
|
toronto_zones <- get_cmhc_geography(
|
||||||
|
geography_type = "ZONE",
|
||||||
|
cma = "Toronto"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Export to GeoJSON for Python/PostGIS
|
||||||
|
st_write(toronto_zones, "cmhc_zones.geojson", driver = "GeoJSON")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Output**: `data/toronto/raw/geo/cmhc_zones.geojson`
|
||||||
|
|
||||||
|
**Why R?**: CMHC zone boundaries are not published as standalone files. The `cmhc` R package is the only reliable programmatic source. One-time extraction, then use GeoJSON in Python stack.
|
||||||
|
|
||||||
|
### ⚠ Neighbourhood Boundary Change (140 → 158)
|
||||||
|
|
||||||
|
The City of Toronto updated from 140 to 158 social planning neighbourhoods in **April 2021**. This affects data alignment:
|
||||||
|
|
||||||
|
| Data Source | Pre-2021 | Post-2021 | Handling |
|
||||||
|
|-------------|----------|-----------|----------|
|
||||||
|
| Census (2016 and earlier) | 140 neighbourhoods | N/A | Use 140-model files |
|
||||||
|
| Census (2021+) | N/A | 158 neighbourhoods | Use 158-model files |
|
||||||
|
|
||||||
|
**V1 Strategy**: Use 2021 Census on 158 boundaries only. Defer historical trend analysis to portfolio Phase 4.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Source #1: TRREB Monthly Market Reports
|
||||||
|
|
||||||
|
### Source Details
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Provider** | Toronto Regional Real Estate Board |
|
||||||
|
| **URL** | [TRREB Market Watch](https://trreb.ca/index.php/market-news/market-watch) |
|
||||||
|
| **Format** | PDF (monthly reports) |
|
||||||
|
| **Update Frequency** | Monthly |
|
||||||
|
| **Historical Availability** | 2007–Present |
|
||||||
|
| **Access** | Public (aggregate data in PDFs) |
|
||||||
|
| **Extraction Method** | PDF parsing (`pdfplumber` or `camelot-py`) |
|
||||||
|
|
||||||
|
### Available Tables
|
||||||
|
|
||||||
|
#### Table: `trreb_monthly_summary`
|
||||||
|
**Location in PDF**: Pages 3-4 (Summary by Area)
|
||||||
|
|
||||||
|
| Column | Data Type | Description |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `report_date` | DATE | First of month (YYYY-MM-01) |
|
||||||
|
| `area_code` | VARCHAR(3) | District code (W01, C01, E01, etc.) |
|
||||||
|
| `area_name` | VARCHAR(100) | District name |
|
||||||
|
| `area_type` | VARCHAR(10) | West / Central / East / North |
|
||||||
|
| `sales` | INTEGER | Number of transactions |
|
||||||
|
| `dollar_volume` | DECIMAL | Total sales volume ($) |
|
||||||
|
| `avg_price` | DECIMAL | Average sale price ($) |
|
||||||
|
| `median_price` | DECIMAL | Median sale price ($) |
|
||||||
|
| `new_listings` | INTEGER | New listings count |
|
||||||
|
| `active_listings` | INTEGER | Active listings at month end |
|
||||||
|
| `avg_sp_lp` | DECIMAL | Avg sale price / list price ratio (%) |
|
||||||
|
| `avg_dom` | INTEGER | Average days on market |
|
||||||
|
|
||||||
|
### Dimensions
|
||||||
|
|
||||||
|
| Dimension | Granularity | Values |
|
||||||
|
|-----------|-------------|--------|
|
||||||
|
| **Time** | Monthly | 2007-01 to present |
|
||||||
|
| **Geography** | District | ~35 TRREB districts |
|
||||||
|
| **Property Type** | Aggregate | All residential (no breakdown in summary) |
|
||||||
|
|
||||||
|
### Metrics Available
|
||||||
|
|
||||||
|
| Metric | Aggregation | Use Case |
|
||||||
|
|--------|-------------|----------|
|
||||||
|
| `avg_price` | Pre-calculated monthly avg | Primary price indicator |
|
||||||
|
| `median_price` | Pre-calculated monthly median | Robust price indicator |
|
||||||
|
| `sales` | Count | Market activity volume |
|
||||||
|
| `avg_dom` | Average | Market velocity |
|
||||||
|
| `avg_sp_lp` | Ratio | Buyer/seller market indicator |
|
||||||
|
| `new_listings` | Count | Supply indicator |
|
||||||
|
| `active_listings` | Snapshot | Inventory level |
|
||||||
|
|
||||||
|
### ⚠ Limitations
|
||||||
|
|
||||||
|
- No transaction-level data (aggregates only)
|
||||||
|
- Property type breakdown requires parsing additional tables
|
||||||
|
- PDF structure may vary slightly across years
|
||||||
|
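- District boundaries have only been stable since 2011; reports from 2007–2010 may use different district definitions and should be verified before being compared with later data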
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Source #2: CMHC Rental Market Survey
|
||||||
|
|
||||||
|
### Source Details
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Provider** | Canada Mortgage and Housing Corporation |
|
||||||
|
| **URL** | [CMHC Housing Market Information Portal](https://www03.cmhc-schl.gc.ca/hmip-pimh/) |
|
||||||
|
| **Format** | CSV export, API |
|
||||||
|
| **Update Frequency** | Annual (October survey) |
|
||||||
|
| **Historical Availability** | 1990–Present |
|
||||||
|
| **Access** | Public, free registration for bulk downloads |
|
||||||
|
| **Geographic Levels** | CMA → Zone → Neighbourhood → Census Tract |
|
||||||
|
|
||||||
|
### Available Tables
|
||||||
|
|
||||||
|
#### Table: `cmhc_rental_summary`
|
||||||
|
**Portal Path**: Toronto → Primary Rental Market → Summary Statistics
|
||||||
|
|
||||||
|
| Column | Data Type | Description |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `survey_year` | INTEGER | Survey year (October) |
|
||||||
|
| `zone_code` | VARCHAR(10) | CMHC zone identifier |
|
||||||
|
| `zone_name` | VARCHAR(100) | Zone name |
|
||||||
|
| `bedroom_type` | VARCHAR(20) | Bachelor / 1-Bed / 2-Bed / 3-Bed+ / Total |
|
||||||
|
| `universe` | INTEGER | Total rental units in zone |
|
||||||
|
| `vacancy_rate` | DECIMAL | Vacancy rate (%) |
|
||||||
|
| `vacancy_rate_reliability` | VARCHAR(1) | Reliability code (a/b/c/d) |
|
||||||
|
| `availability_rate` | DECIMAL | Availability rate (%) |
|
||||||
|
| `average_rent` | DECIMAL | Average monthly rent ($) |
|
||||||
|
| `average_rent_reliability` | VARCHAR(1) | Reliability code |
|
||||||
|
| `median_rent` | DECIMAL | Median monthly rent ($) |
|
||||||
|
| `rent_change_pct` | DECIMAL | YoY rent change (%) |
|
||||||
|
| `turnover_rate` | DECIMAL | Unit turnover rate (%) |
|
||||||
|
|
||||||
|
### Dimensions
|
||||||
|
|
||||||
|
| Dimension | Granularity | Values |
|
||||||
|
|-----------|-------------|--------|
|
||||||
|
| **Time** | Annual | 1990 to present (October snapshot) |
|
||||||
|
| **Geography** | Zone | ~20 CMHC zones in Toronto CMA |
|
||||||
|
| **Bedroom Type** | Category | Bachelor, 1-Bed, 2-Bed, 3-Bed+, Total |
|
||||||
|
| **Structure Type** | Category | Row, Apartment (available in detailed tables) |
|
||||||
|
|
||||||
|
### Metrics Available
|
||||||
|
|
||||||
|
| Metric | Aggregation | Use Case |
|
||||||
|
|--------|-------------|----------|
|
||||||
|
| `average_rent` | Pre-calculated avg | Primary rent indicator |
|
||||||
|
| `median_rent` | Pre-calculated median | Robust rent indicator |
|
||||||
|
| `vacancy_rate` | Percentage | Market tightness |
|
||||||
|
| `availability_rate` | Percentage | Supply accessibility |
|
||||||
|
| `turnover_rate` | Percentage | Tenant mobility |
|
||||||
|
| `rent_change_pct` | YoY % | Rent growth tracking |
|
||||||
|
| `universe` | Count | Market size |
|
||||||
|
|
||||||
|
### Reliability Codes
|
||||||
|
|
||||||
|
| Code | Meaning | Coefficient of Variation |
|
||||||
|
|------|---------|-------------------------|
|
||||||
|
| `a` | Excellent | CV ≤ 2.5% |
|
||||||
|
| `b` | Good | 2.5% < CV ≤ 5% |
|
||||||
|
| `c` | Fair | 5% < CV ≤ 10% |
|
||||||
|
| `d` | Poor (use with caution) | CV > 10% |
|
||||||
|
| `**` | Data suppressed | Sample too small |
|
||||||
|
|
||||||
|
### ⚠ Limitations
|
||||||
|
|
||||||
|
- Annual only (no monthly granularity)
|
||||||
|
- October snapshot (point-in-time)
|
||||||
|
- Zones are larger than TRREB districts
|
||||||
|
- Purpose-built rental only (excludes condo rentals in base survey)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Source #3: City of Toronto Open Data
|
||||||
|
|
||||||
|
### Source Details
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Provider** | City of Toronto |
|
||||||
|
| **URL** | [Toronto Open Data Portal](https://open.toronto.ca/) |
|
||||||
|
| **Format** | GeoJSON, Shapefile, CSV |
|
||||||
|
| **Use Case** | Reference layer, demographic enrichment |
|
||||||
|
|
||||||
|
### Relevant Datasets
|
||||||
|
|
||||||
|
#### Dataset: `neighbourhoods`
|
||||||
|
|
||||||
|
| Column | Data Type | Description |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `area_id` | INTEGER | Neighbourhood ID (1-158) |
|
||||||
|
| `area_name` | VARCHAR(100) | Official neighbourhood name |
|
||||||
|
| `geometry` | POLYGON | Boundary geometry |
|
||||||
|
|
||||||
|
#### Dataset: `neighbourhood_profiles` (Census-linked)
|
||||||
|
|
||||||
|
| Column | Data Type | Description |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `neighbourhood_id` | INTEGER | Links to neighbourhoods |
|
||||||
|
| `population` | INTEGER | Total population |
|
||||||
|
| `avg_household_income` | DECIMAL | Average household income |
|
||||||
|
| `dwelling_count` | INTEGER | Total dwellings |
|
||||||
|
| `owner_pct` | DECIMAL | % owner-occupied |
|
||||||
|
| `renter_pct` | DECIMAL | % renter-occupied |
|
||||||
|
|
||||||
|
### Enrichment Potential
|
||||||
|
|
||||||
|
Can overlay demographic context on housing data:
|
||||||
|
- Income brackets by neighbourhood
|
||||||
|
- Ownership vs rental ratios
|
||||||
|
- Population density
|
||||||
|
- Dwelling type distribution
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Source #4: Enrichment Data (Density, Education)
|
||||||
|
|
||||||
|
### Purpose
|
||||||
|
|
||||||
|
Provide socioeconomic context to housing price analysis. Enables questions like:
|
||||||
|
- Do neighbourhoods with higher education attainment have higher prices?
|
||||||
|
- How does population density correlate with price per square foot?
|
||||||
|
|
||||||
|
### Geographic Alignment Reality
|
||||||
|
|
||||||
|
**Critical constraint**: Enrichment data is available at the **158-neighbourhood** level, while core housing data sits at **TRREB districts (~35)** and **CMHC zones (~20)**. These do not align cleanly.
|
||||||
|
|
||||||
|
```
|
||||||
|
158 Neighbourhoods (fine) → Enrichment data lives here
|
||||||
|
(no clean crosswalk)
|
||||||
|
~35 TRREB Districts (coarse) → Purchase data lives here
|
||||||
|
~20 CMHC Zones (coarse) → Rental data lives here
|
||||||
|
```
|
||||||
|
|
||||||
|
### Available Enrichment Datasets
|
||||||
|
|
||||||
|
#### Dataset: Neighbourhood Profiles (Census)
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Provider** | City of Toronto (via Statistics Canada Census) |
|
||||||
|
| **URL** | [Toronto Open Data - Neighbourhood Profiles](https://open.toronto.ca/dataset/neighbourhood-profiles/) |
|
||||||
|
| **Format** | CSV, JSON, XML, XLSX |
|
||||||
|
| **Update Frequency** | Every 5 years (Census cycle) |
|
||||||
|
| **Available Years** | 2001, 2006, 2011, 2016, 2021 |
|
||||||
|
| **Geographic Unit** | 158 neighbourhoods (140 pre-2021) |
|
||||||
|
|
||||||
|
**Key Variables**:
|
||||||
|
|
||||||
|
| Variable | Description | Use Case |
|
||||||
|
|----------|-------------|----------|
|
||||||
|
| `population` | Total population | Density calculation |
|
||||||
|
| `land_area_sqkm` | Area in square kilometers | Density calculation |
|
||||||
|
| `pop_density_per_sqkm` | Population per km² | Density metric |
|
||||||
|
| `pct_bachelors_or_higher` | % age 25-64 with bachelor's+ | Education proxy |
|
||||||
|
| `median_household_income` | Median total household income | Income metric |
|
||||||
|
| `avg_household_income` | Average total household income | Income metric |
|
||||||
|
| `pct_owner_occupied` | % owner-occupied dwellings | Tenure split |
|
||||||
|
| `pct_renter_occupied` | % renter-occupied dwellings | Tenure split |
|
||||||
|
|
||||||
|
**Download URL (2021, 158 neighbourhoods)**:
|
||||||
|
```
|
||||||
|
https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Crime Data — Deferred to Portfolio Phase 4
|
||||||
|
|
||||||
|
Crime data (TPS Neighbourhood Crime Rates) is **not included in V1 scope**. It will be added in portfolio Phase 4 after the Energy Pricing project is complete.
|
||||||
|
|
||||||
|
**Rationale**:
|
||||||
|
- Crime data is socially/politically sensitive and requires careful methodology documentation
|
||||||
|
- V1 focuses on core housing metrics and policy events
|
||||||
|
- Deferral reduces scope creep risk
|
||||||
|
|
||||||
|
**Future Reference** (Portfolio Phase 4):
|
||||||
|
- Source: [TPS Public Safety Data Portal](https://data.torontopolice.on.ca/)
|
||||||
|
- Dataset: Neighbourhood Crime Rates (Major Crime Indicators)
|
||||||
|
- Geographic Unit: 158 neighbourhoods
|
||||||
|
|
||||||
|
### V1 Enrichment Data Summary
|
||||||
|
|
||||||
|
| Measure | Source | Geography | Frequency | Format | Status |
|
||||||
|
|---------|--------|-----------|-----------|--------|--------|
|
||||||
|
| **Population Density** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||||
|
| **Education Attainment** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||||
|
| **Median Income** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||||
|
| **Crime Rates (MCI)** | TPS Data Portal | 158 neighbourhoods | Annual | GeoJSON/CSV | Deferred to Portfolio Phase 4 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Source #5: Policy Events
|
||||||
|
|
||||||
|
### Purpose
|
||||||
|
|
||||||
|
Provide temporal context for housing price movements. Display as annotation markers on time series charts. **No causation claims** — correlation/context only.
|
||||||
|
|
||||||
|
### Event Schema
|
||||||
|
|
||||||
|
#### Table: `dim_policy_event`
|
||||||
|
|
||||||
|
| Column | Data Type | Description |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `event_id` | INTEGER (PK) | Auto-increment primary key |
|
||||||
|
| `event_date` | DATE | Date event was announced/occurred |
|
||||||
|
| `effective_date` | DATE | Date policy took effect (if different) |
|
||||||
|
| `level` | VARCHAR(20) | `federal` / `provincial` / `municipal` |
|
||||||
|
| `category` | VARCHAR(20) | `monetary` / `tax` / `regulatory` / `supply` / `economic` |
|
||||||
|
| `title` | VARCHAR(200) | Short event title for display |
|
||||||
|
| `description` | TEXT | Longer description for tooltip |
|
||||||
|
| `expected_direction` | VARCHAR(10) | `bearish` / `bullish` / `neutral` |
|
||||||
|
| `source_url` | VARCHAR(500) | Link to official announcement/documentation |
|
||||||
|
| `confidence` | VARCHAR(10) | `high` / `medium` / `low` |
|
||||||
|
| `created_at` | TIMESTAMP | Record creation timestamp |
|
||||||
|
|
||||||
|
### Event Tiers
|
||||||
|
|
||||||
|
| Tier | Level | Category Examples | Inclusion Criteria |
|
||||||
|
|------|-------|-------------------|-------------------|
|
||||||
|
| **1** | Federal | BoC rate decisions, OSFI stress tests | Always include; objective, documented |
|
||||||
|
| **1** | Provincial | Fair Housing Plan, foreign buyer tax, rent control | Always include; legislative record |
|
||||||
|
| **2** | Municipal | Zoning reforms, development charges | Include if material impact expected |
|
||||||
|
| **2** | Economic | COVID measures, major employer closures | Include if Toronto-specific impact |
|
||||||
|
| **3** | Market | Major project announcements | Strict criteria; must be verifiable |
|
||||||
|
|
||||||
|
### Expected Direction Values
|
||||||
|
|
||||||
|
| Value | Meaning | Example |
|
||||||
|
|-------|---------|---------|
|
||||||
|
| `bullish` | Expected to increase prices | Rate cut, supply restriction |
|
||||||
|
| `bearish` | Expected to decrease prices | Rate hike, foreign buyer tax |
|
||||||
|
| `neutral` | Uncertain or mixed impact | Regulatory clarification |
|
||||||
|
|
||||||
|
### ⚠ Caveats
|
||||||
|
|
||||||
|
- **No causation claims**: Events are context, not explanation
|
||||||
|
- **Lag effects**: Policy impact may not be immediate
|
||||||
|
- **Confounding factors**: Multiple simultaneous influences
|
||||||
|
- **Display only**: No statistical analysis in V1
|
||||||
|
|
||||||
|
### Sample Events (Tier 1)
|
||||||
|
|
||||||
|
| Date | Level | Category | Title | Direction |
|
||||||
|
|------|-------|----------|-------|-----------|
|
||||||
|
| 2017-04-20 | provincial | tax | Ontario Fair Housing Plan | bearish |
|
||||||
|
| 2018-01-01 | federal | regulatory | OSFI B-20 Stress Test | bearish |
|
||||||
|
| 2020-03-27 | federal | monetary | BoC Emergency Rate Cut (to 0.25%) | bullish |
|
||||||
|
| 2022-03-02 | federal | monetary | BoC Rate Hike Cycle Begins | bearish |
|
||||||
|
| 2023-01-01 | federal | regulatory | Federal 2-Year Foreign Buyer Ban | bearish |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Integration Strategy
|
||||||
|
|
||||||
|
### Temporal Alignment
|
||||||
|
|
||||||
|
| Source | Native Frequency | Alignment Strategy |
|
||||||
|
|--------|------------------|---------------------|
|
||||||
|
| TRREB | Monthly | Use as-is |
|
||||||
|
| CMHC | Annual (October) | Spread to monthly OR display annual overlay |
|
||||||
|
| Census/Enrichment | 5-year | Static snapshot; display as reference |
|
||||||
|
| Policy Events | Event-based | Display as vertical markers on time axis |
|
||||||
|
|
||||||
|
**Recommendation**: Keep separate time axes. TRREB monthly for purchases, CMHC annual for rentals. Don't force artificial monthly rental data.
|
||||||
|
|
||||||
|
### Geographic Alignment
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ VISUALIZATION APPROACH │
|
||||||
|
├─────────────────────────────────────────────────────────────────┤
|
||||||
|
│ │
|
||||||
|
│ Purchase Mode Rental Mode │
|
||||||
|
│ ───────────────── ────────────── │
|
||||||
|
│ Map: TRREB Districts Map: CMHC Zones │
|
||||||
|
│ Time: Monthly slider Time: Annual selector │
|
||||||
|
│ Metrics: Price, Sales Metrics: Rent, Vacancy │
|
||||||
|
│ │
|
||||||
|
│ ┌───────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ City Neighbourhoods Overlay │ │
|
||||||
|
│ │ (158 boundaries as reference layer) │ │
|
||||||
|
│ │ + Enrichment data (density, education, income) │ │
|
||||||
|
│ └───────────────────────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Enrichment Integration Strategy (Phased)
|
||||||
|
|
||||||
|
#### V1: Reference Overlay (Current Scope)
|
||||||
|
|
||||||
|
**Approach**: Display neighbourhood enrichment as a separate toggle-able layer. No joins to housing data.
|
||||||
|
|
||||||
|
**UX**:
|
||||||
|
- User hovers over TRREB district → tooltip shows "This district contains neighbourhoods: Annex, Casa Loma, Yorkville..."
|
||||||
|
- User toggles "Show Enrichment" → choropleth switches to neighbourhood-level density/education/income
|
||||||
|
- Enrichment and housing metrics displayed side-by-side, not merged
|
||||||
|
|
||||||
|
**Pros**:
|
||||||
|
- No imputation or unreliable aggregations
|
||||||
|
- Honest about geographic mismatch
|
||||||
|
- Ships faster
|
||||||
|
|
||||||
|
**Cons**:
|
||||||
|
- Can't do correlation analysis (price vs. enrichment) directly in dashboard
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
- `dim_neighbourhood` as standalone dimension (no FK to fact tables)
|
||||||
|
- Spatial lookup on hover (point-in-polygon)
|
||||||
|
|
||||||
|
#### V2/Portfolio Phase 4: Area-Weighted Aggregation (Future Scope)
|
||||||
|
|
||||||
|
**Approach**: Pre-compute area-weighted averages of neighbourhood metrics for each TRREB district and CMHC zone.
|
||||||
|
|
||||||
|
**Process**:
|
||||||
|
1. Spatial join: intersect neighbourhood polygons with TRREB/CMHC polygons
|
||||||
|
2. Compute overlap area for each neighbourhood-district pair
|
||||||
|
3. Weight neighbourhood metrics by overlap area proportion
|
||||||
|
4. User selects aggregation method in UI
|
||||||
|
|
||||||
|
**Aggregation Methods to Expose**:
|
||||||
|
|
||||||
|
| Method | Description | Best For |
|
||||||
|
|--------|-------------|----------|
|
||||||
|
| **Area-weighted mean** | Weight by % overlap area | Continuous metrics (density) |
|
||||||
|
| **Population-weighted mean** | Weight by population in overlap | Per-capita metrics (education) |
|
||||||
|
| **Majority assignment** | Assign neighbourhood to district with >50% overlap | Categorical data |
|
||||||
|
| **Max overlap** | Assign to single district with largest overlap | 1:1 mapping needs |
|
||||||
|
|
||||||
|
**Default**: Population-weighted (more defensible for per-capita metrics). Hide selector behind "Advanced" toggle.
|
||||||
|
|
||||||
|
### V1 Future-Proofing (Do Now)
|
||||||
|
|
||||||
|
| Action | Why |
|
||||||
|
|--------|-----|
|
||||||
|
| Store neighbourhood boundaries in same CRS as TRREB/CMHC (WGS84) | Avoids reprojection headaches |
|
||||||
|
| Keep `dim_neighbourhood` normalized (not denormalized into district tables) | Clean separation for V2 join |
|
||||||
|
| Document Census year for each metric | Ready for 2026 Census |
|
||||||
|
| Include `census_year` column in `dim_neighbourhood` | Enables SCD tracking |
|
||||||
|
|
||||||
|
### V1 Defer (Don't Do Yet)
|
||||||
|
|
||||||
|
| Action | Why Not |
|
||||||
|
|--------|---------|
|
||||||
|
| Pre-compute area-weighted crosswalk | Don't need for V1 |
|
||||||
|
| Build aggregation method selector UI | No backend to support it |
|
||||||
|
| Crime data integration | Deferred to Portfolio Phase 4 |
|
||||||
|
| Historical neighbourhood boundary reconciliation (140→158) | Use 2021+ data only for V1 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Proposed Data Model
|
||||||
|
|
||||||
|
### Star Schema
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────┐
|
||||||
|
│ dim_time │
|
||||||
|
├──────────────────┤
|
||||||
|
│ date_key (PK) │
|
||||||
|
│ year │
|
||||||
|
│ month │
|
||||||
|
│ quarter │
|
||||||
|
│ month_name │
|
||||||
|
└──────────────────┘
|
||||||
|
│
|
||||||
|
┌─────────────────────────────────────────────┐
|
||||||
|
│ │ │
|
||||||
|
│
|
||||||
|
┌──────────────────┐ │ ┌──────────────────┐
|
||||||
|
│ dim_trreb_district│ │ │ dim_cmhc_zone │
|
||||||
|
├──────────────────┤ │ ├──────────────────┤
|
||||||
|
│ district_key (PK)│ │ │ zone_key (PK) │
|
||||||
|
│ district_code │ │ │ zone_code │
|
||||||
|
│ district_name │ │ │ zone_name │
|
||||||
|
│ area_type │ │ │ geometry │
|
||||||
|
│ geometry │
|
||||||
|
└──────────────────┘ │ │
|
||||||
|
│ │ │
|
||||||
|
│
|
||||||
|
┌──────────────────┐ │ ┌──────────────────┐
|
||||||
|
│ fact_purchases │ │ │ fact_rentals │
|
||||||
|
├──────────────────┤ │ ├──────────────────┤
|
||||||
|
│ date_key (FK) │ │ │ date_key (FK) │
|
||||||
|
│ district_key (FK)│ │ │ zone_key (FK) │
|
||||||
|
│ sales_count │ │ │ bedroom_type │
|
||||||
|
│ avg_price │ │ │ avg_rent │
|
||||||
|
│ median_price │ │ │ median_rent │
|
||||||
|
│ new_listings │ │ │ vacancy_rate │
|
||||||
|
│ active_listings │ │ │ universe │
|
||||||
|
│ avg_dom │ │ │ turnover_rate │
|
||||||
|
│ avg_sp_lp │ │ │ reliability_code │
|
||||||
|
└──────────────────┘ │ └──────────────────┘
|
||||||
|
│
|
||||||
|
|
||||||
|
┌───────────────────────────┐
|
||||||
|
│ dim_neighbourhood │
|
||||||
|
├───────────────────────────┤
|
||||||
|
│ neighbourhood_id (PK) │
|
||||||
|
│ name │
|
||||||
|
│ geometry │
|
||||||
|
│ population │
|
||||||
|
│ land_area_sqkm │
|
||||||
|
│ pop_density_per_sqkm │
|
||||||
|
│ pct_bachelors_or_higher │
|
||||||
|
│ median_household_income │
|
||||||
|
│ pct_owner_occupied │
|
||||||
|
│ pct_renter_occupied │
|
||||||
|
│ census_year │ ← For SCD tracking
|
||||||
|
└───────────────────────────┘
|
||||||
|
|
||||||
|
┌───────────────────────────┐
|
||||||
|
│ dim_policy_event │
|
||||||
|
├───────────────────────────┤
|
||||||
|
│ event_id (PK) │
|
||||||
|
│ event_date │
|
||||||
|
│ effective_date │
|
||||||
|
│ level │ ← federal/provincial/municipal
|
||||||
|
│ category │ ← monetary/tax/regulatory/supply/economic
|
||||||
|
│ title │
|
||||||
|
│ description │
|
||||||
|
│ expected_direction │ ← bearish/bullish/neutral
|
||||||
|
│ source_url │
|
||||||
|
│ confidence │ ← high/medium/low
|
||||||
|
│ created_at │
|
||||||
|
└───────────────────────────┘
|
||||||
|
|
||||||
|
┌───────────────────────────┐
|
||||||
|
│ bridge_district_neighbourhood │ ← Portfolio Phase 4 ONLY
|
||||||
|
├───────────────────────────┤
|
||||||
|
│ district_key (FK) │
|
||||||
|
│ neighbourhood_id (FK) │
|
||||||
|
│ area_overlap_pct │
|
||||||
|
│ population_overlap │ ← For pop-weighted agg
|
||||||
|
└───────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Notes**:
|
||||||
|
- `dim_neighbourhood` has no FK relationship to fact tables in V1
|
||||||
|
- `dim_policy_event` is standalone (no FK to facts); used for time-series annotation
|
||||||
|
- `bridge_district_neighbourhood` is Portfolio Phase 4 scope only
|
||||||
|
- Similar bridge table needed for CMHC zones in Portfolio Phase 4
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Structure
|
||||||
|
|
||||||
|
> **Note**: Toronto Housing data logic lives in `portfolio_app/toronto/`. See `portfolio_project_plan_v5.md` for full project structure.
|
||||||
|
|
||||||
|
### Data Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
data/
|
||||||
|
└── toronto/
|
||||||
|
├── raw/
|
||||||
|
│ ├── trreb/
|
||||||
|
│ │ └── market_watch_YYYY_MM.pdf
|
||||||
|
│ ├── cmhc/
|
||||||
|
│ │ └── rental_survey_YYYY.csv
|
||||||
|
│ ├── enrichment/
|
||||||
|
│ │ └── neighbourhood_profiles_2021.xlsx
|
||||||
|
│ └── geo/
|
||||||
|
│ ├── toronto_neighbourhoods.geojson
|
||||||
|
│ ├── trreb_districts.geojson ← (to be created via QGIS)
|
||||||
|
│ └── cmhc_zones.geojson ← (from R cmhc package)
|
||||||
|
│
|
||||||
|
├── processed/ ← gitignored
|
||||||
|
│ ├── fact_purchases.parquet
|
||||||
|
│ ├── fact_rentals.parquet
|
||||||
|
│ ├── dim_time.parquet
|
||||||
|
│ ├── dim_trreb_district.parquet
|
||||||
|
│ ├── dim_cmhc_zone.parquet
|
||||||
|
│ ├── dim_neighbourhood.parquet
|
||||||
|
│ └── dim_policy_event.parquet
|
||||||
|
│
|
||||||
|
└── reference/
|
||||||
|
├── policy_events.csv ← Curated event list
|
||||||
|
└── neighbourhood_boundary_changelog.md ← 140→158 notes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Module Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/toronto/
|
||||||
|
├── __init__.py
|
||||||
|
├── parsers/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── trreb.py # PDF extraction
|
||||||
|
│ └── cmhc.py # CSV processing
|
||||||
|
├── loaders/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ └── database.py # DB operations
|
||||||
|
├── schemas/ # Pydantic models
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── trreb.py
|
||||||
|
│ ├── cmhc.py
|
||||||
|
│ ├── enrichment.py
|
||||||
|
│ └── policy_event.py
|
||||||
|
├── models/ # SQLAlchemy ORM
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── base.py # DeclarativeBase, engine
|
||||||
|
│ ├── dimensions.py # dim_time, dim_trreb_district, dim_policy_event, etc.
|
||||||
|
│ └── facts.py # fact_purchases, fact_rentals
|
||||||
|
└── transforms/
|
||||||
|
└── __init__.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Notebooks
|
||||||
|
|
||||||
|
```
|
||||||
|
notebooks/
|
||||||
|
├── 01_trreb_pdf_extraction.ipynb
|
||||||
|
├── 02_cmhc_data_prep.ipynb
|
||||||
|
├── 03_geo_layer_prep.ipynb
|
||||||
|
├── 04_enrichment_data_prep.ipynb
|
||||||
|
├── 05_policy_events_curation.ipynb
|
||||||
|
└── 06_spatial_crosswalk.ipynb ← Portfolio Phase 4 only
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Implementation Checklist
|
||||||
|
|
||||||
|
> **Note**: These are **Stages** within the Toronto Housing project (Portfolio Phase 1). They are distinct from the overall portfolio **Phases** defined in `portfolio_project_plan_v5.md`.
|
||||||
|
|
||||||
|
### Stage 1: Data Acquisition
|
||||||
|
- [ ] Download TRREB monthly PDFs (2020-present as MVP)
|
||||||
|
- [ ] Register for CMHC portal and export Toronto rental data
|
||||||
|
- [ ] Extract CMHC zone boundaries via R `cmhc` package
|
||||||
|
- [ ] Download City of Toronto neighbourhood GeoJSON (158 boundaries)
|
||||||
|
- [ ] Digitize TRREB district boundaries in QGIS
|
||||||
|
- [ ] Download Neighbourhood Profiles (2021 Census, 158-model)
|
||||||
|
|
||||||
|
### Stage 2: Data Processing
|
||||||
|
- [ ] Build TRREB PDF parser (`portfolio_app/toronto/parsers/trreb.py`)
|
||||||
|
- [ ] Build Pydantic schemas (`portfolio_app/toronto/schemas/`)
|
||||||
|
- [ ] Build SQLAlchemy models (`portfolio_app/toronto/models/`)
|
||||||
|
- [ ] Extract and validate TRREB monthly summaries
|
||||||
|
- [ ] Clean and structure CMHC rental data
|
||||||
|
- [ ] Process Neighbourhood Profiles into `dim_neighbourhood`
|
||||||
|
- [ ] Curate and load policy events into `dim_policy_event`
|
||||||
|
- [ ] Create dimension tables
|
||||||
|
- [ ] Build fact tables
|
||||||
|
- [ ] Validate all geospatial layers use same CRS (WGS84/EPSG:4326)
|
||||||
|
|
||||||
|
### Stage 3: Visualization (V1)
|
||||||
|
- [ ] Create dashboard page (`portfolio_app/pages/toronto/dashboard.py`)
|
||||||
|
- [ ] Build choropleth figures (`portfolio_app/figures/choropleth.py`)
|
||||||
|
- [ ] Build time series figures (`portfolio_app/figures/time_series.py`)
|
||||||
|
- [ ] Design dashboard layout (purchase/rental toggle)
|
||||||
|
- [ ] Implement choropleth map with layer switching
|
||||||
|
- [ ] Add time slider/selector
|
||||||
|
- [ ] Build neighbourhood overlay (toggle-able)
|
||||||
|
- [ ] Add enrichment layer toggle (density/education/income choropleth)
|
||||||
|
- [ ] Add policy event markers on time series
|
||||||
|
- [ ] Add tooltips with cross-reference info ("This district contains...")
|
||||||
|
- [ ] Add tooltips showing enrichment metrics on hover
|
||||||
|
|
||||||
|
### Stage 4: Polish (V1)
|
||||||
|
- [ ] Add data source citations
|
||||||
|
- [ ] Document methodology (especially geographic limitations)
|
||||||
|
- [ ] Write docs (`docs/methodology.md`, `docs/data_sources.md`)
|
||||||
|
- [ ] Deploy to portfolio
|
||||||
|
|
||||||
|
### Future Enhancements (Portfolio Phase 4 — Post-Energy Project)
|
||||||
|
- [ ] Add crime data to dim_neighbourhood
|
||||||
|
- [ ] Build spatial crosswalk (neighbourhood ↔ district/zone intersections)
|
||||||
|
- [ ] Compute area-weighted and population-weighted aggregations
|
||||||
|
- [ ] Add aggregation method selector to UI
|
||||||
|
- [ ] Enable correlation analysis (price vs. enrichment metrics)
|
||||||
|
- [ ] Add historical neighbourhood boundary support (140→158)
|
||||||
|
|
||||||
|
**Deployment & dbt Architecture**: See `portfolio_project_plan_v5.md` for:
|
||||||
|
- dbt layer structure and testing strategy
|
||||||
|
- Deployment architecture
|
||||||
|
- Data quality framework
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## References & Links
|
||||||
|
|
||||||
|
### Core Housing Data
|
||||||
|
|
||||||
|
| Resource | URL |
|
||||||
|
|----------|-----|
|
||||||
|
| TRREB Market Watch | https://trreb.ca/index.php/market-news/market-watch |
|
||||||
|
| CMHC Housing Portal | https://www03.cmhc-schl.gc.ca/hmip-pimh/ |
|
||||||
|
|
||||||
|
### Geographic Boundaries
|
||||||
|
|
||||||
|
| Resource | URL |
|
||||||
|
|----------|-----|
|
||||||
|
| Toronto Neighbourhoods GeoJSON | https://github.com/jasonicarter/toronto-geojson |
|
||||||
|
| TRREB District Map (PDF) | https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf |
|
||||||
|
| Statistics Canada Census Tracts | https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/index-eng.cfm |
|
||||||
|
| R `cmhc` package (CRAN) | https://cran.r-project.org/package=cmhc |
|
||||||
|
|
||||||
|
### Enrichment Data
|
||||||
|
|
||||||
|
| Resource | URL |
|
||||||
|
|----------|-----|
|
||||||
|
| Toronto Open Data Portal | https://open.toronto.ca/ |
|
||||||
|
| Neighbourhood Profiles (CKAN) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/neighbourhood-profiles |
|
||||||
|
| Neighbourhood Profiles 2021 (Direct Download) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx |
|
||||||
|
|
||||||
|
### Policy Events Research
|
||||||
|
|
||||||
|
| Resource | URL |
|
||||||
|
|----------|-----|
|
||||||
|
| Bank of Canada Interest Rates | https://www.bankofcanada.ca/rates/interest-rates/ |
|
||||||
|
| OSFI (Stress Test Rules) | https://www.osfi-bsif.gc.ca/ |
|
||||||
|
| Ontario Legislature (Bills) | https://www.ola.org/ |
|
||||||
|
|
||||||
|
### Reference Documentation
|
||||||
|
|
||||||
|
| Resource | URL |
|
||||||
|
|----------|-----|
|
||||||
|
| Statistics Canada 2021 Census Reference | https://www12.statcan.gc.ca/census-recensement/2021/ref/index-eng.cfm |
|
||||||
|
| City of Toronto Neighbourhood Profiles Overview | https://www.toronto.ca/city-government/data-research-maps/neighbourhoods-communities/neighbourhood-profiles/ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Related Documents
|
||||||
|
|
||||||
|
| Document | Relationship | Use For |
|
||||||
|
|----------|--------------|---------|
|
||||||
|
| `portfolio_project_plan_v5.md` | Parent document | Overall scope, phasing, tech stack, deployment, dbt architecture, data quality framework |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Document Version: 5.1*
|
||||||
|
*Updated: January 2026*
|
||||||
|
*Project: Toronto Housing Price Dashboard — Portfolio Piece*
|
||||||
794
docs/wbs_sprint_plan_v4.md
Normal file
794
docs/wbs_sprint_plan_v4.md
Normal file
@@ -0,0 +1,794 @@
|
|||||||
|
# Work Breakdown Structure & Sprint Plan
|
||||||
|
|
||||||
|
**Project**: Toronto Housing Dashboard (Portfolio Phase 1)
|
||||||
|
**Version**: 4.1
|
||||||
|
**Date**: January 2026
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Document Context
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **Parent Documents** | `portfolio_project_plan_v5.md`, `toronto_housing_dashboard_spec_v5.md` |
|
||||||
|
| **Content Source** | `bio_content_v2.md` |
|
||||||
|
| **Role** | Executable sprint plan for Phase 1 delivery |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestones
|
||||||
|
|
||||||
|
| Milestone | Deliverable | Target Sprint |
|
||||||
|
|-----------|-------------|---------------|
|
||||||
|
| **Launch 1** | Bio Landing Page | Sprint 2 |
|
||||||
|
| **Launch 2** | Toronto Housing Dashboard | Sprint 6 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## WBS Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
1.0 Launch 1: Bio Landing Page
|
||||||
|
├── 1.1 Project Bootstrap
|
||||||
|
├── 1.2 Infrastructure
|
||||||
|
├── 1.3 Application Foundation
|
||||||
|
├── 1.4 Bio Page
|
||||||
|
└── 1.5 Deployment
|
||||||
|
|
||||||
|
2.0 Launch 2: Toronto Housing Dashboard
|
||||||
|
├── 2.1 Data Acquisition
|
||||||
|
├── 2.2 Data Processing
|
||||||
|
├── 2.3 Database Layer
|
||||||
|
├── 2.4 dbt Transformation
|
||||||
|
├── 2.5 Visualization
|
||||||
|
├── 2.6 Documentation
|
||||||
|
└── 2.7 Operations
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Launch 1: Bio Landing Page
|
||||||
|
|
||||||
|
### 1.1 Project Bootstrap
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 1.1.1 | Git repository initialization | — | Low | Low |
|
||||||
|
| 1.1.2 | Create `.gitignore` | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.3 | Create `pyproject.toml` | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.4 | Create `.python-version` (3.11+) | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.5 | Create `.env.example` | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.6 | Create `README.md` (initial) | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.7 | Create `CLAUDE.md` | 1.1.1 | Low | Low |
|
||||||
|
| 1.1.8 | Create `Makefile` with all targets | 1.1.3 | Low | Medium |
|
||||||
|
|
||||||
|
### 1.2 Infrastructure
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 1.2.1 | Python env setup (pyenv, venv, deps) | 1.1.3, 1.1.4 | Low | Low |
|
||||||
|
| 1.2.2 | Create `.pre-commit-config.yaml` | 1.2.1 | Low | Low |
|
||||||
|
| 1.2.3 | Install pre-commit hooks | 1.2.2 | Low | Low |
|
||||||
|
| 1.2.4 | Create `docker-compose.yml` (PostgreSQL + PostGIS) | 1.1.5 | Low | Low |
|
||||||
|
| 1.2.5 | Create `scripts/` directory structure | 1.1.1 | Low | Low |
|
||||||
|
| 1.2.6 | Create `scripts/docker/up.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.7 | Create `scripts/docker/down.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.8 | Create `scripts/docker/logs.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.9 | Create `scripts/docker/rebuild.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.10 | Create `scripts/db/init.sh` (PostGIS extension) | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.11 | Create `scripts/dev/setup.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.2.12 | Verify Docker + PostGIS working | 1.2.4, 1.2.10 | Low | Low |
|
||||||
|
|
||||||
|
### 1.3 Application Foundation
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 1.3.1 | Create `portfolio_app/` directory structure (full tree) | 1.2.1 | Low | Low |
|
||||||
|
| 1.3.2 | Create `portfolio_app/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 1.3.3 | Create `portfolio_app/config.py` (Pydantic BaseSettings) | 1.3.1 | Low | Medium |
|
||||||
|
| 1.3.4 | Create `portfolio_app/errors/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 1.3.5 | Create `portfolio_app/errors/exceptions.py` | 1.3.4 | Low | Low |
|
||||||
|
| 1.3.6 | Create `portfolio_app/errors/handlers.py` | 1.3.5 | Low | Medium |
|
||||||
|
| 1.3.7 | Create `portfolio_app/app.py` (Dash + Pages routing) | 1.3.3 | Low | Medium |
|
||||||
|
| 1.3.8 | Configure dash-mantine-components theme | 1.3.7 | Low | Low |
|
||||||
|
| 1.3.9 | Create `portfolio_app/assets/` directory | 1.3.1 | Low | Low |
|
||||||
|
| 1.3.10 | Create `portfolio_app/assets/styles.css` | 1.3.9 | Low | Medium |
|
||||||
|
| 1.3.11 | Create `portfolio_app/assets/variables.css` | 1.3.9 | Low | Low |
|
||||||
|
| 1.3.12 | Add `portfolio_app/assets/favicon.ico` | 1.3.9 | Low | Low |
|
||||||
|
| 1.3.13 | Create `portfolio_app/assets/images/` directory | 1.3.9 | Low | Low |
|
||||||
|
| 1.3.14 | Create `tests/` directory structure | 1.2.1 | Low | Low |
|
||||||
|
| 1.3.15 | Create `tests/__init__.py` | 1.3.14 | Low | Low |
|
||||||
|
| 1.3.16 | Create `tests/conftest.py` | 1.3.14 | Low | Medium |
|
||||||
|
| 1.3.17 | Configure pytest in `pyproject.toml` | 1.1.3, 1.3.14 | Low | Low |
|
||||||
|
|
||||||
|
### 1.4 Bio Page
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 1.4.1 | Create `portfolio_app/components/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 1.4.2 | Create `portfolio_app/components/navbar.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||||
|
| 1.4.3 | Create `portfolio_app/components/footer.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||||
|
| 1.4.4 | Create `portfolio_app/components/cards.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||||
|
| 1.4.5 | Create `portfolio_app/pages/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 1.4.6 | Create `portfolio_app/pages/home.py` (layout) | 1.4.5, 1.4.2, 1.4.3 | Low | Low |
|
||||||
|
| 1.4.7 | Integrate bio content from `bio_content_v2.md` | 1.4.6 | Low | Low |
|
||||||
|
| 1.4.8 | Replace social link placeholders with real URLs | 1.4.7 | Low | Low |
|
||||||
|
| 1.4.9 | Implement project cards (deployed/in-dev logic) | 1.4.4, 1.4.6 | Low | Low |
|
||||||
|
| 1.4.10 | Test bio page renders locally | 1.4.9 | Low | Low |
|
||||||
|
|
||||||
|
### 1.5 Deployment
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 1.5.1 | Install PostgreSQL + PostGIS on VPS | — | Low | Low |
|
||||||
|
| 1.5.2 | Configure firewall (ufw: SSH, HTTP, HTTPS) | 1.5.1 | Low | Low |
|
||||||
|
| 1.5.3 | Create application database user | 1.5.1 | Low | Low |
|
||||||
|
| 1.5.4 | Create Gunicorn systemd service file | 1.4.10 | Low | Low |
|
||||||
|
| 1.5.5 | Configure Nginx reverse proxy | 1.5.4 | Low | Low |
|
||||||
|
| 1.5.6 | Configure SSL (certbot) | 1.5.5 | Low | Low |
|
||||||
|
| 1.5.7 | Create `scripts/deploy/deploy.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.5.8 | Create `scripts/deploy/health-check.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 1.5.9 | Deploy bio page | 1.5.6, 1.5.7 | Low | Low |
|
||||||
|
| 1.5.10 | Verify HTTPS access | 1.5.9 | Low | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Launch 2: Toronto Housing Dashboard
|
||||||
|
|
||||||
|
### 2.1 Data Acquisition
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.1.1 | Define TRREB year scope + download PDFs | — | Low | Low |
|
||||||
|
| 2.1.2 | **HUMAN**: Digitize TRREB district boundaries (QGIS) | 2.1.1 | High | High |
|
||||||
|
| 2.1.3 | Register for CMHC portal | — | Low | Low |
|
||||||
|
| 2.1.4 | Export CMHC Toronto rental CSVs | 2.1.3 | Low | Low |
|
||||||
|
| 2.1.5 | Extract CMHC zone boundaries (R cmhc package) | 2.1.3 | Low | Medium |
|
||||||
|
| 2.1.6 | Download neighbourhoods GeoJSON (158 boundaries) | — | Low | Low |
|
||||||
|
| 2.1.7 | Download Neighbourhood Profiles 2021 (xlsx) | — | Low | Low |
|
||||||
|
| 2.1.8 | Validate CRS alignment (all geo files WGS84) | 2.1.2, 2.1.5, 2.1.6 | Low | Medium |
|
||||||
|
| 2.1.9 | Research Tier 1 policy events (10–20 events) | — | Mid | Medium |
|
||||||
|
| 2.1.10 | Create `data/toronto/reference/policy_events.csv` | 2.1.9 | Low | Low |
|
||||||
|
| 2.1.11 | Create `data/` directory structure | 1.3.1 | Low | Low |
|
||||||
|
| 2.1.12 | Organize raw files into `data/toronto/raw/` | 2.1.11 | Low | Low |
|
||||||
|
| 2.1.13 | Test TRREB parser across year boundaries | 2.2.3 | Low | Medium |
|
||||||
|
|
||||||
|
### 2.2 Data Processing
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.2.1 | Create `portfolio_app/toronto/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 2.2.2 | Create `portfolio_app/toronto/parsers/__init__.py` | 2.2.1 | Low | Low |
|
||||||
|
| 2.2.3 | Build TRREB PDF parser (`parsers/trreb.py`) | 2.2.2, 2.1.1 | Mid | High |
|
||||||
|
| 2.2.4 | TRREB data cleaning/normalization | 2.2.3 | Low | Medium |
|
||||||
|
| 2.2.5 | TRREB parser unit tests | 2.2.4 | Low | Low |
|
||||||
|
| 2.2.6 | Build CMHC CSV processor (`parsers/cmhc.py`) | 2.2.2, 2.1.4 | Low | Low |
|
||||||
|
| 2.2.7 | CMHC reliability code handling | 2.2.6 | Low | Low |
|
||||||
|
| 2.2.8 | CMHC processor unit tests | 2.2.7 | Low | Low |
|
||||||
|
| 2.2.9 | Build Neighbourhood Profiles parser | 2.2.1, 2.1.7 | Low | Low |
|
||||||
|
| 2.2.10 | Policy events CSV loader | 2.2.1, 2.1.10 | Low | Low |
|
||||||
|
|
||||||
|
### 2.3 Database Layer
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.3.1 | Create `portfolio_app/toronto/schemas/__init__.py` | 2.2.1 | Low | Low |
|
||||||
|
| 2.3.2 | Create TRREB Pydantic schemas (`schemas/trreb.py`) | 2.3.1 | Low | Medium |
|
||||||
|
| 2.3.3 | Create CMHC Pydantic schemas (`schemas/cmhc.py`) | 2.3.1 | Low | Medium |
|
||||||
|
| 2.3.4 | Create enrichment Pydantic schemas (`schemas/enrichment.py`) | 2.3.1 | Low | Low |
|
||||||
|
| 2.3.5 | Create policy event Pydantic schema (`schemas/policy_event.py`) | 2.3.1 | Low | Low |
|
||||||
|
| 2.3.6 | Create `portfolio_app/toronto/models/__init__.py` | 2.2.1 | Low | Low |
|
||||||
|
| 2.3.7 | Create SQLAlchemy base (`models/base.py`) | 2.3.6, 1.3.3 | Low | Medium |
|
||||||
|
| 2.3.8 | Create dimension models (`models/dimensions.py`) | 2.3.7 | Low | Medium |
|
||||||
|
| 2.3.9 | Create fact models (`models/facts.py`) | 2.3.8 | Low | Medium |
|
||||||
|
| 2.3.10 | Create `portfolio_app/toronto/loaders/__init__.py` | 2.2.1 | Low | Low |
|
||||||
|
| 2.3.11 | Create dimension loaders (`loaders/database.py`) | 2.3.10, 2.3.8 | Low | Medium |
|
||||||
|
| 2.3.12 | Create fact loaders | 2.3.11, 2.3.9, 2.2.4, 2.2.7 | Mid | Medium |
|
||||||
|
| 2.3.13 | Loader integration tests | 2.3.12 | Low | Medium |
|
||||||
|
| 2.3.14 | Create SQL views for dashboard queries | 2.3.12 | Low | Medium |
|
||||||
|
|
||||||
|
### 2.4 dbt Transformation
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.4.1 | Create `dbt/` directory structure | 1.3.1 | Low | Low |
|
||||||
|
| 2.4.2 | Create `dbt/dbt_project.yml` | 2.4.1 | Low | Low |
|
||||||
|
| 2.4.3 | Create `dbt/profiles.yml` | 2.4.1, 1.3.3 | Low | Low |
|
||||||
|
| 2.4.4 | Create `scripts/dbt/run.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.4.5 | Create `scripts/dbt/test.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.4.6 | Create `scripts/dbt/docs.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.4.7 | Create `scripts/dbt/fresh.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.4.8 | Create staging models (`stg_trreb__monthly`, `stg_cmhc__rental`) | 2.4.3, 2.3.12 | Low | Medium |
|
||||||
|
| 2.4.9 | Create intermediate models | 2.4.8 | Low | Medium |
|
||||||
|
| 2.4.10 | Create mart models | 2.4.9 | Low | Medium |
|
||||||
|
| 2.4.11 | Create dbt schema tests (unique, not_null, relationships) | 2.4.10 | Low | Medium |
|
||||||
|
| 2.4.12 | Create custom dbt tests (anomaly detection) | 2.4.11 | Low | Medium |
|
||||||
|
| 2.4.13 | Create dbt documentation (schema.yml) | 2.4.10 | Low | Low |
|
||||||
|
|
||||||
|
### 2.5 Visualization
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.5.1 | Create `portfolio_app/figures/__init__.py` | 1.3.1 | Low | Low |
|
||||||
|
| 2.5.2 | Build choropleth factory (`figures/choropleth.py`) | 2.5.1, 2.1.8 | Mid | Medium |
|
||||||
|
| 2.5.3 | Build time series factory (`figures/time_series.py`) | 2.5.1 | Low | Medium |
|
||||||
|
| 2.5.4 | Build YoY change chart factory (`figures/statistical.py`) | 2.5.1 | Low | Medium |
|
||||||
|
| 2.5.5 | Build seasonality decomposition chart | 2.5.4 | Low | Medium |
|
||||||
|
| 2.5.6 | Build district correlation matrix chart | 2.5.4 | Low | Medium |
|
||||||
|
| 2.5.7 | Create `portfolio_app/pages/toronto/__init__.py` | 1.4.5 | Low | Low |
|
||||||
|
| 2.5.8 | Create `portfolio_app/pages/toronto/dashboard.py` (layout only) | 2.5.7, 1.4.2, 1.4.3 | Mid | High |
|
||||||
|
| 2.5.9 | Implement purchase/rental mode toggle | 2.5.8 | Low | Low |
|
||||||
|
| 2.5.10 | Implement monthly time slider | 2.5.8 | Low | Medium |
|
||||||
|
| 2.5.11 | Implement annual time selector (CMHC) | 2.5.8 | Low | Low |
|
||||||
|
| 2.5.12 | Implement layer toggles (districts/zones/neighbourhoods) | 2.5.8 | Low | Medium |
|
||||||
|
| 2.5.13 | Create `portfolio_app/pages/toronto/callbacks/__init__.py` | 2.5.7 | Low | Low |
|
||||||
|
| 2.5.14 | Create `callbacks/map_callbacks.py` | 2.5.13, 2.5.2 | Mid | Medium |
|
||||||
|
| 2.5.15 | Create `callbacks/filter_callbacks.py` | 2.5.13 | Low | Medium |
|
||||||
|
| 2.5.16 | Create `callbacks/timeseries_callbacks.py` | 2.5.13, 2.5.3 | Low | Medium |
|
||||||
|
| 2.5.17 | Implement district/zone tooltips | 2.5.14 | Low | Low |
|
||||||
|
| 2.5.18 | Implement neighbourhood overlay | 2.5.14, 2.1.6 | Low | Medium |
|
||||||
|
| 2.5.19 | Implement enrichment layer toggle | 2.5.18 | Low | Medium |
|
||||||
|
| 2.5.20 | Implement policy event markers on time series | 2.5.16, 2.2.10 | Low | Medium |
|
||||||
|
| 2.5.21 | Implement "district contains neighbourhoods" tooltip | 2.5.17 | Low | Low |
|
||||||
|
| 2.5.22 | Test dashboard renders with sample data | 2.5.20 | Low | Medium |
|
||||||
|
|
||||||
|
### 2.6 Documentation
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.6.1 | Create `docs/` directory | 1.3.1 | Low | Low |
|
||||||
|
| 2.6.2 | Write `docs/methodology.md` (geographic limitations) | 2.5.22 | Low | Medium |
|
||||||
|
| 2.6.3 | Write `docs/data_sources.md` (citations) | 2.5.22 | Low | Low |
|
||||||
|
| 2.6.4 | Write `docs/user_guide.md` | 2.5.22 | Low | Low |
|
||||||
|
| 2.6.5 | Update `README.md` (final) | 2.6.2, 2.6.3 | Low | Low |
|
||||||
|
| 2.6.6 | Update `CLAUDE.md` (final) | 2.6.5 | Low | Low |
|
||||||
|
|
||||||
|
### 2.7 Operations
|
||||||
|
|
||||||
|
| ID | Task | Depends On | Effort | Complexity |
|
||||||
|
|----|------|------------|--------|------------|
|
||||||
|
| 2.7.1 | Create `scripts/db/backup.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.7.2 | Create `scripts/db/restore.sh` | 1.2.5 | Low | Low |
|
||||||
|
| 2.7.3 | Create `scripts/db/reset.sh` (dev only) | 1.2.5 | Low | Low |
|
||||||
|
| 2.7.4 | Create `scripts/deploy/rollback.sh` | 1.2.5 | Low | Medium |
|
||||||
|
| 2.7.5 | Implement backup retention policy | 2.7.1 | Low | Low |
|
||||||
|
| 2.7.6 | Add `/health` endpoint | 2.5.8 | Low | Low |
|
||||||
|
| 2.7.7 | Configure uptime monitoring (external) | 2.7.6 | Low | Low |
|
||||||
|
| 2.7.8 | Deploy Toronto dashboard | 1.5.9, 2.5.22 | Low | Low |
|
||||||
|
| 2.7.9 | Verify production deployment | 2.7.8 | Low | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## L3 Task Details
|
||||||
|
|
||||||
|
### 1.1 Project Bootstrap
|
||||||
|
|
||||||
|
#### 1.1.1 Git repository initialization
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Initialize git repo with main branch |
|
||||||
|
| **How** | `git init`, initial commit |
|
||||||
|
| **Inputs** | — |
|
||||||
|
| **Outputs** | `.git/` directory |
|
||||||
|
| **Why** | Version control foundation |
|
||||||
|
|
||||||
|
#### 1.1.2 Create `.gitignore`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Git ignore rules per project plan |
|
||||||
|
| **How** | Create file with patterns for: `.env`, `data/*/processed/`, `reports/`, `backups/`, `notebooks/*.html`, `__pycache__/`, `.venv/` |
|
||||||
|
| **Inputs** | Project plan → Directory Rules |
|
||||||
|
| **Outputs** | `.gitignore` |
|
||||||
|
|
||||||
|
#### 1.1.3 Create `pyproject.toml`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Python packaging config |
|
||||||
|
| **How** | Define project metadata, dependencies, tool configs (ruff, mypy, pytest) |
|
||||||
|
| **Inputs** | Tech stack versions from project plan |
|
||||||
|
| **Outputs** | `pyproject.toml` |
|
||||||
|
| **Dependencies** | PostgreSQL 16.x, Pydantic ≥2.0, SQLAlchemy ≥2.0, dbt-postgres ≥1.7, Pandas ≥2.1, GeoPandas ≥0.14, Dash ≥2.14, dash-mantine-components (latest), pytest ≥7.0 |
|
||||||
|
|
||||||
|
#### 1.1.4 Create `.python-version`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | pyenv version file |
|
||||||
|
| **How** | Single line: `3.11` or specific patch version |
|
||||||
|
| **Outputs** | `.python-version` |
|
||||||
|
|
||||||
|
#### 1.1.5 Create `.env.example`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Environment variable template |
|
||||||
|
| **How** | Template with: DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |
|
||||||
|
| **Inputs** | Project plan → Environment Setup |
|
||||||
|
| **Outputs** | `.env.example` |
|
||||||
|
|
||||||
|
#### 1.1.6 Create `README.md` (initial)
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Project overview stub |
|
||||||
|
| **How** | Title, brief description, "Setup coming soon" |
|
||||||
|
| **Outputs** | `README.md` |
|
||||||
|
|
||||||
|
#### 1.1.7 Create `CLAUDE.md`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | AI assistant context file |
|
||||||
|
| **How** | Project context, architecture decisions, patterns, conventions |
|
||||||
|
| **Inputs** | Project plan → Code Architecture |
|
||||||
|
| **Outputs** | `CLAUDE.md` |
|
||||||
|
| **Why** | Claude Code effectiveness from day 1 |
|
||||||
|
|
||||||
|
#### 1.1.8 Create `Makefile`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | All make targets from project plan |
|
||||||
|
| **How** | Implement targets: setup, venv, clean, docker-up/down/logs/rebuild, db-init/backup/restore/reset, run, run-prod, dbt-run/test/docs/fresh, test, test-cov, lint, format, typecheck, ci, deploy, rollback |
|
||||||
|
| **Inputs** | Project plan → Makefile Targets |
|
||||||
|
| **Outputs** | `Makefile` |
|
||||||
|
|
||||||
|
### 1.2 Infrastructure
|
||||||
|
|
||||||
|
#### 1.2.4 Create `docker-compose.yml`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Docker Compose V2 for PostgreSQL 16 + PostGIS |
|
||||||
|
| **How** | Service definition, volume mounts, port 5432, env vars from `.env` |
|
||||||
|
| **Inputs** | `.env.example` |
|
||||||
|
| **Outputs** | `docker-compose.yml` |
|
||||||
|
| **Note** | No `version` field (Docker Compose V2) |
|
||||||
|
|
||||||
|
#### 1.2.5 Create `scripts/` directory structure
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Full scripts tree per project plan |
|
||||||
|
| **How** | `mkdir -p scripts/{db,docker,deploy,dbt,dev}` |
|
||||||
|
| **Outputs** | `scripts/db/`, `scripts/docker/`, `scripts/deploy/`, `scripts/dbt/`, `scripts/dev/` |
|
||||||
|
|
||||||
|
#### 1.2.10 Create `scripts/db/init.sh`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Database initialization with PostGIS |
|
||||||
|
| **How** | `CREATE DATABASE`, `CREATE EXTENSION postgis`, schema creation |
|
||||||
|
| **Standard** | `set -euo pipefail`, usage comment, idempotent |
|
||||||
|
| **Outputs** | `scripts/db/init.sh` |
|
||||||
|
|
||||||
|
### 1.3 Application Foundation
|
||||||
|
|
||||||
|
#### 1.3.1 Create `portfolio_app/` directory structure
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Full application tree per project plan |
|
||||||
|
| **Directories** | `portfolio_app/`, `portfolio_app/assets/`, `portfolio_app/assets/images/`, `portfolio_app/pages/`, `portfolio_app/pages/toronto/`, `portfolio_app/pages/toronto/callbacks/`, `portfolio_app/components/`, `portfolio_app/figures/`, `portfolio_app/toronto/`, `portfolio_app/toronto/parsers/`, `portfolio_app/toronto/loaders/`, `portfolio_app/toronto/schemas/`, `portfolio_app/toronto/models/`, `portfolio_app/toronto/transforms/`, `portfolio_app/errors/` |
|
||||||
|
| **Pattern** | Callbacks in `pages/{dashboard}/callbacks/` per project plan |
|
||||||
|
|
||||||
|
#### 1.3.3 Create `config.py`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Pydantic BaseSettings for config |
|
||||||
|
| **How** | Settings class loading from `.env` |
|
||||||
|
| **Fields** | DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |
|
||||||
|
|
||||||
|
#### 1.3.5 Create `exceptions.py`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Exception hierarchy per project plan |
|
||||||
|
| **Classes** | `PortfolioError` (base), `ParseError`, `ValidationError`, `LoadError` |
|
||||||
|
|
||||||
|
#### 1.3.6 Create `handlers.py`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Error handling decorators |
|
||||||
|
| **How** | Decorators for: logging/re-raise, retry logic, transaction boundaries, timing |
|
||||||
|
| **Pattern** | Infrastructure concerns only; domain logic uses explicit handling |
|
||||||
|
|
||||||
|
#### 1.3.7 Create `app.py`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Dash app factory with Pages routing |
|
||||||
|
| **How** | `Dash(__name__, use_pages=True)`, MantineProvider wrapper |
|
||||||
|
| **Imports** | External: absolute; Internal: relative (dot notation) |
|
||||||
|
|
||||||
|
#### 1.3.16 Create `conftest.py`
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | pytest fixtures |
|
||||||
|
| **How** | Test database fixture, sample data fixtures, app client fixture |
|
||||||
|
|
||||||
|
### 1.4 Bio Page
|
||||||
|
|
||||||
|
#### 1.4.7 Integrate bio content
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Content from `bio_content_v2.md` |
|
||||||
|
| **Sections** | Headline, Professional Summary, Tech Stack, Side Project, Availability |
|
||||||
|
| **Layout** | Hero → Summary → Tech Stack → Project Cards → Social Links → Availability |
|
||||||
|
|
||||||
|
#### 1.4.8 Replace social link placeholders
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Replace `[USERNAME]` in LinkedIn/GitHub URLs |
|
||||||
|
| **Source** | `bio_content_v2.md` → Social Links |
|
||||||
|
| **Acceptance** | No placeholder text in production |
|
||||||
|
|
||||||
|
#### 1.4.9 Implement project cards
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Dynamic project card display |
|
||||||
|
| **Logic** | Show deployed projects with links; show "In Development" for in-progress projects; hide or grey out planned projects |
|
||||||
|
| **Source** | `bio_content_v2.md` → Portfolio Projects Section |
|
||||||
|
|
||||||
|
### 2.1 Data Acquisition
|
||||||
|
|
||||||
|
#### 2.1.1 Define TRREB year scope + download PDFs
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Decide which years to parse for V1, download PDFs |
|
||||||
|
| **Decision** | 2020—present for V1 (manageable scope, consistent PDF format). Expand to 2007+ in future if needed. |
|
||||||
|
| **Output** | `data/toronto/raw/trreb/market_watch_YYYY_MM.pdf` |
|
||||||
|
| **Note** | PDF format may vary pre-2018; test before committing to older years |
|
||||||
|
|
||||||
|
#### 2.1.2 Digitize TRREB district boundaries
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | GeoJSON with ~35 district polygons |
|
||||||
|
| **Tool** | QGIS |
|
||||||
|
| **Process** | Import PDF as raster → create vector layer → trace polygons → add attributes (district_code, district_name, area_type) → export GeoJSON (WGS84/EPSG:4326) |
|
||||||
|
| **Input** | TRREB Toronto.pdf map |
|
||||||
|
| **Output** | `data/toronto/raw/geo/trreb_districts.geojson` |
|
||||||
|
| **Effort** | High |
|
||||||
|
| **Complexity** | High |
|
||||||
|
| **Note** | HUMAN TASK — not automatable |
|
||||||
|
|
||||||
|
#### 2.1.5 Extract CMHC zone boundaries
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | GeoJSON with ~20 zone polygons |
|
||||||
|
| **Tool** | R with cmhc and sf packages |
|
||||||
|
| **Process** | `get_cmhc_geography(geography_type="ZONE", cma="Toronto")` → `st_write()` to GeoJSON |
|
||||||
|
| **Output** | `data/toronto/raw/geo/cmhc_zones.geojson` |
|
||||||
|
|
||||||
|
#### 2.1.9 Research Tier 1 policy events
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Federal/provincial policy events with dates, descriptions, expected direction |
|
||||||
|
| **Sources** | Bank of Canada, OSFI, Ontario Legislature |
|
||||||
|
| **Schema** | event_date, effective_date, level, category, title, description, expected_direction, source_url, confidence |
|
||||||
|
| **Acceptance** | Minimum 10 events, maximum 20 |
|
||||||
|
| **Examples** | BoC rate decisions, OSFI B-20, Ontario Fair Housing Plan, foreign buyer tax |
|
||||||
|
|
||||||
|
#### 2.1.13 Test TRREB parser across year boundaries
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Verify parser handles PDFs from different years |
|
||||||
|
| **Test Cases** | 2020 Q1, 2022 Q1, 2024 Q1 (minimum) |
|
||||||
|
| **Check For** | Table structure changes, column naming variations, page number shifts |
|
||||||
|
| **Output** | Documented format variations, parser fallbacks if needed |
|
||||||
|
|
||||||
|
### 2.2 Data Processing
|
||||||
|
|
||||||
|
#### 2.2.3 Build TRREB PDF parser
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Extract summary tables from TRREB PDFs |
|
||||||
|
| **Tool** | pdfplumber or camelot-py |
|
||||||
|
| **Location** | Pages 3-4 (Summary by Area) |
|
||||||
|
| **Fields** | report_date, area_code, area_name, area_type, sales, dollar_volume, avg_price, median_price, new_listings, active_listings, avg_sp_lp, avg_dom |
|
||||||
|
| **Output** | `portfolio_app/toronto/parsers/trreb.py` |
|
||||||
|
|
||||||
|
#### 2.2.7 CMHC reliability code handling
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Parse reliability codes, handle suppression |
|
||||||
|
| **Codes** | `a` (excellent), `b` (good), `c` (fair), `d` (poor/caution), `**` (suppressed → NULL) |
|
||||||
|
| **Implementation** | Pydantic validators, enum type |
|
||||||
|
|
||||||
|
### 2.3 Database Layer
|
||||||
|
|
||||||
|
#### 2.3.8 Create dimension models
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | SQLAlchemy 2.0 models for dimensions |
|
||||||
|
| **Tables** | `dim_time`, `dim_trreb_district`, `dim_cmhc_zone`, `dim_neighbourhood`, `dim_policy_event` |
|
||||||
|
| **Geometry** | PostGIS geometry columns for districts, zones, neighbourhoods |
|
||||||
|
| **Note** | `dim_neighbourhood` has no FK to facts in V1 |
|
||||||
|
|
||||||
|
#### 2.3.9 Create fact models
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | SQLAlchemy 2.0 models for facts |
|
||||||
|
| **Tables** | `fact_purchases`, `fact_rentals` |
|
||||||
|
| **FKs** | fact_purchases → dim_time, dim_trreb_district; fact_rentals → dim_time, dim_cmhc_zone |
|
||||||
|
|
||||||
|
### 2.4 dbt Transformation
|
||||||
|
|
||||||
|
#### 2.4.8 Create staging models
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | 1:1 source mapping, cleaned and typed |
|
||||||
|
| **Models** | `stg_trreb__monthly`, `stg_cmhc__rental` |
|
||||||
|
| **Naming** | `stg_{source}__{entity}` |
|
||||||
|
|
||||||
|
#### 2.4.11 Create dbt schema tests
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Data quality tests |
|
||||||
|
| **Tests** | `unique` (PKs), `not_null` (required), `accepted_values` (reliability codes, area_type), `relationships` (FK integrity) |
|
||||||
|
|
||||||
|
#### 2.4.12 Create custom dbt tests
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Anomaly detection rules |
|
||||||
|
| **Rules** | Price MoM change >30% → flag; missing districts → fail; duplicate records → fail |
|
||||||
|
|
||||||
|
### 2.5 Visualization
|
||||||
|
|
||||||
|
#### 2.5.2 Build choropleth factory
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Reusable choropleth_mapbox figure generator |
|
||||||
|
| **Inputs** | GeoDataFrame, metric column, color config |
|
||||||
|
| **Output** | Plotly figure |
|
||||||
|
| **Location** | `portfolio_app/figures/choropleth.py` |
|
||||||
|
|
||||||
|
#### 2.5.4—2.5.6 Statistical chart factories
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Statistical analysis visualizations |
|
||||||
|
| **Charts** | YoY change with variance bands, seasonality decomposition, district correlation matrix |
|
||||||
|
| **Location** | `portfolio_app/figures/statistical.py` |
|
||||||
|
| **Why** | Required skill demonstration per project plan |
|
||||||
|
|
||||||
|
#### 2.5.8 Create dashboard layout
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Toronto dashboard page structure |
|
||||||
|
| **File** | `portfolio_app/pages/toronto/dashboard.py` |
|
||||||
|
| **Pattern** | Layout only — no callbacks in this file |
|
||||||
|
| **Components** | Navbar, choropleth map, time controls, layer toggles, time series panel, statistics panel, footer |
|
||||||
|
|
||||||
|
#### 2.5.13—2.5.16 Create callbacks
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Dashboard interaction logic |
|
||||||
|
| **Location** | `portfolio_app/pages/toronto/callbacks/` |
|
||||||
|
| **Files** | `__init__.py`, `map_callbacks.py`, `filter_callbacks.py`, `timeseries_callbacks.py` |
|
||||||
|
| **Pattern** | Separate from layout per project plan callback separation pattern |
|
||||||
|
| **Registration** | Import the callback modules in `callbacks/__init__.py`, then import that package in `dashboard.py`. Callbacks register with the app as a side effect of their module being imported, so no further wiring is needed. |
|
||||||
|
|
||||||
|
#### 2.5.22 Test dashboard renders with sample data
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| **What** | Verify dashboard works end-to-end |
|
||||||
|
| **Sample Data** | Use output from task 2.3.12 (fact loaders). Run loaders with subset of parsed data before this task. |
|
||||||
|
| **Verify** | Choropleth renders, time controls work, tooltips display, no console errors |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sprint Plan
|
||||||
|
|
||||||
|
### Sprint 1: Project Bootstrap + Start TRREB Digitization
|
||||||
|
|
||||||
|
**Goal**: Dev environment working, repo initialized, TRREB digitization started
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 1.1.1 | Git repo init | Low |
|
||||||
|
| 1.1.2 | .gitignore | Low |
|
||||||
|
| 1.1.3 | pyproject.toml | Low |
|
||||||
|
| 1.1.4 | .python-version | Low |
|
||||||
|
| 1.1.5 | .env.example | Low |
|
||||||
|
| 1.1.6 | README.md (initial) | Low |
|
||||||
|
| 1.1.7 | CLAUDE.md | Low |
|
||||||
|
| 1.1.8 | Makefile | Low |
|
||||||
|
| 1.2.1 | Python env setup | Low |
|
||||||
|
| 1.2.2 | .pre-commit-config.yaml | Low |
|
||||||
|
| 1.2.3 | Install pre-commit | Low |
|
||||||
|
| 1.2.4 | docker-compose.yml | Low |
|
||||||
|
| 1.2.5 | scripts/ directory structure | Low |
|
||||||
|
| 1.2.6—1.2.9 | Docker scripts | Low |
|
||||||
|
| 1.2.10 | scripts/db/init.sh | Low |
|
||||||
|
| 1.2.11 | scripts/dev/setup.sh | Low |
|
||||||
|
| 1.2.12 | Verify Docker + PostGIS | Low |
|
||||||
|
| 1.3.1 | portfolio_app/ directory structure | Low |
|
||||||
|
| 1.3.2—1.3.6 | App foundation files | Low |
|
||||||
|
| 1.3.14—1.3.17 | Test infrastructure | Low |
|
||||||
|
| 2.1.1 | Download TRREB PDFs | Low |
|
||||||
|
| 2.1.2 | **START** TRREB boundaries (HUMAN) | High |
|
||||||
|
| 2.1.9 | **START** Policy events research | Mid |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 2: Bio Page + Data Acquisition
|
||||||
|
|
||||||
|
**Goal**: Bio live, all raw data downloaded
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 1.3.7 | app.py with Pages | Low |
|
||||||
|
| 1.3.8 | Theme config | Low |
|
||||||
|
| 1.3.9—1.3.13 | Assets directory + files | Low |
|
||||||
|
| 1.4.1—1.4.4 | Components | Low |
|
||||||
|
| 1.4.5—1.4.10 | Bio page | Low |
|
||||||
|
| 1.5.1—1.5.3 | VPS setup | Low |
|
||||||
|
| 1.5.4—1.5.6 | Gunicorn/Nginx/SSL | Low |
|
||||||
|
| 1.5.7—1.5.8 | Deploy scripts | Low |
|
||||||
|
| 1.5.9—1.5.10 | Deploy + verify | Low |
|
||||||
|
| 2.1.2 | **CONTINUE** TRREB boundaries | High |
|
||||||
|
| 2.1.3—2.1.4 | CMHC registration + export | Low |
|
||||||
|
| 2.1.5 | CMHC zone boundaries (R) | Low |
|
||||||
|
| 2.1.6 | Neighbourhoods GeoJSON | Low |
|
||||||
|
| 2.1.7 | Neighbourhood Profiles download | Low |
|
||||||
|
| 2.1.9 | **CONTINUE** Policy events research | Mid |
|
||||||
|
| 2.1.10 | policy_events.csv | Low |
|
||||||
|
| 2.1.11—2.1.12 | data/ directory + organize | Low |
|
||||||
|
|
||||||
|
**Milestone**: **Launch 1 — Bio Live**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 3: Parsers + Schemas + Models
|
||||||
|
|
||||||
|
**Goal**: ETL pipeline working, database layer complete
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 2.1.2 | **COMPLETE** TRREB boundaries | High |
|
||||||
|
| 2.1.8 | CRS validation | Low |
|
||||||
|
| 2.2.1—2.2.2 | Toronto module init | Low |
|
||||||
|
| 2.2.3—2.2.5 | TRREB parser + tests | Mid |
|
||||||
|
| 2.2.6—2.2.8 | CMHC processor + tests | Low |
|
||||||
|
| 2.2.9 | Neighbourhood Profiles parser | Low |
|
||||||
|
| 2.2.10 | Policy events loader | Low |
|
||||||
|
| 2.3.1—2.3.5 | Pydantic schemas | Low |
|
||||||
|
| 2.3.6—2.3.9 | SQLAlchemy models | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 4: Loaders + dbt
|
||||||
|
|
||||||
|
**Goal**: Data loaded, transformation layer ready
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 2.3.10—2.3.13 | Loaders + tests | Mid |
|
||||||
|
| 2.3.14 | SQL views | Low |
|
||||||
|
| 2.4.1—2.4.7 | dbt setup + scripts | Low |
|
||||||
|
| 2.4.8—2.4.10 | dbt models | Low |
|
||||||
|
| 2.4.11—2.4.12 | dbt tests | Low |
|
||||||
|
| 2.4.13 | dbt documentation | Low |
|
||||||
|
| 2.7.1—2.7.3 | DB backup/restore scripts | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 5: Visualization
|
||||||
|
|
||||||
|
**Goal**: Dashboard functional
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 2.5.1—2.5.6 | Figure factories | Mid |
|
||||||
|
| 2.5.7—2.5.12 | Dashboard layout + controls | Mid |
|
||||||
|
| 2.5.13—2.5.16 | Callbacks | Mid |
|
||||||
|
| 2.5.17—2.5.21 | Tooltips + overlays + markers | Low |
|
||||||
|
| 2.5.22 | Test dashboard | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 6: Polish + Launch 2
|
||||||
|
|
||||||
|
**Goal**: Dashboard deployed
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| 2.6.1—2.6.6 | Documentation | Low |
|
||||||
|
| 2.7.4—2.7.5 | Rollback script + retention | Low |
|
||||||
|
| 2.7.6—2.7.7 | Health endpoint + monitoring | Low |
|
||||||
|
| 2.7.8—2.7.9 | Deploy + verify | Low |
|
||||||
|
|
||||||
|
**Milestone**: **Launch 2 — Toronto Dashboard Live**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Sprint 7: Buffer
|
||||||
|
|
||||||
|
**Goal**: Contingency for slippage, bug fixes
|
||||||
|
|
||||||
|
| Task ID | Task | Effort |
|
||||||
|
|---------|------|--------|
|
||||||
|
| — | Overflow from previous sprints | Varies |
|
||||||
|
| — | Bug fixes | Varies |
|
||||||
|
| — | UX polish | Low |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sprint Summary
|
||||||
|
|
||||||
|
| Sprint | Focus | Key Risk | Milestone |
|
||||||
|
|--------|-------|----------|-----------|
|
||||||
|
| 1 | Bootstrap + start boundaries | — | — |
|
||||||
|
| 2 | Bio + data acquisition | TRREB digitization | Launch 1 |
|
||||||
|
| 3 | Parsers + DB layer | PDF parser, boundaries | — |
|
||||||
|
| 4 | Loaders + dbt | — | — |
|
||||||
|
| 5 | Visualization | Choropleth complexity | — |
|
||||||
|
| 6 | Polish + deploy | — | Launch 2 |
|
||||||
|
| 7 | Buffer | — | — |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dependency Graph
|
||||||
|
|
||||||
|
### Launch 1 Critical Path
|
||||||
|
```
|
||||||
|
1.1.1 → 1.1.3 → 1.2.1 → 1.3.1 → 1.3.7 → 1.4.6 → 1.4.10 → 1.5.9 → 1.5.10
|
||||||
|
```
|
||||||
|
|
||||||
|
### Launch 2 Critical Path
|
||||||
|
```
|
||||||
|
2.1.2 (TRREB boundaries) ─┬→ 2.1.8 (CRS) → 2.5.2 (choropleth) → 2.5.8 (layout) → 2.5.22 (test) → 2.7.8 (deploy)
|
||||||
|
│
|
||||||
|
2.1.1 → 2.2.3 (parser) → 2.2.4 → 2.3.12 (loaders) → 2.4.8 (dbt) ─┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Parallel Tracks (can run simultaneously)
|
||||||
|
|
||||||
|
| Track | Tasks | Can Start |
|
||||||
|
|-------|-------|-----------|
|
||||||
|
| **A: TRREB Boundaries** | 2.1.1 → 2.1.2 | Sprint 1 |
|
||||||
|
| **B: TRREB Parser** | 2.2.3—2.2.5 | Sprint 2 (after PDFs) |
|
||||||
|
| **C: CMHC** | 2.1.3—2.1.5 → 2.2.6—2.2.8 | Sprint 2 |
|
||||||
|
| **D: Enrichment** | 2.1.6—2.1.7 → 2.2.9 | Sprint 2 |
|
||||||
|
| **E: Policy Events** | 2.1.9—2.1.10 → 2.2.10 | Sprint 1—2 |
|
||||||
|
| **F: Schemas/Models** | 2.3.1—2.3.9 | Sprint 3 (after parsers) |
|
||||||
|
| **G: dbt** | 2.4.* | Sprint 4 (after loaders) |
|
||||||
|
| **H: Ops Scripts** | 2.7.1—2.7.5 | Sprint 4 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Risk Register
|
||||||
|
|
||||||
|
| Risk | Likelihood | Impact | Mitigation |
|
||||||
|
|------|------------|--------|------------|
|
||||||
|
| TRREB digitization slips | Medium | High | Start Sprint 1; timebox; accept lower precision initially |
|
||||||
|
| PDF parser breaks on older years | Medium | Medium | Test multiple years early; build fallbacks |
|
||||||
|
| PostGIS geometry issues | Low | Medium | Validate CRS before load (2.1.8) |
|
||||||
|
| Choropleth performance | Low | Medium | Pre-aggregate; simplify geometries |
|
||||||
|
| Policy events research takes too long | Medium | Low | Stop at the 10-event minimum; expand post-launch |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
|
||||||
|
### Launch 1
|
||||||
|
- [ ] Bio page accessible via HTTPS
|
||||||
|
- [ ] All content from `bio_content_v2.md` rendered
|
||||||
|
- [ ] No placeholder text (`[USERNAME]`) visible
|
||||||
|
- [ ] Mobile responsive
|
||||||
|
- [ ] Social links functional
|
||||||
|
|
||||||
|
### Launch 2
|
||||||
|
- [ ] Choropleth renders TRREB districts
|
||||||
|
- [ ] Choropleth renders CMHC zones
|
||||||
|
- [ ] Purchase/rental mode toggle works
|
||||||
|
- [ ] Time navigation works (monthly for TRREB, annual for CMHC)
|
||||||
|
- [ ] Policy event markers visible on time series
|
||||||
|
- [ ] Neighbourhood overlay toggleable
|
||||||
|
- [ ] Methodology documentation published
|
||||||
|
- [ ] Data sources cited
|
||||||
|
- [ ] Health endpoint responds
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Effort Legend
|
||||||
|
|
||||||
|
| Level | Meaning |
|
||||||
|
|-------|---------|
|
||||||
|
| **Low** | Straightforward; minimal iteration expected |
|
||||||
|
| **Mid** | Requires debugging or multi-step coordination |
|
||||||
|
| **High** | Complex logic, external tools, or human intervention required |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Document Version: 4.1*
|
||||||
|
*Created: January 2026*
|
||||||
0
notebooks/.gitkeep
Normal file
0
notebooks/.gitkeep
Normal file
3
portfolio_app/__init__.py
Normal file
3
portfolio_app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Analytics Portfolio Application."""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
58
portfolio_app/app.py
Normal file
58
portfolio_app/app.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
"""Dash application factory with Pages routing."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
|
||||||
|
from .components import create_sidebar
|
||||||
|
from .config import get_settings
|
||||||
|
|
||||||
|
|
||||||
|
def create_app() -> dash.Dash:
    """Build the Dash application, attach its root layout, and load callbacks.

    Returns:
        The configured ``dash.Dash`` instance with Pages routing enabled.
    """
    # Pages routing is switched on via ``use_pages=True``.
    application = dash.Dash(
        __name__,
        use_pages=True,
        suppress_callback_exceptions=True,
        title="Analytics Portfolio",
        external_stylesheets=dmc.styles.ALL,
    )

    # Non-visual components: URL tracking plus the theme stores.
    url_tracker = dcc.Location(id="url", refresh=False)
    theme_state = dcc.Store(id="theme-store", storage_type="local", data="dark")
    # Dummy store that serves as the output of the theme init callback.
    theme_init_sink = dcc.Store(id="theme-init-dummy")

    # Visible shell: the sidebar followed by the routed page content.
    sidebar = create_sidebar()
    routed_page = html.Div(
        dash.page_container,
        className="page-content-wrapper",
    )
    page_shell = html.Div([sidebar, routed_page])

    application.layout = dmc.MantineProvider(
        id="mantine-provider",
        children=[url_tracker, theme_state, theme_init_sink, page_shell],
        theme={
            "primaryColor": "blue",
            "fontFamily": "'Inter', sans-serif",
        },
        defaultColorScheme="dark",
    )

    # Imported only for its side effect: loading the package registers callbacks.
    from . import callbacks  # noqa: F401

    return application
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Start the development server using the debug flag from settings."""
    debug_enabled = get_settings().dash_debug
    # Listens on every interface ("0.0.0.0") at port 8050.
    create_app().run(debug=debug_enabled, host="0.0.0.0", port=8050)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
0
portfolio_app/assets/.gitkeep
Normal file
0
portfolio_app/assets/.gitkeep
Normal file
139
portfolio_app/assets/sidebar.css
Normal file
139
portfolio_app/assets/sidebar.css
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
/* Floating sidebar navigation styles */
|
||||||
|
|
||||||
|
/* Sidebar container */
|
||||||
|
.floating-sidebar {
|
||||||
|
position: fixed;
|
||||||
|
left: 16px;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
width: 60px;
|
||||||
|
padding: 16px 8px;
|
||||||
|
border-radius: 32px;
|
||||||
|
z-index: 1000;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
||||||
|
transition: background-color 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Page content offset to prevent sidebar overlap */
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 92px; /* sidebar width (60px) + left margin (16px) + gap (16px) */
|
||||||
|
min-height: 100vh;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dark theme (default) */
|
||||||
|
[data-mantine-color-scheme="dark"] .floating-sidebar {
|
||||||
|
background-color: #141414;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-mantine-color-scheme="dark"] body {
|
||||||
|
background-color: #000000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Light theme */
|
||||||
|
[data-mantine-color-scheme="light"] .floating-sidebar {
|
||||||
|
background-color: #f0f0f0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-mantine-color-scheme="light"] body {
|
||||||
|
background-color: #ffffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Brand initials styling */
|
||||||
|
.sidebar-brand {
|
||||||
|
width: 40px;
|
||||||
|
height: 40px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
border-radius: 50%;
|
||||||
|
background-color: var(--mantine-color-blue-filled);
|
||||||
|
margin-bottom: 4px;
|
||||||
|
transition: transform 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand:hover {
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand-link {
|
||||||
|
font-weight: 700;
|
||||||
|
font-size: 16px;
|
||||||
|
color: white;
|
||||||
|
text-decoration: none;
|
||||||
|
line-height: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Divider between sections */
|
||||||
|
.sidebar-divider {
|
||||||
|
width: 32px;
|
||||||
|
height: 1px;
|
||||||
|
background-color: var(--mantine-color-dimmed);
|
||||||
|
margin: 4px 0;
|
||||||
|
opacity: 0.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Active nav icon indicator */
|
||||||
|
.nav-icon-active {
|
||||||
|
background-color: var(--mantine-color-blue-filled) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Navigation icon hover effects */
|
||||||
|
.floating-sidebar .mantine-ActionIcon-root {
|
||||||
|
transition: transform 0.15s ease, background-color 0.15s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.floating-sidebar .mantine-ActionIcon-root:hover {
|
||||||
|
transform: scale(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure links don't have underlines */
|
||||||
|
.floating-sidebar a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Theme toggle specific styling */
|
||||||
|
#theme-toggle {
|
||||||
|
transition: transform 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
#theme-toggle:hover {
|
||||||
|
transform: rotate(15deg) scale(1.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive adjustments for smaller screens */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.floating-sidebar {
|
||||||
|
left: 8px;
|
||||||
|
width: 50px;
|
||||||
|
padding: 12px 6px;
|
||||||
|
border-radius: 25px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 70px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand {
|
||||||
|
width: 34px;
|
||||||
|
height: 34px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidebar-brand-link {
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Very small screens - hide the sidebar entirely and reclaim its margin (no replacement navigation is defined here) */
|
||||||
|
@media (max-width: 480px) {
|
||||||
|
.floating-sidebar {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.page-content-wrapper {
|
||||||
|
margin-left: 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
5
portfolio_app/callbacks/__init__.py
Normal file
5
portfolio_app/callbacks/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Application-level callbacks for the portfolio app."""
|
||||||
|
|
||||||
|
from . import theme
|
||||||
|
|
||||||
|
__all__ = ["theme"]
|
||||||
38
portfolio_app/callbacks/theme.py
Normal file
38
portfolio_app/callbacks/theme.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""Theme toggle callbacks using clientside JavaScript."""
|
||||||
|
|
||||||
|
from dash import Input, Output, State, clientside_callback
|
||||||
|
|
||||||
|
# Toggle the colour scheme when the sidebar theme button is clicked.
# Runs entirely in the browser: it writes the new scheme to the <html>
# `data-mantine-color-scheme` attribute and returns it so `theme-store`
# holds the value that is currently applied.
clientside_callback(
    """
    function(n_clicks, currentTheme) {
        if (n_clicks === undefined || n_clicks === null) {
            return window.dash_clientside.no_update;
        }
        // The store may still be empty on the first click. Fall back to the
        // scheme already applied to <html> (then to 'dark', assumed to be the
        // app default) so the first click always flips the visible theme.
        const root = document.documentElement;
        const activeTheme =
            currentTheme || root.getAttribute('data-mantine-color-scheme') || 'dark';
        const newTheme = activeTheme === 'dark' ? 'light' : 'dark';
        root.setAttribute('data-mantine-color-scheme', newTheme);
        return newTheme;
    }
    """,
    Output("theme-store", "data"),
    Input("theme-toggle", "n_clicks"),
    State("theme-store", "data"),
    prevent_initial_call=True,
)
|
||||||
|
|
||||||
|
# Re-apply the stored theme to the DOM whenever `theme-store` has data,
# including on the initial page load (prevent_initial_call=False).
# NOTE(review): the store is presumably persisted in localStorage by the
# layout; this callback itself only reads the store value.
# The output is a throwaway target: only the DOM side effect matters.
_APPLY_STORED_THEME_JS = """
    function(theme) {
        if (theme) {
            document.documentElement.setAttribute('data-mantine-color-scheme', theme);
        }
        return theme;
    }
    """

clientside_callback(
    _APPLY_STORED_THEME_JS,
    Output("theme-init-dummy", "data"),
    Input("theme-store", "data"),
    prevent_initial_call=False,
)
|
||||||
0
portfolio_app/components/.gitkeep
Normal file
0
portfolio_app/components/.gitkeep
Normal file
16
portfolio_app/components/__init__.py
Normal file
16
portfolio_app/components/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
"""Shared Dash components for the portfolio application."""
|
||||||
|
|
||||||
|
from .map_controls import create_map_controls, create_metric_selector
|
||||||
|
from .metric_card import MetricCard, create_metric_cards_row
|
||||||
|
from .sidebar import create_sidebar
|
||||||
|
from .time_slider import create_time_slider, create_year_selector
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_map_controls",
|
||||||
|
"create_metric_selector",
|
||||||
|
"create_sidebar",
|
||||||
|
"create_time_slider",
|
||||||
|
"create_year_selector",
|
||||||
|
"MetricCard",
|
||||||
|
"create_metric_cards_row",
|
||||||
|
]
|
||||||
79
portfolio_app/components/map_controls.py
Normal file
79
portfolio_app/components/map_controls.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"""Map control components for choropleth visualizations."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import html
|
||||||
|
|
||||||
|
|
||||||
|
def create_metric_selector(
    id_prefix: str,
    options: list[dict[str, str]],
    default_value: str | None = None,
    label: str = "Select Metric",
) -> dmc.Select:
    """Build the dropdown used to choose which metric is displayed.

    Args:
        id_prefix: Namespace for the component ID; the Select is created
            with the ID ``"<id_prefix>-metric-selector"``.
        options: Dropdown entries, each a dict with 'label' and 'value' keys.
        default_value: Value selected initially. A falsy value (``None`` or
            an empty string) selects the first option instead, or nothing
            when ``options`` is empty.
        label: Caption shown with the dropdown.

    Returns:
        Mantine Select component, 200px wide.
    """
    initial_value: str | None
    if default_value:
        initial_value = default_value
    elif options:
        initial_value = options[0]["value"]
    else:
        initial_value = None

    selector_id = f"{id_prefix}-metric-selector"
    return dmc.Select(
        id=selector_id,
        label=label,
        data=options,
        value=initial_value,
        style={"width": "200px"},
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_map_controls(
    id_prefix: str,
    metric_options: list[dict[str, str]],
    default_metric: str | None = None,
    show_layer_toggle: bool = True,
) -> dmc.Paper:
    """Assemble the bordered control panel shown alongside a map.

    Args:
        id_prefix: Namespace shared by the IDs of the contained controls
            (``"<id_prefix>-metric-selector"``, ``"<id_prefix>-layer-toggle"``).
        metric_options: Entries for the "Display Metric" dropdown.
        default_metric: Metric selected initially; forwarded to
            ``create_metric_selector``.
        show_layer_toggle: When true, a "Show Boundaries" switch (initially
            checked) is placed below the dropdown.

    Returns:
        Mantine Paper component containing the panel title and the controls.
    """
    metric_dropdown = create_metric_selector(
        id_prefix=id_prefix,
        options=metric_options,
        default_value=default_metric,
        label="Display Metric",
    )

    # The boundary switch is optional; an empty list keeps the unpacking uniform.
    optional_controls: list[Any] = (
        [
            dmc.Switch(
                id=f"{id_prefix}-layer-toggle",
                label="Show Boundaries",
                checked=True,
                style={"marginTop": "10px"},
            )
        ]
        if show_layer_toggle
        else []
    )

    panel_title = dmc.Text("Map Controls", fw=500, size="sm", mb="xs")
    return dmc.Paper(
        children=[panel_title, html.Div([metric_dropdown, *optional_controls])],
        p="md",
        radius="sm",
        withBorder=True,
    )
|
||||||
115
portfolio_app/components/metric_card.py
Normal file
115
portfolio_app/components/metric_card.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
"""Metric card components for KPI display."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc
|
||||||
|
|
||||||
|
from portfolio_app.figures.summary_cards import create_metric_card_figure
|
||||||
|
|
||||||
|
|
||||||
|
class MetricCard:
    """Reusable KPI card: a bordered panel wrapping a Plotly indicator."""

    def __init__(
        self,
        id_prefix: str,
        title: str,
        value: float | int | str = 0,
        delta: float | None = None,
        prefix: str = "",
        suffix: str = "",
        format_spec: str = ",.0f",
        positive_is_good: bool = True,
    ) -> None:
        """Store the card configuration; nothing is rendered until ``render``.

        Args:
            id_prefix: Namespace for the component ID; the embedded graph
                gets the ID ``"<id_prefix>-graph"``.
            title: Card title.
            value: Main metric value.
            delta: Change value for the delta indicator, or ``None`` for none.
            prefix: Text placed before the value (e.g., '$').
            suffix: Text placed after the value.
            format_spec: Number format specification for the value.
            positive_is_good: Whether a positive delta counts as good.
        """
        self.id_prefix = id_prefix
        self.title = title
        # Displayed number and its optional change indicator.
        self.value = value
        self.delta = delta
        # Presentation options forwarded to the figure factory.
        self.prefix = prefix
        self.suffix = suffix
        self.format_spec = format_spec
        self.positive_is_good = positive_is_good

    def render(self) -> dmc.Paper:
        """Build the Dash component tree for this card.

        Returns:
            Mantine Paper component containing a 120px-high graph with the
            Plotly mode bar hidden.
        """
        indicator_figure = create_metric_card_figure(
            value=self.value,
            title=self.title,
            delta=self.delta,
            prefix=self.prefix,
            suffix=self.suffix,
            format_spec=self.format_spec,
            positive_is_good=self.positive_is_good,
        )
        graph = dcc.Graph(
            id=f"{self.id_prefix}-graph",
            figure=indicator_figure,
            config={"displayModeBar": False},
            style={"height": "120px"},
        )
        return dmc.Paper(children=[graph], p="xs", radius="sm", withBorder=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_metric_cards_row(
    metrics: list[dict[str, Any]],
    id_prefix: str = "metric",
) -> dmc.SimpleGrid:
    """Create a responsive row of metric cards.

    Args:
        metrics: List of metric configurations. Recognised keys (all
            optional, unknown keys are ignored):
            - title: Card title (default "")
            - value: Metric value (default 0)
            - delta: Change value (default None)
            - prefix: Value prefix (default "")
            - suffix: Value suffix (default "")
            - format_spec: Format specification (default ",.0f")
            - positive_is_good: Delta color logic (default True)
        id_prefix: Prefix for component IDs; card ``i`` uses
            ``"<id_prefix>-<i>"``.

    Returns:
        Mantine SimpleGrid with one card per metric: one column on the
        smallest breakpoint, two from "sm", and all cards on one row from "md".
    """
    cards = [
        MetricCard(
            id_prefix=f"{id_prefix}-{index}",
            title=metric.get("title", ""),
            value=metric.get("value", 0),
            delta=metric.get("delta"),
            prefix=metric.get("prefix", ""),
            suffix=metric.get("suffix", ""),
            format_spec=metric.get("format_spec", ",.0f"),
            positive_is_good=metric.get("positive_is_good", True),
        ).render()
        for index, metric in enumerate(metrics)
    ]

    # An empty metrics list must not request a zero-column grid.
    columns_on_wide_screens = max(1, len(cards))

    return dmc.SimpleGrid(
        cols={"base": 1, "sm": 2, "md": columns_on_wide_screens},
        spacing="md",
        children=cards,
    )
|
||||||
179
portfolio_app/components/sidebar.py
Normal file
179
portfolio_app/components/sidebar.py
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
"""Floating sidebar navigation component."""
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
from dash_iconify import DashIconify
|
||||||
|
|
||||||
|
# Internal pages shown in the sidebar, in display order.
# Each row is (path, iconify icon name, tooltip label).
NAV_ITEMS = [
    {"path": nav_path, "icon": nav_icon, "label": nav_label}
    for nav_path, nav_icon, nav_label in (
        ("/", "tabler:home", "Home"),
        ("/toronto", "tabler:map-2", "Toronto Housing"),
    )
]

# Off-site profile links shown at the bottom of the sidebar.
# Each row is (url, iconify icon name, tooltip label).
EXTERNAL_LINKS = [
    {"url": link_url, "icon": link_icon, "label": link_label}
    for link_url, link_icon, link_label in (
        ("https://github.com/leomiranda", "tabler:brand-github", "GitHub"),
        ("https://linkedin.com/in/leobmiranda", "tabler:brand-linkedin", "LinkedIn"),
    )
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_brand_logo() -> html.Div:
    """Build the "LM" initials badge, which links back to the home page."""
    home_link = dcc.Link("LM", href="/", className="sidebar-brand-link")
    return html.Div(home_link, className="sidebar-brand")
|
||||||
|
|
||||||
|
|
||||||
|
def create_nav_icon(
    icon: str,
    label: str,
    path: str,
    current_path: str,
) -> dmc.Tooltip:
    """Create a navigation icon with tooltip.

    The icon is highlighted when ``current_path`` is the item's page or a
    sub-page of it. The root item ("/") is highlighted only on an exact match.

    Args:
        icon: Iconify icon string.
        label: Tooltip label.
        path: Navigation path the icon links to.
        current_path: Current page path for active state. A missing value
            (``None`` or "") leaves the icon inactive.

    Returns:
        Tooltip-wrapped navigation icon.
    """
    # Tolerate a missing pathname instead of failing on ``None.startswith``.
    current = current_path or ""

    if path == "/":
        is_active = current == "/"
    else:
        # Match on whole path segments: "/toronto" owns "/toronto" and
        # "/toronto/...", but not an unrelated page such as "/toronto-old".
        section = path.rstrip("/")
        is_active = current == section or current.startswith(f"{section}/")

    return dmc.Tooltip(
        dcc.Link(
            dmc.ActionIcon(
                DashIconify(icon=icon, width=20),
                variant="filled" if is_active else "subtle",
                size="lg",
                radius="xl",
                color="blue" if is_active else "gray",
                className="nav-icon-active" if is_active else "",
            ),
            href=path,
        ),
        label=label,
        position="right",
        withArrow=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_theme_toggle(current_theme: str = "dark") -> dmc.Tooltip:
    """Build the button that switches between dark and light mode.

    The icon and tooltip advertise the mode a click switches *to*: a sun
    while the dark theme is current, a moon for any other value.

    Args:
        current_theme: Current theme ('dark' or 'light').

    Returns:
        Tooltip-wrapped action icon with the ID ``"theme-toggle"``.
    """
    if current_theme == "dark":
        icon_name, tooltip_text = "tabler:sun", "Switch to light mode"
    else:
        icon_name, tooltip_text = "tabler:moon", "Switch to dark mode"

    toggle_button = dmc.ActionIcon(
        DashIconify(icon=icon_name, width=20, id="theme-toggle-icon"),
        id="theme-toggle",
        variant="subtle",
        size="lg",
        radius="xl",
        color="gray",
    )
    return dmc.Tooltip(
        toggle_button,
        label=tooltip_text,
        position="right",
        withArrow=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_external_link(url: str, icon: str, label: str) -> dmc.Tooltip:
    """Build a sidebar icon that opens an off-site page in a new tab.

    Args:
        url: Destination URL.
        icon: Iconify icon string.
        label: Tooltip label.

    Returns:
        Tooltip-wrapped anchor around the icon.
    """
    icon_button = dmc.ActionIcon(
        DashIconify(icon=icon, width=20),
        variant="subtle",
        size="lg",
        radius="xl",
        color="gray",
    )
    anchor = dmc.Anchor(icon_button, href=url, target="_blank")
    return dmc.Tooltip(anchor, label=label, position="right", withArrow=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_sidebar_divider() -> html.Div:
    """Build the thin separator placed between sidebar sections.

    Returns:
        Empty Div styled through the ``sidebar-divider`` CSS class.
    """
    divider = html.Div(className="sidebar-divider")
    return divider
|
||||||
|
|
||||||
|
|
||||||
|
def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html.Div:
    """Assemble the floating sidebar.

    Sections appear top to bottom as: brand logo, page navigation, theme
    toggle, external links, with a divider between each section.

    Args:
        current_path: Current page path, used to highlight the active icon.
        current_theme: Current theme, used for the toggle's icon and tooltip.

    Returns:
        Div with the ID and class ``"floating-sidebar"``.
    """
    page_icons = [
        create_nav_icon(
            icon=item["icon"],
            label=item["label"],
            path=item["path"],
            current_path=current_path,
        )
        for item in NAV_ITEMS
    ]
    profile_icons = [
        create_external_link(
            url=link["url"],
            icon=link["icon"],
            label=link["label"],
        )
        for link in EXTERNAL_LINKS
    ]

    sections = [
        create_brand_logo(),
        create_sidebar_divider(),
        *page_icons,
        create_sidebar_divider(),
        create_theme_toggle(current_theme),
        create_sidebar_divider(),
        *profile_icons,
    ]
    return html.Div(
        sections,
        className="floating-sidebar",
        id="floating-sidebar",
    )
|
||||||
135
portfolio_app/components/time_slider.py
Normal file
135
portfolio_app/components/time_slider.py
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
"""Time selection components for temporal data filtering."""
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
|
||||||
|
def create_year_selector(
    id_prefix: str,
    min_year: int = 2020,
    max_year: int | None = None,
    default_year: int | None = None,
    label: str = "Select Year",
) -> dmc.Select:
    """Build a dropdown listing years from newest to oldest.

    Args:
        id_prefix: Namespace for the component ID; the Select is created
            with the ID ``"<id_prefix>-year-selector"``.
        min_year: Oldest year offered (inclusive).
        max_year: Newest year offered (inclusive); the current year when
            omitted.
        default_year: Year selected initially; ``max_year`` when omitted.
        label: Caption shown with the dropdown.

    Returns:
        Mantine Select component whose option values are year strings.
    """
    newest_year = date.today().year if max_year is None else max_year
    selected_year = newest_year if default_year is None else default_year

    # Count downwards so the most recent year is listed first.
    year_options = [
        {"label": str(year), "value": str(year)}
        for year in range(newest_year, min_year - 1, -1)
    ]

    return dmc.Select(
        id=f"{id_prefix}-year-selector",
        label=label,
        data=year_options,
        value=str(selected_year),
        style={"width": "120px"},
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_time_slider(
    id_prefix: str,
    min_year: int = 2020,
    max_year: int | None = None,
    default_range: tuple[int, int] | None = None,
    label: str = "Time Range",
) -> dmc.Paper:
    """Build a bordered panel holding a year-range slider.

    Args:
        id_prefix: Namespace for the component ID; the slider is created
            with the ID ``"<id_prefix>-time-slider"``.
        min_year: First year on the slider.
        max_year: Last year on the slider; the current year when omitted.
        default_range: Initial (start, end) selection; the full span when
            omitted.
        label: Caption shown above the slider.

    Returns:
        Mantine Paper component containing the caption and the slider.
    """
    last_year = date.today().year if max_year is None else max_year
    initial_range = (min_year, last_year) if default_range is None else default_range

    # One labelled tick for every selectable year.
    year_marks = []
    for year in range(min_year, last_year + 1):
        year_marks.append({"value": year, "label": str(year)})

    range_slider = dmc.RangeSlider(
        id=f"{id_prefix}-time-slider",
        min=min_year,
        max=last_year,
        value=list(initial_range),
        marks=year_marks,
        step=1,
        minRange=1,
        style={"marginTop": "20px", "marginBottom": "10px"},
    )
    caption = dmc.Text(label, fw=500, size="sm", mb="xs")

    return dmc.Paper(
        children=[caption, range_slider],
        p="md",
        radius="sm",
        withBorder=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_month_selector(
    id_prefix: str,
    default_month: int | None = None,
    label: str = "Select Month",
) -> dmc.Select:
    """Build a dropdown listing the twelve months by English name.

    Args:
        id_prefix: Namespace for the component ID; the Select is created
            with the ID ``"<id_prefix>-month-selector"``.
        default_month: Month selected initially (1-12); the current month
            when omitted.
        label: Caption shown with the dropdown.

    Returns:
        Mantine Select component whose option values are the month numbers
        as strings ("1" to "12").
    """
    month_names = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )
    # Option values are 1-based month numbers, stored as strings.
    month_options = [
        {"label": name, "value": str(number)}
        for number, name in enumerate(month_names, start=1)
    ]

    selected_month = date.today().month if default_month is None else default_month

    return dmc.Select(
        id=f"{id_prefix}-month-selector",
        label=label,
        data=month_options,
        value=str(selected_month),
        style={"width": "140px"},
    )
|
||||||
34
portfolio_app/config.py
Normal file
34
portfolio_app/config.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""Application configuration using Pydantic BaseSettings."""
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):  # type: ignore[misc]
    """Typed application settings.

    Values are read from environment variables and from a UTF-8 ``.env``
    file; variables that match no field are ignored. The defaults below are
    development values.
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # --- Database connection ---
    database_url: str = "postgresql://portfolio:portfolio_dev@localhost:5432/portfolio"
    postgres_user: str = "portfolio"
    postgres_password: str = "portfolio_dev"
    postgres_db: str = "portfolio"

    # --- Dash application ---
    # NOTE(review): debug defaults to on and the secret key is a placeholder;
    # both are presumably overridden in production deployments - confirm.
    dash_debug: bool = True
    secret_key: str = "change-me-in-production"

    # --- Logging ---
    log_level: str = "INFO"
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared Settings instance.

    The instance is built on the first call and served from the cache on
    every later call.
    """
    settings = Settings()
    return settings
|
||||||
5
portfolio_app/errors/__init__.py
Normal file
5
portfolio_app/errors/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Error handling for the portfolio application."""
|
||||||
|
|
||||||
|
from .exceptions import LoadError, ParseError, PortfolioError, ValidationError
|
||||||
|
|
||||||
|
__all__ = ["PortfolioError", "ParseError", "ValidationError", "LoadError"]
|
||||||
17
portfolio_app/errors/exceptions.py
Normal file
17
portfolio_app/errors/exceptions.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
"""Custom exceptions for the portfolio application."""
|
||||||
|
|
||||||
|
|
||||||
|
class PortfolioError(Exception):
    """Root of the application's exception hierarchy.

    Catch this type to handle any error raised by the portfolio code.
    """


class ParseError(PortfolioError):
    """Raised when a PDF or CSV source cannot be parsed."""


class ValidationError(PortfolioError):
    """Raised when Pydantic or business-rule validation fails."""


class LoadError(PortfolioError):
    """Raised when loading data into the database fails."""
|
||||||
0
portfolio_app/figures/.gitkeep
Normal file
0
portfolio_app/figures/.gitkeep
Normal file
31
portfolio_app/figures/__init__.py
Normal file
31
portfolio_app/figures/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
"""Plotly figure factories for data visualization."""
|
||||||
|
|
||||||
|
from .choropleth import (
|
||||||
|
create_choropleth_figure,
|
||||||
|
create_district_map,
|
||||||
|
create_zone_map,
|
||||||
|
)
|
||||||
|
from .summary_cards import create_metric_card_figure, create_summary_metrics
|
||||||
|
from .time_series import (
|
||||||
|
add_policy_markers,
|
||||||
|
create_market_comparison_chart,
|
||||||
|
create_price_time_series,
|
||||||
|
create_time_series_with_events,
|
||||||
|
create_volume_time_series,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
# Choropleth
|
||||||
|
"create_choropleth_figure",
|
||||||
|
"create_district_map",
|
||||||
|
"create_zone_map",
|
||||||
|
# Time series
|
||||||
|
"create_price_time_series",
|
||||||
|
"create_volume_time_series",
|
||||||
|
"create_market_comparison_chart",
|
||||||
|
"create_time_series_with_events",
|
||||||
|
"add_policy_markers",
|
||||||
|
# Summary
|
||||||
|
"create_metric_card_figure",
|
||||||
|
"create_summary_metrics",
|
||||||
|
]
|
||||||
171
portfolio_app/figures/choropleth.py
Normal file
171
portfolio_app/figures/choropleth.py
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
"""Choropleth map figure factory for Toronto housing data."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
|
||||||
|
def create_choropleth_figure(
    geojson: dict[str, Any] | None,
    data: list[dict[str, Any]],
    location_key: str,
    color_column: str,
    hover_data: list[str] | None = None,
    color_scale: str = "Blues",
    title: str | None = None,
    map_style: str = "carto-positron",
    center: dict[str, float] | None = None,
    zoom: float = 9.5,
) -> go.Figure:
    """Create a choropleth map figure.

    When ``geojson`` is missing or ``data`` is empty, an empty base map with
    an explanatory annotation is returned instead of a choropleth.

    Args:
        geojson: GeoJSON FeatureCollection for boundaries. Features are
            matched to records through ``properties.<location_key>``.
        data: List of data records with location keys and values.
        location_key: Column name for location identifier.
        color_column: Column name for color values.
        hover_data: Additional columns to show on hover. Names that are not
            columns of ``data`` are skipped.
        color_scale: Plotly color scale name.
        title: Optional chart title.
        map_style: Mapbox style (carto-positron, open-street-map, etc.).
            "carto-positron" is always rendered as "carto-darkmatter" to
            suit the app's dark styling.
        center: Map center coordinates {"lat": float, "lon": float};
            defaults to Toronto.
        zoom: Initial zoom level.

    Returns:
        Plotly Figure object.
    """
    # Default center to Toronto
    if center is None:
        center = {"lat": 43.7, "lon": -79.4}

    # Swap the light default basemap for its dark counterpart. This is done
    # once here and covers both the placeholder and the choropleth below.
    if map_style == "carto-positron":
        map_style = "carto-darkmatter"

    # Without boundaries or records, show an annotated base map.
    if geojson is None or not data:
        fig = go.Figure(go.Scattermapbox())
        fig.update_layout(
            mapbox={
                "style": map_style,
                "center": center,
                "zoom": zoom,
            },
            margin={"l": 0, "r": 0, "t": 40, "b": 0},
            title=title or "Toronto Housing Map",
            height=500,
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
            font_color="#c9c9c9",
        )
        fig.add_annotation(
            text="No geometry data available. Complete QGIS digitization to enable map.",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "#888888"},
        )
        return fig

    # Imported here so the placeholder path does not need pandas.
    import pandas as pd

    df = pd.DataFrame(data)

    # Plotly Express raises on hover columns that are not in the frame, so
    # keep only the ones present (None when nothing is left).
    if hover_data is not None:
        hover_data = [column for column in hover_data if column in df.columns] or None

    fig = px.choropleth_mapbox(
        df,
        geojson=geojson,
        locations=location_key,
        featureidkey=f"properties.{location_key}",
        color=color_column,
        color_continuous_scale=color_scale,
        hover_data=hover_data,
        mapbox_style=map_style,
        center=center,
        zoom=zoom,
        opacity=0.7,
    )

    fig.update_layout(
        margin={"l": 0, "r": 0, "t": 40, "b": 0},
        title=title,
        height=500,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        coloraxis_colorbar={
            "title": {
                # "avg_price" -> "Avg Price"
                "text": color_column.replace("_", " ").title(),
                "font": {"color": "#c9c9c9"},
            },
            "thickness": 15,
            "len": 0.7,
            "tickfont": {"color": "#c9c9c9"},
        },
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def create_district_map(
    districts_geojson: dict[str, Any] | None,
    purchase_data: list[dict[str, Any]],
    metric: str = "avg_price",
) -> go.Figure:
    """Build the purchase-market choropleth keyed by TRREB district.

    Args:
        districts_geojson: GeoJSON for TRREB district boundaries.
        purchase_data: Purchase statistics by district; records are matched
            to boundaries through their ``district_code``.
        metric: Column used for the fill colour (avg_price, sales_count, etc.).

    Returns:
        Plotly Figure object.
    """
    # Price metrics are drawn in blue, everything else in green.
    palette = "Blues" if "price" in metric else "Greens"

    # The coloured metric is left out of the extra hover columns.
    extra_hover = [
        column
        for column in ("district_name", "sales_count", "avg_price", "median_price")
        if column != metric
    ]

    return create_choropleth_figure(
        geojson=districts_geojson,
        data=purchase_data,
        location_key="district_code",
        color_column=metric,
        hover_data=extra_hover,
        color_scale=palette,
        title="Toronto Purchase Market by District",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_zone_map(
    zones_geojson: dict[str, Any] | None,
    rental_data: list[dict[str, Any]],
    metric: str = "avg_rent",
) -> go.Figure:
    """Build the rental-market choropleth keyed by CMHC zone.

    Args:
        zones_geojson: GeoJSON for CMHC zone boundaries.
        rental_data: Rental statistics by zone; records are matched to
            boundaries through their ``zone_code``.
        metric: Column used for the fill colour (avg_rent, vacancy_rate, etc.).

    Returns:
        Plotly Figure object.
    """
    # Rent metrics are drawn in orange, everything else in purple.
    palette = "Oranges" if "rent" in metric else "Purples"

    # The coloured metric is left out of the extra hover columns.
    extra_hover = [
        column
        for column in ("zone_name", "avg_rent", "vacancy_rate", "rental_universe")
        if column != metric
    ]

    return create_choropleth_figure(
        geojson=zones_geojson,
        data=rental_data,
        location_key="zone_code",
        color_column=metric,
        hover_data=extra_hover,
        color_scale=palette,
        title="Toronto Rental Market by Zone",
    )
|
||||||
107
portfolio_app/figures/summary_cards.py
Normal file
107
portfolio_app/figures/summary_cards.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
"""Summary card figure factories for KPI display."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
|
||||||
|
def create_metric_card_figure(
    value: float | int | str,
    title: str,
    delta: float | None = None,
    delta_suffix: str = "%",
    prefix: str = "",
    suffix: str = "",
    format_spec: str = ",.0f",
    positive_is_good: bool = True,
) -> go.Figure:
    """Create a KPI indicator figure.

    Args:
        value: The main metric value. A non-numeric value (e.g. a string)
            is displayed as 0.
        title: Card title.
        delta: Optional change value (for delta indicator).
        delta_suffix: Suffix for delta value (e.g., '%').
        prefix: Prefix for main value (e.g., '$').
        suffix: Suffix for main value.
        format_spec: Format specification for the value.
        positive_is_good: Whether positive delta is good (green) or bad (red).

    Returns:
        Plotly Figure object.
    """
    # Only real numbers can drive the indicator.
    number_value: float | None = (
        float(value) if isinstance(value, int | float) else None
    )

    fig = go.Figure()

    indicator_config: dict[str, Any] = {
        "mode": "number",
        "value": number_value if number_value is not None else 0,
        "title": {"text": title, "font": {"size": 14}},
        "number": {
            "font": {"size": 32},
            "prefix": prefix,
            "suffix": suffix,
            "valueformat": format_spec,
        },
    }

    # Add delta if provided
    if delta is not None:
        # The indicator shows value - reference, so the reference is
        # value - delta. Test against None, not truthiness: a metric of
        # exactly 0 is a valid number and must still show its delta.
        reference = number_value - delta if number_value is not None else 0
        indicator_config["mode"] = "number+delta"
        indicator_config["delta"] = {
            "reference": reference,
            "relative": False,
            "valueformat": ".1f",
            "suffix": delta_suffix,
            "increasing": {"color": "green" if positive_is_good else "red"},
            "decreasing": {"color": "red" if positive_is_good else "green"},
        }

    fig.add_trace(go.Indicator(**indicator_config))

    fig.update_layout(
        height=120,
        margin={"l": 20, "r": 20, "t": 40, "b": 20},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font={"family": "Inter, sans-serif", "color": "#c9c9c9"},
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def create_summary_metrics(
    metrics: dict[str, dict[str, Any]],
) -> list[go.Figure]:
    """Build one metric-card figure per entry in ``metrics``.

    Args:
        metrics: Mapping of metric name to its card configuration. Only the
            values are read. Each configuration may provide 'value', 'title',
            'delta', 'delta_suffix', 'prefix', 'suffix', 'format_spec' and
            'positive_is_good'; missing keys fall back to the defaults
            spelled out in the call below.

    Returns:
        List of Plotly Figure objects, in the mapping's iteration order.
    """
    return [
        create_metric_card_figure(
            value=cfg.get("value", 0),
            title=cfg.get("title", ""),
            delta=cfg.get("delta"),
            delta_suffix=cfg.get("delta_suffix", "%"),
            prefix=cfg.get("prefix", ""),
            suffix=cfg.get("suffix", ""),
            format_spec=cfg.get("format_spec", ",.0f"),
            positive_is_good=cfg.get("positive_is_good", True),
        )
        for cfg in metrics.values()
    ]
|
||||||
386
portfolio_app/figures/time_series.py
Normal file
386
portfolio_app/figures/time_series.py
Normal file
@@ -0,0 +1,386 @@
|
|||||||
|
"""Time series figure factories for Toronto housing data."""
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import plotly.express as px
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
|
||||||
|
def create_price_time_series(
    data: list[dict[str, Any]],
    date_column: str = "full_date",
    price_column: str = "avg_price",
    group_column: str | None = None,
    title: str = "Average Price Over Time",
    show_yoy: bool = True,
) -> go.Figure:
    """Draw a line chart of a price column against a date column.

    Args:
        data: Records containing at least the date and price columns.
        date_column: Name of the column holding dates (parsed with
            ``pandas.to_datetime``).
        price_column: Name of the column holding the plotted values.
        group_column: Optional column used to split the data into one
            coloured line per group. Ignored when the column is absent.
        title: Chart title.
        show_yoy: Accepted for API compatibility; the current implementation
            does not reference it.

    Returns:
        Plotly Figure object. When ``data`` is empty, a placeholder figure
        carrying a "No data available" annotation is returned instead.
    """
    import pandas as pd

    transparent = "rgba(0,0,0,0)"
    text_color = "#c9c9c9"

    # Guard clause: nothing to plot, return an annotated empty canvas.
    if not data:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"color": "#888888"},
        )
        placeholder.update_layout(
            title=title,
            height=350,
            paper_bgcolor=transparent,
            plot_bgcolor=transparent,
            font_color=text_color,
        )
        return placeholder

    frame = pd.DataFrame(data)
    frame[date_column] = pd.to_datetime(frame[date_column])

    # Only colour by group when a column was requested and actually exists.
    line_kwargs: dict[str, Any] = {
        "x": date_column,
        "y": price_column,
        "title": title,
    }
    if group_column and group_column in frame.columns:
        line_kwargs["color"] = group_column
    chart = px.line(frame, **line_kwargs)

    axis_style = {"gridcolor": "#333333", "linecolor": "#444444"}
    chart.update_layout(
        height=350,
        margin={"l": 40, "r": 20, "t": 50, "b": 40},
        xaxis_title="Date",
        yaxis_title=price_column.replace("_", " ").title(),
        yaxis_tickprefix="$",
        yaxis_tickformat=",",
        hovermode="x unified",
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
        font_color=text_color,
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
    )
    return chart
|
||||||
|
|
||||||
|
|
||||||
|
def create_volume_time_series(
    data: list[dict[str, Any]],
    date_column: str = "full_date",
    volume_column: str = "sales_count",
    group_column: str | None = None,
    title: str = "Sales Volume Over Time",
    chart_type: str = "bar",
) -> go.Figure:
    """Draw a bar or line chart of a volume/count column against dates.

    Args:
        data: Records containing at least the date and volume columns.
        date_column: Name of the column holding dates (parsed with
            ``pandas.to_datetime``).
        volume_column: Name of the column holding the plotted counts.
        group_column: Optional column used to colour by group. Ignored when
            the column is absent.
        title: Chart title.
        chart_type: ``'bar'`` selects a bar chart; any other value yields a
            line chart.

    Returns:
        Plotly Figure object. When ``data`` is empty, a placeholder figure
        carrying a "No data available" annotation is returned instead.
    """
    import pandas as pd

    transparent = "rgba(0,0,0,0)"
    text_color = "#c9c9c9"

    # Guard clause: nothing to plot, return an annotated empty canvas.
    if not data:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"color": "#888888"},
        )
        placeholder.update_layout(
            title=title,
            height=350,
            paper_bgcolor=transparent,
            plot_bgcolor=transparent,
            font_color=text_color,
        )
        return placeholder

    frame = pd.DataFrame(data)
    frame[date_column] = pd.to_datetime(frame[date_column])

    # Pick the Plotly Express factory once, then share the argument set.
    make_chart = px.bar if chart_type == "bar" else px.line
    chart_kwargs: dict[str, Any] = {
        "x": date_column,
        "y": volume_column,
        "title": title,
    }
    if group_column and group_column in frame.columns:
        chart_kwargs["color"] = group_column
    chart = make_chart(frame, **chart_kwargs)

    axis_style = {"gridcolor": "#333333", "linecolor": "#444444"}
    chart.update_layout(
        height=350,
        margin={"l": 40, "r": 20, "t": 50, "b": 40},
        xaxis_title="Date",
        yaxis_title=volume_column.replace("_", " ").title(),
        yaxis_tickformat=",",
        hovermode="x unified",
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
        font_color=text_color,
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
    )
    return chart
|
||||||
|
|
||||||
|
|
||||||
|
def create_market_comparison_chart(
    data: list[dict[str, Any]],
    date_column: str = "full_date",
    metrics: list[str] | None = None,
    title: str = "Market Indicators",
) -> go.Figure:
    """Overlay several metrics on one chart that has a secondary y-axis.

    Args:
        data: Records containing the date column and the metric columns.
        date_column: Name of the column holding dates (parsed with
            ``pandas.to_datetime``).
        metrics: Metric columns to plot. Defaults to
            ``["avg_price", "sales_count"]``. Only the first four entries are
            considered, and entries missing from the data are skipped.
        title: Chart title.

    Returns:
        Plotly Figure object. The first listed metric is drawn against the
        primary y-axis and all later ones against the secondary y-axis. When
        ``data`` is empty, a placeholder figure carrying a "No data
        available" annotation is returned instead.
    """
    import pandas as pd
    from plotly.subplots import make_subplots

    transparent = "rgba(0,0,0,0)"
    text_color = "#c9c9c9"

    # Guard clause: nothing to plot, return an annotated empty canvas.
    if not data:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"color": "#888888"},
        )
        placeholder.update_layout(
            title=title,
            height=400,
            paper_bgcolor=transparent,
            plot_bgcolor=transparent,
            font_color=text_color,
        )
        return placeholder

    selected = ["avg_price", "sales_count"] if metrics is None else metrics

    frame = pd.DataFrame(data)
    frame[date_column] = pd.to_datetime(frame[date_column])

    chart = make_subplots(specs=[[{"secondary_y": True}]])
    palette = ("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728")

    for position, column in enumerate(selected[:4]):
        if column in frame.columns:
            series = go.Scatter(
                x=frame[date_column],
                y=frame[column],
                name=column.replace("_", " ").title(),
                line={"color": palette[position % len(palette)]},
            )
            # The axis is chosen by list position, not by which columns exist.
            chart.add_trace(series, secondary_y=position > 0)

    axis_style = {"gridcolor": "#333333", "linecolor": "#444444"}
    legend_style = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
        "font": {"color": text_color},
    }
    chart.update_layout(
        title=title,
        height=400,
        margin={"l": 40, "r": 40, "t": 50, "b": 40},
        hovermode="x unified",
        paper_bgcolor=transparent,
        plot_bgcolor=transparent,
        font_color=text_color,
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        legend=legend_style,
    )
    return chart
|
||||||
|
|
||||||
|
|
||||||
|
def _default_marker_y(fig: go.Figure) -> float | None:
    """Return the largest finite y value on the figure's primary y-axis, if any."""
    import math
    import numbers

    peak: float | None = None
    for trace in fig.data:
        # Markers are added to the primary axis, so skip secondary-axis traces.
        if getattr(trace, "yaxis", None) not in (None, "y"):
            continue
        values = getattr(trace, "y", None)
        if values is None:
            continue
        for raw in values:
            if isinstance(raw, bool) or not isinstance(raw, numbers.Real):
                continue
            number = float(raw)
            if math.isfinite(number) and (peak is None or number > peak):
                peak = number
    return peak


def add_policy_markers(
    fig: go.Figure,
    policy_events: list[dict[str, Any]],
    date_column: str = "event_date",
    y_position: float | None = None,
) -> go.Figure:
    """Add policy event markers to an existing time series figure.

    For every event a dotted vertical line is drawn at the event date and a
    marker carrying hover details (title, date, level, category) is added.

    Args:
        fig: Existing Plotly figure to add markers to (modified in place).
        policy_events: List of policy event dicts. Recognised keys are the
            date column plus 'category', 'expected_direction', 'title' and
            'level'; missing or ``None`` values fall back to defaults.
            Events without a date are skipped.
        date_column: Key holding the event date in each event dict.
        y_position: Y position for markers. If None, the largest y value
            already plotted on the primary axis is used, so the markers sit
            at the top of the data. If the figure has no numeric data the
            markers have no y value.

    Returns:
        The same figure, updated with the policy markers.
    """
    if not policy_events:
        return fig

    # Color mapping for policy categories
    category_colors = {
        "monetary": "#1f77b4",  # Blue
        "tax": "#2ca02c",  # Green
        "regulatory": "#ff7f0e",  # Orange
        "supply": "#9467bd",  # Purple
        "economic": "#d62728",  # Red
    }

    # Symbol mapping for expected direction
    direction_symbols = {
        "bullish": "triangle-up",
        "bearish": "triangle-down",
        "neutral": "circle",
    }

    # Compare against None explicitly so that an explicit 0 is honoured.
    # Computed once, before any marker trace is appended to the figure.
    marker_y = y_position if y_position is not None else _default_marker_y(fig)

    for event in policy_events:
        event_date = event.get(date_column)
        if event_date is None:
            # Nothing to anchor the line or marker to.
            continue

        # `or` also covers keys that are present but set to None.
        category = event.get("category") or "economic"
        direction = event.get("expected_direction") or "neutral"
        title = event.get("title") or "Policy Event"
        level = event.get("level") or "federal"

        color = category_colors.get(category, "#666666")
        symbol = direction_symbols.get(direction, "circle")

        # Add vertical line for the event. No annotation_* kwargs are passed:
        # an empty annotation text adds nothing visible.
        # NOTE(review): some Plotly releases reportedly fail to position
        # annotations for date-like x values in add_vline - confirm against
        # the pinned Plotly version.
        fig.add_vline(
            x=event_date,
            line_dash="dot",
            line_color=color,
            opacity=0.5,
        )

        # Add marker with hover info
        fig.add_trace(
            go.Scatter(
                x=[event_date],
                y=[marker_y],
                mode="markers",
                marker={
                    "symbol": symbol,
                    "size": 12,
                    "color": color,
                    "line": {"width": 1, "color": "white"},
                },
                name=title,
                hovertemplate=(
                    f"<b>{title}</b><br>"
                    f"Date: %{{x}}<br>"
                    f"Level: {str(level).title()}<br>"
                    f"Category: {str(category).title()}<br>"
                    f"<extra></extra>"
                ),
                showlegend=False,
            )
        )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def create_time_series_with_events(
    data: list[dict[str, Any]],
    policy_events: list[dict[str, Any]],
    date_column: str = "full_date",
    value_column: str = "avg_price",
    title: str = "Price Trend with Policy Events",
) -> go.Figure:
    """Build a price time series and overlay policy events on it.

    Args:
        data: Time series records.
        policy_events: Policy events to overlay; may be empty.
        date_column: Name of the date column in ``data``.
        value_column: Name of the value column in ``data``.
        title: Chart title.

    Returns:
        Plotly Figure produced by ``create_price_time_series``. When events
        are given, it is passed through ``add_policy_markers`` with that
        function's default date key and y position.
    """
    base_chart = create_price_time_series(
        data=data,
        date_column=date_column,
        price_column=value_column,
        title=title,
    )

    if not policy_events:
        return base_chart
    return add_policy_markers(base_chart, policy_events)
|
||||||
1
portfolio_app/pages/__init__.py
Normal file
1
portfolio_app/pages/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Dash pages."""
|
||||||
20
portfolio_app/pages/health.py
Normal file
20
portfolio_app/pages/health.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Health check endpoint for deployment monitoring."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
from dash import html
|
||||||
|
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path="/health",
|
||||||
|
title="Health Check",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def layout() -> html.Div:
    """Render the health page: a single preformatted 'status: ok' line."""
    status_line = html.Pre("status: ok")
    return html.Div([status_line], id="health-check")
|
||||||
169
portfolio_app/pages/home.py
Normal file
169
portfolio_app/pages/home.py
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
"""Bio landing page."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
dash.register_page(__name__, path="/", name="Home")
|
||||||
|
|
||||||
|
# Content from bio_content_v2.md
# Hero text: page title and the one-line tagline shown beneath it.
HEADLINE = "Leo | Data Engineer & Analytics Developer"
TAGLINE = "I build data infrastructure that actually gets used."

# Professional summary. Paragraphs are separated by blank lines; the line
# breaks inside a paragraph are only source-level wrapping.
SUMMARY = """Over the past 5 years, I've designed and evolved an enterprise analytics platform
from scratch—now processing 1B+ rows across 21 tables with Python-based ETL pipelines and
dbt-style SQL transformations. The result: 40% efficiency gains, 30% reduction in call
abandon rates, and dashboards that executives actually open.

My approach: dimensional modeling (star schema), layered transformations
(staging → intermediate → marts), and automation that eliminates manual work.
I've built everything from self-service analytics portals to OCR-powered receipt processing systems.

Currently at Summitt Energy supporting multi-market operations across Canada and 8 US states.
Previously cut my teeth on IT infrastructure projects at Petrobras (Fortune 500) and the
Project Management Institute."""

# Technology names, one badge each, in display order.
TECH_STACK = [
    "Python",
    "Pandas",
    "SQLAlchemy",
    "FastAPI",
    "SQL",
    "PostgreSQL",
    "MSSQL",
    "Power BI",
    "Plotly/Dash",
    "dbt patterns",
    "Genesys Cloud",
]

# Portfolio project cards. Each entry has 'title', 'description', 'status'
# and 'link' keys.
PROJECTS = [
    {
        "title": "Toronto Housing Dashboard",
        "description": "Choropleth visualization of GTA real estate trends with TRREB and CMHC data.",
        "status": "In Development",
        "link": "/toronto",
    },
    {
        "title": "Energy Pricing Analysis",
        "description": "Time series analysis and ML prediction for utility market pricing.",
        "status": "Planned",
        "link": "/energy",
    },
]

# Closing availability statement.
AVAILABILITY = "Open to Senior Data Analyst, Analytics Engineer, and BI Developer opportunities in Toronto or remote."
|
||||||
|
|
||||||
|
|
||||||
|
def create_hero_section() -> dmc.Stack:
    """Build the centered hero block: the headline title above the dimmed tagline."""
    headline = dmc.Title(HEADLINE, order=1, ta="center")
    tagline = dmc.Text(TAGLINE, size="xl", c="dimmed", ta="center")
    return dmc.Stack([headline, tagline], gap="xs", py="xl")
|
||||||
|
|
||||||
|
|
||||||
|
def create_summary_section() -> dmc.Paper:
    """Build the bordered "About" panel from the SUMMARY text.

    SUMMARY is split on blank lines into paragraphs; each paragraph becomes
    its own text element under the section title.
    """
    children = [dmc.Title("About", order=2, size="h3")]
    for block in SUMMARY.strip().split("\n\n"):
        # Join the source-level line wraps back into a single line of text.
        children.append(dmc.Text(block.replace("\n", " "), size="md"))

    body = dmc.Stack(children, gap="md")
    return dmc.Paper(body, p="xl", radius="md", withBorder=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_tech_stack_section() -> dmc.Paper:
    """Build the bordered "Tech Stack" panel with one badge per TECH_STACK entry."""
    heading = dmc.Title("Tech Stack", order=2, size="h3")
    badges = [
        dmc.Badge(name, size="lg", variant="light", radius="sm")
        for name in TECH_STACK
    ]
    badge_row = dmc.Group(badges, gap="sm")

    body = dmc.Stack([heading, badge_row], gap="md")
    return dmc.Paper(body, p="xl", radius="md", withBorder=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_project_card(project: dict[str, str]) -> dmc.Card:
    """Build a bordered card for one project.

    Args:
        project: Project record; the 'status', 'title' and 'description'
            keys are read.

    Returns:
        Card with the title and a status badge on one row and the
        description below. The badge is blue for "In Development" and gray
        for any other status.
    """
    if project["status"] == "In Development":
        badge_color = "blue"
    else:
        badge_color = "gray"

    title_text = dmc.Text(project["title"], fw=500, size="lg")
    status_badge = dmc.Badge(project["status"], color=badge_color, variant="light")
    header_row = dmc.Group(
        [title_text, status_badge],
        justify="space-between",
        align="center",
    )
    blurb = dmc.Text(project["description"], size="sm", c="dimmed", mt="sm")

    return dmc.Card([header_row, blurb], withBorder=True, radius="md", p="lg")
|
||||||
|
|
||||||
|
|
||||||
|
def create_projects_section() -> dmc.Paper:
    """Build the bordered "Portfolio Projects" panel with a card per PROJECTS entry."""
    heading = dmc.Title("Portfolio Projects", order=2, size="h3")
    cards = [create_project_card(entry) for entry in PROJECTS]
    # One column at the base breakpoint, two from "sm" upwards.
    card_grid = dmc.SimpleGrid(cards, cols={"base": 1, "sm": 2}, spacing="lg")

    body = dmc.Stack([heading, card_grid], gap="md")
    return dmc.Paper(body, p="xl", radius="md", withBorder=True)
|
||||||
|
|
||||||
|
|
||||||
|
def create_availability_section() -> dmc.Text:
    """Build the small, dimmed, centered, italic availability line."""
    text_style = {"size": "sm", "c": "dimmed", "ta": "center", "fs": "italic"}
    return dmc.Text(AVAILABILITY, **text_style)
|
||||||
|
|
||||||
|
|
||||||
|
# Page layout picked up by Dash Pages: the sections are stacked top to bottom
# (hero, summary, tech stack, projects, divider, availability, bottom spacer).
layout = dmc.Container(
    dmc.Stack(
        [
            create_hero_section(),
            create_summary_section(),
            create_tech_stack_section(),
            create_projects_section(),
            dmc.Divider(my="lg"),
            create_availability_section(),
            dmc.Space(h=40),
        ],
        gap="xl",
    ),
    size="md",
    py="xl",
)
|
||||||
1
portfolio_app/pages/toronto/__init__.py
Normal file
1
portfolio_app/pages/toronto/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Toronto Housing Dashboard pages."""
|
||||||
1565
portfolio_app/pages/toronto/callbacks/__init__.py
Normal file
1565
portfolio_app/pages/toronto/callbacks/__init__.py
Normal file
File diff suppressed because it is too large
Load Diff
294
portfolio_app/pages/toronto/dashboard.py
Normal file
294
portfolio_app/pages/toronto/dashboard.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
"""Toronto Housing Dashboard page."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
from dash_iconify import DashIconify
|
||||||
|
|
||||||
|
from portfolio_app.components import (
|
||||||
|
create_map_controls,
|
||||||
|
create_metric_cards_row,
|
||||||
|
create_time_slider,
|
||||||
|
create_year_selector,
|
||||||
|
)
|
||||||
|
|
||||||
|
dash.register_page(__name__, path="/toronto", name="Toronto Housing")
|
||||||
|
|
||||||
|
# Metric options for the purchase market
# Each option pairs a display label with the metric's column-style value.
PURCHASE_METRIC_OPTIONS = [
    {"label": "Average Price", "value": "avg_price"},
    {"label": "Median Price", "value": "median_price"},
    {"label": "Sales Volume", "value": "sales_count"},
    {"label": "Days on Market", "value": "avg_dom"},
]

# Metric options for the rental market
RENTAL_METRIC_OPTIONS = [
    {"label": "Average Rent", "value": "avg_rent"},
    {"label": "Vacancy Rate", "value": "vacancy_rate"},
    {"label": "Rental Universe", "value": "rental_universe"},
]

# Sample metrics for KPI cards (will be populated by callbacks)
# Hard-coded placeholder values; optional keys ('prefix', 'suffix',
# 'format_spec', 'positive_is_good') are only set where they are needed.
SAMPLE_METRICS = [
    {
        "title": "Avg. Price",
        "value": 1125000,
        "delta": 2.3,
        "prefix": "$",
        "format_spec": ",.0f",
    },
    {
        "title": "Sales Volume",
        "value": 4850,
        "delta": -5.1,
        "format_spec": ",",
    },
    {
        "title": "Avg. DOM",
        "value": 18,
        "delta": 3,
        "suffix": " days",
        "positive_is_good": False,
    },
    {
        "title": "Avg. Rent",
        "value": 2450,
        "delta": 4.2,
        "prefix": "$",
        "format_spec": ",.0f",
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
def create_header() -> dmc.Group:
    """Build the dashboard header row.

    The left side holds the title and subtitle; the right side holds the
    link to the methodology page and the year selector.
    """
    heading = dmc.Stack(
        [
            dmc.Title("Toronto Housing Dashboard", order=1),
            dmc.Text(
                "Real estate market analysis for the Greater Toronto Area",
                c="dimmed",
            ),
        ],
        gap="xs",
    )

    methodology_button = dmc.Button(
        "Methodology",
        leftSection=DashIconify(icon="tabler:info-circle", width=18),
        variant="subtle",
        color="gray",
    )
    methodology_link = dcc.Link(methodology_button, href="/toronto/methodology")
    year_selector = create_year_selector(
        id_prefix="toronto",
        min_year=2020,
        default_year=2024,
        label="Year",
    )
    controls = dmc.Group([methodology_link, year_selector], gap="md")

    return dmc.Group(
        [heading, controls],
        justify="space-between",
        align="flex-start",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_kpi_section() -> dmc.Box:
    """Build the "Key Metrics" block.

    The cards are rendered from SAMPLE_METRICS inside a container with the
    id "toronto-kpi-cards".
    """
    heading = dmc.Title("Key Metrics", order=3, size="h4", mb="sm")
    cards_row = create_metric_cards_row(SAMPLE_METRICS, id_prefix="toronto-kpi")
    cards_container = html.Div(id="toronto-kpi-cards", children=[cards_row])
    return dmc.Box(children=[heading, cards_container])
|
||||||
|
|
||||||
|
|
||||||
|
def create_purchase_map_section() -> dmc.Grid:
    """Build the purchase-market map row: controls column plus choropleth graph.

    Both columns are full width at the base breakpoint and split 3/9 from
    "md" upwards.
    """
    controls = create_map_controls(
        id_prefix="purchase-map",
        metric_options=PURCHASE_METRIC_OPTIONS,
        default_metric="avg_price",
    )
    controls_column = dmc.GridCol(controls, span={"base": 12, "md": 3})

    choropleth = dcc.Graph(
        id="purchase-choropleth",
        config={"scrollZoom": True},
        style={"height": "500px"},
    )
    map_panel = dmc.Paper(
        children=[choropleth],
        p="xs",
        radius="sm",
        withBorder=True,
    )
    map_column = dmc.GridCol(map_panel, span={"base": 12, "md": 9})

    return dmc.Grid([controls_column, map_column], gutter="md")
|
||||||
|
|
||||||
|
|
||||||
|
def create_rental_map_section() -> dmc.Grid:
    """Build the rental-market map row: controls column plus choropleth graph.

    Both columns are full width at the base breakpoint and split 3/9 from
    "md" upwards.
    """
    controls = create_map_controls(
        id_prefix="rental-map",
        metric_options=RENTAL_METRIC_OPTIONS,
        default_metric="avg_rent",
    )
    controls_column = dmc.GridCol(controls, span={"base": 12, "md": 3})

    choropleth = dcc.Graph(
        id="rental-choropleth",
        config={"scrollZoom": True},
        style={"height": "500px"},
    )
    map_panel = dmc.Paper(
        children=[choropleth],
        p="xs",
        radius="sm",
        withBorder=True,
    )
    map_column = dmc.GridCol(map_panel, span={"base": 12, "md": 9})

    return dmc.Grid([controls_column, map_column], gutter="md")
|
||||||
|
|
||||||
|
|
||||||
|
def create_time_series_section() -> dmc.Grid:
    """Build the trends row: "Price Trends" and "Sales Volume" panels.

    The panels are full width at the base breakpoint and sit side by side
    from "md" upwards.
    """
    price_panel = dmc.Paper(
        children=[
            dmc.Title("Price Trends", order=4, size="h5", mb="sm"),
            dcc.Graph(
                id="price-time-series",
                config={"displayModeBar": False},
                style={"height": "350px"},
            ),
        ],
        p="md",
        radius="sm",
        withBorder=True,
    )
    volume_panel = dmc.Paper(
        children=[
            dmc.Title("Sales Volume", order=4, size="h5", mb="sm"),
            dcc.Graph(
                id="volume-time-series",
                config={"displayModeBar": False},
                style={"height": "350px"},
            ),
        ],
        p="md",
        radius="sm",
        withBorder=True,
    )

    columns = [
        dmc.GridCol(price_panel, span={"base": 12, "md": 6}),
        dmc.GridCol(volume_panel, span={"base": 12, "md": 6}),
    ]
    return dmc.Grid(columns, gutter="md")
|
||||||
|
|
||||||
|
|
||||||
|
def create_market_comparison_section() -> dmc.Paper:
    """Build the "Market Indicators" panel: title row with a time slider, then the chart."""
    heading = dmc.Title("Market Indicators", order=4, size="h5")
    slider = create_time_slider(
        id_prefix="market-comparison",
        min_year=2020,
        label="",
    )
    title_row = dmc.Group(
        [heading, slider],
        justify="space-between",
        align="center",
        mb="md",
    )

    chart = dcc.Graph(
        id="market-comparison-chart",
        config={"displayModeBar": False},
        style={"height": "400px"},
    )

    return dmc.Paper(
        children=[title_row, chart],
        p="md",
        radius="sm",
        withBorder=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_data_notice() -> dmc.Alert:
    """Build the light blue "Data Notice" alert describing the data behind the page."""
    message = (
        "This dashboard uses TRREB and CMHC data. "
        "Geographic boundaries require QGIS digitization to enable choropleth maps. "
        "Sample data is shown below."
    )
    notice_text = dmc.Text(message, size="sm")
    return dmc.Alert(
        children=[notice_text],
        title="Data Notice",
        color="blue",
        variant="light",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Register callbacks
|
||||||
|
from portfolio_app.pages.toronto import callbacks # noqa: E402, F401
|
||||||
|
|
||||||
|
# Page layout picked up by Dash Pages: header, data notice and KPI cards,
# followed by the purchase, rental and trends sections, each introduced by a
# labelled divider.
layout = dmc.Container(
    dmc.Stack(
        [
            create_header(),
            create_data_notice(),
            create_kpi_section(),
            dmc.Divider(my="md", label="Purchase Market", labelPosition="center"),
            create_purchase_map_section(),
            dmc.Divider(my="md", label="Rental Market", labelPosition="center"),
            create_rental_map_section(),
            dmc.Divider(my="md", label="Trends", labelPosition="center"),
            create_time_series_section(),
            create_market_comparison_section(),
            dmc.Space(h=40),
        ],
        gap="lg",
    ),
    size="xl",
    py="xl",
)
|
||||||
274
portfolio_app/pages/toronto/methodology.py
Normal file
274
portfolio_app/pages/toronto/methodology.py
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
"""Methodology page for Toronto Housing Dashboard."""
|
||||||
|
|
||||||
|
import dash
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
from dash import dcc, html
|
||||||
|
from dash_iconify import DashIconify
|
||||||
|
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path="/toronto/methodology",
|
||||||
|
title="Methodology | Toronto Housing Dashboard",
|
||||||
|
description="Data sources, methodology, and limitations for the Toronto Housing Dashboard",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def layout() -> dmc.Container:
    """Build the methodology page.

    The page consists of a back button, the page title and introduction,
    five bordered sections (data sources, geographic considerations, policy
    event annotations, limitations, technical implementation) and a closing
    back link.

    Returns:
        The ``dmc.Container`` holding the whole page.
    """

    def card(content, **spacing):
        """Wrap one section's children in the bordered paper used page-wide."""
        return dmc.Paper(
            p="lg", radius="md", withBorder=True, children=content, **spacing
        )

    def bullets(entries, **props):
        """Turn each entry (a string or a list of children) into a list item."""
        return dmc.List([dmc.ListItem(entry) for entry in entries], **props)

    def labelled(label, text):
        """Return list-item children made of a bold label followed by text."""
        return [html.Strong(label), text]

    # --- Top of page -----------------------------------------------------
    back_button = dcc.Link(
        dmc.Button(
            "Back to Dashboard",
            leftSection=DashIconify(icon="tabler:arrow-left", width=18),
            variant="subtle",
            color="gray",
        ),
        href="/toronto",
    )
    page_title = dmc.Title("Methodology", order=1, mb="lg", mt="md")
    introduction = dmc.Text(
        "This page documents the data sources, processing methodology, "
        "and known limitations of the Toronto Housing Dashboard.",
        size="lg",
        c="dimmed",
        mb="xl",
    )

    # --- Data sources: TRREB (purchases) ---------------------------------
    trreb_heading = dmc.Title("Purchase Data: TRREB", order=3, size="h4", mb="sm")
    trreb_summary = dmc.Text(
        [
            "The Toronto Regional Real Estate Board (TRREB) publishes monthly ",
            html.Strong("Market Watch"),
            " reports containing aggregate statistics for residential real estate "
            "transactions across the Greater Toronto Area.",
        ],
        mb="sm",
    )
    trreb_facts = bullets(
        [
            "Source: TRREB Market Watch Reports (PDF)",
            "Geographic granularity: ~35 TRREB Districts",
            "Temporal granularity: Monthly",
            "Coverage: 2021-present",
            [
                "Metrics: Sales count, average/median price, new listings, ",
                "active listings, days on market, sale-to-list ratio",
            ],
        ],
        mb="md",
    )
    trreb_link = dmc.Anchor(
        "TRREB Market Watch Archive",
        href="https://trreb.ca/market-data/market-watch/market-watch-archive/",
        target="_blank",
        mb="lg",
    )

    # --- Data sources: CMHC (rentals) ------------------------------------
    cmhc_heading = dmc.Title(
        "Rental Data: CMHC", order=3, size="h4", mb="sm", mt="md"
    )
    cmhc_summary = dmc.Text(
        [
            "Canada Mortgage and Housing Corporation (CMHC) conducts the annual ",
            html.Strong("Rental Market Survey"),
            " providing rental market statistics for major urban centres.",
        ],
        mb="sm",
    )
    cmhc_facts = bullets(
        [
            "Source: CMHC Rental Market Survey (Excel)",
            "Geographic granularity: ~20 CMHC Zones (Census Tract aligned)",
            "Temporal granularity: Annual (October survey)",
            "Coverage: 2021-present",
            [
                "Metrics: Average/median rent, vacancy rate, universe count, ",
                "turnover rate, year-over-year rent change",
            ],
        ],
        mb="md",
    )
    cmhc_link = dmc.Anchor(
        "CMHC Housing Market Information Portal",
        href="https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market",
        target="_blank",
    )

    data_sources = card(
        [
            dmc.Title("Data Sources", order=2, mb="md"),
            trreb_heading,
            trreb_summary,
            trreb_facts,
            trreb_link,
            cmhc_heading,
            cmhc_summary,
            cmhc_facts,
            cmhc_link,
        ],
        mb="lg",
    )

    # --- Geographic considerations ---------------------------------------
    alignment_warning = dmc.Alert(
        title="Important: Non-Aligned Geographies",
        color="yellow",
        mb="md",
        children=[
            "TRREB Districts and CMHC Zones do ",
            html.Strong("not"),
            " align geographically. They are displayed as separate layers and "
            "should not be directly compared at the sub-regional level.",
        ],
    )
    geographic_layers = bullets(
        [
            labelled(
                "TRREB Districts (~35): ",
                "Used for purchase/sales data visualization. "
                "Districts are defined by TRREB and labeled with codes like W01, C01, E01.",
            ),
            labelled(
                "CMHC Zones (~20): ",
                "Used for rental data visualization. "
                "Zones are aligned with Census Tract boundaries.",
            ),
            labelled(
                "City Neighbourhoods (158): ",
                "Reference overlay only. "
                "These are official City of Toronto neighbourhood boundaries.",
            ),
        ]
    )
    geography = card(
        [
            dmc.Title("Geographic Considerations", order=2, mb="md"),
            alignment_warning,
            dmc.Text(
                "The dashboard presents three geographic layers:",
                mb="sm",
            ),
            geographic_layers,
        ],
        mb="lg",
    )

    # --- Policy event annotations ----------------------------------------
    causation_note = dmc.Alert(
        title="No Causation Claims",
        color="blue",
        children=[
            "The presence of a policy marker near a market trend change does ",
            html.Strong("not"),
            " imply causation. Housing markets are influenced by numerous factors "
            "beyond policy interventions.",
        ],
    )
    policy_events = card(
        [
            dmc.Title("Policy Event Annotations", order=2, mb="md"),
            dmc.Text(
                "The time series charts include markers for significant policy events "
                "that may have influenced housing market conditions. These annotations are "
                "for contextual reference only.",
                mb="md",
            ),
            causation_note,
        ],
        mb="lg",
    )

    # --- Limitations ------------------------------------------------------
    limitations = card(
        [
            dmc.Title("Limitations", order=2, mb="md"),
            bullets(
                [
                    labelled(
                        "Aggregate Data: ",
                        "All statistics are aggregates. Individual property characteristics, "
                        "condition, and micro-location are not reflected.",
                    ),
                    labelled(
                        "Reporting Lag: ",
                        "TRREB data reflects closed transactions, which may lag market "
                        "conditions by 1-3 months. CMHC data is annual.",
                    ),
                    labelled(
                        "Geographic Boundaries: ",
                        "TRREB district boundaries were manually digitized from reference maps "
                        "and may contain minor inaccuracies.",
                    ),
                    labelled(
                        "Data Suppression: ",
                        "Some cells may be suppressed for confidentiality when transaction "
                        "counts are below thresholds.",
                    ),
                ]
            ),
        ],
        mb="lg",
    )

    # --- Technical implementation (last card, so no bottom margin) --------
    technical = card(
        [
            dmc.Title("Technical Implementation", order=2, mb="md"),
            dmc.Text("This dashboard is built with:", mb="sm"),
            bullets(
                [
                    "Python 3.11+ with Dash and Plotly",
                    "PostgreSQL with PostGIS for geospatial data",
                    "dbt for data transformation",
                    "Pydantic for data validation",
                    "SQLAlchemy 2.0 for database operations",
                ],
                mb="md",
            ),
            dmc.Anchor(
                "View source code on GitHub",
                href="https://github.com/lmiranda/personal-portfolio",
                target="_blank",
            ),
        ]
    )

    # --- Footer -----------------------------------------------------------
    back_link = dmc.Group(
        mt="xl",
        children=[
            dmc.Anchor(
                "← Back to Dashboard",
                href="/toronto",
                size="lg",
            ),
        ],
    )

    return dmc.Container(
        size="md",
        py="xl",
        children=[
            back_button,
            page_title,
            introduction,
            data_sources,
            geography,
            policy_events,
            limitations,
            technical,
            back_link,
        ],
    )
|
||||||
1
portfolio_app/toronto/__init__.py
Normal file
1
portfolio_app/toronto/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Toronto housing data logic."""
|
||||||
257
portfolio_app/toronto/demo_data.py
Normal file
257
portfolio_app/toronto/demo_data.py
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
"""Demo/sample data for testing the Toronto Housing Dashboard without full pipeline.
|
||||||
|
|
||||||
|
This module provides synthetic data for development and demonstration purposes.
|
||||||
|
Replace with real data from the database in production.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def get_demo_districts() -> list[dict[str, Any]]:
    """Build the sample TRREB district list used by the demo dashboard.

    Returns:
        One dict per district with the keys ``district_code``,
        ``district_name`` and ``area_type``, listed West first, then
        Central, then East.
    """
    # (code, name) pairs grouped by area; dict order is the output order.
    districts_by_area = {
        "West": (
            ("W01", "Long Branch"),
            ("W02", "Mimico"),
            ("W03", "Kingsway South"),
            ("W04", "Edenbridge"),
            ("W05", "Islington"),
            ("W06", "Rexdale"),
            ("W07", "Willowdale"),
            ("W08", "York"),
        ),
        "Central": (
            ("C01", "Downtown Core"),
            ("C02", "Annex"),
            ("C03", "Forest Hill"),
            ("C04", "Lawrence Park"),
            ("C06", "Willowdale East"),
            ("C07", "Thornhill"),
            ("C08", "Waterfront"),
        ),
        "East": (
            ("E01", "Leslieville"),
            ("E02", "The Beaches"),
            ("E03", "Danforth"),
            ("E04", "Birch Cliff"),
            ("E05", "Scarborough"),
        ),
    }

    return [
        {"district_code": code, "district_name": name, "area_type": area}
        for area, members in districts_by_area.items()
        for code, name in members
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_demo_purchase_data() -> list[dict[str, Any]]:
    """Return sample monthly purchase data for time series visualization.

    One record is generated for every month of 2024 and 2025 for each of
    three districts (W01, C01, E01), 72 records in total. Average prices
    start from a per-district base, drift slightly upward month over month
    and carry +/-5% pseudo-random noise. The generator uses a fixed seed, so
    every call returns the same records.

    Returns:
        List of dicts with the keys ``district_code``, ``full_date``,
        ``year``, ``month``, ``avg_price``, ``median_price``,
        ``sales_count``, ``new_listings``, ``active_listings``,
        ``days_on_market`` and ``sale_to_list_ratio``.
    """
    import random

    # A private generator seeded with 42 yields the same sequence that
    # random.seed(42) would, without reseeding the process-wide RNG that
    # other code may be relying on.
    rng = random.Random(42)

    base_prices = {
        "W01": 850000,
        "C01": 1200000,
        "E01": 950000,
    }

    data = []
    for year in [2024, 2025]:
        for month in range(1, 13):
            # Months elapsed since December 2023; drives the upward drift.
            trend = (year - 2024) * 12 + month
            trend_factor = 1 + (trend * 0.002)  # Slight upward trend

            for district, base_price in base_prices.items():
                # The order of the draws below determines the generated
                # values; keep it stable so the demo data does not change.
                price_variation = rng.uniform(-0.05, 0.05)
                avg_price = int(base_price * trend_factor * (1 + price_variation))
                sales = rng.randint(50, 200)

                data.append(
                    {
                        "district_code": district,
                        "full_date": date(year, month, 1),
                        "year": year,
                        "month": month,
                        "avg_price": avg_price,
                        "median_price": int(avg_price * 0.95),
                        "sales_count": sales,
                        "new_listings": int(sales * rng.uniform(1.2, 1.8)),
                        "active_listings": int(sales * rng.uniform(2.0, 3.5)),
                        "days_on_market": rng.randint(15, 45),
                        "sale_to_list_ratio": round(rng.uniform(0.95, 1.05), 2),
                    }
                )

    return data
|
||||||
|
|
||||||
|
|
||||||
|
def get_demo_rental_data() -> list[dict[str, Any]]:
    """Build the sample rental records shown by the demo dashboard.

    One record is produced per survey year (2021-2025), zone and bedroom
    type, in that nesting order. Rents grow linearly with the number of
    years since 2021 while the vacancy rate shrinks and the universe grows.

    Returns:
        List of dicts with the keys ``zone_code``, ``zone_name``,
        ``survey_year``, ``full_date``, ``bedroom_type``, ``average_rent``,
        ``median_rent``, ``vacancy_rate`` and ``universe``.
    """
    zone_names = {
        "Zone01": "Downtown",
        "Zone02": "Midtown",
        "Zone03": "North York",
        "Zone04": "Scarborough",
        "Zone05": "Etobicoke",
    }
    # Insertion order doubles as the bedroom-type iteration order.
    rent_by_bedroom = {
        "bachelor": 1800,
        "1_bedroom": 2200,
        "2_bedroom": 2800,
        "3_bedroom": 3400,
    }

    def build_row(survey_year: int, code: str, name: str, bedroom: str) -> dict[str, Any]:
        """Assemble the record for one year / zone / bedroom combination."""
        elapsed = survey_year - 2021
        growth = 1 + (elapsed * 0.05)  # ~5% rent increase per year
        rent = rent_by_bedroom[bedroom]
        return {
            "zone_code": code,
            "zone_name": name,
            "survey_year": survey_year,
            "full_date": date(survey_year, 10, 1),
            "bedroom_type": bedroom,
            "average_rent": int(rent * growth),
            "median_rent": int(rent * growth * 0.98),
            "vacancy_rate": round(2.5 - elapsed * 0.3, 1),  # decreasing vacancy
            "universe": 5000 + elapsed * 200,
        }

    return [
        build_row(survey_year, code, name, bedroom)
        for survey_year in range(2021, 2026)
        for code, name in zone_names.items()
        for bedroom in rent_by_bedroom
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_demo_policy_events() -> list[dict[str, Any]]:
    """Build the sample policy events used to annotate the demo charts.

    Every event is federal and marked ``bullish``. The five Bank of Canada
    rate cuts come first, followed by the two regulatory changes.

    Returns:
        List of dicts with the keys ``event_date``, ``effective_date``,
        ``level``, ``category``, ``title``, ``description`` and
        ``expected_direction``.
    """

    def federal_event(
        announced: date, effective: date, category: str, title: str, description: str
    ) -> dict[str, Any]:
        """Fill in the fields shared by every sample event."""
        return {
            "event_date": announced,
            "effective_date": effective,
            "level": "federal",
            "category": category,
            "title": title,
            "description": description,
            "expected_direction": "bullish",
        }

    # Rate cuts take effect on the day they are announced.
    rate_cuts = (
        (
            date(2024, 6, 5),
            "BoC Rate Cut (25bp)",
            "Bank of Canada cuts overnight rate by 25 basis points to 4.75%",
        ),
        (
            date(2024, 7, 24),
            "BoC Rate Cut (25bp)",
            "Bank of Canada cuts overnight rate by 25 basis points to 4.50%",
        ),
        (
            date(2024, 9, 4),
            "BoC Rate Cut (25bp)",
            "Bank of Canada cuts overnight rate by 25 basis points to 4.25%",
        ),
        (
            date(2024, 10, 23),
            "BoC Rate Cut (50bp)",
            "Bank of Canada cuts overnight rate by 50 basis points to 3.75%",
        ),
        (
            date(2024, 12, 11),
            "BoC Rate Cut (50bp)",
            "Bank of Canada cuts overnight rate by 50 basis points to 3.25%",
        ),
    )
    # Both regulatory changes share one announcement and one effective date.
    regulatory_changes = (
        (
            "CMHC 30-Year Amortization",
            "30-year amortization extended to all first-time buyers and new builds",
        ),
        (
            "Insured Mortgage Cap $1.5M",
            "Insured mortgage cap raised from $1M to $1.5M",
        ),
    )

    events = [
        federal_event(day, day, "monetary", title, description)
        for day, title, description in rate_cuts
    ]
    events.extend(
        federal_event(
            date(2024, 9, 16), date(2024, 12, 15), "regulatory", title, description
        )
        for title, description in regulatory_changes
    )
    return events
|
||||||
|
|
||||||
|
|
||||||
|
def get_demo_summary_metrics() -> dict[str, dict[str, Any]]:
    """Build the sample figures shown on the dashboard's KPI cards.

    Returns:
        Mapping from metric key (``avg_price``, ``total_sales``,
        ``avg_rent``, ``vacancy_rate``) to that card's settings: value,
        title, delta and its suffix, an optional value prefix or suffix,
        the format spec and whether a positive delta is good.
    """

    def card(
        value: Any,
        title: str,
        delta: float,
        delta_suffix: str,
        format_spec: str,
        positive_is_good: bool,
        **affix: str,
    ) -> dict[str, Any]:
        """Lay out one card; an optional prefix/suffix sits before format_spec."""
        return {
            "value": value,
            "title": title,
            "delta": delta,
            "delta_suffix": delta_suffix,
            **affix,
            "format_spec": format_spec,
            "positive_is_good": positive_is_good,
        }

    return {
        "avg_price": card(
            1067968, "Avg. Price (2025)", -4.7, "%", ",.0f", True, prefix="$"
        ),
        "total_sales": card(67610, "Total Sales (2024)", 2.6, "%", ",.0f", True),
        "avg_rent": card(2450, "Avg. Rent (2025)", 3.2, "%", ",.0f", False, prefix="$"),
        "vacancy_rate": card(1.8, "Vacancy Rate", -0.4, "pp", ".1f", False, suffix="%"),
    }
|
||||||
32
portfolio_app/toronto/loaders/__init__.py
Normal file
32
portfolio_app/toronto/loaders/__init__.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
"""Database loaders for Toronto housing data."""
|
||||||
|
|
||||||
|
from .base import bulk_insert, get_session, upsert_by_key
|
||||||
|
from .cmhc import load_cmhc_record, load_cmhc_rentals
|
||||||
|
from .dimensions import (
|
||||||
|
generate_date_key,
|
||||||
|
load_cmhc_zones,
|
||||||
|
load_neighbourhoods,
|
||||||
|
load_policy_events,
|
||||||
|
load_time_dimension,
|
||||||
|
load_trreb_districts,
|
||||||
|
)
|
||||||
|
from .trreb import load_trreb_purchases, load_trreb_record
|
||||||
|
|
||||||
|
# Names re-exported as the package's public API; each one is imported from a
# submodule of this package above.
__all__ = [
    # Base utilities
    "get_session",
    "bulk_insert",
    "upsert_by_key",
    # Dimension loaders
    "generate_date_key",
    "load_time_dimension",
    "load_trreb_districts",
    "load_cmhc_zones",
    "load_neighbourhoods",
    "load_policy_events",
    # Fact loaders
    "load_trreb_purchases",
    "load_trreb_record",
    "load_cmhc_rentals",
    "load_cmhc_record",
]
|
||||||
85
portfolio_app/toronto/loaders/base.py
Normal file
85
portfolio_app/toronto/loaders/base.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""Base loader utilities for database operations."""
|
||||||
|
|
||||||
|
from collections.abc import Generator
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from typing import Any, TypeVar
|
||||||
|
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from portfolio_app.toronto.models import get_session_factory
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def get_session() -> Generator[Session, None, None]:
    """Get a database session with automatic cleanup.

    A new session is created from the factory returned by
    ``get_session_factory()``. When the ``with`` body finishes normally the
    session is committed. If the body, or the commit itself, raises an
    ``Exception``, the session is rolled back and the exception is
    re-raised. The session is closed in every case.

    Yields:
        SQLAlchemy session that auto-commits on success, rollbacks on error.
    """
    session_factory = get_session_factory()
    session = session_factory()
    try:
        yield session
        # The commit is inside the try block, so a failed commit is rolled
        # back by the handler below as well.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        # Runs on success and on failure alike.
        session.close()
|
||||||
|
|
||||||
|
|
||||||
|
def bulk_insert(session: Session, objects: list[T]) -> int:
    """Stage a batch of ORM objects on the session and flush them.

    The objects are added in a single ``add_all`` call and the session is
    flushed right away. Nothing is committed here.

    Args:
        session: Active SQLAlchemy session.
        objects: ORM model instances to insert.

    Returns:
        How many objects were handed to the session.
    """
    session.add_all(objects)
    session.flush()

    inserted_count = len(objects)
    return inserted_count
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_by_key(
    session: Session,
    model_class: Any,
    objects: list[T],
    key_columns: list[str],
) -> tuple[int, int]:
    """Upsert objects based on unique key columns.

    For every object, one lookup query searches for an existing row whose
    key columns equal the object's values. When a row is found, every
    column other than the key columns and ``id`` is copied from the object
    onto it; otherwise the object is added to the session. The session is
    flushed once at the end and nothing is committed here.

    Args:
        session: Active SQLAlchemy session.
        model_class: The ORM model class.
        objects: List of ORM model instances to upsert.
        key_columns: Column names that form the unique key. Must not be
            empty.

    Returns:
        Tuple of (inserted_count, updated_count).

    Raises:
        ValueError: If ``key_columns`` is empty. Without any key the lookup
            has no filter criteria, so it would match an arbitrary row and
            each object would overwrite it.
    """
    if not key_columns:
        raise ValueError("key_columns must name at least one column")

    # The set of columns to copy is the same for every object, so it is
    # computed once. NOTE(review): values are read and written by column
    # name, which assumes attribute names match column names; confirm for
    # the models passed in.
    key_names = set(key_columns)
    updatable_columns = [
        column.name
        for column in model_class.__table__.columns
        if column.name not in key_names and column.name != "id"
    ]

    inserted = 0
    updated = 0

    for obj in objects:
        # Build filter for existing record
        filters = {col: getattr(obj, col) for col in key_columns}
        existing = session.query(model_class).filter_by(**filters).first()

        # Compare against None explicitly so a model that defines its own
        # truthiness cannot be mistaken for a missing row.
        if existing is None:
            session.add(obj)
            inserted += 1
            continue

        for name in updatable_columns:
            setattr(existing, name, getattr(obj, name))
        updated += 1

    session.flush()
    return inserted, updated
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user