Compare commits: `01a0984333...staging` (106 commits)

| SHA1 |
|---|
| a0417182a7 |
| 058d058975 |
| 0455ec69a0 |
| 9e216962b1 |
| dfa5f92d8a |
| 3bd2005c9d |
| 0c9769fd27 |
| cb908a18c3 |
| 558022f26e |
| 9e27fb8011 |
| cda2a078d9 |
| dd8de9810d |
| 56bcc1bb1d |
| ee0a7ef7ad |
| fd9850778e |
| 01e98103c7 |
| 62d1a52eed |
| e37611673f |
| 33306a911b |
| a5d6866d63 |
| f58b2f70e2 |
| 263b52d5e4 |
| f345d41535 |
| 14701f334c |
| 92763a17c4 |
| 546ee1cc92 |
| 9cc2cf0e00 |
| 28f239e8cd |
| c3de98c4a5 |
| eee015efac |
| 941305e71c |
| 54665bac63 |
| 3eb32a4766 |
| 69c4216cd5 |
| 6e00a17c05 |
| 8f3c5554f9 |
| 5839eabf1e |
| ebe48304d7 |
| 2fc2a1bdb5 |
| 6872aa510b |
| 9a1fc81f79 |
| cf6e874961 |
| 451dc10a10 |
| 193b9289b9 |
| 7a16e6d121 |
| ecc50e5d98 |
| ae3742630e |
| e70965b429 |
| 25954f17bb |
| bffd44a5a5 |
| bf6e392002 |
| d0f32edba7 |
| 4818c53fd2 |
| 1a878313f8 |
| 1eba95d4d1 |
| c9cf744d84 |
| 3054441630 |
| b6d210ec6b |
| 053acf6436 |
| f69d0c15a7 |
| 81993b23a7 |
| 457efec77f |
| f5f2bf3706 |
| fcaefabce8 |
| cb877df9e1 |
| 48b4eeeb62 |
| d3ca4ad4eb |
| e7bc545f25 |
| c8f4cc6241 |
| 3cd2eada7c |
| 138e6fe497 |
| cd7b5ce154 |
| e1135a77a8 |
| 39656ca836 |
| d64f90b3d3 |
| b3fb94c7cb |
| 1e0ea9cca2 |
| 9dfa24fb76 |
| 8701a12b41 |
| 6ef5460ad0 |
| 19ffc04573 |
| 08aa61f85e |
| 2a6db2a252 |
| 140d3085bf |
| ad6ee3d37f |
| 077e426d34 |
| b7907e68e4 |
| 457bb49395 |
| 88e23674a8 |
| 1c42533834 |
| 802efab8b8 |
| ead6d91a28 |
| 549e1fcbaf |
| 3ee4c20f5e |
| 68cc5bbe66 |
| 58f2c692e3 |
| 8200bbaa99 |
| 15da8a97ce |
| eb01ad1101 |
| 8453f78e31 |
| ff0f5a9b51 |
| 10f46f7cf1 |
| 160dc90308 |
| ff58e0a3ea |
| 38e4a0354b |
| c7e9b88adb |

.env.example (new file, 15 lines)
@@ -0,0 +1,15 @@
# Database Configuration
DATABASE_URL=postgresql://portfolio:portfolio_dev@localhost:5432/portfolio
POSTGRES_USER=portfolio
POSTGRES_PASSWORD=portfolio_dev
POSTGRES_DB=portfolio

# Application Settings
DASH_DEBUG=true
SECRET_KEY=change-me-in-production

# Logging
LOG_LEVEL=INFO

# Optional: dbt profile (defaults to profiles.yml)
# DBT_PROFILES_DIR=.

.gitea/workflows/ci.yml (new file, 35 lines)
@@ -0,0 +1,35 @@
name: CI

on:
  push:
    branches:
      - development
      - staging
      - main
  pull_request:
    branches:
      - development

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install ruff pytest

      - name: Run linter
        run: ruff check .

      - name: Run tests
        run: pytest tests/ -v --tb=short

.gitea/workflows/deploy-production.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Production

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Production Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin main
            git reset --hard origin/main

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Production deployment complete!"

.gitea/workflows/deploy-staging.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Staging

on:
  push:
    branches:
      - staging

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Staging Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.STAGING_HOST }}
          username: ${{ secrets.STAGING_USER }}
          key: ${{ secrets.STAGING_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin staging
            git reset --hard origin/staging

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Staging deployment complete!"

.gitignore (vendored, 27 lines changed)
@@ -1,4 +1,28 @@
# ---> Python
# ====================
# Project-Specific
# ====================

# Processed data (generated, not source)
data/*/processed/

# Reports (generated)
reports/

# Backups
backups/

# Notebook exports
notebooks/*.html

# dbt
dbt/target/
dbt/dbt_packages/
dbt/logs/

# ====================
# Python
# ====================

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -174,3 +198,4 @@ cython_debug/
# PyPI configuration file
.pypirc

dbt/.user.yml

.pre-commit-config.yaml (new file, 33 lines)
@@ -0,0 +1,33 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
        args: ['--maxkb=1000']
        exclude: ^data/(raw/|toronto/raw/geo/)
      - id: check-merge-conflict

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.1.9
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
      - id: ruff-format

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.8.0
    hooks:
      - id: mypy
        additional_dependencies:
          - pydantic>=2.0
          - pandas-stubs
          - types-requests
        args: [--ignore-missing-imports]
        exclude: ^(tests/|dbt/)

ci:
  autofix_commit_msg: "style: auto-fix by pre-commit hooks"
  autoupdate_commit_msg: "chore: update pre-commit hooks"

.python-version (new file, 1 line)
@@ -0,0 +1 @@
3.11

.vscode/settings.json (new file, vendored, 3 lines)
@@ -0,0 +1,3 @@
{
  "python.defaultInterpreterPath": "/home/leomiranda/WorkDev/personal/personal-portfolio/.venv/bin/python"
}

CLAUDE.md (new file, 339 lines)
@@ -0,0 +1,339 @@
# CLAUDE.md

Working context for Claude Code on the Analytics Portfolio project.

## ⛔ MANDATORY BEHAVIOR RULES - READ FIRST

**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.**

### 1. WHEN USER ASKS YOU TO CHECK SOMETHING - CHECK EVERYTHING
- Search ALL locations, not just where you think it is
- Check cache directories: `~/.claude/plugins/cache/`
- Check installed: `~/.claude/plugins/marketplaces/`
- Check source directories
- **NEVER say "no" or "that's not the issue" without exhaustive verification**

### 2. WHEN USER SAYS SOMETHING IS WRONG - BELIEVE THEM
- The user knows their system better than you
- Investigate thoroughly before disagreeing
- **Your confidence is often wrong. User's instincts are often right.**

### 3. NEVER SAY "DONE" WITHOUT VERIFICATION
- Run the actual command/script to verify
- Show the output to the user
- **"Done" means VERIFIED WORKING, not "I made changes"**

### 4. SHOW EXACTLY WHAT USER ASKS FOR
- If user asks for messages, show the MESSAGES
- If user asks for code, show the CODE
- **Do not interpret or summarize unless asked**

**FAILURE TO FOLLOW THESE RULES = WASTED USER TIME = UNACCEPTABLE**

---

## Project Status

**Last Completed Sprint**: 9 (Neighbourhood Dashboard Transition)
**Current State**: Ready for deployment sprint or new features
**Branch**: `development` (feature branches merge here)

---

## Quick Reference

### Run Commands

```bash
# Setup & Database
make setup          # Install deps, create .env, init pre-commit
make docker-up      # Start PostgreSQL + PostGIS (auto-detects x86/ARM)
make docker-down    # Stop containers
make db-init        # Initialize database schema
make db-reset       # Drop and recreate database (DESTRUCTIVE)

# Data Loading
make load-data      # Load all project data (currently: Toronto)
make load-toronto   # Load Toronto data from APIs

# Application
make run            # Start Dash dev server

# Testing & Quality
make test           # Run pytest
make lint           # Run ruff linter
make format         # Run ruff formatter
make typecheck      # Run mypy type checker
make ci             # Run all checks (lint, typecheck, test)

# dbt
make dbt-run        # Run dbt models
make dbt-test       # Run dbt tests
make dbt-docs       # Generate and serve dbt documentation

# Run `make help` for full target list
```

### Branch Workflow

1. Create feature branch FROM `development`: `git checkout -b feature/{sprint}-{description}`
2. Work and commit on feature branch
3. Merge INTO `development` when complete
4. `development` -> `staging` -> `main` for releases

---

## Code Conventions

### Import Style

| Context | Style | Example |
|---------|-------|---------|
| Same directory | Single dot | `from .neighbourhood import NeighbourhoodRecord` |
| Sibling directory | Double dot | `from ..schemas.neighbourhood import CensusRecord` |
| External packages | Absolute | `import pandas as pd` |

### Module Responsibilities

| Directory | Purpose |
|-----------|---------|
| `schemas/` | Pydantic models for data validation |
| `models/` | SQLAlchemy ORM for database persistence |
| `parsers/` | API/CSV extraction for raw data ingestion |
| `loaders/` | Database operations for data loading |
| `services/` | Query functions for dbt mart queries |
| `figures/` | Chart factories for Plotly figure generation |
| `errors/` | Custom exception classes (see `errors/exceptions.py`) |
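
To make the `services/` contract concrete, a minimal sketch of a mart query using the SQLAlchemy 2.0-style API named in the tech stack; the table name `mart_toronto.mart_neighbourhood` and the function are illustrative assumptions, not actual project code:

```python
from sqlalchemy import create_engine, text

# Connection string as in .env.example; the mart table below is hypothetical.
engine = create_engine("postgresql://portfolio:portfolio_dev@localhost:5432/portfolio")


def fetch_neighbourhood_scores(limit: int = 5) -> list[dict]:
    """Read rows from a mart table for the dashboard layer (illustrative)."""
    with engine.connect() as conn:  # 2.0-style: explicit connection, text() SQL
        result = conn.execute(
            text("SELECT * FROM mart_toronto.mart_neighbourhood LIMIT :n"),
            {"n": limit},
        )
        return [dict(row._mapping) for row in result]
```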

### Code Standards

- Python 3.10+ type hints: `list[str]`, `dict[str, int] | None`
- Single-responsibility functions with verb naming
- Early returns over deep nesting
- Google-style docstrings only for non-obvious behavior
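
A minimal sketch that combines these standards (the function and its inputs are illustrative, not from the codebase):

```python
def get_median_income(
    records: dict[int, int] | None, neighbourhood_id: int
) -> int | None:
    """Return the median household income for a neighbourhood.

    Returns None rather than raising when no census records are loaded,
    since an empty cache is expected during a fresh setup.
    """
    # Early return instead of nesting the lookup inside an `if records:` block.
    if not records:
        return None
    return records.get(neighbourhood_id)
```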

---

## Application Structure

**Entry Point:** `portfolio_app/app.py` (Dash app factory with Pages routing)

| Directory | Purpose |
|-----------|---------|
| `pages/` | Dash Pages (file-based routing) |
| `pages/toronto/` | Toronto Dashboard (`tabs/` for layouts, `callbacks/` for interactions) |
| `components/` | Shared UI components |
| `figures/toronto/` | Toronto chart factories |
| `toronto/` | Toronto data logic (parsers, loaders, schemas, models) |

**Key URLs:** `/` (home), `/toronto` (dashboard), `/blog` (listing), `/blog/{slug}` (articles), `/health` (status)

### Multi-Dashboard Architecture

- **figures/**: Domain-namespaced (`figures/toronto/`, future: `figures/football/`)
- **dbt models**: Domain subdirectories (`staging/toronto/`, `marts/toronto/`)
- **Database schemas**: Domain-specific raw data (`raw_toronto`, future: `raw_football`)

---

## Tech Stack (Locked)

| Layer | Technology | Version |
|-------|------------|---------|
| Database | PostgreSQL + PostGIS | 16.x |
| Validation | Pydantic | >=2.0 |
| ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
| Transformation | dbt-postgres | >=1.7 |
| Visualization | Dash + Plotly + dash-mantine-components | >=2.14 |
| Geospatial | GeoPandas + Shapely | >=0.14 |
| Python | 3.11+ | Via pyenv |

**Notes**: SQLAlchemy 2.0 + Pydantic 2.0 only. Docker Compose V2 format (no `version` field).

---

## Data Model Overview

### Database Schemas

| Schema | Purpose |
|--------|---------|
| `public` | Shared dimensions (dim_time) |
| `raw_toronto` | Toronto-specific raw/dimension tables |
| `stg_toronto` | Toronto dbt staging views |
| `int_toronto` | Toronto dbt intermediate views |
| `mart_toronto` | Toronto dbt mart tables |

### dbt Project: `portfolio`

| Layer | Naming | Purpose |
|-------|--------|---------|
| Shared | `stg_dimensions__*` | Cross-domain dimensions |
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
| Intermediate | `int_{domain}__{transform}` | Business logic |
| Marts | `mart_{domain}` | Final analytical tables |

For example, CMHC rental data flows from `stg_cmhc__rentals` through `int_rentals__annual` into the `mart_toronto` marts.

---

## Deferred Features

**Stop and flag if a task requires these**:

| Feature | Reason |
|---------|--------|
| Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
| ML prediction models | Energy project scope (future phase) |

---

## Environment Variables

Required in `.env`:

```bash
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
POSTGRES_USER=portfolio
POSTGRES_PASSWORD=<secure>
POSTGRES_DB=portfolio
DASH_DEBUG=true
SECRET_KEY=<random>
LOG_LEVEL=INFO
```

---

## Reference Documents

| Document | Location | Use When |
|----------|----------|----------|
| Project reference | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
| Developer guide | `docs/CONTRIBUTING.md` | How to add pages, tabs |
| Lessons learned | `docs/project-lessons-learned/INDEX.md` | Past issues and solutions |
| Deployment runbook | `docs/runbooks/deployment.md` | Deploying to environments |

---

## Plugin Reference

### Sprint Management: projman

**CRITICAL: Always use projman for sprint and task management.**

| Skill | Trigger | Purpose |
|-------|---------|---------|
| `/projman:sprint-plan` | New sprint/feature | Architecture analysis + Gitea issue creation |
| `/projman:sprint-start` | Begin implementation | Load lessons learned, start execution |
| `/projman:sprint-status` | Check progress | Review blockers and completion |
| `/projman:sprint-close` | Sprint completion | Capture lessons learned |

**Default workflow**: `/projman:sprint-plan` before code -> create issues -> `/projman:sprint-start` -> track via Gitea -> `/projman:sprint-close`

**Gitea**: `personal-projects/personal-portfolio` at `gitea.hotserv.cloud`

### Data Platform: data-platform

Use for dbt, PostgreSQL, and PostGIS operations.

| Skill | Purpose |
|-------|---------|
| `/data-platform:data-review` | Audit data integrity, schema validity, dbt compliance |
| `/data-platform:data-gate` | CI/CD data quality gate (pass/fail) |

**When to use:** Schema changes, dbt model development, data loading, before merging data PRs.

**MCP tools available:** `pg_connect`, `pg_query`, `pg_tables`, `pg_columns`, `pg_schemas`, `st_*` (PostGIS), `dbt_*` operations.

### Visualization: viz-platform

Use for Dash/Mantine component validation and chart creation.

| Skill | Purpose |
|-------|---------|
| `/viz-platform:component` | Inspect DMC component props and validation |
| `/viz-platform:chart` | Create themed Plotly charts |
| `/viz-platform:theme` | Apply/validate themes |
| `/viz-platform:dashboard` | Create dashboard layouts |

**When to use:** Dashboard development, new visualizations, component prop lookup.

### Code Quality: code-sentinel

Use for security scanning and refactoring analysis.

| Skill | Purpose |
|-------|---------|
| `/code-sentinel:security-scan` | Full security audit of codebase |
| `/code-sentinel:refactor` | Apply refactoring patterns |
| `/code-sentinel:refactor-dry` | Preview refactoring without applying |

**When to use:** Before major releases, after adding auth/data handling code, periodic audits.

### Documentation: doc-guardian

Use for documentation drift detection and synchronization.

| Skill | Purpose |
|-------|---------|
| `/doc-guardian:doc-audit` | Scan project for documentation drift |
| `/doc-guardian:doc-sync` | Synchronize pending documentation updates |

**When to use:** After significant code changes, before releases.

### Pull Requests: pr-review

Use for comprehensive PR review with multiple analysis perspectives.

| Skill | Purpose |
|-------|---------|
| `/pr-review:initial-setup` | Configure PR review for project |
| Triggered automatically | Security, performance, maintainability, test analysis |

**When to use:** Before merging significant PRs to `development` or `main`.

### Requirement Clarification: clarity-assist

Use when requirements are ambiguous or need decomposition.

**When to use:** Unclear specifications, complex feature requests, conflicting requirements.

### Contract Validation: contract-validator

Use for plugin interface validation.

| Skill | Purpose |
|-------|---------|
| `/contract-validator:agent-check` | Quick agent definition validation |
| `/contract-validator:full-validation` | Full plugin contract validation |

**When to use:** When modifying plugin integrations or agent definitions.

### Git Workflow: git-flow

Use for standardized git operations.

| Skill | Purpose |
|-------|---------|
| `/git-flow:commit` | Auto-generated conventional commit |
| `/git-flow:branch-start` | Create feature/fix/chore branch |
| `/git-flow:git-status` | Comprehensive status with recommendations |

**When to use:** Complex merge scenarios, branch management, standardized commits.

---

*Last Updated: February 2026*

LICENSE (new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024-2025 Leo Miranda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Makefile (new file, 200 lines)
@@ -0,0 +1,200 @@
.PHONY: setup docker-up docker-down db-init load-data load-all load-toronto load-toronto-only seed-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto

# Default target
.DEFAULT_GOAL := help

# Environment
VENV := .venv
PYTHON := $(VENV)/bin/python3
PIP := $(VENV)/bin/pip
DOCKER_COMPOSE := docker compose

# Architecture detection for Docker images
ARCH := $(shell uname -m)
ifeq ($(ARCH),aarch64)
    POSTGIS_IMAGE := imresamu/postgis:16-3.4
else ifeq ($(ARCH),arm64)
    POSTGIS_IMAGE := imresamu/postgis:16-3.4
else
    POSTGIS_IMAGE := postgis/postgis:16-3.4
endif
export POSTGIS_IMAGE

# Colors for output
BLUE := \033[0;34m
GREEN := \033[0;32m
YELLOW := \033[0;33m
NC := \033[0m

help: ## Show this help message
	@echo "Usage: make [target]"
	@echo ""
	@echo "Targets:"
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "  $(BLUE)%-15s$(NC) %s\n", $$1, $$2}'

# =============================================================================
# Setup
# =============================================================================

setup: ## Install dependencies, create .env, init pre-commit
	@echo "$(GREEN)Installing dependencies...$(NC)"
	$(PIP) install -e ".[dev,dbt]"
	@echo "$(GREEN)Setting up environment...$(NC)"
	@if [ ! -f .env ]; then cp .env.example .env; echo "$(YELLOW)Created .env from .env.example - please update values$(NC)"; fi
	@echo "$(GREEN)Installing pre-commit hooks...$(NC)"
	pre-commit install
	@echo "$(GREEN)Setup complete!$(NC)"

# =============================================================================
# Docker
# =============================================================================

docker-up: ## Start PostgreSQL + PostGIS containers
	@echo "$(GREEN)Starting database containers...$(NC)"
	@echo "$(BLUE)Architecture: $(ARCH) -> Using image: $(POSTGIS_IMAGE)$(NC)"
	$(DOCKER_COMPOSE) up -d
	@echo "$(GREEN)Waiting for database to be ready...$(NC)"
	@sleep 3
	@echo "$(GREEN)Database containers started!$(NC)"

docker-down: ## Stop containers
	@echo "$(YELLOW)Stopping containers...$(NC)"
	$(DOCKER_COMPOSE) down

docker-logs: ## View container logs
	$(DOCKER_COMPOSE) logs -f

# =============================================================================
# Database
# =============================================================================

db-init: ## Initialize database schema
	@echo "$(GREEN)Initializing database schema...$(NC)"
	$(PYTHON) scripts/db/init_schema.py

db-reset: ## Drop and recreate database (DESTRUCTIVE)
	@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
	@read -p "Are you sure? [y/N] " confirm && [ "$$confirm" = "y" ] || exit 1
	$(DOCKER_COMPOSE) down -v
	$(DOCKER_COMPOSE) up -d
	@sleep 3
	$(MAKE) db-init

# Domain-specific data loading
load-toronto: ## Load Toronto data from APIs
	@echo "$(GREEN)Loading Toronto neighbourhood data...$(NC)"
	$(PYTHON) scripts/data/load_toronto_data.py
	@echo "$(GREEN)Seeding Toronto development data...$(NC)"
	$(PYTHON) scripts/data/seed_amenity_data.py

load-toronto-only: ## Load Toronto data without running dbt or seeding
	@echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)"
	$(PYTHON) scripts/data/load_toronto_data.py --skip-dbt

# Aggregate data loading
load-data: load-toronto ## Load all project data (currently: Toronto)
	@echo "$(GREEN)All data loaded!$(NC)"

load-all: load-data ## Alias for load-data

seed-data: ## Seed sample development data (amenities, median_age)
	@echo "$(GREEN)Seeding development data...$(NC)"
	$(PYTHON) scripts/data/seed_amenity_data.py

# =============================================================================
# Application
# =============================================================================

run: ## Start Dash development server
	@echo "$(GREEN)Starting Dash server...$(NC)"
	$(PYTHON) -m portfolio_app.app

# =============================================================================
# Testing
# =============================================================================

test: ## Run pytest
	@echo "$(GREEN)Running tests...$(NC)"
	pytest

test-cov: ## Run pytest with coverage
	@echo "$(GREEN)Running tests with coverage...$(NC)"
	pytest --cov=portfolio_app --cov-report=html --cov-report=term

# =============================================================================
# dbt
# =============================================================================

dbt-run: ## Run dbt models
	@echo "$(GREEN)Running dbt models...$(NC)"
	@set -a && . ./.env && set +a && cd dbt && dbt run --profiles-dir .

dbt-test: ## Run dbt tests
	@echo "$(GREEN)Running dbt tests...$(NC)"
	@set -a && . ./.env && set +a && cd dbt && dbt test --profiles-dir .

dbt-docs: ## Generate dbt documentation
	@echo "$(GREEN)Generating dbt docs...$(NC)"
	@set -a && . ./.env && set +a && cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir .

# =============================================================================
# Code Quality
# =============================================================================

lint: ## Run ruff linter
	@echo "$(GREEN)Running linter...$(NC)"
	ruff check .

format: ## Run ruff formatter
	@echo "$(GREEN)Formatting code...$(NC)"
	ruff format .
	ruff check --fix .

typecheck: ## Run mypy type checker
	@echo "$(GREEN)Running type checker...$(NC)"
	mypy portfolio_app

ci: ## Run all checks (lint, typecheck, test)
	@echo "$(GREEN)Running CI checks...$(NC)"
	$(MAKE) lint
	$(MAKE) typecheck
	$(MAKE) test
	@echo "$(GREEN)All checks passed!$(NC)"

# =============================================================================
# Operations
# =============================================================================

logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
	@./scripts/logs.sh $(SERVICE)

run-detached: ## Start containers and wait for health check
	@./scripts/run-detached.sh

etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
	@./scripts/etl/toronto.sh $(MODE)

# =============================================================================
# Deployment
# =============================================================================

deploy: ## Deploy to production
	@echo "$(YELLOW)Deployment not yet configured$(NC)"
	@echo "TODO: Add deployment script"

# =============================================================================
# Cleanup
# =============================================================================

clean: ## Remove build artifacts and caches
	@echo "$(YELLOW)Cleaning up...$(NC)"
	rm -rf build/
	rm -rf dist/
	rm -rf *.egg-info/
	rm -rf .pytest_cache/
	rm -rf .ruff_cache/
	rm -rf .mypy_cache/
	rm -rf htmlcov/
	rm -rf .coverage
	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
	@echo "$(GREEN)Clean complete!$(NC)"

README.md (200 lines changed)
@@ -1,2 +1,200 @@
# personal-portfolio
# Analytics Portfolio

[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)

**Live Demo:** [leodata.science](https://leodata.science)

A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.

## Live Pages

| Route | Page | Description |
|-------|------|-------------|
| `/` | Home | Bio landing page |
| `/about` | About | Background and experience |
| `/projects` | Projects | Portfolio project showcase |
| `/resume` | Resume | Professional CV |
| `/contact` | Contact | Contact form |
| `/blog` | Blog | Technical articles |
| `/blog/{slug}` | Article | Individual blog posts |
| `/toronto` | Toronto Dashboard | Neighbourhood analysis (5 tabs) |
| `/toronto/methodology` | Methodology | Dashboard data sources and methods |
| `/health` | Health | API health check endpoint |

## Toronto Neighbourhood Dashboard

An interactive choropleth dashboard analyzing Toronto's 158 official neighbourhoods across five dimensions:

- **Overview**: Composite livability scores, income vs safety scatter
- **Housing**: Affordability index, rent trends, dwelling types
- **Safety**: Crime rates, breakdowns by type, trend analysis
- **Demographics**: Income distribution, age pyramids, population density
- **Amenities**: Parks, schools, transit accessibility

**Data Sources**:
- City of Toronto Open Data Portal (neighbourhoods, census profiles, amenities)
- Toronto Police Service (crime statistics)
- CMHC Rental Market Survey (rental data by zone)

## Architecture

```mermaid
flowchart LR
    subgraph Sources
        A1[City of Toronto API]
        A2[Toronto Police API]
        A3[CMHC Data]
    end

    subgraph ETL
        B1[Parsers]
        B2[Loaders]
    end

    subgraph Database
        C1[(PostgreSQL/PostGIS)]
        C2[dbt Models]
    end

    subgraph Application
        D1[Dash App]
        D2[Plotly Figures]
    end

    A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
```

**Pipeline Stages:**
- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
- **ETL**: Python parsers extract and validate data; loaders persist to database (sketched below)
- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
- **Application**: Dash serves interactive dashboards with Plotly visualizations

For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
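
To make the ETL stage concrete, a sketch of the parser-to-loader handoff; `NeighbourhoodRecord` matches the name used in the project's import examples, but the fields and the `parse_rows` helper are assumptions:

```python
from pydantic import BaseModel


class NeighbourhoodRecord(BaseModel):
    """One validated neighbourhood row (illustrative fields)."""

    neighbourhood_id: int
    neighbourhood_name: str
    population: int | None = None


def parse_rows(raw_rows: list[dict]) -> list[NeighbourhoodRecord]:
    # Parsers validate raw API payloads into typed records;
    # malformed rows raise pydantic.ValidationError instead of loading silently.
    return [NeighbourhoodRecord(**row) for row in raw_rows]


records = parse_rows(
    [{"neighbourhood_id": 1, "neighbourhood_name": "West Humber-Clairville"}]
)
# A loader would then persist these records into the raw_toronto schema,
# after which dbt rebuilds the staging, intermediate, and mart layers.
```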

## Quick Start

```bash
# Clone and setup
git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
cd personal-portfolio

# Install dependencies and configure environment
make setup

# Start database
make docker-up

# Initialize database schema
make db-init

# Run development server
make run
```

Visit `http://localhost:8050` to view the portfolio.

## Project Structure

```
portfolio_app/
├── app.py                    # Dash app factory
├── config.py                 # Pydantic settings
├── pages/
│   ├── home.py               # Bio landing (/)
│   ├── about.py              # About page
│   ├── contact.py            # Contact form
│   ├── projects.py           # Project showcase
│   ├── resume.py             # Resume/CV
│   ├── blog/                 # Blog system
│   │   ├── index.py          # Article listing
│   │   └── article.py        # Article renderer
│   └── toronto/              # Toronto dashboard
│       ├── dashboard.py      # Main layout with tabs
│       ├── methodology.py    # Data documentation
│       ├── tabs/             # Tab layouts (5)
│       └── callbacks/        # Interaction logic
├── components/               # Shared UI components
├── figures/
│   └── toronto/              # Toronto figure factories
├── content/
│   └── blog/                 # Markdown blog articles
├── toronto/                  # Toronto data logic
│   ├── parsers/              # API data extraction
│   ├── loaders/              # Database operations
│   ├── schemas/              # Pydantic models
│   └── models/               # SQLAlchemy ORM (raw_toronto schema)
└── errors/                   # Exception handling

dbt/                          # dbt project: portfolio
├── models/
│   ├── shared/               # Cross-domain dimensions
│   ├── staging/toronto/      # Toronto staging models
│   ├── intermediate/toronto/ # Toronto intermediate models
│   └── marts/toronto/        # Toronto analytical tables

notebooks/
└── toronto/                  # Toronto documentation (15 notebooks)
    ├── overview/             # Overview tab visualizations
    ├── housing/              # Housing tab visualizations
    ├── safety/               # Safety tab visualizations
    ├── demographics/         # Demographics tab visualizations
    └── amenities/            # Amenities tab visualizations

docs/
├── PROJECT_REFERENCE.md      # Architecture reference
├── CONTRIBUTING.md           # Developer guide
└── project-lessons-learned/
```

## Tech Stack

| Layer | Technology |
|-------|------------|
| Database | PostgreSQL 16 + PostGIS |
| Validation | Pydantic 2.x |
| ORM | SQLAlchemy 2.x |
| Transformation | dbt-postgres |
| Data Processing | Pandas, GeoPandas |
| Visualization | Dash + Plotly |
| UI Components | dash-mantine-components |
| Testing | pytest |
| Python | 3.11+ |

## Development

```bash
make test       # Run pytest
make lint       # Run ruff linter
make format     # Format code
make ci         # Run all checks
make dbt-run    # Run dbt models
make dbt-test   # Run dbt tests
```

## Environment Variables

Copy `.env.example` to `.env` and configure:

```bash
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
POSTGRES_USER=portfolio
POSTGRES_PASSWORD=<secure>
POSTGRES_DB=portfolio
DASH_DEBUG=true
SECRET_KEY=<random>
```
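
For reference, a sketch of how `config.py` might expose these values with pydantic-settings; the exact field set is an assumption based on `.env.example`:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env")

    database_url: str
    postgres_user: str = "portfolio"
    postgres_password: str = "portfolio_dev"
    postgres_db: str = "portfolio"
    dash_debug: bool = False
    secret_key: str = "change-me-in-production"
    log_level: str = "INFO"


settings = Settings()  # real environment variables override the .env file
```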

## Documentation

- **For developers**: See `docs/CONTRIBUTING.md` for setup and contribution guidelines
- **For Claude Code**: See `CLAUDE.md` for AI assistant context
- **Architecture**: See `docs/PROJECT_REFERENCE.md` for technical details

## License

MIT

## Author

Leo Miranda

Binary files added (contents not shown):
- data/raw/cmhc/rmr-toronto-2021-en.xlsx
- data/raw/cmhc/rmr-toronto-2022-en.xlsx
- data/raw/cmhc/rmr-toronto-2023-en.xlsx
- data/raw/cmhc/rmr-toronto-2024-en.xlsx
- data/raw/cmhc/rmr-toronto-2025-en.xlsx
- data/raw/trreb/mw2401.pdf
- data/raw/trreb/mw2402.pdf
- data/raw/trreb/mw2403.pdf
- data/raw/trreb/mw2404.pdf
- data/raw/trreb/mw2405.pdf
- data/raw/trreb/mw2406.pdf
- data/raw/trreb/mw2407.pdf
- data/raw/trreb/mw2408.pdf
- data/raw/trreb/mw2409.pdf
- data/raw/trreb/mw2410.pdf
- data/raw/trreb/mw2411.pdf
- data/raw/trreb/mw2412.pdf
- data/raw/trreb/mw2501.pdf
- data/raw/trreb/mw2502.pdf
- data/raw/trreb/mw2503.pdf
- data/raw/trreb/mw2504.pdf
- data/raw/trreb/mw2505.pdf
- data/raw/trreb/mw2506.pdf
- data/raw/trreb/mw2507.pdf
- data/raw/trreb/mw2508.pdf
- data/raw/trreb/mw2509.pdf
- data/raw/trreb/mw2510.pdf
- data/raw/trreb/mw2511.pdf
- data/raw/trreb/mw2512.pdf

data/toronto/raw/.gitkeep (new empty file)
data/toronto/raw/geo/.gitkeep (new empty file)

data/toronto/raw/geo/cmhc_zones.geojson (new file, 38 lines)
File diff suppressed because one or more lines are too long

data/toronto/raw/geo/toronto_neighbourhoods.geojson (new file, 1 line)
File diff suppressed because one or more lines are too long

data/toronto/reference/.gitkeep (new empty file)

dbt/dbt_project.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
name: 'portfolio'
config-version: 2

profile: 'portfolio'

model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

clean-targets:
  - "target"
  - "dbt_packages"

models:
  portfolio:
    shared:
      +materialized: view
      +schema: shared
    staging:
      toronto:
        +materialized: view
        +schema: stg_toronto
    intermediate:
      toronto:
        +materialized: view
        +schema: int_toronto
    marts:
      toronto:
        +materialized: table
        +schema: mart_toronto

dbt/macros/.gitkeep (new empty file)

dbt/macros/generate_schema_name.sql (new file, 11 lines)
@@ -0,0 +1,11 @@
-- Override dbt default schema name generation.
-- Use the custom schema name directly instead of
-- concatenating with the target schema.
-- See: https://docs.getdbt.com/docs/build/custom-schemas
{% macro generate_schema_name(custom_schema_name, node) %}
    {%- if custom_schema_name is none -%}
        {{ target.schema }}
    {%- else -%}
        {{ custom_schema_name | trim }}
    {%- endif -%}
{% endmacro %}
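
One note on why this override exists: dbt's built-in `generate_schema_name` concatenates the target schema with the custom one, so `+schema: mart_toronto` would otherwise build into a schema like `public_mart_toronto`. With this macro, models land in exactly `stg_toronto`, `int_toronto`, and `mart_toronto`, matching the schema names listed in `dbt_project.yml` and the Data Model Overview.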

dbt/macros/toronto/.gitkeep (new empty file)
dbt/models/intermediate/.gitkeep (new empty file)

dbt/models/intermediate/toronto/_intermediate.yml (new file, 87 lines)
@@ -0,0 +1,87 @@
version: 2

models:
  - name: int_rentals__annual
    description: "Rental data enriched with time and zone dimensions"
    columns:
      - name: rental_id
        data_tests:
          - unique
          - not_null
      - name: zone_code
        data_tests:
          - not_null

  - name: int_neighbourhood__demographics
    description: "Combined census demographics with neighbourhood attributes"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: census_year
        description: "Census year"
        data_tests:
          - not_null
      - name: income_quintile
        description: "Income quintile (1-5, city-wide)"

  - name: int_neighbourhood__housing
    description: "Housing indicators combining census and rental data"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: year
        description: "Reference year"
      - name: rent_to_income_pct
        description: "Rent as percentage of median income"
      - name: is_affordable
        description: "Boolean: rent <= 30% of income"

  - name: int_neighbourhood__crime_summary
    description: "Aggregated crime with year-over-year trends"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: year
        description: "Statistics year"
        data_tests:
          - not_null
      - name: crime_rate_per_100k
        description: "Total crime rate per 100K population"
      - name: yoy_change_pct
        description: "Year-over-year change percentage"

  - name: int_neighbourhood__amenity_scores
    description: "Normalized amenities per capita and per area"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: year
        description: "Reference year"
      - name: total_amenities_per_1000
        description: "Total amenities per 1000 population"
      - name: amenities_per_sqkm
        description: "Total amenities per square km"

  - name: int_rentals__neighbourhood_allocated
    description: "CMHC rental data allocated to neighbourhoods via area weights"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: year
        description: "Survey year"
        data_tests:
          - not_null
      - name: avg_rent_2bed
        description: "Weighted average 2-bedroom rent"
      - name: vacancy_rate
        description: "Weighted average vacancy rate"

dbt/models/intermediate/toronto/int_census__toronto_cma.sql (new file, 60 lines)
@@ -0,0 +1,60 @@
-- Intermediate: Toronto CMA census statistics by year
-- Provides city-wide averages for metrics not available at neighbourhood level
-- Used when neighbourhood-level data is unavailable (e.g., median household income)
-- Grain: One row per year

with years as (
    select * from {{ ref('int_year_spine') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

-- Census data is only available for 2016 and 2021
-- Map each analysis year to the appropriate census year
year_to_census as (
    select
        y.year,
        case
            when y.year <= 2018 then 2016
            else 2021
        end as census_year
    from years y
),

-- Toronto CMA median household income from Statistics Canada
-- Source: Census Profile Table 98-316-X2021001
-- 2016: $65,829 (from Census Profile)
-- 2021: $84,000 (from Census Profile)
cma_income as (
    select 2016 as census_year, 65829 as median_household_income union all
    select 2021 as census_year, 84000 as median_household_income
),

-- City-wide aggregates from loaded neighbourhood data
city_aggregates as (
    select
        census_year,
        sum(population) as total_population,
        avg(population_density) as avg_population_density,
        avg(unemployment_rate) as avg_unemployment_rate
    from census
    where population is not null
    group by census_year
),

final as (
    select
        y.year,
        y.census_year,
        ci.median_household_income,
        ca.total_population,
        ca.avg_population_density,
        ca.avg_unemployment_rate
    from year_to_census y
    left join cma_income ci on y.census_year = ci.census_year
    left join city_aggregates ca on y.census_year = ca.census_year
)

select * from final

dbt/models/intermediate/toronto/int_neighbourhood__amenity_scores.sql (new file, 79 lines)
@@ -0,0 +1,79 @@
-- Intermediate: Normalized amenities per 1000 population
-- Pivots amenity types and calculates per-capita metrics
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

amenities as (
    select * from {{ ref('stg_toronto__amenities') }}
),

-- Aggregate amenity types
amenities_by_year as (
    select
        neighbourhood_id,
        amenity_year as year,
        sum(case when amenity_type = 'Parks' then amenity_count else 0 end) as parks_count,
        sum(case when amenity_type = 'Schools' then amenity_count else 0 end) as schools_count,
        sum(case when amenity_type = 'Transit Stops' then amenity_count else 0 end) as transit_count,
        sum(case when amenity_type = 'Libraries' then amenity_count else 0 end) as libraries_count,
        sum(case when amenity_type = 'Community Centres' then amenity_count else 0 end) as community_centres_count,
        sum(case when amenity_type = 'Recreation' then amenity_count else 0 end) as recreation_count,
        sum(amenity_count) as total_amenities
    from amenities
    group by neighbourhood_id, amenity_year
),

amenity_scores as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.population,
        n.land_area_sqkm,

        coalesce(a.year, 2021) as year,

        -- Raw counts
        a.parks_count,
        a.schools_count,
        a.transit_count,
        a.libraries_count,
        a.community_centres_count,
        a.recreation_count,
        a.total_amenities,

        -- Per 1000 population
        case when n.population > 0
            then round(a.parks_count::numeric / n.population * 1000, 3)
            else null
        end as parks_per_1000,

        case when n.population > 0
            then round(a.schools_count::numeric / n.population * 1000, 3)
            else null
        end as schools_per_1000,

        case when n.population > 0
            then round(a.transit_count::numeric / n.population * 1000, 3)
            else null
        end as transit_per_1000,

        case when n.population > 0
            then round(a.total_amenities::numeric / n.population * 1000, 3)
            else null
        end as total_amenities_per_1000,

        -- Per square km
        case when n.land_area_sqkm > 0
            then round(a.total_amenities::numeric / n.land_area_sqkm, 2)
            else null
        end as amenities_per_sqkm

    from neighbourhoods n
    left join amenities_by_year a on n.neighbourhood_id = a.neighbourhood_id
)

select * from amenity_scores

dbt/models/intermediate/toronto/int_neighbourhood__crime_summary.sql (new file, 83 lines)
@@ -0,0 +1,83 @@
-- Intermediate: Aggregated crime by neighbourhood with YoY change
-- Pivots crime types and calculates year-over-year trends
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

crime as (
    select * from {{ ref('stg_toronto__crime') }}
),

-- Aggregate crime types
crime_by_year as (
    select
        neighbourhood_id,
        crime_year as year,
        sum(incident_count) as total_incidents,
        sum(case when crime_type = 'assault' then incident_count else 0 end) as assault_count,
        sum(case when crime_type = 'auto_theft' then incident_count else 0 end) as auto_theft_count,
        sum(case when crime_type = 'break_and_enter' then incident_count else 0 end) as break_enter_count,
        sum(case when crime_type = 'robbery' then incident_count else 0 end) as robbery_count,
        sum(case when crime_type = 'theft_over' then incident_count else 0 end) as theft_over_count,
        sum(case when crime_type = 'homicide' then incident_count else 0 end) as homicide_count,
        avg(rate_per_100k) as avg_rate_per_100k
    from crime
    group by neighbourhood_id, crime_year
),

-- Add year-over-year changes
with_yoy as (
    select
        c.*,
        lag(c.total_incidents, 1) over (
            partition by c.neighbourhood_id
            order by c.year
        ) as prev_year_incidents,
        round(
            (c.total_incidents - lag(c.total_incidents, 1) over (
                partition by c.neighbourhood_id
                order by c.year
            ))::numeric /
            nullif(lag(c.total_incidents, 1) over (
                partition by c.neighbourhood_id
                order by c.year
            ), 0) * 100,
            2
        ) as yoy_change_pct
    from crime_by_year c
),

crime_summary as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.population,

        w.year,
        w.total_incidents,
        w.assault_count,
        w.auto_theft_count,
        w.break_enter_count,
        w.robbery_count,
        w.theft_over_count,
        w.homicide_count,
        w.yoy_change_pct,

        -- Crime rate per 100K population (use source data avg, or calculate if population available)
        coalesce(
            w.avg_rate_per_100k,
            case
                when n.population > 0
                    then round(w.total_incidents::numeric / n.population * 100000, 2)
                else null
            end
        ) as crime_rate_per_100k

    from neighbourhoods n
    inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
)

select * from crime_summary

dbt/models/intermediate/toronto/int_neighbourhood__demographics.sql (new file, 45 lines)
@@ -0,0 +1,45 @@
-- Intermediate: Combined census demographics by neighbourhood
-- Joins neighbourhoods with census data for demographic analysis
-- Grain: One row per neighbourhood per census year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

demographics as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.land_area_sqkm,

        -- Use census_year from census data, or fall back to dim_neighbourhood's year
        coalesce(c.census_year, n.census_year, 2021) as census_year,
        c.population,
        c.population_density,
        c.median_household_income,
        c.average_household_income,
        c.median_age,
        c.unemployment_rate,
        c.pct_bachelors_or_higher as education_bachelors_pct,
        c.average_dwelling_value,

        -- Tenure mix
        c.pct_owner_occupied,
        c.pct_renter_occupied,

        -- Income quintile (city-wide comparison)
        ntile(5) over (
            partition by c.census_year
            order by c.median_household_income
        ) as income_quintile

    from neighbourhoods n
    left join census c on n.neighbourhood_id = c.neighbourhood_id
)

select * from demographics
dbt/models/intermediate/toronto/int_neighbourhood__housing.sql (new file, 56 lines)
@@ -0,0 +1,56 @@
-- Intermediate: Housing indicators by neighbourhood
-- Combines census housing data with allocated CMHC rental data
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

allocated_rentals as (
    select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),

housing as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,

        coalesce(r.year, c.census_year, 2021) as year,

        -- Census housing metrics
        c.pct_owner_occupied,
        c.pct_renter_occupied,
        c.average_dwelling_value,
        c.median_household_income,

        -- Allocated rental metrics (weighted average from CMHC zones)
        r.avg_rent_2bed,
        r.vacancy_rate,

        -- Affordability calculations
        case
            when c.median_household_income > 0 and r.avg_rent_2bed > 0
                then round((r.avg_rent_2bed * 12 / c.median_household_income) * 100, 2)
            else null
        end as rent_to_income_pct,

        -- Affordability threshold (30% of income)
        case
            when c.median_household_income > 0 and r.avg_rent_2bed > 0
                then r.avg_rent_2bed * 12 <= c.median_household_income * 0.30
            else null
        end as is_affordable

    from neighbourhoods n
    left join census c on n.neighbourhood_id = c.neighbourhood_id
    left join allocated_rentals r
        on n.neighbourhood_id = r.neighbourhood_id
        and r.year = c.census_year
)

select * from housing
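Worked example of the two affordability expressions above, with hypothetical values: a $2,400 two-bedroom rent against an $80,000 median income gives 2400 × 12 / 80000 = 36% rent-to-income, which fails the 30% threshold.

-- Hypothetical inputs: avg_rent_2bed = 2400, median_household_income = 80000
select
    round((2400 * 12 / 80000.0) * 100, 2) as rent_to_income_pct,  -- 36.00
    2400 * 12 <= 80000 * 0.30 as is_affordable;                   -- false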
dbt/models/intermediate/toronto/int_rentals__annual.sql (new file, 57 lines)
@@ -0,0 +1,57 @@
-- Intermediate: Annual rental data enriched with dimensions
-- Joins rentals with time and zone dimensions for analysis

with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

time_dim as (
    select * from {{ ref('stg_dimensions__time') }}
),

zone_dim as (
    select * from {{ ref('stg_dimensions__cmhc_zones') }}
),

enriched as (
    select
        r.rental_id,

        -- Time attributes
        t.date_key,
        t.full_date,
        t.year,
        t.month,
        t.quarter,

        -- Zone attributes
        z.zone_key,
        z.zone_code,
        z.zone_name,

        -- Bedroom type
        r.bedroom_type,

        -- Metrics
        r.rental_universe,
        r.avg_rent,
        r.median_rent,
        r.vacancy_rate,
        r.availability_rate,
        r.turnover_rate,
        r.year_over_year_rent_change,
        r.reliability_code,

        -- Calculated metrics
        case
            when r.rental_universe > 0 and r.vacancy_rate is not null
                then round(r.rental_universe * (r.vacancy_rate / 100), 0)
            else null
        end as vacant_units_estimate

    from rentals r
    inner join time_dim t on r.date_key = t.date_key
    inner join zone_dim z on r.zone_key = z.zone_key
)

select * from enriched
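The vacant_units_estimate expression is simply the rental universe times the vacancy fraction. With hypothetical values, a universe of 10,000 units at a 2.5% vacancy rate rounds to 250 vacant units:

-- Illustrative check (hypothetical values):
select round(10000 * (2.5 / 100), 0) as vacant_units_estimate;  -- 250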
dbt/models/intermediate/toronto/int_rentals__neighbourhood_allocated.sql (new file, 73 lines)
@@ -0,0 +1,73 @@
-- Intermediate: CMHC rentals allocated to neighbourhoods via area weights
-- Disaggregates zone-level rental data to neighbourhood level
-- Grain: One row per neighbourhood per year

with crosswalk as (
    select * from {{ ref('stg_cmhc__zone_crosswalk') }}
),

rentals as (
    select * from {{ ref('int_rentals__annual') }}
),

neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

-- Allocate rental metrics to neighbourhoods using area weights
allocated as (
    select
        c.neighbourhood_id,
        r.year,
        r.bedroom_type,

        -- Weighted average rent (using area weight)
        sum(r.avg_rent * c.area_weight) as weighted_avg_rent,
        sum(r.median_rent * c.area_weight) as weighted_median_rent,
        sum(c.area_weight) as total_weight,

        -- Weighted vacancy rate
        sum(r.vacancy_rate * c.area_weight) / nullif(sum(c.area_weight), 0) as vacancy_rate,

        -- Weighted rental universe
        sum(r.rental_universe * c.area_weight) as rental_units_estimate

    from crosswalk c
    inner join rentals r on c.cmhc_zone_code = r.zone_code
    group by c.neighbourhood_id, r.year, r.bedroom_type
),

-- Pivot to get 2-bedroom as primary metric
pivoted as (
    select
        neighbourhood_id,
        year,
        max(case when bedroom_type = '2bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
        max(case when bedroom_type = '1bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
        max(case when bedroom_type = 'bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
        max(case when bedroom_type = '3bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
        avg(vacancy_rate) as vacancy_rate,
        sum(rental_units_estimate) as total_rental_units
    from allocated
    group by neighbourhood_id, year
),

final as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,

        p.year,
        round(p.avg_rent_bachelor::numeric, 2) as avg_rent_bachelor,
        round(p.avg_rent_1bed::numeric, 2) as avg_rent_1bed,
        round(p.avg_rent_2bed::numeric, 2) as avg_rent_2bed,
        round(p.avg_rent_3bed::numeric, 2) as avg_rent_3bed,
        round(p.vacancy_rate::numeric, 2) as vacancy_rate,
        round(p.total_rental_units::numeric, 0) as total_rental_units

    from neighbourhoods n
    inner join pivoted p on n.neighbourhood_id = p.neighbourhood_id
)

select * from final
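To make the allocation concrete, with hypothetical weights and rents: a neighbourhood split 60/40 between zone A ($2,000 average 2-bed rent) and zone B ($2,500) gets (2000 × 0.6 + 2500 × 0.4) / (0.6 + 0.4) = $2,200, which is exactly the weighted_avg_rent / total_weight division performed in the pivoted CTE.

-- Hypothetical aggregated row for one neighbourhood-year:
with allocated(weighted_avg_rent, total_weight) as (
    values (2000 * 0.6 + 2500 * 0.4, 0.6 + 0.4)
)
select weighted_avg_rent / nullif(total_weight, 0) as avg_rent_2bed  -- 2200
from allocated;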
dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql (new file, 25 lines)
@@ -0,0 +1,25 @@
-- Intermediate: Toronto CMA rental metrics by year
-- Aggregates rental data to city-wide averages by year
-- Source: StatCan CMHC data at CMA level
-- Grain: One row per year

with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

-- Pivot bedroom types to columns
yearly_rentals as (
    select
        year,
        max(case when bedroom_type = 'bachelor' then avg_rent end) as avg_rent_bachelor,
        max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed,
        max(case when bedroom_type = '3bed' then avg_rent end) as avg_rent_3bed,
        -- Use 2-bedroom as standard reference
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_standard,
        max(vacancy_rate) as vacancy_rate
    from rentals
    group by year
)

select * from yearly_rentals
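The max(case ...) pattern above is the standard conditional-aggregation pivot: when each group holds one row per bedroom type, the case expression yields at most one non-null value, so max() simply picks it. A minimal sketch with hypothetical rows:

with rentals(year, bedroom_type, avg_rent) as (
    values (2024, '1bed', 1800), (2024, '2bed', 2400)
)
select
    year,
    max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,  -- 1800
    max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed   -- 2400
from rentals
group by year;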
dbt/models/intermediate/toronto/int_year_spine.sql (new file, 11 lines)
@@ -0,0 +1,11 @@
-- Intermediate: Year spine for analysis
-- Creates a row for each year from 2014-2025
-- Used to drive time-series analysis across all data sources

with years as (
    -- Generate years from available data sources
    -- Crime data: 2014-2024, Rentals: 2019-2025
    select generate_series(2014, 2025) as year
)

select year from years
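generate_series is Postgres-specific; if the project ever needs another warehouse, the dbt_utils package already pinned in packages.yml offers a portable spine. A sketch under the assumption that the stock dbt_utils.date_spine macro is used (its output column follows the macro's date_<datepart> naming convention, and end_date is exclusive):

-- Hypothetical portable alternative (yearly grain, 2014-2025):
select extract(year from date_year) as year
from (
    {{ dbt_utils.date_spine(
        datepart="year",
        start_date="cast('2014-01-01' as date)",
        end_date="cast('2026-01-01' as date)"
    ) }}
) as spine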
dbt/models/marts/.gitkeep (new file, 0 lines)
dbt/models/marts/toronto/_marts.yml (new file, 135 lines)
@@ -0,0 +1,135 @@
version: 2

models:
  - name: mart_toronto_rentals
    description: "Final mart for Toronto rental market analysis by zone and time"
    columns:
      - name: rental_id
        description: "Unique rental record identifier"
        data_tests:
          - unique
          - not_null

  - name: mart_neighbourhood_overview
    description: "Neighbourhood overview with composite livability score"
    meta:
      dashboard_tab: Overview
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
      - name: livability_score
        description: "Composite score: safety (40%), affordability (40%), amenities (20%)"
      - name: safety_score
        description: "Safety component score (0-100)"
      - name: affordability_score
        description: "Affordability component score (0-100)"
      - name: amenity_score
        description: "Amenity component score (0-100)"

  - name: mart_neighbourhood_housing
    description: "Housing and affordability metrics by neighbourhood"
    meta:
      dashboard_tab: Housing
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
      - name: rent_to_income_pct
        description: "Rent as percentage of median income"
      - name: affordability_index
        description: "100 = city average affordability"
      - name: rent_yoy_change_pct
        description: "Year-over-year rent change"

  - name: mart_neighbourhood_safety
    description: "Crime rates and safety metrics by neighbourhood"
    meta:
      dashboard_tab: Safety
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
      - name: crime_rate_per_100k
        description: "Total crime rate per 100K population"
      - name: crime_index
        description: "100 = city average crime rate"
      - name: safety_tier
        description: "Safety tier (1=safest, 5=highest crime)"
        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]

  - name: mart_neighbourhood_demographics
    description: "Demographics and income metrics by neighbourhood"
    meta:
      dashboard_tab: Demographics
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
      - name: median_household_income
        description: "Median household income"
      - name: income_index
        description: "100 = city average income"
      - name: income_quintile
        description: "Income quintile (1-5)"
        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]

  - name: mart_neighbourhood_amenities
    description: "Amenity access metrics by neighbourhood"
    meta:
      dashboard_tab: Amenities
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
      - name: total_amenities_per_1000
        description: "Total amenities per 1000 population"
      - name: amenity_index
        description: "100 = city average amenities"
      - name: amenity_tier
        description: "Amenity tier (1=best, 5=lowest)"
        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]
dbt/models/marts/toronto/mart_neighbourhood_amenities.sql (new file, 89 lines)
@@ -0,0 +1,89 @@
-- Mart: Neighbourhood Amenities Analysis
-- Dashboard Tab: Amenities
-- Grain: One row per neighbourhood per year

with amenities as (
    select * from {{ ref('int_neighbourhood__amenity_scores') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        year,
        avg(parks_per_1000) as city_avg_parks,
        avg(schools_per_1000) as city_avg_schools,
        avg(transit_per_1000) as city_avg_transit,
        avg(total_amenities_per_1000) as city_avg_total_amenities
    from amenities
    group by year
),

final as (
    select
        a.neighbourhood_id,
        a.neighbourhood_name,
        a.geometry,
        a.population,
        a.land_area_sqkm,
        a.year,

        -- Raw counts
        a.parks_count,
        a.schools_count,
        a.transit_count,
        a.libraries_count,
        a.community_centres_count,
        a.recreation_count,
        a.total_amenities,

        -- Per 1000 population
        a.parks_per_1000,
        a.schools_per_1000,
        a.transit_per_1000,
        a.total_amenities_per_1000,

        -- Per square km
        a.amenities_per_sqkm,

        -- City averages
        round(ca.city_avg_parks::numeric, 3) as city_avg_parks_per_1000,
        round(ca.city_avg_schools::numeric, 3) as city_avg_schools_per_1000,
        round(ca.city_avg_transit::numeric, 3) as city_avg_transit_per_1000,

        -- Amenity index (100 = city average)
        case
            when ca.city_avg_total_amenities > 0
                then round(a.total_amenities_per_1000 / ca.city_avg_total_amenities * 100, 1)
            else null
        end as amenity_index,

        -- Category indices
        case
            when ca.city_avg_parks > 0
                then round(a.parks_per_1000 / ca.city_avg_parks * 100, 1)
            else null
        end as parks_index,

        case
            when ca.city_avg_schools > 0
                then round(a.schools_per_1000 / ca.city_avg_schools * 100, 1)
            else null
        end as schools_index,

        case
            when ca.city_avg_transit > 0
                then round(a.transit_per_1000 / ca.city_avg_transit * 100, 1)
            else null
        end as transit_index,

        -- Amenity tier (1 = best, 5 = lowest)
        ntile(5) over (
            partition by a.year
            order by a.total_amenities_per_1000 desc
        ) as amenity_tier

    from amenities a
    left join city_avg ca on a.year = ca.year
)

select * from final
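The index expressions above all share one shape: neighbourhood value divided by the city average, scaled to 100. With hypothetical numbers, 3.0 amenities per 1,000 residents against a city average of 2.0 gives an index of 150, i.e. 50% above average.

-- Illustrative index calculation (hypothetical values):
select round(3.0 / 2.0 * 100, 1) as amenity_index;  -- 150.0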
dbt/models/marts/toronto/mart_neighbourhood_demographics.sql (new file, 81 lines)
@@ -0,0 +1,81 @@
-- Mart: Neighbourhood Demographics Analysis
-- Dashboard Tab: Demographics
-- Grain: One row per neighbourhood per census year

with demographics as (
    select * from {{ ref('int_neighbourhood__demographics') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        census_year,
        avg(median_household_income) as city_avg_income,
        avg(median_age) as city_avg_age,
        avg(unemployment_rate) as city_avg_unemployment,
        avg(education_bachelors_pct) as city_avg_education,
        avg(population_density) as city_avg_density
    from demographics
    group by census_year
),

final as (
    select
        d.neighbourhood_id,
        d.neighbourhood_name,
        d.geometry,
        d.census_year as year,

        -- Population
        d.population,
        d.land_area_sqkm,
        d.population_density,

        -- Income
        d.median_household_income,
        d.average_household_income,
        d.income_quintile,

        -- Income index (100 = city average)
        case
            when ca.city_avg_income > 0
                then round(d.median_household_income / ca.city_avg_income * 100, 1)
            else null
        end as income_index,

        -- Demographics
        d.median_age,
        d.unemployment_rate,
        d.education_bachelors_pct,

        -- Age index (100 = city average)
        case
            when ca.city_avg_age > 0
                then round(d.median_age / ca.city_avg_age * 100, 1)
            else null
        end as age_index,

        -- Housing tenure
        d.pct_owner_occupied,
        d.pct_renter_occupied,
        d.average_dwelling_value,

        -- Diversity index (using tenure mix as proxy - higher rental = more diverse typically)
        round(
            1 - (
                power(d.pct_owner_occupied / 100, 2) +
                power(d.pct_renter_occupied / 100, 2)
            ),
            3
        ) * 100 as tenure_diversity_index,

        -- City comparisons
        round(ca.city_avg_income::numeric, 2) as city_avg_income,
        round(ca.city_avg_age::numeric, 1) as city_avg_age,
        round(ca.city_avg_unemployment::numeric, 2) as city_avg_unemployment

    from demographics d
    left join city_avg ca on d.census_year = ca.census_year
)

select * from final
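The tenure_diversity_index above is a two-category Simpson/Herfindahl-style diversity measure. In notation, with p_own and p_rent as the ownership and rental shares:

D = 100 \left( 1 - \left( p_{\text{own}}^2 + p_{\text{rent}}^2 \right) \right)

For example, a 60/40 owner/renter split gives 100(1 - (0.36 + 0.16)) = 48, and a 50/50 split hits the two-category maximum of 50.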
dbt/models/marts/toronto/mart_neighbourhood_housing.sql (new file, 93 lines)
@@ -0,0 +1,93 @@
-- Mart: Neighbourhood Housing Analysis
-- Dashboard Tab: Housing
-- Grain: One row per neighbourhood per year

with housing as (
    select * from {{ ref('int_neighbourhood__housing') }}
),

rentals as (
    select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),

demographics as (
    select * from {{ ref('int_neighbourhood__demographics') }}
),

-- Add year-over-year rent changes
with_yoy as (
    select
        h.*,
        r.avg_rent_bachelor,
        r.avg_rent_1bed,
        r.avg_rent_3bed,
        r.total_rental_units,
        d.income_quintile,

        -- Previous year rent for YoY calculation
        lag(h.avg_rent_2bed, 1) over (
            partition by h.neighbourhood_id
            order by h.year
        ) as prev_year_rent_2bed

    from housing h
    left join rentals r
        on h.neighbourhood_id = r.neighbourhood_id
        and h.year = r.year
    left join demographics d
        on h.neighbourhood_id = d.neighbourhood_id
        and h.year = d.census_year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,

        -- Tenure mix
        pct_owner_occupied,
        pct_renter_occupied,

        -- Housing values
        average_dwelling_value,
        median_household_income,

        -- Rental metrics
        avg_rent_bachelor,
        avg_rent_1bed,
        avg_rent_2bed,
        avg_rent_3bed,
        vacancy_rate,
        total_rental_units,

        -- Affordability
        rent_to_income_pct,
        is_affordable,

        -- Affordability index (100 = city average)
        round(
            rent_to_income_pct / nullif(
                avg(rent_to_income_pct) over (partition by year),
                0
            ) * 100,
            1
        ) as affordability_index,

        -- Year-over-year rent change
        case
            when prev_year_rent_2bed > 0
                then round(
                    (avg_rent_2bed - prev_year_rent_2bed) / prev_year_rent_2bed * 100,
                    2
                )
            else null
        end as rent_yoy_change_pct,

        income_quintile

    from with_yoy
)

select * from final
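The lag() window above pairs each row with the prior year's rent within the same neighbourhood; the YoY percentage in the final CTE is then a plain difference over the prior value. An illustrative sketch with hypothetical rents:

with rents(neighbourhood_id, year, avg_rent_2bed) as (
    values (1, 2023, 2300), (1, 2024, 2400)
)
select
    year,
    lag(avg_rent_2bed) over (partition by neighbourhood_id order by year) as prev_year_rent_2bed,
    round(
        (avg_rent_2bed - lag(avg_rent_2bed) over (partition by neighbourhood_id order by year))
        / lag(avg_rent_2bed) over (partition by neighbourhood_id order by year)::numeric * 100,
        2
    ) as rent_yoy_change_pct  -- null for 2023, 4.35 for 2024
from rents;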
dbt/models/marts/toronto/mart_neighbourhood_overview.sql (new file, 153 lines)
@@ -0,0 +1,153 @@
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)

with years as (
    select * from {{ ref('int_year_spine') }}
),

neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

-- Create base: all neighbourhoods × all years
neighbourhood_years as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        y.year
    from neighbourhoods n
    cross join years y
),

-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
    select * from {{ ref('stg_toronto__census') }}
),

census_mapped as (
    select
        ny.neighbourhood_id,
        ny.year,
        c.population,
        c.unemployment_rate,
        c.pct_bachelors_or_higher as education_bachelors_pct
    from neighbourhood_years ny
    left join census c on ny.neighbourhood_id = c.neighbourhood_id
        -- Use census year <= analysis year, prefer most recent
        and c.census_year = (
            select max(c2.census_year)
            from {{ ref('stg_toronto__census') }} c2
            where c2.neighbourhood_id = ny.neighbourhood_id
                and c2.census_year <= ny.year
        )
),

-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
    select * from {{ ref('int_census__toronto_cma') }}
),

-- Crime data (2014-2024)
crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
    select * from {{ ref('int_rentals__toronto_cma') }}
),

-- Compute scores
scored as (
    select
        ny.neighbourhood_id,
        ny.neighbourhood_name,
        ny.geometry,
        ny.year,
        cm.population,
        -- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
        cma.median_household_income,

        -- Safety score: inverse of crime rate (higher = safer)
        case
            when cr.crime_rate_per_100k is not null
                then 100 - percent_rank() over (
                    partition by ny.year
                    order by cr.crime_rate_per_100k
                ) * 100
            else null
        end as safety_score,

        -- Affordability score: inverse of rent-to-income ratio
        -- Using CMA-level income since neighbourhood-level not available
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
                then 100 - percent_rank() over (
                    partition by ny.year
                    order by (r.avg_rent_standard * 12 / cma.median_household_income)
                ) * 100
            else null
        end as affordability_score,

        -- Raw metrics
        cr.crime_rate_per_100k,
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
                then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
            else null
        end as rent_to_income_pct,
        r.avg_rent_standard as avg_rent_2bed,
        r.vacancy_rate

    from neighbourhood_years ny
    left join census_mapped cm
        on ny.neighbourhood_id = cm.neighbourhood_id
        and ny.year = cm.year
    left join cma_census cma
        on ny.year = cma.year
    left join crime cr
        on ny.neighbourhood_id = cr.neighbourhood_id
        and ny.year = cr.year
    left join rentals r
        on ny.year = r.year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,
        population,
        median_household_income,

        -- Component scores (0-100)
        round(safety_score::numeric, 1) as safety_score,
        round(affordability_score::numeric, 1) as affordability_score,
        -- TODO: Replace with actual amenity score when fact_amenities is populated
        -- Currently uses neutral placeholder (50.0) which affects livability_score accuracy
        50.0 as amenity_score,

        -- Composite livability score: safety (40%), affordability (40%), amenities (20%)
        round(
            (coalesce(safety_score, 50) * 0.40 +
             coalesce(affordability_score, 50) * 0.40 +
             50 * 0.20)::numeric,
            1
        ) as livability_score,

        -- Raw metrics
        crime_rate_per_100k,
        rent_to_income_pct,
        avg_rent_2bed,
        vacancy_rate,
        null::numeric as total_amenities_per_1000

    from scored
)

select * from final
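Worked example of the composite above, with hypothetical component scores: safety 80, affordability 60, and the placeholder amenity score of 50 give 80 × 0.40 + 60 × 0.40 + 50 × 0.20 = 66.0.

-- Illustrative composite (hypothetical component scores):
select round((80 * 0.40 + 60 * 0.40 + 50 * 0.20)::numeric, 1) as livability_score;  -- 66.0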
dbt/models/marts/toronto/mart_neighbourhood_safety.sql (new file, 78 lines)
@@ -0,0 +1,78 @@
-- Mart: Neighbourhood Safety Analysis
-- Dashboard Tab: Safety
-- Grain: One row per neighbourhood per year

with crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        year,
        avg(crime_rate_per_100k) as city_avg_crime_rate,
        avg(assault_count) as city_avg_assault,
        avg(auto_theft_count) as city_avg_auto_theft,
        avg(break_enter_count) as city_avg_break_enter
    from crime
    group by year
),

final as (
    select
        c.neighbourhood_id,
        c.neighbourhood_name,
        c.geometry,
        c.population,
        c.year,

        -- Total crime
        c.total_incidents,
        c.crime_rate_per_100k,
        c.yoy_change_pct as crime_yoy_change_pct,

        -- Crime breakdown
        c.assault_count,
        c.auto_theft_count,
        c.break_enter_count,
        c.robbery_count,
        c.theft_over_count,
        c.homicide_count,

        -- Per 100K rates by type
        case when c.population > 0
            then round(c.assault_count::numeric / c.population * 100000, 2)
            else null
        end as assault_rate_per_100k,

        case when c.population > 0
            then round(c.auto_theft_count::numeric / c.population * 100000, 2)
            else null
        end as auto_theft_rate_per_100k,

        case when c.population > 0
            then round(c.break_enter_count::numeric / c.population * 100000, 2)
            else null
        end as break_enter_rate_per_100k,

        -- Comparison to city average
        round(ca.city_avg_crime_rate::numeric, 2) as city_avg_crime_rate,

        -- Crime index (100 = city average)
        case
            when ca.city_avg_crime_rate > 0
                then round(c.crime_rate_per_100k / ca.city_avg_crime_rate * 100, 1)
            else null
        end as crime_index,

        -- Safety tier based on crime rate percentile (ascending, so 1 = safest, 5 = highest crime)
        ntile(5) over (
            partition by c.year
            order by c.crime_rate_per_100k
        ) as safety_tier

    from crime c
    left join city_avg ca on c.year = ca.year
)

select * from final
dbt/models/marts/toronto/mart_toronto_rentals.sql (new file, 64 lines)
@@ -0,0 +1,64 @@
-- Mart: Toronto Rental Market Analysis
-- Final analytical table for rental market visualization
-- Grain: One row per zone per bedroom type per survey year

with rentals as (
    select * from {{ ref('int_rentals__annual') }}
),

-- Add year-over-year calculations
with_yoy as (
    select
        r.*,

        -- Previous year values
        lag(r.avg_rent, 1) over (
            partition by r.zone_code, r.bedroom_type
            order by r.year
        ) as avg_rent_prev_year,

        lag(r.vacancy_rate, 1) over (
            partition by r.zone_code, r.bedroom_type
            order by r.year
        ) as vacancy_rate_prev_year

    from rentals r
),

final as (
    select
        rental_id,
        date_key,
        full_date,
        year,
        quarter,
        zone_key,
        zone_code,
        zone_name,
        bedroom_type,
        rental_universe,
        avg_rent,
        median_rent,
        vacancy_rate,
        availability_rate,
        turnover_rate,
        year_over_year_rent_change,
        reliability_code,
        vacant_units_estimate,

        -- Calculated year-over-year (if not provided)
        coalesce(
            year_over_year_rent_change,
            case
                when avg_rent_prev_year > 0
                    then round(((avg_rent - avg_rent_prev_year) / avg_rent_prev_year) * 100, 2)
                else null
            end
        ) as rent_change_pct,

        vacancy_rate - vacancy_rate_prev_year as vacancy_rate_change

    from with_yoy
)

select * from final
dbt/models/shared/_shared.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
version: 2

models:
  - name: stg_dimensions__time
    description: "Staged time dimension - shared across all projects"
    columns:
      - name: date_key
        description: "Primary key (YYYYMM format)"
        data_tests:
          - unique
          - not_null
      - name: full_date
        description: "First day of month"
        data_tests:
          - not_null
      - name: year
        description: "Calendar year"
        data_tests:
          - not_null
      - name: month
        description: "Month number (1-12)"
        data_tests:
          - not_null
      - name: quarter
        description: "Quarter (1-4)"
        data_tests:
          - not_null
      - name: month_name
        description: "Month name"
        data_tests:
          - not_null
      - name: is_month_start
        description: "Always true (monthly grain)"
dbt/models/shared/_sources.yml (new file, 25 lines)
@@ -0,0 +1,25 @@
version: 2

sources:
  - name: shared
    description: "Shared dimension tables used across all dashboards"
    database: portfolio
    schema: public
    tables:
      - name: dim_time
        description: "Time dimension (monthly grain) - shared across all projects"
        columns:
          - name: date_key
            description: "Primary key (YYYYMM format)"
          - name: full_date
            description: "First day of month"
          - name: year
            description: "Calendar year"
          - name: month
            description: "Month number (1-12)"
          - name: quarter
            description: "Quarter (1-4)"
          - name: month_name
            description: "Month name"
          - name: is_month_start
            description: "Always true (monthly grain)"
dbt/models/shared/stg_dimensions__time.sql (new file, 22 lines)
@@ -0,0 +1,22 @@
-- Staged time dimension
-- Source: shared.dim_time table
-- Grain: One row per month
-- Note: Shared dimension used across all dashboard projects

with source as (
    select * from {{ source('shared', 'dim_time') }}
),

staged as (
    select
        date_key,
        full_date,
        year,
        month,
        quarter,
        month_name,
        is_month_start
    from source
)

select * from staged
dbt/models/staging/.gitkeep (new file, 0 lines)
dbt/models/staging/toronto/_sources.yml (new file, 93 lines)
@@ -0,0 +1,93 @@
version: 2

sources:
  - name: toronto
    description: "Toronto data loaded from CMHC and City of Toronto sources"
    database: portfolio
    schema: raw_toronto
    tables:
      - name: fact_rentals
        description: "CMHC annual rental survey data by zone and bedroom type"
        columns:
          - name: id
            description: "Primary key"
          - name: date_key
            description: "Foreign key to dim_time"
          - name: zone_key
            description: "Foreign key to dim_cmhc_zone"

      - name: dim_cmhc_zone
        description: "CMHC zone dimension with geometry"
        columns:
          - name: zone_key
            description: "Primary key"
          - name: zone_code
            description: "CMHC zone code"

      - name: dim_neighbourhood
        description: "City of Toronto neighbourhoods (158 official boundaries)"
        columns:
          - name: neighbourhood_id
            description: "Primary key"

      - name: dim_policy_event
        description: "Housing policy events for annotation"
        columns:
          - name: event_id
            description: "Primary key"

      - name: fact_census
        description: "Census demographics by neighbourhood and year"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: census_year
            description: "Census year (2016, 2021, etc.)"
          - name: population
            description: "Total population"
          - name: median_household_income
            description: "Median household income"

      - name: fact_crime
        description: "Crime statistics by neighbourhood, year, and type"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: year
            description: "Statistics year"
          - name: crime_type
            description: "Type of crime"
          - name: count
            description: "Number of incidents"
          - name: rate_per_100k
            description: "Rate per 100,000 population"

      - name: fact_amenities
        description: "Amenity counts by neighbourhood and type"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: amenity_type
            description: "Type of amenity (parks, schools, transit)"
          - name: count
            description: "Number of amenities"
          - name: year
            description: "Reference year"

      - name: bridge_cmhc_neighbourhood
        description: "CMHC zone to neighbourhood mapping with area weights"
        columns:
          - name: id
            description: "Primary key"
          - name: cmhc_zone_code
            description: "CMHC zone code"
          - name: neighbourhood_id
            description: "Neighbourhood ID"
          - name: weight
            description: "Proportional area weight (0-1)"
dbt/models/staging/toronto/_staging.yml (new file, 120 lines)
@@ -0,0 +1,120 @@
version: 2

models:
  - name: stg_cmhc__rentals
    description: "Staged CMHC rental market data from fact_rentals"
    columns:
      - name: rental_id
        description: "Unique identifier for rental record"
        data_tests:
          - unique
          - not_null
      - name: date_key
        description: "Date dimension key (YYYYMM format, monthly grain)"
        data_tests:
          - not_null
      - name: zone_key
        description: "CMHC zone dimension key"
        data_tests:
          - not_null

  - name: stg_dimensions__cmhc_zones
    description: "Staged CMHC zone dimension"
    columns:
      - name: zone_key
        description: "Zone dimension key"
        data_tests:
          - unique
          - not_null
      - name: zone_code
        description: "CMHC zone code"
        data_tests:
          - unique
          - not_null

  - name: stg_toronto__neighbourhoods
    description: "Staged Toronto neighbourhood dimension (158 official boundaries)"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood primary key"
        data_tests:
          - unique
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry (POLYGON)"

  - name: stg_toronto__census
    description: "Staged census demographics by neighbourhood"
    columns:
      - name: census_id
        description: "Census record identifier"
        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        data_tests:
          - not_null
      - name: census_year
        description: "Census year (2016, 2021)"
        data_tests:
          - not_null

  - name: stg_toronto__crime
    description: "Staged crime statistics by neighbourhood"
    columns:
      - name: crime_id
        description: "Crime record identifier"
        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        data_tests:
          - not_null
      - name: crime_type
        description: "Type of crime"
        data_tests:
          - not_null

  - name: stg_toronto__amenities
    description: "Staged amenity counts by neighbourhood"
    columns:
      - name: amenity_id
        description: "Amenity record identifier"
        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        data_tests:
          - not_null
      - name: amenity_type
        description: "Type of amenity"
        data_tests:
          - not_null

  - name: stg_cmhc__zone_crosswalk
    description: "Staged CMHC zone to neighbourhood crosswalk with area weights"
    columns:
      - name: crosswalk_id
        description: "Crosswalk record identifier"
        data_tests:
          - unique
          - not_null
      - name: cmhc_zone_code
        description: "CMHC zone code"
        data_tests:
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        data_tests:
          - not_null
      - name: area_weight
        description: "Proportional area weight (0-1)"
        data_tests:
          - not_null
dbt/models/staging/toronto/stg_cmhc__rentals.sql (new file, 31 lines)
@@ -0,0 +1,31 @@
-- Staged CMHC rental market survey data
-- Source: fact_rentals table loaded from CMHC/StatCan
-- Grain: One row per zone per bedroom type per survey year

with source as (
    select
        f.*,
        t.year as survey_year
    from {{ source('toronto', 'fact_rentals') }} f
    join {{ source('shared', 'dim_time') }} t on f.date_key = t.date_key
),

staged as (
    select
        id as rental_id,
        date_key,
        zone_key,
        survey_year as year,
        bedroom_type,
        universe as rental_universe,
        avg_rent,
        median_rent,
        vacancy_rate,
        availability_rate,
        turnover_rate,
        rent_change_pct as year_over_year_rent_change,
        reliability_code
    from source
)

select * from staged
dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql (new file, 18 lines)
@@ -0,0 +1,18 @@
-- Staged CMHC zone to neighbourhood crosswalk
-- Source: bridge_cmhc_neighbourhood table
-- Grain: One row per zone-neighbourhood intersection

with source as (
    select * from {{ source('toronto', 'bridge_cmhc_neighbourhood') }}
),

staged as (
    select
        id as crosswalk_id,
        cmhc_zone_code,
        neighbourhood_id,
        weight as area_weight
    from source
)

select * from staged
dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql (new file, 19 lines)
@@ -0,0 +1,19 @@
-- Staged CMHC zone dimension
-- Source: dim_cmhc_zone table
-- Grain: One row per zone

with source as (
    select * from {{ source('toronto', 'dim_cmhc_zone') }}
),

staged as (
    select
        zone_key,
        zone_code,
        zone_name
        -- geometry column excluded: CMHC does not provide zone boundaries
        -- Spatial analysis uses dim_neighbourhood geometry instead
    from source
)

select * from staged
dbt/models/staging/toronto/stg_toronto__amenities.sql (new file, 19 lines)
@@ -0,0 +1,19 @@
-- Staged amenity counts by neighbourhood
-- Source: fact_amenities table
-- Grain: One row per neighbourhood per amenity type per year

with source as (
    select * from {{ source('toronto', 'fact_amenities') }}
),

staged as (
    select
        id as amenity_id,
        neighbourhood_id,
        amenity_type,
        count as amenity_count,
        year as amenity_year
    from source
)

select * from staged
dbt/models/staging/toronto/stg_toronto__census.sql (new file, 27 lines)
@@ -0,0 +1,27 @@
-- Staged census demographics by neighbourhood
-- Source: fact_census table
-- Grain: One row per neighbourhood per census year

with source as (
    select * from {{ source('toronto', 'fact_census') }}
),

staged as (
    select
        id as census_id,
        neighbourhood_id,
        census_year,
        population,
        population_density,
        median_household_income,
        average_household_income,
        unemployment_rate,
        pct_bachelors_or_higher,
        pct_owner_occupied,
        pct_renter_occupied,
        median_age,
        average_dwelling_value
    from source
)

select * from staged
dbt/models/staging/toronto/stg_toronto__crime.sql (new file, 20 lines)
@@ -0,0 +1,20 @@
-- Staged crime statistics by neighbourhood
-- Source: fact_crime table
-- Grain: One row per neighbourhood per year per crime type

with source as (
    select * from {{ source('toronto', 'fact_crime') }}
),

staged as (
    select
        id as crime_id,
        neighbourhood_id,
        year as crime_year,
        crime_type,
        count as incident_count,
        rate_per_100k
    from source
)

select * from staged
dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql (new file, 25 lines)
@@ -0,0 +1,25 @@
-- Staged Toronto neighbourhood dimension
-- Source: dim_neighbourhood table
-- Grain: One row per neighbourhood (158 total)

with source as (
    select * from {{ source('toronto', 'dim_neighbourhood') }}
),

staged as (
    select
        neighbourhood_id,
        name as neighbourhood_name,
        geometry,
        population,
        land_area_sqkm,
        pop_density_per_sqkm,
        pct_bachelors_or_higher,
        median_household_income,
        pct_owner_occupied,
        pct_renter_occupied,
        census_year
    from source
)

select * from staged
dbt/package-lock.yml (new file, 11 lines)
@@ -0,0 +1,11 @@
packages:
  - name: dbt_utils
    package: dbt-labs/dbt_utils
    version: 1.3.3
  - name: dbt_expectations
    package: calogica/dbt_expectations
    version: 0.10.4
  - name: dbt_date
    package: calogica/dbt_date
    version: 0.10.1
sha1_hash: 51a51ab489f7b302c8745ae3c3781271816b01be
dbt/packages.yml (new file, 5 lines)
@@ -0,0 +1,5 @@
packages:
  - package: dbt-labs/dbt_utils
    version: ">=1.0.0"
  - package: calogica/dbt_expectations
    version: ">=0.10.0"
dbt/profiles.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
portfolio:
  target: dev
  outputs:
    dev:
      type: postgres
      host: localhost
      user: portfolio
      password: "{{ env_var('POSTGRES_PASSWORD') }}"
      port: 5432
      dbname: portfolio
      schema: public
      threads: 4
    prod:
      type: postgres
      host: "{{ env_var('POSTGRES_HOST') }}"
      user: "{{ env_var('POSTGRES_USER') }}"
      password: "{{ env_var('POSTGRES_PASSWORD') }}"
      port: 5432
      dbname: portfolio
      schema: public
      threads: 4
dbt/tests/.gitkeep (new file, 0 lines)
docker-compose.yml (new file, 22 lines)
@@ -0,0 +1,22 @@
services:
  db:
    image: ${POSTGIS_IMAGE:-postgis/postgis:16-3.4}
    container_name: portfolio-db
    restart: unless-stopped
    ports:
      - "5432:5432"
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-portfolio}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-portfolio_dev}
      POSTGRES_DB: ${POSTGRES_DB:-portfolio}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./scripts/db/init-postgis.sql:/docker-entrypoint-initdb.d/init-postgis.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-portfolio} -d ${POSTGRES_DB:-portfolio}"]
      interval: 10s
      timeout: 5s
      retries: 5

volumes:
  postgres_data:
docs/CONTRIBUTING.md (new file, 500 lines)
@@ -0,0 +1,500 @@
# Developer Guide

Instructions for contributing to the Analytics Portfolio project.

---

## Table of Contents

1. [Development Setup](#development-setup)
2. [Adding a Blog Post](#adding-a-blog-post)
3. [Adding a New Page](#adding-a-new-page)
4. [Adding a Dashboard Tab](#adding-a-dashboard-tab)
5. [Creating Figure Factories](#creating-figure-factories)
6. [Branch Workflow](#branch-workflow)
7. [Code Standards](#code-standards)

---

## Development Setup

### Prerequisites

- Python 3.11+ (via pyenv)
- Docker and Docker Compose
- Git

### Initial Setup

```bash
# Clone repository
git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
cd personal-portfolio

# Run setup (creates venv, installs deps, copies .env.example)
make setup

# Start PostgreSQL + PostGIS
make docker-up

# Initialize database
make db-init

# Start development server
make run
```

The app runs at `http://localhost:8050`.

### Useful Commands

```bash
make test       # Run tests
make test-cov   # Run tests with coverage
make lint       # Check code style
make format     # Auto-format code
make typecheck  # Run mypy type checker
make ci         # Run all checks (lint, typecheck, test)
make dbt-run    # Run dbt transformations
make dbt-test   # Run dbt tests
```

---

## Adding a Blog Post

Blog posts are Markdown files with YAML frontmatter, stored in `portfolio_app/content/blog/`.

### Step 1: Create the Markdown File

Create a new file in `portfolio_app/content/blog/`:

```bash
touch portfolio_app/content/blog/your-article-slug.md
```

The filename becomes the URL slug: `/blog/your-article-slug`

### Step 2: Add Frontmatter

Every blog post requires YAML frontmatter at the top:

```markdown
---
title: "Your Article Title"
date: "2026-01-17"
description: "A brief description for the article card (1-2 sentences)"
tags:
  - data-engineering
  - python
  - lessons-learned
status: published
---

Your article content starts here...
```

**Required fields:**

| Field | Description |
|-------|-------------|
| `title` | Article title (displayed on cards and page) |
| `date` | Publication date in `YYYY-MM-DD` format |
| `description` | Short summary for article listing cards |
| `tags` | List of tags (displayed as badges) |
| `status` | `published` or `draft` (drafts are hidden from listing) |

### Step 3: Write Content

Use standard Markdown:

````markdown
## Section Heading

Regular paragraph text.

### Subsection

- Bullet points
- Another point

```python
# Code blocks with syntax highlighting
def example():
    return "Hello"
```

**Bold text** and *italic text*.

> Blockquotes for callouts
````

### Step 4: Test Locally

```bash
make run
```

Visit `http://localhost:8050/blog` to see the article listing.
Visit `http://localhost:8050/blog/your-article-slug` for the full article.

### Example: Complete Blog Post

````markdown
---
title: "Building ETL Pipelines with Python"
date: "2026-01-17"
description: "Lessons from building production data pipelines at scale"
tags:
  - python
  - etl
  - data-engineering
status: published
---

When I started building data pipelines, I made every mistake possible...

## The Problem

Most tutorials show toy examples. Real pipelines are different.

### Error Handling

```python
def safe_transform(df: pd.DataFrame) -> pd.DataFrame:
    try:
        return df.apply(transform_row, axis=1)
    except ValueError as e:
        logger.error(f"Transform failed: {e}")
        raise
```

## Conclusion

Ship something that works, then iterate.
````

---

## Adding a New Page

Pages use Dash's automatic routing based on file location in `portfolio_app/pages/`.

### Step 1: Create the Page File

```bash
touch portfolio_app/pages/your_page.py
```

### Step 2: Register the Page

Every page must call `dash.register_page()`:

```python
"""Your page description."""

import dash
import dash_mantine_components as dmc

dash.register_page(
    __name__,
    path="/your-page",        # URL path
    name="Your Page",         # Display name (for nav)
    title="Your Page Title",  # Browser tab title
)


def layout() -> dmc.Container:
    """Page layout function."""
    return dmc.Container(
        dmc.Stack(
            [
                dmc.Title("Your Page", order=1),
                dmc.Text("Page content here."),
            ],
            gap="lg",
        ),
        size="md",
        py="xl",
    )
```

### Step 3: Page with Dynamic Content

For pages with URL parameters:

```python
# pages/blog/article.py
dash.register_page(
    __name__,
    path_template="/blog/<slug>",  # Dynamic parameter
    name="Article",
)


def layout(slug: str = "") -> dmc.Container:
    """Layout receives URL parameters as arguments."""
    article = get_article(slug)
    if not article:
        return dmc.Text("Article not found")

    return dmc.Container(
        dmc.Title(article["meta"]["title"]),
        # ...
    )
```

### Step 4: Add Navigation (Optional)

To add the page to the sidebar, edit `portfolio_app/components/sidebar.py`:

```python
# For main pages (Home, About, Blog, etc.)
NAV_ITEMS_MAIN = [
    {"path": "/", "icon": "tabler:home", "label": "Home"},
    {"path": "/your-page", "icon": "tabler:star", "label": "Your Page"},
    # ...
]

# For project/dashboard pages
NAV_ITEMS_PROJECTS = [
    {"path": "/projects", "icon": "tabler:folder", "label": "Projects"},
    {"path": "/your-dashboard", "icon": "tabler:chart-bar", "label": "Your Dashboard"},
    # ...
]
```

The sidebar uses icon buttons with tooltips. Each item needs `path`, `icon` (Tabler icon name), and `label` (tooltip text).

### URL Routing Summary

| File Location | URL |
|---------------|-----|
| `pages/home.py` | `/` (if `path="/"`) |
| `pages/about.py` | `/about` |
| `pages/blog/index.py` | `/blog` |
| `pages/blog/article.py` | `/blog/<slug>` |
| `pages/toronto/dashboard.py` | `/toronto` |

---

## Adding a Dashboard Tab

Dashboard tabs are in `portfolio_app/pages/toronto/tabs/`.

### Step 1: Create Tab Layout

```python
# pages/toronto/tabs/your_tab.py
"""Your tab description."""

import dash_mantine_components as dmc

from portfolio_app.figures.toronto.choropleth import create_choropleth
from portfolio_app.toronto.demo_data import get_demo_data


def create_your_tab_layout() -> dmc.Stack:
    """Create the tab layout."""
    data = get_demo_data()

    return dmc.Stack(
        [
            dmc.Grid(
                [
                    dmc.GridCol(
                        # Map on left
                        create_choropleth(data, "your_metric"),
                        span=8,
                    ),
                    dmc.GridCol(
                        # KPI cards on right
                        create_kpi_cards(data),
                        span=4,
                    ),
                ],
            ),
            # Charts below
            create_supporting_charts(data),
        ],
        gap="lg",
    )
```

### Step 2: Register in Dashboard

Edit `pages/toronto/dashboard.py` to add the tab:

```python
from portfolio_app.pages.toronto.tabs.your_tab import create_your_tab_layout

# In the tabs list:
dmc.TabsTab("Your Tab", value="your-tab"),

# In the panels:
dmc.TabsPanel(create_your_tab_layout(), value="your-tab"),
```

---

## Creating Figure Factories

Figure factories are organized by dashboard domain under `portfolio_app/figures/{domain}/`.

### Pattern

```python
# figures/toronto/your_chart.py
"""Your chart type factory for Toronto dashboard."""

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def create_your_chart(
    df: pd.DataFrame,
    x_col: str,
    y_col: str,
    title: str = "",
) -> go.Figure:
    """Create a your_chart figure.

    Args:
        df: DataFrame with data.
        x_col: Column for x-axis.
        y_col: Column for y-axis.
        title: Optional chart title.

    Returns:
        Configured Plotly figure.
    """
    fig = px.bar(df, x=x_col, y=y_col, title=title)

    fig.update_layout(
        template="plotly_white",
        margin=dict(l=40, r=40, t=40, b=40),
    )

    return fig
```

### Export from `__init__.py`

```python
# figures/toronto/__init__.py
from .your_chart import create_your_chart

__all__ = [
    "create_your_chart",
    # ...
]
```

### Importing Figure Factories

```python
# In callbacks or tabs
from portfolio_app.figures.toronto import create_choropleth_figure
from portfolio_app.figures.toronto.bar_charts import create_ranking_bar
```

---

## Branch Workflow

```
main (production)
  ↑
staging (pre-production)
  ↑
development (integration)
  ↑
feature/XX-description (your work)
```

### Creating a Feature Branch

```bash
# Start from development
git checkout development
git pull origin development

# Create feature branch
git checkout -b feature/10-add-new-page

# Work, commit, push
git add .
git commit -m "feat: Add new page"
git push -u origin feature/10-add-new-page
```

### Merging

```bash
# Merge into development
git checkout development
git merge feature/10-add-new-page
git push origin development

# Delete feature branch
git branch -d feature/10-add-new-page
git push origin --delete feature/10-add-new-page
```

**Rules:**

- Never commit directly to `main` or `staging`
- Never delete `development`
- Feature branches are temporary

---

## Code Standards

### Type Hints

Use Python 3.10+ style:

```python
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
    ...
```

### Imports

| Context | Style |
|---------|-------|
| Same directory | `from .module import X` |
| Sibling directory | `from ..schemas.model import Y` |
| External packages | `import pandas as pd` |

### Formatting

```bash
make format  # Runs ruff formatter
make lint    # Checks style
```

### Docstrings

Google style, only for non-obvious functions:

```python
def calculate_score(values: list[float], weights: list[float]) -> float:
    """Calculate weighted score.

    Args:
        values: Raw metric values.
        weights: Weight for each metric.

    Returns:
        Weighted average score.
    """
    ...
```

---

## Questions?

Check `CLAUDE.md` for AI assistant context and architectural decisions.

335 docs/DATABASE_SCHEMA.md Normal file

@@ -0,0 +1,335 @@

# Database Schema

This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.

## Entity Relationship Diagram

```mermaid
erDiagram
    dim_time {
        int date_key PK
        date full_date UK
        int year
        int month
        int quarter
        string month_name
        bool is_month_start
    }

    dim_cmhc_zone {
        int zone_key PK
        string zone_code UK
        string zone_name
        geometry geometry
    }

    dim_neighbourhood {
        int neighbourhood_id PK
        string name
        geometry geometry
        int population
        numeric land_area_sqkm
        numeric pop_density_per_sqkm
        numeric pct_bachelors_or_higher
        numeric median_household_income
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        int census_year
    }

    dim_policy_event {
        int event_id PK
        date event_date
        date effective_date
        string level
        string category
        string title
        text description
        string expected_direction
        string source_url
        string confidence
    }

    fact_rentals {
        int id PK
        int date_key FK
        int zone_key FK
        string bedroom_type
        int universe
        numeric avg_rent
        numeric median_rent
        numeric vacancy_rate
        numeric availability_rate
        numeric turnover_rate
        numeric rent_change_pct
        string reliability_code
    }

    fact_census {
        int id PK
        int neighbourhood_id FK
        int census_year
        int population
        numeric population_density
        numeric median_household_income
        numeric average_household_income
        numeric unemployment_rate
        numeric pct_bachelors_or_higher
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        numeric median_age
        numeric average_dwelling_value
    }

    fact_crime {
        int id PK
        int neighbourhood_id FK
        int year
        string crime_type
        int count
        numeric rate_per_100k
    }

    fact_amenities {
        int id PK
        int neighbourhood_id FK
        string amenity_type
        int count
        int year
    }

    bridge_cmhc_neighbourhood {
        int id PK
        string cmhc_zone_code FK
        int neighbourhood_id FK
        numeric weight
    }

    dim_time ||--o{ fact_rentals : "date_key"
    dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
    dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
    dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
    dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
```

## Schema Layers

### Database Schemas

| Schema | Purpose | Managed By |
|--------|---------|------------|
| `public` | Shared dimensions (dim_time) | SQLAlchemy |
| `raw_toronto` | Toronto dimension and fact tables | SQLAlchemy |
| `stg_toronto` | Toronto staging models | dbt |
| `int_toronto` | Toronto intermediate models | dbt |
| `mart_toronto` | Toronto analytical tables | dbt |

### Raw Toronto Schema (raw_toronto)

Toronto-specific tables loaded by SQLAlchemy:

| Table | Source | Description |
|-------|--------|-------------|
| `dim_neighbourhood` | City of Toronto API | 158 neighbourhood boundaries |
| `dim_cmhc_zone` | CMHC | ~20 rental market zones |
| `dim_policy_event` | Manual | Policy events for annotation |
| `fact_census` | City of Toronto API | Census profile data |
| `fact_crime` | Toronto Police API | Crime statistics |
| `fact_amenities` | City of Toronto API | Amenity counts |
| `fact_rentals` | CMHC Data Files | Rental market survey data |
| `bridge_cmhc_neighbourhood` | Computed | Zone-neighbourhood mapping |

### Public Schema

Shared dimensions used across all projects:

| Table | Description |
|-------|-------------|
| `dim_time` | Time dimension (monthly grain) |

### Staging Schema - stg_toronto (dbt)

Staging models provide 1:1 cleaned representations of source data:

| Model | Source Table | Purpose |
|-------|-------------|---------|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
| `stg_toronto__crime` | raw.crime_data | Standardized crime categories |
| `stg_toronto__amenities` | raw.amenities | Typed amenity counts |
| `stg_dimensions__time` | generated | Time dimension |
| `stg_dimensions__cmhc_zones` | raw.cmhc_zones | CMHC zone boundaries |
| `stg_cmhc__zone_crosswalk` | raw.crosswalk | Zone-neighbourhood mapping |

### Marts Schema - mart_toronto (dbt)

Analytical tables ready for dashboard consumption:

| Model | Grain | Purpose |
|-------|-------|---------|
| `mart_neighbourhood_overview` | neighbourhood | Composite livability scores |
| `mart_neighbourhood_housing` | neighbourhood | Housing and rent metrics |
| `mart_neighbourhood_safety` | neighbourhood × year | Crime rate calculations |
| `mart_neighbourhood_demographics` | neighbourhood | Income, age, population metrics |
| `mart_neighbourhood_amenities` | neighbourhood | Amenity accessibility scores |
| `mart_toronto_rentals` | zone × month | Time-series rental analysis |
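
Marts are plain tables, so they can be pulled straight into pandas for analysis or chart prep. A minimal sketch (the `DATABASE_URL` environment variable is an assumption; `SELECT *` is used because the mart's column list lives in the dbt model, not here):

```python
# Minimal sketch of reading a mart with pandas; DATABASE_URL is assumed
import os

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine(os.environ["DATABASE_URL"])

rentals = pd.read_sql(
    "SELECT * FROM mart_toronto.mart_toronto_rentals",
    engine,
)
```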

## Table Details

### Dimension Tables

#### dim_time

Time dimension for date-based analysis. Grain: one row per month.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
| year | INTEGER | NOT NULL | Calendar year |
| month | INTEGER | NOT NULL | Month number (1-12) |
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
| month_name | VARCHAR(20) | NOT NULL | Month name |
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
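
Because `date_key` is a plain YYYYMM integer, it can be derived arithmetically rather than looked up. A minimal sketch:

```python
# Derive dim_time's YYYYMM surrogate key from a date
from datetime import date


def to_date_key(d: date) -> int:
    return d.year * 100 + d.month


assert to_date_key(date(2024, 3, 1)) == 202403
```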

#### dim_cmhc_zone

CMHC rental market zones (~20 zones covering Toronto).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |

#### dim_neighbourhood

Toronto's 158 official neighbourhoods.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
| population | INTEGER | | Total population |
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| median_household_income | NUMERIC(12,2) | | Median income |
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
| census_year | INTEGER | DEFAULT 2021 | Census reference year |

#### dim_policy_event

Policy events for time-series annotation (rent control, interest rates, etc.).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| event_id | INTEGER | PK, AUTO | Surrogate key |
| event_date | DATE | NOT NULL | Announcement date |
| effective_date | DATE | | Implementation date |
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
| title | VARCHAR(200) | NOT NULL | Event title |
| description | TEXT | | Detailed description |
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
| source_url | VARCHAR(500) | | Reference link |
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |

### Fact Tables

#### fact_rentals

CMHC rental market survey data. Grain: zone × bedroom type × survey date.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| date_key | INTEGER | FK → dim_time | Survey date reference |
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
| universe | INTEGER | | Total rental units |
| avg_rent | NUMERIC(10,2) | | Average rent |
| median_rent | NUMERIC(10,2) | | Median rent |
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
| availability_rate | NUMERIC(5,2) | | Availability percentage |
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
| reliability_code | VARCHAR(2) | | CMHC data quality code |

#### fact_census

Census statistics. Grain: neighbourhood × census year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
| population | INTEGER | | Total population |
| population_density | NUMERIC(10,2) | | People per km² |
| median_household_income | NUMERIC(12,2) | | Median income |
| average_household_income | NUMERIC(12,2) | | Average income |
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
| median_age | NUMERIC(5,2) | | Median resident age |
| average_dwelling_value | NUMERIC(12,2) | | Average home value |

#### fact_crime

Crime statistics. Grain: neighbourhood × year × crime type.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| year | INTEGER | NOT NULL | Calendar year |
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
| count | INTEGER | NOT NULL | Number of incidents |
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |

#### fact_amenities

Amenity counts. Grain: neighbourhood × amenity type × year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
| count | INTEGER | NOT NULL | Number of amenities |
| year | INTEGER | NOT NULL | Reference year |

### Bridge Tables

#### bridge_cmhc_neighbourhood

Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
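
The weights make it possible to apportion zone-grain metrics down to neighbourhoods. A minimal pandas sketch, assuming DataFrames shaped like a `fact_rentals` aggregate and the bridge table (`avg_rent` is just an example metric):

```python
# Weight-average a zone-level metric down to neighbourhood grain (sketch)
import pandas as pd


def allocate_to_neighbourhoods(
    zone_metrics: pd.DataFrame,  # columns: cmhc_zone_code, avg_rent
    bridge: pd.DataFrame,        # columns: cmhc_zone_code, neighbourhood_id, weight
) -> pd.Series:
    merged = bridge.merge(zone_metrics, on="cmhc_zone_code")
    merged["weighted"] = merged["avg_rent"] * merged["weight"]
    grouped = merged.groupby("neighbourhood_id")
    return grouped["weighted"].sum() / grouped["weight"].sum()
```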

## Indexes

| Table | Index | Columns | Purpose |
|-------|-------|---------|---------|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
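
Since the raw tables are managed by SQLAlchemy, these indexes are declared on the ORM models. A sketch of the first one in the 2.0-style declarative API (the class name and column subset are hypothetical; the real model lives under `toronto/models/`):

```python
# Sketch of declaring ix_fact_rentals_date_zone on a 2.0-style model
from sqlalchemy import Index
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class FactRental(Base):  # hypothetical class name
    __tablename__ = "fact_rentals"
    __table_args__ = (
        Index("ix_fact_rentals_date_zone", "date_key", "zone_key"),
        {"schema": "raw_toronto"},
    )

    id: Mapped[int] = mapped_column(primary_key=True)
    date_key: Mapped[int]
    zone_key: Mapped[int]
```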

## PostGIS Extensions

The database requires PostGIS for geospatial operations:

```sql
CREATE EXTENSION IF NOT EXISTS postgis;
```

All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
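
Because everything is stored in WGS84, boundaries can be read directly into GeoPandas for mapping. A minimal sketch (the `DATABASE_URL` environment variable is an assumption):

```python
# Read PostGIS geometry into GeoPandas; DATABASE_URL is assumed
import os

import geopandas as gpd
from sqlalchemy import create_engine

engine = create_engine(os.environ["DATABASE_URL"])

boundaries = gpd.read_postgis(
    "SELECT neighbourhood_id, name, geometry FROM raw_toronto.dim_neighbourhood",
    engine,
    geom_col="geometry",
)
print(boundaries.crs)  # EPSG:4326
```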

@@ -1,21 +1,193 @@

# Portfolio Project Reference

**Project**: Analytics Portfolio
**Owner**: Leo
**Status**: Ready for Sprint 1
**Owner**: Leo Miranda
**Status**: Sprint 9 Complete (Dashboard Implementation Done)
**Last Updated**: January 2026

---

## Project Overview

Two-project analytics portfolio demonstrating end-to-end data engineering, visualization, and ML capabilities.
Personal portfolio website with an interactive Toronto Neighbourhood Dashboard demonstrating data engineering, visualization, and analytics capabilities.

| Project | Domain | Key Skills | Phase |
|---------|--------|------------|-------|
| **Toronto Housing Dashboard** | Real estate | ETL, dimensional modeling, geospatial, choropleth | Phase 1 (Active) |
| **Energy Pricing Analysis** | Utility markets | Time series, ML prediction, API integration | Phase 3 (Future) |

| Component | Description | Status |
|-----------|-------------|--------|
| Portfolio Website | Bio, About, Projects, Resume, Contact, Blog | Complete |
| Toronto Dashboard | 5-tab neighbourhood analysis | Complete |
| Data Pipeline | dbt models, figure factories | Complete |
| Deployment | Production deployment | Pending |

**Platform**: Monolithic Dash application on self-hosted VPS (bio landing page + dashboards).

---

## Completed Work

### Sprint 1-6: Foundation
- Repository setup, Docker, PostgreSQL + PostGIS
- Bio landing page implementation
- Initial data model design

### Sprint 7: Navigation & Theme
- Sidebar navigation
- Dark/light theme toggle
- dash-mantine-components integration

### Sprint 8: Portfolio Website
- About, Contact, Projects, Resume pages
- Blog system with Markdown/frontmatter
- Health endpoint

### Sprint 9: Neighbourhood Dashboard Transition
- Phase 1: Deleted legacy TRREB code
- Phase 2: Documentation cleanup
- Phase 3: New neighbourhood-centric data model
- Phase 4: dbt model restructuring
- Phase 5: 5-tab dashboard implementation
- Phase 6: 15 documentation notebooks
- Phase 7: Final documentation review

---

## Application Architecture

### URL Routes

| URL | Page | File |
|-----|------|------|
| `/` | Home | `pages/home.py` |
| `/about` | About | `pages/about.py` |
| `/contact` | Contact | `pages/contact.py` |
| `/projects` | Projects | `pages/projects.py` |
| `/resume` | Resume | `pages/resume.py` |
| `/blog` | Blog listing | `pages/blog/index.py` |
| `/blog/{slug}` | Article | `pages/blog/article.py` |
| `/toronto` | Dashboard | `pages/toronto/dashboard.py` |
| `/toronto/methodology` | Methodology | `pages/toronto/methodology.py` |
| `/health` | Health check | `pages/health.py` |
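
Routing is handled by Dash Pages: each module under `pages/` registers its own path. A minimal sketch of such a page module (the layout content is illustrative):

```python
# pages/about.py -- minimal Dash Pages sketch; layout content is illustrative
import dash
from dash import html

dash.register_page(__name__, path="/about")

layout = html.Div([html.H1("About")])
```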

### Directory Structure

```
portfolio_app/
├── app.py                     # Dash app factory
├── config.py                  # Pydantic BaseSettings
├── assets/                    # CSS, images
├── callbacks/                 # Global callbacks (sidebar, theme)
├── components/                # Shared UI components
├── content/blog/              # Markdown blog articles
├── errors/                    # Exception handling
├── figures/
│   └── toronto/               # Toronto figure factories
├── pages/
│   ├── home.py
│   ├── about.py
│   ├── contact.py
│   ├── projects.py
│   ├── resume.py
│   ├── health.py
│   ├── blog/
│   │   ├── index.py
│   │   └── article.py
│   └── toronto/
│       ├── dashboard.py
│       ├── methodology.py
│       ├── tabs/              # 5 tab layouts
│       └── callbacks/         # Dashboard interactions (map_callbacks, chart_callbacks, selection_callbacks)
├── toronto/                   # Data logic
│   ├── parsers/               # API extraction (geo, toronto_open_data, toronto_police, cmhc)
│   ├── loaders/               # Database operations (base, cmhc, cmhc_crosswalk)
│   ├── schemas/               # Pydantic models
│   ├── models/                # SQLAlchemy ORM (raw_toronto schema)
│   ├── services/              # Query functions (neighbourhood_service, geometry_service)
│   └── demo_data.py           # Sample data
└── utils/
    └── markdown_loader.py     # Blog article loading

dbt/                           # dbt project: portfolio
├── models/
│   ├── shared/                # Cross-domain dimensions
│   ├── staging/toronto/       # Toronto staging models
│   ├── intermediate/toronto/  # Toronto intermediate models
│   └── marts/toronto/         # Toronto mart tables

notebooks/
└── toronto/                   # Toronto documentation notebooks
```
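
Blog articles in `content/blog/` are Markdown files with frontmatter. A rough sketch of the kind of split `utils/markdown_loader.py` performs (illustrative, not the actual implementation):

```python
# Illustrative frontmatter split -- not the actual markdown_loader.py
from pathlib import Path

import yaml  # assumes PyYAML is installed


def load_article(path: Path) -> tuple[dict, str]:
    """Return (frontmatter metadata, Markdown body) for one article."""
    _, meta, body = path.read_text(encoding="utf-8").split("---", 2)
    return yaml.safe_load(meta), body.strip()
```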

---

## Toronto Dashboard

### Data Sources

| Source | Data | Format |
|--------|------|--------|
| City of Toronto Open Data | Neighbourhoods (158), Census profiles, Parks, Schools, Childcare, TTC | GeoJSON, CSV, API |
| Toronto Police Service | Crime rates, MCI, Shootings | CSV, API |
| CMHC | Rental Market Survey | CSV |

### Geographic Model

```
City of Toronto Neighbourhoods (158)  ← Primary analysis unit
CMHC Zones (~20)                      ← Rental data (Census Tract aligned)
```

### Dashboard Tabs

| Tab | Choropleth Metric | Supporting Charts |
|-----|-------------------|-------------------|
| Overview | Livability score | Top/Bottom 10 bar, Income vs Safety scatter |
| Housing | Affordability index | Rent trend line, Tenure breakdown bar |
| Safety | Crime rate per 100K | Crime breakdown bar, Crime trend line |
| Demographics | Median income | Age distribution, Population density bar |
| Amenities | Amenity index | Amenity radar, Transit accessibility bar |
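
Each tab pairs a choropleth over the 158 neighbourhoods with its supporting charts. A minimal plotly.express sketch of the map half, assuming the boundaries have been exported as GeoJSON (the file name, feature key, and column names are assumptions):

```python
# Minimal choropleth sketch; GeoJSON file and column names are assumed
import json

import pandas as pd
import plotly.express as px

with open("neighbourhoods.geojson") as f:  # hypothetical boundary export
    neighbourhoods = json.load(f)

df = pd.DataFrame({"neighbourhood_id": [1, 2], "livability_score": [72.5, 64.1]})

fig = px.choropleth(
    df,
    geojson=neighbourhoods,
    locations="neighbourhood_id",
    featureidkey="properties.neighbourhood_id",  # assumed GeoJSON property
    color="livability_score",
)
fig.update_geos(fitbounds="locations", visible=False)
```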

### Star Schema

| Table | Type | Description |
|-------|------|-------------|
| `dim_neighbourhood` | Dimension | 158 neighbourhoods with geometry |
| `dim_time` | Dimension | Date dimension |
| `dim_cmhc_zone` | Dimension | ~20 CMHC zones with geometry |
| `fact_census` | Fact | Census indicators by neighbourhood |
| `fact_crime` | Fact | Crime stats by neighbourhood |
| `fact_rentals` | Fact | Rental data by CMHC zone |
| `fact_amenities` | Fact | Amenity counts by neighbourhood |

### dbt Project: `portfolio`

**Model Structure:**
```
dbt/models/
├── shared/                # Cross-domain dimensions (stg_dimensions__time)
├── staging/toronto/       # Toronto staging models
├── intermediate/toronto/  # Toronto intermediate models
└── marts/toronto/         # Toronto mart tables
```

| Layer | Naming | Example |
|-------|--------|---------|
| Shared | `stg_dimensions__*` | `stg_dimensions__time` |
| Staging | `stg_{source}__{entity}` | `stg_toronto__neighbourhoods` |
| Intermediate | `int_{domain}__{transform}` | `int_neighbourhood__demographics` |
| Marts | `mart_{domain}` | `mart_neighbourhood_overview` |

---

## Tech Stack

| Layer | Technology | Version |
|-------|------------|---------|
| Database | PostgreSQL + PostGIS | 16.x |
| Validation | Pydantic | 2.x |
| ORM | SQLAlchemy | 2.x |
| Transformation | dbt-postgres | 1.7+ |
| Data Processing | Pandas, GeoPandas | Latest |
| Visualization | Dash + Plotly | 2.14+ |
| UI Components | dash-mantine-components | Latest |
| Testing | pytest | 7.0+ |
| Python | 3.11+ | Via pyenv |

---

@@ -23,325 +195,51 @@ Two-project analytics portfolio demonstrating end-to-end data engineering, visua

| Branch | Purpose | Deploys To |
|--------|---------|------------|
| `main` | Production releases only | VPS (production) |
| `main` | Production releases | VPS (production) |
| `staging` | Pre-production testing | VPS (staging) |
| `development` | Active development | Local only |

**Rules**:
- All feature branches created FROM `development`
- All feature branches merge INTO `development`
- `development` → `staging` for testing
- `staging` → `main` for release
- Direct commits to `main` or `staging` are forbidden
- Branch naming: `feature/{sprint}-{description}` or `fix/{issue-id}`

**Rules:**
- Feature branches from `development`: `feature/{sprint}-{description}`
- Merge into `development` when complete
- `development` → `staging` → `main` for releases
- Never delete `development`

---

## Tech Stack (Locked)
## Code Standards

| Layer | Technology | Version |
|-------|------------|---------|
| Database | PostgreSQL + PostGIS | 16.x |
| Validation | Pydantic | ≥2.0 |
| ORM | SQLAlchemy | ≥2.0 (2.0-style API only) |
| Transformation | dbt-postgres | ≥1.7 |
| Data Processing | Pandas | ≥2.1 |
| Geospatial | GeoPandas + Shapely | ≥0.14 |
| Visualization | Dash + Plotly | ≥2.14 |
| UI Components | dash-mantine-components | Latest stable |
| Testing | pytest | ≥7.0 |
| Python | 3.11+ | Via pyenv |

### Type Hints (Python 3.10+)

**Compatibility Notes**:
- SQLAlchemy 2.0 + Pydantic 2.0 integrate well—never mix 1.x APIs
- PostGIS extension required—enable during db init
- Docker Compose V2 (no `version` field in compose files)

```python
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
    ...
```

---

### Imports

## Code Conventions

### Import Style

| Context | Style | Example |
|---------|-------|---------|
| Same directory | Single dot | `from .trreb import TRREBParser` |
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
| External packages | Absolute | `import pandas as pd` |

### Module Separation

| Directory | Contains | Purpose |
|-----------|----------|---------|
| `schemas/` | Pydantic models | Data validation |
| `models/` | SQLAlchemy ORM | Database persistence |
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
| `loaders/` | Database operations | Data loading |
| `figures/` | Chart factories | Plotly figure generation |
| `callbacks/` | Dash callbacks | Per-dashboard, in `pages/{dashboard}/callbacks/` |
| `errors/` | Exceptions + handlers | Error handling |

### Code Standards

- **Type hints**: Mandatory, Python 3.10+ style (`list[str]`, `dict[str, int]`, `X | None`)
- **Functions**: Single responsibility, verb naming, early returns over nesting
- **Docstrings**: Google style, minimal—only for non-obvious behavior
- **Constants**: Module-level for magic values, Pydantic BaseSettings for runtime config

| Context | Style |
|---------|-------|
| Same directory | `from .module import X` |
| Sibling directory | `from ..schemas.model import Y` |
| External | `import pandas as pd` |

### Error Handling

```python
# errors/exceptions.py
class PortfolioError(Exception):
    """Base exception."""

class ParseError(PortfolioError):
    """PDF/CSV parsing failed."""
    """Data parsing failed."""

class ValidationError(PortfolioError):
    """Pydantic or business rule validation failed."""
    """Validation failed."""

class LoadError(PortfolioError):
    """Database load operation failed."""
    """Database load failed."""
```

- Decorators for infrastructure concerns (logging, retry, transactions); see the sketch below
- Explicit handling for domain logic (business rules, recovery strategies)
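
A minimal sketch of the decorator side, built on the exception hierarchy above (the import path and the retry policy are assumptions):

```python
# Retry-decorator sketch; import path and retry policy are assumptions
import functools
import logging
import time

from portfolio_app.errors.exceptions import LoadError  # assumed module path

logger = logging.getLogger(__name__)


def with_retry(attempts: int = 3, delay: float = 1.0):
    """Retry transient load failures before giving up."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except LoadError:
                    if attempt == attempts:
                        raise
                    logger.warning("attempt %d/%d failed; retrying", attempt, attempts)
                    time.sleep(delay)

        return wrapper

    return decorator
```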

---

## Application Architecture

### Dash Pages Structure

```
portfolio_app/
├── app.py                 # Dash app factory with Pages routing
├── config.py              # Pydantic BaseSettings
├── assets/                # CSS, images (auto-served by Dash)
├── pages/
│   ├── home.py            # Bio landing page → /
│   ├── toronto/
│   │   ├── dashboard.py   # Layout only → /toronto
│   │   └── callbacks/     # Interaction logic
│   └── energy/            # Phase 3
├── components/            # Shared UI (navbar, footer, cards)
├── figures/               # Shared chart factories
├── toronto/               # Toronto data logic
│   ├── parsers/
│   ├── loaders/
│   ├── schemas/           # Pydantic
│   └── models/            # SQLAlchemy
└── errors/
```

### URL Routing (Automatic)

| URL | Page | Status |
|-----|------|--------|
| `/` | Bio landing page | Sprint 2 |
| `/toronto` | Toronto Housing Dashboard | Sprint 6 |
| `/energy` | Energy Pricing Dashboard | Phase 3 |

---

## Phase 1: Toronto Housing Dashboard

### Data Sources

| Track | Source | Format | Geography | Frequency |
|-------|--------|--------|-----------|-----------|
| Purchases | TRREB Monthly Reports | PDF | ~35 Districts | Monthly |
| Rentals | CMHC Rental Market Survey | CSV | ~20 Zones | Annual |
| Enrichment | City of Toronto Open Data | GeoJSON/CSV | 158 Neighbourhoods | Census |
| Policy Events | Curated list | CSV | N/A | Event-based |

### Geographic Reality

```
┌─────────────────────────────────────────────────────────────────┐
│ City of Toronto Neighbourhoods (158)                            │ ← Enrichment only
├─────────────────────────────────────────────────────────────────┤
│ TRREB Districts (~35) — W01, C01, E01, etc.                     │ ← Purchase data
├─────────────────────────────────────────────────────────────────┤
│ CMHC Zones (~20) — Census Tract aligned                         │ ← Rental data
└─────────────────────────────────────────────────────────────────┘
```

**Critical**: These geographies do NOT align. Display as separate layers with toggle—do not force crosswalks.

### Data Model (Star Schema)

| Table | Type | Keys |
|-------|------|------|
| `fact_purchases` | Fact | → dim_time, dim_trreb_district |
| `fact_rentals` | Fact | → dim_time, dim_cmhc_zone |
| `dim_time` | Dimension | date_key (PK) |
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
| `dim_policy_event` | Dimension | event_id (PK) |

**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.

### dbt Layer Structure

| Layer | Naming | Purpose |
|-------|--------|---------|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
| Intermediate | `int_{domain}__{transform}` | Business logic, filtering |
| Marts | `mart_{domain}` | Final analytical tables |

---

## Sprint Overview

| Sprint | Focus | Milestone |
|--------|-------|-----------|
| 1 | Project bootstrap, start TRREB digitization | — |
| 2 | Bio page, data acquisition | **Launch 1: Bio Live** |
| 3 | Parsers, schemas, models | — |
| 4 | Loaders, dbt | — |
| 5 | Visualization | — |
| 6 | Polish, deploy dashboard | **Launch 2: Dashboard Live** |
| 7 | Buffer | — |

### Sprint 1 Deliverables

| Category | Tasks |
|----------|-------|
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
| **Tests** | tests/ directory, conftest.py, pytest config |
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |

### Human Tasks (Cannot Automate)

| Task | Tool | Effort |
|------|------|--------|
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
| Research policy events (10-20) | Manual research | 2-3 hours |
| Replace social link placeholders | Manual | 5 minutes |

---

## Scope Boundaries

### Phase 1 — Build These

- Bio landing page with content from bio_content_v2.md
- TRREB PDF parser
- CMHC CSV processor
- PostgreSQL + PostGIS database layer
- Star schema (facts + dimensions)
- dbt models with tests
- Choropleth visualization (Dash)
- Policy event annotation layer
- Neighbourhood overlay (toggle-able)

### Phase 1 — Do NOT Build

| Feature | Reason | When |
|---------|--------|------|
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 | After Energy project |
| Crime data integration | Deferred scope | Phase 4 |
| Historical boundary reconciliation (140→158) | 2021+ data only for V1 | Phase 4 |
| ML prediction models | Energy project scope | Phase 3 |
| Multi-project shared infrastructure | Build first, abstract second | Phase 2 |

If a task seems to require Phase 3/4 features, **stop and flag it**.

---

## File Structure

### Root-Level Files (Allowed)

| File | Purpose |
|------|---------|
| `README.md` | Project overview |
| `CLAUDE.md` | AI assistant context |
| `pyproject.toml` | Python packaging |
| `.gitignore` | Git ignore rules |
| `.env.example` | Environment template |
| `.python-version` | pyenv version |
| `.pre-commit-config.yaml` | Pre-commit hooks |
| `docker-compose.yml` | Container orchestration |
| `Makefile` | Task automation |

### Directory Structure

```
portfolio/
├── portfolio_app/        # Monolithic Dash application
│   ├── app.py
│   ├── config.py
│   ├── assets/
│   ├── pages/
│   ├── components/
│   ├── figures/
│   ├── toronto/
│   └── errors/
├── tests/
├── dbt/
├── data/
│   └── toronto/
│       ├── raw/
│       ├── processed/    # gitignored
│       └── reference/
├── scripts/
│   ├── db/
│   ├── docker/
│   ├── deploy/
│   ├── dbt/
│   └── dev/
├── docs/
├── notebooks/
├── backups/              # gitignored
└── reports/              # gitignored
```

### Gitignored Directories

- `data/*/processed/`
- `reports/`
- `backups/`
- `notebooks/*.html`
- `.env`
- `__pycache__/`
- `.venv/`

---

## Makefile Targets

| Target | Purpose |
|--------|---------|
| `setup` | Install deps, create .env, init pre-commit |
| `docker-up` | Start PostgreSQL + PostGIS |
| `docker-down` | Stop containers |
| `db-init` | Initialize database schema |
| `run` | Start Dash dev server |
| `test` | Run pytest |
| `dbt-run` | Run dbt models |
| `dbt-test` | Run dbt tests |
| `lint` | Run ruff linter |
| `format` | Run ruff formatter |
| `ci` | Run all checks |
| `deploy` | Deploy to production |

---

## Script Standards

All scripts in `scripts/`:
- Include usage comments at top
- Idempotent where possible
- Exit codes: 0 = success, 1 = error
- Use `set -euo pipefail` for bash
- Log to stdout, errors to stderr

---

## Environment Variables

@@ -360,37 +258,61 @@ LOG_LEVEL=INFO

---

## Success Criteria
## Makefile Targets

### Launch 1 (Sprint 2)
- [ ] Bio page accessible via HTTPS
- [ ] All bio content rendered (from bio_content_v2.md)
- [ ] No placeholder text visible
- [ ] Mobile responsive
- [ ] Social links functional

### Launch 2 (Sprint 6)
- [ ] Choropleth renders TRREB districts and CMHC zones
- [ ] Purchase/rental mode toggle works
- [ ] Time navigation works
- [ ] Policy event markers visible
- [ ] Neighbourhood overlay toggleable
- [ ] Methodology documentation published
- [ ] Data sources cited

| Target | Purpose |
|--------|---------|
| `setup` | Install deps, create .env, init pre-commit |
| `docker-up` | Start PostgreSQL + PostGIS (auto-detects x86/ARM) |
| `docker-down` | Stop containers |
| `docker-logs` | View container logs |
| `db-init` | Initialize database schema |
| `db-reset` | Drop and recreate database (DESTRUCTIVE) |
| `load-data` | Load Toronto data from APIs, seed dev data |
| `load-toronto-only` | Load Toronto data without dbt or seeding |
| `seed-data` | Seed sample development data |
| `run` | Start Dash dev server |
| `test` | Run pytest |
| `test-cov` | Run pytest with coverage |
| `lint` | Run ruff linter |
| `format` | Run ruff formatter |
| `typecheck` | Run mypy type checker |
| `ci` | Run all checks (lint, typecheck, test) |
| `dbt-run` | Run dbt models |
| `dbt-test` | Run dbt tests |
| `dbt-docs` | Generate and serve dbt documentation |
| `clean` | Remove build artifacts and caches |

---

## Reference Documents
## Next Steps

For detailed specifications, see:

### Deployment (Sprint 10+)
- [ ] Production Docker configuration
- [ ] CI/CD pipeline
- [ ] HTTPS/SSL setup
- [ ] Domain configuration

| Document | Location | Use When |
|----------|----------|----------|
| Data schemas | `docs/toronto_housing_spec.md` | Parser/model tasks |
| WBS details | `docs/wbs.md` | Sprint planning |
| Bio content | `docs/bio_content.md` | Building home.py |

### Data Enhancement
- [ ] Connect to live APIs (currently using demo data)
- [ ] Data refresh automation
- [ ] Historical data loading

### Future Projects
- Energy Pricing Analysis dashboard (planned)

---

*Reference Version: 1.0*
*Created: January 2026*

## Related Documents

| Document | Purpose |
|----------|---------|
| `README.md` | Quick start guide |
| `CLAUDE.md` | AI assistant context |
| `docs/CONTRIBUTING.md` | Developer guide |
| `notebooks/README.md` | Notebook documentation |

---

*Reference Version: 3.0*
*Updated: January 2026*

520 docs/changes/Portfolio-Changes.txt Normal file

@@ -0,0 +1,520 @@

# Leo Miranda — Portfolio Website Blueprint

Structure, navigation, and complete page content

---

## Site Architecture

```
leodata.science
├── Home (Landing)
├── About
├── Projects (Overview + Status)
│   └── [Side Navbar]
│       ├── → Toronto Housing Market Dashboard (live)
│       ├── → US Retail Energy Price Predictor (coming soon)
│       └── → DataFlow Platform (Phase 3)
├── Lab (Bandit Labs / Experiments)
├── Blog
│   └── [Articles]
├── Resume (downloadable + inline)
└── Contact
```

---

## Navigation Structure

Primary Nav: Home | Projects | Lab | Blog | About | Resume

Footer: LinkedIn | GitHub | Email | “Built with Dash & too much coffee”

---

# PAGE CONTENT

---

## 1. HOME (Landing Page)

### Hero Section

Headline:

> I turn messy data into systems that actually work.

Subhead:

> Data Engineer & Analytics Specialist. 8 years building pipelines, dashboards, and the infrastructure nobody sees but everyone depends on. Based in Toronto.

CTA Buttons:

- View Projects → /projects
- Get In Touch → /contact

---

### Quick Impact Strip (Optional — 3-4 stats)

| 1B+ | 40% | 5 Years |
|-------------------------------------------------|------------------------------------|-----------------------------|
| Rows processed daily across enterprise platform | Efficiency gain through automation | Building DataFlow from zero |

---

### Featured Project Card

Toronto Housing Market Dashboard

> Real-time analytics on Toronto’s housing trends. dbt-powered ETL, Python scraping, Plotly visualization.
> \[View Dashboard\] \[View Repository\]

---

### Brief Intro (2-3 sentences)

I’m a data engineer who’s spent the last 8 years in the trenches—building the infrastructure that feeds dashboards, automates the boring stuff, and makes data actually usable. Most of my work has been in contact center operations and energy, where I’ve had to be scrappy: one-person data teams, legacy systems, stakeholders who need answers yesterday.

I like solving real problems, not theoretical ones.

---

## 2. ABOUT PAGE

### Opening

I didn’t start in data. I started in project management—CAPM certified, ITIL trained, the whole corporate playbook. Then I realized I liked building systems more than managing timelines, and I was better at automating reports than attending meetings about them.

That pivot led me to where I am now: 8 years deep in data engineering, analytics, and the messy reality of turning raw information into something people can actually use.

---

### What I Actually Do

The short version: I build data infrastructure. Pipelines, warehouses, dashboards, automation—the invisible machinery that makes businesses run on data instead of gut feelings.

The longer version: At Summitt Energy, I’ve been the sole data professional supporting 150+ employees across 9 markets (Canada and US). I inherited nothing—no data warehouse, no reporting infrastructure, no documentation. Over 5 years, I built DataFlow: an enterprise platform processing 1B+ rows, integrating contact center data, CRM systems, and legacy tools that definitely weren’t designed to talk to each other.

That meant learning to be a generalist. I’ve done ETL pipeline development (Python, SQLAlchemy), dimensional modeling, dashboard design (Power BI, Plotly-Dash), API integration, and more stakeholder management than I’d like to admit. When you’re the only data person, you learn to wear every hat.

---

### How I Think About Data

I’m not interested in data for data’s sake. The question I always start with: What decision does this help someone make?

Most of my work has been in operations-heavy environments—contact centers, energy retail, logistics. These aren’t glamorous domains, but they’re where data can have massive impact. A 30% improvement in abandon rate isn’t just a metric; it’s thousands of customers who didn’t hang up frustrated. A 40% reduction in reporting time means managers can actually manage instead of wrestling with spreadsheets.

I care about outcomes, not technology stacks.

---

### The Technical Stuff (For Those Who Want It)

Languages: Python (Pandas, SQLAlchemy, FastAPI), SQL (MSSQL, PostgreSQL), R, VBA

Data Engineering: ETL/ELT pipelines, dimensional modeling (star schema), dbt patterns, batch processing, API integration, web scraping (Selenium)

Visualization: Plotly/Dash, Power BI, Tableau

Platforms: Genesys Cloud, Five9, Zoho, Azure DevOps

Currently Learning: Cloud certification (Azure DP-203), Airflow, Snowflake

---

### Outside Work

I’m a Brazilian-Canadian based in Toronto. I speak Portuguese (native), English (fluent), and enough Spanish to survive.

When I’m not staring at SQL, I’m usually:

- Building automation tools for small businesses through Bandit Labs (my side project)
- Contributing to open source (MCP servers, Claude Code plugins)
- Trying to explain to my kid why Daddy’s job involves “making computers talk to each other”

---

### What I’m Looking For

I’m currently exploring Senior Data Analyst and Data Engineer roles in the Toronto area (or remote). I’m most interested in:

- Companies that treat data as infrastructure, not an afterthought
- Teams where I can contribute to architecture decisions, not just execute tickets
- Operations-focused industries (energy, logistics, financial services, contact center tech)

If that sounds like your team, let’s talk.

\[Download Resume\] \[Contact Me\]

---

## 3. PROJECTS PAGE

### Navigation Note

The Projects page serves as an overview and status hub for all projects. A side navbar provides direct links to live dashboards and repositories. Users land on the overview first, then navigate to specific projects via the sidebar.

### Intro Text

These are projects I’ve built—some professional (anonymized where needed), some personal. Each one taught me something. Use the sidebar to jump directly to live dashboards or explore the overviews below.

---

### Project Card: Toronto Housing Market Dashboard

Type: Personal Project | Status: Live

The Problem:
Toronto’s housing market moves fast, and most publicly available data is either outdated, behind paywalls, or scattered across dozens of sources. I wanted a single dashboard that tracked trends in real-time.

What I Built:

- Data Pipeline: Python scraper pulling listings data, automated on schedule
- Transformation Layer: dbt-based SQL architecture (staging → intermediate → marts)
- Visualization: Interactive Plotly-Dash dashboard with filters by neighborhood, price range, property type
- Infrastructure: PostgreSQL backend, version-controlled in Git

Tech Stack: Python, dbt, PostgreSQL, Plotly-Dash, GitHub Actions

What I Learned:
Real estate data is messy as hell. Listings get pulled, prices change, duplicates are everywhere. Building a reliable pipeline meant implementing serious data quality checks and learning to embrace “good enough” over “perfect.”

\[View Live Dashboard\] \[View Repository (ETL + dbt)\]

---

### Project Card: US Retail Energy Price Predictor

Type: Personal Project | Status: Coming Soon (Phase 2)

The Problem:
Retail energy pricing in deregulated US markets is volatile and opaque. Consumers and analysts lack accessible tools to understand pricing trends and forecast where rates are headed.

What I’m Building:

- Data Pipeline: Automated ingestion of public pricing data across multiple US markets
- ML Model: Price prediction using time series forecasting (ARIMA, Prophet, or similar)
- Transformation Layer: dbt-based SQL architecture for feature engineering
- Visualization: Interactive dashboard showing historical trends + predictions by state/market

Tech Stack: Python, Scikit-learn, dbt, PostgreSQL, Plotly-Dash

Why This Project:
This showcases the ML side of my skillset—something the Toronto Housing dashboard doesn’t cover. It also leverages my domain expertise from 5+ years in retail energy operations.

\[Coming Soon\]

---

### Project Card: DataFlow Platform (Enterprise Case Study)

Type: Professional | Status: Deferred (Phase 3 — requires sanitized codebase)

The Context:
When I joined Summitt Energy, there was no data infrastructure. Reports were manual. Insights were guesswork. I was hired to fix that.

What I Built (Over 5 Years):

- v1 (2020): Basic ETL scripts pulling Genesys Cloud data into MSSQL
- v2 (2021): Dimensional model (star schema) with fact/dimension tables
- v3 (2022): Python refactor with SQLAlchemy ORM, batch processing, error handling
- v4 (2023-24): dbt-pattern SQL views (staging → intermediate → marts), FastAPI layer, CLI tools

Current State:

- 21 tables, 1B+ rows
- 5,000+ daily transactions processed
- Integrates Genesys Cloud, Zoho CRM, legacy systems
- Feeds Power BI prototypes and production Dash dashboards
- Near-zero reporting errors

Impact:

- 40% improvement in reporting efficiency
- 30% reduction in call abandon rate (via KPI framework)
- 50% faster Average Speed to Answer
- 100% callback completion rate

What I Learned:
Building data infrastructure as a team of one forces brutal prioritization. I learned to ship imperfect solutions fast, iterate based on feedback, and never underestimate how long stakeholder buy-in takes.

Note: This is proprietary work. A sanitized case study with architecture patterns (no proprietary data) will be published in Phase 3.

---

### Project Card: AI-Assisted Automation (Bandit Labs)

Type: Consulting/Side Business | Status: Active

What It Is:
Bandit Labs is my consulting practice focused on automation for small businesses. Most clients don’t need enterprise data platforms—they need someone to eliminate the 4 hours/week they spend manually entering receipts.

Sample Work:

- Receipt Processing Automation: OCR pipeline (Tesseract, Google Vision) extracting purchase data from photos, pushing directly to QuickBooks. Eliminated 3-4 hours/week of manual entry for a restaurant client.
- Product Margin Tracker: Plotly-Dash dashboard with real-time profitability insights
- Claude Code Plugins: MCP servers for Gitea, Wiki.js, NetBox integration

Why I Do This:
Small businesses are underserved by the data/automation industry. Everyone wants to sell them enterprise software they don’t need. I like solving problems at a scale where the impact is immediately visible.

\[Learn More About Bandit Labs\]

---

## 4. LAB PAGE (Bandit Labs / Experiments)

### Intro

This is where I experiment. Some of this becomes client work. Some of it teaches me something and gets abandoned. All of it is real code solving real (or at least real-adjacent) problems.

---

### Bandit Labs — Automation for Small Business

I started Bandit Labs because I kept meeting small business owners drowning in manual work that should have been automated years ago. Enterprise tools are overkill. Custom development is expensive. There’s a gap in the middle.

What I Offer:

- Receipt/invoice processing automation
- Dashboard development (Plotly-Dash)
- Data pipeline setup for non-technical teams
- AI integration for repetitive tasks

Recent Client Work:

- Rio Açaí (Restaurant, Gatineau): Receipt OCR → QuickBooks integration. Saved 3-4 hours/week.

\[Contact for Consulting\]

---

### Open Source / Experiments

MCP Servers (Model Context Protocol)
I’ve built production-ready MCP servers for:

- Gitea: Issue management, label operations
- Wiki.js: Documentation access via GraphQL
- NetBox: CMDB integration (DCIM, IPAM, Virtualization)

These let AI assistants (like Claude) interact with infrastructure tools through natural language. Still experimental, but surprisingly useful for my own workflows.

Claude Code Plugins

- projman: AI-guided sprint planning with Gitea/Wiki.js integration
- cmdb-assistant: Conversational infrastructure queries against NetBox
- project-hygiene: Post-task cleanup automation

\[View on GitHub\]

---

## 5. BLOG PAGE

### Intro

I write occasionally about data engineering, automation, and the reality of being a one-person data team. No hot takes, no growth hacking—just things I’ve learned the hard way.

---

### Suggested Initial Articles

Article 1: “Building a Data Platform as a Team of One”
What I learned from 5 years as the sole data professional at a mid-size company

Outline:

- The reality of “full stack data” when there’s no one else
- Prioritization frameworks (what to build first when everything is urgent)
- Technical debt vs. shipping something
- Building stakeholder trust without a team to back you up
- What I’d do differently

---

Article 2: “dbt Patterns Without dbt (And Why I Eventually Adopted Them)”
How I accidentally implemented analytics engineering best practices before knowing the terminology

Outline:

- The problem: SQL spaghetti in production dashboards
- My solution: staging → intermediate → marts view architecture
- Why separation of concerns matters for maintainability
- The day I discovered dbt and realized I’d been doing this manually
- Migration path for legacy SQL codebases

---

Article 3: “The Toronto Housing Market Dashboard: A Data Engineering Postmortem”
Building a real-time analytics pipeline for messy, uncooperative data

Outline:

- Why I built this (and why public housing data sucks)
- Data sourcing challenges and ethical scraping
- Pipeline architecture decisions
- dbt transformation layer design
- What broke and how I fixed it
- Dashboard design for non-technical users

---

Article 4: “Automating Small Business Operations with OCR and AI”
A case study in practical automation for non-enterprise clients

Outline:

- The client problem: 4 hours/week on receipt entry
- Why “just use \[enterprise tool\]” doesn’t work for small business
- Building an OCR pipeline with Tesseract and Google Vision
- QuickBooks integration gotchas
- ROI calculation for automation projects

---

Article 5: “What I Wish I Knew Before Building My First ETL Pipeline”
Hard-won lessons for junior data engineers

Outline:

- Error handling isn’t optional (it’s the whole job)
- Logging is your best friend at 2am
- Why idempotency matters
- The staging table pattern
- Testing data pipelines
- Documentation nobody will read (write it anyway)

---

Article 6: “Predicting US Retail Energy Prices: An ML Project Walkthrough”
Building a forecasting model with domain knowledge from 5 years in energy retail

Outline:

- Why retail energy pricing is hard to predict (deregulation, seasonality, policy)
- Data sourcing and pipeline architecture
- Feature engineering with dbt
- Model selection (ARIMA vs Prophet vs ensemble)
- Evaluation metrics that matter for price forecasting
- Lessons from applying domain expertise to ML

---
## 6. RESUME PAGE
|
||||
|
||||
### Inline Display
|
||||
|
||||
Show a clean, readable version of the resume directly on the page. Use your tailored Senior Data Analyst version as the base.
|
||||
|
||||
### Download Options
|
||||
|
||||
- \[Download PDF\]
|
||||
- \[Download DOCX\]
|
||||
- \[View on LinkedIn\]
|
||||
|
||||
### Optional: Interactive Timeline
|
||||
|
||||
Visual timeline of career progression with expandable sections for each role. More engaging than a wall of text, but only if you have time to build it.
|
||||
|
||||
---
|
||||
|
||||
## 7. CONTACT PAGE
|
||||
|
||||
### Intro
|
||||
|
||||
I’m currently open to Senior Data Analyst and Data Engineer roles in Toronto (or remote). If you’re working on something interesting and need someone who can build data infrastructure from scratch, I’d like to hear about it.
|
||||
|
||||
For consulting inquiries (automation, dashboards, small business data work), reach out about Bandit Labs.
|
||||
|
||||
---
|
||||
|
||||
### Contact Form Fields
|
||||
|
||||
- Name
|
||||
- Email
|
||||
- Subject (dropdown: Job Opportunity / Consulting Inquiry / Other)
|
||||
- Message
|
||||
|
||||
---
|
||||
|
||||
### Direct Contact
|
||||
|
||||
- Email: leobrmi@hotmail.com
|
||||
- Phone: (416) 859-7936
|
||||
- LinkedIn: \[link\]
|
||||
- GitHub: \[link\]
|
||||
|
||||
---
|
||||
|
||||
### Location
|
||||
|
||||
Toronto, ON, Canada
|
||||
Canadian Citizen | Eligible to work in Canada and US
|
||||
|
||||
---
|
||||
|
||||
## TONE GUIDELINES

### Do:

- Be direct and specific
- Use first person naturally
- Include concrete metrics
- Acknowledge constraints and tradeoffs
- Show personality without being performative
- Write like you talk (minus the profanity)

### Don’t:

- Use buzzwords without substance (“leveraging synergies”)
- Oversell or inflate
- Write in third person
- Use passive voice excessively
- Sound like a LinkedIn influencer
- Pretend you’re a full team when you’re one person

---

## SEO / DISCOVERABILITY

### Target Keywords (Organic)

- Toronto data analyst
- Data engineer portfolio
- Python ETL developer
- dbt analytics engineer
- Contact center analytics

### Blog Strategy

Aim for 1-2 posts per month initially. Focus on:

- Technical tutorials (how I built X)
- Lessons learned (what went wrong and how I fixed it)
- Industry observations (data work in operations-heavy companies)

---

## IMPLEMENTATION PRIORITY

### Phase 1 (MVP — Get it live)

1. Home page (hero + brief intro + featured project)
2. About page (full content)
3. Projects page (overview + status cards with navbar links to dashboards)
4. Resume page (inline + download)
5. Contact page (form + direct info)
6. Blog (start with 2-3 articles)

### Phase 2 (Expand)

1. Lab page (Bandit Labs + experiments)
2. US Retail Energy Price Predictor (ML project — coming soon)
3. Add more projects as completed

### Phase 3 (Polish)

1. DataFlow Platform case study (requires sanitized fork of proprietary codebase)
2. Testimonials (if available from Summitt stakeholders)
3. Interactive elements (timeline, project filters)

---

Last updated: January 2025
56
docs/project-lessons-learned/INDEX.md
Normal file
@@ -0,0 +1,56 @@
# Project Lessons Learned

This folder contains lessons learned from sprints and development work. These lessons help prevent repeating mistakes and capture valuable insights.

**Note:** This is a temporary local backup while Wiki.js integration is being configured. Once Wiki.js is ready, lessons will be migrated there for better searchability.

---

## Lessons Index

| Date | Sprint/Phase | Title | Tags |
|------|--------------|-------|------|
| 2026-02-01 | Sprint 10 | [Formspree Integration with Dash Callbacks](./sprint-10-formspree-dash-integration.md) | formspree, dash, callbacks, forms, spam-protection, honeypot, ajax |
| 2026-01-17 | Sprint 9 | [Gitea Labels API Requires Org Context](./sprint-9-gitea-labels-user-repos.md) | gitea, mcp, api, labels, projman, configuration |
| 2026-01-17 | Sprint 9 | [Always Read CLAUDE.md Before Asking Questions](./sprint-9-read-claude-md-first.md) | projman, claude-code, context, documentation, workflow |
| 2026-01-17 | Sprint 9-10 | [Graceful Error Handling in Service Layers](./sprint-9-10-graceful-error-handling.md) | python, postgresql, error-handling, dash, graceful-degradation, arm64 |
| 2026-01-17 | Sprint 9-10 | [Modular Callback Structure](./sprint-9-10-modular-callback-structure.md) | dash, callbacks, architecture, python, code-organization |
| 2026-01-17 | Sprint 9-10 | [Figure Factory Pattern](./sprint-9-10-figure-factory-pattern.md) | plotly, dash, design-patterns, python, visualization |
| 2026-01-16 | Phase 4 | [dbt Test Syntax Deprecation](./phase-4-dbt-test-syntax.md) | dbt, testing, yaml, deprecation |

---

## How to Use

### When Starting a Sprint
1. Review relevant lessons in this folder before implementation
2. Search by tags or keywords to find applicable insights
3. Apply prevention strategies from past lessons

### When Closing a Sprint
1. Document any significant lessons learned
2. Use the template below
3. Add entry to the index table above

---

## Lesson Template

```markdown
# [Sprint/Phase] - [Lesson Title]

## Context
[What were you trying to do?]

## Problem
[What went wrong or what insight emerged?]

## Solution
[How did you solve it?]

## Prevention
[How can this be avoided in future sprints?]

## Tags
[Comma-separated tags for search]
```
38
docs/project-lessons-learned/phase-4-dbt-test-syntax.md
Normal file
@@ -0,0 +1,38 @@
# Phase 4 - dbt Test Syntax Deprecation

## Context
Implementing dbt mart models with `accepted_values` tests for tier columns (safety_tier, income_quintile, amenity_tier) that should only contain values 1-5.

## Problem
dbt 1.9+ introduced a deprecation warning for generic test arguments. The old syntax:

```yaml
tests:
  - accepted_values:
      values: [1, 2, 3, 4, 5]
```

produces this deprecation warning:
```
MissingArgumentsPropertyInGenericTestDeprecation: Arguments to generic tests should be nested under the `arguments` property.
```

## Solution
Nest test arguments under the `arguments` property:

```yaml
tests:
  - accepted_values:
      arguments:
        values: [1, 2, 3, 4, 5]
```

This applies to all generic tests with arguments, not just `accepted_values`.

## Prevention
- When writing dbt schema YAML files, always use the `arguments:` nesting for generic tests
- Run `dbt parse --no-partial-parse` to catch all deprecation warnings before they become errors
- Check the dbt changelog when upgrading versions for breaking changes to test syntax

## Tags
dbt, testing, yaml, deprecation, syntax, schema
@@ -0,0 +1,70 @@
# Sprint 10 - Formspree Integration with Dash Callbacks

## Context
Implementing a contact form on a Dash portfolio site that submits to Formspree, a third-party form handling service.

## Insights

### Formspree AJAX Submission
Formspree supports AJAX submissions (no page redirect) when you:
1. POST with `Content-Type: application/json`
2. Include `Accept: application/json` header
3. Send form data as JSON body

This returns a JSON response instead of redirecting to a thank-you page, which is ideal for single-page Dash applications.

### Dash Multi-Output Callbacks for Forms
When handling form submission with validation and feedback, use a multi-output callback pattern:
```python
from dash import Input, Output, State, callback, no_update

@callback(
    Output("feedback-container", "children"),  # Success/error alert
    Output("submit-button", "loading"),        # Button loading state
    Output("field-1", "value"),                # Clear on success
    Output("field-2", "value"),                # Clear on success
    Output("field-1", "error"),                # Field-level errors
    Output("field-2", "error"),                # Field-level errors
    Input("submit-button", "n_clicks"),
    State("field-1", "value"),
    State("field-2", "value"),
    prevent_initial_call=True,
)
def handle_submit(n_clicks, field_1, field_2):
    # Validation failure: flag the field, keep entered values via no_update
    if not field_1:
        return no_update, False, no_update, no_update, "Required", no_update
    # Success: show alert, stop loading state, clear fields and errors
    return _create_success_alert(), False, "", "", None, None
```

Use `no_update` for outputs you don't want to change (e.g., keep form values on validation error, only clear on success).

### Honeypot Spam Protection
Simple and effective bot protection without CAPTCHA:
1. Add a hidden text input field (CSS: `position: absolute; left: -9999px`)
2. Set `tabIndex=-1` and `autoComplete="off"` to prevent accidental filling
3. In the callback, check whether the honeypot has a value; if it does, the submission came from a bot
4. For bots: return fake success (don't reveal detection)
5. For humans: proceed with real submission

Formspree also accepts `_gotcha` as a honeypot field name in the JSON payload.
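
The layout-side counterpart might look like the sketch below. The component id is a hypothetical example, and `dcc.Input` is used here because it syncs typed values back to the callback; DMC's TextInput would work similarly where its props allow.

```python
from dash import dcc

# Hidden honeypot field: invisible to humans, but bots tend to auto-fill it
honeypot = dcc.Input(
    id="contact-company",  # innocuous-sounding id (illustrative)
    type="text",
    autoComplete="off",
    style={"position": "absolute", "left": "-9999px"},
)
```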
## Code Pattern
```python
import requests

# Honeypot check - bots fill hidden fields
if honeypot_value:
    # Fake success - don't let bots know they were caught
    return (_create_success_alert(), False, "", "", None, None)

# Real submission for humans
response = requests.post(
    FORMSPREE_ENDPOINT,
    json=form_data,
    headers={"Accept": "application/json", "Content-Type": "application/json"},
    timeout=10,
)
```

## Prevention/Best Practices
- Always use the `timeout` parameter with `requests.post()` to avoid hanging
- Wrap external API calls in try/except for network errors
- Return user-friendly error messages, not technical details
- Use DMC's `required=True` and `error` props for form validation feedback

## Tags
formspree, dash, callbacks, forms, spam-protection, honeypot, ajax, python, requests, validation
@@ -0,0 +1,53 @@
# Sprint 9-10 - Figure Factory Pattern for Reusable Charts

## Context
Creating multiple chart types across 5 dashboard tabs, with consistent styling and behavior needed across all visualizations.

## Problem
Without a standardized approach, each callback would create figures inline with:
- Duplicated styling code (colors, fonts, backgrounds)
- Inconsistent hover templates
- Hard-to-maintain figure creation logic
- No reuse between tabs

## Solution
Created a `figures/` module with factory functions:

```
figures/
├── __init__.py       # Exports all factories
├── choropleth.py     # Map visualizations
├── bar_charts.py     # ranking_bar, stacked_bar, horizontal_bar
├── scatter.py        # scatter_figure, bubble_chart
├── radar.py          # radar_figure, comparison_radar
└── demographics.py   # age_pyramid, donut_chart
```

Factory pattern benefits:
1. **Consistent styling** - dark theme applied once
2. **Type-safe interfaces** - clear parameters for each chart type
3. **Easy testing** - factories can be unit tested with sample data
4. **Reusability** - same factory used across multiple tabs

Example factory signature:
```python
def create_ranking_bar(
    data: list[dict],
    name_column: str,
    value_column: str,
    title: str = "",
    top_n: int = 5,
    bottom_n: int = 5,
    top_color: str = "#4CAF50",
    bottom_color: str = "#F44336",
) -> go.Figure:
```
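
To make the pattern concrete, a minimal body for this factory might look like the sketch below; the sorting logic, layout options, and `plotly_dark` template are illustrative assumptions, not the project's exact implementation.

```python
import plotly.graph_objects as go

def create_ranking_bar(
    data: list[dict],
    name_column: str,
    value_column: str,
    title: str = "",
    top_n: int = 5,
    bottom_n: int = 5,
    top_color: str = "#4CAF50",
    bottom_color: str = "#F44336",
) -> go.Figure:
    # Sort descending by value, then keep the best and worst performers
    ordered = sorted(data, key=lambda row: row[value_column], reverse=True)
    top = ordered[:top_n]
    bottom = ordered[-bottom_n:] if bottom_n else []
    rows = top + bottom
    colors = [top_color] * len(top) + [bottom_color] * len(bottom)

    fig = go.Figure(
        go.Bar(
            x=[row[value_column] for row in rows],
            y=[row[name_column] for row in rows],
            orientation="h",
            marker_color=colors,
        )
    )
    # Styling applied once here is what keeps every tab consistent
    fig.update_layout(title=title, template="plotly_dark")
    return fig
```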

## Prevention
- **Create factories early** - before implementing callbacks
- **Design generic interfaces** - factories should work with any data matching the schema
- **Apply styling in one place** - use constants for colors, fonts
- **Test factories independently** - with synthetic data before integration

## Tags
plotly, dash, design-patterns, python, visualization, reusability, code-organization
@@ -0,0 +1,34 @@
# Sprint 9-10 - Graceful Error Handling in Service Layers

## Context
Building the Toronto Neighbourhood Dashboard with a service layer that queries PostgreSQL/PostGIS dbt marts to provide data to Dash callbacks.

## Problem
The initial service layer implementation let database connection errors propagate as unhandled exceptions. When the PostGIS Docker container was unavailable (common on ARM64 systems, where the x86_64 image fails), the entire dashboard would crash instead of gracefully degrading.

## Solution
Wrapped database queries in try/except blocks to return empty DataFrames/lists/dicts when the database is unavailable:
```python
import pandas as pd
from sqlalchemy import text

def _execute_query(sql: str, params: dict | None = None) -> pd.DataFrame:
    try:
        engine = get_engine()  # module-level engine helper
        with engine.connect() as conn:
            return pd.read_sql(text(sql), conn, params=params)
    except Exception:
        # Database unreachable: hand back an empty frame so the UI can degrade
        return pd.DataFrame()
```

This allows:
1. The dashboard to load and display empty states (sketched below)
2. Development/testing without a running database
3. Graceful degradation in production
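
A hedged sketch of the consuming side; `_empty_chart`, `create_ranking_bar`, and the query are hypothetical names used only to illustrate the empty-frame contract:

```python
# Callback-side handling of the empty-frame contract (names are illustrative)
df = _execute_query("SELECT neighbourhood, score FROM mart_livability")
if df.empty:
    # The service layer returned nothing: show a placeholder instead of raising
    fig = _empty_chart("Data unavailable - is the database running?")
else:
    fig = create_ranking_bar(df.to_dict("records"), "neighbourhood", "score")
```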
## Prevention
- **Always design service layers with graceful degradation** - assume external dependencies can fail
- **Return empty collections, not exceptions** - let UI components handle empty states
- **Test without the database** - verify the app doesn't crash when the DB is unavailable
- **Consider ARM64 compatibility** - PostGIS images may not support all platforms

## Tags
python, postgresql, service-layer, error-handling, dash, graceful-degradation, arm64
@@ -0,0 +1,45 @@
# Sprint 9-10 - Modular Callback Structure for Multi-Tab Dashboards

## Context
Implementing a 5-tab Toronto Neighbourhood Dashboard with multiple callbacks per tab (map updates, chart updates, KPI updates, selection handling).

## Problem
The initial plan would have placed all callbacks in a single file, leading to:
- A monolithic file with 500+ lines
- Difficult-to-navigate code
- Callbacks for different tabs interleaved
- Testing difficulties

## Solution
Organized callbacks into three focused modules:

```
callbacks/
├── __init__.py              # Imports all modules to register callbacks
├── map_callbacks.py         # Choropleth updates, map click handling
├── chart_callbacks.py       # Supporting chart updates (scatter, trend, donut)
└── selection_callbacks.py   # Dropdown population, KPI updates
```

Key patterns:
1. **Group by responsibility**, not by tab - all map-related callbacks together
2. **Use noqa comments** for imports that register callbacks as side effects
3. **Share helper functions** (like `_empty_chart()`) within modules; see the sketch after the import example
```python
# callbacks/__init__.py
from . import (
    chart_callbacks,  # noqa: F401
    map_callbacks,  # noqa: F401
    selection_callbacks,  # noqa: F401
)
```
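
As a sketch of pattern 3, a shared helper inside one of these modules might look like this; the styling and message are assumptions:

```python
# chart_callbacks.py - hypothetical shared helper
import plotly.graph_objects as go

def _empty_chart(message: str = "No data available") -> go.Figure:
    # Placeholder figure returned by several callbacks when a query comes back empty
    fig = go.Figure()
    fig.add_annotation(text=message, showarrow=False, font=dict(size=14))
    fig.update_layout(template="plotly_dark", xaxis_visible=False, yaxis_visible=False)
    return fig
```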

## Prevention
- **Plan callback organization before implementation** - sketch which callbacks go where
- **Group by function, not by feature** - keeps related logic together
- **Keep modules under 400 lines** - split if exceeding
- **Test imports early** - verify callbacks register correctly

## Tags
dash, callbacks, architecture, python, code-organization, maintainability
@@ -0,0 +1,29 @@
# Sprint 9 - Gitea Labels API Requires Org Context

## Context
Creating Gitea issues with labels via MCP tools during Sprint 9 planning for the personal-portfolio project.

## Problem
When calling `create_issue` with a `labels` parameter, the call failed with:
```
404 Client Error: Not Found for url: https://gitea.hotserv.cloud/api/v1/orgs/lmiranda/labels
```

The API attempted to fetch labels from an **organization** endpoint, but `lmiranda` is a **user account**, not an organization.

## Solution
Created issues without the `labels` parameter and documented the intended labels in the issue body instead:
```markdown
**Labels:** Type/Feature, Priority/Medium, Complexity/Simple, Efforts/XS, Component/Docs, Tech/Python
```

This provides visibility into intended categorization while avoiding the API error.

## Prevention
- When working with user-owned repos (not org repos), avoid using the `labels` parameter in `create_issue`
- Document labels in the issue body as a workaround
- Consider creating a repo-level label set for user repos (Gitea supports this)
- Update the projman plugin to handle user vs org repos differently

## Tags
gitea, mcp, api, labels, projman, configuration
@@ -0,0 +1,30 @@
# Sprint 9 - Always Read CLAUDE.md Before Asking Questions

## Context
Starting a Sprint 9 planning session with the `/projman:sprint-plan` command.

## Problem
Asked the user "what should I do?" when all the necessary context was already documented in CLAUDE.md:
- Current sprint number and phase
- Implementation plan location
- Remaining phases to complete
- Project conventions and workflows

This caused user frustration: "why are you asking what to do? cant you see this yourself"

## Solution
Before asking any questions about what to do:
1. Read `CLAUDE.md` in the project root
2. Check the "Project Status" section for the current sprint/phase
3. Follow references to implementation plans
4. Review the "Projman Plugin Workflow" section for expected behavior

## Prevention
- **ALWAYS** read CLAUDE.md at the start of any sprint-related command
- Look for "Current Sprint" and "Phase" indicators
- Check for implementation plan references in `docs/changes/`
- Only ask questions if information is genuinely missing from documentation
- The projman plugin expects autonomous behavior based on documented context

## Tags
projman, claude-code, context, documentation, workflow, sprint-planning
265
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,265 @@
# Runbook: Adding a New Dashboard

This runbook describes how to add a new data dashboard to the portfolio application.

## Prerequisites

- [ ] Data sources identified and accessible
- [ ] Database schema designed
- [ ] Basic Dash/Plotly familiarity

## Directory Structure

Create the following structure:

### Application Code (`portfolio_app/`)

```
portfolio_app/
├── pages/
│   └── {dashboard_name}/
│       ├── dashboard.py        # Main layout with tabs
│       ├── methodology.py      # Data sources and methods page
│       ├── tabs/
│       │   ├── __init__.py
│       │   ├── overview.py     # Overview tab layout
│       │   └── ...             # Additional tab layouts
│       └── callbacks/
│           ├── __init__.py
│           └── ...             # Callback modules
├── {dashboard_name}/           # Data logic (outside pages/)
│   ├── __init__.py
│   ├── parsers/                # API/CSV extraction
│   │   └── __init__.py
│   ├── loaders/                # Database operations
│   │   └── __init__.py
│   ├── schemas/                # Pydantic models
│   │   └── __init__.py
│   └── models/                 # SQLAlchemy ORM (schema: raw_{dashboard_name})
│       └── __init__.py
└── figures/
    └── {dashboard_name}/       # Figure factories for this dashboard
        ├── __init__.py
        └── ...                 # Chart modules
```

### dbt Models (`dbt/models/`)

```
dbt/models/
├── staging/
│   └── {dashboard_name}/       # Staging models
│       ├── _sources.yml        # Source definitions (schema: raw_{dashboard_name})
│       ├── _staging.yml        # Model tests/docs
│       └── stg_*.sql           # Staging models
├── intermediate/
│   └── {dashboard_name}/       # Intermediate models
│       ├── _intermediate.yml
│       └── int_*.sql
└── marts/
    └── {dashboard_name}/       # Mart tables
        ├── _marts.yml
        └── mart_*.sql
```

### Documentation (`notebooks/`)

```
notebooks/
└── {dashboard_name}/           # Domain subdirectories
    ├── overview/
    └── ...
```

## Step-by-Step Checklist

### 1. Data Layer

- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/` (example sketched below)
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
- [ ] Create parsers in `{dashboard_name}/parsers/`
- [ ] Create loaders in `{dashboard_name}/loaders/`
- [ ] Add database migrations if needed
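
A minimal Pydantic schema, purely illustrative (the entity and fields are hypothetical):

```python
# {dashboard_name}/schemas/example.py - names are hypothetical
from pydantic import BaseModel, Field

class MatchIn(BaseModel):
    # Validated shape of one parsed record before it touches the database
    home_team: str
    away_team: str
    attendance: int = Field(ge=0)
```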
### 2. Database Schema

- [ ] Define a schema constant in models (e.g., `RAW_FOOTBALL_SCHEMA = "raw_football"`)
- [ ] Add `__table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}` to all models (see the sketch below)
- [ ] Update `scripts/db/init_schema.py` to create the new schema
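
A hedged SQLAlchemy 2.0 sketch of this convention; the table and columns are hypothetical:

```python
# {dashboard_name}/models/example.py - illustrative only
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

RAW_FOOTBALL_SCHEMA = "raw_football"

class Base(DeclarativeBase):
    pass

class Match(Base):
    __tablename__ = "matches"
    __table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}  # namespaces raw data per dashboard

    id: Mapped[int] = mapped_column(primary_key=True)
    home_team: Mapped[str]
    away_team: Mapped[str]
```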
### 3. dbt Models

Create dbt models in `dbt/models/`:

- [ ] `staging/{dashboard_name}/_sources.yml` - Source definitions pointing to the `raw_{dashboard_name}` schema
- [ ] `staging/{dashboard_name}/stg_{source}__{entity}.sql` - Raw data cleaning
- [ ] `intermediate/{dashboard_name}/int_{domain}__{transform}.sql` - Business logic
- [ ] `marts/{dashboard_name}/mart_{domain}.sql` - Final analytical tables

Update `dbt/dbt_project.yml` with the new subdirectory config:
```yaml
models:
  portfolio:
    staging:
      {dashboard_name}:
        +materialized: view
        +schema: stg_{dashboard_name}
    intermediate:
      {dashboard_name}:
        +materialized: view
        +schema: int_{dashboard_name}
    marts:
      {dashboard_name}:
        +materialized: table
        +schema: mart_{dashboard_name}
```

Follow naming conventions:
- Staging: `stg_{source}__{entity}`
- Intermediate: `int_{domain}__{transform}`
- Marts: `mart_{domain}`

### 4. Visualization Layer

- [ ] Create figure factories in `figures/{dashboard_name}/`
- [ ] Create `figures/{dashboard_name}/__init__.py` with exports
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`

Import pattern:
```python
from portfolio_app.figures.{dashboard_name} import create_choropleth_figure
```

### 5. Dashboard Pages

#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)

```python
import dash
import dash_mantine_components as dmc

# Assumed import: the overview tab layout exported from the tabs package
from .tabs.overview import overview_tab

dash.register_page(
    __name__,
    path="/{dashboard_name}",
    title="{Dashboard Title}",
    description="{Description}",
)


def layout():
    return dmc.Container([
        # Header
        dmc.Title("{Dashboard Title}", order=1),

        # Tabs
        dmc.Tabs([
            dmc.TabsList([
                dmc.TabsTab("Overview", value="overview"),
                # Add more tabs
            ]),
            dmc.TabsPanel(overview_tab(), value="overview"),
            # Add more panels
        ], value="overview"),
    ])
```

#### Tab Layouts (`pages/{dashboard_name}/tabs/`)

- [ ] Create one file per tab
- [ ] Export a layout function from each (see the sketch below)
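
A minimal tab module might look like this; the content is a placeholder assumption:

```python
# pages/{dashboard_name}/tabs/overview.py - illustrative sketch
import dash_mantine_components as dmc

def overview_tab():
    # Layout function consumed by dashboard.py's TabsPanel
    return dmc.Stack([
        dmc.Text("KPIs and headline charts go here"),
    ])
```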
#### Callbacks (`pages/{dashboard_name}/callbacks/`)

- [ ] Create callback modules for interactivity
- [ ] Import and register them in dashboard.py

### 6. Navigation

Add to the sidebar in `components/sidebar.py`:

```python
# Assumed import: from dash_iconify import DashIconify
dmc.NavLink(
    label="{Dashboard Name}",
    href="/{dashboard_name}",
    icon=DashIconify(icon="..."),
)
```

### 7. Documentation

- [ ] Create a methodology page (`pages/{dashboard_name}/methodology.py`)
- [ ] Document data sources
- [ ] Document transformation logic
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed

### 8. Testing

- [ ] Add unit tests for parsers
- [ ] Add unit tests for loaders
- [ ] Add integration tests for callbacks
- [ ] Run `make test`

### 9. Final Verification

- [ ] All pages render without errors
- [ ] All callbacks respond correctly
- [ ] Data loads successfully
- [ ] dbt models run cleanly (`make dbt-run`)
- [ ] Linting passes (`make lint`)
- [ ] Tests pass (`make test`)

## Example: Toronto Dashboard

Reference implementation: `portfolio_app/pages/toronto/`

Key files:
- `dashboard.py` - Main layout with 5 tabs
- `tabs/overview.py` - Livability scores, scatter plots
- `callbacks/map_callbacks.py` - Choropleth interactions
- `toronto/models/dimensions.py` - Dimension tables
- `toronto/models/facts.py` - Fact tables

## Common Patterns

### Figure Factories

```python
# figures/choropleth.py
def create_choropleth_figure(
    gdf: gpd.GeoDataFrame,
    value_column: str,
    title: str,
    **kwargs
) -> go.Figure:
    ...
```

### Callbacks

```python
# callbacks/map_callbacks.py
@callback(
    Output("neighbourhood-details", "children"),
    Input("choropleth-map", "clickData"),
)
def update_details(click_data):
    ...
```

### Data Loading

```python
# {dashboard_name}/loaders/load.py
# parse_source_data, Schema, and Model stand in for your dashboard's own
# parser, Pydantic schema, and ORM model
from sqlalchemy.orm import Session

def load_data(session: Session) -> None:
    # Parse from source
    records = parse_source_data()

    # Validate with Pydantic
    validated = [Schema(**r) for r in records]

    # Load to database
    for record in validated:
        session.add(Model(**record.model_dump()))

    session.commit()
```
Some files were not shown because too many files have changed in this diff.