Compare commits: sprint-7-c...9a1fc81f79 (34 commits)

| SHA1 |
|---|
| 9a1fc81f79 |
| cf6e874961 |
| 451dc10a10 |
| 193b9289b9 |
| 7a16e6d121 |
| ecc50e5d98 |
| ae3742630e |
| e70965b429 |
| 25954f17bb |
| bffd44a5a5 |
| bf6e392002 |
| d0f32edba7 |
| 4818c53fd2 |
| 1a878313f8 |
| 1eba95d4d1 |
| c9cf744d84 |
| 3054441630 |
| b6d210ec6b |
| 053acf6436 |
| f69d0c15a7 |
| 81993b23a7 |
| 457efec77f |
| f5f2bf3706 |
| fcaefabce8 |
| cb877df9e1 |
| 48b4eeeb62 |
| d3ca4ad4eb |
| e7bc545f25 |
| c8f4cc6241 |
| 3cd2eada7c |
| 138e6fe497 |
| cd7b5ce154 |
| e1135a77a8 |
| 39656ca836 |
.gitea/workflows/ci.yml (new file, 35 lines)
@@ -0,0 +1,35 @@
name: CI

on:
  push:
    branches:
      - development
      - staging
      - main
  pull_request:
    branches:
      - development

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install ruff pytest

      - name: Run linter
        run: ruff check .

      - name: Run tests
        run: pytest tests/ -v --tb=short
.gitea/workflows/deploy-production.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Production

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Production Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin main
            git reset --hard origin/main

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Production deployment complete!"
.gitea/workflows/deploy-staging.yml (new file, 44 lines)
@@ -0,0 +1,44 @@
name: Deploy to Staging

on:
  push:
    branches:
      - staging

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Staging Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.STAGING_HOST }}
          username: ${{ secrets.STAGING_USER }}
          key: ${{ secrets.STAGING_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin staging
            git reset --hard origin/staging

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Staging deployment complete!"
CLAUDE.md
@@ -6,8 +6,8 @@ Working context for Claude Code on the Analytics Portfolio project.

 ## Project Status

-**Current Sprint**: 7 (Navigation & Theme Modernization)
-**Phase**: 1 - Toronto Housing Dashboard
+**Last Completed Sprint**: 9 (Neighbourhood Dashboard Transition)
+**Current State**: Ready for deployment sprint or new features
 **Branch**: `development` (feature branches merge here)

 ---
@@ -18,7 +18,7 @@ Working context for Claude Code on the Analytics Portfolio project.
 ```bash
 make setup       # Install deps, create .env, init pre-commit
-make docker-up   # Start PostgreSQL + PostGIS
+make docker-up   # Start PostgreSQL + PostGIS (auto-detects x86/ARM)
 make docker-down # Stop containers
 make db-init     # Initialize database schema
 make run         # Start Dash dev server
@@ -33,7 +33,10 @@ make ci # Run all checks
 1. Create feature branch FROM `development`: `git checkout -b feature/{sprint}-{description}`
 2. Work and commit on feature branch
 3. Merge INTO `development` when complete
-4. `development` -> `staging` -> `main` for releases
+4. Delete the feature branch after merge (keep branches clean)
+5. `development` -> `staging` -> `main` for releases
+
+**CRITICAL: NEVER DELETE the `development` branch. It is the main integration branch.**

 ---

@@ -43,8 +46,8 @@ make ci # Run all checks
 | Context | Style | Example |
 |---------|-------|---------|
-| Same directory | Single dot | `from .trreb import TRREBParser` |
-| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
+| Same directory | Single dot | `from .neighbourhood import NeighbourhoodRecord` |
+| Sibling directory | Double dot | `from ..schemas.neighbourhood import CensusRecord` |
 | External packages | Absolute | `import pandas as pd` |
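
For illustration only, here is how the three styles might sit together at the top of a hypothetical loader module (the `.base`/`BaseLoader` names are invented; the schema import is taken from the table above):

```python
# portfolio_app/toronto/loaders/census.py -- hypothetical module for illustration
from .base import BaseLoader                      # same directory: single dot
from ..schemas.neighbourhood import CensusRecord  # sibling directory: double dot
import pandas as pd                               # external package: absolute import
```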
### Module Responsibilities
@@ -53,7 +56,7 @@ make ci  # Run all checks
 |-----------|----------|---------|
 | `schemas/` | Pydantic models | Data validation |
 | `models/` | SQLAlchemy ORM | Database persistence |
-| `parsers/` | PDF/CSV extraction | Raw data ingestion |
+| `parsers/` | API/CSV extraction | Raw data ingestion |
 | `loaders/` | Database operations | Data loading |
 | `figures/` | Chart factories | Plotly figure generation |
 | `callbacks/` | Dash callbacks | In `pages/{dashboard}/callbacks/` |
@@ -101,19 +104,57 @@ portfolio_app/
 ├── app.py                    # Dash app factory with Pages routing
 ├── config.py                 # Pydantic BaseSettings
+├── assets/                   # CSS, images (auto-served)
+│   └── sidebar.css           # Navigation styling
+├── callbacks/                # Global callbacks
+│   ├── sidebar.py            # Sidebar toggle
+│   └── theme.py              # Dark/light theme
 ├── pages/
 │   ├── home.py               # Bio landing page -> /
+│   ├── about.py              # About page -> /about
+│   ├── contact.py            # Contact form -> /contact
+│   ├── health.py             # Health endpoint -> /health
+│   ├── projects.py           # Project showcase -> /projects
+│   ├── resume.py             # Resume/CV -> /resume
+│   ├── blog/
+│   │   ├── index.py          # Blog listing -> /blog
+│   │   └── article.py        # Blog article -> /blog/{slug}
 │   └── toronto/
-│       ├── dashboard.py      # Layout only -> /toronto
-│       └── callbacks/        # Interaction logic
-├── components/               # Shared UI (navbar, footer, cards)
+│       ├── dashboard.py      # Dashboard -> /toronto
+│       ├── methodology.py    # Methodology -> /toronto/methodology
+│       ├── tabs/             # 5 tab layouts (overview, housing, safety, demographics, amenities)
+│       └── callbacks/        # Dashboard interactions
+├── components/               # Shared UI (sidebar, cards, controls)
+│   ├── metric_card.py        # KPI card component
+│   ├── map_controls.py       # Map control panel
+│   ├── sidebar.py            # Navigation sidebar
+│   └── time_slider.py        # Time range selector
 ├── figures/                  # Shared chart factories
+│   ├── choropleth.py         # Map visualizations
+│   ├── bar_charts.py         # Ranking, stacked, horizontal bars
+│   ├── scatter.py            # Scatter and bubble plots
+│   ├── radar.py              # Radar/spider charts
+│   ├── demographics.py       # Age pyramids, donut charts
+│   ├── time_series.py        # Trend lines
+│   └── summary_cards.py      # KPI figures
+├── content/                  # Markdown content
+│   └── blog/                 # Blog articles
 ├── toronto/                  # Toronto data logic
 │   ├── parsers/
 │   ├── loaders/
 │   ├── schemas/              # Pydantic
-│   └── models/               # SQLAlchemy
+│   ├── models/               # SQLAlchemy
+│   └── demo_data.py          # Sample data
 ├── utils/                    # Utilities
 │   └── markdown_loader.py    # Markdown processing
 └── errors/
+
+notebooks/                    # Data documentation (Phase 6)
+├── README.md                 # Template and usage guide
+├── overview/                 # Overview tab notebooks (3)
+├── housing/                  # Housing tab notebooks (3)
+├── safety/                   # Safety tab notebooks (3)
+├── demographics/             # Demographics tab notebooks (3)
+└── amenities/                # Amenities tab notebooks (3)
 ```

### URL Routing
@@ -121,7 +162,15 @@ portfolio_app/
 | URL | Page | Sprint |
 |-----|------|--------|
 | `/` | Bio landing page | 2 |
-| `/toronto` | Toronto Housing Dashboard | 6 |
+| `/about` | About page | 8 |
+| `/contact` | Contact form | 8 |
+| `/health` | Health endpoint | 8 |
+| `/projects` | Project showcase | 8 |
+| `/resume` | Resume/CV | 8 |
+| `/blog` | Blog listing | 8 |
+| `/blog/{slug}` | Blog article | 8 |
+| `/toronto` | Toronto Dashboard | 6 |
+| `/toronto/methodology` | Dashboard methodology | 6 |

 ---
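
These routes come from Dash Pages, which registers each module under `pages/` at import time. A minimal sketch of what one such page module presumably looks like (the actual file contents are not part of this compare):

```python
# pages/about.py -- illustrative sketch, not the repository's actual file
import dash
from dash import html

# register_page is what maps this module to the /about route in the table above
dash.register_page(__name__, path="/about", title="About")

# Pages convention: a module-level `layout` is rendered when the path is visited
layout = html.Div([html.H1("About"), html.P("Background and experience.")])
```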
@@ -144,6 +193,7 @@ portfolio_app/
 - SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
 - PostGIS extension required in database
 - Docker Compose V2 format (no `version` field)
+- **Multi-architecture support**: `make docker-up` auto-detects CPU architecture and uses the appropriate PostGIS image (x86_64: `postgis/postgis`, ARM64: `imresamu/postgis`)

 ---

@@ -152,27 +202,20 @@ portfolio_app/
 ### Geographic Reality (Toronto Housing)

 ```
-TRREB Districts (~35)     - Purchase data (W01, C01, E01...)
+City Neighbourhoods (158) - Primary geographic unit for analysis
 CMHC Zones (~20)          - Rental data (Census Tract aligned)
-City Neighbourhoods (158) - Enrichment/overlay only
 ```

 **Critical**: These geographies do NOT align. Display as separate layers—do not force crosswalks.

 ### Star Schema

 | Table | Type | Keys |
 |-------|------|------|
 | `fact_purchases` | Fact | -> dim_time, dim_trreb_district |
 | `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone |
 | `dim_time` | Dimension | date_key (PK) |
 | `dim_trreb_district` | Dimension | district_key (PK), geometry |
 | `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
 | `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
 | `dim_policy_event` | Dimension | event_id (PK) |

 **V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.

 ### dbt Layers

 | Layer | Naming | Purpose |
@@ -183,37 +226,15 @@ City Neighbourhoods (158) - Enrichment/overlay only
-## DO NOT BUILD (Phase 1)
+## Deferred Features

 **Stop and flag if a task seems to require these**:

 | Feature | Reason |
 |---------|--------|
 | `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 |
 | Crime data integration | Deferred to Phase 4 |
 | Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
-| ML prediction models | Energy project scope (Phase 3) |
-| Multi-project shared infrastructure | Build first, abstract second (Phase 2) |
-
----
-
-## Sprint 1 Deliverables
-
-| Category | Tasks |
-|----------|-------|
-| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
-| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
-| **App Foundation** | portfolio_app/ structure, config.py, error handling |
-| **Tests** | tests/ directory, conftest.py, pytest config |
-| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
-
-### Human Tasks (Cannot Automate)
-
-| Task | Tool | Effort |
-|------|------|--------|
-| Digitize TRREB district boundaries | QGIS | 3-4 hours |
-| Research policy events (10-20) | Manual | 2-3 hours |
-| Replace social link placeholders | Manual | 5 minutes |
+| ML prediction models | Energy project scope (future phase) |
+| Multi-project shared infrastructure | Build first, abstract second |

 ---

@@ -248,10 +269,77 @@ All scripts in `scripts/`:
 | Document | Location | Use When |
 |----------|----------|----------|
-| Full specification | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
-| Data schemas | `docs/toronto_housing_dashboard_spec_v5.md` | Parser/model tasks |
-| WBS details | `docs/wbs_sprint_plan_v4.md` | Sprint planning |
+| Project reference | `docs/PROJECT_REFERENCE.md` | Architecture decisions, completed work |
+| Developer guide | `docs/CONTRIBUTING.md` | How to add pages, blog posts, tabs |
+| Lessons learned | `docs/project-lessons-learned/INDEX.md` | Past issues and solutions |

 ---

-*Last Updated: Sprint 7*
+## Projman Plugin Workflow
+
+**CRITICAL: Always use the projman plugin for sprint and task management.**
+
+### When to Use Projman Skills
+
+| Skill | Trigger | Purpose |
+|-------|---------|---------|
+| `/projman:sprint-plan` | New sprint or phase implementation | Architecture analysis + Gitea issue creation |
+| `/projman:sprint-start` | Beginning implementation work | Load lessons learned (Wiki.js or local), start execution |
+| `/projman:sprint-status` | Check progress | Review blockers and completion status |
+| `/projman:sprint-close` | Sprint completion | Capture lessons learned (Wiki.js or local backup) |
+
+### Default Behavior
+
+When user requests implementation work:
+
+1. **ALWAYS start with `/projman:sprint-plan`** before writing code
+2. Create Gitea issues with proper labels and acceptance criteria
+3. Use `/projman:sprint-start` to begin execution with lessons learned
+4. Track progress via Gitea issue comments
+5. Close sprint with `/projman:sprint-close` to document lessons
+
+### Gitea Repository
+
+- **Repo**: `lmiranda/personal-portfolio`
+- **Host**: `gitea.hotserv.cloud`
+- **Note**: `lmiranda` is a user account (not org), so label lookup may require repo-level labels
+
+### MCP Tools Available
+
+**Gitea**:
+- `list_issues`, `get_issue`, `create_issue`, `update_issue`, `add_comment`
+- `get_labels`, `suggest_labels`
+
+**Wiki.js**:
+- `search_lessons`, `create_lesson`, `search_pages`, `get_page`
+
+### Lessons Learned (Backup Method)
+
+**When Wiki.js is unavailable**, use the local backup in `docs/project-lessons-learned/`:
+
+**At Sprint Start:**
+1. Review `docs/project-lessons-learned/INDEX.md` for relevant past lessons
+2. Search lesson files by tags/keywords before implementation
+3. Apply prevention strategies from applicable lessons
+
+**At Sprint Close:**
+1. Try Wiki.js `create_lesson` first
+2. If Wiki.js fails, create lesson in `docs/project-lessons-learned/`
+3. Use naming convention: `{phase-or-sprint}-{short-description}.md`
+4. Update `INDEX.md` with new entry
+5. Follow the lesson template in INDEX.md
+
+**Migration:** Once Wiki.js is configured, lessons will be migrated there for better searchability.
+
+### Issue Structure
+
+Every Gitea issue should include:
+- **Overview**: Brief description
+- **Files to Create/Modify**: Explicit paths
+- **Acceptance Criteria**: Checkboxes
+- **Technical Notes**: Implementation hints
+- **Labels**: Listed in body (workaround for label API issues)
+
+---
+
+*Last Updated: January 2026 (Post-Sprint 9)*
LICENSE (new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024-2025 Leo Miranda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Makefile
@@ -1,4 +1,4 @@
-.PHONY: setup docker-up docker-down db-init run test dbt-run dbt-test lint format ci deploy clean help
+.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto

 # Default target
 .DEFAULT_GOAL := help
@@ -8,6 +8,17 @@ PYTHON := python3
 PIP := pip
 DOCKER_COMPOSE := docker compose

+# Architecture detection for Docker images
+ARCH := $(shell uname -m)
+ifeq ($(ARCH),aarch64)
+POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else ifeq ($(ARCH),arm64)
+POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else
+POSTGIS_IMAGE := postgis/postgis:16-3.4
+endif
+export POSTGIS_IMAGE
+
 # Colors for output
 BLUE := \033[0;34m
 GREEN := \033[0;32m
@@ -39,6 +50,7 @@ setup: ## Install dependencies, create .env, init pre-commit
 docker-up: ## Start PostgreSQL + PostGIS containers
 	@echo "$(GREEN)Starting database containers...$(NC)"
+	@echo "$(BLUE)Architecture: $(ARCH) -> Using image: $(POSTGIS_IMAGE)$(NC)"
 	$(DOCKER_COMPOSE) up -d
 	@echo "$(GREEN)Waiting for database to be ready...$(NC)"
 	@sleep 3
@@ -57,11 +69,7 @@ docker-logs: ## View container logs
 db-init: ## Initialize database schema
 	@echo "$(GREEN)Initializing database schema...$(NC)"
-	@if [ -f scripts/db/init.sh ]; then \
-		bash scripts/db/init.sh; \
-	else \
-		echo "$(YELLOW)scripts/db/init.sh not found - skipping$(NC)"; \
-	fi
+	$(PYTHON) scripts/db/init_schema.py

 db-reset: ## Drop and recreate database (DESTRUCTIVE)
 	@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
@@ -71,6 +79,14 @@ db-reset: ## Drop and recreate database (DESTRUCTIVE)
 	@sleep 3
 	$(MAKE) db-init

+load-data: ## Load Toronto data from APIs and run dbt
+	@echo "$(GREEN)Loading Toronto neighbourhood data...$(NC)"
+	$(PYTHON) scripts/data/load_toronto_data.py
+
+load-data-only: ## Load Toronto data without running dbt
+	@echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)"
+	$(PYTHON) scripts/data/load_toronto_data.py --skip-dbt
+
 # =============================================================================
 # Application
 # =============================================================================
@@ -97,15 +113,15 @@ test-cov: ## Run pytest with coverage
 dbt-run: ## Run dbt models
 	@echo "$(GREEN)Running dbt models...$(NC)"
-	cd dbt && dbt run
+	cd dbt && dbt run --profiles-dir .

 dbt-test: ## Run dbt tests
 	@echo "$(GREEN)Running dbt tests...$(NC)"
-	cd dbt && dbt test
+	cd dbt && dbt test --profiles-dir .

 dbt-docs: ## Generate dbt documentation
 	@echo "$(GREEN)Generating dbt docs...$(NC)"
-	cd dbt && dbt docs generate && dbt docs serve
+	cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir .

 # =============================================================================
 # Code Quality
@@ -131,6 +147,19 @@ ci: ## Run all checks (lint, typecheck, test)
 	$(MAKE) test
 	@echo "$(GREEN)All checks passed!$(NC)"

+# =============================================================================
+# Operations
+# =============================================================================
+
+logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
+	@./scripts/logs.sh $(SERVICE)
+
+run-detached: ## Start containers and wait for health check
+	@./scripts/run-detached.sh
+
+etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
+	@./scripts/etl/toronto.sh $(MODE)
+
 # =============================================================================
 # Deployment
 # =============================================================================
README.md
@@ -1,36 +1,82 @@
 # Analytics Portfolio

-A data analytics portfolio showcasing end-to-end data engineering, visualization, and analysis capabilities.
+[CI](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)

-## Projects
+**Live Demo:** [leodata.science](https://leodata.science)

-### Toronto Housing Dashboard
+A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.

-An interactive choropleth dashboard analyzing Toronto's housing market using multi-source data integration.
+## Live Pages

-**Features:**
-- Purchase market analysis from TRREB monthly reports
-- Rental market analysis from CMHC annual surveys
-- Interactive choropleth maps by district/zone
-- Time series visualization with policy event annotations
-- Purchase/Rental mode toggle
+| Route | Page | Description |
+|-------|------|-------------|
+| `/` | Home | Bio landing page |
+| `/about` | About | Background and experience |
+| `/projects` | Projects | Portfolio project showcase |
+| `/resume` | Resume | Professional CV |
+| `/contact` | Contact | Contact form |
+| `/blog` | Blog | Technical articles |
+| `/blog/{slug}` | Article | Individual blog posts |
+| `/toronto` | Toronto Dashboard | Neighbourhood analysis (5 tabs) |
+| `/toronto/methodology` | Methodology | Dashboard data sources and methods |
+| `/health` | Health | API health check endpoint |

-**Data Sources:**
-- [TRREB Market Watch](https://trreb.ca/market-data/market-watch/) - Monthly purchase statistics
-- [CMHC Rental Market Survey](https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market) - Annual rental data
+## Toronto Neighbourhood Dashboard

-**Tech Stack:**
-- Python 3.11+ / Dash / Plotly
-- PostgreSQL + PostGIS
-- dbt for data transformation
-- Pydantic for validation
-- SQLAlchemy 2.0
+An interactive choropleth dashboard analyzing Toronto's 158 official neighbourhoods across five dimensions:
+
+- **Overview**: Composite livability scores, income vs safety scatter
+- **Housing**: Affordability index, rent trends, dwelling types
+- **Safety**: Crime rates, breakdowns by type, trend analysis
+- **Demographics**: Income distribution, age pyramids, population density
+- **Amenities**: Parks, schools, transit accessibility
+
+**Data Sources**:
+- City of Toronto Open Data Portal (neighbourhoods, census profiles, amenities)
+- Toronto Police Service (crime statistics)
+- CMHC Rental Market Survey (rental data by zone)
+
+## Architecture
+
+```mermaid
+flowchart LR
+    subgraph Sources
+        A1[City of Toronto API]
+        A2[Toronto Police API]
+        A3[CMHC Data]
+    end
+
+    subgraph ETL
+        B1[Parsers]
+        B2[Loaders]
+    end
+
+    subgraph Database
+        C1[(PostgreSQL/PostGIS)]
+        C2[dbt Models]
+    end
+
+    subgraph Application
+        D1[Dash App]
+        D2[Plotly Figures]
+    end
+
+    A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
+```
+
+**Pipeline Stages:**
+- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
+- **ETL**: Python parsers extract and validate data; loaders persist to database
+- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
+- **Application**: Dash serves interactive dashboards with Plotly visualizations
+
+For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).

 ## Quick Start

 ```bash
 # Clone and setup
-git clone https://github.com/lmiranda/personal-portfolio.git
+git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
 cd personal-portfolio

 # Install dependencies and configure environment
@@ -55,48 +101,72 @@ portfolio_app/
 ├── app.py                # Dash app factory
 ├── config.py             # Pydantic settings
 ├── pages/
-│   ├── home.py           # Bio landing page (/)
-│   └── toronto/          # Toronto dashboard (/toronto)
+│   ├── home.py           # Bio landing (/)
+│   ├── about.py          # About page
+│   ├── contact.py        # Contact form
+│   ├── projects.py       # Project showcase
+│   ├── resume.py         # Resume/CV
+│   ├── blog/             # Blog system
+│   │   ├── index.py      # Article listing
+│   │   └── article.py    # Article renderer
+│   └── toronto/          # Toronto dashboard
+│       ├── dashboard.py  # Main layout with tabs
+│       ├── methodology.py # Data documentation
+│       ├── tabs/         # Tab layouts (5)
+│       └── callbacks/    # Interaction logic
 ├── components/           # Shared UI components
 ├── figures/              # Plotly figure factories
-└── toronto/              # Toronto data logic
-    ├── parsers/          # PDF/CSV extraction
-    ├── loaders/          # Database operations
-    ├── schemas/          # Pydantic models
-    └── models/           # SQLAlchemy ORM
+├── content/
+│   └── blog/             # Markdown blog articles
+├── toronto/              # Toronto data logic
+│   ├── parsers/          # API data extraction
+│   ├── loaders/          # Database operations
+│   ├── schemas/          # Pydantic models
+│   └── models/           # SQLAlchemy ORM
+└── errors/               # Exception handling
+
+dbt/
+├── models/
+│   ├── staging/          # 1:1 source tables
+│   ├── intermediate/     # Business logic
+│   └── marts/            # Analytical tables
+
+notebooks/                # Data documentation (15 notebooks)
+├── overview/             # Overview tab visualizations
+├── housing/              # Housing tab visualizations
+├── safety/               # Safety tab visualizations
+├── demographics/         # Demographics tab visualizations
+└── amenities/            # Amenities tab visualizations
+
+docs/
+├── PROJECT_REFERENCE.md  # Architecture reference
+├── CONTRIBUTING.md       # Developer guide
+└── project-lessons-learned/
 ```

+## Tech Stack
+
+| Layer | Technology |
+|-------|------------|
+| Database | PostgreSQL 16 + PostGIS |
+| Validation | Pydantic 2.x |
+| ORM | SQLAlchemy 2.x |
+| Transformation | dbt-postgres |
+| Data Processing | Pandas, GeoPandas |
+| Visualization | Dash + Plotly |
+| UI Components | dash-mantine-components |
+| Testing | pytest |
+| Python | 3.11+ |
+
 ## Development

 ```bash
-make test     # Run tests
-make lint     # Run linter
+make test     # Run pytest
+make lint     # Run ruff linter
 make format   # Format code
 make ci       # Run all checks
-```
-
-## Data Pipeline
-
-```
-Raw Files (PDF/Excel)
-    ↓
-Parsers (pdfplumber, pandas)
-    ↓
-Pydantic Validation
-    ↓
-SQLAlchemy Loaders
-    ↓
-PostgreSQL + PostGIS
-    ↓
-dbt Transformations
-    ↓
-Dash Visualization
+make dbt-run  # Run dbt models
+make dbt-test # Run dbt tests
 ```

 ## Environment Variables
@@ -109,12 +179,19 @@ POSTGRES_USER=portfolio
 POSTGRES_PASSWORD=<secure>
 POSTGRES_DB=portfolio
 DASH_DEBUG=true
+SECRET_KEY=<random>
 ```

+## Documentation
+
+- **For developers**: See `docs/CONTRIBUTING.md` for setup and contribution guidelines
+- **For Claude Code**: See `CLAUDE.md` for AI assistant context
+- **Architecture**: See `docs/PROJECT_REFERENCE.md` for technical details
+
 ## License

 MIT

 ## Author

-Leo Miranda - [GitHub](https://github.com/lmiranda) | [LinkedIn](https://linkedin.com/in/yourprofile)
+Leo Miranda
@@ -1,17 +1,6 @@
 version: 2

 models:
-  - name: int_purchases__monthly
-    description: "Purchase data enriched with time and district dimensions"
-    columns:
-      - name: purchase_id
-        tests:
-          - unique
-          - not_null
-      - name: district_code
-        tests:
-          - not_null
-
   - name: int_rentals__annual
     description: "Rental data enriched with time and zone dimensions"
     columns:
@@ -22,3 +11,77 @@ models:
       - name: zone_code
         tests:
           - not_null

+  - name: int_neighbourhood__demographics
+    description: "Combined census demographics with neighbourhood attributes"
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: census_year
+        description: "Census year"
+        tests:
+          - not_null
+      - name: income_quintile
+        description: "Income quintile (1-5, city-wide)"
+
+  - name: int_neighbourhood__housing
+    description: "Housing indicators combining census and rental data"
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: year
+        description: "Reference year"
+      - name: rent_to_income_pct
+        description: "Rent as percentage of median income"
+      - name: is_affordable
+        description: "Boolean: rent <= 30% of income"
+
+  - name: int_neighbourhood__crime_summary
+    description: "Aggregated crime with year-over-year trends"
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: year
+        description: "Statistics year"
+        tests:
+          - not_null
+      - name: crime_rate_per_100k
+        description: "Total crime rate per 100K population"
+      - name: yoy_change_pct
+        description: "Year-over-year change percentage"
+
+  - name: int_neighbourhood__amenity_scores
+    description: "Normalized amenities per capita and per area"
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: year
+        description: "Reference year"
+      - name: total_amenities_per_1000
+        description: "Total amenities per 1000 population"
+      - name: amenities_per_sqkm
+        description: "Total amenities per square km"
+
+  - name: int_rentals__neighbourhood_allocated
+    description: "CMHC rental data allocated to neighbourhoods via area weights"
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: year
+        description: "Survey year"
+        tests:
+          - not_null
+      - name: avg_rent_2bed
+        description: "Weighted average 2-bedroom rent"
+      - name: vacancy_rate
+        description: "Weighted average vacancy rate"
dbt/models/intermediate/int_census__toronto_cma.sql (new file, 60 lines)
@@ -0,0 +1,60 @@
-- Intermediate: Toronto CMA census statistics by year
-- Provides city-wide averages for metrics not available at neighbourhood level
-- Used when neighbourhood-level data is unavailable (e.g., median household income)
-- Grain: One row per year

with years as (
    select * from {{ ref('int_year_spine') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

-- Census data is only available for 2016 and 2021
-- Map each analysis year to the appropriate census year
year_to_census as (
    select
        y.year,
        case
            when y.year <= 2018 then 2016
            else 2021
        end as census_year
    from years y
),

-- Toronto CMA median household income from Statistics Canada
-- Source: Census Profile Table 98-316-X2021001
-- 2016: $65,829 (from Census Profile)
-- 2021: $84,000 (from Census Profile)
cma_income as (
    select 2016 as census_year, 65829 as median_household_income union all
    select 2021 as census_year, 84000 as median_household_income
),

-- City-wide aggregates from loaded neighbourhood data
city_aggregates as (
    select
        census_year,
        sum(population) as total_population,
        avg(population_density) as avg_population_density,
        avg(unemployment_rate) as avg_unemployment_rate
    from census
    where population is not null
    group by census_year
),

final as (
    select
        y.year,
        y.census_year,
        ci.median_household_income,
        ca.total_population,
        ca.avg_population_density,
        ca.avg_unemployment_rate
    from year_to_census y
    left join cma_income ci on y.census_year = ci.census_year
    left join city_aggregates ca on y.census_year = ca.census_year
)

select * from final
dbt/models/intermediate/int_neighbourhood__amenity_scores.sql (new file, 79 lines)
@@ -0,0 +1,79 @@
-- Intermediate: Normalized amenities per 1000 population
-- Pivots amenity types and calculates per-capita metrics
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

amenities as (
    select * from {{ ref('stg_toronto__amenities') }}
),

-- Aggregate amenity types
amenities_by_year as (
    select
        neighbourhood_id,
        amenity_year as year,
        sum(case when amenity_type = 'Parks' then amenity_count else 0 end) as parks_count,
        sum(case when amenity_type = 'Schools' then amenity_count else 0 end) as schools_count,
        sum(case when amenity_type = 'Transit Stops' then amenity_count else 0 end) as transit_count,
        sum(case when amenity_type = 'Libraries' then amenity_count else 0 end) as libraries_count,
        sum(case when amenity_type = 'Community Centres' then amenity_count else 0 end) as community_centres_count,
        sum(case when amenity_type = 'Recreation' then amenity_count else 0 end) as recreation_count,
        sum(amenity_count) as total_amenities
    from amenities
    group by neighbourhood_id, amenity_year
),

amenity_scores as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.population,
        n.land_area_sqkm,

        coalesce(a.year, 2021) as year,

        -- Raw counts
        a.parks_count,
        a.schools_count,
        a.transit_count,
        a.libraries_count,
        a.community_centres_count,
        a.recreation_count,
        a.total_amenities,

        -- Per 1000 population
        case when n.population > 0
            then round(a.parks_count::numeric / n.population * 1000, 3)
            else null
        end as parks_per_1000,

        case when n.population > 0
            then round(a.schools_count::numeric / n.population * 1000, 3)
            else null
        end as schools_per_1000,

        case when n.population > 0
            then round(a.transit_count::numeric / n.population * 1000, 3)
            else null
        end as transit_per_1000,

        case when n.population > 0
            then round(a.total_amenities::numeric / n.population * 1000, 3)
            else null
        end as total_amenities_per_1000,

        -- Per square km
        case when n.land_area_sqkm > 0
            then round(a.total_amenities::numeric / n.land_area_sqkm, 2)
            else null
        end as amenities_per_sqkm

    from neighbourhoods n
    left join amenities_by_year a on n.neighbourhood_id = a.neighbourhood_id
)

select * from amenity_scores
dbt/models/intermediate/int_neighbourhood__crime_summary.sql (new file, 83 lines)
@@ -0,0 +1,83 @@
-- Intermediate: Aggregated crime by neighbourhood with YoY change
-- Pivots crime types and calculates year-over-year trends
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

crime as (
    select * from {{ ref('stg_toronto__crime') }}
),

-- Aggregate crime types
crime_by_year as (
    select
        neighbourhood_id,
        crime_year as year,
        sum(incident_count) as total_incidents,
        sum(case when crime_type = 'Assault' then incident_count else 0 end) as assault_count,
        sum(case when crime_type = 'Auto Theft' then incident_count else 0 end) as auto_theft_count,
        sum(case when crime_type = 'Break and Enter' then incident_count else 0 end) as break_enter_count,
        sum(case when crime_type = 'Robbery' then incident_count else 0 end) as robbery_count,
        sum(case when crime_type = 'Theft Over' then incident_count else 0 end) as theft_over_count,
        sum(case when crime_type = 'Homicide' then incident_count else 0 end) as homicide_count,
        avg(rate_per_100k) as avg_rate_per_100k
    from crime
    group by neighbourhood_id, crime_year
),

-- Add year-over-year changes
with_yoy as (
    select
        c.*,
        lag(c.total_incidents, 1) over (
            partition by c.neighbourhood_id
            order by c.year
        ) as prev_year_incidents,
        round(
            (c.total_incidents - lag(c.total_incidents, 1) over (
                partition by c.neighbourhood_id
                order by c.year
            ))::numeric /
            nullif(lag(c.total_incidents, 1) over (
                partition by c.neighbourhood_id
                order by c.year
            ), 0) * 100,
            2
        ) as yoy_change_pct
    from crime_by_year c
),

crime_summary as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.population,

        w.year,
        w.total_incidents,
        w.assault_count,
        w.auto_theft_count,
        w.break_enter_count,
        w.robbery_count,
        w.theft_over_count,
        w.homicide_count,
        w.yoy_change_pct,

        -- Crime rate per 100K population (use source data avg, or calculate if population available)
        coalesce(
            w.avg_rate_per_100k,
            case
                when n.population > 0
                    then round(w.total_incidents::numeric / n.population * 100000, 2)
                else null
            end
        ) as crime_rate_per_100k

    from neighbourhoods n
    inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
)

select * from crime_summary
dbt/models/intermediate/int_neighbourhood__demographics.sql (new file, 45 lines)
@@ -0,0 +1,45 @@
-- Intermediate: Combined census demographics by neighbourhood
-- Joins neighbourhoods with census data for demographic analysis
-- Grain: One row per neighbourhood per census year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

demographics as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        n.land_area_sqkm,

        -- Use census_year from census data, or fall back to dim_neighbourhood's year
        coalesce(c.census_year, n.census_year, 2021) as census_year,
        c.population,
        c.population_density,
        c.median_household_income,
        c.average_household_income,
        c.median_age,
        c.unemployment_rate,
        c.pct_bachelors_or_higher as education_bachelors_pct,
        c.average_dwelling_value,

        -- Tenure mix
        c.pct_owner_occupied,
        c.pct_renter_occupied,

        -- Income quintile (city-wide comparison)
        ntile(5) over (
            partition by c.census_year
            order by c.median_household_income
        ) as income_quintile

    from neighbourhoods n
    left join census c on n.neighbourhood_id = c.neighbourhood_id
)

select * from demographics
dbt/models/intermediate/int_neighbourhood__housing.sql (new file, 56 lines)
@@ -0,0 +1,56 @@
-- Intermediate: Housing indicators by neighbourhood
-- Combines census housing data with allocated CMHC rental data
-- Grain: One row per neighbourhood per year

with neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

allocated_rentals as (
    select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),

housing as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,

        coalesce(r.year, c.census_year, 2021) as year,

        -- Census housing metrics
        c.pct_owner_occupied,
        c.pct_renter_occupied,
        c.average_dwelling_value,
        c.median_household_income,

        -- Allocated rental metrics (weighted average from CMHC zones)
        r.avg_rent_2bed,
        r.vacancy_rate,

        -- Affordability calculations
        case
            when c.median_household_income > 0 and r.avg_rent_2bed > 0
                then round((r.avg_rent_2bed * 12 / c.median_household_income) * 100, 2)
            else null
        end as rent_to_income_pct,

        -- Affordability threshold (30% of income)
        case
            when c.median_household_income > 0 and r.avg_rent_2bed > 0
                then r.avg_rent_2bed * 12 <= c.median_household_income * 0.30
            else null
        end as is_affordable

    from neighbourhoods n
    left join census c on n.neighbourhood_id = c.neighbourhood_id
    left join allocated_rentals r
        on n.neighbourhood_id = r.neighbourhood_id
        and r.year = c.census_year
)

select * from housing
dbt/models/intermediate/int_purchases__monthly.sql (deleted file, 62 lines)
@@ -1,62 +0,0 @@
-- Intermediate: Monthly purchase data enriched with dimensions
-- Joins purchases with time and district dimensions for analysis

with purchases as (
    select * from {{ ref('stg_trreb__purchases') }}
),

time_dim as (
    select * from {{ ref('stg_dimensions__time') }}
),

district_dim as (
    select * from {{ ref('stg_dimensions__trreb_districts') }}
),

enriched as (
    select
        p.purchase_id,

        -- Time attributes
        t.date_key,
        t.full_date,
        t.year,
        t.month,
        t.quarter,
        t.month_name,

        -- District attributes
        d.district_key,
        d.district_code,
        d.district_name,
        d.area_type,

        -- Metrics
        p.sales_count,
        p.dollar_volume,
        p.avg_price,
        p.median_price,
        p.new_listings,
        p.active_listings,
        p.days_on_market,
        p.sale_to_list_ratio,

        -- Calculated metrics
        case
            when p.active_listings > 0
                then round(p.sales_count::numeric / p.active_listings, 3)
            else null
        end as absorption_rate,

        case
            when p.sales_count > 0
                then round(p.active_listings::numeric / p.sales_count, 1)
            else null
        end as months_of_inventory

    from purchases p
    inner join time_dim t on p.date_key = t.date_key
    inner join district_dim d on p.district_key = d.district_key
)

select * from enriched
dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql (new file, 73 lines)
@@ -0,0 +1,73 @@
-- Intermediate: CMHC rentals allocated to neighbourhoods via area weights
-- Disaggregates zone-level rental data to neighbourhood level
-- Grain: One row per neighbourhood per year

with crosswalk as (
    select * from {{ ref('stg_cmhc__zone_crosswalk') }}
),

rentals as (
    select * from {{ ref('int_rentals__annual') }}
),

neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

-- Allocate rental metrics to neighbourhoods using area weights
allocated as (
    select
        c.neighbourhood_id,
        r.year,
        r.bedroom_type,

        -- Weighted average rent (using area weight)
        sum(r.avg_rent * c.area_weight) as weighted_avg_rent,
        sum(r.median_rent * c.area_weight) as weighted_median_rent,
        sum(c.area_weight) as total_weight,

        -- Weighted vacancy rate
        sum(r.vacancy_rate * c.area_weight) / nullif(sum(c.area_weight), 0) as vacancy_rate,

        -- Weighted rental universe
        sum(r.rental_universe * c.area_weight) as rental_units_estimate

    from crosswalk c
    inner join rentals r on c.cmhc_zone_code = r.zone_code
    group by c.neighbourhood_id, r.year, r.bedroom_type
),

-- Pivot to get 2-bedroom as primary metric
pivoted as (
    select
        neighbourhood_id,
        year,
        max(case when bedroom_type = 'Two Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
        max(case when bedroom_type = 'One Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
        max(case when bedroom_type = 'Bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
        max(case when bedroom_type = 'Three Bedroom +' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
        avg(vacancy_rate) as vacancy_rate,
        sum(rental_units_estimate) as total_rental_units
    from allocated
    group by neighbourhood_id, year
),

final as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,

        p.year,
        round(p.avg_rent_bachelor::numeric, 2) as avg_rent_bachelor,
        round(p.avg_rent_1bed::numeric, 2) as avg_rent_1bed,
        round(p.avg_rent_2bed::numeric, 2) as avg_rent_2bed,
        round(p.avg_rent_3bed::numeric, 2) as avg_rent_3bed,
        round(p.vacancy_rate::numeric, 2) as vacancy_rate,
        round(p.total_rental_units::numeric, 0) as total_rental_units

    from neighbourhoods n
    inner join pivoted p on n.neighbourhood_id = p.neighbourhood_id
)

select * from final
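
The allocation above is a standard area-weighted average. As a worked example with invented numbers, a neighbourhood split 60/40 between two CMHC zones whose two-bedroom rents are $2,000 and $2,400 comes out as:

```latex
\text{avg\_rent\_2bed}
  = \frac{\sum_z r_z w_z}{\sum_z w_z}
  = \frac{2000 \cdot 0.6 + 2400 \cdot 0.4}{0.6 + 0.4}
  = 2160
```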
dbt/models/intermediate/int_rentals__toronto_cma.sql (new file, 25 lines)
@@ -0,0 +1,25 @@
-- Intermediate: Toronto CMA rental metrics by year
-- Aggregates rental data to city-wide averages by year
-- Source: StatCan CMHC data at CMA level
-- Grain: One row per year

with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

-- Pivot bedroom types to columns
yearly_rentals as (
    select
        year,
        max(case when bedroom_type = 'bachelor' then avg_rent end) as avg_rent_bachelor,
        max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed,
        max(case when bedroom_type = '3bed' then avg_rent end) as avg_rent_3bed,
        -- Use 2-bedroom as standard reference
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_standard,
        max(vacancy_rate) as vacancy_rate
    from rentals
    group by year
)

select * from yearly_rentals
dbt/models/intermediate/int_year_spine.sql (new file, 11 lines)
@@ -0,0 +1,11 @@
-- Intermediate: Year spine for analysis
-- Creates a row for each year from 2014-2025
-- Used to drive time-series analysis across all data sources

with years as (
    -- Generate years from available data sources
    -- Crime data: 2014-2024, Rentals: 2019-2025
    select generate_series(2014, 2025) as year
)

select year from years
@@ -1,15 +1,6 @@
 version: 2

 models:
-  - name: mart_toronto_purchases
-    description: "Final mart for Toronto purchase/sales analysis by district and time"
-    columns:
-      - name: purchase_id
-        description: "Unique purchase record identifier"
-        tests:
-          - unique
-          - not_null
-
   - name: mart_toronto_rentals
     description: "Final mart for Toronto rental market analysis by zone and time"
     columns:
@@ -19,5 +10,126 @@ models:
           - unique
           - not_null

   - name: mart_toronto_market_summary
     description: "Combined market summary aggregating purchases and rentals at Toronto level"
+
+  - name: mart_neighbourhood_overview
+    description: "Neighbourhood overview with composite livability score"
+    meta:
+      dashboard_tab: Overview
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: neighbourhood_name
+        description: "Official neighbourhood name"
+        tests:
+          - not_null
+      - name: geometry
+        description: "PostGIS geometry for mapping"
+      - name: livability_score
+        description: "Composite score: safety (30%), affordability (40%), amenities (30%)"
+      - name: safety_score
+        description: "Safety component score (0-100)"
+      - name: affordability_score
+        description: "Affordability component score (0-100)"
+      - name: amenity_score
+        description: "Amenity component score (0-100)"
+
+  - name: mart_neighbourhood_housing
+    description: "Housing and affordability metrics by neighbourhood"
+    meta:
+      dashboard_tab: Housing
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: neighbourhood_name
+        description: "Official neighbourhood name"
+        tests:
+          - not_null
+      - name: geometry
+        description: "PostGIS geometry for mapping"
+      - name: rent_to_income_pct
+        description: "Rent as percentage of median income"
+      - name: affordability_index
+        description: "100 = city average affordability"
+      - name: rent_yoy_change_pct
+        description: "Year-over-year rent change"
+
+  - name: mart_neighbourhood_safety
+    description: "Crime rates and safety metrics by neighbourhood"
+    meta:
+      dashboard_tab: Safety
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: neighbourhood_name
+        description: "Official neighbourhood name"
+        tests:
+          - not_null
+      - name: geometry
+        description: "PostGIS geometry for mapping"
+      - name: crime_rate_per_100k
+        description: "Total crime rate per 100K population"
+      - name: crime_index
+        description: "100 = city average crime rate"
+      - name: safety_tier
+        description: "Safety tier (1=safest, 5=highest crime)"
+        tests:
+          - accepted_values:
+              arguments:
+                values: [1, 2, 3, 4, 5]
+
+  - name: mart_neighbourhood_demographics
+    description: "Demographics and income metrics by neighbourhood"
+    meta:
+      dashboard_tab: Demographics
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: neighbourhood_name
+        description: "Official neighbourhood name"
+        tests:
+          - not_null
+      - name: geometry
+        description: "PostGIS geometry for mapping"
+      - name: median_household_income
+        description: "Median household income"
+      - name: income_index
+        description: "100 = city average income"
+      - name: income_quintile
+        description: "Income quintile (1-5)"
+        tests:
+          - accepted_values:
+              arguments:
+                values: [1, 2, 3, 4, 5]
+
+  - name: mart_neighbourhood_amenities
+    description: "Amenity access metrics by neighbourhood"
+    meta:
+      dashboard_tab: Amenities
+    columns:
+      - name: neighbourhood_id
+        description: "Neighbourhood identifier"
+        tests:
+          - not_null
+      - name: neighbourhood_name
+        description: "Official neighbourhood name"
+        tests:
+          - not_null
+      - name: geometry
+        description: "PostGIS geometry for mapping"
+      - name: total_amenities_per_1000
+        description: "Total amenities per 1000 population"
+      - name: amenity_index
+        description: "100 = city average amenities"
+      - name: amenity_tier
+        description: "Amenity tier (1=best, 5=lowest)"
+        tests:
+          - accepted_values:
+              arguments:
+                values: [1, 2, 3, 4, 5]
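
The mart SQL computing `livability_score` is not included in this compare; a hedged pandas sketch of how the documented weighting (safety 30%, affordability 40%, amenities 30%) could combine the component scores, with invented data:

```python
import pandas as pd

# Invented component scores (0-100); column names mirror the mart schema above
scores = pd.DataFrame({
    "neighbourhood_id": [1, 2, 3],
    "safety_score": [80.0, 55.0, 70.0],
    "affordability_score": [40.0, 75.0, 60.0],
    "amenity_score": [90.0, 50.0, 65.0],
})

# Weights from the livability_score description
weights = {"safety_score": 0.30, "affordability_score": 0.40, "amenity_score": 0.30}

# Weighted sum of the components gives the composite score
scores["livability_score"] = sum(scores[col] * w for col, w in weights.items()).round(1)
print(scores[["neighbourhood_id", "livability_score"]])
```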
dbt/models/marts/mart_neighbourhood_amenities.sql (new file, 89 lines)
@@ -0,0 +1,89 @@
-- Mart: Neighbourhood Amenities Analysis
-- Dashboard Tab: Amenities
-- Grain: One row per neighbourhood per year

with amenities as (
    select * from {{ ref('int_neighbourhood__amenity_scores') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        year,
        avg(parks_per_1000) as city_avg_parks,
        avg(schools_per_1000) as city_avg_schools,
        avg(transit_per_1000) as city_avg_transit,
        avg(total_amenities_per_1000) as city_avg_total_amenities
    from amenities
    group by year
),

final as (
    select
        a.neighbourhood_id,
        a.neighbourhood_name,
        a.geometry,
        a.population,
        a.land_area_sqkm,
        a.year,

        -- Raw counts
        a.parks_count,
        a.schools_count,
        a.transit_count,
        a.libraries_count,
        a.community_centres_count,
        a.recreation_count,
        a.total_amenities,

        -- Per 1000 population
        a.parks_per_1000,
        a.schools_per_1000,
        a.transit_per_1000,
        a.total_amenities_per_1000,

        -- Per square km
        a.amenities_per_sqkm,

        -- City averages
        round(ca.city_avg_parks::numeric, 3) as city_avg_parks_per_1000,
        round(ca.city_avg_schools::numeric, 3) as city_avg_schools_per_1000,
        round(ca.city_avg_transit::numeric, 3) as city_avg_transit_per_1000,

        -- Amenity index (100 = city average)
        case
            when ca.city_avg_total_amenities > 0
                then round(a.total_amenities_per_1000 / ca.city_avg_total_amenities * 100, 1)
            else null
        end as amenity_index,

        -- Category indices
        case
            when ca.city_avg_parks > 0
                then round(a.parks_per_1000 / ca.city_avg_parks * 100, 1)
            else null
        end as parks_index,

        case
            when ca.city_avg_schools > 0
                then round(a.schools_per_1000 / ca.city_avg_schools * 100, 1)
            else null
        end as schools_index,

        case
            when ca.city_avg_transit > 0
                then round(a.transit_per_1000 / ca.city_avg_transit * 100, 1)
            else null
        end as transit_index,

        -- Amenity tier (1 = best, 5 = lowest)
        ntile(5) over (
            partition by a.year
            order by a.total_amenities_per_1000 desc
        ) as amenity_tier

    from amenities a
    left join city_avg ca on a.year = ca.year
)

select * from final
81
dbt/models/marts/mart_neighbourhood_demographics.sql
Normal file
@@ -0,0 +1,81 @@
-- Mart: Neighbourhood Demographics Analysis
-- Dashboard Tab: Demographics
-- Grain: One row per neighbourhood per census year

with demographics as (
    select * from {{ ref('int_neighbourhood__demographics') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        census_year,
        avg(median_household_income) as city_avg_income,
        avg(median_age) as city_avg_age,
        avg(unemployment_rate) as city_avg_unemployment,
        avg(education_bachelors_pct) as city_avg_education,
        avg(population_density) as city_avg_density
    from demographics
    group by census_year
),

final as (
    select
        d.neighbourhood_id,
        d.neighbourhood_name,
        d.geometry,
        d.census_year as year,

        -- Population
        d.population,
        d.land_area_sqkm,
        d.population_density,

        -- Income
        d.median_household_income,
        d.average_household_income,
        d.income_quintile,

        -- Income index (100 = city average)
        case
            when ca.city_avg_income > 0
                then round(d.median_household_income / ca.city_avg_income * 100, 1)
            else null
        end as income_index,

        -- Demographics
        d.median_age,
        d.unemployment_rate,
        d.education_bachelors_pct,

        -- Age index (100 = city average)
        case
            when ca.city_avg_age > 0
                then round(d.median_age / ca.city_avg_age * 100, 1)
            else null
        end as age_index,

        -- Housing tenure
        d.pct_owner_occupied,
        d.pct_renter_occupied,
        d.average_dwelling_value,

        -- Diversity index (tenure mix as a proxy; a higher rental share typically signals more diversity)
        round(
            1 - (
                power(d.pct_owner_occupied / 100, 2) +
                power(d.pct_renter_occupied / 100, 2)
            ),
            3
        ) * 100 as tenure_diversity_index,

        -- City comparisons
        round(ca.city_avg_income::numeric, 2) as city_avg_income,
        round(ca.city_avg_age::numeric, 1) as city_avg_age,
        round(ca.city_avg_unemployment::numeric, 2) as city_avg_unemployment

    from demographics d
    left join city_avg ca on d.census_year = ca.census_year
)

select * from final
93
dbt/models/marts/mart_neighbourhood_housing.sql
Normal file
@@ -0,0 +1,93 @@
-- Mart: Neighbourhood Housing Analysis
-- Dashboard Tab: Housing
-- Grain: One row per neighbourhood per year

with housing as (
    select * from {{ ref('int_neighbourhood__housing') }}
),

rentals as (
    select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),

demographics as (
    select * from {{ ref('int_neighbourhood__demographics') }}
),

-- Add year-over-year rent changes
with_yoy as (
    select
        h.*,
        r.avg_rent_bachelor,
        r.avg_rent_1bed,
        r.avg_rent_3bed,
        r.total_rental_units,
        d.income_quintile,

        -- Previous year rent for YoY calculation
        lag(h.avg_rent_2bed, 1) over (
            partition by h.neighbourhood_id
            order by h.year
        ) as prev_year_rent_2bed

    from housing h
    left join rentals r
        on h.neighbourhood_id = r.neighbourhood_id
        and h.year = r.year
    left join demographics d
        on h.neighbourhood_id = d.neighbourhood_id
        and h.year = d.census_year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,

        -- Tenure mix
        pct_owner_occupied,
        pct_renter_occupied,

        -- Housing values
        average_dwelling_value,
        median_household_income,

        -- Rental metrics
        avg_rent_bachelor,
        avg_rent_1bed,
        avg_rent_2bed,
        avg_rent_3bed,
        vacancy_rate,
        total_rental_units,

        -- Affordability
        rent_to_income_pct,
        is_affordable,

        -- Affordability index (100 = city average)
        round(
            rent_to_income_pct / nullif(
                avg(rent_to_income_pct) over (partition by year),
                0
            ) * 100,
            1
        ) as affordability_index,

        -- Year-over-year rent change
        case
            when prev_year_rent_2bed > 0
                then round(
                    (avg_rent_2bed - prev_year_rent_2bed) / prev_year_rent_2bed * 100,
                    2
                )
            else null
        end as rent_yoy_change_pct,

        income_quintile

    from with_yoy
)

select * from final
152
dbt/models/marts/mart_neighbourhood_overview.sql
Normal file
@@ -0,0 +1,152 @@
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)

with years as (
    select * from {{ ref('int_year_spine') }}
),

neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

-- Create base: all neighbourhoods × all years
neighbourhood_years as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        y.year
    from neighbourhoods n
    cross join years y
),

-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
    select * from {{ ref('stg_toronto__census') }}
),

census_mapped as (
    select
        ny.neighbourhood_id,
        ny.year,
        c.population,
        c.unemployment_rate,
        c.pct_bachelors_or_higher as education_bachelors_pct
    from neighbourhood_years ny
    left join census c on ny.neighbourhood_id = c.neighbourhood_id
        -- Use census year <= analysis year, prefer most recent
        and c.census_year = (
            select max(c2.census_year)
            from {{ ref('stg_toronto__census') }} c2
            where c2.neighbourhood_id = ny.neighbourhood_id
                and c2.census_year <= ny.year
        )
),

-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
    select * from {{ ref('int_census__toronto_cma') }}
),

-- Crime data (2014-2024)
crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
    select * from {{ ref('int_rentals__toronto_cma') }}
),

-- Compute scores
scored as (
    select
        ny.neighbourhood_id,
        ny.neighbourhood_name,
        ny.geometry,
        ny.year,
        cm.population,
        -- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
        cma.median_household_income,

        -- Safety score: inverse of crime rate (higher = safer)
        case
            when cr.crime_rate_per_100k is not null
                then 100 - percent_rank() over (
                    partition by ny.year
                    order by cr.crime_rate_per_100k
                ) * 100
            else null
        end as safety_score,

        -- Affordability score: inverse of rent-to-income ratio
        -- Using CMA-level income since neighbourhood-level not available
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
                then 100 - percent_rank() over (
                    partition by ny.year
                    order by (r.avg_rent_standard * 12 / cma.median_household_income)
                ) * 100
            else null
        end as affordability_score,

        -- Raw metrics
        cr.crime_rate_per_100k,
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
                then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
            else null
        end as rent_to_income_pct,
        r.avg_rent_standard as avg_rent_2bed,
        r.vacancy_rate

    from neighbourhood_years ny
    left join census_mapped cm
        on ny.neighbourhood_id = cm.neighbourhood_id
        and ny.year = cm.year
    left join cma_census cma
        on ny.year = cma.year
    left join crime cr
        on ny.neighbourhood_id = cr.neighbourhood_id
        and ny.year = cr.year
    left join rentals r
        on ny.year = r.year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,
        population,
        median_household_income,

        -- Component scores (0-100)
        round(safety_score::numeric, 1) as safety_score,
        round(affordability_score::numeric, 1) as affordability_score,
        -- Amenity score not available at this level, use placeholder
        50.0 as amenity_score,

        -- Composite livability score: safety (40%), affordability (40%), amenities (20%)
        round(
            (coalesce(safety_score, 50) * 0.40 +
             coalesce(affordability_score, 50) * 0.40 +
             50 * 0.20)::numeric,
            1
        ) as livability_score,

        -- Raw metrics
        crime_rate_per_100k,
        rent_to_income_pct,
        avg_rent_2bed,
        vacancy_rate,
        null::numeric as total_amenities_per_1000

    from scored
)

select * from final
78
dbt/models/marts/mart_neighbourhood_safety.sql
Normal file
@@ -0,0 +1,78 @@
-- Mart: Neighbourhood Safety Analysis
-- Dashboard Tab: Safety
-- Grain: One row per neighbourhood per year

with crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

-- City-wide averages for comparison
city_avg as (
    select
        year,
        avg(crime_rate_per_100k) as city_avg_crime_rate,
        avg(assault_count) as city_avg_assault,
        avg(auto_theft_count) as city_avg_auto_theft,
        avg(break_enter_count) as city_avg_break_enter
    from crime
    group by year
),

final as (
    select
        c.neighbourhood_id,
        c.neighbourhood_name,
        c.geometry,
        c.population,
        c.year,

        -- Total crime
        c.total_incidents,
        c.crime_rate_per_100k,
        c.yoy_change_pct as crime_yoy_change_pct,

        -- Crime breakdown
        c.assault_count,
        c.auto_theft_count,
        c.break_enter_count,
        c.robbery_count,
        c.theft_over_count,
        c.homicide_count,

        -- Per 100K rates by type
        case when c.population > 0
            then round(c.assault_count::numeric / c.population * 100000, 2)
            else null
        end as assault_rate_per_100k,

        case when c.population > 0
            then round(c.auto_theft_count::numeric / c.population * 100000, 2)
            else null
        end as auto_theft_rate_per_100k,

        case when c.population > 0
            then round(c.break_enter_count::numeric / c.population * 100000, 2)
            else null
        end as break_enter_rate_per_100k,

        -- Comparison to city average
        round(ca.city_avg_crime_rate::numeric, 2) as city_avg_crime_rate,

        -- Crime index (100 = city average)
        case
            when ca.city_avg_crime_rate > 0
                then round(c.crime_rate_per_100k / ca.city_avg_crime_rate * 100, 1)
            else null
        end as crime_index,

        -- Safety tier based on crime rate percentile (1 = safest, per the schema;
        -- ascending order so the lowest crime rate lands in tier 1)
        ntile(5) over (
            partition by c.year
            order by c.crime_rate_per_100k asc
        ) as safety_tier

    from crime c
    left join city_avg ca on c.year = ca.year
)

select * from final
@@ -1,81 +0,0 @@
-- Mart: Toronto Market Summary
-- Aggregated view combining purchase and rental market indicators
-- Grain: One row per year-month

with purchases_agg as (
    select
        year,
        month,
        month_name,
        quarter,

        -- Aggregate purchase metrics across all districts
        sum(sales_count) as total_sales,
        sum(dollar_volume) as total_dollar_volume,
        round(avg(avg_price), 0) as avg_price_all_districts,
        round(avg(median_price), 0) as median_price_all_districts,
        sum(new_listings) as total_new_listings,
        sum(active_listings) as total_active_listings,
        round(avg(days_on_market), 0) as avg_days_on_market,
        round(avg(sale_to_list_ratio), 2) as avg_sale_to_list_ratio,
        round(avg(absorption_rate), 3) as avg_absorption_rate,
        round(avg(months_of_inventory), 1) as avg_months_of_inventory,
        round(avg(avg_price_yoy_pct), 2) as avg_price_yoy_pct

    from {{ ref('mart_toronto_purchases') }}
    group by year, month, month_name, quarter
),

rentals_agg as (
    select
        year,

        -- Aggregate rental metrics across all zones (all bedroom types)
        round(avg(avg_rent), 0) as avg_rent_all_zones,
        round(avg(vacancy_rate), 2) as avg_vacancy_rate,
        round(avg(rent_change_pct), 2) as avg_rent_change_pct,
        sum(rental_universe) as total_rental_universe

    from {{ ref('mart_toronto_rentals') }}
    group by year
),

final as (
    select
        p.year,
        p.month,
        p.month_name,
        p.quarter,

        -- Purchase market indicators
        p.total_sales,
        p.total_dollar_volume,
        p.avg_price_all_districts,
        p.median_price_all_districts,
        p.total_new_listings,
        p.total_active_listings,
        p.avg_days_on_market,
        p.avg_sale_to_list_ratio,
        p.avg_absorption_rate,
        p.avg_months_of_inventory,
        p.avg_price_yoy_pct,

        -- Rental market indicators (annual, so join on year)
        r.avg_rent_all_zones,
        r.avg_vacancy_rate,
        r.avg_rent_change_pct,
        r.total_rental_universe,

        -- Affordability indicator (price to rent ratio)
        case
            when r.avg_rent_all_zones > 0
                then round(p.avg_price_all_districts / (r.avg_rent_all_zones * 12), 1)
            else null
        end as price_to_annual_rent_ratio

    from purchases_agg p
    left join rentals_agg r on p.year = r.year
)

select * from final
order by year desc, month desc
@@ -1,79 +0,0 @@
-- Mart: Toronto Purchase Market Analysis
-- Final analytical table for purchase/sales data visualization
-- Grain: One row per district per month

with purchases as (
    select * from {{ ref('int_purchases__monthly') }}
),

-- Add year-over-year calculations
with_yoy as (
    select
        p.*,

        -- Previous year same month values
        lag(p.avg_price, 12) over (
            partition by p.district_code
            order by p.date_key
        ) as avg_price_prev_year,

        lag(p.sales_count, 12) over (
            partition by p.district_code
            order by p.date_key
        ) as sales_count_prev_year,

        lag(p.median_price, 12) over (
            partition by p.district_code
            order by p.date_key
        ) as median_price_prev_year

    from purchases p
),

final as (
    select
        purchase_id,
        date_key,
        full_date,
        year,
        month,
        quarter,
        month_name,
        district_key,
        district_code,
        district_name,
        area_type,
        sales_count,
        dollar_volume,
        avg_price,
        median_price,
        new_listings,
        active_listings,
        days_on_market,
        sale_to_list_ratio,
        absorption_rate,
        months_of_inventory,

        -- Year-over-year changes
        case
            when avg_price_prev_year > 0
                then round(((avg_price - avg_price_prev_year) / avg_price_prev_year) * 100, 2)
            else null
        end as avg_price_yoy_pct,

        case
            when sales_count_prev_year > 0
                then round(((sales_count - sales_count_prev_year)::numeric / sales_count_prev_year) * 100, 2)
            else null
        end as sales_count_yoy_pct,

        case
            when median_price_prev_year > 0
                then round(((median_price - median_price_prev_year) / median_price_prev_year) * 100, 2)
            else null
        end as median_price_yoy_pct

    from with_yoy
)

select * from final
@@ -2,20 +2,10 @@ version: 2

sources:
  - name: toronto_housing
    description: "Toronto housing data loaded from TRREB and CMHC sources"
    description: "Toronto housing data loaded from CMHC and City of Toronto sources"
    database: portfolio
    schema: public
    tables:
      - name: fact_purchases
        description: "TRREB monthly purchase/sales statistics by district"
        columns:
          - name: id
            description: "Primary key"
          - name: date_key
            description: "Foreign key to dim_time"
          - name: district_key
            description: "Foreign key to dim_trreb_district"

      - name: fact_rentals
        description: "CMHC annual rental survey data by zone and bedroom type"
        columns:
@@ -32,14 +22,6 @@ sources:
          - name: date_key
            description: "Primary key (YYYYMMDD format)"

      - name: dim_trreb_district
        description: "TRREB district dimension with geometry"
        columns:
          - name: district_key
            description: "Primary key"
          - name: district_code
            description: "TRREB district code"

      - name: dim_cmhc_zone
        description: "CMHC zone dimension with geometry"
        columns:
@@ -49,7 +31,7 @@ sources:
            description: "CMHC zone code"

      - name: dim_neighbourhood
        description: "City of Toronto neighbourhoods (reference only)"
        description: "City of Toronto neighbourhoods (158 official boundaries)"
        columns:
          - name: neighbourhood_id
            description: "Primary key"
@@ -59,3 +41,59 @@ sources:
        columns:
          - name: event_id
            description: "Primary key"

      - name: fact_census
        description: "Census demographics by neighbourhood and year"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: census_year
            description: "Census year (2016, 2021, etc.)"
          - name: population
            description: "Total population"
          - name: median_household_income
            description: "Median household income"

      - name: fact_crime
        description: "Crime statistics by neighbourhood, year, and type"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: year
            description: "Statistics year"
          - name: crime_type
            description: "Type of crime"
          - name: count
            description: "Number of incidents"
          - name: rate_per_100k
            description: "Rate per 100,000 population"

      - name: fact_amenities
        description: "Amenity counts by neighbourhood and type"
        columns:
          - name: id
            description: "Primary key"
          - name: neighbourhood_id
            description: "Foreign key to dim_neighbourhood"
          - name: amenity_type
            description: "Type of amenity (parks, schools, transit)"
          - name: count
            description: "Number of amenities"
          - name: year
            description: "Reference year"

      - name: bridge_cmhc_neighbourhood
        description: "CMHC zone to neighbourhood mapping with area weights"
        columns:
          - name: id
            description: "Primary key"
          - name: cmhc_zone_code
            description: "CMHC zone code"
          - name: neighbourhood_id
            description: "Neighbourhood ID"
          - name: weight
            description: "Proportional area weight (0-1)"
@@ -1,23 +1,6 @@
version: 2

models:
  - name: stg_trreb__purchases
    description: "Staged TRREB purchase/sales data from fact_purchases"
    columns:
      - name: purchase_id
        description: "Unique identifier for purchase record"
        tests:
          - unique
          - not_null
      - name: date_key
        description: "Date dimension key (YYYYMMDD)"
        tests:
          - not_null
      - name: district_key
        description: "TRREB district dimension key"
        tests:
          - not_null

  - name: stg_cmhc__rentals
    description: "Staged CMHC rental market data from fact_rentals"
    columns:
@@ -44,20 +27,6 @@ models:
          - unique
          - not_null

  - name: stg_dimensions__trreb_districts
    description: "Staged TRREB district dimension"
    columns:
      - name: district_key
        description: "District dimension key"
        tests:
          - unique
          - not_null
      - name: district_code
        description: "TRREB district code (e.g., W01, C01)"
        tests:
          - unique
          - not_null

  - name: stg_dimensions__cmhc_zones
    description: "Staged CMHC zone dimension"
    columns:
@@ -71,3 +40,90 @@ models:
        tests:
          - unique
          - not_null

  - name: stg_toronto__neighbourhoods
    description: "Staged Toronto neighbourhood dimension (158 official boundaries)"
    columns:
      - name: neighbourhood_id
        description: "Neighbourhood primary key"
        tests:
          - unique
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
        tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry (POLYGON)"

  - name: stg_toronto__census
    description: "Staged census demographics by neighbourhood"
    columns:
      - name: census_id
        description: "Census record identifier"
        tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        tests:
          - not_null
      - name: census_year
        description: "Census year (2016, 2021)"
        tests:
          - not_null

  - name: stg_toronto__crime
    description: "Staged crime statistics by neighbourhood"
    columns:
      - name: crime_id
        description: "Crime record identifier"
        tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        tests:
          - not_null
      - name: crime_type
        description: "Type of crime"
        tests:
          - not_null

  - name: stg_toronto__amenities
    description: "Staged amenity counts by neighbourhood"
    columns:
      - name: amenity_id
        description: "Amenity record identifier"
        tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        tests:
          - not_null
      - name: amenity_type
        description: "Type of amenity"
        tests:
          - not_null

  - name: stg_cmhc__zone_crosswalk
    description: "Staged CMHC zone to neighbourhood crosswalk with area weights"
    columns:
      - name: crosswalk_id
        description: "Crosswalk record identifier"
        tests:
          - unique
          - not_null
      - name: cmhc_zone_code
        description: "CMHC zone code"
        tests:
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
        tests:
          - not_null
      - name: area_weight
        description: "Proportional area weight (0-1)"
        tests:
          - not_null
@@ -1,9 +1,13 @@
-- Staged CMHC rental market survey data
-- Source: fact_rentals table loaded from CMHC CSV exports
-- Source: fact_rentals table loaded from CMHC/StatCan
-- Grain: One row per zone per bedroom type per survey year

with source as (
    select * from {{ source('toronto_housing', 'fact_rentals') }}
    select
        f.*,
        t.year as survey_year
    from {{ source('toronto_housing', 'fact_rentals') }} f
    join {{ source('toronto_housing', 'dim_time') }} t on f.date_key = t.date_key
),

staged as (
@@ -11,6 +15,7 @@ staged as (
        id as rental_id,
        date_key,
        zone_key,
        survey_year as year,
        bedroom_type,
        universe as rental_universe,
        avg_rent,
18
dbt/models/staging/stg_cmhc__zone_crosswalk.sql
Normal file
@@ -0,0 +1,18 @@
-- Staged CMHC zone to neighbourhood crosswalk
-- Source: bridge_cmhc_neighbourhood table
-- Grain: One row per zone-neighbourhood intersection

with source as (
    select * from {{ source('toronto_housing', 'bridge_cmhc_neighbourhood') }}
),

staged as (
    select
        id as crosswalk_id,
        cmhc_zone_code,
        neighbourhood_id,
        weight as area_weight
    from source
)

select * from staged
@@ -1,19 +0,0 @@
-- Staged TRREB district dimension
-- Source: dim_trreb_district table
-- Grain: One row per district

with source as (
    select * from {{ source('toronto_housing', 'dim_trreb_district') }}
),

staged as (
    select
        district_key,
        district_code,
        district_name,
        area_type,
        geometry
    from source
)

select * from staged
19
dbt/models/staging/stg_toronto__amenities.sql
Normal file
@@ -0,0 +1,19 @@
-- Staged amenity counts by neighbourhood
-- Source: fact_amenities table
-- Grain: One row per neighbourhood per amenity type per year

with source as (
    select * from {{ source('toronto_housing', 'fact_amenities') }}
),

staged as (
    select
        id as amenity_id,
        neighbourhood_id,
        amenity_type,
        count as amenity_count,
        year as amenity_year
    from source
)

select * from staged
27
dbt/models/staging/stg_toronto__census.sql
Normal file
@@ -0,0 +1,27 @@
-- Staged census demographics by neighbourhood
-- Source: fact_census table
-- Grain: One row per neighbourhood per census year

with source as (
    select * from {{ source('toronto_housing', 'fact_census') }}
),

staged as (
    select
        id as census_id,
        neighbourhood_id,
        census_year,
        population,
        population_density,
        median_household_income,
        average_household_income,
        unemployment_rate,
        pct_bachelors_or_higher,
        pct_owner_occupied,
        pct_renter_occupied,
        median_age,
        average_dwelling_value
    from source
)

select * from staged
20
dbt/models/staging/stg_toronto__crime.sql
Normal file
@@ -0,0 +1,20 @@
-- Staged crime statistics by neighbourhood
-- Source: fact_crime table
-- Grain: One row per neighbourhood per year per crime type

with source as (
    select * from {{ source('toronto_housing', 'fact_crime') }}
),

staged as (
    select
        id as crime_id,
        neighbourhood_id,
        year as crime_year,
        crime_type,
        count as incident_count,
        rate_per_100k
    from source
)

select * from staged
25
dbt/models/staging/stg_toronto__neighbourhoods.sql
Normal file
@@ -0,0 +1,25 @@
-- Staged Toronto neighbourhood dimension
-- Source: dim_neighbourhood table
-- Grain: One row per neighbourhood (158 total)

with source as (
    select * from {{ source('toronto_housing', 'dim_neighbourhood') }}
),

staged as (
    select
        neighbourhood_id,
        name as neighbourhood_name,
        geometry,
        population,
        land_area_sqkm,
        pop_density_per_sqkm,
        pct_bachelors_or_higher,
        median_household_income,
        pct_owner_occupied,
        pct_renter_occupied,
        census_year
    from source
)

select * from staged
@@ -1,25 +0,0 @@
-- Staged TRREB purchase/sales data
-- Source: fact_purchases table loaded from TRREB Market Watch PDFs
-- Grain: One row per district per month

with source as (
    select * from {{ source('toronto_housing', 'fact_purchases') }}
),

staged as (
    select
        id as purchase_id,
        date_key,
        district_key,
        sales_count,
        dollar_volume,
        avg_price,
        median_price,
        new_listings,
        active_listings,
        avg_dom as days_on_market,
        avg_sp_lp as sale_to_list_ratio
    from source
)

select * from staged
11
dbt/package-lock.yml
Normal file
@@ -0,0 +1,11 @@
packages:
  - name: dbt_utils
    package: dbt-labs/dbt_utils
    version: 1.3.3
  - name: dbt_expectations
    package: calogica/dbt_expectations
    version: 0.10.4
  - name: dbt_date
    package: calogica/dbt_date
    version: 0.10.1
sha1_hash: 51a51ab489f7b302c8745ae3c3781271816b01be
@@ -1,6 +1,6 @@
services:
  db:
    image: postgis/postgis:16-3.4
    image: ${POSTGIS_IMAGE:-postgis/postgis:16-3.4}
    container_name: portfolio-db
    restart: unless-stopped
    ports:
480
docs/CONTRIBUTING.md
Normal file
@@ -0,0 +1,480 @@
# Developer Guide

Instructions for contributing to the Analytics Portfolio project.

---

## Table of Contents

1. [Development Setup](#development-setup)
2. [Adding a Blog Post](#adding-a-blog-post)
3. [Adding a New Page](#adding-a-new-page)
4. [Adding a Dashboard Tab](#adding-a-dashboard-tab)
5. [Creating Figure Factories](#creating-figure-factories)
6. [Branch Workflow](#branch-workflow)
7. [Code Standards](#code-standards)

---

## Development Setup

### Prerequisites

- Python 3.11+ (via pyenv)
- Docker and Docker Compose
- Git

### Initial Setup

```bash
# Clone repository
git clone https://gitea.hotserv.cloud/lmiranda/personal-portfolio.git
cd personal-portfolio

# Run setup (creates venv, installs deps, copies .env.example)
make setup

# Start PostgreSQL + PostGIS
make docker-up

# Initialize database
make db-init

# Start development server
make run
```

The app runs at `http://localhost:8050`.

### Useful Commands

```bash
make test      # Run tests
make lint      # Check code style
make format    # Auto-format code
make ci        # Run all checks (lint + test)
make dbt-run   # Run dbt transformations
make dbt-test  # Run dbt tests
```

---

## Adding a Blog Post

Blog posts are Markdown files with YAML frontmatter, stored in `portfolio_app/content/blog/`.

### Step 1: Create the Markdown File

Create a new file in `portfolio_app/content/blog/`:

```bash
touch portfolio_app/content/blog/your-article-slug.md
```

The filename becomes the URL slug: `/blog/your-article-slug`

### Step 2: Add Frontmatter

Every blog post requires YAML frontmatter at the top:

```markdown
---
title: "Your Article Title"
date: "2026-01-17"
description: "A brief description for the article card (1-2 sentences)"
tags:
  - data-engineering
  - python
  - lessons-learned
status: published
---

Your article content starts here...
```

**Required fields:**

| Field | Description |
|-------|-------------|
| `title` | Article title (displayed on cards and page) |
| `date` | Publication date in `YYYY-MM-DD` format |
| `description` | Short summary for article listing cards |
| `tags` | List of tags (displayed as badges) |
| `status` | `published` or `draft` (drafts are hidden from listing) |

### Step 3: Write Content

Use standard Markdown:

````markdown
## Section Heading

Regular paragraph text.

### Subsection

- Bullet points
- Another point

```python
# Code blocks with syntax highlighting
def example():
    return "Hello"
```

**Bold text** and *italic text*.

> Blockquotes for callouts
````

### Step 4: Test Locally

```bash
make run
```

Visit `http://localhost:8050/blog` to see the article listing.
Visit `http://localhost:8050/blog/your-article-slug` for the full article.

### Example: Complete Blog Post

````markdown
---
title: "Building ETL Pipelines with Python"
date: "2026-01-17"
description: "Lessons from building production data pipelines at scale"
tags:
  - python
  - etl
  - data-engineering
status: published
---

When I started building data pipelines, I made every mistake possible...

## The Problem

Most tutorials show toy examples. Real pipelines are different.

### Error Handling

```python
def safe_transform(df: pd.DataFrame) -> pd.DataFrame:
    try:
        return df.apply(transform_row, axis=1)
    except ValueError as e:
        logger.error(f"Transform failed: {e}")
        raise
```

## Conclusion

Ship something that works, then iterate.
````

---

## Adding a New Page

Pages use Dash's automatic routing based on file location in `portfolio_app/pages/`.

### Step 1: Create the Page File

```bash
touch portfolio_app/pages/your_page.py
```

### Step 2: Register the Page

Every page must call `dash.register_page()`:

```python
"""Your page description."""

import dash
import dash_mantine_components as dmc

dash.register_page(
    __name__,
    path="/your-page",         # URL path
    name="Your Page",          # Display name (for nav)
    title="Your Page Title",   # Browser tab title
)


def layout() -> dmc.Container:
    """Page layout function."""
    return dmc.Container(
        dmc.Stack(
            [
                dmc.Title("Your Page", order=1),
                dmc.Text("Page content here."),
            ],
            gap="lg",
        ),
        size="md",
        py="xl",
    )
```

### Step 3: Page with Dynamic Content

For pages with URL parameters:

```python
# pages/blog/article.py
dash.register_page(
    __name__,
    path_template="/blog/<slug>",  # Dynamic parameter
    name="Article",
)


def layout(slug: str = "") -> dmc.Container:
    """Layout receives URL parameters as arguments."""
    article = get_article(slug)
    if not article:
        return dmc.Text("Article not found")

    return dmc.Container(
        dmc.Title(article["meta"]["title"]),
        # ...
    )
```

### Step 4: Add Navigation (Optional)

To add the page to the sidebar, edit `portfolio_app/components/sidebar.py`:

```python
NAV_ITEMS = [
    {"label": "Home", "href": "/", "icon": "tabler:home"},
    {"label": "Your Page", "href": "/your-page", "icon": "tabler:star"},
    # ...
]
```

### URL Routing Summary

| File Location | URL |
|---------------|-----|
| `pages/home.py` | `/` (if `path="/"`) |
| `pages/about.py` | `/about` |
| `pages/blog/index.py` | `/blog` |
| `pages/blog/article.py` | `/blog/<slug>` |
| `pages/toronto/dashboard.py` | `/toronto` |

---

## Adding a Dashboard Tab

Dashboard tabs are in `portfolio_app/pages/toronto/tabs/`.

### Step 1: Create Tab Layout

```python
# pages/toronto/tabs/your_tab.py
"""Your tab description."""

import dash_mantine_components as dmc

from portfolio_app.figures.choropleth import create_choropleth
from portfolio_app.toronto.demo_data import get_demo_data


def create_your_tab_layout() -> dmc.Stack:
    """Create the tab layout."""
    # create_kpi_cards and create_supporting_charts are helpers
    # you define in this module for your tab's content.
    data = get_demo_data()

    return dmc.Stack(
        [
            dmc.Grid(
                [
                    dmc.GridCol(
                        # Map on left
                        create_choropleth(data, "your_metric"),
                        span=8,
                    ),
                    dmc.GridCol(
                        # KPI cards on right
                        create_kpi_cards(data),
                        span=4,
                    ),
                ],
            ),
            # Charts below
            create_supporting_charts(data),
        ],
        gap="lg",
    )
```

### Step 2: Register in Dashboard

Edit `pages/toronto/dashboard.py` to add the tab:

```python
from portfolio_app.pages.toronto.tabs.your_tab import create_your_tab_layout

# In the tabs list:
dmc.TabsTab("Your Tab", value="your-tab"),

# In the panels:
dmc.TabsPanel(create_your_tab_layout(), value="your-tab"),
```

---

## Creating Figure Factories

Figure factories are in `portfolio_app/figures/`. They create reusable Plotly figures.

### Pattern

```python
# figures/your_chart.py
"""Your chart type factory."""

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def create_your_chart(
    df: pd.DataFrame,
    x_col: str,
    y_col: str,
    title: str = "",
) -> go.Figure:
    """Create a your_chart figure.

    Args:
        df: DataFrame with data.
        x_col: Column for x-axis.
        y_col: Column for y-axis.
        title: Optional chart title.

    Returns:
        Configured Plotly figure.
    """
    fig = px.bar(df, x=x_col, y=y_col, title=title)

    fig.update_layout(
        template="plotly_white",
        margin=dict(l=40, r=40, t=40, b=40),
    )

    return fig
```

### Export from `__init__.py`

```python
# figures/__init__.py
from .your_chart import create_your_chart

__all__ = [
    "create_your_chart",
    # ...
]
```

---

## Branch Workflow

```
main (production)
  ↑
staging (pre-production)
  ↑
development (integration)
  ↑
feature/XX-description (your work)
```

### Creating a Feature Branch

```bash
# Start from development
git checkout development
git pull origin development

# Create feature branch
git checkout -b feature/10-add-new-page

# Work, commit, push
git add .
git commit -m "feat: Add new page"
git push -u origin feature/10-add-new-page
```

### Merging

```bash
# Merge into development
git checkout development
git merge feature/10-add-new-page
git push origin development

# Delete feature branch
git branch -d feature/10-add-new-page
git push origin --delete feature/10-add-new-page
```

**Rules:**

- Never commit directly to `main` or `staging`
- Never delete `development`
- Feature branches are temporary

---

## Code Standards

### Type Hints

Use Python 3.10+ style:

```python
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
    ...
```

### Imports

| Context | Style |
|---------|-------|
| Same directory | `from .module import X` |
| Sibling directory | `from ..schemas.model import Y` |
| External packages | `import pandas as pd` |

### Formatting

```bash
make format  # Runs ruff formatter
make lint    # Checks style
```

### Docstrings

Google style, only for non-obvious functions:

```python
def calculate_score(values: list[float], weights: list[float]) -> float:
    """Calculate weighted score.

    Args:
        values: Raw metric values.
        weights: Weight for each metric.

    Returns:
        Weighted average score.
    """
    ...
```

---

## Questions?

Check `CLAUDE.md` for AI assistant context and architectural decisions.
307
docs/DATABASE_SCHEMA.md
Normal file
@@ -0,0 +1,307 @@
# Database Schema

This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.

## Entity Relationship Diagram

```mermaid
erDiagram
    dim_time {
        int date_key PK
        date full_date UK
        int year
        int month
        int quarter
        string month_name
        bool is_month_start
    }

    dim_cmhc_zone {
        int zone_key PK
        string zone_code UK
        string zone_name
        geometry geometry
    }

    dim_neighbourhood {
        int neighbourhood_id PK
        string name
        geometry geometry
        int population
        numeric land_area_sqkm
        numeric pop_density_per_sqkm
        numeric pct_bachelors_or_higher
        numeric median_household_income
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        int census_year
    }

    dim_policy_event {
        int event_id PK
        date event_date
        date effective_date
        string level
        string category
        string title
        text description
        string expected_direction
        string source_url
        string confidence
    }

    fact_rentals {
        int id PK
        int date_key FK
        int zone_key FK
        string bedroom_type
        int universe
        numeric avg_rent
        numeric median_rent
        numeric vacancy_rate
        numeric availability_rate
        numeric turnover_rate
        numeric rent_change_pct
        string reliability_code
    }

    fact_census {
        int id PK
        int neighbourhood_id FK
        int census_year
        int population
        numeric population_density
        numeric median_household_income
        numeric average_household_income
        numeric unemployment_rate
        numeric pct_bachelors_or_higher
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        numeric median_age
        numeric average_dwelling_value
    }

    fact_crime {
        int id PK
        int neighbourhood_id FK
        int year
        string crime_type
        int count
        numeric rate_per_100k
    }

    fact_amenities {
        int id PK
        int neighbourhood_id FK
        string amenity_type
        int count
        int year
    }

    bridge_cmhc_neighbourhood {
        int id PK
        string cmhc_zone_code FK
        int neighbourhood_id FK
        numeric weight
    }

    dim_time ||--o{ fact_rentals : "date_key"
    dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
    dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
    dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
    dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
```

## Schema Layers

### Raw Schema

Raw data is loaded directly from external sources without transformation:

| Table | Source | Description |
|-------|--------|-------------|
| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries |
| `raw.census_profiles` | City of Toronto API | Census profile data |
| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood |
| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data |
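
The loader scripts own the exact DDL; as a minimal sketch of what "without transformation" means here, a raw landing table can simply persist the source payload plus load metadata. The table shape below is an assumption for illustration, not the project's actual DDL:

```sql
-- Hypothetical raw landing table: store the API payload as-is;
-- typing and cleaning happen later in the dbt staging layer.
create schema if not exists raw;

create table if not exists raw.neighbourhoods (
    id        bigserial primary key,
    payload   jsonb not null,                      -- GeoJSON feature, untransformed
    loaded_at timestamptz not null default now()   -- load metadata
);
```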
### Staging Schema (dbt)

Staging models provide 1:1 cleaned representations of source data:

| Model | Source Table | Purpose |
|-------|--------------|---------|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
| `stg_police__crimes` | raw.crime_data | Standardized crime categories |

### Marts Schema (dbt)

Analytical tables ready for dashboard consumption:

| Model | Grain | Purpose |
|-------|-------|---------|
| `mart_neighbourhood_summary` | neighbourhood | Composite livability scores |
| `mart_rental_trends` | zone × month | Time-series rental analysis |
| `mart_crime_rates` | neighbourhood × year | Crime rate calculations |
| `mart_amenity_density` | neighbourhood | Amenity accessibility scores |
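
For the composite livability score specifically, the weighting is visible in `mart_neighbourhood_overview.sql` earlier in this change set: safety and affordability each contribute 40% and amenities 20%, with missing components falling back to a neutral 50:

```sql
-- Excerpt (adapted) from mart_neighbourhood_overview.sql:
-- weighted blend of 0-100 component scores.
round(
    (coalesce(safety_score, 50) * 0.40 +
     coalesce(affordability_score, 50) * 0.40 +
     50 * 0.20)::numeric,
    1
) as livability_score
```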
## Table Details

### Dimension Tables

#### dim_time
Time dimension for date-based analysis. Grain: one row per month.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
| year | INTEGER | NOT NULL | Calendar year |
| month | INTEGER | NOT NULL | Month number (1-12) |
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
| month_name | VARCHAR(20) | NOT NULL | Month name |
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
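
A key in this format can be derived directly from the month-start date. A minimal sketch, assuming the YYYYMM convention from the table above (some model descriptions elsewhere in this change set say YYYYMMDD, so confirm against the loader before relying on either):

```sql
-- Generate one row per month over the dashboard's 2014-2025 spine
-- and derive a YYYYMM integer surrogate key from each date.
select
    extract(year from d)::int * 100 + extract(month from d)::int as date_key,
    d::date as full_date
from generate_series(date '2014-01-01', date '2025-12-01', interval '1 month') as g(d);
```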
#### dim_cmhc_zone
CMHC rental market zones (~20 zones covering Toronto).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |

#### dim_neighbourhood
Toronto's 158 official neighbourhoods.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
| population | INTEGER | | Total population |
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| median_household_income | NUMERIC(12,2) | | Median income |
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
| census_year | INTEGER | DEFAULT 2021 | Census reference year |

#### dim_policy_event
Policy events for time-series annotation (rent control, interest rates, etc.).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| event_id | INTEGER | PK, AUTO | Surrogate key |
| event_date | DATE | NOT NULL | Announcement date |
| effective_date | DATE | | Implementation date |
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
| title | VARCHAR(200) | NOT NULL | Event title |
| description | TEXT | | Detailed description |
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
| source_url | VARCHAR(500) | | Reference link |
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |

### Fact Tables

#### fact_rentals
CMHC rental market survey data. Grain: zone × bedroom type × survey date.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| date_key | INTEGER | FK → dim_time | Survey date reference |
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
| universe | INTEGER | | Total rental units |
| avg_rent | NUMERIC(10,2) | | Average rent |
| median_rent | NUMERIC(10,2) | | Median rent |
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
| availability_rate | NUMERIC(5,2) | | Availability percentage |
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
| reliability_code | VARCHAR(2) | | CMHC data quality code |

#### fact_census
Census statistics. Grain: neighbourhood × census year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
| population | INTEGER | | Total population |
| population_density | NUMERIC(10,2) | | People per km² |
| median_household_income | NUMERIC(12,2) | | Median income |
| average_household_income | NUMERIC(12,2) | | Average income |
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
| median_age | NUMERIC(5,2) | | Median resident age |
| average_dwelling_value | NUMERIC(12,2) | | Average home value |

#### fact_crime
Crime statistics. Grain: neighbourhood × year × crime type.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| year | INTEGER | NOT NULL | Calendar year |
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
| count | INTEGER | NOT NULL | Number of incidents |
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |

#### fact_amenities
Amenity counts. Grain: neighbourhood × amenity type × year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
| count | INTEGER | NOT NULL | Number of amenities |
| year | INTEGER | NOT NULL | Reference year |

### Bridge Tables

#### bridge_cmhc_neighbourhood
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
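
To make the disaggregation concrete: zone-level metrics can be pushed down to neighbourhoods as weight-normalized averages. The `int_rentals__neighbourhood_allocated` dbt model fills this role in the pipeline; the query below is an illustrative sketch, not that model's exact SQL:

```sql
-- Allocate zone-level 2-bedroom rents down to neighbourhoods
-- as a weighted average over the zones each neighbourhood touches.
select
    b.neighbourhood_id,
    t.year,
    sum(r.avg_rent * b.weight) / nullif(sum(b.weight), 0) as est_avg_rent_2bed
from fact_rentals r
join dim_time t on r.date_key = t.date_key
join dim_cmhc_zone z on r.zone_key = z.zone_key
join bridge_cmhc_neighbourhood b on b.cmhc_zone_code = z.zone_code
where r.bedroom_type = '2-bed'
group by b.neighbourhood_id, t.year;
```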
## Indexes

| Table | Index | Columns | Purpose |
|-------|-------|---------|---------|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
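
Each row corresponds to a plain B-tree index; the first row, for example, maps to DDL along these lines:

```sql
-- Composite index supporting time-series queries on rentals.
create index if not exists ix_fact_rentals_date_zone
    on fact_rentals (date_key, zone_key);
```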
## PostGIS Extensions

The database requires PostGIS for geospatial operations:

```sql
CREATE EXTENSION IF NOT EXISTS postgis;
```

All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
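
As an illustration of why SRID 4326 matters, the dashboard's choropleth layer can serialize boundaries straight to GeoJSON, which web mapping libraries consume directly (an example query, not necessarily the app's exact access path):

```sql
-- Serialize each neighbourhood boundary to GeoJSON for web mapping.
select
    neighbourhood_id,
    name,
    st_asgeojson(geometry) as boundary_geojson
from dim_neighbourhood;
```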
@@ -1,21 +1,171 @@
# Portfolio Project Reference

**Project**: Analytics Portfolio
**Owner**: Leo
**Status**: Ready for Sprint 1
**Owner**: Leo Miranda
**Status**: Sprint 9 Complete (Dashboard Implementation Done)
**Last Updated**: January 2026

---

## Project Overview

Two-project analytics portfolio demonstrating end-to-end data engineering, visualization, and ML capabilities.
Personal portfolio website with an interactive Toronto Neighbourhood Dashboard demonstrating data engineering, visualization, and analytics capabilities.

| Project | Domain | Key Skills | Phase |
|---------|--------|------------|-------|
| **Toronto Housing Dashboard** | Real estate | ETL, dimensional modeling, geospatial, choropleth | Phase 1 (Active) |
| **Energy Pricing Analysis** | Utility markets | Time series, ML prediction, API integration | Phase 3 (Future) |

| Component | Description | Status |
|-----------|-------------|--------|
| Portfolio Website | Bio, About, Projects, Resume, Contact, Blog | Complete |
| Toronto Dashboard | 5-tab neighbourhood analysis | Complete |
| Data Pipeline | dbt models, figure factories | Complete |
| Deployment | Production deployment | Pending |

**Platform**: Monolithic Dash application on self-hosted VPS (bio landing page + dashboards).

---
## Completed Work

### Sprint 1-6: Foundation
- Repository setup, Docker, PostgreSQL + PostGIS
- Bio landing page implementation
- Initial data model design

### Sprint 7: Navigation & Theme
- Sidebar navigation
- Dark/light theme toggle
- dash-mantine-components integration

### Sprint 8: Portfolio Website
- About, Contact, Projects, Resume pages
- Blog system with Markdown/frontmatter
- Health endpoint

### Sprint 9: Neighbourhood Dashboard Transition
- Phase 1: Deleted legacy TRREB code
- Phase 2: Documentation cleanup
- Phase 3: New neighbourhood-centric data model
- Phase 4: dbt model restructuring
- Phase 5: 5-tab dashboard implementation
- Phase 6: 15 documentation notebooks
- Phase 7: Final documentation review

---
## Application Architecture

### URL Routes

| URL | Page | File |
|-----|------|------|
| `/` | Home | `pages/home.py` |
| `/about` | About | `pages/about.py` |
| `/contact` | Contact | `pages/contact.py` |
| `/projects` | Projects | `pages/projects.py` |
| `/resume` | Resume | `pages/resume.py` |
| `/blog` | Blog listing | `pages/blog/index.py` |
| `/blog/{slug}` | Article | `pages/blog/article.py` |
| `/toronto` | Dashboard | `pages/toronto/dashboard.py` |
| `/toronto/methodology` | Methodology | `pages/toronto/methodology.py` |
| `/health` | Health check | `pages/health.py` |
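Dash Pages builds this routing automatically from `dash.register_page` calls in each page module; a minimal sketch of what a page file might contain (layout content is illustrative):

```python
# pages/about.py — minimal Dash Pages registration sketch.
import dash
from dash import html

dash.register_page(__name__, path="/about", title="About")

layout = html.Div([html.H1("About")])
```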
### Directory Structure

```
portfolio_app/
├── app.py                 # Dash app factory
├── config.py              # Pydantic BaseSettings
├── assets/                # CSS, images
├── callbacks/             # Global callbacks (sidebar, theme)
├── components/            # Shared UI components
├── content/blog/          # Markdown blog articles
├── errors/                # Exception handling
├── figures/               # Plotly figure factories
├── pages/
│   ├── home.py
│   ├── about.py
│   ├── contact.py
│   ├── projects.py
│   ├── resume.py
│   ├── health.py
│   ├── blog/
│   │   ├── index.py
│   │   └── article.py
│   └── toronto/
│       ├── dashboard.py
│       ├── methodology.py
│       ├── tabs/          # 5 tab layouts
│       └── callbacks/     # Dashboard interactions
├── toronto/               # Data logic
│   ├── parsers/           # API extraction
│   ├── loaders/           # Database operations
│   ├── schemas/           # Pydantic models
│   ├── models/            # SQLAlchemy ORM
│   └── demo_data.py       # Sample data
└── utils/
    └── markdown_loader.py # Blog article loading
```
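`config.py` holds runtime configuration as a Pydantic `BaseSettings` class; a hedged sketch using the pydantic-settings 2.x API (field names are illustrative, not the project's actual settings):

```python
# config.py sketch — field names are hypothetical.
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    # Values are read from the environment, falling back to .env
    model_config = SettingsConfigDict(env_file=".env")

    database_url: str = "postgresql://localhost:5432/portfolio"
    log_level: str = "INFO"


settings = Settings()
```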
---

## Toronto Dashboard

### Data Sources

| Source | Data | Format |
|--------|------|--------|
| City of Toronto Open Data | Neighbourhoods (158), Census profiles, Parks, Schools, Childcare, TTC | GeoJSON, CSV, API |
| Toronto Police Service | Crime rates, MCI, Shootings | CSV, API |
| CMHC | Rental Market Survey | CSV |

### Geographic Model

```
City of Toronto Neighbourhoods (158)   ← Primary analysis unit
CMHC Zones (~20)                       ← Rental data (Census Tract aligned)
```
### Dashboard Tabs

| Tab | Choropleth Metric | Supporting Charts |
|-----|-------------------|-------------------|
| Overview | Livability score | Top/Bottom 10 bar, Income vs Safety scatter |
| Housing | Affordability index | Rent trend line, Tenure breakdown bar |
| Safety | Crime rate per 100K | Crime breakdown bar, Crime trend line |
| Demographics | Median income | Age distribution, Population density bar |
| Amenities | Amenity index | Amenity radar, Transit accessibility bar |
### Star Schema

| Table | Type | Description |
|-------|------|-------------|
| `dim_neighbourhood` | Dimension | 158 neighbourhoods with geometry |
| `dim_time` | Dimension | Date dimension |
| `dim_cmhc_zone` | Dimension | ~20 CMHC zones with geometry |
| `fact_census` | Fact | Census indicators by neighbourhood |
| `fact_crime` | Fact | Crime stats by neighbourhood |
| `fact_rentals` | Fact | Rental data by CMHC zone |
| `fact_amenities` | Fact | Amenity counts by neighbourhood |
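Queries against the schema follow the usual star-join shape; an illustrative example (assumes `dim_neighbourhood` exposes a `name` column):

```sql
-- Sketch: crime counts per neighbourhood for one year.
SELECT n.name, c.crime_type, c.count
FROM fact_crime AS c
JOIN dim_neighbourhood AS n
    ON n.neighbourhood_id = c.neighbourhood_id
WHERE c.year = 2024
ORDER BY c.count DESC;
```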
### dbt Layers

| Layer | Naming | Example |
|-------|--------|---------|
| Staging | `stg_{source}__{entity}` | `stg_toronto__neighbourhoods` |
| Intermediate | `int_{domain}__{transform}` | `int_neighbourhood__demographics` |
| Marts | `mart_{domain}` | `mart_neighbourhood_overview` |
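Under this convention a staging model is typically a thin rename/cast over a declared source; a hedged sketch (source and column names are illustrative):

```sql
-- models/staging/stg_toronto__neighbourhoods.sql (sketch)
select
    neighbourhood_id,
    name as neighbourhood_name,
    geometry
from {{ source('toronto', 'neighbourhoods') }}
```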
---

## Tech Stack

| Layer | Technology | Version |
|-------|------------|---------|
| Database | PostgreSQL + PostGIS | 16.x |
| Validation | Pydantic | 2.x |
| ORM | SQLAlchemy | 2.x |
| Transformation | dbt-postgres | 1.7+ |
| Data Processing | Pandas, GeoPandas | Latest |
| Visualization | Dash + Plotly | 2.14+ |
| UI Components | dash-mantine-components | Latest |
| Testing | pytest | 7.0+ |
| Python | 3.11+ | Via pyenv |

---
@@ -23,325 +173,51 @@ Two-project analytics portfolio demonstrating end-to-end data engineering, visua
| Branch | Purpose | Deploys To |
|--------|---------|------------|
| `main` | Production releases only | VPS (production) |
| `main` | Production releases | VPS (production) |
| `staging` | Pre-production testing | VPS (staging) |
| `development` | Active development | Local only |

**Rules**:
- All feature branches created FROM `development`
- All feature branches merge INTO `development`
- `development` → `staging` for testing
- `staging` → `main` for release
- Direct commits to `main` or `staging` are forbidden
- Branch naming: `feature/{sprint}-{description}` or `fix/{issue-id}`

**Rules:**
- Feature branches from `development`: `feature/{sprint}-{description}`
- Merge into `development` when complete
- `development` → `staging` → `main` for releases
- Never delete `development`
---

## Tech Stack (Locked)
## Code Standards

| Layer | Technology | Version |
|-------|------------|---------|
| Database | PostgreSQL + PostGIS | 16.x |
| Validation | Pydantic | ≥2.0 |
| ORM | SQLAlchemy | ≥2.0 (2.0-style API only) |
| Transformation | dbt-postgres | ≥1.7 |
| Data Processing | Pandas | ≥2.1 |
| Geospatial | GeoPandas + Shapely | ≥0.14 |
| Visualization | Dash + Plotly | ≥2.14 |
| UI Components | dash-mantine-components | Latest stable |
| Testing | pytest | ≥7.0 |
| Python | 3.11+ | Via pyenv |

### Type Hints (Python 3.10+)

**Compatibility Notes**:
- SQLAlchemy 2.0 + Pydantic 2.0 integrate well—never mix 1.x APIs
- PostGIS extension required—enable during db init
- Docker Compose V2 (no `version` field in compose files)

```python
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
    ...
```
---

### Imports

## Code Conventions

### Import Style

| Context | Style | Example |
|---------|-------|---------|
| Same directory | Single dot | `from .trreb import TRREBParser` |
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
| External packages | Absolute | `import pandas as pd` |
### Module Separation

| Directory | Contains | Purpose |
|-----------|----------|---------|
| `schemas/` | Pydantic models | Data validation |
| `models/` | SQLAlchemy ORM | Database persistence |
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
| `loaders/` | Database operations | Data loading |
| `figures/` | Chart factories | Plotly figure generation |
| `callbacks/` | Dash callbacks | Per-dashboard, in `pages/{dashboard}/callbacks/` |
| `errors/` | Exceptions + handlers | Error handling |
### Code Standards

- **Type hints**: Mandatory, Python 3.10+ style (`list[str]`, `dict[str, int]`, `X | None`)
- **Functions**: Single responsibility, verb naming, early returns over nesting
- **Docstrings**: Google style, minimal—only for non-obvious behavior
- **Constants**: Module-level for magic values, Pydantic BaseSettings for runtime config

| Context | Style |
|---------|-------|
| Same directory | `from .module import X` |
| Sibling directory | `from ..schemas.model import Y` |
| External | `import pandas as pd` |
### Error Handling

```python
# errors/exceptions.py
class PortfolioError(Exception):
    """Base exception."""


class ParseError(PortfolioError):
    """PDF/CSV parsing failed."""
    """Data parsing failed."""


class ValidationError(PortfolioError):
    """Pydantic or business rule validation failed."""
    """Validation failed."""


class LoadError(PortfolioError):
    """Database load operation failed."""
    """Database load failed."""
```

- Decorators for infrastructure concerns (logging, retry, transactions)
- Explicit handling for domain logic (business rules, recovery strategies)
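As an illustration of the decorator approach for infrastructure concerns, a hedged sketch of a retry decorator (not code from the repo):

```python
# Hypothetical retry decorator for transient infrastructure failures.
import functools
import time


def retry(attempts: int = 3, delay: float = 1.0):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except LoadError:
                    if attempt == attempts:
                        raise  # give up after the final attempt
                    time.sleep(delay)  # back off before retrying
        return wrapper
    return decorator
```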
---

## Application Architecture

### Dash Pages Structure

```
portfolio_app/
├── app.py          # Dash app factory with Pages routing
├── config.py       # Pydantic BaseSettings
├── assets/         # CSS, images (auto-served by Dash)
├── pages/
│   ├── home.py     # Bio landing page → /
│   ├── toronto/
│   │   ├── dashboard.py   # Layout only → /toronto
│   │   └── callbacks/     # Interaction logic
│   └── energy/     # Phase 3
├── components/     # Shared UI (navbar, footer, cards)
├── figures/        # Shared chart factories
├── toronto/        # Toronto data logic
│   ├── parsers/
│   ├── loaders/
│   ├── schemas/    # Pydantic
│   └── models/     # SQLAlchemy
└── errors/
```
### URL Routing (Automatic)

| URL | Page | Status |
|-----|------|--------|
| `/` | Bio landing page | Sprint 2 |
| `/toronto` | Toronto Housing Dashboard | Sprint 6 |
| `/energy` | Energy Pricing Dashboard | Phase 3 |

---
## Phase 1: Toronto Housing Dashboard

### Data Sources

| Track | Source | Format | Geography | Frequency |
|-------|--------|--------|-----------|-----------|
| Purchases | TRREB Monthly Reports | PDF | ~35 Districts | Monthly |
| Rentals | CMHC Rental Market Survey | CSV | ~20 Zones | Annual |
| Enrichment | City of Toronto Open Data | GeoJSON/CSV | 158 Neighbourhoods | Census |
| Policy Events | Curated list | CSV | N/A | Event-based |

### Geographic Reality

```
┌─────────────────────────────────────────────────────────────────┐
│ City of Toronto Neighbourhoods (158)                            │ ← Enrichment only
├─────────────────────────────────────────────────────────────────┤
│ TRREB Districts (~35) — W01, C01, E01, etc.                     │ ← Purchase data
├─────────────────────────────────────────────────────────────────┤
│ CMHC Zones (~20) — Census Tract aligned                         │ ← Rental data
└─────────────────────────────────────────────────────────────────┘
```

**Critical**: These geographies do NOT align. Display as separate layers with toggle—do not force crosswalks.
### Data Model (Star Schema)

| Table | Type | Keys |
|-------|------|------|
| `fact_purchases` | Fact | → dim_time, dim_trreb_district |
| `fact_rentals` | Fact | → dim_time, dim_cmhc_zone |
| `dim_time` | Dimension | date_key (PK) |
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
| `dim_policy_event` | Dimension | event_id (PK) |

**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
### dbt Layer Structure

| Layer | Naming | Purpose |
|-------|--------|---------|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
| Intermediate | `int_{domain}__{transform}` | Business logic, filtering |
| Marts | `mart_{domain}` | Final analytical tables |

---
## Sprint Overview

| Sprint | Focus | Milestone |
|--------|-------|-----------|
| 1 | Project bootstrap, start TRREB digitization | — |
| 2 | Bio page, data acquisition | **Launch 1: Bio Live** |
| 3 | Parsers, schemas, models | — |
| 4 | Loaders, dbt | — |
| 5 | Visualization | — |
| 6 | Polish, deploy dashboard | **Launch 2: Dashboard Live** |
| 7 | Buffer | — |
### Sprint 1 Deliverables

| Category | Tasks |
|----------|-------|
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
| **Tests** | tests/ directory, conftest.py, pytest config |
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
### Human Tasks (Cannot Automate)

| Task | Tool | Effort |
|------|------|--------|
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
| Research policy events (10-20) | Manual research | 2-3 hours |
| Replace social link placeholders | Manual | 5 minutes |

---
## Scope Boundaries

### Phase 1 — Build These

- Bio landing page with content from bio_content_v2.md
- TRREB PDF parser
- CMHC CSV processor
- PostgreSQL + PostGIS database layer
- Star schema (facts + dimensions)
- dbt models with tests
- Choropleth visualization (Dash)
- Policy event annotation layer
- Neighbourhood overlay (toggle-able)
### Phase 1 — Do NOT Build

| Feature | Reason | When |
|---------|--------|------|
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 | After Energy project |
| Crime data integration | Deferred scope | Phase 4 |
| Historical boundary reconciliation (140→158) | 2021+ data only for V1 | Phase 4 |
| ML prediction models | Energy project scope | Phase 3 |
| Multi-project shared infrastructure | Build first, abstract second | Phase 2 |

If a task seems to require Phase 3/4 features, **stop and flag it**.

---
## File Structure

### Root-Level Files (Allowed)

| File | Purpose |
|------|---------|
| `README.md` | Project overview |
| `CLAUDE.md` | AI assistant context |
| `pyproject.toml` | Python packaging |
| `.gitignore` | Git ignore rules |
| `.env.example` | Environment template |
| `.python-version` | pyenv version |
| `.pre-commit-config.yaml` | Pre-commit hooks |
| `docker-compose.yml` | Container orchestration |
| `Makefile` | Task automation |
### Directory Structure

```
portfolio/
├── portfolio_app/        # Monolithic Dash application
│   ├── app.py
│   ├── config.py
│   ├── assets/
│   ├── pages/
│   ├── components/
│   ├── figures/
│   ├── toronto/
│   └── errors/
├── tests/
├── dbt/
├── data/
│   └── toronto/
│       ├── raw/
│       ├── processed/    # gitignored
│       └── reference/
├── scripts/
│   ├── db/
│   ├── docker/
│   ├── deploy/
│   ├── dbt/
│   └── dev/
├── docs/
├── notebooks/
├── backups/              # gitignored
└── reports/              # gitignored
```
### Gitignored Directories

- `data/*/processed/`
- `reports/`
- `backups/`
- `notebooks/*.html`
- `.env`
- `__pycache__/`
- `.venv/`

---
## Makefile Targets

| Target | Purpose |
|--------|---------|
| `setup` | Install deps, create .env, init pre-commit |
| `docker-up` | Start PostgreSQL + PostGIS |
| `docker-down` | Stop containers |
| `db-init` | Initialize database schema |
| `run` | Start Dash dev server |
| `test` | Run pytest |
| `dbt-run` | Run dbt models |
| `dbt-test` | Run dbt tests |
| `lint` | Run ruff linter |
| `format` | Run ruff formatter |
| `ci` | Run all checks |
| `deploy` | Deploy to production |

---
## Script Standards

All scripts in `scripts/` (a header sketch follows the list):
- Include usage comments at top
- Idempotent where possible
- Exit codes: 0 = success, 1 = error
- Use `set -euo pipefail` for bash
- Log to stdout, errors to stderr
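A minimal header sketch meeting these standards (script name, command, and body are illustrative):

```bash
#!/usr/bin/env bash
# Usage: ./scripts/db/init_db.sh
# Initializes the database schema. Safe to re-run (idempotent).
set -euo pipefail

echo "Initializing schema..."                      # progress → stdout
psql "${DATABASE_URL}" -f schema.sql \
    || { echo "ERROR: schema load failed" >&2; exit 1; }
```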
---

## Environment Variables

@@ -360,37 +236,52 @@ LOG_LEVEL=INFO
---

## Success Criteria
## Makefile Targets

### Launch 1 (Sprint 2)
- [ ] Bio page accessible via HTTPS
- [ ] All bio content rendered (from bio_content_v2.md)
- [ ] No placeholder text visible
- [ ] Mobile responsive
- [ ] Social links functional

### Launch 2 (Sprint 6)
- [ ] Choropleth renders TRREB districts and CMHC zones
- [ ] Purchase/rental mode toggle works
- [ ] Time navigation works
- [ ] Policy event markers visible
- [ ] Neighbourhood overlay toggleable
- [ ] Methodology documentation published
- [ ] Data sources cited

| Target | Purpose |
|--------|---------|
| `setup` | Install deps, create .env, init pre-commit |
| `docker-up` | Start PostgreSQL + PostGIS |
| `docker-down` | Stop containers |
| `db-init` | Initialize database schema |
| `run` | Start Dash dev server |
| `test` | Run pytest |
| `dbt-run` | Run dbt models |
| `dbt-test` | Run dbt tests |
| `lint` | Run ruff linter |
| `format` | Run ruff formatter |
| `ci` | Run all checks |
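A hedged sketch of how a few of these targets might be written (recipes are illustrative, not the project's actual Makefile):

```makefile
# Illustrative recipes only.
.PHONY: docker-up test lint

docker-up:  ## Start PostgreSQL + PostGIS
	docker compose up -d

test:  ## Run pytest
	pytest tests/ -v --tb=short

lint:  ## Run ruff linter
	ruff check .
```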
---

## Reference Documents
## Next Steps

For detailed specifications, see:

### Deployment (Sprint 10+)
- [ ] Production Docker configuration
- [ ] CI/CD pipeline
- [ ] HTTPS/SSL setup
- [ ] Domain configuration

| Document | Location | Use When |
|----------|----------|----------|
| Data schemas | `docs/toronto_housing_spec.md` | Parser/model tasks |
| WBS details | `docs/wbs.md` | Sprint planning |
| Bio content | `docs/bio_content.md` | Building home.py |

### Data Enhancement
- [ ] Connect to live APIs (currently using demo data)
- [ ] Data refresh automation
- [ ] Historical data loading

### Future Projects
- Energy Pricing Analysis dashboard (planned)

---
*Reference Version: 1.0*
*Created: January 2026*

## Related Documents

| Document | Purpose |
|----------|---------|
| `README.md` | Quick start guide |
| `CLAUDE.md` | AI assistant context |
| `docs/CONTRIBUTING.md` | Developer guide |
| `notebooks/README.md` | Notebook documentation |

---

*Reference Version: 3.0*
*Updated: January 2026*
@@ -1,134 +0,0 @@
# Portfolio Bio Content

**Version**: 2.0
**Last Updated**: January 2026
**Purpose**: Content source for `portfolio_app/pages/home.py`

---

## Document Context

| Attribute | Value |
|-----------|-------|
| **Parent Document** | `portfolio_project_plan_v5.md` |
| **Role** | Bio content and social links for landing page |
| **Consumed By** | `portfolio_app/pages/home.py` |

---
## Headline

**Primary**: Leo | Data Engineer & Analytics Developer

**Tagline**: I build data infrastructure that actually gets used.

---
## Professional Summary

Over the past 5 years, I've designed and evolved an enterprise analytics platform from scratch—now processing 1B+ rows across 21 tables with Python-based ETL pipelines and dbt-style SQL transformations. The result: 40% efficiency gains, 30% reduction in call abandon rates, and dashboards that executives actually open.

My approach: dimensional modeling (star schema), layered transformations (staging → intermediate → marts), and automation that eliminates manual work. I've built everything from self-service analytics portals to OCR-powered receipt processing systems.

Currently at Summitt Energy supporting multi-market operations across Canada and 8 US states. Previously cut my teeth on IT infrastructure projects at Petrobras (Fortune 500) and the Project Management Institute.

---
## Tech Stack

| Category | Technologies |
|----------|--------------|
| **Languages** | Python, SQL |
| **Data Processing** | Pandas, SQLAlchemy, FastAPI |
| **Databases** | PostgreSQL, MSSQL |
| **Visualization** | Power BI, Plotly, Dash |
| **Patterns** | dbt, dimensional modeling, star schema |
| **Other** | Genesys Cloud |

**Display Format** (for landing page):
```
Python (Pandas, SQLAlchemy, FastAPI) • SQL (MSSQL, PostgreSQL) • Power BI • Plotly/Dash • Genesys Cloud • dbt patterns
```

---
## Side Project

**Bandit Labs** — Building automation and AI tooling for small businesses.

*Note: Keep this brief on portfolio; link only if separate landing page exists.*

---
## Social Links

| Platform | URL | Icon |
|----------|-----|------|
| **LinkedIn** | `https://linkedin.com/in/[USERNAME]` | `lucide-react: Linkedin` |
| **GitHub** | `https://github.com/[USERNAME]` | `lucide-react: Github` |

> **TODO**: Replace `[USERNAME]` placeholders with actual URLs before bio page launch.

---
## Availability Statement

Open to **Senior Data Analyst**, **Analytics Engineer**, and **BI Developer** opportunities in Toronto or remote.

---
## Portfolio Projects Section

*Dynamically populated based on deployed projects.*

| Project | Status | Link |
|---------|--------|------|
| Toronto Housing Dashboard | In Development | `/toronto` |
| Energy Pricing Analysis | Planned | `/energy` |

**Display Logic**:
- Show only projects with `status = deployed`
- "In Development" projects can show as coming soon or be hidden (user preference)

---
## Implementation Notes

### Content Hierarchy for `home.py`

```
1. Name + Tagline (hero section)
2. Professional Summary (2-3 paragraphs)
3. Tech Stack (horizontal chips or inline list)
4. Portfolio Projects (cards linking to dashboards)
5. Social Links (icon buttons)
6. Availability statement (subtle, bottom)
```
### Styling Recommendations

- Clean, minimal — let the projects speak
- Dark/light mode support via dash-mantine-components theme
- No headshot required (optional)
- Mobile-responsive layout
### Content Updates

When updating bio content:
1. Edit this document
2. Update `home.py` to reflect changes
3. Redeploy

---
## Related Documents

| Document | Relationship |
|----------|--------------|
| `portfolio_project_plan_v5.md` | Parent — references this for bio content |
| `portfolio_app/pages/home.py` | Consumer — implements this content |

---

*Document Version: 2.0*
*Updated: January 2026*
520
docs/changes/Portfolio-Changes.txt
Normal file
@@ -0,0 +1,520 @@
# Leo Miranda — Portfolio Website Blueprint

Structure, navigation, and complete page content

---

## Site Architecture

```
leodata.science
├── Home (Landing)
├── About
├── Projects (Overview + Status)
│   └── [Side Navbar]
│       ├── → Toronto Housing Market Dashboard (live)
│       ├── → US Retail Energy Price Predictor (coming soon)
│       └── → DataFlow Platform (Phase 3)
├── Lab (Bandit Labs / Experiments)
├── Blog
│   └── [Articles]
├── Resume (downloadable + inline)
└── Contact
```

---
## Navigation Structure

Primary Nav: Home | Projects | Lab | Blog | About | Resume

Footer: LinkedIn | GitHub | Email | “Built with Dash & too much coffee”

---

# PAGE CONTENT

---
## 1. HOME (Landing Page)

### Hero Section

Headline:

> I turn messy data into systems that actually work.

Subhead:

> Data Engineer & Analytics Specialist. 8 years building pipelines, dashboards, and the infrastructure nobody sees but everyone depends on. Based in Toronto.

CTA Buttons:

- View Projects → /projects
- Get In Touch → /contact

---

### Quick Impact Strip (Optional — 3-4 stats)

| 1B+ | 40% | 5 Years |
|-----|-----|---------|
| Rows processed daily across enterprise platform | Efficiency gain through automation | Building DataFlow from zero |

---
### Featured Project Card

Toronto Housing Market Dashboard

> Real-time analytics on Toronto’s housing trends. dbt-powered ETL, Python scraping, Plotly visualization.
> \[View Dashboard\] \[View Repository\]

---

### Brief Intro (2-3 sentences)

I’m a data engineer who’s spent the last 8 years in the trenches—building the infrastructure that feeds dashboards, automates the boring stuff, and makes data actually usable. Most of my work has been in contact center operations and energy, where I’ve had to be scrappy: one-person data teams, legacy systems, stakeholders who need answers yesterday.

I like solving real problems, not theoretical ones.

---
## 2. ABOUT PAGE

### Opening

I didn’t start in data. I started in project management—CAPM certified, ITIL trained, the whole corporate playbook. Then I realized I liked building systems more than managing timelines, and I was better at automating reports than attending meetings about them.

That pivot led me to where I am now: 8 years deep in data engineering, analytics, and the messy reality of turning raw information into something people can actually use.

---

### What I Actually Do

The short version: I build data infrastructure. Pipelines, warehouses, dashboards, automation—the invisible machinery that makes businesses run on data instead of gut feelings.

The longer version: At Summitt Energy, I’ve been the sole data professional supporting 150+ employees across 9 markets (Canada and US). I inherited nothing—no data warehouse, no reporting infrastructure, no documentation. Over 5 years, I built DataFlow: an enterprise platform processing 1B+ rows, integrating contact center data, CRM systems, and legacy tools that definitely weren’t designed to talk to each other.

That meant learning to be a generalist. I’ve done ETL pipeline development (Python, SQLAlchemy), dimensional modeling, dashboard design (Power BI, Plotly-Dash), API integration, and more stakeholder management than I’d like to admit. When you’re the only data person, you learn to wear every hat.

---

### How I Think About Data

I’m not interested in data for data’s sake. The question I always start with: What decision does this help someone make?

Most of my work has been in operations-heavy environments—contact centers, energy retail, logistics. These aren’t glamorous domains, but they’re where data can have massive impact. A 30% improvement in abandon rate isn’t just a metric; it’s thousands of customers who didn’t hang up frustrated. A 40% reduction in reporting time means managers can actually manage instead of wrestling with spreadsheets.

I care about outcomes, not technology stacks.

---
### The Technical Stuff (For Those Who Want It)

Languages: Python (Pandas, SQLAlchemy, FastAPI), SQL (MSSQL, PostgreSQL), R, VBA

Data Engineering: ETL/ELT pipelines, dimensional modeling (star schema), dbt patterns, batch processing, API integration, web scraping (Selenium)

Visualization: Plotly/Dash, Power BI, Tableau

Platforms: Genesys Cloud, Five9, Zoho, Azure DevOps

Currently Learning: Cloud certification (Azure DP-203), Airflow, Snowflake
### Outside Work

I’m a Brazilian-Canadian based in Toronto. I speak Portuguese (native), English (fluent), and enough Spanish to survive.

When I’m not staring at SQL, I’m usually:

- Building automation tools for small businesses through Bandit Labs (my side project)
- Contributing to open source (MCP servers, Claude Code plugins)
- Trying to explain to my kid why Daddy’s job involves “making computers talk to each other”

---

### What I’m Looking For

I’m currently exploring Senior Data Analyst and Data Engineer roles in the Toronto area (or remote). I’m most interested in:

- Companies that treat data as infrastructure, not an afterthought
- Teams where I can contribute to architecture decisions, not just execute tickets
- Operations-focused industries (energy, logistics, financial services, contact center tech)

If that sounds like your team, let’s talk.

\[Download Resume\] \[Contact Me\]

---
## 3. PROJECTS PAGE

### Navigation Note

The Projects page serves as an overview and status hub for all projects. A side navbar provides direct links to live dashboards and repositories. Users land on the overview first, then navigate to specific projects via the sidebar.

### Intro Text

These are projects I’ve built—some professional (anonymized where needed), some personal. Each one taught me something. Use the sidebar to jump directly to live dashboards or explore the overviews below.

---
### Project Card: Toronto Housing Market Dashboard

Type: Personal Project | Status: Live

The Problem:
Toronto’s housing market moves fast, and most publicly available data is either outdated, behind paywalls, or scattered across dozens of sources. I wanted a single dashboard that tracked trends in real-time.

What I Built:

- Data Pipeline: Python scraper pulling listings data, automated on schedule
- Transformation Layer: dbt-based SQL architecture (staging → intermediate → marts)
- Visualization: Interactive Plotly-Dash dashboard with filters by neighborhood, price range, property type
- Infrastructure: PostgreSQL backend, version-controlled in Git

Tech Stack: Python, dbt, PostgreSQL, Plotly-Dash, GitHub Actions

What I Learned:
Real estate data is messy as hell. Listings get pulled, prices change, duplicates are everywhere. Building a reliable pipeline meant implementing serious data quality checks and learning to embrace “good enough” over “perfect.”

\[View Live Dashboard\] \[View Repository (ETL + dbt)\]

---
### Project Card: US Retail Energy Price Predictor

Type: Personal Project | Status: Coming Soon (Phase 2)

The Problem:
Retail energy pricing in deregulated US markets is volatile and opaque. Consumers and analysts lack accessible tools to understand pricing trends and forecast where rates are headed.

What I’m Building:

- Data Pipeline: Automated ingestion of public pricing data across multiple US markets
- ML Model: Price prediction using time series forecasting (ARIMA, Prophet, or similar)
- Transformation Layer: dbt-based SQL architecture for feature engineering
- Visualization: Interactive dashboard showing historical trends + predictions by state/market

Tech Stack: Python, Scikit-learn, dbt, PostgreSQL, Plotly-Dash

Why This Project:
This showcases the ML side of my skillset—something the Toronto Housing dashboard doesn’t cover. It also leverages my domain expertise from 5+ years in retail energy operations.

\[Coming Soon\]

---
### Project Card: DataFlow Platform (Enterprise Case Study)

Type: Professional | Status: Deferred (Phase 3 — requires sanitized codebase)

The Context:
When I joined Summitt Energy, there was no data infrastructure. Reports were manual. Insights were guesswork. I was hired to fix that.

What I Built (Over 5 Years):

- v1 (2020): Basic ETL scripts pulling Genesys Cloud data into MSSQL
- v2 (2021): Dimensional model (star schema) with fact/dimension tables
- v3 (2022): Python refactor with SQLAlchemy ORM, batch processing, error handling
- v4 (2023-24): dbt-pattern SQL views (staging → intermediate → marts), FastAPI layer, CLI tools

Current State:

- 21 tables, 1B+ rows
- 5,000+ daily transactions processed
- Integrates Genesys Cloud, Zoho CRM, legacy systems
- Feeds Power BI prototypes and production Dash dashboards
- Near-zero reporting errors

Impact:

- 40% improvement in reporting efficiency
- 30% reduction in call abandon rate (via KPI framework)
- 50% faster Average Speed to Answer
- 100% callback completion rate

What I Learned:
Building data infrastructure as a team of one forces brutal prioritization. I learned to ship imperfect solutions fast, iterate based on feedback, and never underestimate how long stakeholder buy-in takes.

Note: This is proprietary work. A sanitized case study with architecture patterns (no proprietary data) will be published in Phase 3.

---
### Project Card: AI-Assisted Automation (Bandit Labs)

Type: Consulting/Side Business | Status: Active

What It Is:
Bandit Labs is my consulting practice focused on automation for small businesses. Most clients don’t need enterprise data platforms—they need someone to eliminate the 4 hours/week they spend manually entering receipts.

Sample Work:

- Receipt Processing Automation: OCR pipeline (Tesseract, Google Vision) extracting purchase data from photos, pushing directly to QuickBooks. Eliminated 3-4 hours/week of manual entry for a restaurant client.
- Product Margin Tracker: Plotly-Dash dashboard with real-time profitability insights
- Claude Code Plugins: MCP servers for Gitea, Wiki.js, NetBox integration

Why I Do This:
Small businesses are underserved by the data/automation industry. Everyone wants to sell them enterprise software they don’t need. I like solving problems at a scale where the impact is immediately visible.

\[Learn More About Bandit Labs\]

---
## 4. LAB PAGE (Bandit Labs / Experiments)

### Intro

This is where I experiment. Some of this becomes client work. Some of it teaches me something and gets abandoned. All of it is real code solving real (or at least real-adjacent) problems.

---

### Bandit Labs — Automation for Small Business

I started Bandit Labs because I kept meeting small business owners drowning in manual work that should have been automated years ago. Enterprise tools are overkill. Custom development is expensive. There’s a gap in the middle.

What I Offer:

- Receipt/invoice processing automation
- Dashboard development (Plotly-Dash)
- Data pipeline setup for non-technical teams
- AI integration for repetitive tasks

Recent Client Work:

- Rio Açaí (Restaurant, Gatineau): Receipt OCR → QuickBooks integration. Saved 3-4 hours/week.

\[Contact for Consulting\]
---

### Open Source / Experiments

MCP Servers (Model Context Protocol)
I’ve built production-ready MCP servers for:

- Gitea: Issue management, label operations
- Wiki.js: Documentation access via GraphQL
- NetBox: CMDB integration (DCIM, IPAM, Virtualization)

These let AI assistants (like Claude) interact with infrastructure tools through natural language. Still experimental, but surprisingly useful for my own workflows.

Claude Code Plugins

- projman: AI-guided sprint planning with Gitea/Wiki.js integration
- cmdb-assistant: Conversational infrastructure queries against NetBox
- project-hygiene: Post-task cleanup automation

\[View on GitHub\]

---
## 5. BLOG PAGE

### Intro

I write occasionally about data engineering, automation, and the reality of being a one-person data team. No hot takes, no growth hacking—just things I’ve learned the hard way.

---

### Suggested Initial Articles

Article 1: “Building a Data Platform as a Team of One”
What I learned from 5 years as the sole data professional at a mid-size company

Outline:

- The reality of “full stack data” when there’s no one else
- Prioritization frameworks (what to build first when everything is urgent)
- Technical debt vs. shipping something
- Building stakeholder trust without a team to back you up
- What I’d do differently

---
Article 2: “dbt Patterns Without dbt (And Why I Eventually Adopted Them)”
How I accidentally implemented analytics engineering best practices before knowing the terminology

Outline:

- The problem: SQL spaghetti in production dashboards
- My solution: staging → intermediate → marts view architecture
- Why separation of concerns matters for maintainability
- The day I discovered dbt and realized I’d been doing this manually
- Migration path for legacy SQL codebases

---
Article 3: “The Toronto Housing Market Dashboard: A Data Engineering Postmortem”
Building a real-time analytics pipeline for messy, uncooperative data

Outline:

- Why I built this (and why public housing data sucks)
- Data sourcing challenges and ethical scraping
- Pipeline architecture decisions
- dbt transformation layer design
- What broke and how I fixed it
- Dashboard design for non-technical users

---
Article 4: “Automating Small Business Operations with OCR and AI”
A case study in practical automation for non-enterprise clients

Outline:

- The client problem: 4 hours/week on receipt entry
- Why “just use \[enterprise tool\]” doesn’t work for small business
- Building an OCR pipeline with Tesseract and Google Vision
- QuickBooks integration gotchas
- ROI calculation for automation projects

---
Article 5: “What I Wish I Knew Before Building My First ETL Pipeline”
Hard-won lessons for junior data engineers

Outline:

- Error handling isn’t optional (it’s the whole job)
- Logging is your best friend at 2am
- Why idempotency matters
- The staging table pattern
- Testing data pipelines
- Documentation nobody will read (write it anyway)

---
Article 6: “Predicting US Retail Energy Prices: An ML Project Walkthrough”
Building a forecasting model with domain knowledge from 5 years in energy retail

Outline:

- Why retail energy pricing is hard to predict (deregulation, seasonality, policy)
- Data sourcing and pipeline architecture
- Feature engineering with dbt
- Model selection (ARIMA vs Prophet vs ensemble)
- Evaluation metrics that matter for price forecasting
- Lessons from applying domain expertise to ML

---
## 6. RESUME PAGE

### Inline Display

Show a clean, readable version of the resume directly on the page. Use your tailored Senior Data Analyst version as the base.

### Download Options

- \[Download PDF\]
- \[Download DOCX\]
- \[View on LinkedIn\]

### Optional: Interactive Timeline

Visual timeline of career progression with expandable sections for each role. More engaging than a wall of text, but only if you have time to build it.

---
## 7. CONTACT PAGE

### Intro

I’m currently open to Senior Data Analyst and Data Engineer roles in Toronto (or remote). If you’re working on something interesting and need someone who can build data infrastructure from scratch, I’d like to hear about it.

For consulting inquiries (automation, dashboards, small business data work), reach out about Bandit Labs.

---

### Contact Form Fields

- Name
- Email
- Subject (dropdown: Job Opportunity / Consulting Inquiry / Other)
- Message

---

### Direct Contact

- Email: leobrmi@hotmail.com
- Phone: (416) 859-7936
- LinkedIn: \[link\]
- GitHub: \[link\]

---

### Location

Toronto, ON, Canada
Canadian Citizen | Eligible to work in Canada and US

---
## TONE GUIDELINES

### Do:

- Be direct and specific
- Use first person naturally
- Include concrete metrics
- Acknowledge constraints and tradeoffs
- Show personality without being performative
- Write like you talk (minus the profanity)

### Don’t:

- Use buzzwords without substance (“leveraging synergies”)
- Oversell or inflate
- Write in third person
- Use passive voice excessively
- Sound like a LinkedIn influencer
- Pretend you’re a full team when you’re one person

---
## SEO / DISCOVERABILITY

### Target Keywords (Organic)

- Toronto data analyst
- Data engineer portfolio
- Python ETL developer
- dbt analytics engineer
- Contact center analytics

### Blog Strategy

Aim for 1-2 posts per month initially. Focus on:

- Technical tutorials (how I built X)
- Lessons learned (what went wrong and how I fixed it)
- Industry observations (data work in operations-heavy companies)

---
## IMPLEMENTATION PRIORITY

### Phase 1 (MVP — Get it live)

1. Home page (hero + brief intro + featured project)
2. About page (full content)
3. Projects page (overview + status cards with navbar links to dashboards)
4. Resume page (inline + download)
5. Contact page (form + direct info)
6. Blog (start with 2-3 articles)

### Phase 2 (Expand)

1. Lab page (Bandit Labs + experiments)
2. US Retail Energy Price Predictor (ML project — coming soon)
3. Add more projects as completed

### Phase 3 (Polish)

1. DataFlow Platform case study (requires sanitized fork of proprietary codebase)
2. Testimonials (if available from Summitt stakeholders)
3. Interactive elements (timeline, project filters)

---

Last updated: January 2025
55
docs/project-lessons-learned/INDEX.md
Normal file
@@ -0,0 +1,55 @@
# Project Lessons Learned

This folder contains lessons learned from sprints and development work. These lessons help prevent repeating mistakes and capture valuable insights.

**Note:** This is a temporary local backup while Wiki.js integration is being configured. Once Wiki.js is ready, lessons will be migrated there for better searchability.

---

## Lessons Index

| Date | Sprint/Phase | Title | Tags |
|------|--------------|-------|------|
| 2026-01-17 | Sprint 9 | [Gitea Labels API Requires Org Context](./sprint-9-gitea-labels-user-repos.md) | gitea, mcp, api, labels, projman, configuration |
| 2026-01-17 | Sprint 9 | [Always Read CLAUDE.md Before Asking Questions](./sprint-9-read-claude-md-first.md) | projman, claude-code, context, documentation, workflow |
| 2026-01-17 | Sprint 9-10 | [Graceful Error Handling in Service Layers](./sprint-9-10-graceful-error-handling.md) | python, postgresql, error-handling, dash, graceful-degradation, arm64 |
| 2026-01-17 | Sprint 9-10 | [Modular Callback Structure](./sprint-9-10-modular-callback-structure.md) | dash, callbacks, architecture, python, code-organization |
| 2026-01-17 | Sprint 9-10 | [Figure Factory Pattern](./sprint-9-10-figure-factory-pattern.md) | plotly, dash, design-patterns, python, visualization |
| 2026-01-16 | Phase 4 | [dbt Test Syntax Deprecation](./phase-4-dbt-test-syntax.md) | dbt, testing, yaml, deprecation |

---

## How to Use

### When Starting a Sprint
1. Review relevant lessons in this folder before implementation
2. Search by tags or keywords to find applicable insights
3. Apply prevention strategies from past lessons

### When Closing a Sprint
1. Document any significant lessons learned
2. Use the template below
3. Add entry to the index table above

---

## Lesson Template

```markdown
# [Sprint/Phase] - [Lesson Title]

## Context
[What were you trying to do?]

## Problem
[What went wrong or what insight emerged?]

## Solution
[How did you solve it?]

## Prevention
[How can this be avoided in future sprints?]

## Tags
[Comma-separated tags for search]
```
38
docs/project-lessons-learned/phase-4-dbt-test-syntax.md
Normal file
@@ -0,0 +1,38 @@
# Phase 4 - dbt Test Syntax Deprecation

## Context
Implementing dbt mart models with `accepted_values` tests for tier columns (safety_tier, income_quintile, amenity_tier) that should only contain values 1-5.

## Problem
dbt 1.9+ introduced a deprecation warning for generic test arguments. The old syntax:

```yaml
tests:
  - accepted_values:
      values: [1, 2, 3, 4, 5]
```

Produces deprecation warnings:
```
MissingArgumentsPropertyInGenericTestDeprecation: Arguments to generic tests should be nested under the `arguments` property.
```

## Solution
Nest test arguments under the `arguments` property:

```yaml
tests:
  - accepted_values:
      arguments:
        values: [1, 2, 3, 4, 5]
```

This applies to all generic tests with arguments, not just `accepted_values`.

## Prevention
- When writing dbt schema YAML files, always use the `arguments:` nesting for generic tests
- Run `dbt parse --no-partial-parse` to catch all deprecation warnings before they become errors
- Check dbt changelog when upgrading versions for breaking changes to test syntax

## Tags
dbt, testing, yaml, deprecation, syntax, schema
@@ -0,0 +1,53 @@
# Sprint 9-10 - Figure Factory Pattern for Reusable Charts

## Context
Creating multiple chart types across 5 dashboard tabs, with consistent styling and behavior needed across all visualizations.

## Problem
Without a standardized approach, each callback would create figures inline with:
- Duplicated styling code (colors, fonts, backgrounds)
- Inconsistent hover templates
- Hard-to-maintain figure creation logic
- No reuse between tabs

## Solution
Created a `figures/` module with factory functions:

```
figures/
├── __init__.py       # Exports all factories
├── choropleth.py     # Map visualizations
├── bar_charts.py     # ranking_bar, stacked_bar, horizontal_bar
├── scatter.py        # scatter_figure, bubble_chart
├── radar.py          # radar_figure, comparison_radar
└── demographics.py   # age_pyramid, donut_chart
```

Factory pattern benefits:
1. **Consistent styling** - dark theme applied once
2. **Type-safe interfaces** - clear parameters for each chart type
3. **Easy testing** - factories can be unit tested with sample data
4. **Reusability** - same factory used across multiple tabs

Example factory signature:
```python
import plotly.graph_objects as go


def create_ranking_bar(
    data: list[dict],
    name_column: str,
    value_column: str,
    title: str = "",
    top_n: int = 5,
    bottom_n: int = 5,
    top_color: str = "#4CAF50",
    bottom_color: str = "#F44336",
) -> go.Figure:
    ...  # body omitted in this lesson
```
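A hypothetical call site showing how a callback might consume the factory (record fields are illustrative, not from the repo):

```python
# Hypothetical usage of the factory above.
records = [
    {"name": "Rosedale", "livability": 92.1},
    {"name": "Weston", "livability": 61.4},
]
fig = create_ranking_bar(
    records,
    name_column="name",
    value_column="livability",
    title="Livability score (top vs bottom)",
)
```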
## Prevention
- **Create factories early** - before implementing callbacks
- **Design generic interfaces** - factories should work with any data matching the schema
- **Apply styling in one place** - use constants for colors, fonts
- **Test factories independently** - with synthetic data before integration

## Tags
plotly, dash, design-patterns, python, visualization, reusability, code-organization
@@ -0,0 +1,34 @@
# Sprint 9-10 - Graceful Error Handling in Service Layers

## Context
Building the Toronto Neighbourhood Dashboard with a service layer that queries PostgreSQL/PostGIS dbt marts to provide data to Dash callbacks.

## Problem
Initial service layer implementation let database connection errors propagate as unhandled exceptions. When the PostGIS Docker container was unavailable (common on ARM64 systems where the x86_64 image fails), the entire dashboard would crash instead of gracefully degrading.

## Solution
Wrapped database queries in try/except blocks to return empty DataFrames/lists/dicts when the database is unavailable:

```python
# Imports added for completeness; get_engine is the project's engine factory.
import pandas as pd
from sqlalchemy import text


def _execute_query(sql: str, params: dict | None = None) -> pd.DataFrame:
    try:
        engine = get_engine()
        with engine.connect() as conn:
            return pd.read_sql(text(sql), conn, params=params)
    except Exception:
        # Database unreachable: return an empty frame instead of crashing.
        return pd.DataFrame()
```

This allows:
1. Dashboard to load and display empty states
2. Development/testing without running database
3. Graceful degradation in production

## Prevention
- **Always design service layers with graceful degradation** - assume external dependencies can fail
- **Return empty collections, not exceptions** - let UI components handle empty states
- **Test without database** - verify the app doesn't crash when DB is unavailable
- **Consider ARM64 compatibility** - PostGIS images may not support all platforms

## Tags
python, postgresql, service-layer, error-handling, dash, graceful-degradation, arm64
@@ -0,0 +1,45 @@
# Sprint 9-10 - Modular Callback Structure for Multi-Tab Dashboards

## Context
Implementing a 5-tab Toronto Neighbourhood Dashboard with multiple callbacks per tab (map updates, chart updates, KPI updates, selection handling).

## Problem
Initial callback implementation approach would have placed all callbacks in a single file, leading to:
- A monolithic file with 500+ lines
- Difficult-to-navigate code
- Callbacks for different tabs interleaved
- Testing difficulties

## Solution
Organized callbacks into three focused modules:

```
callbacks/
├── __init__.py             # Imports all modules to register callbacks
├── map_callbacks.py        # Choropleth updates, map click handling
├── chart_callbacks.py      # Supporting chart updates (scatter, trend, donut)
└── selection_callbacks.py  # Dropdown population, KPI updates
```

Key patterns:
1. **Group by responsibility**, not by tab - all map-related callbacks together
2. **Use noqa comments** for imports that register callbacks as side effects
3. **Share helper functions** (like `_empty_chart()`) within modules

```python
# callbacks/__init__.py
from . import (
    chart_callbacks,  # noqa: F401
    map_callbacks,  # noqa: F401
    selection_callbacks,  # noqa: F401
)
```

## Prevention
- **Plan callback organization before implementation** - sketch which callbacks go where
- **Group by function, not by feature** - keeps related logic together
- **Keep modules under 400 lines** - split if exceeding
- **Test imports early** - verify callbacks register correctly

## Tags
dash, callbacks, architecture, python, code-organization, maintainability
@@ -0,0 +1,29 @@
|
||||
# Sprint 9 - Gitea Labels API Requires Org Context
|
||||
|
||||
## Context
|
||||
Creating Gitea issues with labels via MCP tools during Sprint 9 planning for the personal-portfolio project.
|
||||
|
||||
## Problem
|
||||
When calling `create_issue` with a `labels` parameter, received:
|
||||
```
|
||||
404 Client Error: Not Found for url: https://gitea.hotserv.cloud/api/v1/orgs/lmiranda/labels
|
||||
```
|
||||
|
||||
The API attempted to fetch labels from an **organization** endpoint, but `lmiranda` is a **user account**, not an organization.
|
||||
|
||||
## Solution
|
||||
Created issues without the `labels` parameter and documented intended labels in the issue body instead:
|
||||
```markdown
|
||||
**Labels:** Type/Feature, Priority/Medium, Complexity/Simple, Efforts/XS, Component/Docs, Tech/Python
|
||||
```
|
||||
|
||||
This provides visibility into intended categorization while avoiding the API error.
|
||||
|
||||
## Prevention
|
||||
- When working with user-owned repos (not org repos), avoid using the `labels` parameter in `create_issue`
|
||||
- Document labels in issue body as a workaround
|
||||
- Consider creating a repo-level label set for user repos (Gitea supports this; see the sketch below)
|
||||
- Update projman plugin to handle user vs org repos differently
|
||||
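A hedged sketch of seeding repo-level labels through Gitea's `POST /repos/{owner}/{repo}/labels` endpoint; the token, repo name, and color are placeholders, and the endpoint should be verified against your Gitea version:

```python
# Hypothetical one-off script to seed repo-level labels on a user-owned repo
import requests

API = "https://gitea.hotserv.cloud/api/v1"
HEADERS = {"Authorization": "token <REDACTED>"}

resp = requests.post(
    f"{API}/repos/lmiranda/personal-portfolio/labels",
    headers=HEADERS,
    json={"name": "Type/Feature", "color": "#00aabb"},
    timeout=10,
)
resp.raise_for_status()
```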
|
||||
## Tags
|
||||
gitea, mcp, api, labels, projman, configuration
|
||||
@@ -0,0 +1,30 @@
|
||||
# Sprint 9 - Always Read CLAUDE.md Before Asking Questions
|
||||
|
||||
## Context
|
||||
Starting Sprint 9 planning session with `/projman:sprint-plan` command.
|
||||
|
||||
## Problem
|
||||
Asked the user "what should I do?" when all the necessary context was already documented in CLAUDE.md:
|
||||
- Current sprint number and phase
|
||||
- Implementation plan location
|
||||
- Remaining phases to complete
|
||||
- Project conventions and workflows
|
||||
|
||||
This caused user frustration: "why are you asking what to do? cant you see this yourself"
|
||||
|
||||
## Solution
|
||||
Before asking any questions about what to do:
|
||||
1. Read `CLAUDE.md` in the project root
|
||||
2. Check "Project Status" section for current sprint/phase
|
||||
3. Follow references to implementation plans
|
||||
4. Review "Projman Plugin Workflow" section for expected behavior
|
||||
|
||||
## Prevention
|
||||
- **ALWAYS** read CLAUDE.md at the start of any sprint-related command
|
||||
- Look for "Current Sprint" and "Phase" indicators
|
||||
- Check for implementation plan references in `docs/changes/`
|
||||
- Only ask questions if information is genuinely missing from documentation
|
||||
- The projman plugin expects autonomous behavior based on documented context
|
||||
|
||||
## Tags
|
||||
projman, claude-code, context, documentation, workflow, sprint-planning
|
||||
200
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,200 @@
|
||||
# Runbook: Adding a New Dashboard
|
||||
|
||||
This runbook describes how to add a new data dashboard to the portfolio application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [ ] Data sources identified and accessible
|
||||
- [ ] Database schema designed
|
||||
- [ ] Basic Dash/Plotly familiarity
|
||||
|
||||
## Directory Structure
|
||||
|
||||
Create the following structure under `portfolio_app/`:
|
||||
|
||||
```
|
||||
portfolio_app/
|
||||
├── pages/
|
||||
│ └── {dashboard_name}/
|
||||
│ ├── dashboard.py # Main layout with tabs
|
||||
│ ├── methodology.py # Data sources and methods page
|
||||
│ ├── tabs/
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── overview.py # Overview tab layout
|
||||
│ │ └── ... # Additional tab layouts
|
||||
│ └── callbacks/
|
||||
│ ├── __init__.py
|
||||
│ └── ... # Callback modules
|
||||
├── {dashboard_name}/ # Data logic (outside pages/)
|
||||
│ ├── __init__.py
|
||||
│ ├── parsers/ # API/CSV extraction
|
||||
│ │ └── __init__.py
|
||||
│ ├── loaders/ # Database operations
|
||||
│ │ └── __init__.py
|
||||
│ ├── schemas/ # Pydantic models
|
||||
│ │ └── __init__.py
|
||||
│ └── models/ # SQLAlchemy ORM
|
||||
│ └── __init__.py
|
||||
```
|
||||
|
||||
## Step-by-Step Checklist
|
||||
|
||||
### 1. Data Layer
|
||||
|
||||
- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/` (see the sketch after this checklist)
|
||||
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
|
||||
- [ ] Create parsers in `{dashboard_name}/parsers/`
|
||||
- [ ] Create loaders in `{dashboard_name}/loaders/`
|
||||
- [ ] Add database migrations if needed
|
||||
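A minimal sketch of the kind of Pydantic schema this step produces; the class name and fields are placeholders, not part of any existing dashboard:

```python
# Hypothetical schema for a new dashboard's parsed records
from pydantic import BaseModel, Field


class NeighbourhoodRecord(BaseModel):
    area_id: int = Field(ge=1)
    area_name: str
    population: int | None = None
```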
|
||||
### 2. dbt Models
|
||||
|
||||
Create dbt models in `dbt/models/`:
|
||||
|
||||
- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning
|
||||
- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic
|
||||
- [ ] `marts/mart_{domain}.sql` - Final analytical tables
|
||||
|
||||
Follow naming conventions:
|
||||
- Staging: `stg_{source}__{entity}`
|
||||
- Intermediate: `int_{domain}__{transform}`
|
||||
- Marts: `mart_{domain}`
|
||||
|
||||
### 3. Visualization Layer
|
||||
|
||||
- [ ] Create figure factories in `figures/` (or reuse existing)
|
||||
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`
|
||||
|
||||
### 4. Dashboard Pages
|
||||
|
||||
#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)
|
||||
|
||||
```python
import dash
import dash_mantine_components as dmc

# Assumed export from tabs/overview.py (see Tab Layouts below)
from .tabs.overview import overview_tab

dash.register_page(
    __name__,
    path="/{dashboard_name}",
    title="{Dashboard Title}",
    description="{Description}",
)


def layout():
    return dmc.Container([
        # Header
        dmc.Title("{Dashboard Title}", order=1),

        # Tabs
        dmc.Tabs([
            dmc.TabsList([
                dmc.TabsTab("Overview", value="overview"),
                # Add more tabs
            ]),
            dmc.TabsPanel(overview_tab(), value="overview"),
            # Add more panels
        ], value="overview"),
    ])
```
|
||||
|
||||
#### Tab Layouts (`pages/{dashboard_name}/tabs/`)
|
||||
|
||||
- [ ] Create one file per tab
|
||||
- [ ] Export layout function from each
|
||||
|
||||
#### Callbacks (`pages/{dashboard_name}/callbacks/`)
|
||||
|
||||
- [ ] Create callback modules for interactivity
|
||||
- [ ] Import and register in dashboard.py
|
||||
|
||||
### 5. Navigation
|
||||
|
||||
Add to sidebar in `components/sidebar.py`:
|
||||
|
||||
```python
|
||||
dmc.NavLink(
|
||||
label="{Dashboard Name}",
|
||||
href="/{dashboard_name}",
|
||||
icon=DashIconify(icon="..."),
|
||||
)
|
||||
```
|
||||
|
||||
### 6. Documentation
|
||||
|
||||
- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
|
||||
- [ ] Document data sources
|
||||
- [ ] Document transformation logic
|
||||
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed
|
||||
|
||||
### 7. Testing
|
||||
|
||||
- [ ] Add unit tests for parsers (see the sketch after this list)
|
||||
- [ ] Add unit tests for loaders
|
||||
- [ ] Add integration tests for callbacks
|
||||
- [ ] Run `make test`
|
||||
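A hedged sketch of such a parser test; the parser name, fixture path, and record shape are all placeholders:

```python
# Hypothetical parser unit test for tests/
from portfolio_app.new_dashboard.parsers import parse_source_data


def test_parser_returns_records():
    records = parse_source_data("tests/fixtures/sample_input.csv")
    assert records, "parser should yield at least one record"
    # Assumes dict-shaped records with these placeholder keys
    assert {"area_id", "area_name"} <= set(records[0])
```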
|
||||
### 8. Final Verification
|
||||
|
||||
- [ ] All pages render without errors
|
||||
- [ ] All callbacks respond correctly
|
||||
- [ ] Data loads successfully
|
||||
- [ ] dbt models run cleanly (`make dbt-run`)
|
||||
- [ ] Linting passes (`make lint`)
|
||||
- [ ] Tests pass (`make test`)
|
||||
|
||||
## Example: Toronto Dashboard
|
||||
|
||||
Reference implementation: `portfolio_app/pages/toronto/`
|
||||
|
||||
Key files:
|
||||
- `dashboard.py` - Main layout with 5 tabs
|
||||
- `tabs/overview.py` - Livability scores, scatter plots
|
||||
- `callbacks/map_callbacks.py` - Choropleth interactions
|
||||
- `toronto/models/dimensions.py` - Dimension tables
|
||||
- `toronto/models/facts.py` - Fact tables
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Figure Factories
|
||||
|
||||
```python
# figures/choropleth.py
import geopandas as gpd
import plotly.graph_objects as go


def create_choropleth_figure(
    gdf: gpd.GeoDataFrame,
    value_column: str,
    title: str,
    **kwargs,
) -> go.Figure:
    ...
```
|
||||
|
||||
### Callbacks
|
||||
|
||||
```python
# callbacks/map_callbacks.py
from dash import Input, Output, callback


@callback(
    Output("neighbourhood-details", "children"),
    Input("choropleth-map", "clickData"),
)
def update_details(click_data):
    ...
```
|
||||
|
||||
### Data Loading
|
||||
|
||||
```python
# {dashboard_name}/loaders/load.py
# Schema, Model, and parse_source_data stand in for the dashboard's own
# Pydantic schema, SQLAlchemy model, and parser.
from sqlalchemy.orm import Session


def load_data(session: Session) -> None:
    # Parse from source
    records = parse_source_data()

    # Validate with Pydantic
    validated = [Schema(**r) for r in records]

    # Load to database
    for record in validated:
        session.add(Model(**record.model_dump()))

    session.commit()
```
|
||||
232
docs/runbooks/deployment.md
Normal file
@@ -0,0 +1,232 @@
|
||||
# Runbook: Deployment
|
||||
|
||||
This runbook covers deployment procedures for the Analytics Portfolio application.
|
||||
|
||||
## Environments
|
||||
|
||||
| Environment | Branch | Server | URL |
|
||||
|-------------|--------|--------|-----|
|
||||
| Development | `development` | Local | http://localhost:8050 |
|
||||
| Staging | `staging` | Homelab (hotserv) | Internal |
|
||||
| Production | `main` | Bandit Labs VPS | https://leodata.science |
|
||||
|
||||
## CI/CD Pipeline
|
||||
|
||||
### Automatic Deployment
|
||||
|
||||
Deployments are triggered automatically via Gitea Actions:
|
||||
|
||||
1. **Push to `staging`** → Deploys to staging server
|
||||
2. **Push to `main`** → Deploys to production server
|
||||
|
||||
### Workflow Files
|
||||
|
||||
- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
|
||||
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
|
||||
- `.gitea/workflows/deploy-production.yml` - Production deployment
|
||||
|
||||
### Required Secrets
|
||||
|
||||
Configure these in Gitea repository settings:
|
||||
|
||||
| Secret | Description |
|
||||
|--------|-------------|
|
||||
| `STAGING_HOST` | Staging server hostname/IP |
|
||||
| `STAGING_USER` | SSH username for staging |
|
||||
| `STAGING_SSH_KEY` | Private key for staging SSH |
|
||||
| `PROD_HOST` | Production server hostname/IP |
|
||||
| `PROD_USER` | SSH username for production |
|
||||
| `PROD_SSH_KEY` | Private key for production SSH |
|
||||
|
||||
## Manual Deployment
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- SSH access to target server
|
||||
- Repository cloned at `~/apps/personal-portfolio`
|
||||
- Virtual environment created at `.venv`
|
||||
- Docker and Docker Compose installed
|
||||
- PostgreSQL container running
|
||||
|
||||
### Steps
|
||||
|
||||
```bash
|
||||
# 1. SSH to server
|
||||
ssh user@server
|
||||
|
||||
# 2. Navigate to app directory
|
||||
cd ~/apps/personal-portfolio
|
||||
|
||||
# 3. Pull latest changes
|
||||
git fetch origin {branch}
|
||||
git reset --hard origin/{branch}
|
||||
|
||||
# 4. Activate virtual environment
|
||||
source .venv/bin/activate
|
||||
|
||||
# 5. Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 6. Run database migrations (if any)
|
||||
# python -m alembic upgrade head
|
||||
|
||||
# 7. Run dbt models
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
# 8. Restart application
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
# 9. Verify health
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
## Rollback Procedure
|
||||
|
||||
### Quick Rollback
|
||||
|
||||
If deployment fails, rollback to previous commit:
|
||||
|
||||
```bash
|
||||
# 1. Find previous working commit
|
||||
git log --oneline -10
|
||||
|
||||
# 2. Reset to that commit
|
||||
git reset --hard {commit_hash}
|
||||
|
||||
# 3. Restart services
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
# 4. Verify
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
### Full Rollback (Database)
|
||||
|
||||
If database changes need to be reverted:
|
||||
|
||||
```bash
|
||||
# 1. Stop application
|
||||
docker compose down
|
||||
|
||||
# 2. Restore database backup
|
||||
pg_restore -h localhost -U portfolio -d portfolio backup.dump
|
||||
|
||||
# 3. Revert code
|
||||
git reset --hard {commit_hash}
|
||||
|
||||
# 4. Run dbt at that version
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
# 5. Restart
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Health Checks
|
||||
|
||||
### Application Health
|
||||
|
||||
```bash
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
Expected response:
|
||||
```json
|
||||
{"status": "healthy"}
|
||||
```
|
||||
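For reference, a minimal sketch of how such an endpoint can be exposed from a Dash app through its underlying Flask server; the actual wiring in this repo may differ:

```python
# Hypothetical wiring in portfolio_app/app.py
from dash import Dash

app = Dash(__name__)
server = app.server  # the Flask instance Dash runs on


@server.route("/health")
def health():
    return {"status": "healthy"}  # Flask serializes dicts to JSON
```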
|
||||
### Database Health
|
||||
|
||||
```bash
|
||||
docker compose exec postgres pg_isready -U portfolio
|
||||
```
|
||||
|
||||
### Container Status
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
# All services
|
||||
make logs
|
||||
|
||||
# Specific service
|
||||
make logs SERVICE=postgres
|
||||
|
||||
# Or directly
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
### Check Resource Usage
|
||||
|
||||
```bash
|
||||
docker stats
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Application Won't Start
|
||||
|
||||
1. Check container logs: `docker compose logs app`
|
||||
2. Verify environment variables: `cat .env`
|
||||
3. Check database connectivity: `docker compose exec postgres pg_isready`
|
||||
4. Verify port availability: `lsof -i :8050`
|
||||
|
||||
### Database Connection Errors
|
||||
|
||||
1. Check postgres container: `docker compose ps postgres`
|
||||
2. Verify DATABASE_URL in `.env`
|
||||
3. Check postgres logs: `docker compose logs postgres`
|
||||
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`
|
||||
|
||||
### dbt Failures
|
||||
|
||||
1. Check dbt configuration: `cd dbt && dbt debug`
|
||||
2. Verify profiles.yml: `cat dbt/profiles.yml`
|
||||
3. Run with verbose output: `dbt run --debug`
|
||||
|
||||
### Out of Memory
|
||||
|
||||
1. Check memory usage: `free -h`
|
||||
2. Review container limits in docker-compose.yml
|
||||
3. Consider increasing swap or server resources
|
||||
|
||||
## Backup Procedures
|
||||
|
||||
### Database Backup
|
||||
|
||||
```bash
|
||||
# Create backup
|
||||
docker compose exec -T postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql
|
||||
|
||||
# Compressed backup
|
||||
docker compose exec -T postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
|
||||
```
|
||||
|
||||
### Restore from Backup
|
||||
|
||||
```bash
|
||||
# From SQL file
|
||||
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql
|
||||
|
||||
# From dump file
|
||||
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
|
||||
```
|
||||
|
||||
## Deployment Checklist
|
||||
|
||||
Before deploying to production:
|
||||
|
||||
- [ ] All tests pass (`make test`)
|
||||
- [ ] Linting passes (`make lint`)
|
||||
- [ ] Staging deployment successful
|
||||
- [ ] Manual testing on staging complete
|
||||
- [ ] Database backup taken
|
||||
- [ ] Rollback plan confirmed
|
||||
- [ ] Team notified of deployment window
|
||||
@@ -1,809 +0,0 @@
|
||||
# Toronto Housing Price Dashboard
|
||||
## Portfolio Project — Data Specification & Architecture
|
||||
|
||||
**Version**: 5.1
|
||||
**Last Updated**: January 2026
|
||||
**Status**: Specification Complete
|
||||
|
||||
---
|
||||
|
||||
## Document Context
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Parent Document** | `portfolio_project_plan_v5.md` |
|
||||
| **Role** | Detailed specification for Toronto Housing Dashboard |
|
||||
| **Scope** | Data schemas, source URLs, geographic boundaries, V1/V2 decisions |
|
||||
|
||||
**Rule**: For overall project scope, phasing, tech stack, and deployment architecture, see `portfolio_project_plan_v5.md`. This document provides implementation-level detail for the Toronto Housing project specifically.
|
||||
|
||||
**Terminology Note**: This document uses **Stages 1–4** to describe Toronto Housing implementation steps. These are distinct from the **Phases 1–5** in `portfolio_project_plan_v5.md`, which describe the overall portfolio project lifecycle.
|
||||
|
||||
---
|
||||
|
||||
## Project Overview
|
||||
|
||||
A dashboard analyzing housing price variations across Toronto neighbourhoods over time, with dual analysis tracks:
|
||||
|
||||
| Track | Data Domain | Primary Source | Geographic Unit |
|
||||
|-------|-------------|----------------|-----------------|
|
||||
| **Purchases** | Sales transactions | TRREB Monthly Reports | ~35 Districts |
|
||||
| **Rentals** | Rental market stats | CMHC Rental Market Survey | ~20 Zones |
|
||||
|
||||
**Core Visualization**: Interactive choropleth map of Toronto with toggle between rental/purchase analysis, time-series exploration by month/year.
|
||||
|
||||
**Enrichment Layer** (V1: overlay only): Neighbourhood-level demographic and socioeconomic context including population density, education attainment, and income. Crime data deferred to Phase 4 of the portfolio project (post-Energy project).
|
||||
|
||||
**Tech Stack & Deployment**: See `portfolio_project_plan_v5.md` → Tech Stack, Deployment Architecture
|
||||
|
||||
---
|
||||
|
||||
## Geographic Layers
|
||||
|
||||
### Layer Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ City of Toronto Official Neighbourhoods (158) │ ← Reference overlay + Enrichment data
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ TRREB Districts (~35) — W01, C01, E01, etc. │ ← Purchase data
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ CMHC Survey Zones (~20) — Census Tract aligned │ ← Rental data
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Boundary Files
|
||||
|
||||
| Layer | Zones | Format | Source | Status |
|
||||
|-------|-------|--------|--------|--------|
|
||||
| **City Neighbourhoods** | 158 | GeoJSON, Shapefile | [GitHub - jasonicarter/toronto-geojson](https://github.com/jasonicarter/toronto-geojson) | ✅ Ready to use |
|
||||
| **TRREB Districts** | ~35 | PDF only | [TRREB Toronto Map PDF](https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf) | ⚠ Requires manual digitization |
|
||||
| **CMHC Zones** | ~20 | R package | R `cmhc` package via `get_cmhc_geography()` | ✅ Available (see note) |
|
||||
|
||||
### Digitization Task: TRREB Districts
|
||||
|
||||
**Input**: TRREB Toronto PDF map
|
||||
**Output**: GeoJSON with district codes (W01-W10, C01-C15, E01-E11)
|
||||
**Tool**: QGIS
|
||||
|
||||
**Process**:
|
||||
1. Import PDF as raster layer in QGIS
|
||||
2. Create vector layer with polygon features
|
||||
3. Trace district boundaries
|
||||
4. Add attributes: `district_code`, `district_name`, `area_type` (West/Central/East)
|
||||
5. Export as GeoJSON (WGS84 / EPSG:4326)
|
||||
|
||||
### CMHC Zone Boundaries
|
||||
|
||||
**Source**: The R `cmhc` package provides CMHC survey geography via the `get_cmhc_geography()` function.
|
||||
|
||||
**Extraction Process**:
|
||||
```r
|
||||
# In R
|
||||
library(cmhc)
|
||||
library(sf)
|
||||
|
||||
# Get Toronto CMA zones
|
||||
toronto_zones <- get_cmhc_geography(
|
||||
geography_type = "ZONE",
|
||||
cma = "Toronto"
|
||||
)
|
||||
|
||||
# Export to GeoJSON for Python/PostGIS
|
||||
st_write(toronto_zones, "cmhc_zones.geojson", driver = "GeoJSON")
|
||||
```
|
||||
|
||||
**Output**: `data/toronto/raw/geo/cmhc_zones.geojson`
|
||||
|
||||
**Why R?**: CMHC zone boundaries are not published as standalone files; the `cmhc` R package is the only reliable programmatic source. The extraction is done once, and the resulting GeoJSON is then used in the Python stack.
|
||||
|
||||
### ⚠ Neighbourhood Boundary Change (140 → 158)
|
||||
|
||||
The City of Toronto updated from 140 to 158 social planning neighbourhoods in **April 2021**. This affects data alignment:
|
||||
|
||||
| Data Source | Pre-2021 | Post-2021 | Handling |
|
||||
|-------------|----------|-----------|----------|
|
||||
| Census (2016 and earlier) | 140 neighbourhoods | N/A | Use 140-model files |
|
||||
| Census (2021+) | N/A | 158 neighbourhoods | Use 158-model files |
|
||||
|
||||
**V1 Strategy**: Use 2021 Census on 158 boundaries only. Defer historical trend analysis to portfolio Phase 4.
|
||||
|
||||
---
|
||||
|
||||
## Data Source #1: TRREB Monthly Market Reports
|
||||
|
||||
### Source Details
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Provider** | Toronto Regional Real Estate Board |
|
||||
| **URL** | [TRREB Market Watch](https://trreb.ca/index.php/market-news/market-watch) |
|
||||
| **Format** | PDF (monthly reports) |
|
||||
| **Update Frequency** | Monthly |
|
||||
| **Historical Availability** | 2007–Present |
|
||||
| **Access** | Public (aggregate data in PDFs) |
|
||||
| **Extraction Method** | PDF parsing (`pdfplumber` or `camelot-py`; see the sketch below) |
|
||||
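A minimal extraction sketch using `pdfplumber`, one of the two candidate libraries above; the page index is an assumption, and real reports will need per-year tuning of table settings:

```python
# Hypothetical first pass at pulling the "Summary by Area" table
import pdfplumber

with pdfplumber.open("market_watch_2024_01.pdf") as pdf:
    page = pdf.pages[2]  # summary tables start around page 3
    for row in page.extract_table() or []:
        print(row)  # inspect raw rows before mapping to schema columns
```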
|
||||
### Available Tables
|
||||
|
||||
#### Table: `trreb_monthly_summary`
|
||||
**Location in PDF**: Pages 3-4 (Summary by Area)
|
||||
|
||||
| Column | Data Type | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `report_date` | DATE | First of month (YYYY-MM-01) |
|
||||
| `area_code` | VARCHAR(3) | District code (W01, C01, E01, etc.) |
|
||||
| `area_name` | VARCHAR(100) | District name |
|
||||
| `area_type` | VARCHAR(10) | West / Central / East / North |
|
||||
| `sales` | INTEGER | Number of transactions |
|
||||
| `dollar_volume` | DECIMAL | Total sales volume ($) |
|
||||
| `avg_price` | DECIMAL | Average sale price ($) |
|
||||
| `median_price` | DECIMAL | Median sale price ($) |
|
||||
| `new_listings` | INTEGER | New listings count |
|
||||
| `active_listings` | INTEGER | Active listings at month end |
|
||||
| `avg_sp_lp` | DECIMAL | Avg sale price / list price ratio (%) |
|
||||
| `avg_dom` | INTEGER | Average days on market |
|
||||
|
||||
### Dimensions
|
||||
|
||||
| Dimension | Granularity | Values |
|
||||
|-----------|-------------|--------|
|
||||
| **Time** | Monthly | 2007-01 to present |
|
||||
| **Geography** | District | ~35 TRREB districts |
|
||||
| **Property Type** | Aggregate | All residential (no breakdown in summary) |
|
||||
|
||||
### Metrics Available
|
||||
|
||||
| Metric | Aggregation | Use Case |
|
||||
|--------|-------------|----------|
|
||||
| `avg_price` | Pre-calculated monthly avg | Primary price indicator |
|
||||
| `median_price` | Pre-calculated monthly median | Robust price indicator |
|
||||
| `sales` | Count | Market activity volume |
|
||||
| `avg_dom` | Average | Market velocity |
|
||||
| `avg_sp_lp` | Ratio | Buyer/seller market indicator |
|
||||
| `new_listings` | Count | Supply indicator |
|
||||
| `active_listings` | Snapshot | Inventory level |
|
||||
|
||||
### ⚠ Limitations
|
||||
|
||||
- No transaction-level data (aggregates only)
|
||||
- Property type breakdown requires parsing additional tables
|
||||
- PDF structure may vary slightly across years
|
||||
- District boundaries have been stable since 2011; earlier reports may not align
|
||||
|
||||
---
|
||||
|
||||
## Data Source #2: CMHC Rental Market Survey
|
||||
|
||||
### Source Details
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Provider** | Canada Mortgage and Housing Corporation |
|
||||
| **URL** | [CMHC Housing Market Information Portal](https://www03.cmhc-schl.gc.ca/hmip-pimh/) |
|
||||
| **Format** | CSV export, API |
|
||||
| **Update Frequency** | Annual (October survey) |
|
||||
| **Historical Availability** | 1990–Present |
|
||||
| **Access** | Public, free registration for bulk downloads |
|
||||
| **Geographic Levels** | CMA → Zone → Neighbourhood → Census Tract |
|
||||
|
||||
### Available Tables
|
||||
|
||||
#### Table: `cmhc_rental_summary`
|
||||
**Portal Path**: Toronto → Primary Rental Market → Summary Statistics
|
||||
|
||||
| Column | Data Type | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `survey_year` | INTEGER | Survey year (October) |
|
||||
| `zone_code` | VARCHAR(10) | CMHC zone identifier |
|
||||
| `zone_name` | VARCHAR(100) | Zone name |
|
||||
| `bedroom_type` | VARCHAR(20) | Bachelor / 1-Bed / 2-Bed / 3-Bed+ / Total |
|
||||
| `universe` | INTEGER | Total rental units in zone |
|
||||
| `vacancy_rate` | DECIMAL | Vacancy rate (%) |
|
||||
| `vacancy_rate_reliability` | VARCHAR(1) | Reliability code (a/b/c/d) |
|
||||
| `availability_rate` | DECIMAL | Availability rate (%) |
|
||||
| `average_rent` | DECIMAL | Average monthly rent ($) |
|
||||
| `average_rent_reliability` | VARCHAR(1) | Reliability code |
|
||||
| `median_rent` | DECIMAL | Median monthly rent ($) |
|
||||
| `rent_change_pct` | DECIMAL | YoY rent change (%) |
|
||||
| `turnover_rate` | DECIMAL | Unit turnover rate (%) |
|
||||
|
||||
### Dimensions
|
||||
|
||||
| Dimension | Granularity | Values |
|
||||
|-----------|-------------|--------|
|
||||
| **Time** | Annual | 1990 to present (October snapshot) |
|
||||
| **Geography** | Zone | ~20 CMHC zones in Toronto CMA |
|
||||
| **Bedroom Type** | Category | Bachelor, 1-Bed, 2-Bed, 3-Bed+, Total |
|
||||
| **Structure Type** | Category | Row, Apartment (available in detailed tables) |
|
||||
|
||||
### Metrics Available
|
||||
|
||||
| Metric | Aggregation | Use Case |
|
||||
|--------|-------------|----------|
|
||||
| `average_rent` | Pre-calculated avg | Primary rent indicator |
|
||||
| `median_rent` | Pre-calculated median | Robust rent indicator |
|
||||
| `vacancy_rate` | Percentage | Market tightness |
|
||||
| `availability_rate` | Percentage | Supply accessibility |
|
||||
| `turnover_rate` | Percentage | Tenant mobility |
|
||||
| `rent_change_pct` | YoY % | Rent growth tracking |
|
||||
| `universe` | Count | Market size |
|
||||
|
||||
### Reliability Codes
|
||||
|
||||
| Code | Meaning | Coefficient of Variation |
|
||||
|------|---------|-------------------------|
|
||||
| `a` | Excellent | CV ≤ 2.5% |
|
||||
| `b` | Good | 2.5% < CV ≤ 5% |
|
||||
| `c` | Fair | 5% < CV ≤ 10% |
|
||||
| `d` | Poor (use with caution) | CV > 10% |
|
||||
| `**` | Data suppressed | Sample too small |
|
||||
|
||||
### ⚠ Limitations
|
||||
|
||||
- Annual only (no monthly granularity)
|
||||
- October snapshot (point-in-time)
|
||||
- Zones are larger than TRREB districts
|
||||
- Purpose-built rental only (excludes condo rentals in base survey)
|
||||
|
||||
---
|
||||
|
||||
## Data Source #3: City of Toronto Open Data
|
||||
|
||||
### Source Details
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Provider** | City of Toronto |
|
||||
| **URL** | [Toronto Open Data Portal](https://open.toronto.ca/) |
|
||||
| **Format** | GeoJSON, Shapefile, CSV |
|
||||
| **Use Case** | Reference layer, demographic enrichment |
|
||||
|
||||
### Relevant Datasets
|
||||
|
||||
#### Dataset: `neighbourhoods`
|
||||
|
||||
| Column | Data Type | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `area_id` | INTEGER | Neighbourhood ID (1-158) |
|
||||
| `area_name` | VARCHAR(100) | Official neighbourhood name |
|
||||
| `geometry` | POLYGON | Boundary geometry |
|
||||
|
||||
#### Dataset: `neighbourhood_profiles` (Census-linked)
|
||||
|
||||
| Column | Data Type | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `neighbourhood_id` | INTEGER | Links to neighbourhoods |
|
||||
| `population` | INTEGER | Total population |
|
||||
| `avg_household_income` | DECIMAL | Average household income |
|
||||
| `dwelling_count` | INTEGER | Total dwellings |
|
||||
| `owner_pct` | DECIMAL | % owner-occupied |
|
||||
| `renter_pct` | DECIMAL | % renter-occupied |
|
||||
|
||||
### Enrichment Potential
|
||||
|
||||
Can overlay demographic context on housing data:
|
||||
- Income brackets by neighbourhood
|
||||
- Ownership vs rental ratios
|
||||
- Population density
|
||||
- Dwelling type distribution
|
||||
|
||||
---
|
||||
|
||||
## Data Source #4: Enrichment Data (Density, Education)
|
||||
|
||||
### Purpose
|
||||
|
||||
Provide socioeconomic context to housing price analysis. Enables questions like:
|
||||
- Do neighbourhoods with higher education attainment have higher prices?
|
||||
- How does population density correlate with price per square foot?
|
||||
|
||||
### Geographic Alignment Reality
|
||||
|
||||
**Critical constraint**: Enrichment data is available at the **158-neighbourhood** level, while core housing data sits at **TRREB districts (~35)** and **CMHC zones (~20)**. These do not align cleanly.
|
||||
|
||||
```
|
||||
158 Neighbourhoods (fine) → Enrichment data lives here
|
||||
(no clean crosswalk)
|
||||
~35 TRREB Districts (coarse) → Purchase data lives here
|
||||
~20 CMHC Zones (coarse) → Rental data lives here
|
||||
```
|
||||
|
||||
### Available Enrichment Datasets
|
||||
|
||||
#### Dataset: Neighbourhood Profiles (Census)
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Provider** | City of Toronto (via Statistics Canada Census) |
|
||||
| **URL** | [Toronto Open Data - Neighbourhood Profiles](https://open.toronto.ca/dataset/neighbourhood-profiles/) |
|
||||
| **Format** | CSV, JSON, XML, XLSX |
|
||||
| **Update Frequency** | Every 5 years (Census cycle) |
|
||||
| **Available Years** | 2001, 2006, 2011, 2016, 2021 |
|
||||
| **Geographic Unit** | 158 neighbourhoods (140 pre-2021) |
|
||||
|
||||
**Key Variables**:
|
||||
|
||||
| Variable | Description | Use Case |
|
||||
|----------|-------------|----------|
|
||||
| `population` | Total population | Density calculation |
|
||||
| `land_area_sqkm` | Area in square kilometers | Density calculation |
|
||||
| `pop_density_per_sqkm` | Population per km² | Density metric |
|
||||
| `pct_bachelors_or_higher` | % age 25-64 with bachelor's+ | Education proxy |
|
||||
| `median_household_income` | Median total household income | Income metric |
|
||||
| `avg_household_income` | Average total household income | Income metric |
|
||||
| `pct_owner_occupied` | % owner-occupied dwellings | Tenure split |
|
||||
| `pct_renter_occupied` | % renter-occupied dwellings | Tenure split |
|
||||
|
||||
**Download URL (2021, 158 neighbourhoods)**:
|
||||
```
|
||||
https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx
|
||||
```
|
||||
|
||||
### Crime Data — Deferred to Portfolio Phase 4
|
||||
|
||||
Crime data (TPS Neighbourhood Crime Rates) is **not included in V1 scope**. It will be added in portfolio Phase 4 after the Energy Pricing project is complete.
|
||||
|
||||
**Rationale**:
|
||||
- Crime data is socially/politically sensitive and requires careful methodology documentation
|
||||
- V1 focuses on core housing metrics and policy events
|
||||
- Deferral reduces scope creep risk
|
||||
|
||||
**Future Reference** (Portfolio Phase 4):
|
||||
- Source: [TPS Public Safety Data Portal](https://data.torontopolice.on.ca/)
|
||||
- Dataset: Neighbourhood Crime Rates (Major Crime Indicators)
|
||||
- Geographic Unit: 158 neighbourhoods
|
||||
|
||||
### V1 Enrichment Data Summary
|
||||
|
||||
| Measure | Source | Geography | Frequency | Format | Status |
|
||||
|---------|--------|-----------|-----------|--------|--------|
|
||||
| **Population Density** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||
| **Education Attainment** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||
| **Median Income** | Neighbourhood Profiles | 158 neighbourhoods | Census (5-year) | CSV/JSON | ✅ Ready |
|
||||
| **Crime Rates (MCI)** | TPS Data Portal | 158 neighbourhoods | Annual | GeoJSON/CSV | Deferred to Portfolio Phase 4 |
|
||||
|
||||
---
|
||||
|
||||
## Data Source #5: Policy Events
|
||||
|
||||
### Purpose
|
||||
|
||||
Provide temporal context for housing price movements. Display as annotation markers on time series charts. **No causation claims** — correlation/context only.
|
||||
|
||||
### Event Schema
|
||||
|
||||
#### Table: `dim_policy_event`
|
||||
|
||||
| Column | Data Type | Description |
|
||||
|--------|-----------|-------------|
|
||||
| `event_id` | INTEGER (PK) | Auto-increment primary key |
|
||||
| `event_date` | DATE | Date event was announced/occurred |
|
||||
| `effective_date` | DATE | Date policy took effect (if different) |
|
||||
| `level` | VARCHAR(20) | `federal` / `provincial` / `municipal` |
|
||||
| `category` | VARCHAR(20) | `monetary` / `tax` / `regulatory` / `supply` / `economic` |
|
||||
| `title` | VARCHAR(200) | Short event title for display |
|
||||
| `description` | TEXT | Longer description for tooltip |
|
||||
| `expected_direction` | VARCHAR(10) | `bearish` / `bullish` / `neutral` |
|
||||
| `source_url` | VARCHAR(500) | Link to official announcement/documentation |
|
||||
| `confidence` | VARCHAR(10) | `high` / `medium` / `low` |
|
||||
| `created_at` | TIMESTAMP | Record creation timestamp |
|
||||
|
||||
### Event Tiers
|
||||
|
||||
| Tier | Level | Category Examples | Inclusion Criteria |
|
||||
|------|-------|-------------------|-------------------|
|
||||
| **1** | Federal | BoC rate decisions, OSFI stress tests | Always include; objective, documented |
|
||||
| **1** | Provincial | Fair Housing Plan, foreign buyer tax, rent control | Always include; legislative record |
|
||||
| **2** | Municipal | Zoning reforms, development charges | Include if material impact expected |
|
||||
| **2** | Economic | COVID measures, major employer closures | Include if Toronto-specific impact |
|
||||
| **3** | Market | Major project announcements | Strict criteria; must be verifiable |
|
||||
|
||||
### Expected Direction Values
|
||||
|
||||
| Value | Meaning | Example |
|
||||
|-------|---------|---------|
|
||||
| `bullish` | Expected to increase prices | Rate cut, supply restriction |
|
||||
| `bearish` | Expected to decrease prices | Rate hike, foreign buyer tax |
|
||||
| `neutral` | Uncertain or mixed impact | Regulatory clarification |
|
||||
|
||||
### ⚠ Caveats
|
||||
|
||||
- **No causation claims**: Events are context, not explanation
|
||||
- **Lag effects**: Policy impact may not be immediate
|
||||
- **Confounding factors**: Multiple simultaneous influences
|
||||
- **Display only**: No statistical analysis in V1
|
||||
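A display-only sketch of those markers in Plotly; the `dates`, `avg_prices`, and `policy_events` inputs are assumed to come from the fact and event tables defined in this document:

```python
# Hypothetical time-series annotation pass; context only, no statistics
import plotly.graph_objects as go

fig = go.Figure(go.Scatter(x=dates, y=avg_prices, mode="lines"))
for event in policy_events:  # rows from dim_policy_event
    fig.add_vline(x=event["event_date"], line_dash="dot")
    fig.add_annotation(
        x=event["event_date"], y=1, yref="paper",
        text=event["title"], showarrow=False, textangle=-90,
    )
```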
|
||||
### Sample Events (Tier 1)
|
||||
|
||||
| Date | Level | Category | Title | Direction |
|
||||
|------|-------|----------|-------|-----------|
|
||||
| 2017-04-20 | provincial | tax | Ontario Fair Housing Plan | bearish |
|
||||
| 2018-01-01 | federal | regulatory | OSFI B-20 Stress Test | bearish |
|
||||
| 2020-03-27 | federal | monetary | BoC Emergency Rate Cut (0.25%) | bullish |
|
||||
| 2022-03-02 | federal | monetary | BoC Rate Hike Cycle Begins | bearish |
|
||||
| 2023-06-01 | federal | tax | Federal 2-Year Foreign Buyer Ban | bearish |
|
||||
|
||||
---
|
||||
|
||||
## Data Integration Strategy
|
||||
|
||||
### Temporal Alignment
|
||||
|
||||
| Source | Native Frequency | Alignment Strategy |
|
||||
|--------|------------------|---------------------|
|
||||
| TRREB | Monthly | Use as-is |
|
||||
| CMHC | Annual (October) | Spread to monthly OR display annual overlay |
|
||||
| Census/Enrichment | 5-year | Static snapshot; display as reference |
|
||||
| Policy Events | Event-based | Display as vertical markers on time axis |
|
||||
|
||||
**Recommendation**: Keep separate time axes. TRREB monthly for purchases, CMHC annual for rentals. Don't force artificial monthly rental data.
|
||||
|
||||
### Geographic Alignment
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ VISUALIZATION APPROACH │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ Purchase Mode Rental Mode │
|
||||
│ ───────────────── ────────────── │
|
||||
│ Map: TRREB Districts Map: CMHC Zones │
|
||||
│ Time: Monthly slider Time: Annual selector │
|
||||
│ Metrics: Price, Sales Metrics: Rent, Vacancy │
|
||||
│ │
|
||||
│ ┌───────────────────────────────────────────────────────┐ │
|
||||
│ │ City Neighbourhoods Overlay │ │
|
||||
│ │ (158 boundaries as reference layer) │ │
|
||||
│ │ + Enrichment data (density, education, income) │ │
|
||||
│ ──────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Enrichment Integration Strategy (Phased)
|
||||
|
||||
#### V1: Reference Overlay (Current Scope)
|
||||
|
||||
**Approach**: Display neighbourhood enrichment as a separate toggle-able layer. No joins to housing data.
|
||||
|
||||
**UX**:
|
||||
- User hovers over TRREB district → tooltip shows "This district contains neighbourhoods: Annex, Casa Loma, Yorkville..."
|
||||
- User toggles "Show Enrichment" → choropleth switches to neighbourhood-level density/education/income
|
||||
- Enrichment and housing metrics displayed side-by-side, not merged
|
||||
|
||||
**Pros**:
|
||||
- No imputation or dodgy aggregations
|
||||
- Honest about geographic mismatch
|
||||
- Ships faster
|
||||
|
||||
**Cons**:
|
||||
- Can't do correlation analysis (price vs. enrichment) directly in dashboard
|
||||
|
||||
**Implementation**:
|
||||
- `dim_neighbourhood` as standalone dimension (no FK to fact tables)
|
||||
- Spatial lookup on hover (point-in-polygon; sketched below)
|
||||
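A minimal point-in-polygon sketch for that hover lookup, assuming the 158-neighbourhood layer is already loaded as a GeoDataFrame in WGS84 with an `area_name` column:

```python
# Hypothetical hover helper for the V1 overlay
from shapely.geometry import Point


def neighbourhoods_at(lon: float, lat: float, nbhd_gdf) -> list[str]:
    hit = nbhd_gdf[nbhd_gdf.contains(Point(lon, lat))]
    return hit["area_name"].tolist()
```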
|
||||
#### V2/Portfolio Phase 4: Area-Weighted Aggregation (Future Scope)
|
||||
|
||||
**Approach**: Pre-compute area-weighted averages of neighbourhood metrics for each TRREB district and CMHC zone.
|
||||
|
||||
**Process** (steps 1-3 sketched after this list):
|
||||
1. Spatial join: intersect neighbourhood polygons with TRREB/CMHC polygons
|
||||
2. Compute overlap area for each neighbourhood-district pair
|
||||
3. Weight neighbourhood metrics by overlap area proportion
|
||||
4. User selects aggregation method in UI
|
||||
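A sketch of steps 1-3 with GeoPandas; the file paths and `neighbourhood_id` column are assumptions, and EPSG:32617 (the UTM zone covering Toronto) is used so overlap areas come out in square metres:

```python
# Hypothetical Portfolio Phase 4 crosswalk computation
import geopandas as gpd

nbhd = gpd.read_file("data/toronto/raw/geo/toronto_neighbourhoods.geojson")
districts = gpd.read_file("data/toronto/raw/geo/trreb_districts.geojson")

# Step 1: intersect neighbourhood polygons with TRREB district polygons
overlap = gpd.overlay(nbhd, districts, how="intersection")

# Steps 2-3: compute overlap areas, then each pair's share of its neighbourhood
overlap["overlap_m2"] = overlap.to_crs(epsg=32617).geometry.area
overlap["area_overlap_pct"] = overlap["overlap_m2"] / overlap.groupby(
    "neighbourhood_id"
)["overlap_m2"].transform("sum")
```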
|
||||
**Aggregation Methods to Expose**:
|
||||
|
||||
| Method | Description | Best For |
|
||||
|--------|-------------|----------|
|
||||
| **Area-weighted mean** | Weight by % overlap area | Continuous metrics (density) |
|
||||
| **Population-weighted mean** | Weight by population in overlap | Per-capita metrics (education) |
|
||||
| **Majority assignment** | Assign neighbourhood to district with >50% overlap | Categorical data |
|
||||
| **Max overlap** | Assign to single district with largest overlap | 1:1 mapping needs |
|
||||
|
||||
**Default**: Population-weighted (more defensible for per-capita metrics). Hide selector behind "Advanced" toggle.
|
||||
|
||||
### V1 Future-Proofing (Do Now)
|
||||
|
||||
| Action | Why |
|
||||
|--------|-----|
|
||||
| Store neighbourhood boundaries in same CRS as TRREB/CMHC (WGS84) | Avoids reprojection headaches |
|
||||
| Keep `dim_neighbourhood` normalized (not denormalized into district tables) | Clean separation for V2 join |
|
||||
| Document Census year for each metric | Ready for 2026 Census |
|
||||
| Include `census_year` column in dim_neighbourhood | Enables SCD tracking |
|
||||
|
||||
### V1 Defer (Don't Do Yet)
|
||||
|
||||
| Action | Why Not |
|
||||
|--------|---------|
|
||||
| Pre-compute area-weighted crosswalk | Don't need for V1 |
|
||||
| Build aggregation method selector UI | No backend to support it |
|
||||
| Crime data integration | Deferred to Portfolio Phase 4 |
|
||||
| Historical neighbourhood boundary reconciliation (140→158) | Use 2021+ data only for V1 |
|
||||
|
||||
---
|
||||
|
||||
## Proposed Data Model
|
||||
|
||||
### Star Schema
|
||||
|
||||
```
|
||||
┌──────────────────┐
|
||||
│ dim_time │
|
||||
├──────────────────┤
|
||||
│ date_key (PK) │
|
||||
│ year │
|
||||
│ month │
|
||||
│ quarter │
|
||||
│ month_name │
|
||||
───────────────────────┘
|
||||
│
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ │ │
|
||||
│
|
||||
┌──────────────────┐ │ ┌──────────────────┐
|
||||
│ dim_trreb_district│ │ │ dim_cmhc_zone │
|
||||
├──────────────────┤ │ ├──────────────────┤
|
||||
│ district_key (PK)│ │ │ zone_key (PK) │
|
||||
│ district_code │ │ │ zone_code │
|
||||
│ district_name │ │ │ zone_name │
|
||||
│ area_type │ │ │ geometry │
|
||||
│ geometry │
|
||||
───────────────────────┘ │ │
|
||||
│ │ │
|
||||
│
|
||||
┌──────────────────┐ │ ┌──────────────────┐
|
||||
│ fact_purchases │ │ │ fact_rentals │
|
||||
├──────────────────┤ │ ├──────────────────┤
|
||||
│ date_key (FK) │ │ │ date_key (FK) │
|
||||
│ district_key (FK)│ │ │ zone_key (FK) │
|
||||
│ sales_count │ │ │ bedroom_type │
|
||||
│ avg_price │ │ │ avg_rent │
|
||||
│ median_price │ │ │ median_rent │
|
||||
│ new_listings │ │ │ vacancy_rate │
|
||||
│ active_listings │ │ │ universe │
|
||||
│ avg_dom │ │ │ turnover_rate │
|
||||
│ avg_sp_lp │ │ │ reliability_code │
|
||||
─────────────────────┘ │ ─────────────────────┘
|
||||
│
|
||||
|
||||
┌───────────────────────────┐
|
||||
│ dim_neighbourhood │
|
||||
├───────────────────────────┤
|
||||
│ neighbourhood_id (PK) │
|
||||
│ name │
|
||||
│ geometry │
|
||||
│ population │
|
||||
│ land_area_sqkm │
|
||||
│ pop_density_per_sqkm │
|
||||
│ pct_bachelors_or_higher │
|
||||
│ median_household_income │
|
||||
│ pct_owner_occupied │
|
||||
│ pct_renter_occupied │
|
||||
│ census_year │ ← For SCD tracking
|
||||
──────────────────────────────┘
|
||||
|
||||
┌───────────────────────────┐
|
||||
│ dim_policy_event │
|
||||
├───────────────────────────┤
|
||||
│ event_id (PK) │
|
||||
│ event_date │
|
||||
│ effective_date │
|
||||
│ level │ ← federal/provincial/municipal
|
||||
│ category │ ← monetary/tax/regulatory/supply/economic
|
||||
│ title │
|
||||
│ description │
|
||||
│ expected_direction │ ← bearish/bullish/neutral
|
||||
│ source_url │
|
||||
│ confidence │ ← high/medium/low
|
||||
│ created_at │
|
||||
──────────────────────────────┘
|
||||
|
||||
┌───────────────────────────┐
|
||||
│ bridge_district_neighbourhood │ ← Portfolio Phase 4 ONLY
|
||||
├───────────────────────────┤
|
||||
│ district_key (FK) │
|
||||
│ neighbourhood_id (FK) │
|
||||
│ area_overlap_pct │
|
||||
│ population_overlap │ ← For pop-weighted agg
|
||||
──────────────────────────────┘
|
||||
```
|
||||
|
||||
**Notes**:
|
||||
- `dim_neighbourhood` has no FK relationship to fact tables in V1
|
||||
- `dim_policy_event` is standalone (no FK to facts); used for time-series annotation
|
||||
- `bridge_district_neighbourhood` is Portfolio Phase 4 scope only
|
||||
- Similar bridge table needed for CMHC zones in Portfolio Phase 4
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
> **Note**: Toronto Housing data logic lives in `portfolio_app/toronto/`. See `portfolio_project_plan_v5.md` for full project structure.
|
||||
|
||||
### Data Directory Structure
|
||||
|
||||
```
|
||||
data/
|
||||
└── toronto/
|
||||
├── raw/
|
||||
│ ├── trreb/
|
||||
│ │ └── market_watch_YYYY_MM.pdf
|
||||
│ ├── cmhc/
|
||||
│ │ └── rental_survey_YYYY.csv
|
||||
│ ├── enrichment/
|
||||
│ │ └── neighbourhood_profiles_2021.xlsx
|
||||
│ └── geo/
|
||||
│ ├── toronto_neighbourhoods.geojson
|
||||
│ ├── trreb_districts.geojson ← (to be created via QGIS)
|
||||
│ └── cmhc_zones.geojson ← (from R cmhc package)
|
||||
│
|
||||
├── processed/ ← gitignored
|
||||
│ ├── fact_purchases.parquet
|
||||
│ ├── fact_rentals.parquet
|
||||
│ ├── dim_time.parquet
|
||||
│ ├── dim_trreb_district.parquet
|
||||
│ ├── dim_cmhc_zone.parquet
|
||||
│ ├── dim_neighbourhood.parquet
|
||||
│ └── dim_policy_event.parquet
|
||||
│
|
||||
└── reference/
|
||||
├── policy_events.csv ← Curated event list
|
||||
└── neighbourhood_boundary_changelog.md ← 140→158 notes
|
||||
```
|
||||
|
||||
### Code Module Structure
|
||||
|
||||
```
|
||||
portfolio_app/toronto/
|
||||
├── __init__.py
|
||||
├── parsers/
|
||||
│ ├── __init__.py
|
||||
│ ├── trreb.py # PDF extraction
|
||||
│ └── cmhc.py # CSV processing
|
||||
├── loaders/
|
||||
│ ├── __init__.py
|
||||
│ └── database.py # DB operations
|
||||
├── schemas/ # Pydantic models
|
||||
│ ├── __init__.py
|
||||
│ ├── trreb.py
|
||||
│ ├── cmhc.py
|
||||
│ ├── enrichment.py
|
||||
│ └── policy_event.py
|
||||
├── models/ # SQLAlchemy ORM
|
||||
│ ├── __init__.py
|
||||
│ ├── base.py # DeclarativeBase, engine
|
||||
│ ├── dimensions.py # dim_time, dim_trreb_district, dim_policy_event, etc.
|
||||
│ └── facts.py # fact_purchases, fact_rentals
|
||||
└── transforms/
|
||||
└── __init__.py
|
||||
```
|
||||
|
||||
### Notebooks
|
||||
|
||||
```
|
||||
notebooks/
|
||||
├── 01_trreb_pdf_extraction.ipynb
|
||||
├── 02_cmhc_data_prep.ipynb
|
||||
├── 03_geo_layer_prep.ipynb
|
||||
├── 04_enrichment_data_prep.ipynb
|
||||
├── 05_policy_events_curation.ipynb
|
||||
└── 06_spatial_crosswalk.ipynb ← Portfolio Phase 4 only
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ✅ Implementation Checklist
|
||||
|
||||
> **Note**: These are **Stages** within the Toronto Housing project (Portfolio Phase 1). They are distinct from the overall portfolio **Phases** defined in `portfolio_project_plan_v5.md`.
|
||||
|
||||
### Stage 1: Data Acquisition
|
||||
- [ ] Download TRREB monthly PDFs (2020-present as MVP)
|
||||
- [ ] Register for CMHC portal and export Toronto rental data
|
||||
- [ ] Extract CMHC zone boundaries via R `cmhc` package
|
||||
- [ ] Download City of Toronto neighbourhood GeoJSON (158 boundaries)
|
||||
- [ ] Digitize TRREB district boundaries in QGIS
|
||||
- [ ] Download Neighbourhood Profiles (2021 Census, 158-model)
|
||||
|
||||
### Stage 2: Data Processing
|
||||
- [ ] Build TRREB PDF parser (`portfolio_app/toronto/parsers/trreb.py`)
|
||||
- [ ] Build Pydantic schemas (`portfolio_app/toronto/schemas/`)
|
||||
- [ ] Build SQLAlchemy models (`portfolio_app/toronto/models/`)
|
||||
- [ ] Extract and validate TRREB monthly summaries
|
||||
- [ ] Clean and structure CMHC rental data
|
||||
- [ ] Process Neighbourhood Profiles into `dim_neighbourhood`
|
||||
- [ ] Curate and load policy events into `dim_policy_event`
|
||||
- [ ] Create dimension tables
|
||||
- [ ] Build fact tables
|
||||
- [ ] Validate all geospatial layers use same CRS (WGS84/EPSG:4326)
|
||||
|
||||
### Stage 3: Visualization (V1)
|
||||
- [ ] Create dashboard page (`portfolio_app/pages/toronto/dashboard.py`)
|
||||
- [ ] Build choropleth figures (`portfolio_app/figures/choropleth.py`)
|
||||
- [ ] Build time series figures (`portfolio_app/figures/time_series.py`)
|
||||
- [ ] Design dashboard layout (purchase/rental toggle)
|
||||
- [ ] Implement choropleth map with layer switching
|
||||
- [ ] Add time slider/selector
|
||||
- [ ] Build neighbourhood overlay (toggle-able)
|
||||
- [ ] Add enrichment layer toggle (density/education/income choropleth)
|
||||
- [ ] Add policy event markers on time series
|
||||
- [ ] Add tooltips with cross-reference info ("This district contains...")
|
||||
- [ ] Add tooltips showing enrichment metrics on hover
|
||||
|
||||
### Stage 4: Polish (V1)
|
||||
- [ ] Add data source citations
|
||||
- [ ] Document methodology (especially geographic limitations)
|
||||
- [ ] Write docs (`docs/methodology.md`, `docs/data_sources.md`)
|
||||
- [ ] Deploy to portfolio
|
||||
|
||||
### Future Enhancements (Portfolio Phase 4 — Post-Energy Project)
|
||||
- [ ] Add crime data to dim_neighbourhood
|
||||
- [ ] Build spatial crosswalk (neighbourhood ↔ district/zone intersections)
|
||||
- [ ] Compute area-weighted and population-weighted aggregations
|
||||
- [ ] Add aggregation method selector to UI
|
||||
- [ ] Enable correlation analysis (price vs. enrichment metrics)
|
||||
- [ ] Add historical neighbourhood boundary support (140→158)
|
||||
|
||||
**Deployment & dbt Architecture**: See `portfolio_project_plan_v5.md` for:
|
||||
- dbt layer structure and testing strategy
|
||||
- Deployment architecture
|
||||
- Data quality framework
|
||||
|
||||
---
|
||||
|
||||
## References & Links
|
||||
|
||||
### Core Housing Data
|
||||
|
||||
| Resource | URL |
|
||||
|----------|-----|
|
||||
| TRREB Market Watch | https://trreb.ca/index.php/market-news/market-watch |
|
||||
| CMHC Housing Portal | https://www03.cmhc-schl.gc.ca/hmip-pimh/ |
|
||||
|
||||
### Geographic Boundaries
|
||||
|
||||
| Resource | URL |
|
||||
|----------|-----|
|
||||
| Toronto Neighbourhoods GeoJSON | https://github.com/jasonicarter/toronto-geojson |
|
||||
| TRREB District Map (PDF) | https://webapp.proptx.ca/trrebdata/common/maps/Toronto.pdf |
|
||||
| Statistics Canada Census Tracts | https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/index-eng.cfm |
|
||||
| R `cmhc` package (CRAN) | https://cran.r-project.org/package=cmhc |
|
||||
|
||||
### Enrichment Data
|
||||
|
||||
| Resource | URL |
|
||||
|----------|-----|
|
||||
| Toronto Open Data Portal | https://open.toronto.ca/ |
|
||||
| Neighbourhood Profiles (CKAN) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/neighbourhood-profiles |
|
||||
| Neighbourhood Profiles 2021 (Direct Download) | https://ckan0.cf.opendata.inter.prod-toronto.ca/dataset/6e19a90f-971c-46b3-852c-0c48c436d1fc/resource/19d4a806-7385-4889-acf2-256f1e079060/download/nbhd_2021_census_profile_full_158model.xlsx |
|
||||
|
||||
### Policy Events Research
|
||||
|
||||
| Resource | URL |
|
||||
|----------|-----|
|
||||
| Bank of Canada Interest Rates | https://www.bankofcanada.ca/rates/interest-rates/ |
|
||||
| OSFI (Stress Test Rules) | https://www.osfi-bsif.gc.ca/ |
|
||||
| Ontario Legislature (Bills) | https://www.ola.org/ |
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
| Resource | URL |
|
||||
|----------|-----|
|
||||
| Statistics Canada 2021 Census Reference | https://www12.statcan.gc.ca/census-recensement/2021/ref/index-eng.cfm |
|
||||
| City of Toronto Neighbourhood Profiles Overview | https://www.toronto.ca/city-government/data-research-maps/neighbourhoods-communities/neighbourhood-profiles/ |
|
||||
|
||||
---
|
||||
|
||||
## Related Documents
|
||||
|
||||
| Document | Relationship | Use For |
|
||||
|----------|--------------|---------|
|
||||
| `portfolio_project_plan_v5.md` | Parent document | Overall scope, phasing, tech stack, deployment, dbt architecture, data quality framework |
|
||||
|
||||
---
|
||||
|
||||
*Document Version: 5.1*
|
||||
*Updated: January 2026*
|
||||
*Project: Toronto Housing Price Dashboard — Portfolio Piece*
|
||||
@@ -1,794 +0,0 @@
|
||||
# Work Breakdown Structure & Sprint Plan
|
||||
|
||||
**Project**: Toronto Housing Dashboard (Portfolio Phase 1)
|
||||
**Version**: 4.1
|
||||
**Date**: January 2026
|
||||
|
||||
---
|
||||
|
||||
## Document Context
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Parent Documents** | `portfolio_project_plan_v5.md`, `toronto_housing_dashboard_spec_v5.md` |
|
||||
| **Content Source** | `bio_content_v2.md` |
|
||||
| **Role** | Executable sprint plan for Phase 1 delivery |
|
||||
|
||||
---
|
||||
|
||||
## Milestones
|
||||
|
||||
| Milestone | Deliverable | Target Sprint |
|
||||
|-----------|-------------|---------------|
|
||||
| **Launch 1** | Bio Landing Page | Sprint 2 |
|
||||
| **Launch 2** | Toronto Housing Dashboard | Sprint 6 |
|
||||
|
||||
---
|
||||
|
||||
## WBS Structure
|
||||
|
||||
```
|
||||
1.0 Launch 1: Bio Landing Page
|
||||
├── 1.1 Project Bootstrap
|
||||
├── 1.2 Infrastructure
|
||||
├── 1.3 Application Foundation
|
||||
├── 1.4 Bio Page
|
||||
└── 1.5 Deployment
|
||||
|
||||
2.0 Launch 2: Toronto Housing Dashboard
|
||||
├── 2.1 Data Acquisition
|
||||
├── 2.2 Data Processing
|
||||
├── 2.3 Database Layer
|
||||
├── 2.4 dbt Transformation
|
||||
├── 2.5 Visualization
|
||||
├── 2.6 Documentation
|
||||
└── 2.7 Operations
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Launch 1: Bio Landing Page
|
||||
|
||||
### 1.1 Project Bootstrap
|
||||
|
||||
| ID | Task | Depends On | Effort | Complexity |
|
||||
|----|------|------------|--------|------------|
|
||||
| 1.1.1 | Git repository initialization | — | Low | Low |
|
||||
| 1.1.2 | Create `.gitignore` | 1.1.1 | Low | Low |
|
||||
| 1.1.3 | Create `pyproject.toml` | 1.1.1 | Low | Low |
|
||||
| 1.1.4 | Create `.python-version` (3.11+) | 1.1.1 | Low | Low |
|
||||
| 1.1.5 | Create `.env.example` | 1.1.1 | Low | Low |
|
||||
| 1.1.6 | Create `README.md` (initial) | 1.1.1 | Low | Low |
|
||||
| 1.1.7 | Create `CLAUDE.md` | 1.1.1 | Low | Low |
|
||||
| 1.1.8 | Create `Makefile` with all targets | 1.1.3 | Low | Medium |
|
||||
|
||||
### 1.2 Infrastructure
|
||||
|
||||
| ID | Task | Depends On | Effort | Complexity |
|
||||
|----|------|------------|--------|------------|
|
||||
| 1.2.1 | Python env setup (pyenv, venv, deps) | 1.1.3, 1.1.4 | Low | Low |
|
||||
| 1.2.2 | Create `.pre-commit-config.yaml` | 1.2.1 | Low | Low |
|
||||
| 1.2.3 | Install pre-commit hooks | 1.2.2 | Low | Low |
|
||||
| 1.2.4 | Create `docker-compose.yml` (PostgreSQL + PostGIS) | 1.1.5 | Low | Low |
|
||||
| 1.2.5 | Create `scripts/` directory structure | 1.1.1 | Low | Low |
|
||||
| 1.2.6 | Create `scripts/docker/up.sh` | 1.2.5 | Low | Low |
|
||||
| 1.2.7 | Create `scripts/docker/down.sh` | 1.2.5 | Low | Low |
|
||||
| 1.2.8 | Create `scripts/docker/logs.sh` | 1.2.5 | Low | Low |
|
||||
| 1.2.9 | Create `scripts/docker/rebuild.sh` | 1.2.5 | Low | Low |
|
||||
| 1.2.10 | Create `scripts/db/init.sh` (PostGIS extension) | 1.2.5 | Low | Low |
|
||||
| 1.2.11 | Create `scripts/dev/setup.sh` | 1.2.5 | Low | Low |
|
||||
| 1.2.12 | Verify Docker + PostGIS working | 1.2.4, 1.2.10 | Low | Low |
|
||||
|
||||
### 1.3 Application Foundation
|
||||
|
||||
| ID | Task | Depends On | Effort | Complexity |
|
||||
|----|------|------------|--------|------------|
|
||||
| 1.3.1 | Create `portfolio_app/` directory structure (full tree) | 1.2.1 | Low | Low |
|
||||
| 1.3.2 | Create `portfolio_app/__init__.py` | 1.3.1 | Low | Low |
|
||||
| 1.3.3 | Create `portfolio_app/config.py` (Pydantic BaseSettings) | 1.3.1 | Low | Medium |
|
||||
| 1.3.4 | Create `portfolio_app/errors/__init__.py` | 1.3.1 | Low | Low |
|
||||
| 1.3.5 | Create `portfolio_app/errors/exceptions.py` | 1.3.4 | Low | Low |
|
||||
| 1.3.6 | Create `portfolio_app/errors/handlers.py` | 1.3.5 | Low | Medium |
|
||||
| 1.3.7 | Create `portfolio_app/app.py` (Dash + Pages routing) | 1.3.3 | Low | Medium |
|
||||
| 1.3.8 | Configure dash-mantine-components theme | 1.3.7 | Low | Low |
|
||||
| 1.3.9 | Create `portfolio_app/assets/` directory | 1.3.1 | Low | Low |
|
||||
| 1.3.10 | Create `portfolio_app/assets/styles.css` | 1.3.9 | Low | Medium |
|
||||
| 1.3.11 | Create `portfolio_app/assets/variables.css` | 1.3.9 | Low | Low |
|
||||
| 1.3.12 | Add `portfolio_app/assets/favicon.ico` | 1.3.9 | Low | Low |
|
||||
| 1.3.13 | Create `portfolio_app/assets/images/` directory | 1.3.9 | Low | Low |
|
||||
| 1.3.14 | Create `tests/` directory structure | 1.2.1 | Low | Low |
|
||||
| 1.3.15 | Create `tests/__init__.py` | 1.3.14 | Low | Low |
|
||||
| 1.3.16 | Create `tests/conftest.py` | 1.3.14 | Low | Medium |
|
||||
| 1.3.17 | Configure pytest in `pyproject.toml` | 1.1.3, 1.3.14 | Low | Low |
|
||||
|
||||
### 1.4 Bio Page
|
||||
|
||||
| ID | Task | Depends On | Effort | Complexity |
|
||||
|----|------|------------|--------|------------|
|
||||
| 1.4.1 | Create `portfolio_app/components/__init__.py` | 1.3.1 | Low | Low |
|
||||
| 1.4.2 | Create `portfolio_app/components/navbar.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||
| 1.4.3 | Create `portfolio_app/components/footer.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||
| 1.4.4 | Create `portfolio_app/components/cards.py` | 1.4.1, 1.3.8 | Low | Low |
|
||||
| 1.4.5 | Create `portfolio_app/pages/__init__.py` | 1.3.1 | Low | Low |
|
||||
| 1.4.6 | Create `portfolio_app/pages/home.py` (layout) | 1.4.5, 1.4.2, 1.4.3 | Low | Low |
|
||||
| 1.4.7 | Integrate bio content from `bio_content_v2.md` | 1.4.6 | Low | Low |
|
||||
| 1.4.8 | Replace social link placeholders with real URLs | 1.4.7 | Low | Low |
|
||||
| 1.4.9 | Implement project cards (deployed/in-dev logic) | 1.4.4, 1.4.6 | Low | Low |
|
||||
| 1.4.10 | Test bio page renders locally | 1.4.9 | Low | Low |
|
||||
|
||||
### 1.5 Deployment

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 1.5.1 | Install PostgreSQL + PostGIS on VPS | — | Low | Low |
| 1.5.2 | Configure firewall (ufw: SSH, HTTP, HTTPS) | 1.5.1 | Low | Low |
| 1.5.3 | Create application database user | 1.5.1 | Low | Low |
| 1.5.4 | Create Gunicorn systemd service file | 1.4.10 | Low | Low |
| 1.5.5 | Configure Nginx reverse proxy | 1.5.4 | Low | Low |
| 1.5.6 | Configure SSL (certbot) | 1.5.5 | Low | Low |
| 1.5.7 | Create `scripts/deploy/deploy.sh` | 1.2.5 | Low | Low |
| 1.5.8 | Create `scripts/deploy/health-check.sh` | 1.2.5 | Low | Low |
| 1.5.9 | Deploy bio page | 1.5.6, 1.5.7 | Low | Low |
| 1.5.10 | Verify HTTPS access | 1.5.9 | Low | Low |

---

## Launch 2: Toronto Housing Dashboard

### 2.1 Data Acquisition

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.1.1 | Define TRREB year scope + download PDFs | — | Low | Low |
| 2.1.2 | **HUMAN**: Digitize TRREB district boundaries (QGIS) | 2.1.1 | High | High |
| 2.1.3 | Register for CMHC portal | — | Low | Low |
| 2.1.4 | Export CMHC Toronto rental CSVs | 2.1.3 | Low | Low |
| 2.1.5 | Extract CMHC zone boundaries (R cmhc package) | 2.1.3 | Low | Medium |
| 2.1.6 | Download neighbourhoods GeoJSON (158 boundaries) | — | Low | Low |
| 2.1.7 | Download Neighbourhood Profiles 2021 (xlsx) | — | Low | Low |
| 2.1.8 | Validate CRS alignment (all geo files WGS84) | 2.1.2, 2.1.5, 2.1.6 | Low | Medium |
| 2.1.9 | Research Tier 1 policy events (10–20 events) | — | Mid | Medium |
| 2.1.10 | Create `data/toronto/reference/policy_events.csv` | 2.1.9 | Low | Low |
| 2.1.11 | Create `data/` directory structure | 1.3.1 | Low | Low |
| 2.1.12 | Organize raw files into `data/toronto/raw/` | 2.1.11 | Low | Low |
| 2.1.13 | Test TRREB parser across year boundaries | 2.2.3 | Low | Medium |

### 2.2 Data Processing

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.2.1 | Create `portfolio_app/toronto/__init__.py` | 1.3.1 | Low | Low |
| 2.2.2 | Create `portfolio_app/toronto/parsers/__init__.py` | 2.2.1 | Low | Low |
| 2.2.3 | Build TRREB PDF parser (`parsers/trreb.py`) | 2.2.2, 2.1.1 | Mid | High |
| 2.2.4 | TRREB data cleaning/normalization | 2.2.3 | Low | Medium |
| 2.2.5 | TRREB parser unit tests | 2.2.4 | Low | Low |
| 2.2.6 | Build CMHC CSV processor (`parsers/cmhc.py`) | 2.2.2, 2.1.4 | Low | Low |
| 2.2.7 | CMHC reliability code handling | 2.2.6 | Low | Low |
| 2.2.8 | CMHC processor unit tests | 2.2.7 | Low | Low |
| 2.2.9 | Build Neighbourhood Profiles parser | 2.2.1, 2.1.7 | Low | Low |
| 2.2.10 | Policy events CSV loader | 2.2.1, 2.1.10 | Low | Low |

### 2.3 Database Layer

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.3.1 | Create `portfolio_app/toronto/schemas/__init__.py` | 2.2.1 | Low | Low |
| 2.3.2 | Create TRREB Pydantic schemas (`schemas/trreb.py`) | 2.3.1 | Low | Medium |
| 2.3.3 | Create CMHC Pydantic schemas (`schemas/cmhc.py`) | 2.3.1 | Low | Medium |
| 2.3.4 | Create enrichment Pydantic schemas (`schemas/enrichment.py`) | 2.3.1 | Low | Low |
| 2.3.5 | Create policy event Pydantic schema (`schemas/policy_event.py`) | 2.3.1 | Low | Low |
| 2.3.6 | Create `portfolio_app/toronto/models/__init__.py` | 2.2.1 | Low | Low |
| 2.3.7 | Create SQLAlchemy base (`models/base.py`) | 2.3.6, 1.3.3 | Low | Medium |
| 2.3.8 | Create dimension models (`models/dimensions.py`) | 2.3.7 | Low | Medium |
| 2.3.9 | Create fact models (`models/facts.py`) | 2.3.8 | Low | Medium |
| 2.3.10 | Create `portfolio_app/toronto/loaders/__init__.py` | 2.2.1 | Low | Low |
| 2.3.11 | Create dimension loaders (`loaders/database.py`) | 2.3.10, 2.3.8 | Low | Medium |
| 2.3.12 | Create fact loaders | 2.3.11, 2.3.9, 2.2.4, 2.2.7 | Mid | Medium |
| 2.3.13 | Loader integration tests | 2.3.12 | Low | Medium |
| 2.3.14 | Create SQL views for dashboard queries | 2.3.12 | Low | Medium |

### 2.4 dbt Transformation

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.4.1 | Create `dbt/` directory structure | 1.3.1 | Low | Low |
| 2.4.2 | Create `dbt/dbt_project.yml` | 2.4.1 | Low | Low |
| 2.4.3 | Create `dbt/profiles.yml` | 2.4.1, 1.3.3 | Low | Low |
| 2.4.4 | Create `scripts/dbt/run.sh` | 1.2.5 | Low | Low |
| 2.4.5 | Create `scripts/dbt/test.sh` | 1.2.5 | Low | Low |
| 2.4.6 | Create `scripts/dbt/docs.sh` | 1.2.5 | Low | Low |
| 2.4.7 | Create `scripts/dbt/fresh.sh` | 1.2.5 | Low | Low |
| 2.4.8 | Create staging models (`stg_trreb__monthly`, `stg_cmhc__rental`) | 2.4.3, 2.3.12 | Low | Medium |
| 2.4.9 | Create intermediate models | 2.4.8 | Low | Medium |
| 2.4.10 | Create mart models | 2.4.9 | Low | Medium |
| 2.4.11 | Create dbt schema tests (unique, not_null, relationships) | 2.4.10 | Low | Medium |
| 2.4.12 | Create custom dbt tests (anomaly detection) | 2.4.11 | Low | Medium |
| 2.4.13 | Create dbt documentation (schema.yml) | 2.4.10 | Low | Low |

### 2.5 Visualization

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.5.1 | Create `portfolio_app/figures/__init__.py` | 1.3.1 | Low | Low |
| 2.5.2 | Build choropleth factory (`figures/choropleth.py`) | 2.5.1, 2.1.8 | Mid | Medium |
| 2.5.3 | Build time series factory (`figures/time_series.py`) | 2.5.1 | Low | Medium |
| 2.5.4 | Build YoY change chart factory (`figures/statistical.py`) | 2.5.1 | Low | Medium |
| 2.5.5 | Build seasonality decomposition chart | 2.5.4 | Low | Medium |
| 2.5.6 | Build district correlation matrix chart | 2.5.4 | Low | Medium |
| 2.5.7 | Create `portfolio_app/pages/toronto/__init__.py` | 1.4.5 | Low | Low |
| 2.5.8 | Create `portfolio_app/pages/toronto/dashboard.py` (layout only) | 2.5.7, 1.4.2, 1.4.3 | Mid | High |
| 2.5.9 | Implement purchase/rental mode toggle | 2.5.8 | Low | Low |
| 2.5.10 | Implement monthly time slider | 2.5.8 | Low | Medium |
| 2.5.11 | Implement annual time selector (CMHC) | 2.5.8 | Low | Low |
| 2.5.12 | Implement layer toggles (districts/zones/neighbourhoods) | 2.5.8 | Low | Medium |
| 2.5.13 | Create `portfolio_app/pages/toronto/callbacks/__init__.py` | 2.5.7 | Low | Low |
| 2.5.14 | Create `callbacks/map_callbacks.py` | 2.5.13, 2.5.2 | Mid | Medium |
| 2.5.15 | Create `callbacks/filter_callbacks.py` | 2.5.13 | Low | Medium |
| 2.5.16 | Create `callbacks/timeseries_callbacks.py` | 2.5.13, 2.5.3 | Low | Medium |
| 2.5.17 | Implement district/zone tooltips | 2.5.14 | Low | Low |
| 2.5.18 | Implement neighbourhood overlay | 2.5.14, 2.1.6 | Low | Medium |
| 2.5.19 | Implement enrichment layer toggle | 2.5.18 | Low | Medium |
| 2.5.20 | Implement policy event markers on time series | 2.5.16, 2.2.10 | Low | Medium |
| 2.5.21 | Implement "district contains neighbourhoods" tooltip | 2.5.17 | Low | Low |
| 2.5.22 | Test dashboard renders with sample data | 2.5.20 | Low | Medium |

### 2.6 Documentation

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.6.1 | Create `docs/` directory | 1.3.1 | Low | Low |
| 2.6.2 | Write `docs/methodology.md` (geographic limitations) | 2.5.22 | Low | Medium |
| 2.6.3 | Write `docs/data_sources.md` (citations) | 2.5.22 | Low | Low |
| 2.6.4 | Write `docs/user_guide.md` | 2.5.22 | Low | Low |
| 2.6.5 | Update `README.md` (final) | 2.6.2, 2.6.3 | Low | Low |
| 2.6.6 | Update `CLAUDE.md` (final) | 2.6.5 | Low | Low |

### 2.7 Operations

| ID | Task | Depends On | Effort | Complexity |
|----|------|------------|--------|------------|
| 2.7.1 | Create `scripts/db/backup.sh` | 1.2.5 | Low | Low |
| 2.7.2 | Create `scripts/db/restore.sh` | 1.2.5 | Low | Low |
| 2.7.3 | Create `scripts/db/reset.sh` (dev only) | 1.2.5 | Low | Low |
| 2.7.4 | Create `scripts/deploy/rollback.sh` | 1.2.5 | Low | Medium |
| 2.7.5 | Implement backup retention policy | 2.7.1 | Low | Low |
| 2.7.6 | Add `/health` endpoint | 2.5.8 | Low | Low |
| 2.7.7 | Configure uptime monitoring (external) | 2.7.6 | Low | Low |
| 2.7.8 | Deploy Toronto dashboard | 1.5.9, 2.5.22 | Low | Low |
| 2.7.9 | Verify production deployment | 2.7.8 | Low | Low |

---

## L3 Task Details

### 1.1 Project Bootstrap

#### 1.1.1 Git repository initialization

| Attribute | Value |
|-----------|-------|
| **What** | Initialize git repo with main branch |
| **How** | `git init`, initial commit |
| **Inputs** | — |
| **Outputs** | `.git/` directory |
| **Why** | Version control foundation |

#### 1.1.2 Create `.gitignore`

| Attribute | Value |
|-----------|-------|
| **What** | Git ignore rules per project plan |
| **How** | Create file with patterns for: `.env`, `data/*/processed/`, `reports/`, `backups/`, `notebooks/*.html`, `__pycache__/`, `.venv/` |
| **Inputs** | Project plan → Directory Rules |
| **Outputs** | `.gitignore` |

#### 1.1.3 Create `pyproject.toml`

| Attribute | Value |
|-----------|-------|
| **What** | Python packaging config |
| **How** | Define project metadata, dependencies, tool configs (ruff, mypy, pytest) |
| **Inputs** | Tech stack versions from project plan |
| **Outputs** | `pyproject.toml` |
| **Dependencies** | PostgreSQL 16.x, Pydantic ≥2.0, SQLAlchemy ≥2.0, dbt-postgres ≥1.7, Pandas ≥2.1, GeoPandas ≥0.14, Dash ≥2.14, dash-mantine-components (latest), pytest ≥7.0 |

#### 1.1.4 Create `.python-version`

| Attribute | Value |
|-----------|-------|
| **What** | pyenv version file |
| **How** | Single line: `3.11` or a specific patch version |
| **Outputs** | `.python-version` |

#### 1.1.5 Create `.env.example`

| Attribute | Value |
|-----------|-------|
| **What** | Environment variable template |
| **How** | Template with: DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |
| **Inputs** | Project plan → Environment Setup |
| **Outputs** | `.env.example` |

#### 1.1.6 Create `README.md` (initial)

| Attribute | Value |
|-----------|-------|
| **What** | Project overview stub |
| **How** | Title, brief description, "Setup coming soon" |
| **Outputs** | `README.md` |

#### 1.1.7 Create `CLAUDE.md`

| Attribute | Value |
|-----------|-------|
| **What** | AI assistant context file |
| **How** | Project context, architecture decisions, patterns, conventions |
| **Inputs** | Project plan → Code Architecture |
| **Outputs** | `CLAUDE.md` |
| **Why** | Claude Code effectiveness from day 1 |

#### 1.1.8 Create `Makefile`

| Attribute | Value |
|-----------|-------|
| **What** | All make targets from project plan |
| **How** | Implement targets: setup, venv, clean, docker-up/down/logs/rebuild, db-init/backup/restore/reset, run, run-prod, dbt-run/test/docs/fresh, test, test-cov, lint, format, typecheck, ci, deploy, rollback |
| **Inputs** | Project plan → Makefile Targets |
| **Outputs** | `Makefile` |

### 1.2 Infrastructure

#### 1.2.4 Create `docker-compose.yml`

| Attribute | Value |
|-----------|-------|
| **What** | Docker Compose V2 for PostgreSQL 16 + PostGIS |
| **How** | Service definition, volume mounts, port 5432, env vars from `.env` |
| **Inputs** | `.env.example` |
| **Outputs** | `docker-compose.yml` |
| **Note** | No `version` field (Docker Compose V2) |

#### 1.2.5 Create `scripts/` directory structure

| Attribute | Value |
|-----------|-------|
| **What** | Full scripts tree per project plan |
| **How** | `mkdir -p scripts/{db,docker,deploy,dbt,dev}` |
| **Outputs** | `scripts/db/`, `scripts/docker/`, `scripts/deploy/`, `scripts/dbt/`, `scripts/dev/` |

#### 1.2.10 Create `scripts/db/init.sh`

| Attribute | Value |
|-----------|-------|
| **What** | Database initialization with PostGIS |
| **How** | `CREATE DATABASE`, `CREATE EXTENSION postgis`, schema creation |
| **Standard** | `set -euo pipefail`, usage comment, idempotent |
| **Outputs** | `scripts/db/init.sh` |

### 1.3 Application Foundation

#### 1.3.1 Create `portfolio_app/` directory structure

| Attribute | Value |
|-----------|-------|
| **What** | Full application tree per project plan |
| **Directories** | `portfolio_app/`, `portfolio_app/assets/`, `portfolio_app/assets/images/`, `portfolio_app/pages/`, `portfolio_app/pages/toronto/`, `portfolio_app/pages/toronto/callbacks/`, `portfolio_app/components/`, `portfolio_app/figures/`, `portfolio_app/toronto/`, `portfolio_app/toronto/parsers/`, `portfolio_app/toronto/loaders/`, `portfolio_app/toronto/schemas/`, `portfolio_app/toronto/models/`, `portfolio_app/toronto/transforms/`, `portfolio_app/errors/` |
| **Pattern** | Callbacks in `pages/{dashboard}/callbacks/` per project plan |

#### 1.3.3 Create `config.py`

| Attribute | Value |
|-----------|-------|
| **What** | Pydantic BaseSettings for config |
| **How** | Settings class loading from `.env` |
| **Fields** | DATABASE_URL, POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB, DASH_DEBUG, SECRET_KEY, LOG_LEVEL |

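A minimal sketch of what this could look like, assuming Pydantic v2 (where `BaseSettings` lives in the separate `pydantic-settings` package); the class and helper names are illustrative:

```python
# portfolio_app/config.py -- sketch; field names come from the table above,
# defaults and the cached accessor are assumptions.
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env")

    DATABASE_URL: str
    POSTGRES_USER: str
    POSTGRES_PASSWORD: str
    POSTGRES_DB: str
    DASH_DEBUG: bool = False
    SECRET_KEY: str
    LOG_LEVEL: str = "INFO"


@lru_cache
def get_settings() -> Settings:
    """Build the Settings once and reuse it across the app."""
    return Settings()
```
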
#### 1.3.5 Create `exceptions.py`

| Attribute | Value |
|-----------|-------|
| **What** | Exception hierarchy per project plan |
| **Classes** | `PortfolioError` (base), `ParseError`, `ValidationError`, `LoadError` |

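A sketch of the hierarchy; the docstrings are illustrative:

```python
# portfolio_app/errors/exceptions.py -- sketch of the classes named above.
class PortfolioError(Exception):
    """Base class for all application errors."""


class ParseError(PortfolioError):
    """Raised when a source file (PDF, CSV, xlsx) cannot be parsed."""


class ValidationError(PortfolioError):
    """Raised when parsed data fails schema validation."""


class LoadError(PortfolioError):
    """Raised when a database load fails."""
```
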
#### 1.3.6 Create `handlers.py`

| Attribute | Value |
|-----------|-------|
| **What** | Error handling decorators |
| **How** | Decorators for: logging/re-raise, retry logic, transaction boundaries, timing |
| **Pattern** | Infrastructure concerns only; domain logic uses explicit handling |

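A minimal sketch of one such decorator (retry with logging); the real module would add transaction and timing decorators in the same style:

```python
# portfolio_app/errors/handlers.py -- sketch; signature and defaults assumed.
import functools
import logging
import time

logger = logging.getLogger(__name__)


def retry(attempts: int = 3, delay: float = 1.0):
    """Retry a flaky infrastructure call, logging each failure."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    logger.exception(
                        "%s failed (attempt %d/%d)", func.__name__, attempt, attempts
                    )
                    if attempt == attempts:
                        raise
                    time.sleep(delay)

        return wrapper

    return decorator
```
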
#### 1.3.7 Create `app.py`

| Attribute | Value |
|-----------|-------|
| **What** | Dash app factory with Pages routing |
| **How** | `Dash(__name__, use_pages=True)`, MantineProvider wrapper |
| **Imports** | External: absolute; Internal: relative (dot notation) |

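A sketch under these conventions; the theme object from 1.3.8 would be passed into `MantineProvider`:

```python
# portfolio_app/app.py -- sketch; everything beyond use_pages=True and the
# MantineProvider wrapper is an assumption.
import dash
import dash_mantine_components as dmc
from dash import Dash

app = Dash(__name__, use_pages=True)

# Wrap the Pages container so every page inherits the Mantine theme.
app.layout = dmc.MantineProvider(children=dash.page_container)

server = app.server  # WSGI entry point for Gunicorn

if __name__ == "__main__":
    app.run(debug=True)
```
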
#### 1.3.16 Create `conftest.py`

| Attribute | Value |
|-----------|-------|
| **What** | pytest fixtures |
| **How** | Test database fixture, sample data fixtures, app client fixture |

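A sketch of plausible fixtures; the test database URL and sample values are placeholders:

```python
# tests/conftest.py -- sketch; names and values are illustrative only.
import pytest
from sqlalchemy import create_engine


@pytest.fixture(scope="session")
def db_engine():
    """Engine pointed at a disposable test database."""
    engine = create_engine(
        "postgresql://portfolio:portfolio@localhost:5432/portfolio_test"
    )
    yield engine
    engine.dispose()


@pytest.fixture
def sample_trreb_row():
    """One representative parsed TRREB record (values invented for tests)."""
    return {
        "report_date": "2024-01-01",
        "area_code": "W01",
        "area_name": "Toronto W01",
        "sales": 42,
        "avg_price": 1_000_000,
    }
```
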
### 1.4 Bio Page

#### 1.4.7 Integrate bio content

| Attribute | Value |
|-----------|-------|
| **What** | Content from `bio_content_v2.md` |
| **Sections** | Headline, Professional Summary, Tech Stack, Side Project, Availability |
| **Layout** | Hero → Summary → Tech Stack → Project Cards → Social Links → Availability |

#### 1.4.8 Replace social link placeholders

| Attribute | Value |
|-----------|-------|
| **What** | Replace `[USERNAME]` in LinkedIn/GitHub URLs |
| **Source** | `bio_content_v2.md` → Social Links |
| **Acceptance** | No placeholder text in production |

#### 1.4.9 Implement project cards

| Attribute | Value |
|-----------|-------|
| **What** | Dynamic project card display |
| **Logic** | Show deployed projects with links; show "In Development" for in-progress; hide or grey out planned |
| **Source** | `bio_content_v2.md` → Portfolio Projects Section |

### 2.1 Data Acquisition

#### 2.1.1 Define TRREB year scope + download PDFs

| Attribute | Value |
|-----------|-------|
| **What** | Decide which years to parse for V1, download PDFs |
| **Decision** | 2020–present for V1 (manageable scope, consistent PDF format). Expand to 2007+ in future if needed. |
| **Output** | `data/toronto/raw/trreb/market_watch_YYYY_MM.pdf` |
| **Note** | PDF format may vary pre-2018; test before committing to older years |

#### 2.1.2 Digitize TRREB district boundaries

| Attribute | Value |
|-----------|-------|
| **What** | GeoJSON with ~35 district polygons |
| **Tool** | QGIS |
| **Process** | Import PDF as raster → create vector layer → trace polygons → add attributes (district_code, district_name, area_type) → export GeoJSON (WGS84/EPSG:4326) |
| **Input** | TRREB Toronto.pdf map |
| **Output** | `data/toronto/raw/geo/trreb_districts.geojson` |
| **Effort** | High |
| **Complexity** | High |
| **Note** | HUMAN TASK — not automatable |

#### 2.1.5 Extract CMHC zone boundaries

| Attribute | Value |
|-----------|-------|
| **What** | GeoJSON with ~20 zone polygons |
| **Tool** | R with cmhc and sf packages |
| **Process** | `get_cmhc_geography(geography_type="ZONE", cma="Toronto")` → `st_write()` to GeoJSON |
| **Output** | `data/toronto/raw/geo/cmhc_zones.geojson` |

#### 2.1.9 Research Tier 1 policy events

| Attribute | Value |
|-----------|-------|
| **What** | Federal/provincial policy events with dates, descriptions, expected direction |
| **Sources** | Bank of Canada, OSFI, Ontario Legislature |
| **Schema** | event_date, effective_date, level, category, title, description, expected_direction, source_url, confidence |
| **Acceptance** | Minimum 10 events, maximum 20 |
| **Examples** | BoC rate decisions, OSFI B-20, Ontario Fair Housing Plan, foreign buyer tax |

#### 2.1.13 Test TRREB parser across year boundaries

| Attribute | Value |
|-----------|-------|
| **What** | Verify parser handles PDFs from different years |
| **Test Cases** | 2020 Q1, 2022 Q1, 2024 Q1 (minimum) |
| **Check For** | Table structure changes, column naming variations, page number shifts |
| **Output** | Documented format variations, parser fallbacks if needed |

### 2.2 Data Processing

#### 2.2.3 Build TRREB PDF parser

| Attribute | Value |
|-----------|-------|
| **What** | Extract summary tables from TRREB PDFs |
| **Tool** | pdfplumber or camelot-py |
| **Location** | Pages 3–4 (Summary by Area) |
| **Fields** | report_date, area_code, area_name, area_type, sales, dollar_volume, avg_price, median_price, new_listings, active_listings, avg_sp_lp, avg_dom |
| **Output** | `portfolio_app/toronto/parsers/trreb.py` |

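A sketch of the extraction core, assuming pdfplumber; the page index and table shape are assumptions until 2.1.13 confirms them for each year:

```python
# portfolio_app/toronto/parsers/trreb.py -- sketch; page index is assumed.
import pdfplumber


def extract_summary_rows(pdf_path: str, page_index: int = 2) -> list[list[str]]:
    """Pull the raw 'Summary by Area' table rows from one Market Watch PDF."""
    with pdfplumber.open(pdf_path) as pdf:
        table = pdf.pages[page_index].extract_table()
    if table is None:
        raise ValueError(f"no table found on page {page_index + 1} of {pdf_path}")
    _header, *rows = table  # first row is the column header
    return rows
```
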
#### 2.2.7 CMHC reliability code handling

| Attribute | Value |
|-----------|-------|
| **What** | Parse reliability codes, handle suppression |
| **Codes** | a (excellent), b (good), c (fair), d (poor/caution), ** (suppressed → NULL) |
| **Implementation** | Pydantic validators, enum type |

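A sketch of how the codes could map onto an enum plus a suppression validator (Pydantic v2 syntax); the model and field names are illustrative:

```python
# portfolio_app/toronto/schemas/cmhc.py -- sketch; only the codes are per plan.
from enum import Enum

from pydantic import BaseModel, field_validator


class Reliability(str, Enum):
    EXCELLENT = "a"
    GOOD = "b"
    FAIR = "c"
    POOR = "d"


class RentalObservation(BaseModel):
    avg_rent: float | None = None
    reliability: Reliability | None = None

    @field_validator("avg_rent", "reliability", mode="before")
    @classmethod
    def suppressed_to_null(cls, value):
        # CMHC publishes '**' for suppressed cells; store them as NULL.
        return None if value == "**" else value
```
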
### 2.3 Database Layer

#### 2.3.8 Create dimension models

| Attribute | Value |
|-----------|-------|
| **What** | SQLAlchemy 2.0 models for dimensions |
| **Tables** | `dim_time`, `dim_trreb_district`, `dim_cmhc_zone`, `dim_neighbourhood`, `dim_policy_event` |
| **Geometry** | PostGIS geometry columns for districts, zones, neighbourhoods |
| **Note** | `dim_neighbourhood` has no FK to facts in V1 |

#### 2.3.9 Create fact models

| Attribute | Value |
|-----------|-------|
| **What** | SQLAlchemy 2.0 models for facts |
| **Tables** | `fact_purchases`, `fact_rentals` |
| **FKs** | fact_purchases → dim_time, dim_trreb_district; fact_rentals → dim_time, dim_cmhc_zone |

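A sketch of one fact table in SQLAlchemy 2.0 declarative style; the column list is trimmed and everything beyond the FK targets above is illustrative:

```python
# portfolio_app/toronto/models/facts.py -- sketch; the real models would
# reuse the shared Base from models/base.py and carry the full measure set.
from sqlalchemy import ForeignKey, Integer, Numeric
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class FactPurchases(Base):
    __tablename__ = "fact_purchases"

    id: Mapped[int] = mapped_column(primary_key=True)
    time_id: Mapped[int] = mapped_column(ForeignKey("dim_time.id"))
    district_id: Mapped[int] = mapped_column(ForeignKey("dim_trreb_district.id"))
    sales: Mapped[int] = mapped_column(Integer)
    avg_price: Mapped[float] = mapped_column(Numeric(12, 2))
```
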
### 2.4 dbt Transformation

#### 2.4.8 Create staging models

| Attribute | Value |
|-----------|-------|
| **What** | 1:1 source mapping, cleaned and typed |
| **Models** | `stg_trreb__monthly`, `stg_cmhc__rental` |
| **Naming** | `stg_{source}__{entity}` |

#### 2.4.11 Create dbt schema tests

| Attribute | Value |
|-----------|-------|
| **What** | Data quality tests |
| **Tests** | `unique` (PKs), `not_null` (required), `accepted_values` (reliability codes, area_type), `relationships` (FK integrity) |

#### 2.4.12 Create custom dbt tests

| Attribute | Value |
|-----------|-------|
| **What** | Anomaly detection rules |
| **Rules** | Price MoM change >30% → flag; missing districts → fail; duplicate records → fail |

### 2.5 Visualization

#### 2.5.2 Build choropleth factory

| Attribute | Value |
|-----------|-------|
| **What** | Reusable choropleth_mapbox figure generator |
| **Inputs** | GeoDataFrame, metric column, color config |
| **Output** | Plotly figure |
| **Location** | `portfolio_app/figures/choropleth.py` |

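A sketch of the factory assuming Plotly Express underneath; the parameter names mirror how the notebooks later call `create_choropleth_figure`, while the body and defaults are assumptions:

```python
# portfolio_app/figures/choropleth.py -- sketch; map center/style assumed.
import pandas as pd
import plotly.express as px


def create_choropleth_figure(geojson, data, location_key, color_column,
                             hover_data=None, color_scale="Viridis",
                             title="", zoom=10):
    """Build a choropleth_mapbox figure from pre-joined records."""
    df = pd.DataFrame(data)
    fig = px.choropleth_mapbox(
        df,
        geojson=geojson,
        locations=location_key,
        featureidkey=f"properties.{location_key}",
        color=color_column,
        hover_data=hover_data,
        color_continuous_scale=color_scale,
        mapbox_style="carto-positron",
        center={"lat": 43.7, "lon": -79.4},  # roughly central Toronto
        zoom=zoom,
        opacity=0.7,
    )
    fig.update_layout(title=title, margin={"l": 0, "r": 0, "t": 40, "b": 0})
    return fig
```
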
#### 2.5.4–2.5.6 Statistical chart factories

| Attribute | Value |
|-----------|-------|
| **What** | Statistical analysis visualizations |
| **Charts** | YoY change with variance bands, seasonality decomposition, district correlation matrix |
| **Location** | `portfolio_app/figures/statistical.py` |
| **Why** | Required skill demonstration per project plan |

#### 2.5.8 Create dashboard layout

| Attribute | Value |
|-----------|-------|
| **What** | Toronto dashboard page structure |
| **File** | `portfolio_app/pages/toronto/dashboard.py` |
| **Pattern** | Layout only — no callbacks in this file |
| **Components** | Navbar, choropleth map, time controls, layer toggles, time series panel, statistics panel, footer |

#### 2.5.13–2.5.16 Create callbacks

| Attribute | Value |
|-----------|-------|
| **What** | Dashboard interaction logic |
| **Location** | `portfolio_app/pages/toronto/callbacks/` |
| **Files** | `__init__.py`, `map_callbacks.py`, `filter_callbacks.py`, `timeseries_callbacks.py` |
| **Pattern** | Separate from layout per project plan callback separation pattern |
| **Registration** | Import callback modules in `callbacks/__init__.py`, then import that package in `dashboard.py`; callbacks register with Dash as soon as their modules are imported. |

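A sketch of the registration wiring; file contents are illustrative:

```python
# pages/toronto/callbacks/__init__.py -- importing the modules executes
# their @callback decorators, which is all registration requires.
from . import filter_callbacks, map_callbacks, timeseries_callbacks  # noqa: F401

# pages/toronto/dashboard.py would then pull in the package above so the
# callbacks exist before the page is served (excerpt, names assumed):
#
#   import dash
#   from . import callbacks  # noqa: F401
#   dash.register_page(__name__, path="/toronto")
```
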
#### 2.5.22 Test dashboard renders with sample data

| Attribute | Value |
|-----------|-------|
| **What** | Verify dashboard works end-to-end |
| **Sample Data** | Use output from task 2.3.12 (fact loaders). Run loaders with a subset of parsed data before this task. |
| **Verify** | Choropleth renders, time controls work, tooltips display, no console errors |

---

## Sprint Plan

### Sprint 1: Project Bootstrap + Start TRREB Digitization

**Goal**: Dev environment working, repo initialized, TRREB digitization started

| Task ID | Task | Effort |
|---------|------|--------|
| 1.1.1 | Git repo init | Low |
| 1.1.2 | .gitignore | Low |
| 1.1.3 | pyproject.toml | Low |
| 1.1.4 | .python-version | Low |
| 1.1.5 | .env.example | Low |
| 1.1.6 | README.md (initial) | Low |
| 1.1.7 | CLAUDE.md | Low |
| 1.1.8 | Makefile | Low |
| 1.2.1 | Python env setup | Low |
| 1.2.2 | .pre-commit-config.yaml | Low |
| 1.2.3 | Install pre-commit | Low |
| 1.2.4 | docker-compose.yml | Low |
| 1.2.5 | scripts/ directory structure | Low |
| 1.2.6–1.2.9 | Docker scripts | Low |
| 1.2.10 | scripts/db/init.sh | Low |
| 1.2.11 | scripts/dev/setup.sh | Low |
| 1.2.12 | Verify Docker + PostGIS | Low |
| 1.3.1 | portfolio_app/ directory structure | Low |
| 1.3.2–1.3.6 | App foundation files | Low |
| 1.3.14–1.3.17 | Test infrastructure | Low |
| 2.1.1 | Download TRREB PDFs | Low |
| 2.1.2 | **START** TRREB boundaries (HUMAN) | High |
| 2.1.9 | **START** Policy events research | Mid |

---

### Sprint 2: Bio Page + Data Acquisition

**Goal**: Bio live, all raw data downloaded

| Task ID | Task | Effort |
|---------|------|--------|
| 1.3.7 | app.py with Pages | Low |
| 1.3.8 | Theme config | Low |
| 1.3.9–1.3.13 | Assets directory + files | Low |
| 1.4.1–1.4.4 | Components | Low |
| 1.4.5–1.4.10 | Bio page | Low |
| 1.5.1–1.5.3 | VPS setup | Low |
| 1.5.4–1.5.6 | Gunicorn/Nginx/SSL | Low |
| 1.5.7–1.5.8 | Deploy scripts | Low |
| 1.5.9–1.5.10 | Deploy + verify | Low |
| 2.1.2 | **CONTINUE** TRREB boundaries | High |
| 2.1.3–2.1.4 | CMHC registration + export | Low |
| 2.1.5 | CMHC zone boundaries (R) | Low |
| 2.1.6 | Neighbourhoods GeoJSON | Low |
| 2.1.7 | Neighbourhood Profiles download | Low |
| 2.1.9 | **CONTINUE** Policy events research | Mid |
| 2.1.10 | policy_events.csv | Low |
| 2.1.11–2.1.12 | data/ directory + organize | Low |

**Milestone**: **Launch 1 — Bio Live**

---

### Sprint 3: Parsers + Schemas + Models

**Goal**: ETL pipeline working, database layer complete

| Task ID | Task | Effort |
|---------|------|--------|
| 2.1.2 | **COMPLETE** TRREB boundaries | High |
| 2.1.8 | CRS validation | Low |
| 2.2.1–2.2.2 | Toronto module init | Low |
| 2.2.3–2.2.5 | TRREB parser + tests | Mid |
| 2.2.6–2.2.8 | CMHC processor + tests | Low |
| 2.2.9 | Neighbourhood Profiles parser | Low |
| 2.2.10 | Policy events loader | Low |
| 2.3.1–2.3.5 | Pydantic schemas | Low |
| 2.3.6–2.3.9 | SQLAlchemy models | Low |

---

### Sprint 4: Loaders + dbt

**Goal**: Data loaded, transformation layer ready

| Task ID | Task | Effort |
|---------|------|--------|
| 2.3.10–2.3.13 | Loaders + tests | Mid |
| 2.3.14 | SQL views | Low |
| 2.4.1–2.4.7 | dbt setup + scripts | Low |
| 2.4.8–2.4.10 | dbt models | Low |
| 2.4.11–2.4.12 | dbt tests | Low |
| 2.4.13 | dbt documentation | Low |
| 2.7.1–2.7.3 | DB backup/restore scripts | Low |

---

### Sprint 5: Visualization

**Goal**: Dashboard functional

| Task ID | Task | Effort |
|---------|------|--------|
| 2.5.1–2.5.6 | Figure factories | Mid |
| 2.5.7–2.5.12 | Dashboard layout + controls | Mid |
| 2.5.13–2.5.16 | Callbacks | Mid |
| 2.5.17–2.5.21 | Tooltips + overlays + markers | Low |
| 2.5.22 | Test dashboard | Low |

---

### Sprint 6: Polish + Launch 2

**Goal**: Dashboard deployed

| Task ID | Task | Effort |
|---------|------|--------|
| 2.6.1–2.6.6 | Documentation | Low |
| 2.7.4–2.7.5 | Rollback script + retention | Low |
| 2.7.6–2.7.7 | Health endpoint + monitoring | Low |
| 2.7.8–2.7.9 | Deploy + verify | Low |

**Milestone**: **Launch 2 — Toronto Dashboard Live**

---

### Sprint 7: Buffer

**Goal**: Contingency for slippage, bug fixes

| Task ID | Task | Effort |
|---------|------|--------|
| — | Overflow from previous sprints | Varies |
| — | Bug fixes | Varies |
| — | UX polish | Low |

---

## Sprint Summary

| Sprint | Focus | Key Risk | Milestone |
|--------|-------|----------|-----------|
| 1 | Bootstrap + start boundaries | — | — |
| 2 | Bio + data acquisition | TRREB digitization | Launch 1 |
| 3 | Parsers + DB layer | PDF parser, boundaries | — |
| 4 | Loaders + dbt | — | — |
| 5 | Visualization | Choropleth complexity | — |
| 6 | Polish + deploy | — | Launch 2 |
| 7 | Buffer | — | — |

---

## Dependency Graph

### Launch 1 Critical Path
```
1.1.1 → 1.1.3 → 1.2.1 → 1.3.1 → 1.3.7 → 1.4.6 → 1.4.10 → 1.5.9 → 1.5.10
```

### Launch 2 Critical Path
```
2.1.2 (TRREB boundaries) ─┬→ 2.1.8 (CRS) → 2.5.2 (choropleth) → 2.5.8 (layout) → 2.5.22 (test) → 2.7.8 (deploy)
                          │
2.1.1 → 2.2.3 (parser) → 2.2.4 → 2.3.12 (loaders) → 2.4.8 (dbt) ─┘
```

### Parallel Tracks (can run simultaneously)

| Track | Tasks | Can Start |
|-------|-------|-----------|
| **A: TRREB Boundaries** | 2.1.1 → 2.1.2 | Sprint 1 |
| **B: TRREB Parser** | 2.2.3–2.2.5 | Sprint 2 (after PDFs) |
| **C: CMHC** | 2.1.3–2.1.5 → 2.2.6–2.2.8 | Sprint 2 |
| **D: Enrichment** | 2.1.6–2.1.7 → 2.2.9 | Sprint 2 |
| **E: Policy Events** | 2.1.9–2.1.10 → 2.2.10 | Sprints 1–2 |
| **F: Schemas/Models** | 2.3.1–2.3.9 | Sprint 3 (after parsers) |
| **G: dbt** | 2.4.* | Sprint 4 (after loaders) |
| **H: Ops Scripts** | 2.7.1–2.7.5 | Sprint 4 |

---

## Risk Register

| Risk | Likelihood | Impact | Mitigation |
|------|------------|--------|------------|
| TRREB digitization slips | Medium | High | Start in Sprint 1; timebox; accept lower precision initially |
| PDF parser breaks on older years | Medium | Medium | Test multiple years early; build fallbacks |
| PostGIS geometry issues | Low | Medium | Validate CRS before load (2.1.8) |
| Choropleth performance | Low | Medium | Pre-aggregate; simplify geometries |
| Policy events research takes too long | Medium | Low | Ship with the 10-event minimum; expand post-launch |

---

## Acceptance Criteria

### Launch 1
- [ ] Bio page accessible via HTTPS
- [ ] All content from `bio_content_v2.md` rendered
- [ ] No placeholder text (`[USERNAME]`) visible
- [ ] Mobile responsive
- [ ] Social links functional

### Launch 2
- [ ] Choropleth renders TRREB districts
- [ ] Choropleth renders CMHC zones
- [ ] Purchase/rental mode toggle works
- [ ] Time navigation works (monthly for TRREB, annual for CMHC)
- [ ] Policy event markers visible on time series
- [ ] Neighbourhood overlay toggleable
- [ ] Methodology documentation published
- [ ] Data sources cited
- [ ] Health endpoint responds

---

## Effort Legend

| Level | Meaning |
|-------|---------|
| **Low** | Straightforward; minimal iteration expected |
| **Mid** | Requires debugging or multi-step coordination |
| **High** | Complex logic, external tools, or human intervention required |

---

*Document Version: 4.1*
*Created: January 2026*

69
notebooks/README.md
Normal file

@@ -0,0 +1,69 @@

# Toronto Neighbourhood Dashboard - Notebooks

Documentation notebooks for the Toronto Neighbourhood Dashboard visualizations. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern.

## Directory Structure

```
notebooks/
├── README.md          # This file
├── overview/          # Overview tab visualizations
├── housing/           # Housing tab visualizations
├── safety/            # Safety tab visualizations
├── demographics/      # Demographics tab visualizations
└── amenities/         # Amenities tab visualizations
```

## Notebook Template

Each notebook follows a standard two-section structure:

### Section 1: Data Reference

Documents the data pipeline:
- **Source Tables**: List of dbt marts/tables used
- **SQL Query**: The exact query to fetch data
- **Transformation Steps**: Any pandas/python transformations
- **Sample Output**: First 10 rows of the result

### Section 2: Data Visualization

Documents the figure creation:
- **Figure Factory**: Import from `portfolio_app.figures`
- **Parameters**: Key configuration options
- **Rendered Output**: The actual visualization

## Available Figure Factories

| Factory | Module | Use Case |
|---------|--------|----------|
| `create_choropleth` | `figures.choropleth` | Map visualizations |
| `create_ranking_bar` | `figures.bar_charts` | Top/bottom N rankings |
| `create_stacked_bar` | `figures.bar_charts` | Category breakdowns |
| `create_scatter` | `figures.scatter` | Correlation plots |
| `create_radar` | `figures.radar` | Multi-metric comparisons |
| `create_age_pyramid` | `figures.demographics` | Age distributions |
| `create_time_series` | `figures.time_series` | Trend lines |

## Usage

1. Start Jupyter from the project root:
```bash
jupyter notebook notebooks/
```

2. Ensure the database is running:
```bash
make docker-up
```

3. Each notebook is self-contained; run all cells top to bottom.

## Notebook Naming Convention

`{metric}_{chart_type}.ipynb`

Examples:
- `livability_choropleth.ipynb`
- `crime_trend_line.ipynb`
- `age_pyramid.ipynb`

0
notebooks/amenities/.gitkeep
Normal file

170
notebooks/amenities/amenity_index_choropleth.ipynb
Normal file

@@ -0,0 +1,170 @@

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Amenity Index Choropleth Map\n",
    "\n",
    "Displays total amenities per 1,000 residents across Toronto's 158 neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_amenities` | neighbourhood × year | amenity_index, total_amenities_per_1000, amenity_tier, geometry |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_id,\n",
    "    neighbourhood_name,\n",
    "    geometry,\n",
    "    year,\n",
    "    total_amenities_per_1000,\n",
    "    amenity_index,\n",
    "    amenity_tier,\n",
    "    parks_per_1000,\n",
    "    schools_per_1000,\n",
    "    transit_per_1000,\n",
    "    total_amenities,\n",
    "    population\n",
    "FROM mart_neighbourhood_amenities\n",
    "WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
    "ORDER BY total_amenities_per_1000 DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent year\n",
    "2. Convert geometry to GeoJSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import geopandas as gpd\n",
    "import json\n",
    "\n",
    "gdf = gpd.GeoDataFrame(\n",
    "    df,\n",
    "    geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
    "    crs='EPSG:4326'\n",
    ")\n",
    "\n",
    "geojson = json.loads(gdf.to_json())\n",
    "data = df.drop(columns=['geometry']).to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[['neighbourhood_name', 'total_amenities_per_1000', 'amenity_index', 'amenity_tier']].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.choropleth import create_choropleth_figure\n",
    "\n",
    "fig = create_choropleth_figure(\n",
    "    geojson=geojson,\n",
    "    data=data,\n",
    "    location_key='neighbourhood_id',\n",
    "    color_column='total_amenities_per_1000',\n",
    "    hover_data=['neighbourhood_name', 'amenity_index', 'parks_per_1000', 'schools_per_1000'],\n",
    "    color_scale='Greens',\n",
    "    title='Toronto Amenities per 1,000 Population',\n",
    "    zoom=10,\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Amenity Tier Interpretation\n",
    "\n",
    "| Tier | Meaning |\n",
    "|------|--------|\n",
    "| 1 | Best served (top 20%) |\n",
    "| 2-4 | Middle tiers |\n",
    "| 5 | Underserved (bottom 20%) |"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

173
notebooks/amenities/amenity_radar.ipynb
Normal file

@@ -0,0 +1,173 @@

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Amenity Radar Chart\n",
    "\n",
    "Spider/radar chart comparing amenity categories for selected neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_amenities` | neighbourhood × year | parks_index, schools_index, transit_index |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_name,\n",
    "    parks_index,\n",
    "    schools_index,\n",
    "    transit_index,\n",
    "    amenity_index,\n",
    "    amenity_tier\n",
    "FROM mart_neighbourhood_amenities\n",
    "WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
    "ORDER BY amenity_index DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Select top 5 and bottom 5 neighbourhoods by amenity index\n",
    "2. Reshape for radar chart format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select representative neighbourhoods\n",
    "top_5 = df.head(5)\n",
    "bottom_5 = df.tail(5)\n",
    "\n",
    "# Prepare radar data\n",
    "categories = ['Parks', 'Schools', 'Transit']\n",
    "index_columns = ['parks_index', 'schools_index', 'transit_index']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Top 5 Amenity-Rich Neighbourhoods:\")\n",
    "display(top_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])\n",
    "print(\"\\nBottom 5 Underserved Neighbourhoods:\")\n",
    "display(bottom_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_radar_figure` from `portfolio_app.figures.radar`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.radar import create_radar_figure\n",
    "\n",
    "# Compare top neighbourhood vs city average (100)\n",
    "top_hood = top_5.iloc[0]\n",
    "\n",
    "data = [\n",
    "    {\n",
    "        'name': top_hood['neighbourhood_name'],\n",
    "        'values': [top_hood['parks_index'], top_hood['schools_index'], top_hood['transit_index']],\n",
    "        'categories': categories\n",
    "    },\n",
    "    {\n",
    "        'name': 'City Average',\n",
    "        'values': [100, 100, 100],\n",
    "        'categories': categories\n",
    "    }\n",
    "]\n",
    "\n",
    "fig = create_radar_figure(\n",
    "    data=data,\n",
    "    title=f\"Amenity Profile: {top_hood['neighbourhood_name']} vs City Average\",\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Index Interpretation\n",
    "\n",
    "| Value | Meaning |\n",
    "|-------|--------|\n",
    "| < 100 | Below city average |\n",
    "| = 100 | City average |\n",
    "| > 100 | Above city average |"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

161
notebooks/amenities/transit_accessibility_bar.ipynb
Normal file

@@ -0,0 +1,161 @@

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transit Accessibility Bar Chart\n",
    "\n",
    "Shows transit stops per 1,000 residents across Toronto neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_amenities` | neighbourhood × year | transit_per_1000, transit_index, transit_count |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_name,\n",
    "    transit_per_1000,\n",
    "    transit_index,\n",
    "    transit_count,\n",
    "    population,\n",
    "    amenity_tier\n",
    "FROM mart_neighbourhood_amenities\n",
    "WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
    "    AND transit_per_1000 IS NOT NULL\n",
    "ORDER BY transit_per_1000 DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Sort by transit accessibility\n",
    "2. Select top 20 for visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = df.head(20).to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[['neighbourhood_name', 'transit_per_1000', 'transit_index', 'transit_count']].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.bar_charts import create_horizontal_bar\n",
    "\n",
    "fig = create_horizontal_bar(\n",
    "    data=data,\n",
    "    name_column='neighbourhood_name',\n",
    "    value_column='transit_per_1000',\n",
    "    title='Top 20 Neighbourhoods by Transit Accessibility',\n",
    "    color='#00BCD4',\n",
    "    value_format='.2f',\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transit Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"City-wide Transit Statistics:\")\n",
    "print(f\"  Total Transit Stops: {df['transit_count'].sum():,.0f}\")\n",
    "print(f\"  Average per 1,000 pop: {df['transit_per_1000'].mean():.2f}\")\n",
    "print(f\"  Median per 1,000 pop: {df['transit_per_1000'].median():.2f}\")\n",
    "print(f\"  Best Access: {df['transit_per_1000'].max():.2f} per 1,000\")\n",
    "print(f\"  Worst Access: {df['transit_per_1000'].min():.2f} per 1,000\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

0
notebooks/demographics/.gitkeep
Normal file

173
notebooks/demographics/age_distribution.ipynb
Normal file

@@ -0,0 +1,173 @@

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Age Distribution Analysis\n",
    "\n",
    "Compares median age and age index across Toronto neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_demographics` | neighbourhood × year | median_age, age_index, city_avg_age |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_name,\n",
    "    median_age,\n",
    "    age_index,\n",
    "    city_avg_age,\n",
    "    population,\n",
    "    income_quintile,\n",
    "    pct_renter_occupied\n",
    "FROM mart_neighbourhood_demographics\n",
    "WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
    "    AND median_age IS NOT NULL\n",
    "ORDER BY median_age DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods with age data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent census year\n",
    "2. Calculate deviation from city average\n",
    "3. Classify as younger/older than average"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "city_avg = df['city_avg_age'].iloc[0]\n",
    "df['age_category'] = df['median_age'].apply(\n",
    "    lambda x: 'Younger' if x < city_avg else 'Older'\n",
    ")\n",
    "df['age_deviation'] = df['median_age'] - city_avg\n",
    "\n",
    "data = df.to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"City Average Age: {city_avg:.1f}\")\n",
    "print(\"\\nYoungest Neighbourhoods:\")\n",
    "display(df.tail(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])\n",
    "print(\"\\nOldest Neighbourhoods:\")\n",
    "display(df.head(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.bar_charts import create_ranking_bar\n",
    "\n",
    "fig = create_ranking_bar(\n",
    "    data=data,\n",
    "    name_column='neighbourhood_name',\n",
    "    value_column='median_age',\n",
    "    title='Youngest & Oldest Neighbourhoods (Median Age)',\n",
    "    top_n=10,\n",
    "    bottom_n=10,\n",
    "    color_top='#FF9800',  # Orange for older\n",
    "    color_bottom='#2196F3',  # Blue for younger\n",
    "    value_format='.1f',\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Age vs Income Correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age by income quintile\n",
    "print(\"Median Age by Income Quintile:\")\n",
    "df.groupby('income_quintile')['median_age'].mean().round(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

173
notebooks/demographics/income_choropleth.ipynb
Normal file

@@ -0,0 +1,173 @@

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Median Income Choropleth Map\n",
    "\n",
    "Displays median household income across Toronto's 158 neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_demographics` | neighbourhood × year | median_household_income, income_index, income_quintile, geometry |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_id,\n",
    "    neighbourhood_name,\n",
    "    geometry,\n",
    "    year,\n",
    "    median_household_income,\n",
    "    income_index,\n",
    "    income_quintile,\n",
    "    population,\n",
    "    unemployment_rate\n",
    "FROM mart_neighbourhood_demographics\n",
    "WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
    "ORDER BY median_household_income DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent census year\n",
    "2. Convert geometry to GeoJSON\n",
    "3. Scale income to thousands for readability"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import geopandas as gpd\n",
    "import json\n",
    "\n",
    "df['income_thousands'] = df['median_household_income'] / 1000\n",
    "\n",
    "gdf = gpd.GeoDataFrame(\n",
    "    df,\n",
    "    geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
    "    crs='EPSG:4326'\n",
    ")\n",
    "\n",
    "geojson = json.loads(gdf.to_json())\n",
    "data = df.drop(columns=['geometry']).to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[['neighbourhood_name', 'median_household_income', 'income_index', 'income_quintile']].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.choropleth import create_choropleth_figure\n",
    "\n",
    "fig = create_choropleth_figure(\n",
    "    geojson=geojson,\n",
    "    data=data,\n",
    "    location_key='neighbourhood_id',\n",
    "    color_column='median_household_income',\n",
    "    hover_data=['neighbourhood_name', 'income_index', 'income_quintile'],\n",
    "    color_scale='Viridis',\n",
    "    title='Toronto Median Household Income by Neighbourhood',\n",
    "    zoom=10,\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Income Quintile Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby('income_quintile')['median_household_income'].agg(['count', 'mean', 'min', 'max']).round(0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

|
||||
161
notebooks/demographics/population_density_bar.ipynb
Normal file
@@ -0,0 +1,161 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Population Density Bar Chart\n",
"\n",
"Shows population density (people per sq km) across Toronto neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_demographics` | neighbourhood × year | population_density, population, land_area_sqkm |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_name,\n",
" population_density,\n",
" population,\n",
" land_area_sqkm,\n",
" median_household_income,\n",
" pct_renter_occupied\n",
"FROM mart_neighbourhood_demographics\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
" AND population_density IS NOT NULL\n",
"ORDER BY population_density DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Sort by population density\n",
"2. Select top 20 most dense neighbourhoods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = df.head(20).to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'population_density', 'population', 'land_area_sqkm']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.bar_charts import create_horizontal_bar\n",
"\n",
"fig = create_horizontal_bar(\n",
" data=data,\n",
" name_column='neighbourhood_name',\n",
" value_column='population_density',\n",
" title='Top 20 Most Dense Neighbourhoods',\n",
" color='#9C27B0',\n",
" value_format=',.0f',\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Density Statistics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"City-wide Statistics:\")\n",
"print(f\" Total Population: {df['population'].sum():,.0f}\")\n",
"print(f\" Total Area: {df['land_area_sqkm'].sum():,.1f} sq km\")\n",
"print(f\" Average Density: {df['population_density'].mean():,.0f} per sq km\")\n",
"print(f\" Max Density: {df['population_density'].max():,.0f} per sq km\")\n",
"print(f\" Min Density: {df['population_density'].min():,.0f} per sq km\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
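The density figures above come straight from the mart, but the relationship is simple enough to sanity-check locally. A minimal sketch, assuming `population_density` in `mart_neighbourhood_demographics` is simply `population / land_area_sqkm` (the rows below are illustrative, not mart data):

```python
import pandas as pd

# Illustrative values only; real rows come from mart_neighbourhood_demographics.
df = pd.DataFrame({
    "neighbourhood_name": ["Alpha", "Beta"],
    "population": [34_000, 12_500],
    "land_area_sqkm": [4.2, 6.8],
})

# Assumed definition: people per square kilometre.
df["population_density"] = (df["population"] / df["land_area_sqkm"]).round(0)
print(df)
```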
174
notebooks/housing/affordability_choropleth.ipynb
Normal file
@@ -0,0 +1,174 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Affordability Index Choropleth Map\n",
"\n",
"Displays housing affordability across Toronto's 158 neighbourhoods. Index of 100 = city average."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_housing` | neighbourhood × year | affordability_index, rent_to_income_pct, avg_rent_2bed, geometry |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_id,\n",
" neighbourhood_name,\n",
" geometry,\n",
" year,\n",
" affordability_index,\n",
" rent_to_income_pct,\n",
" avg_rent_2bed,\n",
" median_household_income,\n",
" is_affordable\n",
"FROM mart_neighbourhood_housing\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_housing)\n",
"ORDER BY affordability_index ASC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Filter to most recent year\n",
"2. Convert geometry to GeoJSON\n",
"3. Lower index = more affordable (inverted for visualization clarity)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import json\n",
"\n",
"gdf = gpd.GeoDataFrame(\n",
" df,\n",
" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
" crs='EPSG:4326'\n",
")\n",
"\n",
"geojson = json.loads(gdf.to_json())\n",
"data = df.drop(columns=['geometry']).to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'affordability_index', 'rent_to_income_pct', 'avg_rent_2bed', 'is_affordable']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
"\n",
"**Key Parameters:**\n",
"- `color_column`: 'affordability_index'\n",
"- `color_scale`: 'RdYlGn_r' (reversed: green=affordable, red=expensive)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
"\n",
"fig = create_choropleth_figure(\n",
" geojson=geojson,\n",
" data=data,\n",
" location_key='neighbourhood_id',\n",
" color_column='affordability_index',\n",
" hover_data=['neighbourhood_name', 'rent_to_income_pct', 'avg_rent_2bed'],\n",
" color_scale='RdYlGn_r', # Reversed: lower index (affordable) = green\n",
" title='Toronto Housing Affordability Index',\n",
" zoom=10,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Index Interpretation\n",
"\n",
"| Index | Meaning |\n",
"|-------|--------|\n",
"| < 100 | More affordable than city average |\n",
"| = 100 | City average affordability |\n",
"| > 100 | Less affordable than city average |\n",
"\n",
"Affordability calculated as: `rent_to_income_pct / city_avg_rent_to_income * 100`"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
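The notebook above states the index formula but never computes it locally. A minimal sketch of that calculation, assuming `rent_to_income_pct` is available per neighbourhood and the city baseline is an unweighted mean (illustrative values, not mart data):

```python
import pandas as pd

# Illustrative values; real rows come from mart_neighbourhood_housing.
df = pd.DataFrame({
    "neighbourhood_name": ["Alpha", "Beta", "Gamma"],
    "rent_to_income_pct": [28.0, 35.0, 21.0],
})

# affordability_index = rent_to_income_pct / city_avg_rent_to_income * 100
city_avg = df["rent_to_income_pct"].mean()
df["affordability_index"] = df["rent_to_income_pct"] / city_avg * 100

# Index < 100: rent takes a smaller share of income than the city average.
print(df.sort_values("affordability_index"))
```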
183
notebooks/housing/rent_trend_line.ipynb
Normal file
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Rent Trend Line Chart\n",
"\n",
"Shows 5-year rental price trends across Toronto neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_housing` | neighbourhood × year | year, avg_rent_2bed, rent_yoy_change_pct |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"# City-wide average rent by year\n",
"query = \"\"\"\n",
"SELECT\n",
" year,\n",
" AVG(avg_rent_bachelor) as avg_rent_bachelor,\n",
" AVG(avg_rent_1bed) as avg_rent_1bed,\n",
" AVG(avg_rent_2bed) as avg_rent_2bed,\n",
" AVG(avg_rent_3bed) as avg_rent_3bed,\n",
" AVG(rent_yoy_change_pct) as avg_yoy_change\n",
"FROM mart_neighbourhood_housing\n",
"WHERE year >= (SELECT MAX(year) - 5 FROM mart_neighbourhood_housing)\n",
"GROUP BY year\n",
"ORDER BY year\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} years of rent data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Aggregate rent by year (city-wide average)\n",
"2. Convert year to datetime for proper x-axis\n",
"3. Reshape for multi-line chart by bedroom type"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create date column from year\n",
"df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n",
"\n",
"# Melt for multi-line chart\n",
"df_melted = df.melt(\n",
" id_vars=['year', 'date'],\n",
" value_vars=['avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed'],\n",
" var_name='bedroom_type',\n",
" value_name='avg_rent'\n",
")\n",
"\n",
"# Clean labels\n",
"df_melted['bedroom_type'] = df_melted['bedroom_type'].map({\n",
" 'avg_rent_bachelor': 'Bachelor',\n",
" 'avg_rent_1bed': '1 Bedroom',\n",
" 'avg_rent_2bed': '2 Bedroom',\n",
" 'avg_rent_3bed': '3 Bedroom'\n",
"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['year', 'avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed', 'avg_yoy_change']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_price_time_series` from `portfolio_app.figures.time_series`.\n",
"\n",
"**Key Parameters:**\n",
"- `date_column`: 'date'\n",
"- `price_column`: 'avg_rent'\n",
"- `group_column`: 'bedroom_type' (for multi-line)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.time_series import create_price_time_series\n",
"\n",
"data = df_melted.to_dict('records')\n",
"\n",
"fig = create_price_time_series(\n",
" data=data,\n",
" date_column='date',\n",
" price_column='avg_rent',\n",
" group_column='bedroom_type',\n",
" title='Toronto Average Rent Trend (5 Years)',\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### YoY Change Analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show year-over-year changes\n",
"print(\"Year-over-Year Rent Change (%)\")\n",
"df[['year', 'avg_yoy_change']].dropna()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
188
notebooks/housing/tenure_breakdown_bar.ipynb
Normal file
@@ -0,0 +1,188 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Housing Tenure Breakdown Bar Chart\n",
"\n",
"Shows the distribution of owner-occupied vs renter-occupied dwellings across neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_housing` | neighbourhood × year | pct_owner_occupied, pct_renter_occupied, income_quintile |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_name,\n",
" pct_owner_occupied,\n",
" pct_renter_occupied,\n",
" income_quintile,\n",
" total_rental_units,\n",
" average_dwelling_value\n",
"FROM mart_neighbourhood_housing\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_housing)\n",
" AND pct_owner_occupied IS NOT NULL\n",
"ORDER BY pct_renter_occupied DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods with tenure data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Filter to most recent year with tenure data\n",
"2. Melt owner/renter columns for stacked bar\n",
"3. Sort by renter percentage (highest first)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Prepare for stacked bar\n",
"df_stacked = df.melt(\n",
" id_vars=['neighbourhood_name', 'income_quintile'],\n",
" value_vars=['pct_owner_occupied', 'pct_renter_occupied'],\n",
" var_name='tenure_type',\n",
" value_name='percentage'\n",
")\n",
"\n",
"df_stacked['tenure_type'] = df_stacked['tenure_type'].map({\n",
" 'pct_owner_occupied': 'Owner',\n",
" 'pct_renter_occupied': 'Renter'\n",
"})\n",
"\n",
"data = df_stacked.to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Highest Renter Neighbourhoods:\")\n",
"df[['neighbourhood_name', 'pct_renter_occupied', 'pct_owner_occupied', 'income_quintile']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`.\n",
"\n",
"**Key Parameters:**\n",
"- `x_column`: 'neighbourhood_name'\n",
"- `value_column`: 'percentage'\n",
"- `category_column`: 'tenure_type'\n",
"- `show_percentages`: True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.bar_charts import create_stacked_bar\n",
"\n",
"# Show top 20 by renter percentage\n",
"top_20_names = df.head(20)['neighbourhood_name'].tolist()\n",
"data_filtered = [d for d in data if d['neighbourhood_name'] in top_20_names]\n",
"\n",
"fig = create_stacked_bar(\n",
" data=data_filtered,\n",
" x_column='neighbourhood_name',\n",
" value_column='percentage',\n",
" category_column='tenure_type',\n",
" title='Housing Tenure Mix - Top 20 Renter Neighbourhoods',\n",
" color_map={'Owner': '#4CAF50', 'Renter': '#2196F3'},\n",
" show_percentages=True,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### City-Wide Distribution"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# City-wide averages\n",
"print(f\"City Average Owner-Occupied: {df['pct_owner_occupied'].mean():.1f}%\")\n",
"print(f\"City Average Renter-Occupied: {df['pct_renter_occupied'].mean():.1f}%\")\n",
"\n",
"# By income quintile\n",
"print(\"\\nTenure by Income Quintile:\")\n",
"df.groupby('income_quintile')[['pct_owner_occupied', 'pct_renter_occupied']].mean().round(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
183
notebooks/overview/income_safety_scatter.ipynb
Normal file
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Income vs Safety Scatter Plot\n",
"\n",
"Explores the correlation between median household income and safety score across Toronto neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, median_household_income, safety_score, population |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_name,\n",
" median_household_income,\n",
" safety_score,\n",
" population,\n",
" livability_score,\n",
" crime_rate_per_100k\n",
"FROM mart_neighbourhood_overview\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
" AND median_household_income IS NOT NULL\n",
" AND safety_score IS NOT NULL\n",
"ORDER BY median_household_income DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods with income and safety data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Filter out null values for income and safety\n",
"2. Optionally scale income to thousands for readability\n",
"3. Pass to scatter figure factory with optional trendline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scale income to thousands for better axis readability\n",
"df['income_thousands'] = df['median_household_income'] / 1000\n",
"\n",
"# Prepare data for figure factory\n",
"data = df.to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'median_household_income', 'safety_score', 'crime_rate_per_100k']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_scatter_figure` from `portfolio_app.figures.scatter`.\n",
"\n",
"**Key Parameters:**\n",
"- `x_column`: 'income_thousands' (median household income in $K)\n",
"- `y_column`: 'safety_score' (0-100 percentile rank)\n",
"- `name_column`: 'neighbourhood_name' (hover label)\n",
"- `size_column`: 'population' (optional, bubble size)\n",
"- `trendline`: True (adds OLS regression line)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.scatter import create_scatter_figure\n",
"\n",
"fig = create_scatter_figure(\n",
" data=data,\n",
" x_column='income_thousands',\n",
" y_column='safety_score',\n",
" name_column='neighbourhood_name',\n",
" size_column='population',\n",
" title='Income vs Safety by Neighbourhood',\n",
" x_title='Median Household Income ($K)',\n",
" y_title='Safety Score (0-100)',\n",
" trendline=True,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Interpretation\n",
"\n",
"This scatter plot reveals the relationship between income and safety:\n",
"\n",
"- **Positive correlation**: Higher income neighbourhoods tend to have higher safety scores\n",
"- **Bubble size**: Represents population (larger = more people)\n",
"- **Trendline**: Orange dashed line shows the overall trend\n",
"- **Outliers**: Neighbourhoods far from the trendline are interesting cases\n",
" - Above line: Safer than income would predict\n",
" - Below line: Less safe than income would predict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate correlation coefficient\n",
"correlation = df['median_household_income'].corr(df['safety_score'])\n",
"print(f\"Correlation coefficient (Income vs Safety): {correlation:.3f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
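`trendline=True` is described above as adding an OLS regression line. For readers who want the line's parameters rather than the figure, a hedged sketch of the same fit with `numpy.polyfit` (a degree-1 polynomial fit is ordinary least squares; the numbers below are illustrative, not mart data):

```python
import numpy as np

income_k = np.array([45.0, 62.0, 80.0, 95.0, 120.0])  # income in $K, illustrative
safety = np.array([38.0, 51.0, 60.0, 71.0, 84.0])     # 0-100 safety scores, illustrative

# Ordinary least squares: slope and intercept of the best-fit line.
slope, intercept = np.polyfit(income_k, safety, deg=1)
print(f"safety ~= {slope:.2f} * income_k + {intercept:.1f}")
```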
184
notebooks/overview/livability_choropleth.ipynb
Normal file
@@ -0,0 +1,184 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Livability Score Choropleth Map\n",
"\n",
"Displays neighbourhood livability scores on an interactive map of Toronto's 158 neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_overview` | neighbourhood × year | livability_score, safety_score, affordability_score, amenity_score, geometry |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"# Connect to database\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_id,\n",
" neighbourhood_name,\n",
" geometry,\n",
" year,\n",
" livability_score,\n",
" safety_score,\n",
" affordability_score,\n",
" amenity_score,\n",
" population,\n",
" median_household_income\n",
"FROM mart_neighbourhood_overview\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
"ORDER BY livability_score DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Filter to most recent year of data\n",
"2. Extract GeoJSON from PostGIS geometry column\n",
"3. Pass to choropleth figure factory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Transform geometry to GeoJSON\n",
"import geopandas as gpd\n",
"import json\n",
"\n",
"# Convert WKB geometry to GeoDataFrame\n",
"gdf = gpd.GeoDataFrame(\n",
" df,\n",
" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
" crs='EPSG:4326'\n",
")\n",
"\n",
"# Create GeoJSON FeatureCollection\n",
"geojson = json.loads(gdf.to_json())\n",
"\n",
"# Prepare data for figure factory\n",
"data = df.drop(columns=['geometry']).to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'livability_score', 'safety_score', 'affordability_score', 'amenity_score']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
"\n",
"**Key Parameters:**\n",
"- `geojson`: GeoJSON FeatureCollection with neighbourhood boundaries\n",
"- `data`: List of dicts with neighbourhood_id and scores\n",
"- `location_key`: 'neighbourhood_id'\n",
"- `color_column`: 'livability_score' (or safety_score, etc.)\n",
"- `color_scale`: 'RdYlGn' (red=low, yellow=mid, green=high)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
"\n",
"fig = create_choropleth_figure(\n",
" geojson=geojson,\n",
" data=data,\n",
" location_key='neighbourhood_id',\n",
" color_column='livability_score',\n",
" hover_data=['neighbourhood_name', 'safety_score', 'affordability_score', 'amenity_score'],\n",
" color_scale='RdYlGn',\n",
" title='Toronto Neighbourhood Livability Score',\n",
" zoom=10,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Score Components\n",
"\n",
"The livability score is a weighted composite:\n",
"\n",
"| Component | Weight | Source |\n",
"|-----------|--------|--------|\n",
"| Safety | 30% | Inverse of crime rate per 100K |\n",
"| Affordability | 40% | Inverse of rent-to-income ratio |\n",
"| Amenities | 30% | Amenities per 1,000 residents |"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
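The Score Components table pins the weights down exactly, so the composite is easy to reproduce. A minimal sketch, assuming each component is already on a 0-100 scale (the mart presumably normalizes upstream; values below are illustrative):

```python
import pandas as pd

# Weights from the Score Components table above.
WEIGHTS = {"safety_score": 0.30, "affordability_score": 0.40, "amenity_score": 0.30}

# Illustrative component scores, each assumed to be 0-100.
df = pd.DataFrame({
    "safety_score": [82.0, 41.0],
    "affordability_score": [55.0, 78.0],
    "amenity_score": [67.0, 30.0],
})

# livability = 0.30*safety + 0.40*affordability + 0.30*amenities
df["livability_score"] = sum(df[col] * w for col, w in WEIGHTS.items())
print(df)
```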
167
notebooks/overview/top_bottom_10_bar.ipynb
Normal file
@@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Top & Bottom 10 Neighbourhoods Bar Chart\n",
"\n",
"Horizontal bar chart showing the highest and lowest scoring neighbourhoods by livability."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, livability_score |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_name,\n",
" livability_score,\n",
" safety_score,\n",
" affordability_score,\n",
" amenity_score\n",
"FROM mart_neighbourhood_overview\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
" AND livability_score IS NOT NULL\n",
"ORDER BY livability_score DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods with scores\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Sort by livability_score descending\n",
"2. Take top 10 and bottom 10\n",
"3. Pass to ranking bar figure factory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The figure factory handles top/bottom selection internally\n",
"# Just prepare as list of dicts\n",
"data = df.to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Top 5:\")\n",
"display(df.head(5))\n",
"print(\"\\nBottom 5:\")\n",
"display(df.tail(5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`.\n",
"\n",
"**Key Parameters:**\n",
"- `data`: List of dicts with all neighbourhoods\n",
"- `name_column`: 'neighbourhood_name'\n",
"- `value_column`: 'livability_score'\n",
"- `top_n`: 10 (green bars)\n",
"- `bottom_n`: 10 (red bars)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.bar_charts import create_ranking_bar\n",
"\n",
"fig = create_ranking_bar(\n",
" data=data,\n",
" name_column='neighbourhood_name',\n",
" value_column='livability_score',\n",
" title='Top & Bottom 10 Neighbourhoods by Livability',\n",
" top_n=10,\n",
" bottom_n=10,\n",
" color_top='#4CAF50', # Green for top performers\n",
" color_bottom='#F44336', # Red for bottom performers\n",
" value_format='.1f',\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Interpretation\n",
"\n",
"- **Green bars**: Highest livability scores (best combination of safety, affordability, and amenities)\n",
"- **Red bars**: Lowest livability scores (areas that may need targeted investment)\n",
"\n",
"The ranking bar chart provides quick context for which neighbourhoods stand out at either extreme."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
0
notebooks/safety/.gitkeep
Normal file
178
notebooks/safety/crime_breakdown_bar.ipynb
Normal file
@@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Crime Type Breakdown Bar Chart\n",
"\n",
"Stacked bar chart showing crime composition by Major Crime Indicator (MCI) categories."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_safety` | neighbourhood × year | assault_count, auto_theft_count, break_enter_count, robbery_count, etc. |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_name,\n",
" assault_count,\n",
" auto_theft_count,\n",
" break_enter_count,\n",
" robbery_count,\n",
" theft_over_count,\n",
" homicide_count,\n",
" total_incidents,\n",
" crime_rate_per_100k\n",
"FROM mart_neighbourhood_safety\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_safety)\n",
"ORDER BY total_incidents DESC\n",
"LIMIT 15\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded top {len(df)} neighbourhoods by crime volume\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Select top 15 neighbourhoods by total incidents\n",
"2. Melt crime type columns into rows\n",
"3. Pass to stacked bar figure factory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_melted = df.melt(\n",
" id_vars=['neighbourhood_name', 'total_incidents'],\n",
" value_vars=['assault_count', 'auto_theft_count', 'break_enter_count', \n",
" 'robbery_count', 'theft_over_count', 'homicide_count'],\n",
" var_name='crime_type',\n",
" value_name='count'\n",
")\n",
"\n",
"# Clean labels\n",
"df_melted['crime_type'] = df_melted['crime_type'].str.replace('_count', '').str.replace('_', ' ').str.title()\n",
"\n",
"data = df_melted.to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'assault_count', 'auto_theft_count', 'break_enter_count', 'total_incidents']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.bar_charts import create_stacked_bar\n",
"\n",
"fig = create_stacked_bar(\n",
" data=data,\n",
" x_column='neighbourhood_name',\n",
" value_column='count',\n",
" category_column='crime_type',\n",
" title='Crime Type Breakdown - Top 15 Neighbourhoods',\n",
" color_map={\n",
" 'Assault': '#d62728',\n",
" 'Auto Theft': '#ff7f0e',\n",
" 'Break Enter': '#9467bd',\n",
" 'Robbery': '#8c564b',\n",
" 'Theft Over': '#e377c2',\n",
" 'Homicide': '#1f77b4'\n",
" },\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MCI Categories\n",
"\n",
"| Category | Description |\n",
"|----------|------------|\n",
"| Assault | Physical attacks |\n",
"| Auto Theft | Vehicle theft |\n",
"| Break & Enter | Burglary |\n",
"| Robbery | Theft with force/threat |\n",
"| Theft Over | Theft > $5,000 |\n",
"| Homicide | Murder/manslaughter |"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
172
notebooks/safety/crime_rate_choropleth.ipynb
Normal file
@@ -0,0 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Crime Rate Choropleth Map\n",
"\n",
"Displays crime rates per 100,000 population across Toronto's 158 neighbourhoods."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_safety` | neighbourhood × year | crime_rate_per_100k, crime_index, safety_tier, geometry |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" neighbourhood_id,\n",
" neighbourhood_name,\n",
" geometry,\n",
" year,\n",
" crime_rate_per_100k,\n",
" crime_index,\n",
" safety_tier,\n",
" total_incidents,\n",
" population\n",
"FROM mart_neighbourhood_safety\n",
"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_safety)\n",
"ORDER BY crime_rate_per_100k DESC\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} neighbourhoods\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Filter to most recent year\n",
"2. Convert geometry to GeoJSON\n",
"3. Use reversed color scale (green=low crime, red=high crime)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import json\n",
"\n",
"gdf = gpd.GeoDataFrame(\n",
" df,\n",
" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
" crs='EPSG:4326'\n",
")\n",
"\n",
"geojson = json.loads(gdf.to_json())\n",
"data = df.drop(columns=['geometry']).to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['neighbourhood_name', 'crime_rate_per_100k', 'crime_index', 'safety_tier', 'total_incidents']].head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
"\n",
"**Key Parameters:**\n",
"- `color_column`: 'crime_rate_per_100k'\n",
"- `color_scale`: 'RdYlGn_r' (red=high crime, green=low crime)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
"\n",
"fig = create_choropleth_figure(\n",
" geojson=geojson,\n",
" data=data,\n",
" location_key='neighbourhood_id',\n",
" color_column='crime_rate_per_100k',\n",
" hover_data=['neighbourhood_name', 'crime_index', 'total_incidents'],\n",
" color_scale='RdYlGn_r',\n",
" title='Toronto Crime Rate per 100,000 Population',\n",
" zoom=10,\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Safety Tier Interpretation\n",
"\n",
"| Tier | Meaning |\n",
"|------|--------|\n",
"| 1 | Highest crime (top 20%) |\n",
"| 2-4 | Middle tiers |\n",
"| 5 | Lowest crime (bottom 20%) |"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
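The tier table reads as crime-rate quintiles. A hedged sketch of how such tiers could be assigned with `pandas.qcut` — an assumption about the mart's actual logic, with tier 1 as the highest-crime 20% as the table states (rates are illustrative):

```python
import pandas as pd

# Illustrative crime rates per 100K, one per neighbourhood.
rates = pd.Series([120.0, 480.0, 260.0, 90.0, 710.0, 330.0, 150.0, 40.0, 560.0, 205.0])

# Quintile bins; label 5 for the lowest-rate bin down to 1 for the highest.
tiers = pd.qcut(rates, q=5, labels=[5, 4, 3, 2, 1])
print(pd.DataFrame({"crime_rate_per_100k": rates, "safety_tier": tiers}))
```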
186
notebooks/safety/crime_trend_line.ipynb
Normal file
@@ -0,0 +1,186 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Crime Trend Line Chart\n",
"\n",
"Shows 5-year crime rate trends across Toronto."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Data Reference\n",
"\n",
"### Source Tables\n",
"\n",
"| Table | Grain | Key Columns |\n",
"|-------|-------|-------------|\n",
"| `mart_neighbourhood_safety` | neighbourhood × year | year, crime_rate_per_100k, crime_yoy_change_pct |\n",
"\n",
"### SQL Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import os\n",
"\n",
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
"\n",
"query = \"\"\"\n",
"SELECT\n",
" year,\n",
" AVG(crime_rate_per_100k) as avg_crime_rate,\n",
" AVG(assault_rate_per_100k) as avg_assault_rate,\n",
" AVG(auto_theft_rate_per_100k) as avg_auto_theft_rate,\n",
" AVG(break_enter_rate_per_100k) as avg_break_enter_rate,\n",
" SUM(total_incidents) as total_city_incidents,\n",
" AVG(crime_yoy_change_pct) as avg_yoy_change\n",
"FROM mart_neighbourhood_safety\n",
"WHERE year >= (SELECT MAX(year) - 5 FROM mart_neighbourhood_safety)\n",
"GROUP BY year\n",
"ORDER BY year\n",
"\"\"\"\n",
"\n",
"df = pd.read_sql(query, engine)\n",
"print(f\"Loaded {len(df)} years of crime data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformation Steps\n",
"\n",
"1. Aggregate by year (city-wide)\n",
"2. Convert year to datetime\n",
"3. Melt for multi-line by crime type"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n",
"\n",
"# Melt for multi-line\n",
"df_melted = df.melt(\n",
" id_vars=['year', 'date'],\n",
" value_vars=['avg_assault_rate', 'avg_auto_theft_rate', 'avg_break_enter_rate'],\n",
" var_name='crime_type',\n",
" value_name='rate_per_100k'\n",
")\n",
"\n",
"df_melted['crime_type'] = df_melted['crime_type'].map({\n",
" 'avg_assault_rate': 'Assault',\n",
" 'avg_auto_theft_rate': 'Auto Theft',\n",
" 'avg_break_enter_rate': 'Break & Enter'\n",
"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['year', 'avg_crime_rate', 'total_city_incidents', 'avg_yoy_change']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Data Visualization\n",
"\n",
"### Figure Factory\n",
"\n",
"Uses `create_price_time_series` (reused for any numeric trend)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../..')\n",
"\n",
"from portfolio_app.figures.time_series import create_price_time_series\n",
"\n",
"data = df_melted.to_dict('records')\n",
"\n",
"fig = create_price_time_series(\n",
" data=data,\n",
" date_column='date',\n",
" price_column='rate_per_100k',\n",
" group_column='crime_type',\n",
" title='Toronto Crime Trends by Type (5 Years)',\n",
")\n",
"\n",
"# Remove dollar sign formatting since this is rate data\n",
"fig.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Overall Trend"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Total crime rate trend\n",
"total_data = df[['date', 'avg_crime_rate']].rename(columns={'avg_crime_rate': 'total_rate'}).to_dict('records')\n",
"\n",
"fig2 = create_price_time_series(\n",
" data=total_data,\n",
" date_column='date',\n",
" price_column='total_rate',\n",
" title='Toronto Overall Crime Rate Trend',\n",
")\n",
"fig2.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n",
"fig2.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@@ -1,5 +1,5 @@
 """Application-level callbacks for the portfolio app."""
 
-from . import theme
+from . import sidebar, theme
 
-__all__ = ["theme"]
+__all__ = ["sidebar", "theme"]
25
portfolio_app/callbacks/sidebar.py
Normal file
@@ -0,0 +1,25 @@
"""Sidebar navigation callbacks for active state updates."""

from typing import Any

from dash import Input, Output, callback

from portfolio_app.components.sidebar import create_sidebar_content


@callback(  # type: ignore[misc]
    Output("floating-sidebar", "children"),
    Input("url", "pathname"),
    prevent_initial_call=False,
)
def update_sidebar_active_state(pathname: str) -> list[Any]:
    """Update sidebar to highlight the current page.

    Args:
        pathname: Current URL pathname from dcc.Location.

    Returns:
        Updated sidebar content with correct active state.
    """
    current_path = pathname or "/"
    return create_sidebar_content(current_path=current_path)
@@ -4,9 +4,18 @@ import dash_mantine_components as dmc
 from dash import dcc, html
 from dash_iconify import DashIconify
 
-# Navigation items configuration
-NAV_ITEMS = [
+# Navigation items configuration - main pages
+NAV_ITEMS_MAIN = [
     {"path": "/", "icon": "tabler:home", "label": "Home"},
+    {"path": "/about", "icon": "tabler:user", "label": "About"},
+    {"path": "/blog", "icon": "tabler:article", "label": "Blog"},
     {"path": "/resume", "icon": "tabler:file-text", "label": "Resume"},
     {"path": "/contact", "icon": "tabler:mail", "label": "Contact"},
 ]
+
+# Navigation items configuration - projects/dashboards (separated)
+NAV_ITEMS_PROJECTS = [
+    {"path": "/projects", "icon": "tabler:folder", "label": "Projects"},
+    {"path": "/toronto", "icon": "tabler:map-2", "label": "Toronto Housing"},
+]
+
@@ -135,22 +144,23 @@ def create_sidebar_divider() -> html.Div:
     return html.Div(className="sidebar-divider")
 
 
-def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html.Div:
-    """Create the floating sidebar navigation.
+def create_sidebar_content(
+    current_path: str = "/", current_theme: str = "dark"
+) -> list[dmc.Tooltip | html.Div]:
+    """Create the sidebar content list.
 
     Args:
         current_path: Current page path for active state highlighting.
         current_theme: Current theme for toggle icon state.
 
     Returns:
-        Complete sidebar component.
+        List of sidebar components.
     """
-    return html.Div(
-        [
+    return [
         # Brand logo
         create_brand_logo(),
         create_sidebar_divider(),
-        # Navigation icons
+        # Main navigation icons
         *[
             create_nav_icon(
                 icon=item["icon"],
@@ -158,7 +168,18 @@ def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html
                 path=item["path"],
                 current_path=current_path,
             )
-            for item in NAV_ITEMS
+            for item in NAV_ITEMS_MAIN
         ],
+        create_sidebar_divider(),
+        # Dashboard/Project links
+        *[
+            create_nav_icon(
+                icon=item["icon"],
+                label=item["label"],
+                path=item["path"],
+                current_path=current_path,
+            )
+            for item in NAV_ITEMS_PROJECTS
+        ],
         create_sidebar_divider(),
         # Theme toggle
@@ -173,7 +194,21 @@ def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html
             )
             for link in EXTERNAL_LINKS
         ],
-        ],
-        className="floating-sidebar",
-    )
-
+    ]
+
+
+def create_sidebar(current_path: str = "/", current_theme: str = "dark") -> html.Div:
+    """Create the floating sidebar navigation.
+
+    Args:
+        current_path: Current page path for active state highlighting.
+        current_theme: Current theme for toggle icon state.
+
+    Returns:
+        Complete sidebar component.
+    """
+    return html.Div(
+        id="floating-sidebar",
+        className="floating-sidebar",
+        children=create_sidebar_content(current_path, current_theme),
+    )
111
portfolio_app/content/blog/building-data-platform-team-of-one.md
Normal file
@@ -0,0 +1,111 @@
---
title: "Building a Data Platform as a Team of One"
date: "2025-01-15"
description: "What I learned from 5 years as the sole data professional at a mid-size company"
tags:
  - data-engineering
  - career
  - lessons-learned
status: published
---

When I joined Summitt Energy in 2019, there was no data infrastructure. No warehouse. No pipelines. No documentation. Just a collection of spreadsheets and a Genesys Cloud instance spitting out CSVs.

Five years later, I'd built DataFlow: an enterprise platform processing 1B+ rows across 21 tables, feeding dashboards that executives actually opened. Here's what I learned doing it alone.

## The Reality of "Full Stack Data"

When you're the only data person, "full stack" isn't a buzzword—it's survival. In a single week, I might:

- Debug a Python ETL script at 7am because overnight loads failed
- Present quarterly metrics to leadership at 10am
- Design a new dimensional model over lunch
- Write SQL transformations in the afternoon
- Handle ad-hoc "can you pull this data?" requests between meetings

There's no handoff. No "that's not my job." Everything is your job.

## Prioritization Frameworks

The hardest part isn't the technical work—it's deciding what to build first when everything feels urgent.

### The 80/20 Rule, Applied Ruthlessly

I asked myself: **What 20% of the data drives 80% of decisions?**

For a contact center, that turned out to be:

- Call volume by interval
- Abandon rate
- Average handle time
- Service level

Everything else was nice-to-have. I built those four metrics first, got them bulletproof, then expanded.
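To make that concrete, here is a minimal sketch of those four metrics as a daily aggregation. It is illustrative only: the `calls` DataFrame, its column names, and the 30-second service-level flag are assumptions for the example, not the actual DataFlow code.

```python
import pandas as pd

# Assumed schema: one row per call, with an offer timestamp, an abandoned
# flag, handle time in seconds, and an answered-within-threshold flag.
calls = pd.read_parquet("calls.parquet")  # hypothetical extract

daily = calls.groupby(calls["offered_at"].dt.date).agg(
    call_volume=("call_id", "count"),               # calls offered per day
    abandon_rate=("abandoned", "mean"),             # share of offered calls abandoned
    avg_handle_time=("handle_seconds", "mean"),     # talk + hold + wrap-up
    service_level=("answered_within_30s", "mean"),  # share answered within 30s
)
```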
### The "Who's Screaming?" Test
|
||||
|
||||
When multiple stakeholders want different things:
|
||||
1. Who has executive backing?
|
||||
2. What's blocking revenue?
|
||||
3. What's causing visible pain?
|
||||
|
||||
If nobody's screaming, it can probably wait.
|
||||
|
||||
## Technical Debt vs. Shipping
|
||||
|
||||
I rewrote DataFlow three times:
|
||||
|
||||
- **v1 (2020)**: Hacky Python scripts. Worked, barely.
|
||||
- **v2 (2021)**: Proper dimensional model. Still messy code.
|
||||
- **v3 (2022)**: SQLAlchemy ORM, proper error handling, logging.
|
||||
- **v4 (2023)**: dbt-style transformations, FastAPI layer.
|
||||
|
||||
Was v1 embarrassing? Yes. Did it work? Also yes.
|
||||
|
||||
**The lesson**: Ship something that works, then iterate. Perfect is the enemy of done, especially when you're alone.
|
||||
|
||||
## Building Stakeholder Trust
|
||||
|
||||
The technical work is maybe 40% of the job. The rest is politics.
|
||||
|
||||
### Quick Wins First
|
||||
|
||||
Before asking for resources or patience, I delivered:
|
||||
- Automated a weekly report that took someone 4 hours
|
||||
- Fixed a dashboard that had been wrong for months
|
||||
- Built a simple tool that answered a frequent question
|
||||
|
||||
Trust is earned in small deposits.
|
||||
|
||||
### Speak Their Language
|
||||
|
||||
Executives don't care about your star schema. They care about:
|
||||
- "This will save 10 hours/week"
|
||||
- "This will catch errors before they hit customers"
|
||||
- "This will let you see X in real-time"
|
||||
|
||||
Translate technical work into business outcomes.
|
||||
|
||||
## What I'd Do Differently
|
||||
|
||||
1. **Document earlier**. I waited too long. When I finally wrote things down, onboarding became possible.
|
||||
|
||||
2. **Say no more**. Every "yes" to an ad-hoc request is a "no" to infrastructure work. Guard your time.
|
||||
|
||||
3. **Build monitoring first**. I spent too many mornings discovering failures manually. Alerting should be table stakes.
|
||||
|
||||
4. **Version control everything**. Even SQL. Even documentation. If it's not in Git, it doesn't exist.
|
||||
|
||||
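Since I keep recommending it, here is a minimal sketch of "alerting first." Everything in it is hypothetical: the SMTP host, the addresses, and the `load_fn` callable are placeholders. The point is that the failure path pages someone instead of waiting for a stale dashboard to be noticed.

```python
import logging
import smtplib
from email.message import EmailMessage
from typing import Callable

logger = logging.getLogger("etl")


def alert(subject: str, body: str) -> None:
    """Email an alert on pipeline failure (hypothetical SMTP config)."""
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = "etl@example.com"
    msg["To"] = "data-team@example.com"
    msg.set_content(body)
    with smtplib.SMTP("smtp.example.com") as smtp:
        smtp.send_message(msg)


def run_with_alerting(name: str, load_fn: Callable[[], None]) -> None:
    """Run one load step; log success, alert loudly on failure."""
    try:
        load_fn()
        logger.info("load %s succeeded", name)
    except Exception as exc:
        logger.exception("load %s failed", name)
        alert(f"[ETL] {name} failed", str(exc))
        raise
```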
## The Upside

Being a team of one forced me to learn things I'd have specialized away from on a bigger team:

- Data modeling
- Pipeline architecture
- Dashboard design
- Stakeholder management
- System administration

It's brutal, but it makes you dangerous. You understand the whole stack.

---

*This is part of a series on building data infrastructure at small companies. More posts coming on dimensional modeling, dbt patterns, and surviving legacy systems.*
@@ -1,10 +1,27 @@
 """Plotly figure factories for data visualization."""
 
+from .bar_charts import (
+    create_horizontal_bar,
+    create_ranking_bar,
+    create_stacked_bar,
+)
 from .choropleth import (
     create_choropleth_figure,
-    create_district_map,
     create_zone_map,
 )
+from .demographics import (
+    create_age_pyramid,
+    create_donut_chart,
+    create_income_distribution,
+)
+from .radar import (
+    create_comparison_radar,
+    create_radar_figure,
+)
+from .scatter import (
+    create_bubble_chart,
+    create_scatter_figure,
+)
 from .summary_cards import create_metric_card_figure, create_summary_metrics
 from .time_series import (
     add_policy_markers,
@@ -17,7 +34,6 @@ from .time_series import (
 __all__ = [
     # Choropleth
     "create_choropleth_figure",
-    "create_district_map",
     "create_zone_map",
     # Time series
     "create_price_time_series",
@@ -28,4 +44,18 @@ __all__ = [
     # Summary
     "create_metric_card_figure",
     "create_summary_metrics",
+    # Bar charts
+    "create_ranking_bar",
+    "create_stacked_bar",
+    "create_horizontal_bar",
+    # Scatter plots
+    "create_scatter_figure",
+    "create_bubble_chart",
+    # Radar charts
+    "create_radar_figure",
+    "create_comparison_radar",
+    # Demographics
+    "create_age_pyramid",
+    "create_donut_chart",
+    "create_income_distribution",
 ]
238
portfolio_app/figures/bar_charts.py
Normal file
@@ -0,0 +1,238 @@
"""Bar chart figure factories for dashboard visualizations."""

from typing import Any

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def create_ranking_bar(
    data: list[dict[str, Any]],
    name_column: str,
    value_column: str,
    title: str | None = None,
    top_n: int = 10,
    bottom_n: int = 10,
    color_top: str = "#4CAF50",
    color_bottom: str = "#F44336",
    value_format: str = ",.0f",
) -> go.Figure:
    """Create horizontal bar chart showing top and bottom rankings.

    Args:
        data: List of data records.
        name_column: Column name for labels.
        value_column: Column name for values.
        title: Optional chart title.
        top_n: Number of top items to show.
        bottom_n: Number of bottom items to show.
        color_top: Color for top performers.
        color_bottom: Color for bottom performers.
        value_format: Number format string for values.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Rankings")

    df = pd.DataFrame(data).sort_values(value_column, ascending=False)

    # Get top and bottom
    top_df = df.head(top_n).copy()
    bottom_df = df.tail(bottom_n).copy()

    top_df["group"] = "Top"
    bottom_df["group"] = "Bottom"

    # Combine with gap in the middle
    combined = pd.concat([top_df, bottom_df])
    combined["color"] = combined["group"].map(
        {"Top": color_top, "Bottom": color_bottom}
    )

    fig = go.Figure()

    # Add top bars
    fig.add_trace(
        go.Bar(
            y=top_df[name_column],
            x=top_df[value_column],
            orientation="h",
            marker_color=color_top,
            name="Top",
            text=top_df[value_column].apply(lambda x: f"{x:{value_format}}"),
            textposition="auto",
            hovertemplate=f"%{{y}}<br>{value_column}: %{{x:{value_format}}}<extra></extra>",
        )
    )

    # Add bottom bars
    fig.add_trace(
        go.Bar(
            y=bottom_df[name_column],
            x=bottom_df[value_column],
            orientation="h",
            marker_color=color_bottom,
            name="Bottom",
            text=bottom_df[value_column].apply(lambda x: f"{x:{value_format}}"),
            textposition="auto",
            hovertemplate=f"%{{y}}<br>{value_column}: %{{x:{value_format}}}<extra></extra>",
        )
    )

    fig.update_layout(
        title=title,
        barmode="group",
        showlegend=True,
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
        yaxis={"autorange": "reversed", "title": None},
        margin={"l": 10, "r": 10, "t": 40, "b": 10},
    )

    return fig


def create_stacked_bar(
    data: list[dict[str, Any]],
    x_column: str,
    value_column: str,
    category_column: str,
    title: str | None = None,
    color_map: dict[str, str] | None = None,
    show_percentages: bool = False,
) -> go.Figure:
    """Create stacked bar chart for breakdown visualizations.

    Args:
        data: List of data records.
        x_column: Column name for x-axis categories.
        value_column: Column name for values.
        category_column: Column name for stacking categories.
        title: Optional chart title.
        color_map: Mapping of category to color.
        show_percentages: Whether to normalize to 100%.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Breakdown")

    df = pd.DataFrame(data)

    # Default color scheme
    if color_map is None:
        categories = df[category_column].unique()
        colors = px.colors.qualitative.Set2[: len(categories)]
        color_map = dict(zip(categories, colors, strict=False))

    fig = px.bar(
        df,
        x=x_column,
        y=value_column,
        color=category_column,
        color_discrete_map=color_map,
        barmode="stack",
        text=value_column if not show_percentages else None,
    )

    if show_percentages:
        fig.update_traces(texttemplate="%{y:.1f}%", textposition="inside")

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        margin={"l": 10, "r": 10, "t": 60, "b": 10},
    )

    return fig


def create_horizontal_bar(
    data: list[dict[str, Any]],
    name_column: str,
    value_column: str,
    title: str | None = None,
    color: str = "#2196F3",
    value_format: str = ",.0f",
    sort: bool = True,
) -> go.Figure:
    """Create simple horizontal bar chart.

    Args:
        data: List of data records.
        name_column: Column name for labels.
        value_column: Column name for values.
        title: Optional chart title.
        color: Bar color.
        value_format: Number format string.
        sort: Whether to sort by value descending.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Bar Chart")

    df = pd.DataFrame(data)

    if sort:
        df = df.sort_values(value_column, ascending=True)

    fig = go.Figure(
        go.Bar(
            y=df[name_column],
            x=df[value_column],
            orientation="h",
            marker_color=color,
            text=df[value_column].apply(lambda x: f"{x:{value_format}}"),
            textposition="outside",
            hovertemplate=f"%{{y}}<br>Value: %{{x:{value_format}}}<extra></extra>",
        )
    )

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
        yaxis={"title": None},
        margin={"l": 10, "r": 10, "t": 40, "b": 10},
    )

    return fig


def _create_empty_figure(title: str) -> go.Figure:
    """Create an empty figure with a message."""
    fig = go.Figure()
    fig.add_annotation(
        text="No data available",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"visible": False},
        yaxis={"visible": False},
    )
    return fig
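A quick usage sketch for the new bar-chart factories above; the records below are invented sample data, not output from the real pipelines.

```python
from portfolio_app.figures.bar_charts import create_horizontal_bar, create_ranking_bar

# Invented sample records: one dict per area
sample = [
    {"name": "Downtown", "avg_price": 912000},
    {"name": "East End", "avg_price": 745000},
    {"name": "North York", "avg_price": 830000},
]

# Simple sorted horizontal bars
fig = create_horizontal_bar(
    sample,
    name_column="name",
    value_column="avg_price",
    title="Average Price by Area",
)
fig.show()

# Top/bottom ranking over the same records
create_ranking_bar(
    sample, name_column="name", value_column="avg_price", top_n=2, bottom_n=1
).show()
```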
@@ -115,34 +115,6 @@ def create_choropleth_figure(
     return fig
 
 
-def create_district_map(
-    districts_geojson: dict[str, Any] | None,
-    purchase_data: list[dict[str, Any]],
-    metric: str = "avg_price",
-) -> go.Figure:
-    """Create choropleth map for TRREB districts.
-
-    Args:
-        districts_geojson: GeoJSON for TRREB district boundaries.
-        purchase_data: Purchase statistics by district.
-        metric: Metric to display (avg_price, sales_count, etc.).
-
-    Returns:
-        Plotly Figure object.
-    """
-    hover_columns = ["district_name", "sales_count", "avg_price", "median_price"]
-
-    return create_choropleth_figure(
-        geojson=districts_geojson,
-        data=purchase_data,
-        location_key="district_code",
-        color_column=metric,
-        hover_data=[c for c in hover_columns if c != metric],
-        color_scale="Blues" if "price" in metric else "Greens",
-        title="Toronto Purchase Market by District",
-    )
-
-
 def create_zone_map(
     zones_geojson: dict[str, Any] | None,
     rental_data: list[dict[str, Any]],
240
portfolio_app/figures/demographics.py
Normal file
@@ -0,0 +1,240 @@
"""Demographics-specific chart factories."""

from typing import Any

import pandas as pd
import plotly.graph_objects as go


def create_age_pyramid(
    data: list[dict[str, Any]],
    age_groups: list[str],
    male_column: str = "male",
    female_column: str = "female",
    title: str | None = None,
) -> go.Figure:
    """Create population pyramid by age and gender.

    Args:
        data: List with one record per age group containing male/female counts.
        age_groups: List of age group labels in order (youngest to oldest).
        male_column: Column name for male population.
        female_column: Column name for female population.
        title: Optional chart title.

    Returns:
        Plotly Figure object.
    """
    if not data or not age_groups:
        return _create_empty_figure(title or "Age Distribution")

    df = pd.DataFrame(data)

    # Ensure data is ordered by age groups
    if "age_group" in df.columns:
        df["age_order"] = df["age_group"].apply(
            lambda x: age_groups.index(x) if x in age_groups else -1
        )
        df = df.sort_values("age_order")

    male_values = df[male_column].tolist() if male_column in df.columns else []
    female_values = df[female_column].tolist() if female_column in df.columns else []

    # Make male values negative for pyramid effect
    male_values_neg = [-v for v in male_values]

    fig = go.Figure()

    # Male bars (left side, negative values)
    fig.add_trace(
        go.Bar(
            y=age_groups,
            x=male_values_neg,
            orientation="h",
            name="Male",
            marker_color="#2196F3",
            hovertemplate="%{y}<br>Male: %{customdata:,}<extra></extra>",
            customdata=male_values,
        )
    )

    # Female bars (right side, positive values)
    fig.add_trace(
        go.Bar(
            y=age_groups,
            x=female_values,
            orientation="h",
            name="Female",
            marker_color="#E91E63",
            hovertemplate="%{y}<br>Female: %{x:,}<extra></extra>",
        )
    )

    # Calculate max for symmetric axis
    max_val = max(max(male_values, default=0), max(female_values, default=0))

    fig.update_layout(
        title=title,
        barmode="overlay",
        bargap=0.1,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={
            "title": "Population",
            "gridcolor": "rgba(128,128,128,0.2)",
            "range": [-max_val * 1.1, max_val * 1.1],
            "tickvals": [-max_val, -max_val / 2, 0, max_val / 2, max_val],
            "ticktext": [
                f"{max_val:,.0f}",
                f"{max_val / 2:,.0f}",
                "0",
                f"{max_val / 2:,.0f}",
                f"{max_val:,.0f}",
            ],
        },
        yaxis={"title": None, "gridcolor": "rgba(128,128,128,0.2)"},
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
        margin={"l": 10, "r": 10, "t": 60, "b": 10},
    )

    return fig


def create_donut_chart(
    data: list[dict[str, Any]],
    name_column: str,
    value_column: str,
    title: str | None = None,
    colors: list[str] | None = None,
    hole_size: float = 0.4,
) -> go.Figure:
    """Create donut chart for percentage breakdowns.

    Args:
        data: List of data records with name and value.
        name_column: Column name for labels.
        value_column: Column name for values.
        title: Optional chart title.
        colors: List of colors for segments.
        hole_size: Size of center hole (0-1).

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Distribution")

    df = pd.DataFrame(data)

    if colors is None:
        colors = [
            "#2196F3",
            "#4CAF50",
            "#FF9800",
            "#E91E63",
            "#9C27B0",
            "#00BCD4",
            "#FFC107",
            "#795548",
        ]

    fig = go.Figure(
        go.Pie(
            labels=df[name_column],
            values=df[value_column],
            hole=hole_size,
            marker_colors=colors[: len(df)],
            textinfo="percent+label",
            textposition="outside",
            hovertemplate="%{label}<br>%{value:,} (%{percent})<extra></extra>",
        )
    )

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        showlegend=False,
        margin={"l": 10, "r": 10, "t": 60, "b": 10},
    )

    return fig


def create_income_distribution(
    data: list[dict[str, Any]],
    bracket_column: str,
    count_column: str,
    title: str | None = None,
    color: str = "#4CAF50",
) -> go.Figure:
    """Create histogram-style bar chart for income distribution.

    Args:
        data: List of data records with income brackets and counts.
        bracket_column: Column name for income brackets.
        count_column: Column name for household counts.
        title: Optional chart title.
        color: Bar color.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Income Distribution")

    df = pd.DataFrame(data)

    fig = go.Figure(
        go.Bar(
            x=df[bracket_column],
            y=df[count_column],
            marker_color=color,
            text=df[count_column].apply(lambda x: f"{x:,}"),
            textposition="outside",
            hovertemplate="%{x}<br>Households: %{y:,}<extra></extra>",
        )
    )

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={
            "title": "Income Bracket",
            "gridcolor": "rgba(128,128,128,0.2)",
            "tickangle": -45,
        },
        yaxis={
            "title": "Households",
            "gridcolor": "rgba(128,128,128,0.2)",
        },
        margin={"l": 10, "r": 10, "t": 60, "b": 80},
    )

    return fig


def _create_empty_figure(title: str) -> go.Figure:
    """Create an empty figure with a message."""
    fig = go.Figure()
    fig.add_annotation(
        text="No data available",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"visible": False},
        yaxis={"visible": False},
    )
    return fig
166
portfolio_app/figures/radar.py
Normal file
@@ -0,0 +1,166 @@
"""Radar/spider chart figure factory for multi-metric comparison."""

from typing import Any

import plotly.graph_objects as go


def create_radar_figure(
    data: list[dict[str, Any]],
    metrics: list[str],
    name_column: str | None = None,
    title: str | None = None,
    fill: bool = True,
    colors: list[str] | None = None,
) -> go.Figure:
    """Create radar/spider chart for multi-axis comparison.

    Each record in data represents one entity (e.g., a neighbourhood)
    with values for each metric that will be plotted on a separate axis.

    Args:
        data: List of data records, each with values for the metrics.
        metrics: List of metric column names to display on radar axes.
        name_column: Column name for entity labels.
        title: Optional chart title.
        fill: Whether to fill the radar polygons.
        colors: List of colors for each data series.

    Returns:
        Plotly Figure object.
    """
    if not data or not metrics:
        return _create_empty_figure(title or "Radar Chart")

    # Default colors
    if colors is None:
        colors = [
            "#2196F3",
            "#4CAF50",
            "#FF9800",
            "#E91E63",
            "#9C27B0",
            "#00BCD4",
        ]

    fig = go.Figure()

    # Format axis labels
    axis_labels = [m.replace("_", " ").title() for m in metrics]

    for i, record in enumerate(data):
        values = [record.get(m, 0) or 0 for m in metrics]
        # Close the radar polygon
        values_closed = values + [values[0]]
        labels_closed = axis_labels + [axis_labels[0]]

        name = (
            record.get(name_column, f"Series {i + 1}")
            if name_column
            else f"Series {i + 1}"
        )
        color = colors[i % len(colors)]

        fig.add_trace(
            go.Scatterpolar(
                r=values_closed,
                theta=labels_closed,
                name=name,
                line={"color": color, "width": 2},
                fill="toself" if fill else None,
                fillcolor=f"rgba{_hex_to_rgba(color, 0.2)}" if fill else None,
                hovertemplate="%{theta}: %{r:.1f}<extra></extra>",
            )
        )

    fig.update_layout(
        title=title,
        polar={
            "radialaxis": {
                "visible": True,
                "gridcolor": "rgba(128,128,128,0.3)",
                "linecolor": "rgba(128,128,128,0.3)",
                "tickfont": {"color": "#c9c9c9"},
            },
            "angularaxis": {
                "gridcolor": "rgba(128,128,128,0.3)",
                "linecolor": "rgba(128,128,128,0.3)",
                "tickfont": {"color": "#c9c9c9"},
            },
            "bgcolor": "rgba(0,0,0,0)",
        },
        paper_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        showlegend=len(data) > 1,
        legend={"orientation": "h", "yanchor": "bottom", "y": -0.2},
        margin={"l": 40, "r": 40, "t": 60, "b": 40},
    )

    return fig


def create_comparison_radar(
    selected_data: dict[str, Any],
    average_data: dict[str, Any],
    metrics: list[str],
    selected_name: str = "Selected",
    average_name: str = "City Average",
    title: str | None = None,
) -> go.Figure:
    """Create radar chart comparing a selection to city average.

    Args:
        selected_data: Data for the selected entity.
        average_data: Data for the city average.
        metrics: List of metric column names.
        selected_name: Label for selected entity.
        average_name: Label for average.
        title: Optional chart title.

    Returns:
        Plotly Figure object.
    """
    if not selected_data or not average_data:
        return _create_empty_figure(title or "Comparison")

    data = [
        {**selected_data, "__name__": selected_name},
        {**average_data, "__name__": average_name},
    ]

    return create_radar_figure(
        data=data,
        metrics=metrics,
        name_column="__name__",
        title=title,
        colors=["#4CAF50", "#9E9E9E"],
    )


def _hex_to_rgba(hex_color: str, alpha: float) -> tuple[int, int, int, float]:
    """Convert hex color to RGBA tuple."""
    hex_color = hex_color.lstrip("#")
    r = int(hex_color[0:2], 16)
    g = int(hex_color[2:4], 16)
    b = int(hex_color[4:6], 16)
    return (r, g, b, alpha)


def _create_empty_figure(title: str) -> go.Figure:
    """Create an empty figure with a message."""
    fig = go.Figure()
    fig.add_annotation(
        text="No data available",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
    )
    return fig
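A matching usage sketch for the comparison helper above; the metric names and values are invented for illustration.

```python
from portfolio_app.figures.radar import create_comparison_radar

# Invented metric values for one neighbourhood vs. the city-wide average
selected = {"walk_score": 88, "transit_score": 74, "safety_index": 61}
average = {"walk_score": 70, "transit_score": 65, "safety_index": 70}

fig = create_comparison_radar(
    selected_data=selected,
    average_data=average,
    metrics=["walk_score", "transit_score", "safety_index"],
    selected_name="Riverdale",
    title="Neighbourhood vs. City Average",
)
fig.show()
```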
184
portfolio_app/figures/scatter.py
Normal file
@@ -0,0 +1,184 @@
"""Scatter plot figure factory for correlation views."""

from typing import Any

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def create_scatter_figure(
    data: list[dict[str, Any]],
    x_column: str,
    y_column: str,
    name_column: str | None = None,
    size_column: str | None = None,
    color_column: str | None = None,
    title: str | None = None,
    x_title: str | None = None,
    y_title: str | None = None,
    trendline: bool = False,
    color_scale: str = "Blues",
) -> go.Figure:
    """Create scatter plot for correlation visualization.

    Args:
        data: List of data records.
        x_column: Column name for x-axis values.
        y_column: Column name for y-axis values.
        name_column: Column name for point labels (hover).
        size_column: Column name for point sizes.
        color_column: Column name for color encoding.
        title: Optional chart title.
        x_title: X-axis title.
        y_title: Y-axis title.
        trendline: Whether to add OLS trendline.
        color_scale: Plotly color scale for continuous colors.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Scatter Plot")

    df = pd.DataFrame(data)

    # Build hover_data (passed through to px.scatter below)
    hover_data = {}
    if name_column and name_column in df.columns:
        hover_data[name_column] = True

    # Create scatter plot
    fig = px.scatter(
        df,
        x=x_column,
        y=y_column,
        size=size_column if size_column and size_column in df.columns else None,
        color=color_column if color_column and color_column in df.columns else None,
        color_continuous_scale=color_scale,
        hover_name=name_column,
        hover_data=hover_data,
        trendline="ols" if trendline else None,
        opacity=0.7,
    )

    # Style the markers
    fig.update_traces(
        marker={
            "line": {"width": 1, "color": "rgba(255,255,255,0.3)"},
        },
    )

    # Trendline styling
    if trendline:
        fig.update_traces(
            selector={"mode": "lines"},
            line={"color": "#FF9800", "dash": "dash", "width": 2},
        )

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={
            "gridcolor": "rgba(128,128,128,0.2)",
            "title": x_title or x_column.replace("_", " ").title(),
            "zeroline": False,
        },
        yaxis={
            "gridcolor": "rgba(128,128,128,0.2)",
            "title": y_title or y_column.replace("_", " ").title(),
            "zeroline": False,
        },
        margin={"l": 10, "r": 10, "t": 40, "b": 10},
        showlegend=color_column is not None,
    )

    return fig


def create_bubble_chart(
    data: list[dict[str, Any]],
    x_column: str,
    y_column: str,
    size_column: str,
    name_column: str | None = None,
    color_column: str | None = None,
    title: str | None = None,
    x_title: str | None = None,
    y_title: str | None = None,
    size_max: int = 50,
) -> go.Figure:
    """Create bubble chart with sized markers.

    Args:
        data: List of data records.
        x_column: Column name for x-axis values.
        y_column: Column name for y-axis values.
        size_column: Column name for bubble sizes.
        name_column: Column name for labels.
        color_column: Column name for colors.
        title: Optional chart title.
        x_title: X-axis title.
        y_title: Y-axis title.
        size_max: Maximum marker size in pixels.

    Returns:
        Plotly Figure object.
    """
    if not data:
        return _create_empty_figure(title or "Bubble Chart")

    df = pd.DataFrame(data)

    fig = px.scatter(
        df,
        x=x_column,
        y=y_column,
        size=size_column,
        color=color_column,
        hover_name=name_column,
        size_max=size_max,
        opacity=0.7,
    )

    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={
            "gridcolor": "rgba(128,128,128,0.2)",
            "title": x_title or x_column.replace("_", " ").title(),
        },
        yaxis={
            "gridcolor": "rgba(128,128,128,0.2)",
            "title": y_title or y_column.replace("_", " ").title(),
        },
        margin={"l": 10, "r": 10, "t": 40, "b": 10},
    )

    return fig


def _create_empty_figure(title: str) -> go.Figure:
    """Create an empty figure with a message."""
    fig = go.Figure()
    fig.add_annotation(
        text="No data available",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    fig.update_layout(
        title=title,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"visible": False},
        yaxis={"visible": False},
    )
    return fig
248
portfolio_app/pages/about.py
Normal file
@@ -0,0 +1,248 @@
"""About page - Professional narrative and background."""

import dash
import dash_mantine_components as dmc
from dash import dcc
from dash_iconify import DashIconify

dash.register_page(__name__, path="/about", name="About")

# Opening section
OPENING = """I didn't start in data. I started in project management—CAPM certified, ITIL trained, \
the whole corporate playbook. Then I realized I liked building systems more than managing timelines, \
and I was better at automating reports than attending meetings about them.

That pivot led me to where I am now: 8 years deep in data engineering, analytics, and the messy \
reality of turning raw information into something people can actually use."""

# What I Actually Do section
WHAT_I_DO_SHORT = "The short version: I build data infrastructure. Pipelines, warehouses, \
dashboards, automation—the invisible machinery that makes businesses run on data instead of gut feelings."

WHAT_I_DO_LONG = """The longer version: At Summitt Energy, I've been the sole data professional \
supporting 150+ employees across 9 markets (Canada and US). I inherited nothing—no data warehouse, \
no reporting infrastructure, no documentation. Over 5 years, I built DataFlow: an enterprise \
platform processing 1B+ rows, integrating contact center data, CRM systems, and legacy tools \
that definitely weren't designed to talk to each other.

That meant learning to be a generalist. I've done ETL pipeline development (Python, SQLAlchemy), \
dimensional modeling, dashboard design (Power BI, Plotly-Dash), API integration, and more \
stakeholder management than I'd like to admit. When you're the only data person, you learn to wear every hat."""

# How I Think About Data
DATA_PHILOSOPHY_INTRO = "I'm not interested in data for data's sake. The question I always \
start with: What decision does this help someone make?"

DATA_PHILOSOPHY_DETAIL = """Most of my work has been in operations-heavy environments—contact \
centers, energy retail, logistics. These aren't glamorous domains, but they're where data can \
have massive impact. A 30% improvement in abandon rate isn't just a metric; it's thousands of \
customers who didn't hang up frustrated. A 40% reduction in reporting time means managers can \
actually manage instead of wrestling with spreadsheets."""

DATA_PHILOSOPHY_CLOSE = "I care about outcomes, not technology stacks."

# Technical skills
TECH_SKILLS = {
    "Languages": "Python (Pandas, SQLAlchemy, FastAPI), SQL (MSSQL, PostgreSQL), R, VBA",
    "Data Engineering": "ETL/ELT pipelines, dimensional modeling (star schema), dbt patterns, batch processing, API integration, web scraping (Selenium)",
    "Visualization": "Plotly/Dash, Power BI, Tableau",
    "Platforms": "Genesys Cloud, Five9, Zoho, Azure DevOps",
    "Currently Learning": "Cloud certification (Azure DP-203), Airflow, Snowflake",
}

# Outside Work
OUTSIDE_WORK_INTRO = "I'm a Brazilian-Canadian based in Toronto. I speak Portuguese (native), \
English (fluent), and enough Spanish to survive."

OUTSIDE_WORK_ACTIVITIES = [
    "Building automation tools for small businesses through Bandit Labs (my side project)",
    "Contributing to open source (MCP servers, Claude Code plugins)",
    'Trying to explain to my kid why Daddy\'s job involves "making computers talk to each other"',
]

# What I'm Looking For
LOOKING_FOR_INTRO = "I'm currently exploring Senior Data Analyst and Data Engineer roles in \
the Toronto area (or remote). I'm most interested in:"

LOOKING_FOR_ITEMS = [
    "Companies that treat data as infrastructure, not an afterthought",
    "Teams where I can contribute to architecture decisions, not just execute tickets",
    "Operations-focused industries (energy, logistics, financial services, contact center tech)",
]

LOOKING_FOR_CLOSE = "If that sounds like your team, let's talk."


def create_section_title(title: str) -> dmc.Title:
    """Create a consistent section title."""
    return dmc.Title(title, order=2, size="h3", mb="sm")


def create_opening_section() -> dmc.Paper:
    """Create the opening/intro section."""
    paragraphs = OPENING.split("\n\n")
    return dmc.Paper(
        dmc.Stack(
            [dmc.Text(p, size="md") for p in paragraphs],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_what_i_do_section() -> dmc.Paper:
    """Create the What I Actually Do section."""
    return dmc.Paper(
        dmc.Stack(
            [
                create_section_title("What I Actually Do"),
                dmc.Text(WHAT_I_DO_SHORT, size="md", fw=500),
                dmc.Text(WHAT_I_DO_LONG, size="md"),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_philosophy_section() -> dmc.Paper:
    """Create the How I Think About Data section."""
    return dmc.Paper(
        dmc.Stack(
            [
                create_section_title("How I Think About Data"),
                dmc.Text(DATA_PHILOSOPHY_INTRO, size="md", fw=500),
                dmc.Text(DATA_PHILOSOPHY_DETAIL, size="md"),
                dmc.Text(DATA_PHILOSOPHY_CLOSE, size="md", fw=500, fs="italic"),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_tech_section() -> dmc.Paper:
    """Create the Technical Stuff section."""
    return dmc.Paper(
        dmc.Stack(
            [
                create_section_title("The Technical Stuff"),
                dmc.Stack(
                    [
                        dmc.Group(
                            [
                                dmc.Text(category + ":", fw=600, size="sm", w=150),
                                dmc.Text(skills, size="sm", c="dimmed"),
                            ],
                            gap="sm",
                            align="flex-start",
                            wrap="nowrap",
                        )
                        for category, skills in TECH_SKILLS.items()
                    ],
                    gap="xs",
                ),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_outside_work_section() -> dmc.Paper:
    """Create the Outside Work section."""
    return dmc.Paper(
        dmc.Stack(
            [
                create_section_title("Outside Work"),
                dmc.Text(OUTSIDE_WORK_INTRO, size="md"),
                dmc.Text("When I'm not staring at SQL, I'm usually:", size="md"),
                dmc.List(
                    [
                        dmc.ListItem(dmc.Text(item, size="md"))
                        for item in OUTSIDE_WORK_ACTIVITIES
                    ],
                    spacing="xs",
                ),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_looking_for_section() -> dmc.Paper:
    """Create the What I'm Looking For section."""
    return dmc.Paper(
        dmc.Stack(
            [
                create_section_title("What I'm Looking For"),
                dmc.Text(LOOKING_FOR_INTRO, size="md"),
                dmc.List(
                    [
                        dmc.ListItem(dmc.Text(item, size="md"))
                        for item in LOOKING_FOR_ITEMS
                    ],
                    spacing="xs",
                ),
                dmc.Text(LOOKING_FOR_CLOSE, size="md", fw=500),
                dmc.Group(
                    [
                        dcc.Link(
                            dmc.Button(
                                "Download Resume",
                                variant="filled",
                                leftSection=DashIconify(
                                    icon="tabler:download", width=18
                                ),
                            ),
                            href="/resume",
                        ),
                        dcc.Link(
                            dmc.Button(
                                "Contact Me",
                                variant="outline",
                                leftSection=DashIconify(icon="tabler:mail", width=18),
                            ),
                            href="/contact",
                        ),
                    ],
                    gap="sm",
                    mt="md",
                ),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


layout = dmc.Container(
    dmc.Stack(
        [
            dmc.Title("About", order=1, ta="center", mb="lg"),
            create_opening_section(),
            create_what_i_do_section(),
            create_philosophy_section(),
            create_tech_section(),
            create_outside_work_section(),
            create_looking_for_section(),
            dmc.Space(h=40),
        ],
        gap="xl",
    ),
    size="md",
    py="xl",
)
1
portfolio_app/pages/blog/__init__.py
Normal file
@@ -0,0 +1 @@
"""Blog pages package."""
147
portfolio_app/pages/blog/article.py
Normal file
@@ -0,0 +1,147 @@
"""Blog article page - Dynamic routing for individual articles."""

import dash
import dash_mantine_components as dmc
from dash import dcc, html
from dash_iconify import DashIconify

from portfolio_app.utils.markdown_loader import get_article

dash.register_page(
    __name__,
    path_template="/blog/<slug>",
    name="Article",
)


def create_not_found() -> dmc.Container:
    """Create 404 state for missing articles."""
    return dmc.Container(
        dmc.Stack(
            [
                dmc.ThemeIcon(
                    DashIconify(icon="tabler:file-unknown", width=48),
                    size=80,
                    radius="xl",
                    variant="light",
                    color="red",
                ),
                dmc.Title("Article Not Found", order=2),
                dmc.Text(
                    "The article you're looking for doesn't exist or has been moved.",
                    size="md",
                    c="dimmed",
                    ta="center",
                ),
                dcc.Link(
                    dmc.Button(
                        "Back to Blog",
                        variant="light",
                        leftSection=DashIconify(icon="tabler:arrow-left", width=18),
                    ),
                    href="/blog",
                ),
            ],
            align="center",
            gap="md",
            py="xl",
        ),
        size="md",
        py="xl",
    )


def layout(slug: str = "") -> dmc.Container:
    """Generate the article layout dynamically.

    Args:
        slug: Article slug from URL path.
    """
    if not slug:
        return create_not_found()

    article = get_article(slug)
    if not article:
        return create_not_found()

    meta = article["meta"]

    return dmc.Container(
        dmc.Stack(
            [
                # Back link
                dcc.Link(
                    dmc.Group(
                        [
                            DashIconify(icon="tabler:arrow-left", width=16),
                            dmc.Text("Back to Blog", size="sm"),
                        ],
                        gap="xs",
                    ),
                    href="/blog",
                    style={"textDecoration": "none"},
                ),
                # Article header
                dmc.Paper(
                    dmc.Stack(
                        [
                            dmc.Title(meta["title"], order=1),
                            dmc.Group(
                                [
                                    dmc.Group(
                                        [
                                            DashIconify(
                                                icon="tabler:calendar", width=16
                                            ),
                                            dmc.Text(
                                                meta["date"], size="sm", c="dimmed"
                                            ),
                                        ],
                                        gap="xs",
                                    ),
                                    dmc.Group(
                                        [
                                            dmc.Badge(tag, variant="light", size="sm")
                                            for tag in meta.get("tags", [])
                                        ],
                                        gap="xs",
                                    ),
                                ],
                                justify="space-between",
                                wrap="wrap",
                            ),
                            (
                                dmc.Text(meta["description"], size="lg", c="dimmed")
                                if meta.get("description")
                                else None
                            ),
                        ],
                        gap="sm",
                    ),
                    p="xl",
                    radius="md",
                    withBorder=True,
                ),
                # Article content
                dmc.Paper(
                    html.Div(
                        # Render HTML content from markdown
                        # Using dangerously_allow_html via dcc.Markdown
                        dcc.Markdown(
                            article["content"],
                            className="article-content",
                            dangerously_allow_html=True,
                        ),
                    ),
                    p="xl",
                    radius="md",
                    withBorder=True,
                    className="article-body",
                ),
                dmc.Space(h=40),
            ],
            gap="lg",
        ),
        size="md",
        py="xl",
    )
113
portfolio_app/pages/blog/index.py
Normal file
@@ -0,0 +1,113 @@
"""Blog index page - Article listing."""

import dash
import dash_mantine_components as dmc
from dash import dcc
from dash_iconify import DashIconify

from portfolio_app.utils.markdown_loader import Article, get_all_articles

dash.register_page(__name__, path="/blog", name="Blog")

# Page intro
INTRO_TEXT = (
    "I write occasionally about data engineering, automation, and the reality of being "
    "a one-person data team. No hot takes, no growth hacking—just things I've learned "
    "the hard way."
)


def create_article_card(article: Article) -> dmc.Paper:
    """Create an article preview card."""
    meta = article["meta"]
    return dmc.Paper(
        dcc.Link(
            dmc.Stack(
                [
                    dmc.Group(
                        [
                            dmc.Text(meta["title"], fw=600, size="lg"),
                            dmc.Text(meta["date"], size="sm", c="dimmed"),
                        ],
                        justify="space-between",
                        align="flex-start",
                        wrap="wrap",
                    ),
                    dmc.Text(meta["description"], size="md", c="dimmed", lineClamp=2),
                    dmc.Group(
                        [
                            dmc.Badge(tag, variant="light", size="sm")
                            for tag in meta.get("tags", [])[:3]
                        ],
                        gap="xs",
                    ),
                ],
                gap="sm",
            ),
            href=f"/blog/{meta['slug']}",
            style={"textDecoration": "none", "color": "inherit"},
        ),
        p="lg",
        radius="md",
        withBorder=True,
        className="article-card",
    )


def create_empty_state() -> dmc.Paper:
    """Create empty state when no articles exist."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.ThemeIcon(
                    DashIconify(icon="tabler:article-off", width=48),
                    size=80,
                    radius="xl",
                    variant="light",
                    color="gray",
                ),
                dmc.Title("No Articles Yet", order=3),
                dmc.Text(
                    "Articles are coming soon. Check back later!",
                    size="md",
                    c="dimmed",
                    ta="center",
                ),
            ],
            align="center",
            gap="md",
            py="xl",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def layout() -> dmc.Container:
    """Generate the blog index layout dynamically."""
    articles = get_all_articles(include_drafts=False)

    return dmc.Container(
        dmc.Stack(
            [
                dmc.Title("Blog", order=1, ta="center"),
                dmc.Text(
                    INTRO_TEXT, size="md", c="dimmed", ta="center", maw=600, mx="auto"
                ),
                dmc.Divider(my="lg"),
                (
                    dmc.Stack(
                        [create_article_card(article) for article in articles],
                        gap="lg",
                    )
                    if articles
                    else create_empty_state()
                ),
                dmc.Space(h=40),
            ],
            gap="lg",
        ),
        size="md",
        py="xl",
    )
287
portfolio_app/pages/contact.py
Normal file
@@ -0,0 +1,287 @@
"""Contact page - Form UI and direct contact information."""

import dash
import dash_mantine_components as dmc
from dash_iconify import DashIconify

dash.register_page(__name__, path="/contact", name="Contact")

# Contact information
CONTACT_INFO = {
    "email": "leobrmi@hotmail.com",
    "phone": "(416) 859-7936",
    "linkedin": "https://linkedin.com/in/leobmiranda",
    "github": "https://github.com/leomiranda",
    "location": "Toronto, ON, Canada",
}

# Page intro text
INTRO_TEXT = (
    "I'm currently open to Senior Data Analyst and Data Engineer roles in Toronto "
    "(or remote). If you're working on something interesting and need someone who can "
    "build data infrastructure from scratch, I'd like to hear about it."
)

CONSULTING_TEXT = (
    "For consulting inquiries (automation, dashboards, small business data work), "
    "reach out about Bandit Labs."
)

# Form subject options
SUBJECT_OPTIONS = [
    {"value": "job", "label": "Job Opportunity"},
    {"value": "consulting", "label": "Consulting Inquiry"},
    {"value": "other", "label": "Other"},
]


def create_intro_section() -> dmc.Stack:
    """Create the intro text section."""
    return dmc.Stack(
        [
            dmc.Title("Get In Touch", order=1, ta="center"),
            dmc.Text(INTRO_TEXT, size="md", ta="center", maw=600, mx="auto"),
            dmc.Text(
                CONSULTING_TEXT, size="md", ta="center", maw=600, mx="auto", c="dimmed"
            ),
        ],
        gap="md",
        mb="xl",
    )


def create_contact_form() -> dmc.Paper:
    """Create the contact form (disabled in Phase 1)."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Send a Message", order=2, size="h4"),
                dmc.Alert(
                    "Contact form submission is coming soon. Please use the direct contact "
                    "methods below for now.",
                    title="Form Coming Soon",
                    color="blue",
                    variant="light",
                ),
                dmc.TextInput(
                    label="Name",
                    placeholder="Your name",
                    leftSection=DashIconify(icon="tabler:user", width=18),
                    disabled=True,
                ),
                dmc.TextInput(
                    label="Email",
                    placeholder="your.email@example.com",
                    leftSection=DashIconify(icon="tabler:mail", width=18),
                    disabled=True,
                ),
                dmc.Select(
                    label="Subject",
                    placeholder="Select a subject",
                    data=SUBJECT_OPTIONS,
                    leftSection=DashIconify(icon="tabler:tag", width=18),
                    disabled=True,
                ),
                dmc.Textarea(
                    label="Message",
                    placeholder="Your message...",
                    minRows=4,
                    disabled=True,
                ),
                dmc.Button(
                    "Send Message",
                    fullWidth=True,
                    leftSection=DashIconify(icon="tabler:send", width=18),
                    disabled=True,
                ),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_direct_contact() -> dmc.Paper:
    """Create the direct contact information section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Direct Contact", order=2, size="h4"),
                dmc.Stack(
                    [
                        # Email
                        dmc.Group(
                            [
                                dmc.ThemeIcon(
                                    DashIconify(icon="tabler:mail", width=20),
                                    size="lg",
                                    radius="md",
                                    variant="light",
                                ),
                                dmc.Stack(
                                    [
                                        dmc.Text("Email", size="sm", c="dimmed"),
                                        dmc.Anchor(
                                            CONTACT_INFO["email"],
                                            href=f"mailto:{CONTACT_INFO['email']}",
                                            size="md",
                                            fw=500,
                                        ),
                                    ],
                                    gap=0,
                                ),
                            ],
                            gap="md",
                        ),
                        # Phone
                        dmc.Group(
                            [
                                dmc.ThemeIcon(
                                    DashIconify(icon="tabler:phone", width=20),
                                    size="lg",
                                    radius="md",
                                    variant="light",
                                ),
                                dmc.Stack(
                                    [
                                        dmc.Text("Phone", size="sm", c="dimmed"),
                                        dmc.Anchor(
                                            CONTACT_INFO["phone"],
                                            href=f"tel:{CONTACT_INFO['phone'].replace('(', '').replace(')', '').replace(' ', '').replace('-', '')}",
                                            size="md",
                                            fw=500,
                                        ),
                                    ],
                                    gap=0,
                                ),
                            ],
                            gap="md",
                        ),
                        # LinkedIn
                        dmc.Group(
                            [
                                dmc.ThemeIcon(
                                    DashIconify(icon="tabler:brand-linkedin", width=20),
                                    size="lg",
                                    radius="md",
                                    variant="light",
                                    color="blue",
                                ),
                                dmc.Stack(
                                    [
                                        dmc.Text("LinkedIn", size="sm", c="dimmed"),
                                        dmc.Anchor(
                                            "linkedin.com/in/leobmiranda",
                                            href=CONTACT_INFO["linkedin"],
                                            target="_blank",
                                            size="md",
                                            fw=500,
                                        ),
                                    ],
                                    gap=0,
                                ),
                            ],
                            gap="md",
                        ),
                        # GitHub
                        dmc.Group(
                            [
                                dmc.ThemeIcon(
                                    DashIconify(icon="tabler:brand-github", width=20),
                                    size="lg",
                                    radius="md",
                                    variant="light",
                                ),
                                dmc.Stack(
                                    [
                                        dmc.Text("GitHub", size="sm", c="dimmed"),
                                        dmc.Anchor(
                                            "github.com/leomiranda",
                                            href=CONTACT_INFO["github"],
                                            target="_blank",
                                            size="md",
                                            fw=500,
                                        ),
                                    ],
                                    gap=0,
                                ),
                            ],
                            gap="md",
                        ),
                    ],
                    gap="lg",
                ),
            ],
            gap="lg",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_location_section() -> dmc.Paper:
    """Create the location and work eligibility section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Location", order=2, size="h4"),
                dmc.Group(
                    [
                        dmc.ThemeIcon(
                            DashIconify(icon="tabler:map-pin", width=20),
                            size="lg",
                            radius="md",
                            variant="light",
                            color="red",
                        ),
                        dmc.Stack(
                            [
                                dmc.Text(CONTACT_INFO["location"], size="md", fw=500),
                                dmc.Text(
                                    "Canadian Citizen | Eligible to work in Canada and US",
                                    size="sm",
                                    c="dimmed",
                                ),
                            ],
                            gap=0,
                        ),
                    ],
                    gap="md",
                ),
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


layout = dmc.Container(
    dmc.Stack(
        [
            create_intro_section(),
            dmc.SimpleGrid(
                [
                    create_contact_form(),
                    dmc.Stack(
                        [
                            create_direct_contact(),
                            create_location_section(),
                        ],
                        gap="lg",
                    ),
                ],
                cols={"base": 1, "md": 2},
                spacing="xl",
            ),
            dmc.Space(h=40),
        ],
        gap="lg",
    ),
    size="lg",
    py="xl",
)
@@ -1,81 +1,118 @@
"""Bio landing page."""
"""Home landing page - Portfolio entry point."""

import dash
import dash_mantine_components as dmc
from dash import dcc
from dash_iconify import DashIconify

dash.register_page(__name__, path="/", name="Home")

# Content from bio_content_v2.md
HEADLINE = "Leo | Data Engineer & Analytics Developer"
TAGLINE = "I build data infrastructure that actually gets used."
# Hero content from blueprint
HEADLINE = "I turn messy data into systems that actually work."
SUBHEAD = (
    "Data Engineer & Analytics Specialist. 8 years building pipelines, dashboards, "
    "and the infrastructure nobody sees but everyone depends on. Based in Toronto."
)

SUMMARY = """Over the past 5 years, I've designed and evolved an enterprise analytics platform
from scratch—now processing 1B+ rows across 21 tables with Python-based ETL pipelines and
dbt-style SQL transformations. The result: 40% efficiency gains, 30% reduction in call
abandon rates, and dashboards that executives actually open.

My approach: dimensional modeling (star schema), layered transformations
(staging → intermediate → marts), and automation that eliminates manual work.
I've built everything from self-service analytics portals to OCR-powered receipt processing systems.

Currently at Summitt Energy supporting multi-market operations across Canada and 8 US states.
Previously cut my teeth on IT infrastructure projects at Petrobras (Fortune 500) and the
Project Management Institute."""

TECH_STACK = [
    "Python",
    "Pandas",
    "SQLAlchemy",
    "FastAPI",
    "SQL",
    "PostgreSQL",
    "MSSQL",
    "Power BI",
    "Plotly/Dash",
    "dbt patterns",
    "Genesys Cloud",
# Impact metrics
IMPACT_STATS = [
    {"value": "1B+", "label": "Rows processed daily across enterprise platform"},
    {"value": "40%", "label": "Efficiency gain through automation"},
    {"value": "5 Years", "label": "Building DataFlow from zero"},
]

PROJECTS = [
    {
        "title": "Toronto Housing Dashboard",
        "description": "Choropleth visualization of GTA real estate trends with TRREB and CMHC data.",
        "status": "In Development",
        "link": "/toronto",
    },
    {
        "title": "Energy Pricing Analysis",
        "description": "Time series analysis and ML prediction for utility market pricing.",
        "status": "Planned",
        "link": "/energy",
    },
]
# Featured project
FEATURED_PROJECT = {
    "title": "Toronto Housing Market Dashboard",
    "description": (
        "Real-time analytics on Toronto's housing trends. "
        "dbt-powered ETL, Python scraping, Plotly visualization."
    ),
    "status": "Live",
    "dashboard_link": "/toronto",
    "repo_link": "https://github.com/leomiranda/personal-portfolio",
}

AVAILABILITY = "Open to Senior Data Analyst, Analytics Engineer, and BI Developer opportunities in Toronto or remote."
# Brief intro
INTRO_TEXT = (
    "I'm a data engineer who's spent the last 8 years in the trenches—building the "
    "infrastructure that feeds dashboards, automates the boring stuff, and makes data "
    "actually usable. Most of my work has been in contact center operations and energy, "
    "where I've had to be scrappy: one-person data teams, legacy systems, stakeholders "
    "who need answers yesterday."
)

INTRO_CLOSING = "I like solving real problems, not theoretical ones."


def create_hero_section() -> dmc.Stack:
    """Create the hero section with name and tagline."""
    """Create the hero section with headline, subhead, and CTAs."""
    return dmc.Stack(
        [
            dmc.Title(HEADLINE, order=1, ta="center"),
            dmc.Text(TAGLINE, size="xl", c="dimmed", ta="center"),
            dmc.Title(
                HEADLINE,
                order=1,
                ta="center",
                size="2.5rem",
            ),
            dmc.Text(
                SUBHEAD,
                size="lg",
                c="dimmed",
                ta="center",
                maw=700,
                mx="auto",
            ),
            dmc.Group(
                [
                    dcc.Link(
                        dmc.Button(
                            "View Projects",
                            size="lg",
                            variant="filled",
                            leftSection=DashIconify(icon="tabler:folder", width=20),
                        ),
                        href="/projects",
                    ),
                    dcc.Link(
                        dmc.Button(
                            "Get In Touch",
                            size="lg",
                            variant="outline",
                            leftSection=DashIconify(icon="tabler:mail", width=20),
                        ),
                        href="/contact",
                    ),
                ],
                gap="xs",
                justify="center",
                gap="md",
                mt="md",
            ),
        ],
        gap="md",
        py="xl",
    )


def create_summary_section() -> dmc.Paper:
    """Create the professional summary section."""
    paragraphs = SUMMARY.strip().split("\n\n")
    return dmc.Paper(
        dmc.Stack(
def create_impact_stat(stat: dict[str, str]) -> dmc.Stack:
    """Create a single impact stat."""
    return dmc.Stack(
        [
            dmc.Title("About", order=2, size="h3"),
            *[dmc.Text(p.replace("\n", " "), size="md") for p in paragraphs],
            dmc.Text(stat["value"], fw=700, size="2rem", ta="center"),
            dmc.Text(stat["label"], size="sm", c="dimmed", ta="center"),
        ],
        gap="md",
        gap="xs",
        align="center",
    )


def create_impact_strip() -> dmc.Paper:
    """Create the impact statistics strip."""
    return dmc.Paper(
        dmc.SimpleGrid(
            [create_impact_stat(stat) for stat in IMPACT_STATS],
            cols={"base": 1, "sm": 3},
            spacing="xl",
        ),
        p="xl",
        radius="md",
@@ -83,16 +120,56 @@ def create_summary_section() -> dmc.Paper:
    )


def create_tech_stack_section() -> dmc.Paper:
    """Create the tech stack section with badges."""
def create_featured_project() -> dmc.Paper:
    """Create the featured project card."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Tech Stack", order=2, size="h3"),
                dmc.Group(
                    [
                        dmc.Badge(tech, size="lg", variant="light", radius="sm")
                        for tech in TECH_STACK
                        dmc.Title("Featured Project", order=2, size="h3"),
                        dmc.Badge(
                            FEATURED_PROJECT["status"],
                            color="green",
                            variant="light",
                            size="lg",
                        ),
                    ],
                    justify="space-between",
                ),
                dmc.Title(
                    FEATURED_PROJECT["title"],
                    order=3,
                    size="h4",
                ),
                dmc.Text(
                    FEATURED_PROJECT["description"],
                    size="md",
                    c="dimmed",
                ),
                dmc.Group(
                    [
                        dcc.Link(
                            dmc.Button(
                                "View Dashboard",
                                variant="light",
                                leftSection=DashIconify(
                                    icon="tabler:chart-bar", width=18
                                ),
                            ),
                            href=FEATURED_PROJECT["dashboard_link"],
                        ),
                        dmc.Anchor(
                            dmc.Button(
                                "View Repository",
                                variant="subtle",
                                leftSection=DashIconify(
                                    icon="tabler:brand-github", width=18
                                ),
                            ),
                            href=FEATURED_PROJECT["repo_link"],
                            target="_blank",
                        ),
                    ],
                    gap="sm",
                ),
@@ -105,38 +182,13 @@ def create_tech_stack_section() -> dmc.Paper:
    )


def create_project_card(project: dict[str, str]) -> dmc.Card:
    """Create a project card."""
    status_color = "blue" if project["status"] == "In Development" else "gray"
    return dmc.Card(
        [
            dmc.Group(
                [
                    dmc.Text(project["title"], fw=500, size="lg"),
                    dmc.Badge(project["status"], color=status_color, variant="light"),
                ],
                justify="space-between",
                align="center",
            ),
            dmc.Text(project["description"], size="sm", c="dimmed", mt="sm"),
        ],
        withBorder=True,
        radius="md",
        p="lg",
    )


def create_projects_section() -> dmc.Paper:
    """Create the portfolio projects section."""
def create_intro_section() -> dmc.Paper:
    """Create the brief intro section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Portfolio Projects", order=2, size="h3"),
                dmc.SimpleGrid(
                    [create_project_card(p) for p in PROJECTS],
                    cols={"base": 1, "sm": 2},
                    spacing="lg",
                ),
                dmc.Text(INTRO_TEXT, size="md"),
                dmc.Text(INTRO_CLOSING, size="md", fw=500, fs="italic"),
            ],
            gap="md",
        ),
@@ -146,20 +198,13 @@ def create_projects_section() -> dmc.Paper:
    )


def create_availability_section() -> dmc.Text:
    """Create the availability statement."""
    return dmc.Text(AVAILABILITY, size="sm", c="dimmed", ta="center", fs="italic")


layout = dmc.Container(
    dmc.Stack(
        [
            create_hero_section(),
            create_summary_section(),
            create_tech_stack_section(),
            create_projects_section(),
            dmc.Divider(my="lg"),
            create_availability_section(),
            create_impact_strip(),
            create_featured_project(),
            create_intro_section(),
            dmc.Space(h=40),
        ],
        gap="xl",
304
portfolio_app/pages/projects.py
Normal file
@@ -0,0 +1,304 @@
"""Projects overview page - Hub for all portfolio projects."""

from typing import Any

import dash
import dash_mantine_components as dmc
from dash import dcc
from dash_iconify import DashIconify

dash.register_page(__name__, path="/projects", name="Projects")

# Page intro
INTRO_TEXT = (
    "These are projects I've built—some professional (anonymized where needed), "
    "some personal. Each one taught me something. Use the sidebar to jump directly "
    "to live dashboards or explore the overviews below."
)

# Project definitions
PROJECTS: list[dict[str, Any]] = [
    {
        "title": "Toronto Housing Market Dashboard",
        "type": "Personal Project",
        "status": "Live",
        "status_color": "green",
        "problem": (
            "Toronto's housing market moves fast, and most publicly available data "
            "is either outdated, behind paywalls, or scattered across dozens of sources. "
            "I wanted a single dashboard that tracked trends in real-time."
        ),
        "built": [
            "Data Pipeline: Python scraper pulling listings data, automated on schedule",
            "Transformation Layer: dbt-based SQL architecture (staging -> intermediate -> marts)",
            "Visualization: Interactive Plotly-Dash dashboard with filters by neighborhood, price range, property type",
            "Infrastructure: PostgreSQL backend, version-controlled in Git",
        ],
        "tech_stack": "Python, dbt, PostgreSQL, Plotly-Dash, GitHub Actions",
        "learned": (
            "Real estate data is messy as hell. Listings get pulled, prices change, "
            "duplicates are everywhere. Building a reliable pipeline meant implementing "
            'serious data quality checks and learning to embrace "good enough" over "perfect."'
        ),
        "dashboard_link": "/toronto",
        "repo_link": "https://github.com/leomiranda/personal-portfolio",
    },
    {
        "title": "US Retail Energy Price Predictor",
        "type": "Personal Project",
        "status": "Coming Soon",
        "status_color": "yellow",
        "problem": (
            "Retail energy pricing in deregulated US markets is volatile and opaque. "
            "Consumers and analysts lack accessible tools to understand pricing trends "
            "and forecast where rates are headed."
        ),
        "built": [
            "Data Pipeline: Automated ingestion of public pricing data across multiple US markets",
            "ML Model: Price prediction using time series forecasting (ARIMA, Prophet, or similar)",
            "Transformation Layer: dbt-based SQL architecture for feature engineering",
            "Visualization: Interactive dashboard showing historical trends + predictions by state/market",
        ],
        "tech_stack": "Python, Scikit-learn, dbt, PostgreSQL, Plotly-Dash",
        "learned": (
            "This showcases the ML side of my skillset—something the Toronto Housing "
            "dashboard doesn't cover. It also leverages my domain expertise from 5+ years "
            "in retail energy operations."
        ),
        "dashboard_link": None,
        "repo_link": None,
    },
    {
        "title": "DataFlow Platform",
        "type": "Professional",
        "status": "Case Study Pending",
        "status_color": "gray",
        "problem": (
            "When I joined Summitt Energy, there was no data infrastructure. "
            "Reports were manual. Insights were guesswork. I was hired to fix that."
        ),
        "built": [
            "v1 (2020): Basic ETL scripts pulling Genesys Cloud data into MSSQL",
            "v2 (2021): Dimensional model (star schema) with fact/dimension tables",
            "v3 (2022): Python refactor with SQLAlchemy ORM, batch processing, error handling",
            "v4 (2023-24): dbt-pattern SQL views (staging -> intermediate -> marts), FastAPI layer, CLI tools",
        ],
        "tech_stack": "Python, SQLAlchemy, FastAPI, MSSQL, Power BI, Genesys Cloud API",
        "impact": [
            "21 tables, 1B+ rows",
            "5,000+ daily transactions processed",
            "40% improvement in reporting efficiency",
            "30% reduction in call abandon rate",
            "50% faster Average Speed to Answer",
        ],
        "learned": (
            "Building data infrastructure as a team of one forces brutal prioritization. "
            "I learned to ship imperfect solutions fast, iterate based on feedback, "
            "and never underestimate how long stakeholder buy-in takes."
        ),
        "note": "This is proprietary work. A sanitized case study with architecture patterns (no proprietary data) will be published in Phase 3.",
        "dashboard_link": None,
        "repo_link": None,
    },
    {
        "title": "AI-Assisted Automation (Bandit Labs)",
        "type": "Consulting/Side Business",
        "status": "Active",
        "status_color": "blue",
        "problem": (
            "Small businesses don't need enterprise data platforms—they need someone "
            "to eliminate the 4 hours/week they spend manually entering receipts."
        ),
        "built": [
            "Receipt Processing Automation: OCR pipeline (Tesseract, Google Vision) extracting purchase data from photos",
            "Product Margin Tracker: Plotly-Dash dashboard with real-time profitability insights",
            "Claude Code Plugins: MCP servers for Gitea, Wiki.js, NetBox integration",
        ],
        "tech_stack": "Python, Tesseract, Google Vision API, Plotly-Dash, QuickBooks API",
        "learned": (
            "Small businesses are underserved by the data/automation industry. "
            "Everyone wants to sell them enterprise software they don't need. "
            "I like solving problems at a scale where the impact is immediately visible."
        ),
        "dashboard_link": None,
        "repo_link": None,
        "external_link": "/lab",
        "external_label": "Learn More About Bandit Labs",
    },
]
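# Sketch (not part of this commit): create_project_card() below treats "built",
# "impact", "note", and the *_link keys as optional, so only the keys listed
# here are truly required. REQUIRED_KEYS is an assumption inferred from how the
# card builder reads the dicts; a guard like this would catch a malformed entry
# at import time.
REQUIRED_KEYS = {"title", "type", "status", "status_color", "problem", "tech_stack", "learned"}
for _project in PROJECTS:
    _missing = REQUIRED_KEYS - _project.keys()
    assert not _missing, f"{_project.get('title', '?')} is missing keys: {_missing}"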


def create_project_card(project: dict[str, Any]) -> dmc.Paper:
    """Create a detailed project card."""
    # Build the "What I Built" list
    built_items = project.get("built", [])
    built_section = (
        dmc.Stack(
            [
                dmc.Text("What I Built:", fw=600, size="sm"),
                dmc.List(
                    [dmc.ListItem(dmc.Text(item, size="sm")) for item in built_items],
                    spacing="xs",
                    size="sm",
                ),
            ],
            gap="xs",
        )
        if built_items
        else None
    )

    # Build impact section for DataFlow
    impact_items = project.get("impact", [])
    impact_section = (
        dmc.Stack(
            [
                dmc.Text("Impact:", fw=600, size="sm"),
                dmc.Group(
                    [
                        dmc.Badge(item, variant="light", size="sm")
                        for item in impact_items
                    ],
                    gap="xs",
                ),
            ],
            gap="xs",
        )
        if impact_items
        else None
    )

    # Build action buttons
    buttons = []
    if project.get("dashboard_link"):
        buttons.append(
            dcc.Link(
                dmc.Button(
                    "View Dashboard",
                    variant="light",
                    size="sm",
                    leftSection=DashIconify(icon="tabler:chart-bar", width=16),
                ),
                href=project["dashboard_link"],
            )
        )
    if project.get("repo_link"):
        buttons.append(
            dmc.Anchor(
                dmc.Button(
                    "View Repository",
                    variant="subtle",
                    size="sm",
                    leftSection=DashIconify(icon="tabler:brand-github", width=16),
                ),
                href=project["repo_link"],
                target="_blank",
            )
        )
    if project.get("external_link"):
        buttons.append(
            dcc.Link(
                dmc.Button(
                    project.get("external_label", "Learn More"),
                    variant="outline",
                    size="sm",
                    leftSection=DashIconify(icon="tabler:arrow-right", width=16),
                ),
                href=project["external_link"],
            )
        )

    # Handle "Coming Soon" state
    if project["status"] == "Coming Soon" and not buttons:
        buttons.append(
            dmc.Badge("Coming Soon", variant="light", color="yellow", size="lg")
        )

    return dmc.Paper(
        dmc.Stack(
            [
                # Header
                dmc.Group(
                    [
                        dmc.Stack(
                            [
                                dmc.Text(project["title"], fw=600, size="lg"),
                                dmc.Text(project["type"], size="sm", c="dimmed"),
                            ],
                            gap=0,
                        ),
                        dmc.Badge(
                            project["status"],
                            color=project["status_color"],
                            variant="light",
                            size="lg",
                        ),
                    ],
                    justify="space-between",
                    align="flex-start",
                ),
                # Problem
                dmc.Stack(
                    [
                        dmc.Text("The Problem:", fw=600, size="sm"),
                        dmc.Text(project["problem"], size="sm", c="dimmed"),
                    ],
                    gap="xs",
                ),
                # What I Built
                built_section,
                # Impact (if exists)
                impact_section,
                # Tech Stack
                dmc.Group(
                    [
                        dmc.Text("Tech Stack:", fw=600, size="sm"),
                        dmc.Text(project["tech_stack"], size="sm", c="dimmed"),
                    ],
                    gap="xs",
                ),
                # What I Learned
                dmc.Stack(
                    [
                        dmc.Text("What I Learned:", fw=600, size="sm"),
                        dmc.Text(project["learned"], size="sm", fs="italic"),
                    ],
                    gap="xs",
                ),
                # Note (if exists)
                (
                    dmc.Alert(
                        project["note"],
                        color="gray",
                        variant="light",
                    )
                    if project.get("note")
                    else None
                ),
                # Action buttons
                dmc.Group(buttons, gap="sm") if buttons else None,
            ],
            gap="md",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


layout = dmc.Container(
    dmc.Stack(
        [
            dmc.Title("Projects", order=1, ta="center"),
            dmc.Text(
                INTRO_TEXT, size="md", c="dimmed", ta="center", maw=700, mx="auto"
            ),
            dmc.Divider(my="lg"),
            *[create_project_card(project) for project in PROJECTS],
            dmc.Space(h=40),
        ],
        gap="xl",
    ),
    size="md",
    py="xl",
)
362
portfolio_app/pages/resume.py
Normal file
@@ -0,0 +1,362 @@
"""Resume page - Inline display with download options."""

from typing import Any

import dash
import dash_mantine_components as dmc
from dash_iconify import DashIconify

dash.register_page(__name__, path="/resume", name="Resume")

# =============================================================================
# HUMAN TASK: Upload resume content via Gitea
# Replace the placeholder content below with actual resume data.
# You can upload PDF/DOCX files to portfolio_app/assets/resume/
# =============================================================================

# Resume sections - replace with actual content
RESUME_HEADER = {
    "name": "Leo Miranda",
    "title": "Data Engineer & Analytics Specialist",
    "location": "Toronto, ON, Canada",
    "email": "leobrmi@hotmail.com",
    "phone": "(416) 859-7936",
    "linkedin": "linkedin.com/in/leobmiranda",
    "github": "github.com/leomiranda",
}

RESUME_SUMMARY = (
    "Data Engineer with 8 years of experience building enterprise analytics platforms, "
    "ETL pipelines, and business intelligence solutions. Proven track record of delivering "
    "40% efficiency gains through automation and data infrastructure modernization. "
    "Expert in Python, SQL, and dimensional modeling with deep domain expertise in "
    "contact center operations and energy retail."
)

# Experience - placeholder structure
EXPERIENCE = [
    {
        "title": "Senior Data Analyst / Data Engineer",
        "company": "Summitt Energy",
        "location": "Toronto, ON",
        "period": "2019 - Present",
        "highlights": [
            "Built DataFlow platform from scratch: 21 tables, 1B+ rows, processing 5,000+ daily transactions",
            "Achieved 40% improvement in reporting efficiency through automated ETL pipelines",
            "Reduced call abandon rate by 30% via KPI framework and real-time dashboards",
            "Sole data professional supporting 150+ employees across 9 markets (Canada + US)",
        ],
    },
    {
        "title": "IT Project Coordinator",
        "company": "Petrobras",
        "location": "Rio de Janeiro, Brazil",
        "period": "2015 - 2018",
        "highlights": [
            "Coordinated IT infrastructure projects for Fortune 500 energy company",
            "Managed vendor relationships and project timelines",
            "Developed reporting automation reducing manual effort by 60%",
        ],
    },
    {
        "title": "Project Management Associate",
        "company": "Project Management Institute",
        "location": "Remote",
        "period": "2014 - 2015",
        "highlights": [
            "Supported global project management standards development",
            "CAPM and ITIL certified during this period",
        ],
    },
]

# Skills - organized by category
SKILLS = {
    "Languages": ["Python", "SQL", "R", "VBA"],
    "Data Engineering": [
        "ETL/ELT Pipelines",
        "Dimensional Modeling",
        "dbt",
        "SQLAlchemy",
        "FastAPI",
    ],
    "Databases": ["PostgreSQL", "MSSQL", "Redis"],
    "Visualization": ["Plotly/Dash", "Power BI", "Tableau"],
    "Platforms": ["Genesys Cloud", "Five9", "Zoho CRM", "Azure DevOps"],
    "Currently Learning": ["Azure DP-203", "Airflow", "Snowflake"],
}

# Education
EDUCATION = [
    {
        "degree": "Bachelor of Business Administration",
        "school": "Universidade Federal do Rio de Janeiro",
        "year": "2014",
    },
]

# Certifications
CERTIFICATIONS = [
    "CAPM (Certified Associate in Project Management)",
    "ITIL Foundation",
    "Azure DP-203 (In Progress)",
]


def create_header_section() -> dmc.Paper:
    """Create the resume header with contact info."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title(RESUME_HEADER["name"], order=1, ta="center"),
                dmc.Text(RESUME_HEADER["title"], size="xl", c="dimmed", ta="center"),
                dmc.Divider(my="sm"),
                dmc.Group(
                    [
                        dmc.Group(
                            [
                                DashIconify(icon="tabler:map-pin", width=16),
                                dmc.Text(RESUME_HEADER["location"], size="sm"),
                            ],
                            gap="xs",
                        ),
                        dmc.Group(
                            [
                                DashIconify(icon="tabler:mail", width=16),
                                dmc.Text(RESUME_HEADER["email"], size="sm"),
                            ],
                            gap="xs",
                        ),
                        dmc.Group(
                            [
                                DashIconify(icon="tabler:phone", width=16),
                                dmc.Text(RESUME_HEADER["phone"], size="sm"),
                            ],
                            gap="xs",
                        ),
                    ],
                    justify="center",
                    gap="lg",
                    wrap="wrap",
                ),
                dmc.Group(
                    [
                        dmc.Anchor(
                            dmc.Group(
                                [
                                    DashIconify(icon="tabler:brand-linkedin", width=16),
                                    dmc.Text("LinkedIn", size="sm"),
                                ],
                                gap="xs",
                            ),
                            href=f"https://{RESUME_HEADER['linkedin']}",
                            target="_blank",
                        ),
                        dmc.Anchor(
                            dmc.Group(
                                [
                                    DashIconify(icon="tabler:brand-github", width=16),
                                    dmc.Text("GitHub", size="sm"),
                                ],
                                gap="xs",
                            ),
                            href=f"https://{RESUME_HEADER['github']}",
                            target="_blank",
                        ),
                    ],
                    justify="center",
                    gap="lg",
                ),
            ],
            gap="sm",
        ),
        p="xl",
        radius="md",
        withBorder=True,
    )


def create_download_section() -> dmc.Group:
    """Create download buttons for resume files."""
    # Note: Buttons disabled until files are uploaded
    return dmc.Group(
        [
            dmc.Button(
                "Download PDF",
                variant="filled",
                leftSection=DashIconify(icon="tabler:file-type-pdf", width=18),
                disabled=True,  # Enable after uploading resume.pdf to assets
            ),
            dmc.Button(
                "Download DOCX",
                variant="outline",
                leftSection=DashIconify(icon="tabler:file-type-docx", width=18),
                disabled=True,  # Enable after uploading resume.docx to assets
            ),
            dmc.Anchor(
                dmc.Button(
                    "View on LinkedIn",
                    variant="subtle",
                    leftSection=DashIconify(icon="tabler:brand-linkedin", width=18),
                ),
                href=f"https://{RESUME_HEADER['linkedin']}",
                target="_blank",
            ),
        ],
        justify="center",
        gap="md",
    )
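# Sketch (not part of this commit): Dash serves everything under
# portfolio_app/assets/ at /assets/... automatically, so once the files land
# the disabled buttons above can become plain download links. The exact file
# names under assets/resume/ are assumptions.
def create_download_section_enabled() -> dmc.Group:
    """Variant of create_download_section() for when the files exist."""
    return dmc.Group(
        [
            dmc.Anchor(
                dmc.Button(
                    "Download PDF",
                    variant="filled",
                    leftSection=DashIconify(icon="tabler:file-type-pdf", width=18),
                ),
                href="/assets/resume/resume.pdf",  # assumed upload path
            ),
            dmc.Anchor(
                dmc.Button(
                    "Download DOCX",
                    variant="outline",
                    leftSection=DashIconify(icon="tabler:file-type-docx", width=18),
                ),
                href="/assets/resume/resume.docx",  # assumed upload path
            ),
        ],
        justify="center",
        gap="md",
    )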


def create_summary_section() -> dmc.Paper:
    """Create the professional summary section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Professional Summary", order=2, size="h4"),
                dmc.Text(RESUME_SUMMARY, size="md"),
            ],
            gap="sm",
        ),
        p="lg",
        radius="md",
        withBorder=True,
    )


def create_experience_item(exp: dict[str, Any]) -> dmc.Stack:
    """Create a single experience entry."""
    return dmc.Stack(
        [
            dmc.Group(
                [
                    dmc.Text(exp["title"], fw=600),
                    dmc.Text(exp["period"], size="sm", c="dimmed"),
                ],
                justify="space-between",
            ),
            dmc.Text(f"{exp['company']} | {exp['location']}", size="sm", c="dimmed"),
            dmc.List(
                [dmc.ListItem(dmc.Text(h, size="sm")) for h in exp["highlights"]],
                spacing="xs",
                size="sm",
            ),
        ],
        gap="xs",
    )


def create_experience_section() -> dmc.Paper:
    """Create the experience section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Experience", order=2, size="h4"),
                *[create_experience_item(exp) for exp in EXPERIENCE],
            ],
            gap="lg",
        ),
        p="lg",
        radius="md",
        withBorder=True,
    )


def create_skills_section() -> dmc.Paper:
    """Create the skills section with badges."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Skills", order=2, size="h4"),
                dmc.SimpleGrid(
                    [
                        dmc.Stack(
                            [
                                dmc.Text(category, fw=600, size="sm"),
                                dmc.Group(
                                    [
                                        dmc.Badge(skill, variant="light", size="sm")
                                        for skill in skills
                                    ],
                                    gap="xs",
                                ),
                            ],
                            gap="xs",
                        )
                        for category, skills in SKILLS.items()
                    ],
                    cols={"base": 1, "sm": 2},
                    spacing="md",
                ),
            ],
            gap="md",
        ),
        p="lg",
        radius="md",
        withBorder=True,
    )


def create_education_section() -> dmc.Paper:
    """Create education and certifications section."""
    return dmc.Paper(
        dmc.Stack(
            [
                dmc.Title("Education & Certifications", order=2, size="h4"),
                dmc.Stack(
                    [
                        dmc.Stack(
                            [
                                dmc.Text(edu["degree"], fw=600),
                                dmc.Text(
                                    f"{edu['school']} | {edu['year']}",
                                    size="sm",
                                    c="dimmed",
                                ),
                            ],
                            gap=0,
                        )
                        for edu in EDUCATION
                    ],
                    gap="sm",
                ),
                dmc.Divider(my="sm"),
                dmc.Group(
                    [
                        dmc.Badge(cert, variant="outline", size="md")
                        for cert in CERTIFICATIONS
                    ],
                    gap="xs",
                ),
            ],
            gap="md",
        ),
        p="lg",
        radius="md",
        withBorder=True,
    )


layout = dmc.Container(
    dmc.Stack(
        [
            create_header_section(),
            create_download_section(),
            dmc.Alert(
                "Resume files (PDF/DOCX) will be available for download once uploaded. "
                "The inline content below is a preview.",
                title="Downloads Coming Soon",
                color="blue",
                variant="light",
            ),
            create_summary_section(),
            create_experience_section(),
            create_skills_section(),
            create_education_section(),
            dmc.Space(h=40),
        ],
        gap="lg",
    ),
    size="md",
    py="xl",
)
File diff suppressed because it is too large
405
portfolio_app/pages/toronto/callbacks/chart_callbacks.py
Normal file
@@ -0,0 +1,405 @@
"""Chart callbacks for supporting visualizations."""
# mypy: disable-error-code="misc,no-untyped-def,arg-type"

import pandas as pd
import plotly.graph_objects as go
from dash import Input, Output, callback

from portfolio_app.figures import (
    create_donut_chart,
    create_horizontal_bar,
    create_radar_figure,
    create_scatter_figure,
)
from portfolio_app.toronto.services import (
    get_amenities_data,
    get_city_averages,
    get_demographics_data,
    get_housing_data,
    get_neighbourhood_details,
    get_safety_data,
)


@callback(
    Output("overview-scatter-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_overview_scatter(year: str) -> go.Figure:
    """Update income vs safety scatter plot."""
    year_int = int(year) if year else 2021
    df = get_demographics_data(year_int)
    safety_df = get_safety_data(year_int)

    if df.empty or safety_df.empty:
        return _empty_chart("No data available")

    # Merge demographics with safety
    merged = df.merge(
        safety_df[["neighbourhood_id", "total_crime_rate"]],
        on="neighbourhood_id",
        how="left",
    )

    # Compute safety score (inverse of crime rate)
    if "total_crime_rate" in merged.columns:
        max_crime = merged["total_crime_rate"].max()
        if max_crime and max_crime > 0:
            merged["safety_score"] = 100 - (
                merged["total_crime_rate"] / max_crime * 100
            )
        else:
            merged["safety_score"] = 50  # Default if no crime data

    # Fill NULL population with median or default value for sizing
    if "population" in merged.columns:
        median_pop = merged["population"].median()
        default_pop = median_pop if pd.notna(median_pop) else 10000
        merged["population"] = merged["population"].fillna(default_pop)

    # Filter rows with required data for scatter plot
    merged = merged.dropna(subset=["median_household_income", "safety_score"])

    if merged.empty:
        return _empty_chart("Insufficient data for scatter plot")

    data = merged.to_dict("records")

    return create_scatter_figure(
        data=data,
        x_column="median_household_income",
        y_column="safety_score",
        name_column="neighbourhood_name",
        size_column="population",
        title="Income vs Safety",
        x_title="Median Household Income ($)",
        y_title="Safety Score",
        trendline=True,
    )
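# Worked example (editorial, not in the original commit): the normalization
# above maps the worst-crime neighbourhood to 0 and a crime-free one to 100.
# With total_crime_rate = 1200 and max_crime = 4800 (illustrative numbers):
#   safety_score = 100 - (1200 / 4800 * 100) = 100 - 25 = 75.0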


@callback(
    Output("housing-trend-chart", "figure"),
    Input("toronto-year-select", "value"),
    Input("toronto-selected-neighbourhood", "data"),
)
def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
    """Update housing rent trend chart."""
    # For now, show city averages as we don't have multi-year data
    # This would be a time series if we had historical data
    year_int = int(year) if year else 2021
    averages = get_city_averages(year_int)

    if not averages:
        return _empty_chart("No trend data available")

    # Placeholder for trend data - would be historical
    base_rent = averages.get("avg_rent_2bed") or 2000
    data = [
        {"year": "2019", "avg_rent": base_rent * 0.85},
        {"year": "2020", "avg_rent": base_rent * 0.88},
        {"year": "2021", "avg_rent": base_rent * 0.92},
        {"year": "2022", "avg_rent": base_rent * 0.96},
        {"year": "2023", "avg_rent": base_rent},
    ]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=[d["year"] for d in data],
            y=[d["avg_rent"] for d in data],
            mode="lines+markers",
            line={"color": "#2196F3", "width": 2},
            marker={"size": 8},
            name="City Average",
        )
    )

    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"gridcolor": "rgba(128,128,128,0.2)"},
        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": "Avg Rent (2BR)"},
        showlegend=False,
        margin={"l": 40, "r": 10, "t": 10, "b": 30},
    )

    return fig


@callback(
    Output("housing-types-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_housing_types(year: str) -> go.Figure:
    """Update dwelling types breakdown chart."""
    year_int = int(year) if year else 2021
    df = get_housing_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Aggregate tenure types across city
    owner_pct = df["pct_owner_occupied"].mean()
    renter_pct = df["pct_renter_occupied"].mean()

    data = [
        {"type": "Owner Occupied", "percentage": owner_pct},
        {"type": "Renter Occupied", "percentage": renter_pct},
    ]

    return create_donut_chart(
        data=data,
        name_column="type",
        value_column="percentage",
        colors=["#4CAF50", "#2196F3"],
    )


@callback(
    Output("safety-trend-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_safety_trend(year: str) -> go.Figure:
    """Update crime trend chart."""
    # Placeholder for trend - would need historical data
    data = [
        {"year": "2019", "crime_rate": 4500},
        {"year": "2020", "crime_rate": 4200},
        {"year": "2021", "crime_rate": 4100},
        {"year": "2022", "crime_rate": 4300},
        {"year": "2023", "crime_rate": 4250},
    ]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=[d["year"] for d in data],
            y=[d["crime_rate"] for d in data],
            mode="lines+markers",
            line={"color": "#FF5722", "width": 2},
            marker={"size": 8},
            fill="tozeroy",
            fillcolor="rgba(255,87,34,0.1)",
        )
    )

    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"gridcolor": "rgba(128,128,128,0.2)"},
        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": "Crime Rate per 100K"},
        showlegend=False,
        margin={"l": 40, "r": 10, "t": 10, "b": 30},
    )

    return fig


@callback(
    Output("safety-types-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_safety_types(year: str) -> go.Figure:
    """Update crime by category chart."""
    year_int = int(year) if year else 2021
    df = get_safety_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Aggregate crime types across city
    violent = df["violent_crimes"].sum() if "violent_crimes" in df.columns else 0
    property_crimes = (
        df["property_crimes"].sum() if "property_crimes" in df.columns else 0
    )
    theft = df["theft_crimes"].sum() if "theft_crimes" in df.columns else 0
    other = (
        df["total_crimes"].sum() - violent - property_crimes - theft
        if "total_crimes" in df.columns
        else 0
    )

    data = [
        {"category": "Violent", "count": int(violent)},
        {"category": "Property", "count": int(property_crimes)},
        {"category": "Theft", "count": int(theft)},
        {"category": "Other", "count": int(max(0, other))},
    ]

    return create_horizontal_bar(
        data=data,
        name_column="category",
        value_column="count",
        color="#FF5722",
    )


@callback(
    Output("demographics-age-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_demographics_age(year: str) -> go.Figure:
    """Update age distribution chart."""
    year_int = int(year) if year else 2021
    df = get_demographics_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Calculate average age distribution
    under_18 = df["pct_under_18"].mean() if "pct_under_18" in df.columns else 20
    age_18_64 = df["pct_18_to_64"].mean() if "pct_18_to_64" in df.columns else 65
    over_65 = df["pct_65_plus"].mean() if "pct_65_plus" in df.columns else 15

    data = [
        {"age_group": "Under 18", "percentage": under_18},
        {"age_group": "18-64", "percentage": age_18_64},
        {"age_group": "65+", "percentage": over_65},
    ]

    return create_donut_chart(
        data=data,
        name_column="age_group",
        value_column="percentage",
        colors=["#9C27B0", "#673AB7", "#3F51B5"],
    )


@callback(
    Output("demographics-income-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_demographics_income(year: str) -> go.Figure:
    """Update income distribution chart."""
    year_int = int(year) if year else 2021
    df = get_demographics_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Create income quintile distribution
    if "income_quintile" in df.columns:
        quintile_counts = df["income_quintile"].value_counts().sort_index()
        data = [
            {"bracket": f"Q{q}", "count": int(count)}
            for q, count in quintile_counts.items()
        ]
    else:
        # Fallback to placeholder
        data = [
            {"bracket": "Q1 (Low)", "count": 32},
            {"bracket": "Q2", "count": 32},
            {"bracket": "Q3 (Mid)", "count": 32},
            {"bracket": "Q4", "count": 31},
            {"bracket": "Q5 (High)", "count": 31},
        ]

    return create_horizontal_bar(
        data=data,
        name_column="bracket",
        value_column="count",
        color="#4CAF50",
        sort=False,
    )


@callback(
    Output("amenities-breakdown-chart", "figure"),
    Input("toronto-year-select", "value"),
)
def update_amenities_breakdown(year: str) -> go.Figure:
    """Update amenity breakdown chart."""
    year_int = int(year) if year else 2021
    df = get_amenities_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Aggregate amenity counts
    parks = df["park_count"].sum() if "park_count" in df.columns else 0
    schools = df["school_count"].sum() if "school_count" in df.columns else 0
    childcare = df["childcare_count"].sum() if "childcare_count" in df.columns else 0

    data = [
        {"type": "Parks", "count": int(parks)},
        {"type": "Schools", "count": int(schools)},
        {"type": "Childcare", "count": int(childcare)},
    ]

    return create_horizontal_bar(
        data=data,
        name_column="type",
        value_column="count",
        color="#4CAF50",
    )


@callback(
    Output("amenities-radar-chart", "figure"),
    Input("toronto-year-select", "value"),
    Input("toronto-selected-neighbourhood", "data"),
)
def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure:
    """Update amenity comparison radar chart."""
    year_int = int(year) if year else 2021

    # Get city averages
    averages = get_city_averages(year_int)

    amenity_score = averages.get("avg_amenity_score") or 50
    city_data = {
        "parks_per_1000": amenity_score / 100 * 10,
        "schools_per_1000": amenity_score / 100 * 5,
        "childcare_per_1000": amenity_score / 100 * 3,
        "transit_access": 70,
    }

    data = [city_data]

    # Add selected neighbourhood if available
    if neighbourhood_id:
        details = get_neighbourhood_details(neighbourhood_id, year_int)
        if details:
            selected_data = {
                "parks_per_1000": details.get("park_count", 0) / 10,
                "schools_per_1000": details.get("school_count", 0) / 5,
                "childcare_per_1000": 3,
                "transit_access": 70,
            }
            data.insert(0, selected_data)

    return create_radar_figure(
        data=data,
        metrics=[
            "parks_per_1000",
            "schools_per_1000",
            "childcare_per_1000",
            "transit_access",
        ],
        fill=True,
    )


def _empty_chart(message: str) -> go.Figure:
    """Create an empty chart with a message."""
    fig = go.Figure()
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"visible": False},
        yaxis={"visible": False},
    )
    fig.add_annotation(
        text=message,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    return fig
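# Editorial note (sketch, not in the original commit): this _empty_chart()
# helper reappears verbatim in map_callbacks.py below. One way to deduplicate
# would be a tiny shared module; the path is hypothetical:
#
#     # portfolio_app/pages/toronto/callbacks/_placeholders.py
#     # holds _empty_chart() and _empty_map(); both callback modules then do:
#     # from portfolio_app.pages.toronto.callbacks._placeholders import _empty_chart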
304
portfolio_app/pages/toronto/callbacks/map_callbacks.py
Normal file
@@ -0,0 +1,304 @@
"""Map callbacks for choropleth interactions."""
# mypy: disable-error-code="misc,no-untyped-def,arg-type,no-any-return"

import plotly.graph_objects as go
from dash import Input, Output, State, callback, no_update

from portfolio_app.figures import create_choropleth_figure, create_ranking_bar
from portfolio_app.toronto.services import (
    get_amenities_data,
    get_demographics_data,
    get_housing_data,
    get_neighbourhoods_geojson,
    get_overview_data,
    get_safety_data,
)


@callback(
    Output("overview-choropleth", "figure"),
    Input("overview-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_overview_choropleth(metric: str, year: str) -> go.Figure:
    """Update the overview tab choropleth map."""
    year_int = int(year) if year else 2021
    df = get_overview_data(year_int)
    geojson = get_neighbourhoods_geojson(year_int)

    if df.empty:
        return _empty_map("No data available")

    data = df.to_dict("records")

    # Color scales based on metric
    color_scale = {
        "livability_score": "Viridis",
        "safety_score": "Greens",
        "affordability_score": "Blues",
        "amenity_score": "Purples",
    }.get(metric, "Viridis")

    return create_choropleth_figure(
        geojson=geojson,
        data=data,
        location_key="neighbourhood_id",
        color_column=metric or "livability_score",
        hover_data=["neighbourhood_name", "population"],
        color_scale=color_scale,
    )


@callback(
    Output("housing-choropleth", "figure"),
    Input("housing-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_housing_choropleth(metric: str, year: str) -> go.Figure:
    """Update the housing tab choropleth map."""
    year_int = int(year) if year else 2021
    df = get_housing_data(year_int)
    geojson = get_neighbourhoods_geojson(year_int)

    if df.empty:
        return _empty_map("No housing data available")

    data = df.to_dict("records")

    color_scale = {
        "affordability_index": "RdYlGn_r",
        "avg_rent_2bed": "Oranges",
        "rent_to_income_pct": "Reds",
        "vacancy_rate": "Blues",
    }.get(metric, "Oranges")

    return create_choropleth_figure(
        geojson=geojson,
        data=data,
        location_key="neighbourhood_id",
        color_column=metric or "affordability_index",
        hover_data=["neighbourhood_name", "avg_rent_2bed", "vacancy_rate"],
        color_scale=color_scale,
    )


@callback(
    Output("safety-choropleth", "figure"),
    Input("safety-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_safety_choropleth(metric: str, year: str) -> go.Figure:
    """Update the safety tab choropleth map."""
    year_int = int(year) if year else 2021
    df = get_safety_data(year_int)
    geojson = get_neighbourhoods_geojson(year_int)

    if df.empty:
        return _empty_map("No safety data available")

    data = df.to_dict("records")

    return create_choropleth_figure(
        geojson=geojson,
        data=data,
        location_key="neighbourhood_id",
        color_column=metric or "total_crime_rate",
        hover_data=["neighbourhood_name", "total_crimes"],
        color_scale="Reds",
    )


@callback(
    Output("demographics-choropleth", "figure"),
    Input("demographics-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_demographics_choropleth(metric: str, year: str) -> go.Figure:
    """Update the demographics tab choropleth map."""
    year_int = int(year) if year else 2021
    df = get_demographics_data(year_int)
    geojson = get_neighbourhoods_geojson(year_int)

    if df.empty:
        return _empty_map("No demographics data available")

    data = df.to_dict("records")

    color_scale = {
        "population": "YlOrBr",
        "median_income": "Greens",
        "median_age": "Blues",
        "diversity_index": "Purples",
    }.get(metric, "YlOrBr")

    # Map frontend metric names to column names
    column_map = {
        "population": "population",
        "median_income": "median_household_income",
        "median_age": "median_age",
        "diversity_index": "diversity_index",
    }
    column = column_map.get(metric, "population")

    return create_choropleth_figure(
        geojson=geojson,
        data=data,
        location_key="neighbourhood_id",
        color_column=column,
        hover_data=["neighbourhood_name"],
        color_scale=color_scale,
    )


@callback(
    Output("amenities-choropleth", "figure"),
    Input("amenities-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_amenities_choropleth(metric: str, year: str) -> go.Figure:
    """Update the amenities tab choropleth map."""
    year_int = int(year) if year else 2021
    df = get_amenities_data(year_int)
    geojson = get_neighbourhoods_geojson(year_int)

    if df.empty:
        return _empty_map("No amenities data available")

    data = df.to_dict("records")

    # Map frontend metric names to column names
    column_map = {
        "amenity_score": "amenity_score",
        "parks_per_capita": "parks_per_1000",
        "schools_per_capita": "schools_per_1000",
        "transit_score": "total_amenities_per_1000",
    }
    column = column_map.get(metric, "amenity_score")

    return create_choropleth_figure(
        geojson=geojson,
        data=data,
        location_key="neighbourhood_id",
        color_column=column,
        hover_data=["neighbourhood_name", "park_count", "school_count"],
        color_scale="Greens",
    )


@callback(
    Output("toronto-selected-neighbourhood", "data"),
    Input("overview-choropleth", "clickData"),
    Input("housing-choropleth", "clickData"),
    Input("safety-choropleth", "clickData"),
    Input("demographics-choropleth", "clickData"),
    Input("amenities-choropleth", "clickData"),
    State("toronto-tabs", "value"),
    prevent_initial_call=True,
)
def handle_map_click(
    overview_click,
    housing_click,
    safety_click,
    demographics_click,
    amenities_click,
    active_tab: str,
) -> int | None:
    """Extract neighbourhood ID from map click."""
    # Get the click data for the active tab
    click_map = {
        "overview": overview_click,
        "housing": housing_click,
        "safety": safety_click,
        "demographics": demographics_click,
        "amenities": amenities_click,
    }

    click_data = click_map.get(active_tab)

    if not click_data:
        return no_update

    try:
        # Extract neighbourhood_id from click data
        point = click_data["points"][0]
        location = point.get("location") or point.get("customdata", [None])[0]
        if location:
            return int(location)
    except (KeyError, IndexError, TypeError):
        pass

    return no_update
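# Illustrative payload (sketch, not in the original commit): for a choropleth
# trace, Plotly's clickData looks roughly like the dict below, which is why the
# handler reads points[0]["location"] first and falls back to customdata.
# Values shown are made up.
#
# {
#     "points": [
#         {
#             "location": "42",               # feature id -> neighbourhood_id
#             "customdata": ["42", "Annex"],  # shape depends on hover_data
#             "z": 78.3,
#         }
#     ]
# }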


@callback(
    Output("overview-rankings-chart", "figure"),
    Input("overview-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_rankings_chart(metric: str, year: str) -> go.Figure:
    """Update the top/bottom rankings bar chart."""
    year_int = int(year) if year else 2021
    df = get_overview_data(year_int)

    if df.empty:
        return _empty_chart("No data available")

    # Use the selected metric for ranking
    metric = metric or "livability_score"
    data = df.to_dict("records")

    return create_ranking_bar(
        data=data,
        name_column="neighbourhood_name",
        value_column=metric,
        title=f"Top & Bottom 10 by {metric.replace('_', ' ').title()}",
        top_n=10,
        bottom_n=10,
    )


def _empty_map(message: str) -> go.Figure:
    """Create an empty map with a message."""
    fig = go.Figure()
    fig.update_layout(
        mapbox={
            "style": "carto-darkmatter",
            "center": {"lat": 43.7, "lon": -79.4},
            "zoom": 9.5,
        },
        margin={"l": 0, "r": 0, "t": 0, "b": 0},
        paper_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
    )
    fig.add_annotation(
        text=message,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    return fig


def _empty_chart(message: str) -> go.Figure:
    """Create an empty chart with a message."""
    fig = go.Figure()
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        font_color="#c9c9c9",
        xaxis={"visible": False},
        yaxis={"visible": False},
    )
    fig.add_annotation(
        text=message,
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
        font={"size": 14, "color": "#888888"},
    )
    return fig
309
portfolio_app/pages/toronto/callbacks/selection_callbacks.py
Normal file
@@ -0,0 +1,309 @@
"""Selection callbacks for dropdowns and neighbourhood details."""
# mypy: disable-error-code="misc,no-untyped-def,type-arg"

import dash_mantine_components as dmc
from dash import Input, Output, callback

from portfolio_app.toronto.services import (
    get_city_averages,
    get_neighbourhood_details,
    get_neighbourhood_list,
)


@callback(
    Output("toronto-neighbourhood-select", "data"),
    Input("toronto-year-select", "value"),
)
def populate_neighbourhood_dropdown(year: str) -> list[dict]:
    """Populate the neighbourhood search dropdown."""
    year_int = int(year) if year else 2021
    neighbourhoods = get_neighbourhood_list(year_int)

    return [
        {"value": str(n["neighbourhood_id"]), "label": n["neighbourhood_name"]}
        for n in neighbourhoods
    ]
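# Illustrative output (sketch, not in the original commit): dmc.Select expects
# exactly this shape, e.g.
#   [{"value": "42", "label": "Annex"}, {"value": "77", "label": "Waterfront"}]
# IDs are stringified here because Select round-trips values as strings, which
# is why select_from_dropdown() below converts back with int().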
|
||||
|
||||
|
||||
@callback(
|
||||
Output("toronto-selected-neighbourhood", "data", allow_duplicate=True),
|
||||
Input("toronto-neighbourhood-select", "value"),
|
||||
prevent_initial_call=True,
|
||||
)
|
||||
def select_from_dropdown(value: str | None) -> int | None:
|
||||
"""Update selected neighbourhood from dropdown."""
|
||||
if value:
|
||||
return int(value)
|
||||
return None
|
||||
|
||||
|
||||
@callback(
|
||||
Output("toronto-compare-btn", "disabled"),
|
||||
Input("toronto-selected-neighbourhood", "data"),
|
||||
)
|
||||
def toggle_compare_button(neighbourhood_id: int | None) -> bool:
|
||||
"""Enable compare button when a neighbourhood is selected."""
|
||||
return neighbourhood_id is None
|
||||
|
||||
|
||||
# Overview tab KPIs
|
||||
@callback(
|
||||
Output("overview-city-avg", "children"),
|
||||
Input("toronto-year-select", "value"),
|
||||
)
|
||||
def update_overview_city_avg(year: str) -> str:
|
||||
"""Update the city average livability score."""
|
||||
year_int = int(year) if year else 2021
|
||||
averages = get_city_averages(year_int)
|
||||
score = averages.get("avg_livability_score", 72)
|
||||
return f"{score:.0f}" if score else "—"
|
||||
|
||||
|
||||
@callback(
|
||||
Output("overview-selected-name", "children"),
|
||||
Output("overview-selected-scores", "children"),
|
||||
Input("toronto-selected-neighbourhood", "data"),
|
||||
Input("toronto-year-select", "value"),
|
||||
)
|
||||
def update_overview_selected(neighbourhood_id: int | None, year: str):
|
||||
"""Update the selected neighbourhood details in overview tab."""
|
||||
if not neighbourhood_id:
|
||||
return "Click map to select", [dmc.Text("—", c="dimmed")]
|
||||
|
||||
year_int = int(year) if year else 2021
|
||||
details = get_neighbourhood_details(neighbourhood_id, year_int)
|
||||
|
||||
if not details:
|
||||
return "Unknown", [dmc.Text("No data", c="dimmed")]
|
||||
|
||||
name = details.get("neighbourhood_name", "Unknown")
|
||||
scores = [
|
||||
dmc.Group(
|
||||
[
|
||||
dmc.Text("Livability:", size="sm"),
|
||||
dmc.Text(
|
||||
f"{details.get('livability_score', 0):.0f}", size="sm", fw=700
|
||||
),
|
||||
],
|
||||
justify="space-between",
|
||||
),
|
||||
dmc.Group(
|
||||
[
|
||||
dmc.Text("Safety:", size="sm"),
|
||||
dmc.Text(f"{details.get('safety_score', 0):.0f}", size="sm", fw=700),
|
||||
],
|
||||
justify="space-between",
|
||||
),
|
||||
dmc.Group(
|
||||
[
|
||||
dmc.Text("Affordability:", size="sm"),
|
||||
dmc.Text(
|
||||
f"{details.get('affordability_score', 0):.0f}", size="sm", fw=700
|
||||
),
|
||||
],
|
||||
justify="space-between",
|
||||
),
|
||||
]
|
||||
|
||||
return name, scores
|
||||
|
||||
|
||||
# Housing tab KPIs
|
||||
@callback(
|
||||
Output("housing-city-rent", "children"),
|
||||
Output("housing-rent-change", "children"),
|
||||
Input("toronto-year-select", "value"),
|
||||
)
|
||||
def update_housing_kpis(year: str):
|
||||
"""Update housing tab KPI cards."""
|
||||
year_int = int(year) if year else 2021
|
||||
averages = get_city_averages(year_int)
|
||||
|
||||
rent = averages.get("avg_rent_2bed", 2450)
|
||||
rent_str = f"${rent:,.0f}" if rent else "—"
|
||||
|
||||
# Placeholder change - would come from historical data
|
||||
change = "+4.2% YoY"
|
||||
|
||||
return rent_str, change
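
    # Sketch, not part of this commit: the "+4.2% YoY" above (and the safety
    # tab's "-2.1% YoY" below) are placeholders. One way to derive the change,
    # assuming get_city_averages() accepts earlier years and returns the same
    # keys for them:
    #
    #     prior = get_city_averages(year_int - 1)
    #     prev_rent = prior.get("avg_rent_2bed")
    #     if rent and prev_rent:
    #         change = f"{(rent - prev_rent) / prev_rent * 100:+.1f}% YoY"
    #     else:
    #         change = "—"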


@callback(
    Output("housing-selected-name", "children"),
    Output("housing-selected-details", "children"),
    Input("toronto-selected-neighbourhood", "data"),
    Input("toronto-year-select", "value"),
)
def update_housing_selected(neighbourhood_id: int | None, year: str):
    """Update selected neighbourhood details in housing tab."""
    if not neighbourhood_id:
        return "Click map to select", [dmc.Text("—", c="dimmed")]

    year_int = int(year) if year else 2021
    details = get_neighbourhood_details(neighbourhood_id, year_int)

    if not details:
        return "Unknown", [dmc.Text("No data", c="dimmed")]

    name = details.get("neighbourhood_name", "Unknown")
    rent = details.get("avg_rent_2bed")
    vacancy = details.get("vacancy_rate")

    info = [
        dmc.Text(f"2BR Rent: ${rent:,.0f}" if rent else "2BR Rent: —", size="sm"),
        dmc.Text(f"Vacancy: {vacancy:.1f}%" if vacancy else "Vacancy: —", size="sm"),
    ]

    return name, info


# Safety tab KPIs
@callback(
    Output("safety-city-rate", "children"),
    Output("safety-rate-change", "children"),
    Input("toronto-year-select", "value"),
)
def update_safety_kpis(year: str):
    """Update safety tab KPI cards."""
    year_int = int(year) if year else 2021
    averages = get_city_averages(year_int)

    rate = averages.get("avg_crime_rate", 4250)
    rate_str = f"{rate:,.0f}" if rate else "—"

    # Placeholder change
    change = "-2.1% YoY"

    return rate_str, change


@callback(
    Output("safety-selected-name", "children"),
    Output("safety-selected-details", "children"),
    Input("toronto-selected-neighbourhood", "data"),
    Input("toronto-year-select", "value"),
)
def update_safety_selected(neighbourhood_id: int | None, year: str):
    """Update selected neighbourhood details in safety tab."""
    if not neighbourhood_id:
        return "Click map to select", [dmc.Text("—", c="dimmed")]

    year_int = int(year) if year else 2021
    details = get_neighbourhood_details(neighbourhood_id, year_int)

    if not details:
        return "Unknown", [dmc.Text("No data", c="dimmed")]

    name = details.get("neighbourhood_name", "Unknown")
    crime_rate = details.get("crime_rate_per_100k")

    info = [
        dmc.Text(
            f"Crime Rate: {crime_rate:,.0f}/100K" if crime_rate else "Crime Rate: —",
            size="sm",
        ),
    ]

    return name, info


# Demographics tab KPIs
@callback(
    Output("demographics-city-pop", "children"),
    Output("demographics-pop-change", "children"),
    Input("toronto-year-select", "value"),
)
def update_demographics_kpis(year: str):
    """Update demographics tab KPI cards."""
    year_int = int(year) if year else 2021
    averages = get_city_averages(year_int)

    pop = averages.get("total_population", 2790000)
    if pop and pop >= 1000000:
        pop_str = f"{pop / 1000000:.2f}M"
    elif pop:
        pop_str = f"{pop:,.0f}"
    else:
        pop_str = "—"

    change = "+2.3% since 2016"

    return pop_str, change


@callback(
    Output("demographics-selected-name", "children"),
    Output("demographics-selected-details", "children"),
    Input("toronto-selected-neighbourhood", "data"),
    Input("toronto-year-select", "value"),
)
def update_demographics_selected(neighbourhood_id: int | None, year: str):
    """Update selected neighbourhood details in demographics tab."""
    if not neighbourhood_id:
        return "Click map to select", [dmc.Text("—", c="dimmed")]

    year_int = int(year) if year else 2021
    details = get_neighbourhood_details(neighbourhood_id, year_int)

    if not details:
        return "Unknown", [dmc.Text("No data", c="dimmed")]

    name = details.get("neighbourhood_name", "Unknown")
    pop = details.get("population")
    income = details.get("median_household_income")

    info = [
        dmc.Text(f"Population: {pop:,}" if pop else "Population: —", size="sm"),
        dmc.Text(
            f"Median Income: ${income:,.0f}" if income else "Median Income: —",
            size="sm",
        ),
    ]

    return name, info


# Amenities tab KPIs
@callback(
    Output("amenities-city-score", "children"),
    Input("toronto-year-select", "value"),
)
def update_amenities_kpis(year: str) -> str:
    """Update amenities tab KPI cards."""
    year_int = int(year) if year else 2021
    averages = get_city_averages(year_int)

    score = averages.get("avg_amenity_score", 68)
    return f"{score:.0f}" if score else "—"


@callback(
    Output("amenities-selected-name", "children"),
    Output("amenities-selected-details", "children"),
    Input("toronto-selected-neighbourhood", "data"),
    Input("toronto-year-select", "value"),
)
def update_amenities_selected(neighbourhood_id: int | None, year: str):
    """Update selected neighbourhood details in amenities tab."""
    if not neighbourhood_id:
        return "Click map to select", [dmc.Text("—", c="dimmed")]

    year_int = int(year) if year else 2021
    details = get_neighbourhood_details(neighbourhood_id, year_int)

    if not details:
        return "Unknown", [dmc.Text("No data", c="dimmed")]

    name = details.get("neighbourhood_name", "Unknown")
    parks = details.get("park_count")
    schools = details.get("school_count")

    info = [
        dmc.Text(f"Parks: {parks}" if parks is not None else "Parks: —", size="sm"),
        dmc.Text(
            f"Schools: {schools}" if schools is not None else "Schools: —", size="sm"
        ),
    ]

    return name, info
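
Because the decorated callbacks above remain directly callable functions, their pure logic can be unit-tested without starting a Dash server. A minimal pytest sketch covering the two callbacks that need no service stubs:

import portfolio_app.pages.toronto.callbacks.selection_callbacks as sc


def test_toggle_compare_button() -> None:
    # The compare button is disabled exactly when nothing is selected.
    assert sc.toggle_compare_button(None) is True
    assert sc.toggle_compare_button(42) is False


def test_select_from_dropdown() -> None:
    # Dropdown values arrive as strings and are coerced to ints for the store.
    assert sc.select_from_dropdown("17") == 17
    assert sc.select_from_dropdown(None) is None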
@@ -1,62 +1,56 @@
-"""Toronto Housing Dashboard page."""
+"""Toronto Neighbourhood Dashboard page.
+
+Displays neighbourhood-level data across 5 tabs: Overview, Housing, Safety,
+Demographics, and Amenities. Each tab provides interactive choropleth maps,
+KPI cards, and supporting charts.
+"""
 
 import dash
 import dash_mantine_components as dmc
-from dash import dcc, html
+from dash import dcc
 from dash_iconify import DashIconify
 
-from portfolio_app.components import (
-    create_map_controls,
-    create_metric_cards_row,
-    create_time_slider,
-    create_year_selector,
+from portfolio_app.pages.toronto.tabs import (
+    create_amenities_tab,
+    create_demographics_tab,
+    create_housing_tab,
+    create_overview_tab,
+    create_safety_tab,
 )
 
-dash.register_page(__name__, path="/toronto", name="Toronto Housing")
+dash.register_page(__name__, path="/toronto", name="Toronto Neighbourhoods")
 
-# Metric options for the purchase market
-PURCHASE_METRIC_OPTIONS = [
-    {"label": "Average Price", "value": "avg_price"},
-    {"label": "Median Price", "value": "median_price"},
-    {"label": "Sales Volume", "value": "sales_count"},
-    {"label": "Days on Market", "value": "avg_dom"},
-]
-
-# Metric options for the rental market
-RENTAL_METRIC_OPTIONS = [
-    {"label": "Average Rent", "value": "avg_rent"},
-    {"label": "Vacancy Rate", "value": "vacancy_rate"},
-    {"label": "Rental Universe", "value": "rental_universe"},
-]
-
-# Sample metrics for KPI cards (will be populated by callbacks)
-SAMPLE_METRICS = [
+# Tab configuration
+TAB_CONFIG = [
     {
-        "title": "Avg. Price",
-        "value": 1125000,
-        "delta": 2.3,
-        "prefix": "$",
-        "format_spec": ",.0f",
+        "value": "overview",
+        "label": "Overview",
+        "icon": "tabler:chart-pie",
+        "color": "blue",
     },
     {
-        "title": "Sales Volume",
-        "value": 4850,
-        "delta": -5.1,
-        "format_spec": ",",
+        "value": "housing",
+        "label": "Housing",
+        "icon": "tabler:home",
+        "color": "teal",
     },
     {
-        "title": "Avg. DOM",
-        "value": 18,
-        "delta": 3,
-        "suffix": " days",
-        "positive_is_good": False,
+        "value": "safety",
+        "label": "Safety",
+        "icon": "tabler:shield-check",
+        "color": "orange",
     },
     {
-        "title": "Avg. Rent",
-        "value": 2450,
-        "delta": 4.2,
-        "prefix": "$",
-        "format_spec": ",.0f",
+        "value": "demographics",
+        "label": "Demographics",
+        "icon": "tabler:users",
+        "color": "violet",
     },
+    {
+        "value": "amenities",
+        "label": "Amenities",
+        "icon": "tabler:trees",
+        "color": "green",
+    },
 ]
@@ -67,9 +61,9 @@ def create_header() -> dmc.Group:
         [
             dmc.Stack(
                 [
-                    dmc.Title("Toronto Housing Dashboard", order=1),
+                    dmc.Title("Toronto Neighbourhood Dashboard", order=1),
                     dmc.Text(
-                        "Real estate market analysis for the Greater Toronto Area",
+                        "Explore livability across 158 Toronto neighbourhoods",
                         c="dimmed",
                     ),
                 ],
@@ -88,11 +82,17 @@ def create_header() -> dmc.Group:
                 ),
                 href="/toronto/methodology",
             ),
-            create_year_selector(
-                id_prefix="toronto",
-                min_year=2020,
-                default_year=2024,
-                label="Year",
+            dmc.Select(
+                id="toronto-year-select",
+                data=[
+                    {"value": "2021", "label": "2021"},
+                    {"value": "2022", "label": "2022"},
+                    {"value": "2023", "label": "2023"},
+                ],
+                value="2021",
+                label="Census Year",
+                size="sm",
+                w=120,
             ),
         ],
         gap="md",
@@ -103,188 +103,100 @@ def create_header() -> dmc.Group:
     )
 
 
-def create_kpi_section() -> dmc.Box:
-    """Create the KPI metrics row."""
-    return dmc.Box(
-        children=[
-            dmc.Title("Key Metrics", order=3, size="h4", mb="sm"),
-            html.Div(
-                id="toronto-kpi-cards",
-                children=[
-                    create_metric_cards_row(SAMPLE_METRICS, id_prefix="toronto-kpi")
-                ],
-            ),
-        ],
-    )
-
-
-def create_purchase_map_section() -> dmc.Grid:
-    """Create the purchase market choropleth section."""
-    return dmc.Grid(
-        [
-            dmc.GridCol(
-                create_map_controls(
-                    id_prefix="purchase-map",
-                    metric_options=PURCHASE_METRIC_OPTIONS,
-                    default_metric="avg_price",
-                ),
-                span={"base": 12, "md": 3},
-            ),
-            dmc.GridCol(
-                dmc.Paper(
-                    children=[
-                        dcc.Graph(
-                            id="purchase-choropleth",
-                            config={"scrollZoom": True},
-                            style={"height": "500px"},
-                        ),
-                    ],
-                    p="xs",
-                    radius="sm",
-                    withBorder=True,
-                ),
-                span={"base": 12, "md": 9},
-            ),
-        ],
-        gutter="md",
-    )
-
-
-def create_rental_map_section() -> dmc.Grid:
-    """Create the rental market choropleth section."""
-    return dmc.Grid(
-        [
-            dmc.GridCol(
-                create_map_controls(
-                    id_prefix="rental-map",
-                    metric_options=RENTAL_METRIC_OPTIONS,
-                    default_metric="avg_rent",
-                ),
-                span={"base": 12, "md": 3},
-            ),
-            dmc.GridCol(
-                dmc.Paper(
-                    children=[
-                        dcc.Graph(
-                            id="rental-choropleth",
-                            config={"scrollZoom": True},
-                            style={"height": "500px"},
-                        ),
-                    ],
-                    p="xs",
-                    radius="sm",
-                    withBorder=True,
-                ),
-                span={"base": 12, "md": 9},
-            ),
-        ],
-        gutter="md",
-    )
-
-
-def create_time_series_section() -> dmc.Grid:
-    """Create the time series charts section."""
-    return dmc.Grid(
-        [
-            dmc.GridCol(
-                dmc.Paper(
-                    children=[
-                        dmc.Title("Price Trends", order=4, size="h5", mb="sm"),
-                        dcc.Graph(
-                            id="price-time-series",
-                            config={"displayModeBar": False},
-                            style={"height": "350px"},
-                        ),
-                    ],
-                    p="md",
-                    radius="sm",
-                    withBorder=True,
-                ),
-                span={"base": 12, "md": 6},
-            ),
-            dmc.GridCol(
-                dmc.Paper(
-                    children=[
-                        dmc.Title("Sales Volume", order=4, size="h5", mb="sm"),
-                        dcc.Graph(
-                            id="volume-time-series",
-                            config={"displayModeBar": False},
-                            style={"height": "350px"},
-                        ),
-                    ],
-                    p="md",
-                    radius="sm",
-                    withBorder=True,
-                ),
-                span={"base": 12, "md": 6},
-            ),
-        ],
-        gutter="md",
-    )
-
-
-def create_market_comparison_section() -> dmc.Paper:
-    """Create the market comparison chart section."""
+def create_neighbourhood_selector() -> dmc.Paper:
+    """Create the neighbourhood search/select component."""
     return dmc.Paper(
         children=[
             dmc.Group(
                 [
-                    dmc.Title("Market Indicators", order=4, size="h5"),
-                    create_time_slider(
-                        id_prefix="market-comparison",
-                        min_year=2020,
-                        label="",
+                    DashIconify(icon="tabler:search", width=20, color="gray"),
+                    dmc.Select(
+                        id="toronto-neighbourhood-select",
+                        placeholder="Search neighbourhoods...",
+                        searchable=True,
+                        clearable=True,
+                        data=[],  # Populated by callback
+                        style={"flex": 1},
+                    ),
+                    dmc.Button(
+                        "Compare",
+                        id="toronto-compare-btn",
+                        leftSection=DashIconify(icon="tabler:git-compare", width=16),
+                        variant="light",
+                        disabled=True,
                     ),
                 ],
                 justify="space-between",
+                align="center",
                 mb="md",
+                gap="sm",
             ),
-            dcc.Graph(
-                id="market-comparison-chart",
-                config={"displayModeBar": False},
-                style={"height": "400px"},
-            ),
         ],
-        p="md",
+        p="sm",
         radius="sm",
         withBorder=True,
     )
 
 
+def create_tab_navigation() -> dmc.Tabs:
+    """Create the tab navigation with icons."""
+    return dmc.Tabs(
+        [
+            dmc.TabsList(
+                [
+                    dmc.TabsTab(
+                        dmc.Group(
+                            [
+                                DashIconify(icon=tab["icon"], width=18),
+                                dmc.Text(tab["label"], size="sm"),
+                            ],
+                            gap="xs",
+                        ),
+                        value=tab["value"],
+                    )
+                    for tab in TAB_CONFIG
+                ],
+                grow=True,
+            ),
+            # Tab panels
+            dmc.TabsPanel(create_overview_tab(), value="overview", pt="md"),
+            dmc.TabsPanel(create_housing_tab(), value="housing", pt="md"),
+            dmc.TabsPanel(create_safety_tab(), value="safety", pt="md"),
+            dmc.TabsPanel(create_demographics_tab(), value="demographics", pt="md"),
+            dmc.TabsPanel(create_amenities_tab(), value="amenities", pt="md"),
+        ],
+        id="toronto-tabs",
+        value="overview",
+        variant="default",
+    )
 
 
 def create_data_notice() -> dmc.Alert:
-    """Create a notice about data availability."""
+    """Create a notice about data sources."""
     return dmc.Alert(
         children=[
             dmc.Text(
-                "This dashboard uses TRREB and CMHC data. "
-                "Geographic boundaries require QGIS digitization to enable choropleth maps. "
-                "Sample data is shown below.",
+                "Data from Toronto Open Data (Census 2021, Crime Statistics) and "
+                "CMHC Rental Market Reports. Click neighbourhoods on the map for details.",
                 size="sm",
             ),
         ],
-        title="Data Notice",
+        title="Data Sources",
        color="blue",
        variant="light",
        icon=DashIconify(icon="tabler:info-circle", width=20),
    )
 
 
+# Store for selected neighbourhood
+neighbourhood_store = dcc.Store(id="toronto-selected-neighbourhood", data=None)
+
+# Register callbacks
+from portfolio_app.pages.toronto import callbacks  # noqa: E402, F401
 
 layout = dmc.Container(
     dmc.Stack(
         [
+            neighbourhood_store,
             create_header(),
             create_data_notice(),
-            create_kpi_section(),
-            dmc.Divider(my="md", label="Purchase Market", labelPosition="center"),
-            create_purchase_map_section(),
-            dmc.Divider(my="md", label="Rental Market", labelPosition="center"),
-            create_rental_map_section(),
-            dmc.Divider(my="md", label="Trends", labelPosition="center"),
-            create_time_series_section(),
-            create_market_comparison_section(),
+            create_neighbourhood_selector(),
+            create_tab_navigation(),
             dmc.Space(h=40),
         ],
         gap="lg",
@@ -46,42 +46,8 @@ def layout() -> dmc.Container:
             mb="lg",
             children=[
                 dmc.Title("Data Sources", order=2, mb="md"),
-                # TRREB
-                dmc.Title("Purchase Data: TRREB", order=3, size="h4", mb="sm"),
-                dmc.Text(
-                    [
-                        "The Toronto Regional Real Estate Board (TRREB) publishes monthly ",
-                        html.Strong("Market Watch"),
-                        " reports containing aggregate statistics for residential real estate "
-                        "transactions across the Greater Toronto Area.",
-                    ],
-                    mb="sm",
-                ),
-                dmc.List(
-                    [
-                        dmc.ListItem("Source: TRREB Market Watch Reports (PDF)"),
-                        dmc.ListItem("Geographic granularity: ~35 TRREB Districts"),
-                        dmc.ListItem("Temporal granularity: Monthly"),
-                        dmc.ListItem("Coverage: 2021-present"),
-                        dmc.ListItem(
-                            [
-                                "Metrics: Sales count, average/median price, new listings, ",
-                                "active listings, days on market, sale-to-list ratio",
-                            ]
-                        ),
-                    ],
-                    mb="md",
-                ),
-                dmc.Anchor(
-                    "TRREB Market Watch Archive",
-                    href="https://trreb.ca/market-data/market-watch/market-watch-archive/",
-                    target="_blank",
-                    mb="lg",
-                ),
                 # CMHC
-                dmc.Title(
-                    "Rental Data: CMHC", order=3, size="h4", mb="sm", mt="md"
-                ),
+                dmc.Title("Rental Data: CMHC", order=3, size="h4", mb="sm"),
                 dmc.Text(
                     [
                         "Canada Mortgage and Housing Corporation (CMHC) conducts the annual ",
@@ -124,28 +90,17 @@ def layout() -> dmc.Container:
             mb="lg",
             children=[
                 dmc.Title("Geographic Considerations", order=2, mb="md"),
-                dmc.Alert(
-                    title="Important: Non-Aligned Geographies",
-                    color="yellow",
-                    mb="md",
-                    children=[
-                        "TRREB Districts and CMHC Zones do ",
-                        html.Strong("not"),
-                        " align geographically. They are displayed as separate layers and "
-                        "should not be directly compared at the sub-regional level.",
-                    ],
-                ),
                 dmc.Text(
-                    "The dashboard presents three geographic layers:",
+                    "The dashboard presents two geographic layers:",
                     mb="sm",
                 ),
                 dmc.List(
                     [
                         dmc.ListItem(
                             [
-                                html.Strong("TRREB Districts (~35): "),
-                                "Used for purchase/sales data visualization. "
-                                "Districts are defined by TRREB and labeled with codes like W01, C01, E01.",
+                                html.Strong("City Neighbourhoods (158): "),
+                                "Official City of Toronto neighbourhood boundaries, "
+                                "used for neighbourhood-level analysis.",
                             ]
                         ),
                         dmc.ListItem(
@@ -155,13 +110,6 @@ def layout() -> dmc.Container:
                                 "Zones are aligned with Census Tract boundaries.",
                             ]
                         ),
-                        dmc.ListItem(
-                            [
-                                html.Strong("City Neighbourhoods (158): "),
-                                "Reference overlay only. "
-                                "These are official City of Toronto neighbourhood boundaries.",
-                            ]
-                        ),
                     ],
                 ),
             ],
@@ -212,22 +160,15 @@ def layout() -> dmc.Container:
                         dmc.ListItem(
                             [
                                 html.Strong("Reporting Lag: "),
-                                "TRREB data reflects closed transactions, which may lag market "
-                                "conditions by 1-3 months. CMHC data is annual.",
-                            ]
-                        ),
-                        dmc.ListItem(
-                            [
-                                html.Strong("Geographic Boundaries: "),
-                                "TRREB district boundaries were manually digitized from reference maps "
-                                "and may contain minor inaccuracies.",
+                                "CMHC rental data is annual (October survey). "
+                                "Other data sources may have different update frequencies.",
                             ]
                         ),
                         dmc.ListItem(
                             [
                                 html.Strong("Data Suppression: "),
-                                "Some cells may be suppressed for confidentiality when transaction "
-                                "counts are below thresholds.",
+                                "Some cells may be suppressed for confidentiality when counts "
+                                "are below thresholds.",
                             ]
                        ),
                    ],
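
A hedged illustration of the suppression rule the methodology page describes: before display, any metric backed by too few observations is masked. The function name and threshold below are illustrative, not taken from this codebase:

import pandas as pd

SUPPRESSION_THRESHOLD = 10  # illustrative; real thresholds vary by source


def suppress_small_cells(
    df: pd.DataFrame, value_col: str, count_col: str
) -> pd.DataFrame:
    """Null out values whose underlying counts fall below the threshold."""
    out = df.copy()
    out.loc[out[count_col] < SUPPRESSION_THRESHOLD, value_col] = None
    return out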
15
portfolio_app/pages/toronto/tabs/__init__.py
Normal file
@@ -0,0 +1,15 @@
"""Tab modules for Toronto Neighbourhood Dashboard."""

from .amenities import create_amenities_tab
from .demographics import create_demographics_tab
from .housing import create_housing_tab
from .overview import create_overview_tab
from .safety import create_safety_tab

__all__ = [
    "create_overview_tab",
    "create_housing_tab",
    "create_safety_tab",
    "create_demographics_tab",
    "create_amenities_tab",
]
207
portfolio_app/pages/toronto/tabs/amenities.py
Normal file
@@ -0,0 +1,207 @@
"""Amenities tab for Toronto Neighbourhood Dashboard.

Displays parks, schools, transit, and other amenity metrics.
"""

import dash_mantine_components as dmc
from dash import dcc


def create_amenities_tab() -> dmc.Stack:
    """Create the Amenities tab layout.

    Layout:
    - Choropleth map (amenity score) | KPI cards
    - Amenity breakdown chart | Amenity comparison radar

    Returns:
        Tab content as a Mantine Stack component.
    """
    return dmc.Stack(
        [
            # Main content: Map + KPIs
            dmc.Grid(
                [
                    # Choropleth map
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Group(
                                    [
                                        dmc.Title(
                                            "Neighbourhood Amenities",
                                            order=4,
                                            size="h5",
                                        ),
                                        dmc.Select(
                                            id="amenities-metric-select",
                                            data=[
                                                {
                                                    "value": "amenity_score",
                                                    "label": "Amenity Score",
                                                },
                                                {
                                                    "value": "parks_per_capita",
                                                    "label": "Parks per 1K",
                                                },
                                                {
                                                    "value": "schools_per_capita",
                                                    "label": "Schools per 1K",
                                                },
                                                {
                                                    "value": "transit_score",
                                                    "label": "Transit Score",
                                                },
                                            ],
                                            value="amenity_score",
                                            size="sm",
                                            w=180,
                                        ),
                                    ],
                                    justify="space-between",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="amenities-choropleth",
                                    config={
                                        "scrollZoom": True,
                                        "displayModeBar": False,
                                    },
                                    style={"height": "450px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "lg": 8},
                    ),
                    # KPI cards
                    dmc.GridCol(
                        dmc.Stack(
                            [
                                dmc.Paper(
                                    [
                                        dmc.Text(
                                            "City Amenity Score", size="xs", c="dimmed"
                                        ),
                                        dmc.Title(
                                            id="amenities-city-score",
                                            children="68",
                                            order=2,
                                        ),
                                        dmc.Text(
                                            "Out of 100",
                                            size="sm",
                                            c="dimmed",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                                dmc.Paper(
                                    [
                                        dmc.Text("Total Parks", size="xs", c="dimmed"),
                                        dmc.Title(
                                            id="amenities-total-parks",
                                            children="1,500+",
                                            order=2,
                                        ),
                                        dmc.Text(
                                            id="amenities-park-area",
                                            children="8,000+ hectares",
                                            size="sm",
                                            c="green",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                                dmc.Paper(
                                    [
                                        dmc.Text(
                                            "Selected Neighbourhood",
                                            size="xs",
                                            c="dimmed",
                                        ),
                                        dmc.Title(
                                            id="amenities-selected-name",
                                            children="Click map to select",
                                            order=4,
                                            size="h5",
                                        ),
                                        dmc.Stack(
                                            id="amenities-selected-details",
                                            children=[
                                                dmc.Text("—", c="dimmed"),
                                            ],
                                            gap="xs",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                            ],
                            gap="md",
                        ),
                        span={"base": 12, "lg": 4},
                    ),
                ],
                gutter="md",
            ),
            # Supporting charts
            dmc.Grid(
                [
                    # Amenity breakdown
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Title(
                                    "Amenity Breakdown",
                                    order=4,
                                    size="h5",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="amenities-breakdown-chart",
                                    config={"displayModeBar": False},
                                    style={"height": "300px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "md": 6},
                    ),
                    # Amenity comparison radar
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Title(
                                    "Amenity Comparison",
                                    order=4,
                                    size="h5",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="amenities-radar-chart",
                                    config={"displayModeBar": False},
                                    style={"height": "300px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "md": 6},
                    ),
                ],
                gutter="md",
            ),
        ],
        gap="md",
    )
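
The tab module only declares component IDs; the figures come from callbacks elsewhere in the package. A hedged sketch of the choropleth callback wired to the metric selector above; the service and figure-builder names are assumptions, not taken from this diff:

import plotly.graph_objects as go
from dash import Input, Output, callback


@callback(
    Output("amenities-choropleth", "figure"),
    Input("amenities-metric-select", "value"),
    Input("toronto-year-select", "value"),
)
def update_amenities_choropleth(metric: str, year: str) -> go.Figure:
    """Redraw the amenities map when the metric or year changes."""
    year_int = int(year) if year else 2021
    rows = get_amenity_metrics(year_int)  # hypothetical service call
    return build_choropleth(rows, metric)  # hypothetical figure builder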
211
portfolio_app/pages/toronto/tabs/demographics.py
Normal file
@@ -0,0 +1,211 @@
"""Demographics tab for Toronto Neighbourhood Dashboard.

Displays population, income, age, and diversity metrics.
"""

import dash_mantine_components as dmc
from dash import dcc


def create_demographics_tab() -> dmc.Stack:
    """Create the Demographics tab layout.

    Layout:
    - Choropleth map (demographic metric) | KPI cards
    - Age distribution chart | Income distribution chart

    Returns:
        Tab content as a Mantine Stack component.
    """
    return dmc.Stack(
        [
            # Main content: Map + KPIs
            dmc.Grid(
                [
                    # Choropleth map
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Group(
                                    [
                                        dmc.Title(
                                            "Neighbourhood Demographics",
                                            order=4,
                                            size="h5",
                                        ),
                                        dmc.Select(
                                            id="demographics-metric-select",
                                            data=[
                                                {
                                                    "value": "population",
                                                    "label": "Population",
                                                },
                                                {
                                                    "value": "median_income",
                                                    "label": "Median Income",
                                                },
                                                {
                                                    "value": "median_age",
                                                    "label": "Median Age",
                                                },
                                                {
                                                    "value": "diversity_index",
                                                    "label": "Diversity Index",
                                                },
                                            ],
                                            value="population",
                                            size="sm",
                                            w=180,
                                        ),
                                    ],
                                    justify="space-between",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="demographics-choropleth",
                                    config={
                                        "scrollZoom": True,
                                        "displayModeBar": False,
                                    },
                                    style={"height": "450px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "lg": 8},
                    ),
                    # KPI cards
                    dmc.GridCol(
                        dmc.Stack(
                            [
                                dmc.Paper(
                                    [
                                        dmc.Text(
                                            "City Population", size="xs", c="dimmed"
                                        ),
                                        dmc.Title(
                                            id="demographics-city-pop",
                                            children="2.79M",
                                            order=2,
                                        ),
                                        dmc.Text(
                                            id="demographics-pop-change",
                                            children="+2.3% since 2016",
                                            size="sm",
                                            c="green",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                                dmc.Paper(
                                    [
                                        dmc.Text(
                                            "Median Household Income",
                                            size="xs",
                                            c="dimmed",
                                        ),
                                        dmc.Title(
                                            id="demographics-city-income",
                                            children="$84,000",
                                            order=2,
                                        ),
                                        dmc.Text(
                                            "City average",
                                            size="sm",
                                            c="dimmed",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                                dmc.Paper(
                                    [
                                        dmc.Text(
                                            "Selected Neighbourhood",
                                            size="xs",
                                            c="dimmed",
                                        ),
                                        dmc.Title(
                                            id="demographics-selected-name",
                                            children="Click map to select",
                                            order=4,
                                            size="h5",
                                        ),
                                        dmc.Stack(
                                            id="demographics-selected-details",
                                            children=[
                                                dmc.Text("—", c="dimmed"),
                                            ],
                                            gap="xs",
                                        ),
                                    ],
                                    p="md",
                                    radius="sm",
                                    withBorder=True,
                                ),
                            ],
                            gap="md",
                        ),
                        span={"base": 12, "lg": 4},
                    ),
                ],
                gutter="md",
            ),
            # Supporting charts
            dmc.Grid(
                [
                    # Age distribution
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Title(
                                    "Age Distribution",
                                    order=4,
                                    size="h5",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="demographics-age-chart",
                                    config={"displayModeBar": False},
                                    style={"height": "300px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "md": 6},
                    ),
                    # Income distribution
                    dmc.GridCol(
                        dmc.Paper(
                            [
                                dmc.Title(
                                    "Income Distribution",
                                    order=4,
                                    size="h5",
                                    mb="sm",
                                ),
                                dcc.Graph(
                                    id="demographics-income-chart",
                                    config={"displayModeBar": False},
                                    style={"height": "300px"},
                                ),
                            ],
                            p="md",
                            radius="sm",
                            withBorder=True,
                        ),
                        span={"base": 12, "md": 6},
                    ),
                ],
                gutter="md",
            ),
        ],
        gap="md",
    )
Some files were not shown because too many files have changed in this diff.