Compare commits: `4818c53fd2...main` (50 commits)
Commits in this range:

```
a2c213be5d, 0455ec69a0, 9e216962b1, dfa5f92d8a, 0c9769fd27, cb908a18c3,
558022f26e, 9e27fb8011, cda2a078d9, dd8de9810d, 56bcc1bb1d, ee0a7ef7ad,
fd9850778e, 01e98103c7, 62d1a52eed, e37611673f, 33306a911b, a5d6866d63,
f58b2f70e2, 263b52d5e4, f345d41535, 14701f334c, 92763a17c4, 546ee1cc92,
9cc2cf0e00, 28f239e8cd, c3de98c4a5, eee015efac, 941305e71c, 54665bac63,
3eb32a4766, 69c4216cd5, 6e00a17c05, 8f3c5554f9, 5839eabf1e, ebe48304d7,
2fc2a1bdb5, 6872aa510b, 9a1fc81f79, cf6e874961, 451dc10a10, 193b9289b9,
7a16e6d121, ecc50e5d98, ae3742630e, e70965b429, 25954f17bb, bffd44a5a5,
bf6e392002, d0f32edba7
```
**`.gitea/workflows/ci.yml`** — new file (35 lines)

```yaml
name: CI

on:
  push:
    branches:
      - development
      - staging
      - main
  pull_request:
    branches:
      - development

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install ruff pytest

      - name: Run linter
        run: ruff check .

      - name: Run tests
        run: pytest tests/ -v --tb=short
```
**`.gitea/workflows/deploy-production.yml`** — new file (44 lines)

```yaml
name: Deploy to Production

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Production Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.PROD_HOST }}
          username: ${{ secrets.PROD_USER }}
          key: ${{ secrets.PROD_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin main
            git reset --hard origin/main

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Production deployment complete!"
```
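Both deploy workflows gate on `curl -f http://localhost:8050/health`, which assumes the Dash app serves a `/health` route (CLAUDE.md below lists `pages/health.py`). A minimal sketch of such a page, assuming Dash Pages routing — illustrative only, not the repository's actual file:

```python
# Hypothetical pages/health.py sketch -- any 200 response satisfies `curl -f`.
# Assumes the app was created with dash.Dash(use_pages=True).
import dash
from dash import html

dash.register_page(__name__, path="/health")

# A trivial layout is enough: Dash returns HTTP 200 for any registered page.
layout = html.Div("ok", id="health-status")
```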
**`.gitea/workflows/deploy-staging.yml`** — new file (44 lines)

```yaml
name: Deploy to Staging

on:
  push:
    branches:
      - staging

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Deploy to Staging Server
        uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.STAGING_HOST }}
          username: ${{ secrets.STAGING_USER }}
          key: ${{ secrets.STAGING_SSH_KEY }}
          script: |
            set -euo pipefail

            cd ~/apps/personal-portfolio

            echo "Pulling latest changes..."
            git fetch origin staging
            git reset --hard origin/staging

            echo "Activating virtual environment..."
            source .venv/bin/activate

            echo "Installing dependencies..."
            pip install -r requirements.txt --quiet

            echo "Running dbt models..."
            cd dbt && dbt run --profiles-dir . && cd ..

            echo "Restarting application..."
            docker compose down
            docker compose up -d

            echo "Waiting for health check..."
            sleep 10
            curl -f http://localhost:8050/health || exit 1

            echo "Staging deployment complete!"
```
**`.gitignore`** (vendored)

```diff
@@ -198,3 +198,4 @@ cython_debug/
 # PyPI configuration file
 .pypirc
 
+dbt/.user.yml
```
**`.vscode/settings.json`** — new file (vendored, 3 lines)

```json
{
    "python.defaultInterpreterPath": "/home/leomiranda/WorkDev/personal/personal-portfolio/.venv/bin/python"
}
```
**`CLAUDE.md`**

````diff
@@ -1,5 +1,48 @@
 # CLAUDE.md
+
+## ⛔ MANDATORY BEHAVIOR RULES - READ FIRST
+
+**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.**
+
+### 1. WHEN USER ASKS YOU TO CHECK SOMETHING - CHECK EVERYTHING
+- Search ALL locations, not just where you think it is
+- Check cache directories: `~/.claude/plugins/cache/`
+- Check installed: `~/.claude/plugins/marketplaces/`
+- Check source directories
+- **NEVER say "no" or "that's not the issue" without exhaustive verification**
+
+### 2. WHEN USER SAYS SOMETHING IS WRONG - BELIEVE THEM
+- The user knows their system better than you
+- Investigate thoroughly before disagreeing
+- **Your confidence is often wrong. User's instincts are often right.**
+
+### 3. NEVER SAY "DONE" WITHOUT VERIFICATION
+- Run the actual command/script to verify
+- Show the output to the user
+- **"Done" means VERIFIED WORKING, not "I made changes"**
+
+### 4. SHOW EXACTLY WHAT USER ASKS FOR
+- If user asks for messages, show the MESSAGES
+- If user asks for code, show the CODE
+- **Do not interpret or summarize unless asked**
+
+**FAILURE TO FOLLOW THESE RULES = WASTED USER TIME = UNACCEPTABLE**
+
+---
+
+
+
+## Mandatory Behavior Rules
+
+**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.**
+
+1. **CHECK EVERYTHING** - Search ALL locations before saying "no" (cache, installed, source directories)
+2. **BELIEVE THE USER** - Investigate thoroughly before disagreeing; user instincts are often right
+3. **VERIFY BEFORE "DONE"** - Run commands, show output; "done" means verified working
+4. **SHOW EXACTLY WHAT'S ASKED** - Do not interpret or summarize unless requested
+
+---
+
 Working context for Claude Code on the Analytics Portfolio project.
 
 ---
@@ -17,15 +60,33 @@ Working context for Claude Code on the Analytics Portfolio project.
 ### Run Commands
 
 ```bash
+# Setup & Database
 make setup        # Install deps, create .env, init pre-commit
-make docker-up    # Start PostgreSQL + PostGIS
+make docker-up    # Start PostgreSQL + PostGIS (auto-detects x86/ARM)
 make docker-down  # Stop containers
 make db-init      # Initialize database schema
+make db-reset     # Drop and recreate database (DESTRUCTIVE)
+
+# Data Loading
+make load-data    # Load all project data (currently: Toronto)
+make load-toronto # Load Toronto data from APIs
+
+# Application
 make run          # Start Dash dev server
+
+# Testing & Quality
 make test         # Run pytest
 make lint         # Run ruff linter
 make format       # Run ruff formatter
-make ci           # Run all checks
+make typecheck    # Run mypy type checker
+make ci           # Run all checks (lint, typecheck, test)
+
+# dbt
+make dbt-run      # Run dbt models
+make dbt-test     # Run dbt tests
+make dbt-docs     # Generate and serve dbt documentation
+
+# Run `make help` for full target list
 ```
 
 ### Branch Workflow
@@ -33,10 +94,7 @@
 1. Create feature branch FROM `development`: `git checkout -b feature/{sprint}-{description}`
 2. Work and commit on feature branch
 3. Merge INTO `development` when complete
-4. Delete the feature branch after merge (keep branches clean)
-5. `development` -> `staging` -> `main` for releases
-
-**CRITICAL: NEVER DELETE the `development` branch. It is the main integration branch.**
+4. `development` -> `staging` -> `main` for releases
 
 ---
 
@@ -52,125 +110,44 @@
 
 ### Module Responsibilities
 
-| Directory | Contains | Purpose |
-|-----------|----------|---------|
-| `schemas/` | Pydantic models | Data validation |
-| `models/` | SQLAlchemy ORM | Database persistence |
-| `parsers/` | API/CSV extraction | Raw data ingestion |
-| `loaders/` | Database operations | Data loading |
-| `figures/` | Chart factories | Plotly figure generation |
-| `callbacks/` | Dash callbacks | In `pages/{dashboard}/callbacks/` |
-| `errors/` | Exceptions + handlers | Error handling |
-
-### Type Hints
-
-Use Python 3.10+ style:
-```python
-def process(items: list[str], config: dict[str, int] | None = None) -> bool:
-    ...
-```
-
-### Error Handling
-
-```python
-# errors/exceptions.py
-class PortfolioError(Exception):
-    """Base exception."""
-
-class ParseError(PortfolioError):
-    """PDF/CSV parsing failed."""
-
-class ValidationError(PortfolioError):
-    """Pydantic or business rule validation failed."""
-
-class LoadError(PortfolioError):
-    """Database load operation failed."""
-```
+| Directory | Purpose |
+|-----------|---------|
+| `schemas/` | Pydantic models for data validation |
+| `models/` | SQLAlchemy ORM for database persistence |
+| `parsers/` | API/CSV extraction for raw data ingestion |
+| `loaders/` | Database operations for data loading |
+| `services/` | Query functions for dbt mart queries |
+| `figures/` | Chart factories for Plotly figure generation |
+| `errors/` | Custom exception classes (see `errors/exceptions.py`) |
 
 ### Code Standards
 
+- Python 3.10+ type hints: `list[str]`, `dict[str, int] | None`
 - Single responsibility functions with verb naming
 - Early returns over deep nesting
 - Google-style docstrings only for non-obvious behavior
-- Module-level constants for magic values
-- Pydantic BaseSettings for runtime config
 
 ---
 
 ## Application Structure
 
-```
-portfolio_app/
-├── app.py                  # Dash app factory with Pages routing
-├── config.py               # Pydantic BaseSettings
-├── assets/                 # CSS, images (auto-served)
-│   └── sidebar.css         # Navigation styling
-├── callbacks/              # Global callbacks
-│   ├── sidebar.py          # Sidebar toggle
-│   └── theme.py            # Dark/light theme
-├── pages/
-│   ├── home.py             # Bio landing page -> /
-│   ├── about.py            # About page -> /about
-│   ├── contact.py          # Contact form -> /contact
-│   ├── health.py           # Health endpoint -> /health
-│   ├── projects.py         # Project showcase -> /projects
-│   ├── resume.py           # Resume/CV -> /resume
-│   ├── blog/
-│   │   ├── index.py        # Blog listing -> /blog
-│   │   └── article.py      # Blog article -> /blog/{slug}
-│   └── toronto/
-│       ├── dashboard.py    # Dashboard -> /toronto
-│       ├── methodology.py  # Methodology -> /toronto/methodology
-│       ├── tabs/           # 5 tab layouts (overview, housing, safety, demographics, amenities)
-│       └── callbacks/      # Dashboard interactions
-├── components/             # Shared UI (sidebar, cards, controls)
-│   ├── metric_card.py      # KPI card component
-│   ├── map_controls.py     # Map control panel
-│   ├── sidebar.py          # Navigation sidebar
-│   └── time_slider.py      # Time range selector
-├── figures/                # Shared chart factories
-│   ├── choropleth.py       # Map visualizations
-│   ├── bar_charts.py       # Ranking, stacked, horizontal bars
-│   ├── scatter.py          # Scatter and bubble plots
-│   ├── radar.py            # Radar/spider charts
-│   ├── demographics.py     # Age pyramids, donut charts
-│   ├── time_series.py      # Trend lines
-│   └── summary_cards.py    # KPI figures
-├── content/                # Markdown content
-│   └── blog/               # Blog articles
-├── toronto/                # Toronto data logic
-│   ├── parsers/
-│   ├── loaders/
-│   ├── schemas/            # Pydantic
-│   ├── models/             # SQLAlchemy
-│   └── demo_data.py        # Sample data
-├── utils/                  # Utilities
-│   └── markdown_loader.py  # Markdown processing
-└── errors/
-
-notebooks/                  # Data documentation (Phase 6)
-├── README.md               # Template and usage guide
-├── overview/               # Overview tab notebooks (3)
-├── housing/                # Housing tab notebooks (3)
-├── safety/                 # Safety tab notebooks (3)
-├── demographics/           # Demographics tab notebooks (3)
-└── amenities/              # Amenities tab notebooks (3)
-```
-
-### URL Routing
-
-| URL | Page | Sprint |
-|-----|------|--------|
-| `/` | Bio landing page | 2 |
-| `/about` | About page | 8 |
-| `/contact` | Contact form | 8 |
-| `/health` | Health endpoint | 8 |
-| `/projects` | Project showcase | 8 |
-| `/resume` | Resume/CV | 8 |
-| `/blog` | Blog listing | 8 |
-| `/blog/{slug}` | Blog article | 8 |
-| `/toronto` | Toronto Dashboard | 6 |
-| `/toronto/methodology` | Dashboard methodology | 6 |
+**Entry Point:** `portfolio_app/app.py` (Dash app factory with Pages routing)
+
+| Directory | Purpose |
+|-----------|---------|
+| `pages/` | Dash Pages (file-based routing) |
+| `pages/toronto/` | Toronto Dashboard (`tabs/` for layouts, `callbacks/` for interactions) |
+| `components/` | Shared UI components |
+| `figures/toronto/` | Toronto chart factories |
+| `toronto/` | Toronto data logic (parsers, loaders, schemas, models) |
+
+**Key URLs:** `/` (home), `/toronto` (dashboard), `/blog` (listing), `/blog/{slug}` (articles), `/health` (status)
+
+### Multi-Dashboard Architecture
+
+- **figures/**: Domain-namespaced (`figures/toronto/`, future: `figures/football/`)
+- **dbt models**: Domain subdirectories (`staging/toronto/`, `marts/toronto/`)
+- **Database schemas**: Domain-specific raw data (`raw_toronto`, future: `raw_football`)
 
 ---
 
@@ -182,43 +159,31 @@ notebooks/                  # Data documentation (Phase 6)
 | Validation | Pydantic | >=2.0 |
 | ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
 | Transformation | dbt-postgres | >=1.7 |
-| Data Processing | Pandas | >=2.1 |
+| Visualization | Dash + Plotly + dash-mantine-components | >=2.14 |
 | Geospatial | GeoPandas + Shapely | >=0.14 |
-| Visualization | Dash + Plotly | >=2.14 |
-| UI Components | dash-mantine-components | Latest stable |
-| Testing | pytest | >=7.0 |
 | Python | 3.11+ | Via pyenv |
 
-**Notes**:
-- SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
-- PostGIS extension required in database
-- Docker Compose V2 format (no `version` field)
+**Notes**: SQLAlchemy 2.0 + Pydantic 2.0 only. Docker Compose V2 format (no `version` field).
 
 ---
 
 ## Data Model Overview
 
-### Geographic Reality (Toronto Housing)
+### Database Schemas
 
-```
-City Neighbourhoods (158) - Primary geographic unit for analysis
-CMHC Zones (~20)          - Rental data (Census Tract aligned)
-```
+| Schema | Purpose |
+|--------|---------|
+| `public` | Shared dimensions (dim_time) |
+| `raw_toronto` | Toronto-specific raw/dimension tables |
+| `stg_toronto` | Toronto dbt staging views |
+| `int_toronto` | Toronto dbt intermediate views |
+| `mart_toronto` | Toronto dbt mart tables |
 
-### Star Schema
+### dbt Project: `portfolio`
 
-| Table | Type | Keys |
-|-------|------|------|
-| `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone |
-| `dim_time` | Dimension | date_key (PK) |
-| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
-| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
-| `dim_policy_event` | Dimension | event_id (PK) |
-
-### dbt Layers
-
 | Layer | Naming | Purpose |
 |-------|--------|---------|
+| Shared | `stg_dimensions__*` | Cross-domain dimensions |
 | Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
 | Intermediate | `int_{domain}__{transform}` | Business logic |
 | Marts | `mart_{domain}` | Final analytical tables |
@@ -227,13 +192,12 @@ CMHC Zones (~20)          - Rental data (Census Tract aligned)
 
 ## Deferred Features
 
-**Stop and flag if a task seems to require these**:
+**Stop and flag if a task requires these**:
 
 | Feature | Reason |
 |---------|--------|
 | Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
 | ML prediction models | Energy project scope (future phase) |
-| Multi-project shared infrastructure | Build first, abstract second |
 
 ---
 
@@ -253,92 +217,123 @@ LOG_LEVEL=INFO
 
 ---
 
-## Script Standards
-
-All scripts in `scripts/`:
-- Include usage comments at top
-- Idempotent where possible
-- Exit codes: 0 = success, 1 = error
-- Use `set -euo pipefail` for bash
-- Log to stdout, errors to stderr
-
----
-
 ## Reference Documents
 
 | Document | Location | Use When |
 |----------|----------|----------|
-| Project reference | `docs/PROJECT_REFERENCE.md` | Architecture decisions, completed work |
-| Developer guide | `docs/CONTRIBUTING.md` | How to add pages, blog posts, tabs |
+| Project reference | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
+| Developer guide | `docs/CONTRIBUTING.md` | How to add pages, tabs |
 | Lessons learned | `docs/project-lessons-learned/INDEX.md` | Past issues and solutions |
+| Deployment runbook | `docs/runbooks/deployment.md` | Deploying to environments |
 
 ---
 
-## Projman Plugin Workflow
+## Plugin Reference
 
-**CRITICAL: Always use the projman plugin for sprint and task management.**
+### Sprint Management: projman
 
-### When to Use Projman Skills
+**CRITICAL: Always use projman for sprint and task management.**
 
 | Skill | Trigger | Purpose |
 |-------|---------|---------|
-| `/projman:sprint-plan` | New sprint or phase implementation | Architecture analysis + Gitea issue creation |
-| `/projman:sprint-start` | Beginning implementation work | Load lessons learned (Wiki.js or local), start execution |
-| `/projman:sprint-status` | Check progress | Review blockers and completion status |
-| `/projman:sprint-close` | Sprint completion | Capture lessons learned (Wiki.js or local backup) |
+| `/projman:sprint-plan` | New sprint/feature | Architecture analysis + Gitea issue creation |
+| `/projman:sprint-start` | Begin implementation | Load lessons learned, start execution |
+| `/projman:sprint-status` | Check progress | Review blockers and completion |
+| `/projman:sprint-close` | Sprint completion | Capture lessons learned |
 
-### Default Behavior
+**Default workflow**: `/projman:sprint-plan` before code -> create issues -> `/projman:sprint-start` -> track via Gitea -> `/projman:sprint-close`
 
-When user requests implementation work:
+**Gitea**: `personal-projects/personal-portfolio` at `gitea.hotserv.cloud`
 
-1. **ALWAYS start with `/projman:sprint-plan`** before writing code
-2. Create Gitea issues with proper labels and acceptance criteria
-3. Use `/projman:sprint-start` to begin execution with lessons learned
-4. Track progress via Gitea issue comments
-5. Close sprint with `/projman:sprint-close` to document lessons
+### Data Platform: data-platform
 
-### Gitea Repository
+Use for dbt, PostgreSQL, and PostGIS operations.
 
-- **Repo**: `lmiranda/personal-portfolio`
-- **Host**: `gitea.hotserv.cloud`
-- **Note**: `lmiranda` is a user account (not org), so label lookup may require repo-level labels
+| Skill | Purpose |
+|-------|---------|
+| `/data-platform:data-review` | Audit data integrity, schema validity, dbt compliance |
+| `/data-platform:data-gate` | CI/CD data quality gate (pass/fail) |
 
-### MCP Tools Available
+**When to use:** Schema changes, dbt model development, data loading, before merging data PRs.
 
-**Gitea**:
+**MCP tools available:** `pg_connect`, `pg_query`, `pg_tables`, `pg_columns`, `pg_schemas`, `st_*` (PostGIS), `dbt_*` operations.
 
-- `list_issues`, `get_issue`, `create_issue`, `update_issue`, `add_comment`
-- `get_labels`, `suggest_labels`
-
-**Wiki.js**:
+### Visualization: viz-platform
 
-- `search_lessons`, `create_lesson`, `search_pages`, `get_page`
+Use for Dash/Mantine component validation and chart creation.
 
-### Lessons Learned (Backup Method)
+| Skill | Purpose |
+|-------|---------|
+| `/viz-platform:component` | Inspect DMC component props and validation |
+| `/viz-platform:chart` | Create themed Plotly charts |
+| `/viz-platform:theme` | Apply/validate themes |
+| `/viz-platform:dashboard` | Create dashboard layouts |
 
-**When Wiki.js is unavailable**, use the local backup in `docs/project-lessons-learned/`:
+**When to use:** Dashboard development, new visualizations, component prop lookup.
 
-**At Sprint Start:**
-1. Review `docs/project-lessons-learned/INDEX.md` for relevant past lessons
-2. Search lesson files by tags/keywords before implementation
-3. Apply prevention strategies from applicable lessons
+### Code Quality: code-sentinel
 
-**At Sprint Close:**
-1. Try Wiki.js `create_lesson` first
-2. If Wiki.js fails, create lesson in `docs/project-lessons-learned/`
-3. Use naming convention: `{phase-or-sprint}-{short-description}.md`
-4. Update `INDEX.md` with new entry
-5. Follow the lesson template in INDEX.md
+Use for security scanning and refactoring analysis.
 
-**Migration:** Once Wiki.js is configured, lessons will be migrated there for better searchability.
+| Skill | Purpose |
+|-------|---------|
+| `/code-sentinel:security-scan` | Full security audit of codebase |
+| `/code-sentinel:refactor` | Apply refactoring patterns |
+| `/code-sentinel:refactor-dry` | Preview refactoring without applying |
 
-### Issue Structure
+**When to use:** Before major releases, after adding auth/data handling code, periodic audits.
 
-Every Gitea issue should include:
-- **Overview**: Brief description
-- **Files to Create/Modify**: Explicit paths
-- **Acceptance Criteria**: Checkboxes
-- **Technical Notes**: Implementation hints
-- **Labels**: Listed in body (workaround for label API issues)
+### Documentation: doc-guardian
+
+Use for documentation drift detection and synchronization.
+
+| Skill | Purpose |
+|-------|---------|
+| `/doc-guardian:doc-audit` | Scan project for documentation drift |
+| `/doc-guardian:doc-sync` | Synchronize pending documentation updates |
+
+**When to use:** After significant code changes, before releases.
+
+### Pull Requests: pr-review
+
+Use for comprehensive PR review with multiple analysis perspectives.
+
+| Skill | Purpose |
+|-------|---------|
+| `/pr-review:initial-setup` | Configure PR review for project |
+| Triggered automatically | Security, performance, maintainability, test analysis |
+
+**When to use:** Before merging significant PRs to `development` or `main`.
+
+### Requirement Clarification: clarity-assist
+
+Use when requirements are ambiguous or need decomposition.
+
+**When to use:** Unclear specifications, complex feature requests, conflicting requirements.
+
+### Contract Validation: contract-validator
+
+Use for plugin interface validation.
+
+| Skill | Purpose |
+|-------|---------|
+| `/contract-validator:agent-check` | Quick agent definition validation |
+| `/contract-validator:full-validation` | Full plugin contract validation |
+
+**When to use:** When modifying plugin integrations or agent definitions.
+
+### Git Workflow: git-flow
+
+Use for standardized git operations.
+
+| Skill | Purpose |
+|-------|---------|
+| `/git-flow:commit` | Auto-generated conventional commit |
+| `/git-flow:branch-start` | Create feature/fix/chore branch |
+| `/git-flow:git-status` | Comprehensive status with recommendations |
+
+**When to use:** Complex merge scenarios, branch management, standardized commits.
 
 ---
 
-*Last Updated: January 2026 (Post-Sprint 9)*
+*Last Updated: February 2026*
````
**`LICENSE`** — new file (21 lines)

```
MIT License

Copyright (c) 2024-2025 Leo Miranda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
**`Makefile`**

```diff
@@ -1,13 +1,25 @@
-.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help
+.PHONY: setup docker-up docker-down db-init load-data load-all load-toronto load-toronto-only seed-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
 
 # Default target
 .DEFAULT_GOAL := help
 
 # Environment
-PYTHON := python3
-PIP := pip
+VENV := .venv
+PYTHON := $(VENV)/bin/python3
+PIP := $(VENV)/bin/pip
 DOCKER_COMPOSE := docker compose
 
+# Architecture detection for Docker images
+ARCH := $(shell uname -m)
+ifeq ($(ARCH),aarch64)
+    POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else ifeq ($(ARCH),arm64)
+    POSTGIS_IMAGE := imresamu/postgis:16-3.4
+else
+    POSTGIS_IMAGE := postgis/postgis:16-3.4
+endif
+export POSTGIS_IMAGE
+
 # Colors for output
 BLUE := \033[0;34m
 GREEN := \033[0;32m
@@ -39,6 +51,7 @@ setup: ## Install dependencies, create .env, init pre-commit
 docker-up: ## Start PostgreSQL + PostGIS containers
 	@echo "$(GREEN)Starting database containers...$(NC)"
+	@echo "$(BLUE)Architecture: $(ARCH) -> Using image: $(POSTGIS_IMAGE)$(NC)"
 	$(DOCKER_COMPOSE) up -d
 	@echo "$(GREEN)Waiting for database to be ready...$(NC)"
 	@sleep 3
@@ -57,11 +70,7 @@ docker-logs: ## View container logs
 db-init: ## Initialize database schema
 	@echo "$(GREEN)Initializing database schema...$(NC)"
-	@if [ -f scripts/db/init.sh ]; then \
-		bash scripts/db/init.sh; \
-	else \
-		echo "$(YELLOW)scripts/db/init.sh not found - skipping$(NC)"; \
-	fi
+	$(PYTHON) scripts/db/init_schema.py
 
 db-reset: ## Drop and recreate database (DESTRUCTIVE)
 	@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
@@ -71,14 +80,27 @@ db-reset: ## Drop and recreate database (DESTRUCTIVE)
 	@sleep 3
 	$(MAKE) db-init
 
-load-data: ## Load Toronto data from APIs and run dbt
+# Domain-specific data loading
+load-toronto: ## Load Toronto data from APIs
 	@echo "$(GREEN)Loading Toronto neighbourhood data...$(NC)"
 	$(PYTHON) scripts/data/load_toronto_data.py
+	@echo "$(GREEN)Seeding Toronto development data...$(NC)"
+	$(PYTHON) scripts/data/seed_amenity_data.py
 
-load-data-only: ## Load Toronto data without running dbt
+load-toronto-only: ## Load Toronto data without running dbt or seeding
 	@echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)"
 	$(PYTHON) scripts/data/load_toronto_data.py --skip-dbt
 
+# Aggregate data loading
+load-data: load-toronto ## Load all project data (currently: Toronto)
+	@echo "$(GREEN)All data loaded!$(NC)"
+
+load-all: load-data ## Alias for load-data
+
+seed-data: ## Seed sample development data (amenities, median_age)
+	@echo "$(GREEN)Seeding development data...$(NC)"
+	$(PYTHON) scripts/data/seed_amenity_data.py
+
 # =============================================================================
 # Application
 # =============================================================================
@@ -105,15 +127,15 @@ test-cov: ## Run pytest with coverage
 dbt-run: ## Run dbt models
 	@echo "$(GREEN)Running dbt models...$(NC)"
-	cd dbt && dbt run
+	@set -a && . ./.env && set +a && cd dbt && dbt run --profiles-dir .
 
 dbt-test: ## Run dbt tests
 	@echo "$(GREEN)Running dbt tests...$(NC)"
-	cd dbt && dbt test
+	@set -a && . ./.env && set +a && cd dbt && dbt test --profiles-dir .
 
 dbt-docs: ## Generate dbt documentation
 	@echo "$(GREEN)Generating dbt docs...$(NC)"
-	cd dbt && dbt docs generate && dbt docs serve
+	@set -a && . ./.env && set +a && cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir .
 
 # =============================================================================
 # Code Quality
@@ -139,6 +161,19 @@ ci: ## Run all checks (lint, typecheck, test)
 	$(MAKE) test
 	@echo "$(GREEN)All checks passed!$(NC)"
 
+# =============================================================================
+# Operations
+# =============================================================================
+
+logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
+	@./scripts/logs.sh $(SERVICE)
+
+run-detached: ## Start containers and wait for health check
+	@./scripts/run-detached.sh
+
+etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
+	@./scripts/etl/toronto.sh $(MODE)
+
 # =============================================================================
 # Deployment
 # =============================================================================
```
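The new dbt recipes wrap each command in `set -a && . ./.env && set +a`, which auto-exports every variable in `.env` so the project's `profiles.yml` can read credentials via `env_var()`. A rough Python equivalent of that shell idiom — a simplified parser for illustration, not the project's actual code:

```python
# Sketch: export KEY=VALUE pairs from .env into the process environment,
# mimicking `set -a && . ./.env && set +a`. Ignores comments and blank lines;
# does not handle quoting or multi-line values.
import os

def export_dotenv(path: str = ".env") -> None:
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            os.environ[key.strip()] = value.strip()
```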
**`README.md`**

````diff
@@ -1,5 +1,9 @@
 # Analytics Portfolio
 
+[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
+
+**Live Demo:** [leodata.science](https://leodata.science)
+
 A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
 
 ## Live Pages
@@ -32,6 +36,42 @@ An interactive choropleth dashboard analyzing Toronto's 158 official neighbourho
 - Toronto Police Service (crime statistics)
 - CMHC Rental Market Survey (rental data by zone)
 
+## Architecture
+
+```mermaid
+flowchart LR
+    subgraph Sources
+        A1[City of Toronto API]
+        A2[Toronto Police API]
+        A3[CMHC Data]
+    end
+
+    subgraph ETL
+        B1[Parsers]
+        B2[Loaders]
+    end
+
+    subgraph Database
+        C1[(PostgreSQL/PostGIS)]
+        C2[dbt Models]
+    end
+
+    subgraph Application
+        D1[Dash App]
+        D2[Plotly Figures]
+    end
+
+    A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
+```
+
+**Pipeline Stages:**
+- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
+- **ETL**: Python parsers extract and validate data; loaders persist to database
+- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
+- **Application**: Dash serves interactive dashboards with Plotly visualizations
+
+For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
+
 ## Quick Start
 
 ```bash
@@ -75,28 +115,31 @@ portfolio_app/
 │   ├── tabs/                 # Tab layouts (5)
 │   └── callbacks/            # Interaction logic
 ├── components/               # Shared UI components
-├── figures/                  # Plotly figure factories
+├── figures/
+│   └── toronto/              # Toronto figure factories
 ├── content/
 │   └── blog/                 # Markdown blog articles
 ├── toronto/                  # Toronto data logic
 │   ├── parsers/              # API data extraction
 │   ├── loaders/              # Database operations
 │   ├── schemas/              # Pydantic models
-│   └── models/               # SQLAlchemy ORM
+│   └── models/               # SQLAlchemy ORM (raw_toronto schema)
 └── errors/                   # Exception handling
 
-dbt/
+dbt/                          # dbt project: portfolio
 ├── models/
-│   ├── staging/              # 1:1 source tables
-│   ├── intermediate/         # Business logic
-│   └── marts/                # Analytical tables
+│   ├── shared/               # Cross-domain dimensions
+│   ├── staging/toronto/      # Toronto staging models
+│   ├── intermediate/toronto/ # Toronto intermediate models
+│   └── marts/toronto/        # Toronto analytical tables
 
-notebooks/                    # Data documentation (15 notebooks)
-├── overview/                 # Overview tab visualizations
-├── housing/                  # Housing tab visualizations
-├── safety/                   # Safety tab visualizations
-├── demographics/             # Demographics tab visualizations
-└── amenities/                # Amenities tab visualizations
+notebooks/
+└── toronto/                  # Toronto documentation (15 notebooks)
+    ├── overview/             # Overview tab visualizations
+    ├── housing/              # Housing tab visualizations
+    ├── safety/               # Safety tab visualizations
+    ├── demographics/         # Demographics tab visualizations
+    └── amenities/            # Amenities tab visualizations
 
 docs/
 ├── PROJECT_REFERENCE.md      # Architecture reference
````
**`dbt_project.yml`**

```diff
@@ -1,8 +1,7 @@
-name: 'toronto_housing'
-version: '1.0.0'
+name: 'portfolio'
 config-version: 2
 
-profile: 'toronto_housing'
+profile: 'portfolio'
 
 model-paths: ["models"]
 analysis-paths: ["analyses"]
@@ -16,13 +15,19 @@ clean-targets:
   - "dbt_packages"
 
 models:
-  toronto_housing:
+  portfolio:
+    shared:
+      +materialized: view
+      +schema: shared
     staging:
-      +materialized: view
-      +schema: staging
+      toronto:
+        +materialized: view
+        +schema: stg_toronto
     intermediate:
-      +materialized: view
-      +schema: intermediate
+      toronto:
+        +materialized: view
+        +schema: int_toronto
     marts:
-      +materialized: table
-      +schema: marts
+      toronto:
+        +materialized: table
+        +schema: mart_toronto
```
**`dbt/macros/generate_schema_name.sql`** — new file (11 lines)

```sql
-- Override dbt default schema name generation.
-- Use the custom schema name directly instead of
-- concatenating with the target schema.
-- See: https://docs.getdbt.com/docs/build/custom-schemas
{% macro generate_schema_name(custom_schema_name, node) %}
    {%- if custom_schema_name is none -%}
        {{ target.schema }}
    {%- else -%}
        {{ custom_schema_name | trim }}
    {%- endif -%}
{% endmacro %}
```
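The effect of this override, sketched in Python (assuming `target.schema` is `public`, which matches the shared dimensions living in `public`): dbt's default macro would prefix the custom schema with the target schema, while the override uses the custom name as-is.

```python
# Mirror of the overridden generate_schema_name macro above (assumption:
# the profile's target schema is "public").
def generate_schema_name(custom_schema_name: str | None,
                         target_schema: str = "public") -> str:
    if custom_schema_name is None:
        return target_schema
    # dbt's built-in default would return f"{target_schema}_{custom_schema_name}";
    # the override drops that prefix and uses the custom name directly.
    return custom_schema_name.strip()

assert generate_schema_name("stg_toronto") == "stg_toronto"  # not "public_stg_toronto"
assert generate_schema_name(None) == "public"
```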
In the intermediate model properties file, `tests:` becomes `data_tests:` throughout:

```diff
@@ -5,11 +5,11 @@ models:
     description: "Rental data enriched with time and zone dimensions"
     columns:
       - name: rental_id
-        tests:
+        data_tests:
           - unique
           - not_null
       - name: zone_code
-        tests:
+        data_tests:
           - not_null
 
   - name: int_neighbourhood__demographics
@@ -17,11 +17,11 @@ models:
     columns:
       - name: neighbourhood_id
         description: "Neighbourhood identifier"
-        tests:
+        data_tests:
           - not_null
       - name: census_year
         description: "Census year"
-        tests:
+        data_tests:
           - not_null
       - name: income_quintile
         description: "Income quintile (1-5, city-wide)"
@@ -31,7 +31,7 @@ models:
     columns:
       - name: neighbourhood_id
         description: "Neighbourhood identifier"
-        tests:
+        data_tests:
           - not_null
       - name: year
         description: "Reference year"
@@ -45,11 +45,11 @@ models:
     columns:
       - name: neighbourhood_id
         description: "Neighbourhood identifier"
-        tests:
+        data_tests:
           - not_null
       - name: year
         description: "Statistics year"
-        tests:
+        data_tests:
           - not_null
       - name: crime_rate_per_100k
         description: "Total crime rate per 100K population"
@@ -61,7 +61,7 @@ models:
     columns:
       - name: neighbourhood_id
         description: "Neighbourhood identifier"
-        tests:
+        data_tests:
           - not_null
       - name: year
         description: "Reference year"
@@ -75,11 +75,11 @@ models:
     columns:
       - name: neighbourhood_id
         description: "Neighbourhood identifier"
-        tests:
+        data_tests:
           - not_null
       - name: year
         description: "Survey year"
-        tests:
+        data_tests:
           - not_null
       - name: avg_rent_2bed
         description: "Weighted average 2-bedroom rent"
```
**`dbt/models/intermediate/toronto/int_census__toronto_cma.sql`** — new file (60 lines)

```sql
-- Intermediate: Toronto CMA census statistics by year
-- Provides city-wide averages for metrics not available at neighbourhood level
-- Used when neighbourhood-level data is unavailable (e.g., median household income)
-- Grain: One row per year

with years as (
    select * from {{ ref('int_year_spine') }}
),

census as (
    select * from {{ ref('stg_toronto__census') }}
),

-- Census data is only available for 2016 and 2021
-- Map each analysis year to the appropriate census year
year_to_census as (
    select
        y.year,
        case
            when y.year <= 2018 then 2016
            else 2021
        end as census_year
    from years y
),

-- Toronto CMA median household income from Statistics Canada
-- Source: Census Profile Table 98-316-X2021001
-- 2016: $65,829 (from Census Profile)
-- 2021: $84,000 (from Census Profile)
cma_income as (
    select 2016 as census_year, 65829 as median_household_income union all
    select 2021 as census_year, 84000 as median_household_income
),

-- City-wide aggregates from loaded neighbourhood data
city_aggregates as (
    select
        census_year,
        sum(population) as total_population,
        avg(population_density) as avg_population_density,
        avg(unemployment_rate) as avg_unemployment_rate
    from census
    where population is not null
    group by census_year
),

final as (
    select
        y.year,
        y.census_year,
        ci.median_household_income,
        ca.total_population,
        ca.avg_population_density,
        ca.avg_unemployment_rate
    from year_to_census y
    left join cma_income ci on y.census_year = ci.census_year
    left join city_aggregates ca on y.census_year = ca.census_year
)

select * from final
```
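The year-to-census mapping in this model, restated as a quick Python check (same cutoff):

```python
# Analysis years <= 2018 use the 2016 census; later years use the 2021 census.
def census_year_for(year: int) -> int:
    return 2016 if year <= 2018 else 2021

assert [census_year_for(y) for y in (2014, 2018, 2019, 2025)] == [2016, 2016, 2021, 2021]
```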
In the amenity scores model, the year now falls back to 2021:

```diff
@@ -34,7 +34,7 @@ amenity_scores as (
         n.population,
         n.land_area_sqkm,
 
-        a.year,
+        coalesce(a.year, 2021) as year,
 
         -- Raw counts
         a.parks_count,
```
In the crime summary model, crime-type keys move from title case to snake_case:

```diff
@@ -16,12 +16,12 @@ crime_by_year as (
         neighbourhood_id,
         crime_year as year,
         sum(incident_count) as total_incidents,
-        sum(case when crime_type = 'Assault' then incident_count else 0 end) as assault_count,
-        sum(case when crime_type = 'Auto Theft' then incident_count else 0 end) as auto_theft_count,
-        sum(case when crime_type = 'Break and Enter' then incident_count else 0 end) as break_enter_count,
-        sum(case when crime_type = 'Robbery' then incident_count else 0 end) as robbery_count,
-        sum(case when crime_type = 'Theft Over' then incident_count else 0 end) as theft_over_count,
-        sum(case when crime_type = 'Homicide' then incident_count else 0 end) as homicide_count,
+        sum(case when crime_type = 'assault' then incident_count else 0 end) as assault_count,
+        sum(case when crime_type = 'auto_theft' then incident_count else 0 end) as auto_theft_count,
+        sum(case when crime_type = 'break_and_enter' then incident_count else 0 end) as break_enter_count,
+        sum(case when crime_type = 'robbery' then incident_count else 0 end) as robbery_count,
+        sum(case when crime_type = 'theft_over' then incident_count else 0 end) as theft_over_count,
+        sum(case when crime_type = 'homicide' then incident_count else 0 end) as homicide_count,
         avg(rate_per_100k) as avg_rate_per_100k
     from crime
     group by neighbourhood_id, crime_year
```
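The switch from labels like 'Break and Enter' to keys like 'break_and_enter' suggests the staging layer now normalizes `crime_type`. A hypothetical normalizer consistent with the new keys — the actual staging logic isn't shown in this diff:

```python
# Assumed slug normalization matching the new snake_case crime-type keys.
def normalize_crime_type(raw: str) -> str:
    return raw.strip().lower().replace(" ", "_")

assert normalize_crime_type("Break and Enter") == "break_and_enter"
assert normalize_crime_type("Auto Theft") == "auto_theft"
assert normalize_crime_type("Theft Over") == "theft_over"
```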
Later in the same model, the crime rate prefers the source average and falls back to a computed rate:

```diff
@@ -64,15 +64,17 @@ crime_summary as (
         w.robbery_count,
         w.theft_over_count,
         w.homicide_count,
-        w.avg_rate_per_100k,
         w.yoy_change_pct,
 
-        -- Crime rate per 100K population
-        case
-            when n.population > 0
-            then round(w.total_incidents::numeric / n.population * 100000, 2)
-            else null
-        end as crime_rate_per_100k
+        -- Crime rate per 100K population (use source data avg, or calculate if population available)
+        coalesce(
+            w.avg_rate_per_100k,
+            case
+                when n.population > 0
+                then round(w.total_incidents::numeric / n.population * 100000, 2)
+                else null
+            end
+        ) as crime_rate_per_100k
 
     from neighbourhoods n
     inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
```
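The same decision logic in Python, with illustrative names — the source's average rate wins, the computed rate is the fallback, and a missing or zero population yields null:

```python
# Sketch of the coalesce above; names are illustrative, not the model's columns.
def crime_rate_per_100k(avg_rate: float | None,
                        incidents: int,
                        population: int | None) -> float | None:
    if avg_rate is not None:
        return avg_rate                                  # source data wins
    if population and population > 0:
        return round(incidents / population * 100_000, 2)  # computed fallback
    return None                                          # no basis for a rate

assert crime_rate_per_100k(None, 250, 50_000) == 500.0
assert crime_rate_per_100k(480.0, 250, 50_000) == 480.0
```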
In the demographics model, `census_year` gains fallbacks:

```diff
@@ -17,7 +17,8 @@ demographics as (
         n.geometry,
         n.land_area_sqkm,
 
-        c.census_year,
+        -- Use census_year from census data, or fall back to dim_neighbourhood's year
+        coalesce(c.census_year, n.census_year, 2021) as census_year,
         c.population,
         c.population_density,
         c.median_household_income,
```
In the housing model, the year coalesce gains a 2021 default:

```diff
@@ -20,7 +20,7 @@ housing as (
         n.neighbourhood_name,
         n.geometry,
 
-        coalesce(r.year, c.census_year) as year,
+        coalesce(r.year, c.census_year, 2021) as year,
 
         -- Census housing metrics
         c.pct_owner_occupied,
```
The rent pivot switches to the normalized bedroom-type keys:

```diff
@@ -42,10 +42,10 @@ pivoted as (
     select
         neighbourhood_id,
         year,
-        max(case when bedroom_type = 'Two Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
-        max(case when bedroom_type = 'One Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
-        max(case when bedroom_type = 'Bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
-        max(case when bedroom_type = 'Three Bedroom +' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
+        max(case when bedroom_type = '2bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
+        max(case when bedroom_type = '1bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
+        max(case when bedroom_type = 'bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
+        max(case when bedroom_type = '3bed' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
         avg(vacancy_rate) as vacancy_rate,
         sum(rental_units_estimate) as total_rental_units
     from allocated
```
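For reference, `weighted_avg_rent / nullif(total_weight, 0)` is a weighted mean guarded against a zero denominator. A worked example with made-up numbers:

```python
# (zone_weight, avg_rent) allocations for one neighbourhood-year -- illustrative.
rows = [
    (0.6, 2400.0),
    (0.4, 2100.0),
]
weighted_sum = sum(w * rent for w, rent in rows)  # the "weighted_avg_rent" numerator
total_weight = sum(w for w, _ in rows)
# nullif(total_weight, 0) in SQL == None-guard in Python:
avg_rent_2bed = weighted_sum / total_weight if total_weight else None
print(round(avg_rent_2bed, 2))  # 2280.0
```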
**`dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql`** — new file (25 lines)

```sql
-- Intermediate: Toronto CMA rental metrics by year
-- Aggregates rental data to city-wide averages by year
-- Source: StatCan CMHC data at CMA level
-- Grain: One row per year

with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

-- Pivot bedroom types to columns
yearly_rentals as (
    select
        year,
        max(case when bedroom_type = 'bachelor' then avg_rent end) as avg_rent_bachelor,
        max(case when bedroom_type = '1bed' then avg_rent end) as avg_rent_1bed,
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_2bed,
        max(case when bedroom_type = '3bed' then avg_rent end) as avg_rent_3bed,
        -- Use 2-bedroom as standard reference
        max(case when bedroom_type = '2bed' then avg_rent end) as avg_rent_standard,
        max(vacancy_rate) as vacancy_rate
    from rentals
    group by year
)

select * from yearly_rentals
```
**`dbt/models/intermediate/toronto/int_year_spine.sql`** — new file (11 lines)

```sql
-- Intermediate: Year spine for analysis
-- Creates a row for each year from 2014-2025
-- Used to drive time-series analysis across all data sources

with years as (
    -- Generate years from available data sources
    -- Crime data: 2014-2024, Rentals: 2019-2025
    select generate_series(2014, 2025) as year
)

select year from years
```
**Deleted mart model** (−110 lines): Neighbourhood Overview with Composite Livability Score

```sql
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year

with demographics as (
    select * from {{ ref('int_neighbourhood__demographics') }}
),

housing as (
    select * from {{ ref('int_neighbourhood__housing') }}
),

crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

amenities as (
    select * from {{ ref('int_neighbourhood__amenity_scores') }}
),

-- Compute percentile ranks for scoring components
percentiles as (
    select
        d.neighbourhood_id,
        d.neighbourhood_name,
        d.geometry,
        d.census_year as year,
        d.population,
        d.median_household_income,

        -- Safety score: inverse of crime rate (higher = safer)
        case
            when c.crime_rate_per_100k is not null
            then 100 - percent_rank() over (
                partition by d.census_year
                order by c.crime_rate_per_100k
            ) * 100
            else null
        end as safety_score,

        -- Affordability score: inverse of rent-to-income ratio
        case
            when h.rent_to_income_pct is not null
            then 100 - percent_rank() over (
                partition by d.census_year
                order by h.rent_to_income_pct
            ) * 100
            else null
        end as affordability_score,

        -- Amenity score: based on amenities per capita
        case
            when a.total_amenities_per_1000 is not null
            then percent_rank() over (
                partition by d.census_year
                order by a.total_amenities_per_1000
            ) * 100
            else null
        end as amenity_score,

        -- Raw metrics for reference
        c.crime_rate_per_100k,
        h.rent_to_income_pct,
        h.avg_rent_2bed,
        a.total_amenities_per_1000

    from demographics d
    left join housing h
        on d.neighbourhood_id = h.neighbourhood_id
        and d.census_year = h.year
    left join crime c
        on d.neighbourhood_id = c.neighbourhood_id
        and d.census_year = c.year
    left join amenities a
        on d.neighbourhood_id = a.neighbourhood_id
        and d.census_year = a.year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,
        population,
        median_household_income,

        -- Component scores (0-100)
        round(safety_score::numeric, 1) as safety_score,
        round(affordability_score::numeric, 1) as affordability_score,
        round(amenity_score::numeric, 1) as amenity_score,

        -- Composite livability score: safety (30%), affordability (40%), amenities (30%)
        round(
            (coalesce(safety_score, 50) * 0.30 +
             coalesce(affordability_score, 50) * 0.40 +
             coalesce(amenity_score, 50) * 0.30)::numeric,
            1
        ) as livability_score,

        -- Raw metrics
        crime_rate_per_100k,
        rent_to_income_pct,
        avg_rent_2bed,
        total_amenities_per_1000

    from percentiles
)

select * from final
```
@@ -6,7 +6,7 @@ models:
     columns:
       - name: rental_id
         description: "Unique rental record identifier"
-        tests:
+        data_tests:
           - unique
           - not_null

@@ -17,11 +17,11 @@ models:
     columns:
       - name: neighbourhood_id
        description: "Neighbourhood identifier"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
@@ -41,11 +41,11 @@ models:
     columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
@@ -63,11 +63,11 @@ models:
     columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
@@ -77,7 +77,7 @@ models:
         description: "100 = city average crime rate"
      - name: safety_tier
        description: "Safety tier (1=safest, 5=highest crime)"
-        tests:
+        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]
@@ -89,11 +89,11 @@ models:
     columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
@@ -103,7 +103,7 @@ models:
         description: "100 = city average income"
      - name: income_quintile
        description: "Income quintile (1-5)"
-        tests:
+        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]
@@ -115,11 +115,11 @@ models:
     columns:
      - name: neighbourhood_id
        description: "Neighbourhood identifier"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry for mapping"
@@ -129,7 +129,7 @@ models:
         description: "100 = city average amenities"
      - name: amenity_tier
        description: "Amenity tier (1=best, 5=lowest)"
-        tests:
+        data_tests:
          - accepted_values:
              arguments:
                values: [1, 2, 3, 4, 5]
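Context for the `tests:` → `data_tests:` renames in this file and in the staging YAML below: dbt 1.8 renamed the `tests` key to `data_tests` to distinguish data tests from the newly introduced unit tests; the old key is deprecated but behavior is unchanged. A `not_null` test still compiles to roughly the following (illustrative sketch, not part of the changeset; the target relation name is assumed):

```sql
-- A dbt not_null data test passes when this query returns zero rows
select neighbourhood_id
from mart_toronto.mart_neighbourhood_overview
where neighbourhood_id is null;
```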
dbt/models/marts/toronto/mart_neighbourhood_overview.sql (new file, 153 lines)
@@ -0,0 +1,153 @@
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)

with years as (
    select * from {{ ref('int_year_spine') }}
),

neighbourhoods as (
    select * from {{ ref('stg_toronto__neighbourhoods') }}
),

-- Create base: all neighbourhoods × all years
neighbourhood_years as (
    select
        n.neighbourhood_id,
        n.neighbourhood_name,
        n.geometry,
        y.year
    from neighbourhoods n
    cross join years y
),

-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
    select * from {{ ref('stg_toronto__census') }}
),

census_mapped as (
    select
        ny.neighbourhood_id,
        ny.year,
        c.population,
        c.unemployment_rate,
        c.pct_bachelors_or_higher as education_bachelors_pct
    from neighbourhood_years ny
    left join census c on ny.neighbourhood_id = c.neighbourhood_id
        -- Use census year <= analysis year, prefer most recent
        and c.census_year = (
            select max(c2.census_year)
            from {{ ref('stg_toronto__census') }} c2
            where c2.neighbourhood_id = ny.neighbourhood_id
                and c2.census_year <= ny.year
        )
),

-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
    select * from {{ ref('int_census__toronto_cma') }}
),

-- Crime data (2014-2024)
crime as (
    select * from {{ ref('int_neighbourhood__crime_summary') }}
),

-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
    select * from {{ ref('int_rentals__toronto_cma') }}
),

-- Compute scores
scored as (
    select
        ny.neighbourhood_id,
        ny.neighbourhood_name,
        ny.geometry,
        ny.year,
        cm.population,
        -- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
        cma.median_household_income,

        -- Safety score: inverse of crime rate (higher = safer)
        case
            when cr.crime_rate_per_100k is not null
            then 100 - percent_rank() over (
                partition by ny.year
                order by cr.crime_rate_per_100k
            ) * 100
            else null
        end as safety_score,

        -- Affordability score: inverse of rent-to-income ratio
        -- Using CMA-level income since neighbourhood-level not available
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
            then 100 - percent_rank() over (
                partition by ny.year
                order by (r.avg_rent_standard * 12 / cma.median_household_income)
            ) * 100
            else null
        end as affordability_score,

        -- Raw metrics
        cr.crime_rate_per_100k,
        case
            when cma.median_household_income > 0 and r.avg_rent_standard > 0
            then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
            else null
        end as rent_to_income_pct,
        r.avg_rent_standard as avg_rent_2bed,
        r.vacancy_rate

    from neighbourhood_years ny
    left join census_mapped cm
        on ny.neighbourhood_id = cm.neighbourhood_id
        and ny.year = cm.year
    left join cma_census cma
        on ny.year = cma.year
    left join crime cr
        on ny.neighbourhood_id = cr.neighbourhood_id
        and ny.year = cr.year
    left join rentals r
        on ny.year = r.year
),

final as (
    select
        neighbourhood_id,
        neighbourhood_name,
        geometry,
        year,
        population,
        median_household_income,

        -- Component scores (0-100)
        round(safety_score::numeric, 1) as safety_score,
        round(affordability_score::numeric, 1) as affordability_score,
        -- TODO: Replace with actual amenity score when fact_amenities is populated
        -- Currently uses neutral placeholder (50.0) which affects livability_score accuracy
        50.0 as amenity_score,

        -- Composite livability score: safety (40%), affordability (40%), amenities (20%)
        round(
            (coalesce(safety_score, 50) * 0.40 +
             coalesce(affordability_score, 50) * 0.40 +
             50 * 0.20)::numeric,
            1
        ) as livability_score,

        -- Raw metrics
        crime_rate_per_100k,
        rent_to_income_pct,
        avg_rent_2bed,
        vacancy_rate,
        null::numeric as total_amenities_per_1000

    from scored
)

select * from final
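An aside on the scoring above (illustrative, not part of the changeset): `percent_rank()` returns a value in 0–1 within each `year` partition, and since `*` binds tighter than `-`, the expression `100 - percent_rank() over (...) * 100` maps the worst crime rate to 0 and the best to 100 before weighting. A consumer query against this mart might look like the following sketch — column names come from the model above, while the `mart_toronto` schema name is an assumption taken from the schema doc later in this changeset:

```sql
-- Top 10 neighbourhoods by composite livability for one year of the spine
select neighbourhood_name, livability_score, safety_score, affordability_score
from mart_toronto.mart_neighbourhood_overview
where year = 2024
order by livability_score desc
limit 10;
```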
dbt/models/shared/_shared.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
version: 2

models:
  - name: stg_dimensions__time
    description: "Staged time dimension - shared across all projects"
    columns:
      - name: date_key
        description: "Primary key (YYYYMM format)"
        data_tests:
          - unique
          - not_null
      - name: full_date
        description: "First day of month"
        data_tests:
          - not_null
      - name: year
        description: "Calendar year"
        data_tests:
          - not_null
      - name: month
        description: "Month number (1-12)"
        data_tests:
          - not_null
      - name: quarter
        description: "Quarter (1-4)"
        data_tests:
          - not_null
      - name: month_name
        description: "Month name"
        data_tests:
          - not_null
      - name: is_month_start
        description: "Always true (monthly grain)"
dbt/models/shared/_sources.yml (new file, 25 lines)
@@ -0,0 +1,25 @@
version: 2

sources:
  - name: shared
    description: "Shared dimension tables used across all dashboards"
    database: portfolio
    schema: public
    tables:
      - name: dim_time
        description: "Time dimension (monthly grain) - shared across all projects"
        columns:
          - name: date_key
            description: "Primary key (YYYYMM format)"
          - name: full_date
            description: "First day of month"
          - name: year
            description: "Calendar year"
          - name: month
            description: "Month number (1-12)"
          - name: quarter
            description: "Quarter (1-4)"
          - name: month_name
            description: "Month name"
          - name: is_month_start
            description: "Always true (monthly grain)"
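With this source definition in place, a `{{ source('shared', 'dim_time') }}` reference resolves against the `portfolio` database and `public` schema declared above; the compiled SQL comes out roughly as follows (illustrative, not part of the changeset):

```sql
-- What dbt compiles source('shared', 'dim_time') to on Postgres
select * from "portfolio"."public"."dim_time"
```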
@@ -1,9 +1,10 @@
 -- Staged time dimension
--- Source: dim_time table
+-- Source: shared.dim_time table
 -- Grain: One row per month
+-- Note: Shared dimension used across all dashboard projects

 with source as (
-    select * from {{ source('toronto_housing', 'dim_time') }}
+    select * from {{ source('shared', 'dim_time') }}
 ),

 staged as (
@@ -1,18 +0,0 @@ (deleted file)
--- Staged CMHC zone dimension
--- Source: dim_cmhc_zone table
--- Grain: One row per zone
-
-with source as (
-    select * from {{ source('toronto_housing', 'dim_cmhc_zone') }}
-),
-
-staged as (
-    select
-        zone_key,
-        zone_code,
-        zone_name,
-        geometry
-    from source
-)
-
-select * from staged
@@ -1,10 +1,10 @@
 version: 2

 sources:
-  - name: toronto_housing
-    description: "Toronto housing data loaded from CMHC and City of Toronto sources"
+  - name: toronto
+    description: "Toronto data loaded from CMHC and City of Toronto sources"
     database: portfolio
-    schema: public
+    schema: raw_toronto
     tables:
       - name: fact_rentals
         description: "CMHC annual rental survey data by zone and bedroom type"
@@ -16,12 +16,6 @@ sources:
           - name: zone_key
             description: "Foreign key to dim_cmhc_zone"

-      - name: dim_time
-        description: "Time dimension (monthly grain)"
-        columns:
-          - name: date_key
-            description: "Primary key (YYYYMMDD format)"
-
       - name: dim_cmhc_zone
         description: "CMHC zone dimension with geometry"
         columns:
@@ -6,25 +6,16 @@ models:
     columns:
       - name: rental_id
         description: "Unique identifier for rental record"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: date_key
        description: "Date dimension key (YYYYMMDD)"
-        tests:
+        data_tests:
          - not_null
      - name: zone_key
        description: "CMHC zone dimension key"
-        tests:
-          - not_null
-
-  - name: stg_dimensions__time
-    description: "Staged time dimension"
-    columns:
-      - name: date_key
-        description: "Date dimension key (YYYYMMDD)"
-        tests:
-          - unique
+        data_tests:
          - not_null

  - name: stg_dimensions__cmhc_zones
@@ -32,12 +23,12 @@ models:
     columns:
      - name: zone_key
        description: "Zone dimension key"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: zone_code
        description: "CMHC zone code"
-        tests:
+        data_tests:
          - unique
          - not_null

@@ -46,12 +37,12 @@ models:
     columns:
      - name: neighbourhood_id
        description: "Neighbourhood primary key"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: neighbourhood_name
        description: "Official neighbourhood name"
-        tests:
+        data_tests:
          - not_null
      - name: geometry
        description: "PostGIS geometry (POLYGON)"
@@ -61,16 +52,16 @@ models:
     columns:
      - name: census_id
        description: "Census record identifier"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
-        tests:
+        data_tests:
          - not_null
      - name: census_year
        description: "Census year (2016, 2021)"
-        tests:
+        data_tests:
          - not_null

  - name: stg_toronto__crime
@@ -78,16 +69,16 @@ models:
     columns:
      - name: crime_id
        description: "Crime record identifier"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
-        tests:
+        data_tests:
          - not_null
      - name: crime_type
        description: "Type of crime"
-        tests:
+        data_tests:
          - not_null

  - name: stg_toronto__amenities
@@ -95,16 +86,16 @@ models:
     columns:
      - name: amenity_id
        description: "Amenity record identifier"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
-        tests:
+        data_tests:
          - not_null
      - name: amenity_type
        description: "Type of amenity"
-        tests:
+        data_tests:
          - not_null

  - name: stg_cmhc__zone_crosswalk
@@ -112,18 +103,18 @@ models:
     columns:
      - name: crosswalk_id
        description: "Crosswalk record identifier"
-        tests:
+        data_tests:
          - unique
          - not_null
      - name: cmhc_zone_code
        description: "CMHC zone code"
-        tests:
+        data_tests:
          - not_null
      - name: neighbourhood_id
        description: "Neighbourhood foreign key"
-        tests:
+        data_tests:
          - not_null
      - name: area_weight
        description: "Proportional area weight (0-1)"
-        tests:
+        data_tests:
          - not_null
@@ -1,9 +1,13 @@
 -- Staged CMHC rental market survey data
--- Source: fact_rentals table loaded from CMHC CSV exports
+-- Source: fact_rentals table loaded from CMHC/StatCan
 -- Grain: One row per zone per bedroom type per survey year

 with source as (
-    select * from {{ source('toronto_housing', 'fact_rentals') }}
+    select
+        f.*,
+        t.year as survey_year
+    from {{ source('toronto', 'fact_rentals') }} f
+    join {{ source('shared', 'dim_time') }} t on f.date_key = t.date_key
 ),

 staged as (
@@ -11,6 +15,7 @@ staged as (
         id as rental_id,
         date_key,
         zone_key,
+        survey_year as year,
         bedroom_type,
         universe as rental_universe,
         avg_rent,
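The join to `dim_time` is what turns the surrogate `date_key` into a `survey_year` column. If the key really is YYYYMM as the shared dimension documents (the staging YAML above still describes it as YYYYMMDD), the year could also be derived arithmetically; the dimension join is the more maintainable choice. An illustrative equivalent under the YYYYMM assumption (not part of the changeset):

```sql
-- Integer division drops the month component: 202410 -> 2024
-- (for a YYYYMMDD key the divisor would be 10000 instead)
select date_key, date_key / 100 as survey_year_from_key
from raw_toronto.fact_rentals
limit 5;
```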
@@ -3,7 +3,7 @@
 -- Grain: One row per zone-neighbourhood intersection

 with source as (
-    select * from {{ source('toronto_housing', 'bridge_cmhc_neighbourhood') }}
+    select * from {{ source('toronto', 'bridge_cmhc_neighbourhood') }}
 ),

 staged as (
dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql (new file, 19 lines)
@@ -0,0 +1,19 @@
-- Staged CMHC zone dimension
-- Source: dim_cmhc_zone table
-- Grain: One row per zone

with source as (
    select * from {{ source('toronto', 'dim_cmhc_zone') }}
),

staged as (
    select
        zone_key,
        zone_code,
        zone_name
        -- geometry column excluded: CMHC does not provide zone boundaries
        -- Spatial analysis uses dim_neighbourhood geometry instead
    from source
)

select * from staged
@@ -3,7 +3,7 @@
 -- Grain: One row per neighbourhood per amenity type per year

 with source as (
-    select * from {{ source('toronto_housing', 'fact_amenities') }}
+    select * from {{ source('toronto', 'fact_amenities') }}
 ),

 staged as (
@@ -3,7 +3,7 @@
 -- Grain: One row per neighbourhood per census year

 with source as (
-    select * from {{ source('toronto_housing', 'fact_census') }}
+    select * from {{ source('toronto', 'fact_census') }}
 ),

 staged as (
@@ -3,7 +3,7 @@
 -- Grain: One row per neighbourhood per year per crime type

 with source as (
-    select * from {{ source('toronto_housing', 'fact_crime') }}
+    select * from {{ source('toronto', 'fact_crime') }}
 ),

 staged as (
@@ -3,7 +3,7 @@
 -- Grain: One row per neighbourhood (158 total)

 with source as (
-    select * from {{ source('toronto_housing', 'dim_neighbourhood') }}
+    select * from {{ source('toronto', 'dim_neighbourhood') }}
 ),

 staged as (
@@ -1,4 +1,4 @@
-toronto_housing:
+portfolio:
   target: dev
   outputs:
     dev:
@@ -1,6 +1,6 @@
 services:
   db:
-    image: postgis/postgis:16-3.4
+    image: ${POSTGIS_IMAGE:-postgis/postgis:16-3.4}
     container_name: portfolio-db
     restart: unless-stopped
    ports:
@@ -50,9 +50,11 @@ The app runs at `http://localhost:8050`.

 ```bash
 make test       # Run tests
+make test-cov   # Run tests with coverage
 make lint       # Check code style
 make format     # Auto-format code
-make ci         # Run all checks (lint + test)
+make typecheck  # Run mypy type checker
+make ci         # Run all checks (lint, typecheck, test)
 make dbt-run    # Run dbt transformations
 make dbt-test   # Run dbt tests
 ```
@@ -247,13 +249,23 @@ def layout(slug: str = "") -> dmc.Container:
 To add the page to the sidebar, edit `portfolio_app/components/sidebar.py`:

 ```python
-NAV_ITEMS = [
-    {"label": "Home", "href": "/", "icon": "tabler:home"},
-    {"label": "Your Page", "href": "/your-page", "icon": "tabler:star"},
+# For main pages (Home, About, Blog, etc.)
+NAV_ITEMS_MAIN = [
+    {"path": "/", "icon": "tabler:home", "label": "Home"},
+    {"path": "/your-page", "icon": "tabler:star", "label": "Your Page"},
+    # ...
+]
+
+# For project/dashboard pages
+NAV_ITEMS_PROJECTS = [
+    {"path": "/projects", "icon": "tabler:folder", "label": "Projects"},
+    {"path": "/your-dashboard", "icon": "tabler:chart-bar", "label": "Your Dashboard"},
     # ...
 ]
 ```

+The sidebar uses icon buttons with tooltips. Each item needs `path`, `icon` (Tabler icon name), and `label` (tooltip text).
+
 ### URL Routing Summary

 | File Location | URL |
@@ -278,7 +290,7 @@ Dashboard tabs are in `portfolio_app/pages/toronto/tabs/`.

 import dash_mantine_components as dmc

-from portfolio_app.figures.choropleth import create_choropleth
+from portfolio_app.figures.toronto.choropleth import create_choropleth
 from portfolio_app.toronto.demo_data import get_demo_data


@@ -327,13 +339,13 @@ dmc.TabsPanel(create_your_tab_layout(), value="your-tab"),

 ## Creating Figure Factories

-Figure factories are in `portfolio_app/figures/`. They create reusable Plotly figures.
+Figure factories are organized by dashboard domain under `portfolio_app/figures/{domain}/`.

 ### Pattern

 ```python
-# figures/your_chart.py
+# figures/toronto/your_chart.py
-"""Your chart type factory."""
+"""Your chart type factory for Toronto dashboard."""

 import plotly.express as px
 import plotly.graph_objects as go
@@ -370,7 +382,7 @@ def create_your_chart(
 ### Export from `__init__.py`

 ```python
-# figures/__init__.py
+# figures/toronto/__init__.py
 from .your_chart import create_your_chart

 __all__ = [
@@ -379,6 +391,14 @@ __all__ = [
 ]
 ```

+### Importing Figure Factories
+
+```python
+# In callbacks or tabs
+from portfolio_app.figures.toronto import create_choropleth_figure
+from portfolio_app.figures.toronto.bar_charts import create_ranking_bar
+```
+
 ---

 ## Branch Workflow
docs/DATABASE_SCHEMA.md (new file, 335 lines)
@@ -0,0 +1,335 @@
# Database Schema

This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.

## Entity Relationship Diagram

```mermaid
erDiagram
    dim_time {
        int date_key PK
        date full_date UK
        int year
        int month
        int quarter
        string month_name
        bool is_month_start
    }

    dim_cmhc_zone {
        int zone_key PK
        string zone_code UK
        string zone_name
        geometry geometry
    }

    dim_neighbourhood {
        int neighbourhood_id PK
        string name
        geometry geometry
        int population
        numeric land_area_sqkm
        numeric pop_density_per_sqkm
        numeric pct_bachelors_or_higher
        numeric median_household_income
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        int census_year
    }

    dim_policy_event {
        int event_id PK
        date event_date
        date effective_date
        string level
        string category
        string title
        text description
        string expected_direction
        string source_url
        string confidence
    }

    fact_rentals {
        int id PK
        int date_key FK
        int zone_key FK
        string bedroom_type
        int universe
        numeric avg_rent
        numeric median_rent
        numeric vacancy_rate
        numeric availability_rate
        numeric turnover_rate
        numeric rent_change_pct
        string reliability_code
    }

    fact_census {
        int id PK
        int neighbourhood_id FK
        int census_year
        int population
        numeric population_density
        numeric median_household_income
        numeric average_household_income
        numeric unemployment_rate
        numeric pct_bachelors_or_higher
        numeric pct_owner_occupied
        numeric pct_renter_occupied
        numeric median_age
        numeric average_dwelling_value
    }

    fact_crime {
        int id PK
        int neighbourhood_id FK
        int year
        string crime_type
        int count
        numeric rate_per_100k
    }

    fact_amenities {
        int id PK
        int neighbourhood_id FK
        string amenity_type
        int count
        int year
    }

    bridge_cmhc_neighbourhood {
        int id PK
        string cmhc_zone_code FK
        int neighbourhood_id FK
        numeric weight
    }

    dim_time ||--o{ fact_rentals : "date_key"
    dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
    dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
    dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
    dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
    dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
```

## Schema Layers

### Database Schemas

| Schema | Purpose | Managed By |
|--------|---------|------------|
| `public` | Shared dimensions (dim_time) | SQLAlchemy |
| `raw_toronto` | Toronto dimension and fact tables | SQLAlchemy |
| `stg_toronto` | Toronto staging models | dbt |
| `int_toronto` | Toronto intermediate models | dbt |
| `mart_toronto` | Toronto analytical tables | dbt |

### Raw Toronto Schema (raw_toronto)

Toronto-specific tables loaded by SQLAlchemy:

| Table | Source | Description |
|-------|--------|-------------|
| `dim_neighbourhood` | City of Toronto API | 158 neighbourhood boundaries |
| `dim_cmhc_zone` | CMHC | ~20 rental market zones |
| `dim_policy_event` | Manual | Policy events for annotation |
| `fact_census` | City of Toronto API | Census profile data |
| `fact_crime` | Toronto Police API | Crime statistics |
| `fact_amenities` | City of Toronto API | Amenity counts |
| `fact_rentals` | CMHC Data Files | Rental market survey data |
| `bridge_cmhc_neighbourhood` | Computed | Zone-neighbourhood mapping |

### Public Schema

Shared dimensions used across all projects:

| Table | Description |
|-------|-------------|
| `dim_time` | Time dimension (monthly grain) |

### Staging Schema - stg_toronto (dbt)

Staging models provide 1:1 cleaned representations of source data:

| Model | Source Table | Purpose |
|-------|-------------|---------|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
| `stg_toronto__crime` | raw.crime_data | Standardized crime categories |
| `stg_toronto__amenities` | raw.amenities | Typed amenity counts |
| `stg_dimensions__time` | generated | Time dimension |
| `stg_dimensions__cmhc_zones` | raw.cmhc_zones | CMHC zone boundaries |
| `stg_cmhc__zone_crosswalk` | raw.crosswalk | Zone-neighbourhood mapping |

### Marts Schema - mart_toronto (dbt)

Analytical tables ready for dashboard consumption:

| Model | Grain | Purpose |
|-------|-------|---------|
| `mart_neighbourhood_overview` | neighbourhood | Composite livability scores |
| `mart_neighbourhood_housing` | neighbourhood | Housing and rent metrics |
| `mart_neighbourhood_safety` | neighbourhood × year | Crime rate calculations |
| `mart_neighbourhood_demographics` | neighbourhood | Income, age, population metrics |
| `mart_neighbourhood_amenities` | neighbourhood | Amenity accessibility scores |
| `mart_toronto_rentals` | zone × month | Time-series rental analysis |

## Table Details

### Dimension Tables

#### dim_time
Time dimension for date-based analysis. Grain: one row per month.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
| year | INTEGER | NOT NULL | Calendar year |
| month | INTEGER | NOT NULL | Month number (1-12) |
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
| month_name | VARCHAR(20) | NOT NULL | Month name |
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |

#### dim_cmhc_zone
CMHC rental market zones (~20 zones covering Toronto).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |

#### dim_neighbourhood
Toronto's 158 official neighbourhoods.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
| population | INTEGER | | Total population |
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| median_household_income | NUMERIC(12,2) | | Median income |
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
| census_year | INTEGER | DEFAULT 2021 | Census reference year |

#### dim_policy_event
Policy events for time-series annotation (rent control, interest rates, etc.).

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| event_id | INTEGER | PK, AUTO | Surrogate key |
| event_date | DATE | NOT NULL | Announcement date |
| effective_date | DATE | | Implementation date |
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
| title | VARCHAR(200) | NOT NULL | Event title |
| description | TEXT | | Detailed description |
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
| source_url | VARCHAR(500) | | Reference link |
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |

### Fact Tables

#### fact_rentals
CMHC rental market survey data. Grain: zone × bedroom type × survey date.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| date_key | INTEGER | FK → dim_time | Survey date reference |
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
| universe | INTEGER | | Total rental units |
| avg_rent | NUMERIC(10,2) | | Average rent |
| median_rent | NUMERIC(10,2) | | Median rent |
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
| availability_rate | NUMERIC(5,2) | | Availability percentage |
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
| reliability_code | VARCHAR(2) | | CMHC data quality code |

#### fact_census
Census statistics. Grain: neighbourhood × census year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
| population | INTEGER | | Total population |
| population_density | NUMERIC(10,2) | | People per km² |
| median_household_income | NUMERIC(12,2) | | Median income |
| average_household_income | NUMERIC(12,2) | | Average income |
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
| median_age | NUMERIC(5,2) | | Median resident age |
| average_dwelling_value | NUMERIC(12,2) | | Average home value |

#### fact_crime
Crime statistics. Grain: neighbourhood × year × crime type.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| year | INTEGER | NOT NULL | Calendar year |
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
| count | INTEGER | NOT NULL | Number of incidents |
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |

#### fact_amenities
Amenity counts. Grain: neighbourhood × amenity type × year.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
| count | INTEGER | NOT NULL | Number of amenities |
| year | INTEGER | NOT NULL | Reference year |

### Bridge Tables

#### bridge_cmhc_neighbourhood
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.

| Column | Type | Constraints | Description |
|--------|------|-------------|-------------|
| id | INTEGER | PK, AUTO | Surrogate key |
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |

## Indexes

| Table | Index | Columns | Purpose |
|-------|-------|---------|---------|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |

## PostGIS Extensions

The database requires PostGIS for geospatial operations:

```sql
CREATE EXTENSION IF NOT EXISTS postgis;
```

All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
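As a worked example of what the SRID 4326 geometries enable (illustrative only; table and column names come from the schema above, and the point coordinates are made up for the sketch):

```sql
-- Find which neighbourhood contains a given point
-- (PostGIS points take longitude first, then latitude)
select n.neighbourhood_id, n.name
from raw_toronto.dim_neighbourhood n
where ST_Contains(
    n.geometry,
    ST_SetSRID(ST_MakePoint(-79.3832, 43.6532), 4326)  -- downtown-ish coordinates
);
```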
@@ -76,7 +76,8 @@ portfolio_app/
 ├── components/          # Shared UI components
 ├── content/blog/        # Markdown blog articles
 ├── errors/              # Exception handling
-├── figures/             # Plotly figure factories
+├── figures/
+│   └── toronto/         # Toronto figure factories
 ├── pages/
 │   ├── home.py
 │   ├── about.py
@@ -91,15 +92,26 @@ portfolio_app/
 │   ├── dashboard.py
 │   ├── methodology.py
 │   ├── tabs/            # 5 tab layouts
-│   └── callbacks/       # Dashboard interactions
+│   └── callbacks/       # Dashboard interactions (map_callbacks, chart_callbacks, selection_callbacks)
 ├── toronto/             # Data logic
-│   ├── parsers/         # API extraction
+│   ├── parsers/         # API extraction (geo, toronto_open_data, toronto_police, cmhc)
-│   ├── loaders/         # Database operations
+│   ├── loaders/         # Database operations (base, cmhc, cmhc_crosswalk)
 │   ├── schemas/         # Pydantic models
-│   ├── models/          # SQLAlchemy ORM
+│   ├── models/          # SQLAlchemy ORM (raw_toronto schema)
+│   ├── services/        # Query functions (neighbourhood_service, geometry_service)
 │   └── demo_data.py     # Sample data
 └── utils/
     └── markdown_loader.py  # Blog article loading

+dbt/                     # dbt project: portfolio
+├── models/
+│   ├── shared/               # Cross-domain dimensions
+│   ├── staging/toronto/      # Toronto staging models
+│   ├── intermediate/toronto/ # Toronto intermediate models
+│   └── marts/toronto/        # Toronto mart tables
+
+notebooks/
+└── toronto/             # Toronto documentation notebooks
 ```
@@ -143,10 +155,20 @@ CMHC Zones (~20) ← Rental data (Census Tract aligned)
 | `fact_rentals` | Fact | Rental data by CMHC zone |
 | `fact_amenities` | Fact | Amenity counts by neighbourhood |

-### dbt Layers
+### dbt Project: `portfolio`
+
+**Model Structure:**
+```
+dbt/models/
+├── shared/               # Cross-domain dimensions (stg_dimensions__time)
+├── staging/toronto/      # Toronto staging models
+├── intermediate/toronto/ # Toronto intermediate models
+└── marts/toronto/        # Toronto mart tables
+```

 | Layer | Naming | Example |
 |-------|--------|---------|
+| Shared | `stg_dimensions__*` | `stg_dimensions__time` |
 | Staging | `stg_{source}__{entity}` | `stg_toronto__neighbourhoods` |
 | Intermediate | `int_{domain}__{transform}` | `int_neighbourhood__demographics` |
 | Marts | `mart_{domain}` | `mart_neighbourhood_overview` |
@@ -241,16 +263,25 @@ LOG_LEVEL=INFO
 | Target | Purpose |
 |--------|---------|
 | `setup` | Install deps, create .env, init pre-commit |
-| `docker-up` | Start PostgreSQL + PostGIS |
+| `docker-up` | Start PostgreSQL + PostGIS (auto-detects x86/ARM) |
 | `docker-down` | Stop containers |
+| `docker-logs` | View container logs |
 | `db-init` | Initialize database schema |
+| `db-reset` | Drop and recreate database (DESTRUCTIVE) |
+| `load-data` | Load Toronto data from APIs, seed dev data |
+| `load-toronto-only` | Load Toronto data without dbt or seeding |
+| `seed-data` | Seed sample development data |
 | `run` | Start Dash dev server |
 | `test` | Run pytest |
-| `dbt-run` | Run dbt models |
-| `dbt-test` | Run dbt tests |
+| `test-cov` | Run pytest with coverage |
 | `lint` | Run ruff linter |
 | `format` | Run ruff formatter |
-| `ci` | Run all checks |
+| `typecheck` | Run mypy type checker |
+| `ci` | Run all checks (lint, typecheck, test) |
+| `dbt-run` | Run dbt models |
+| `dbt-test` | Run dbt tests |
+| `dbt-docs` | Generate and serve dbt documentation |
+| `clean` | Remove build artifacts and caches |

 ---
@@ -10,6 +10,7 @@ This folder contains lessons learned from sprints and development work. These le
 | Date | Sprint/Phase | Title | Tags |
 |------|--------------|-------|------|
+| 2026-02-01 | Sprint 10 | [Formspree Integration with Dash Callbacks](./sprint-10-formspree-dash-integration.md) | formspree, dash, callbacks, forms, spam-protection, honeypot, ajax |
 | 2026-01-17 | Sprint 9 | [Gitea Labels API Requires Org Context](./sprint-9-gitea-labels-user-repos.md) | gitea, mcp, api, labels, projman, configuration |
 | 2026-01-17 | Sprint 9 | [Always Read CLAUDE.md Before Asking Questions](./sprint-9-read-claude-md-first.md) | projman, claude-code, context, documentation, workflow |
 | 2026-01-17 | Sprint 9-10 | [Graceful Error Handling in Service Layers](./sprint-9-10-graceful-error-handling.md) | python, postgresql, error-handling, dash, graceful-degradation, arm64 |
@@ -0,0 +1,70 @@
|
|||||||
|
# Sprint 10 - Formspree Integration with Dash Callbacks
|
||||||
|
|
||||||
|
## Context
|
||||||
|
Implementing a contact form on a Dash portfolio site that submits to Formspree, a third-party form handling service.
|
||||||
|
|
||||||
|
## Insights
|
||||||
|
|
||||||
|
### Formspree AJAX Submission
|
||||||
|
Formspree supports AJAX submissions (no page redirect) when you:
|
||||||
|
1. POST with `Content-Type: application/json`
|
||||||
|
2. Include `Accept: application/json` header
|
||||||
|
3. Send form data as JSON body
|
||||||
|
|
||||||
|
This returns a JSON response instead of redirecting to a thank-you page, which is ideal for single-page Dash applications.
|
||||||
|
|
||||||
|
### Dash Multi-Output Callbacks for Forms
|
||||||
|
When handling form submission with validation and feedback, use a multi-output callback pattern:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@callback(
|
||||||
|
Output("feedback-container", "children"), # Success/error alert
|
||||||
|
Output("submit-button", "loading"), # Button loading state
|
||||||
|
Output("field-1", "value"), # Clear on success
|
||||||
|
Output("field-2", "value"), # Clear on success
|
||||||
|
Output("field-1", "error"), # Field-level errors
|
||||||
|
Output("field-2", "error"), # Field-level errors
|
||||||
|
Input("submit-button", "n_clicks"),
|
||||||
|
State("field-1", "value"),
|
||||||
|
State("field-2", "value"),
|
||||||
|
prevent_initial_call=True,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `no_update` for outputs you don't want to change (e.g., keep form values on validation error, only clear on success).
|
||||||
|
|
||||||
|
### Honeypot Spam Protection
|
||||||
|
Simple and effective bot protection without CAPTCHA:
|
||||||
|
1. Add a hidden text input field (CSS: `position: absolute; left: -9999px`)
|
||||||
|
2. Set `tabIndex=-1` and `autoComplete="off"` to prevent accidental filling
|
||||||
|
3. In callback, check if honeypot has value - if yes, it's a bot
|
||||||
|
4. For bots: return fake success (don't reveal detection)
|
||||||
|
5. For humans: proceed with real submission
|
||||||
|
|
||||||
|
Formspree also accepts `_gotcha` as a honeypot field name in the JSON payload.
## Code Pattern

```python
# Honeypot check - bots fill hidden fields
if honeypot_value:
    # Fake success - don't let bots know they were caught
    return (_create_success_alert(), False, "", "", None, None)

# Real submission for humans
response = requests.post(
    FORMSPREE_ENDPOINT,
    json=form_data,
    headers={"Accept": "application/json", "Content-Type": "application/json"},
    timeout=10,
)
```

## Prevention/Best Practices

- Always use the `timeout` parameter with `requests.post()` so a slow network can't hang the callback
- Wrap external API calls in try/except for network errors (see the sketch after this list)
- Return user-friendly error messages, not technical details
- Use DMC's `required=True` and `error` props for form validation feedback
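A sketch of that try/except wrapper (the function name and messages are illustrative):

```python
import requests


def safe_submit(endpoint: str, payload: dict) -> tuple[bool, str]:
    """Wrap the external call so network failures degrade gracefully."""
    try:
        response = requests.post(endpoint, json=payload, timeout=10)
        response.raise_for_status()
        return True, "Message sent - thank you!"
    except requests.exceptions.Timeout:
        return False, "The form service timed out. Please try again."
    except requests.exceptions.RequestException:
        # Log the technical detail server-side; show the user a friendly message.
        return False, "Something went wrong sending your message. Please try again later."
```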
## Tags

formspree, dash, callbacks, forms, spam-protection, honeypot, ajax, python, requests, validation
265
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,265 @@
# Runbook: Adding a New Dashboard

This runbook describes how to add a new data dashboard to the portfolio application.

## Prerequisites

- [ ] Data sources identified and accessible
- [ ] Database schema designed
- [ ] Basic Dash/Plotly familiarity

## Directory Structure

Create the following structure:
### Application Code (`portfolio_app/`)

```
portfolio_app/
├── pages/
│   └── {dashboard_name}/
│       ├── dashboard.py          # Main layout with tabs
│       ├── methodology.py        # Data sources and methods page
│       ├── tabs/
│       │   ├── __init__.py
│       │   ├── overview.py       # Overview tab layout
│       │   └── ...               # Additional tab layouts
│       └── callbacks/
│           ├── __init__.py
│           └── ...               # Callback modules
├── {dashboard_name}/             # Data logic (outside pages/)
│   ├── __init__.py
│   ├── parsers/                  # API/CSV extraction
│   │   └── __init__.py
│   ├── loaders/                  # Database operations
│   │   └── __init__.py
│   ├── schemas/                  # Pydantic models
│   │   └── __init__.py
│   └── models/                   # SQLAlchemy ORM (schema: raw_{dashboard_name})
│       └── __init__.py
└── figures/
    └── {dashboard_name}/         # Figure factories for this dashboard
        ├── __init__.py
        └── ...                   # Chart modules
```

### dbt Models (`dbt/models/`)

```
dbt/models/
├── staging/
│   └── {dashboard_name}/         # Staging models
│       ├── _sources.yml          # Source definitions (schema: raw_{dashboard_name})
│       ├── _staging.yml          # Model tests/docs
│       └── stg_*.sql             # Staging models
├── intermediate/
│   └── {dashboard_name}/         # Intermediate models
│       ├── _intermediate.yml
│       └── int_*.sql
└── marts/
    └── {dashboard_name}/         # Mart tables
        ├── _marts.yml
        └── mart_*.sql
```

### Documentation (`notebooks/`)

```
notebooks/
└── {dashboard_name}/             # Domain subdirectories
    ├── overview/
    ├── ...
```
## Step-by-Step Checklist

### 1. Data Layer

- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/` (see the sketch after this list)
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
- [ ] Create parsers in `{dashboard_name}/parsers/`
- [ ] Create loaders in `{dashboard_name}/loaders/`
- [ ] Add database migrations if needed
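A minimal schema sketch under these conventions (the model name and fields are hypothetical):

```python
from pydantic import BaseModel, Field


class NeighbourhoodIn(BaseModel):
    """Validated shape of one parsed source record."""

    neighbourhood_id: int
    neighbourhood_name: str
    population: int = Field(ge=0)
    year: int
```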
### 2. Database Schema

- [ ] Define a schema constant in the models (e.g., `RAW_FOOTBALL_SCHEMA = "raw_football"`)
- [ ] Add `__table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}` to all models (see the sketch below)
- [ ] Update `scripts/db/init_schema.py` to create the new schema
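A minimal model sketch using the schema constant (SQLAlchemy 2.0 declarative style; the `Match` table is hypothetical, and the project may use an older declarative base):

```python
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

RAW_FOOTBALL_SCHEMA = "raw_football"


class Base(DeclarativeBase):
    pass


class Match(Base):
    """Hypothetical raw-layer table; lives in the dashboard's own schema."""

    __tablename__ = "matches"
    __table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}

    match_id: Mapped[int] = mapped_column(primary_key=True)
    home_team: Mapped[str]
    away_team: Mapped[str]
```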
### 3. dbt Models

Create dbt models in `dbt/models/`:

- [ ] `staging/{dashboard_name}/_sources.yml` - Source definitions pointing to the `raw_{dashboard_name}` schema
- [ ] `staging/{dashboard_name}/stg_{source}__{entity}.sql` - Raw data cleaning
- [ ] `intermediate/{dashboard_name}/int_{domain}__{transform}.sql` - Business logic
- [ ] `marts/{dashboard_name}/mart_{domain}.sql` - Final analytical tables

Update `dbt/dbt_project.yml` with the new subdirectory config:

```yaml
models:
  portfolio:
    staging:
      {dashboard_name}:
        +materialized: view
        +schema: stg_{dashboard_name}
    intermediate:
      {dashboard_name}:
        +materialized: view
        +schema: int_{dashboard_name}
    marts:
      {dashboard_name}:
        +materialized: table
        +schema: mart_{dashboard_name}
```

Follow the naming conventions:
- Staging: `stg_{source}__{entity}`
- Intermediate: `int_{domain}__{transform}`
- Marts: `mart_{domain}`
### 4. Visualization Layer

- [ ] Create figure factories in `figures/{dashboard_name}/`
- [ ] Create `figures/{dashboard_name}/__init__.py` with exports
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)` (see the sketch after this section)

Import pattern:

```python
from portfolio_app.figures.{dashboard_name} import create_choropleth_figure
```
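A sketch of what a factory can look like (the signature and styling choices are illustrative, not the actual implementation):

```python
import plotly.graph_objects as go


def create_scatter_figure(data: list[dict], x: str, y: str, title: str, **kwargs) -> go.Figure:
    """Build a styled scatter chart from pre-queried records."""
    fig = go.Figure(
        go.Scatter(
            x=[row[x] for row in data],
            y=[row[y] for row in data],
            mode="markers",
        )
    )
    fig.update_layout(title=title, template="plotly_white", **kwargs)
    return fig
```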
### 5. Dashboard Pages

#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)

```python
import dash
from dash import html, dcc
import dash_mantine_components as dmc

from .tabs.overview import overview_tab  # tab layout, exported per the checklist below

dash.register_page(
    __name__,
    path="/{dashboard_name}",
    title="{Dashboard Title}",
    description="{Description}",
)


def layout():
    return dmc.Container([
        # Header
        dmc.Title("{Dashboard Title}", order=1),

        # Tabs
        dmc.Tabs([
            dmc.TabsList([
                dmc.TabsTab("Overview", value="overview"),
                # Add more tabs
            ]),
            dmc.TabsPanel(overview_tab(), value="overview"),
            # Add more panels
        ], value="overview"),
    ])
```

#### Tab Layouts (`pages/{dashboard_name}/tabs/`)

- [ ] Create one file per tab
- [ ] Export a layout function from each

#### Callbacks (`pages/{dashboard_name}/callbacks/`)

- [ ] Create callback modules for interactivity
- [ ] Import and register them in `dashboard.py` (see the sketch after this list)
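Registration is just importing the module so its `@callback` decorators execute; a sketch using the Toronto example:

```python
# pages/toronto/dashboard.py
# Importing the module runs its @callback decorators, which registers the callbacks.
from portfolio_app.pages.toronto.callbacks import map_callbacks  # noqa: F401
```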
### 6. Navigation

Add a link to the sidebar in `components/sidebar.py`:

```python
dmc.NavLink(
    label="{Dashboard Name}",
    href="/{dashboard_name}",
    icon=DashIconify(icon="..."),
)
```
### 7. Documentation

- [ ] Create a methodology page (`pages/{dashboard_name}/methodology.py`)
- [ ] Document data sources
- [ ] Document transformation logic
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed

### 8. Testing

- [ ] Add unit tests for parsers (see the sketch after this list)
- [ ] Add unit tests for loaders
- [ ] Add integration tests for callbacks
- [ ] Run `make test`
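A minimal parser test sketch (the module path and record shape are assumptions about your parser's output):

```python
# tests/test_parsers.py
from portfolio_app.toronto.parsers import parse_source_data


def test_parse_source_data_returns_records():
    records = parse_source_data()
    assert isinstance(records, list)
    assert records, "parser returned no records"
    assert all("neighbourhood_id" in record for record in records)
```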
### 9. Final Verification

- [ ] All pages render without errors
- [ ] All callbacks respond correctly
- [ ] Data loads successfully
- [ ] dbt models run cleanly (`make dbt-run`)
- [ ] Linting passes (`make lint`)
- [ ] Tests pass (`make test`)

## Example: Toronto Dashboard

Reference implementation: `portfolio_app/pages/toronto/`

Key files:
- `dashboard.py` - Main layout with 5 tabs
- `tabs/overview.py` - Livability scores, scatter plots
- `callbacks/map_callbacks.py` - Choropleth interactions
- `toronto/models/dimensions.py` - Dimension tables
- `toronto/models/facts.py` - Fact tables
## Common Patterns

### Figure Factories

```python
# figures/choropleth.py
import geopandas as gpd
import plotly.graph_objects as go


def create_choropleth_figure(
    gdf: gpd.GeoDataFrame,
    value_column: str,
    title: str,
    **kwargs,
) -> go.Figure:
    ...
```

### Callbacks

```python
# callbacks/map_callbacks.py
from dash import callback, Input, Output


@callback(
    Output("neighbourhood-details", "children"),
    Input("choropleth-map", "clickData"),
)
def update_details(click_data):
    ...
```

### Data Loading

```python
# {dashboard_name}/loaders/load.py
from sqlalchemy.orm import Session


def load_data(session: Session) -> None:
    # Parse from source
    records = parse_source_data()

    # Validate with Pydantic
    validated = [Schema(**r) for r in records]

    # Load to database
    for record in validated:
        session.add(Model(**record.model_dump()))

    session.commit()
```
232
docs/runbooks/deployment.md
Normal file
@@ -0,0 +1,232 @@
# Runbook: Deployment

This runbook covers deployment procedures for the Analytics Portfolio application.

## Environments

| Environment | Branch | Server | URL |
|-------------|--------|--------|-----|
| Development | `development` | Local | http://localhost:8050 |
| Staging | `staging` | Homelab (hotserv) | Internal |
| Production | `main` | Bandit Labs VPS | https://leodata.science |

## CI/CD Pipeline

### Automatic Deployment

Deployments are triggered automatically via Gitea Actions:

1. **Push to `staging`** → Deploys to the staging server
2. **Push to `main`** → Deploys to the production server

### Workflow Files

- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
- `.gitea/workflows/deploy-production.yml` - Production deployment

### Required Secrets

Configure these in the Gitea repository settings:

| Secret | Description |
|--------|-------------|
| `STAGING_HOST` | Staging server hostname/IP |
| `STAGING_USER` | SSH username for staging |
| `STAGING_SSH_KEY` | Private key for staging SSH |
| `PROD_HOST` | Production server hostname/IP |
| `PROD_USER` | SSH username for production |
| `PROD_SSH_KEY` | Private key for production SSH |
## Manual Deployment

### Prerequisites

- SSH access to the target server
- Repository cloned at `~/apps/personal-portfolio`
- Virtual environment created at `.venv`
- Docker and Docker Compose installed
- PostgreSQL container running

### Steps

```bash
# 1. SSH to the server
ssh user@server

# 2. Navigate to the app directory
cd ~/apps/personal-portfolio

# 3. Pull the latest changes
git fetch origin {branch}
git reset --hard origin/{branch}

# 4. Activate the virtual environment
source .venv/bin/activate

# 5. Install dependencies
pip install -r requirements.txt

# 6. Run database migrations (if any)
# python -m alembic upgrade head

# 7. Run dbt models
cd dbt && dbt run --profiles-dir . && cd ..

# 8. Restart the application
docker compose down
docker compose up -d

# 9. Verify health
curl http://localhost:8050/health
```
## Rollback Procedure

### Quick Rollback

If a deployment fails, roll back to the previous working commit:

```bash
# 1. Find the previous working commit
git log --oneline -10

# 2. Reset to that commit
git reset --hard {commit_hash}

# 3. Restart services
docker compose down
docker compose up -d

# 4. Verify
curl http://localhost:8050/health
```

### Full Rollback (Database)

If database changes need to be reverted:

```bash
# 1. Stop the application (keep postgres running for the restore)
docker compose stop app

# 2. Restore the database backup
pg_restore -h localhost -U portfolio -d portfolio backup.dump

# 3. Revert the code
git reset --hard {commit_hash}

# 4. Run dbt at that version
cd dbt && dbt run --profiles-dir . && cd ..

# 5. Restart
docker compose up -d
```
## Health Checks

### Application Health

```bash
curl http://localhost:8050/health
```

Expected response:
```json
{"status": "healthy"}
```
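If the one-shot curl races the container startup, a small retry loop can poll instead; a Python sketch (the URL matches the check above, the timings are arbitrary):

```python
import time

import requests


def wait_for_healthy(url: str = "http://localhost:8050/health", attempts: int = 10) -> bool:
    """Poll the health endpoint until it reports healthy or attempts run out."""
    for _ in range(attempts):
        try:
            response = requests.get(url, timeout=2)
            if response.ok and response.json().get("status") == "healthy":
                return True
        except requests.exceptions.RequestException:
            pass  # Not up yet; retry after a short pause.
        time.sleep(3)
    return False
```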
### Database Health

```bash
docker compose exec postgres pg_isready -U portfolio
```

### Container Status

```bash
docker compose ps
```
## Monitoring

### View Logs

```bash
# All services
make logs

# Specific service
make logs SERVICE=postgres

# Or directly
docker compose logs -f
```

### Check Resource Usage

```bash
docker stats
```
## Troubleshooting

### Application Won't Start

1. Check container logs: `docker compose logs app`
2. Verify environment variables: `cat .env`
3. Check database connectivity: `docker compose exec postgres pg_isready`
4. Verify port availability: `lsof -i :8050`

### Database Connection Errors

1. Check the postgres container: `docker compose ps postgres`
2. Verify `DATABASE_URL` in `.env`
3. Check postgres logs: `docker compose logs postgres`
4. Test the connection: `docker compose exec postgres psql -U portfolio -c '\l'`

### dbt Failures

1. Validate the dbt setup and connection: `cd dbt && dbt debug`
2. Verify profiles.yml: `cat dbt/profiles.yml`
3. Run with verbose output: `dbt run --debug`

### Out of Memory

1. Check memory usage: `free -h`
2. Review container limits in `docker-compose.yml`
3. Consider increasing swap or server resources
## Backup Procedures

### Database Backup

```bash
# Create a plain SQL backup (-T avoids TTY output corrupting the redirect)
docker compose exec -T postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql

# Compressed (custom-format) backup
docker compose exec -T postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
```

### Restore from Backup

```bash
# From a SQL file
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql

# From a dump file
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
```
## Deployment Checklist

Before deploying to production:

- [ ] All tests pass (`make test`)
- [ ] Linting passes (`make lint`)
- [ ] Staging deployment successful
- [ ] Manual testing on staging complete
- [ ] Database backup taken
- [ ] Rollback plan confirmed
- [ ] Team notified of the deployment window
@@ -1,17 +1,18 @@
-# Toronto Neighbourhood Dashboard - Notebooks
+# Dashboard Documentation Notebooks
 
-Documentation notebooks for the Toronto Neighbourhood Dashboard visualizations. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern.
+Documentation notebooks organized by dashboard project. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern.
 
 ## Directory Structure
 
 ```
 notebooks/
 ├── README.md              # This file
-├── overview/              # Overview tab visualizations
-├── housing/               # Housing tab visualizations
-├── safety/                # Safety tab visualizations
-├── demographics/          # Demographics tab visualizations
-└── amenities/             # Amenities tab visualizations
+└── toronto/               # Toronto Neighbourhood Dashboard
+    ├── overview/          # Overview tab visualizations
+    ├── housing/           # Housing tab visualizations
+    ├── safety/            # Safety tab visualizations
+    ├── demographics/      # Demographics tab visualizations
+    └── amenities/         # Amenities tab visualizations
 ```
 
 ## Notebook Template
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -50,8 +55,8 @@
 " transit_per_1000,\n",
 " total_amenities,\n",
 " population\n",
-"FROM mart_neighbourhood_amenities\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
+"FROM public_marts.mart_neighbourhood_amenities\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
 "ORDER BY total_amenities_per_1000 DESC\n",
 "\"\"\"\n",
 "\n",
@@ -75,17 +80,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import geopandas as gpd\n",
 "import json\n",
 "\n",
+"import geopandas as gpd\n",
+"\n",
 "gdf = gpd.GeoDataFrame(\n",
-" df,\n",
-" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
-" crs='EPSG:4326'\n",
+" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
 ")\n",
 "\n",
 "geojson = json.loads(gdf.to_json())\n",
-"data = df.drop(columns=['geometry']).to_dict('records')"
+"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
 ]
 },
 {
@@ -101,7 +105,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'total_amenities_per_1000', 'amenity_index', 'amenity_tier']].head(10)"
+"df[\n",
+" [\"neighbourhood_name\", \"total_amenities_per_1000\", \"amenity_index\", \"amenity_tier\"]\n",
+"].head(10)"
 ]
 },
 {
@@ -112,7 +118,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`."
+"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`."
 ]
 },
 {
@@ -122,18 +128,24 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
 "\n",
 "fig = create_choropleth_figure(\n",
 " geojson=geojson,\n",
 " data=data,\n",
-" location_key='neighbourhood_id',\n",
-" color_column='total_amenities_per_1000',\n",
-" hover_data=['neighbourhood_name', 'amenity_index', 'parks_per_1000', 'schools_per_1000'],\n",
-" color_scale='Greens',\n",
-" title='Toronto Amenities per 1,000 Population',\n",
+" location_key=\"neighbourhood_id\",\n",
+" color_column=\"total_amenities_per_1000\",\n",
+" hover_data=[\n",
+" \"neighbourhood_name\",\n",
+" \"amenity_index\",\n",
+" \"parks_per_1000\",\n",
+" \"schools_per_1000\",\n",
+" ],\n",
+" color_scale=\"Greens\",\n",
+" title=\"Toronto Amenities per 1,000 Population\",\n",
 " zoom=10,\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -44,8 +49,8 @@
 " transit_index,\n",
 " amenity_index,\n",
 " amenity_tier\n",
-"FROM mart_neighbourhood_amenities\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
+"FROM public_marts.mart_neighbourhood_amenities\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
 "ORDER BY amenity_index DESC\n",
 "\"\"\"\n",
 "\n",
@@ -74,8 +79,8 @@
 "bottom_5 = df.tail(5)\n",
 "\n",
 "# Prepare radar data\n",
-"categories = ['Parks', 'Schools', 'Transit']\n",
-"index_columns = ['parks_index', 'schools_index', 'transit_index']"
+"categories = [\"Parks\", \"Schools\", \"Transit\"]\n",
+"index_columns = [\"parks_index\", \"schools_index\", \"transit_index\"]"
 ]
 },
 {
@@ -92,9 +97,29 @@
 "outputs": [],
 "source": [
 "print(\"Top 5 Amenity-Rich Neighbourhoods:\")\n",
-"display(top_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])\n",
+"display(\n",
+" top_5[\n",
+" [\n",
+" \"neighbourhood_name\",\n",
+" \"parks_index\",\n",
+" \"schools_index\",\n",
+" \"transit_index\",\n",
+" \"amenity_index\",\n",
+" ]\n",
+" ]\n",
+")\n",
 "print(\"\\nBottom 5 Underserved Neighbourhoods:\")\n",
-"display(bottom_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])"
+"display(\n",
+" bottom_5[\n",
+" [\n",
+" \"neighbourhood_name\",\n",
+" \"parks_index\",\n",
+" \"schools_index\",\n",
+" \"transit_index\",\n",
+" \"amenity_index\",\n",
+" ]\n",
+" ]\n",
+")"
 ]
 },
 {
@@ -105,7 +130,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_radar` from `portfolio_app.figures.radar`."
+"Uses `create_radar` from `portfolio_app.figures.toronto.radar`."
 ]
 },
 {
@@ -115,28 +140,21 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.radar import create_radar_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.radar import create_comparison_radar\n",
 "\n",
 "# Compare top neighbourhood vs city average (100)\n",
 "top_hood = top_5.iloc[0]\n",
+"metrics = [\"parks_index\", \"schools_index\", \"transit_index\"]\n",
 "\n",
-"data = [\n",
-" {\n",
-" 'name': top_hood['neighbourhood_name'],\n",
-" 'values': [top_hood['parks_index'], top_hood['schools_index'], top_hood['transit_index']],\n",
-" 'categories': categories\n",
-" },\n",
-" {\n",
-" 'name': 'City Average',\n",
-" 'values': [100, 100, 100],\n",
-" 'categories': categories\n",
-" }\n",
-"]\n",
-"\n",
-"fig = create_radar_figure(\n",
-" data=data,\n",
+"fig = create_comparison_radar(\n",
+" selected_data=top_hood.to_dict(),\n",
+" average_data={\"parks_index\": 100, \"schools_index\": 100, \"transit_index\": 100},\n",
+" metrics=metrics,\n",
+" selected_name=top_hood[\"neighbourhood_name\"],\n",
+" average_name=\"City Average\",\n",
 " title=f\"Amenity Profile: {top_hood['neighbourhood_name']} vs City Average\",\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -44,8 +49,8 @@
 " transit_count,\n",
 " population,\n",
 " amenity_tier\n",
-"FROM mart_neighbourhood_amenities\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_amenities)\n",
+"FROM public_marts.mart_neighbourhood_amenities\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n",
 " AND transit_per_1000 IS NOT NULL\n",
 "ORDER BY transit_per_1000 DESC\n",
 "\"\"\"\n",
@@ -70,7 +75,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"data = df.head(20).to_dict('records')"
+"data = df.head(20).to_dict(\"records\")"
 ]
 },
 {
@@ -86,7 +91,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'transit_per_1000', 'transit_index', 'transit_count']].head(10)"
+"df[[\"neighbourhood_name\", \"transit_per_1000\", \"transit_index\", \"transit_count\"]].head(\n",
+" 10\n",
+")"
 ]
 },
 {
@@ -97,7 +104,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`."
+"Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`."
 ]
 },
 {
@@ -107,17 +114,18 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_horizontal_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n",
 "\n",
 "fig = create_horizontal_bar(\n",
 " data=data,\n",
-" name_column='neighbourhood_name',\n",
-" value_column='transit_per_1000',\n",
-" title='Top 20 Neighbourhoods by Transit Accessibility',\n",
-" color='#00BCD4',\n",
-" value_format='.2f',\n",
+" name_column=\"neighbourhood_name\",\n",
+" value_column=\"transit_per_1000\",\n",
+" title=\"Top 20 Neighbourhoods by Transit Accessibility\",\n",
+" color=\"#00BCD4\",\n",
+" value_format=\".2f\",\n",
 ")\n",
 "\n",
 "fig.show()"
@@ -136,7 +144,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(f\"City-wide Transit Statistics:\")\n",
+"print(\"City-wide Transit Statistics:\")\n",
 "print(f\" Total Transit Stops: {df['transit_count'].sum():,.0f}\")\n",
 "print(f\" Average per 1,000 pop: {df['transit_per_1000'].mean():.2f}\")\n",
 "print(f\" Median per 1,000 pop: {df['transit_per_1000'].median():.2f}\")\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -45,8 +50,8 @@
 " population,\n",
 " income_quintile,\n",
 " pct_renter_occupied\n",
-"FROM mart_neighbourhood_demographics\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
+"FROM public_marts.mart_neighbourhood_demographics\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
 " AND median_age IS NOT NULL\n",
 "ORDER BY median_age DESC\n",
 "\"\"\"\n",
@@ -72,13 +77,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"city_avg = df['city_avg_age'].iloc[0]\n",
-"df['age_category'] = df['median_age'].apply(\n",
-" lambda x: 'Younger' if x < city_avg else 'Older'\n",
+"city_avg = df[\"city_avg_age\"].iloc[0]\n",
+"df[\"age_category\"] = df[\"median_age\"].apply(\n",
+" lambda x: \"Younger\" if x < city_avg else \"Older\"\n",
 ")\n",
-"df['age_deviation'] = df['median_age'] - city_avg\n",
+"df[\"age_deviation\"] = df[\"median_age\"] - city_avg\n",
 "\n",
-"data = df.to_dict('records')"
+"data = df.to_dict(\"records\")"
 ]
 },
 {
@@ -96,9 +101,13 @@
 "source": [
 "print(f\"City Average Age: {city_avg:.1f}\")\n",
 "print(\"\\nYoungest Neighbourhoods:\")\n",
-"display(df.tail(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])\n",
+"display(\n",
+" df.tail(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n",
+")\n",
 "print(\"\\nOldest Neighbourhoods:\")\n",
-"display(df.head(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])"
+"display(\n",
+" df.head(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n",
+")"
 ]
 },
 {
@@ -109,7 +118,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`."
+"Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`."
 ]
 },
 {
@@ -119,20 +128,21 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_ranking_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n",
 "\n",
 "fig = create_ranking_bar(\n",
 " data=data,\n",
-" name_column='neighbourhood_name',\n",
-" value_column='median_age',\n",
-" title='Youngest & Oldest Neighbourhoods (Median Age)',\n",
+" name_column=\"neighbourhood_name\",\n",
+" value_column=\"median_age\",\n",
+" title=\"Youngest & Oldest Neighbourhoods (Median Age)\",\n",
 " top_n=10,\n",
 " bottom_n=10,\n",
-" color_top='#FF9800', # Orange for older\n",
-" color_bottom='#2196F3', # Blue for younger\n",
-" value_format='.1f',\n",
+" color_top=\"#FF9800\", # Orange for older\n",
+" color_bottom=\"#2196F3\", # Blue for younger\n",
+" value_format=\".1f\",\n",
 ")\n",
 "\n",
 "fig.show()"
@@ -153,7 +163,7 @@
 "source": [
 "# Age by income quintile\n",
 "print(\"Median Age by Income Quintile:\")\n",
-"df.groupby('income_quintile')['median_age'].mean().round(1)"
+"df.groupby(\"income_quintile\")[\"median_age\"].mean().round(1)"
 ]
 }
 ],
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -47,8 +52,8 @@
 " income_quintile,\n",
 " population,\n",
 " unemployment_rate\n",
-"FROM mart_neighbourhood_demographics\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
+"FROM public_marts.mart_neighbourhood_demographics\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
 "ORDER BY median_household_income DESC\n",
 "\"\"\"\n",
 "\n",
@@ -73,19 +78,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import geopandas as gpd\n",
 "import json\n",
 "\n",
-"df['income_thousands'] = df['median_household_income'] / 1000\n",
+"import geopandas as gpd\n",
+"\n",
+"df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n",
 "\n",
 "gdf = gpd.GeoDataFrame(\n",
-" df,\n",
-" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
-" crs='EPSG:4326'\n",
+" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
 ")\n",
 "\n",
 "geojson = json.loads(gdf.to_json())\n",
-"data = df.drop(columns=['geometry']).to_dict('records')"
+"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
 ]
 },
 {
@@ -101,7 +105,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'median_household_income', 'income_index', 'income_quintile']].head(10)"
+"df[\n",
+" [\"neighbourhood_name\", \"median_household_income\", \"income_index\", \"income_quintile\"]\n",
+"].head(10)"
 ]
 },
 {
@@ -112,7 +118,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`."
+"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`."
 ]
 },
 {
@@ -122,18 +128,19 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
 "\n",
 "fig = create_choropleth_figure(\n",
 " geojson=geojson,\n",
 " data=data,\n",
-" location_key='neighbourhood_id',\n",
-" color_column='median_household_income',\n",
-" hover_data=['neighbourhood_name', 'income_index', 'income_quintile'],\n",
-" color_scale='Viridis',\n",
-" title='Toronto Median Household Income by Neighbourhood',\n",
+" location_key=\"neighbourhood_id\",\n",
+" color_column=\"median_household_income\",\n",
+" hover_data=[\"neighbourhood_name\", \"income_index\", \"income_quintile\"],\n",
+" color_scale=\"Viridis\",\n",
+" title=\"Toronto Median Household Income by Neighbourhood\",\n",
 " zoom=10,\n",
 ")\n",
 "\n",
@@ -153,7 +160,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df.groupby('income_quintile')['median_household_income'].agg(['count', 'mean', 'min', 'max']).round(0)"
+"df.groupby(\"income_quintile\")[\"median_household_income\"].agg(\n",
+" [\"count\", \"mean\", \"min\", \"max\"]\n",
+").round(0)"
 ]
 }
 ],
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -44,8 +49,8 @@
 " land_area_sqkm,\n",
 " median_household_income,\n",
 " pct_renter_occupied\n",
-"FROM mart_neighbourhood_demographics\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_demographics)\n",
+"FROM public_marts.mart_neighbourhood_demographics\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
 " AND population_density IS NOT NULL\n",
 "ORDER BY population_density DESC\n",
 "\"\"\"\n",
@@ -70,7 +75,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"data = df.head(20).to_dict('records')"
+"data = df.head(20).to_dict(\"records\")"
 ]
 },
 {
@@ -86,7 +91,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'population_density', 'population', 'land_area_sqkm']].head(10)"
+"df[[\"neighbourhood_name\", \"population_density\", \"population\", \"land_area_sqkm\"]].head(\n",
+" 10\n",
+")"
 ]
 },
 {
@@ -97,7 +104,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`."
+"Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`."
 ]
 },
 {
@@ -107,17 +114,18 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_horizontal_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n",
 "\n",
 "fig = create_horizontal_bar(\n",
 " data=data,\n",
-" name_column='neighbourhood_name',\n",
-" value_column='population_density',\n",
-" title='Top 20 Most Dense Neighbourhoods',\n",
-" color='#9C27B0',\n",
-" value_format=',.0f',\n",
+" name_column=\"neighbourhood_name\",\n",
+" value_column=\"population_density\",\n",
+" title=\"Top 20 Most Dense Neighbourhoods\",\n",
+" color=\"#9C27B0\",\n",
+" value_format=\",.0f\",\n",
 ")\n",
 "\n",
 "fig.show()"
@@ -136,7 +144,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(f\"City-wide Statistics:\")\n",
+"print(\"City-wide Statistics:\")\n",
 "print(f\" Total Population: {df['population'].sum():,.0f}\")\n",
 "print(f\" Total Area: {df['land_area_sqkm'].sum():,.1f} sq km\")\n",
 "print(f\" Average Density: {df['population_density'].mean():,.0f} per sq km\")\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -47,8 +52,8 @@
 " avg_rent_2bed,\n",
 " median_household_income,\n",
 " is_affordable\n",
-"FROM mart_neighbourhood_housing\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_housing)\n",
+"FROM public_marts.mart_neighbourhood_housing\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_housing)\n",
 "ORDER BY affordability_index ASC\n",
 "\"\"\"\n",
 "\n",
@@ -73,17 +78,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import geopandas as gpd\n",
 "import json\n",
 "\n",
+"import geopandas as gpd\n",
+"\n",
 "gdf = gpd.GeoDataFrame(\n",
-" df,\n",
-" geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
-" crs='EPSG:4326'\n",
+" df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
 ")\n",
 "\n",
 "geojson = json.loads(gdf.to_json())\n",
-"data = df.drop(columns=['geometry']).to_dict('records')"
+"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
 ]
 },
 {
@@ -99,7 +103,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'affordability_index', 'rent_to_income_pct', 'avg_rent_2bed', 'is_affordable']].head(10)"
+"df[\n",
+" [\n",
+" \"neighbourhood_name\",\n",
+" \"affordability_index\",\n",
+" \"rent_to_income_pct\",\n",
+" \"avg_rent_2bed\",\n",
+" \"is_affordable\",\n",
+" ]\n",
+"].head(10)"
 ]
 },
 {
@@ -110,7 +122,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
+"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `color_column`: 'affordability_index'\n",
@@ -124,18 +136,19 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
 "\n",
 "fig = create_choropleth_figure(\n",
 " geojson=geojson,\n",
 " data=data,\n",
-" location_key='neighbourhood_id',\n",
-" color_column='affordability_index',\n",
-" hover_data=['neighbourhood_name', 'rent_to_income_pct', 'avg_rent_2bed'],\n",
-" color_scale='RdYlGn_r', # Reversed: lower index (affordable) = green\n",
-" title='Toronto Housing Affordability Index',\n",
+" location_key=\"neighbourhood_id\",\n",
+" color_column=\"affordability_index\",\n",
+" hover_data=[\"neighbourhood_name\", \"rent_to_income_pct\", \"avg_rent_2bed\"],\n",
+" color_scale=\"RdYlGn_r\", # Reversed: lower index (affordable) = green\n",
+" title=\"Toronto Housing Affordability Index\",\n",
 " zoom=10,\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sqlalchemy import create_engine\n",
|
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"\n",
|
"\n",
|
||||||
"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
|
"import pandas as pd\n",
|
||||||
|
"from dotenv import load_dotenv\n",
|
||||||
|
"from sqlalchemy import create_engine\n",
|
||||||
|
"\n",
|
||||||
|
"# Load .env from project root\n",
|
||||||
|
"load_dotenv(\"../../.env\")\n",
|
||||||
|
"\n",
|
||||||
|
"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# City-wide average rent by year\n",
|
"# City-wide average rent by year\n",
|
||||||
"query = \"\"\"\n",
|
"query = \"\"\"\n",
|
||||||
@@ -45,8 +50,8 @@
|
|||||||
" AVG(avg_rent_2bed) as avg_rent_2bed,\n",
|
" AVG(avg_rent_2bed) as avg_rent_2bed,\n",
|
||||||
" AVG(avg_rent_3bed) as avg_rent_3bed,\n",
|
" AVG(avg_rent_3bed) as avg_rent_3bed,\n",
|
||||||
" AVG(rent_yoy_change_pct) as avg_yoy_change\n",
|
" AVG(rent_yoy_change_pct) as avg_yoy_change\n",
|
||||||
"FROM mart_neighbourhood_housing\n",
|
"FROM public_marts.mart_neighbourhood_housing\n",
|
||||||
"WHERE year >= (SELECT MAX(year) - 5 FROM mart_neighbourhood_housing)\n",
|
"WHERE year >= (SELECT MAX(year) - 5 FROM public_marts.mart_neighbourhood_housing)\n",
|
||||||
"GROUP BY year\n",
|
"GROUP BY year\n",
|
||||||
"ORDER BY year\n",
|
"ORDER BY year\n",
|
||||||
"\"\"\"\n",
|
"\"\"\"\n",
|
||||||
@@ -73,23 +78,25 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Create date column from year\n",
|
"# Create date column from year\n",
|
||||||
"df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n",
|
"df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Melt for multi-line chart\n",
|
"# Melt for multi-line chart\n",
|
||||||
"df_melted = df.melt(\n",
|
"df_melted = df.melt(\n",
|
||||||
" id_vars=['year', 'date'],\n",
|
" id_vars=[\"year\", \"date\"],\n",
|
||||||
" value_vars=['avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed'],\n",
|
" value_vars=[\"avg_rent_bachelor\", \"avg_rent_1bed\", \"avg_rent_2bed\", \"avg_rent_3bed\"],\n",
|
||||||
" var_name='bedroom_type',\n",
|
" var_name=\"bedroom_type\",\n",
|
||||||
" value_name='avg_rent'\n",
|
" value_name=\"avg_rent\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Clean labels\n",
|
"# Clean labels\n",
|
||||||
"df_melted['bedroom_type'] = df_melted['bedroom_type'].map({\n",
|
"df_melted[\"bedroom_type\"] = df_melted[\"bedroom_type\"].map(\n",
|
||||||
" 'avg_rent_bachelor': 'Bachelor',\n",
|
" {\n",
|
||||||
" 'avg_rent_1bed': '1 Bedroom',\n",
|
" \"avg_rent_bachelor\": \"Bachelor\",\n",
|
||||||
" 'avg_rent_2bed': '2 Bedroom',\n",
|
" \"avg_rent_1bed\": \"1 Bedroom\",\n",
|
||||||
" 'avg_rent_3bed': '3 Bedroom'\n",
|
" \"avg_rent_2bed\": \"2 Bedroom\",\n",
|
||||||
"})"
|
" \"avg_rent_3bed\": \"3 Bedroom\",\n",
|
||||||
|
" }\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -105,7 +112,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['year', 'avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed', 'avg_yoy_change']]"
+"df[\n",
+"    [\n",
+"        \"year\",\n",
+"        \"avg_rent_bachelor\",\n",
+"        \"avg_rent_1bed\",\n",
+"        \"avg_rent_2bed\",\n",
+"        \"avg_rent_3bed\",\n",
+"        \"avg_yoy_change\",\n",
+"    ]\n",
+"]"
 ]
 },
 {
@@ -116,7 +132,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_price_time_series` from `portfolio_app.figures.time_series`.\n",
+"Uses `create_price_time_series` from `portfolio_app.figures.toronto.time_series`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `date_column`: 'date'\n",
@@ -131,18 +147,19 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.time_series import create_price_time_series\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.time_series import create_price_time_series\n",
 "\n",
-"data = df_melted.to_dict('records')\n",
+"data = df_melted.to_dict(\"records\")\n",
 "\n",
 "fig = create_price_time_series(\n",
 "    data=data,\n",
-"    date_column='date',\n",
-"    price_column='avg_rent',\n",
-"    group_column='bedroom_type',\n",
-"    title='Toronto Average Rent Trend (5 Years)',\n",
+"    date_column=\"date\",\n",
+"    price_column=\"avg_rent\",\n",
+"    group_column=\"bedroom_type\",\n",
+"    title=\"Toronto Average Rent Trend (5 Years)\",\n",
 ")\n",
 "\n",
 "fig.show()"
@@ -163,7 +180,7 @@
 "source": [
 "# Show year-over-year changes\n",
 "print(\"Year-over-Year Rent Change (%)\")\n",
-"df[['year', 'avg_yoy_change']].dropna()"
+"df[[\"year\", \"avg_yoy_change\"]].dropna()"
 ]
 }
 ],
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -44,8 +49,8 @@
 "    income_quintile,\n",
 "    total_rental_units,\n",
 "    average_dwelling_value\n",
-"FROM mart_neighbourhood_housing\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_housing)\n",
+"FROM public_marts.mart_neighbourhood_housing\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_housing)\n",
 "    AND pct_owner_occupied IS NOT NULL\n",
 "ORDER BY pct_renter_occupied DESC\n",
 "\"\"\"\n",
@@ -73,18 +78,17 @@
 "source": [
 "# Prepare for stacked bar\n",
 "df_stacked = df.melt(\n",
-"    id_vars=['neighbourhood_name', 'income_quintile'],\n",
-"    value_vars=['pct_owner_occupied', 'pct_renter_occupied'],\n",
-"    var_name='tenure_type',\n",
-"    value_name='percentage'\n",
+"    id_vars=[\"neighbourhood_name\", \"income_quintile\"],\n",
+"    value_vars=[\"pct_owner_occupied\", \"pct_renter_occupied\"],\n",
+"    var_name=\"tenure_type\",\n",
+"    value_name=\"percentage\",\n",
 ")\n",
 "\n",
-"df_stacked['tenure_type'] = df_stacked['tenure_type'].map({\n",
-"    'pct_owner_occupied': 'Owner',\n",
-"    'pct_renter_occupied': 'Renter'\n",
-"})\n",
+"df_stacked[\"tenure_type\"] = df_stacked[\"tenure_type\"].map(\n",
+"    {\"pct_owner_occupied\": \"Owner\", \"pct_renter_occupied\": \"Renter\"}\n",
+")\n",
 "\n",
-"data = df_stacked.to_dict('records')"
+"data = df_stacked.to_dict(\"records\")"
 ]
 },
 {
@@ -101,7 +105,14 @@
 "outputs": [],
 "source": [
 "print(\"Highest Renter Neighbourhoods:\")\n",
-"df[['neighbourhood_name', 'pct_renter_occupied', 'pct_owner_occupied', 'income_quintile']].head(10)"
+"df[\n",
+"    [\n",
+"        \"neighbourhood_name\",\n",
+"        \"pct_renter_occupied\",\n",
+"        \"pct_owner_occupied\",\n",
+"        \"income_quintile\",\n",
+"    ]\n",
+"].head(10)"
 ]
 },
 {
@@ -112,7 +123,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`.\n",
+"Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `x_column`: 'neighbourhood_name'\n",
@@ -128,21 +139,22 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_stacked_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n",
 "\n",
 "# Show top 20 by renter percentage\n",
-"top_20_names = df.head(20)['neighbourhood_name'].tolist()\n",
-"data_filtered = [d for d in data if d['neighbourhood_name'] in top_20_names]\n",
+"top_20_names = df.head(20)[\"neighbourhood_name\"].tolist()\n",
+"data_filtered = [d for d in data if d[\"neighbourhood_name\"] in top_20_names]\n",
 "\n",
 "fig = create_stacked_bar(\n",
 "    data=data_filtered,\n",
-"    x_column='neighbourhood_name',\n",
-"    value_column='percentage',\n",
-"    category_column='tenure_type',\n",
-"    title='Housing Tenure Mix - Top 20 Renter Neighbourhoods',\n",
-"    color_map={'Owner': '#4CAF50', 'Renter': '#2196F3'},\n",
+"    x_column=\"neighbourhood_name\",\n",
+"    value_column=\"percentage\",\n",
+"    category_column=\"tenure_type\",\n",
+"    title=\"Housing Tenure Mix - Top 20 Renter Neighbourhoods\",\n",
+"    color_map={\"Owner\": \"#4CAF50\", \"Renter\": \"#2196F3\"},\n",
 "    show_percentages=True,\n",
 ")\n",
 "\n",
@@ -168,7 +180,9 @@
 "\n",
 "# By income quintile\n",
 "print(\"\\nTenure by Income Quintile:\")\n",
-"df.groupby('income_quintile')[['pct_owner_occupied', 'pct_renter_occupied']].mean().round(1)"
+"df.groupby(\"income_quintile\")[\n",
+"    [\"pct_owner_occupied\", \"pct_renter_occupied\"]\n",
+"].mean().round(1)"
 ]
 }
 ],
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -44,8 +49,8 @@
 "    population,\n",
 "    livability_score,\n",
 "    crime_rate_per_100k\n",
-"FROM mart_neighbourhood_overview\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
+"FROM public_marts.mart_neighbourhood_overview\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
 "    AND median_household_income IS NOT NULL\n",
 "    AND safety_score IS NOT NULL\n",
 "ORDER BY median_household_income DESC\n",
@@ -73,10 +78,10 @@
 "outputs": [],
 "source": [
 "# Scale income to thousands for better axis readability\n",
-"df['income_thousands'] = df['median_household_income'] / 1000\n",
+"df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n",
 "\n",
 "# Prepare data for figure factory\n",
-"data = df.to_dict('records')"
+"data = df.to_dict(\"records\")"
 ]
 },
 {
@@ -92,7 +97,14 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'median_household_income', 'safety_score', 'crime_rate_per_100k']].head(10)"
+"df[\n",
+"    [\n",
+"        \"neighbourhood_name\",\n",
+"        \"median_household_income\",\n",
+"        \"safety_score\",\n",
+"        \"crime_rate_per_100k\",\n",
+"    ]\n",
+"].head(10)"
 ]
 },
 {
@@ -103,7 +115,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_scatter_figure` from `portfolio_app.figures.scatter`.\n",
+"Uses `create_scatter_figure` from `portfolio_app.figures.toronto.scatter`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `x_column`: 'income_thousands' (median household income in $K)\n",
@@ -120,19 +132,20 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.scatter import create_scatter_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.scatter import create_scatter_figure\n",
 "\n",
 "fig = create_scatter_figure(\n",
 "    data=data,\n",
-"    x_column='income_thousands',\n",
-"    y_column='safety_score',\n",
-"    name_column='neighbourhood_name',\n",
-"    size_column='population',\n",
-"    title='Income vs Safety by Neighbourhood',\n",
-"    x_title='Median Household Income ($K)',\n",
-"    y_title='Safety Score (0-100)',\n",
+"    x_column=\"income_thousands\",\n",
+"    y_column=\"safety_score\",\n",
+"    name_column=\"neighbourhood_name\",\n",
+"    size_column=\"population\",\n",
+"    title=\"Income vs Safety by Neighbourhood\",\n",
+"    x_title=\"Median Household Income ($K)\",\n",
+"    y_title=\"Safety Score (0-100)\",\n",
 "    trendline=True,\n",
 ")\n",
 "\n",
@@ -162,7 +175,7 @@
 "outputs": [],
 "source": [
 "# Calculate correlation coefficient\n",
-"correlation = df['median_household_income'].corr(df['safety_score'])\n",
+"correlation = df[\"median_household_income\"].corr(df[\"safety_score\"])\n",
 "print(f\"Correlation coefficient (Income vs Safety): {correlation:.3f}\")"
 ]
 }
@@ -30,12 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"# Connect to database\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -49,8 +53,8 @@
 "    amenity_score,\n",
 "    population,\n",
 "    median_household_income\n",
-"FROM mart_neighbourhood_overview\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
+"FROM public_marts.mart_neighbourhood_overview\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
 "ORDER BY livability_score DESC\n",
 "\"\"\"\n",
 "\n",
@@ -76,21 +80,20 @@
 "outputs": [],
 "source": [
 "# Transform geometry to GeoJSON\n",
-"import geopandas as gpd\n",
 "import json\n",
 "\n",
+"import geopandas as gpd\n",
+"\n",
 "# Convert WKB geometry to GeoDataFrame\n",
 "gdf = gpd.GeoDataFrame(\n",
-"    df,\n",
-"    geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
-"    crs='EPSG:4326'\n",
+"    df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
 ")\n",
 "\n",
 "# Create GeoJSON FeatureCollection\n",
 "geojson = json.loads(gdf.to_json())\n",
 "\n",
 "# Prepare data for figure factory\n",
-"data = df.drop(columns=['geometry']).to_dict('records')"
+"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
 ]
 },
 {
@@ -106,7 +109,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'livability_score', 'safety_score', 'affordability_score', 'amenity_score']].head(10)"
+"df[\n",
+"    [\n",
+"        \"neighbourhood_name\",\n",
+"        \"livability_score\",\n",
+"        \"safety_score\",\n",
+"        \"affordability_score\",\n",
+"        \"amenity_score\",\n",
+"    ]\n",
+"].head(10)"
 ]
 },
 {
@@ -117,7 +128,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
+"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `geojson`: GeoJSON FeatureCollection with neighbourhood boundaries\n",
@@ -134,18 +145,24 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
 "\n",
 "fig = create_choropleth_figure(\n",
 "    geojson=geojson,\n",
 "    data=data,\n",
-"    location_key='neighbourhood_id',\n",
-"    color_column='livability_score',\n",
-"    hover_data=['neighbourhood_name', 'safety_score', 'affordability_score', 'amenity_score'],\n",
-"    color_scale='RdYlGn',\n",
-"    title='Toronto Neighbourhood Livability Score',\n",
+"    location_key=\"neighbourhood_id\",\n",
+"    color_column=\"livability_score\",\n",
+"    hover_data=[\n",
+"        \"neighbourhood_name\",\n",
+"        \"safety_score\",\n",
+"        \"affordability_score\",\n",
+"        \"amenity_score\",\n",
+"    ],\n",
+"    color_scale=\"RdYlGn\",\n",
+"    title=\"Toronto Neighbourhood Livability Score\",\n",
 "    zoom=10,\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -43,8 +48,8 @@
 "    safety_score,\n",
 "    affordability_score,\n",
 "    amenity_score\n",
-"FROM mart_neighbourhood_overview\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_overview)\n",
+"FROM public_marts.mart_neighbourhood_overview\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n",
 "    AND livability_score IS NOT NULL\n",
 "ORDER BY livability_score DESC\n",
 "\"\"\"\n",
@@ -72,7 +77,7 @@
 "source": [
 "# The figure factory handles top/bottom selection internally\n",
 "# Just prepare as list of dicts\n",
-"data = df.to_dict('records')"
+"data = df.to_dict(\"records\")"
 ]
 },
 {
@@ -102,7 +107,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`.\n",
+"Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `data`: List of dicts with all neighbourhoods\n",
@@ -119,20 +124,21 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_ranking_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n",
 "\n",
 "fig = create_ranking_bar(\n",
 "    data=data,\n",
-"    name_column='neighbourhood_name',\n",
-"    value_column='livability_score',\n",
-"    title='Top & Bottom 10 Neighbourhoods by Livability',\n",
+"    name_column=\"neighbourhood_name\",\n",
+"    value_column=\"livability_score\",\n",
+"    title=\"Top & Bottom 10 Neighbourhoods by Livability\",\n",
 "    top_n=10,\n",
 "    bottom_n=10,\n",
-"    color_top='#4CAF50',  # Green for top performers\n",
-"    color_bottom='#F44336',  # Red for bottom performers\n",
-"    value_format='.1f',\n",
+"    color_top=\"#4CAF50\",  # Green for top performers\n",
+"    color_bottom=\"#F44336\",  # Red for bottom performers\n",
+"    value_format=\".1f\",\n",
 ")\n",
 "\n",
 "fig.show()"
0
notebooks/toronto/safety/.gitkeep
Normal file
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -47,8 +52,8 @@
 "    homicide_count,\n",
 "    total_incidents,\n",
 "    crime_rate_per_100k\n",
-"FROM mart_neighbourhood_safety\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_safety)\n",
+"FROM public_marts.mart_neighbourhood_safety\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_safety)\n",
 "ORDER BY total_incidents DESC\n",
 "LIMIT 15\n",
 "\"\"\"\n",
@@ -75,17 +80,25 @@
 "outputs": [],
 "source": [
 "df_melted = df.melt(\n",
-"    id_vars=['neighbourhood_name', 'total_incidents'],\n",
-"    value_vars=['assault_count', 'auto_theft_count', 'break_enter_count', \n",
-"                'robbery_count', 'theft_over_count', 'homicide_count'],\n",
-"    var_name='crime_type',\n",
-"    value_name='count'\n",
+"    id_vars=[\"neighbourhood_name\", \"total_incidents\"],\n",
+"    value_vars=[\n",
+"        \"assault_count\",\n",
+"        \"auto_theft_count\",\n",
+"        \"break_enter_count\",\n",
+"        \"robbery_count\",\n",
+"        \"theft_over_count\",\n",
+"        \"homicide_count\",\n",
+"    ],\n",
+"    var_name=\"crime_type\",\n",
+"    value_name=\"count\",\n",
 ")\n",
 "\n",
 "# Clean labels\n",
-"df_melted['crime_type'] = df_melted['crime_type'].str.replace('_count', '').str.replace('_', ' ').str.title()\n",
+"df_melted[\"crime_type\"] = (\n",
+"    df_melted[\"crime_type\"].str.replace(\"_count\", \"\").str.replace(\"_\", \" \").str.title()\n",
+")\n",
 "\n",
-"data = df_melted.to_dict('records')"
+"data = df_melted.to_dict(\"records\")"
 ]
 },
 {
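Note: the chained string methods above derive display labels from the count column names. For a single value the transformation looks like this:

import pandas as pd

s = pd.Series(["auto_theft_count"])
# strip the suffix, replace underscores, then title-case
print(s.str.replace("_count", "").str.replace("_", " ").str.title().iloc[0])
# Auto Theft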
@@ -101,7 +114,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'assault_count', 'auto_theft_count', 'break_enter_count', 'total_incidents']].head(10)"
+"df[\n",
+"    [\n",
+"        \"neighbourhood_name\",\n",
+"        \"assault_count\",\n",
+"        \"auto_theft_count\",\n",
+"        \"break_enter_count\",\n",
+"        \"total_incidents\",\n",
+"    ]\n",
+"].head(10)"
 ]
 },
 {
@@ -112,7 +133,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`."
+"Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`."
 ]
 },
 {
@@ -122,23 +143,24 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.bar_charts import create_stacked_bar\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n",
 "\n",
 "fig = create_stacked_bar(\n",
 "    data=data,\n",
-"    x_column='neighbourhood_name',\n",
-"    value_column='count',\n",
-"    category_column='crime_type',\n",
-"    title='Crime Type Breakdown - Top 15 Neighbourhoods',\n",
+"    x_column=\"neighbourhood_name\",\n",
+"    value_column=\"count\",\n",
+"    category_column=\"crime_type\",\n",
+"    title=\"Crime Type Breakdown - Top 15 Neighbourhoods\",\n",
 "    color_map={\n",
-"        'Assault': '#d62728',\n",
-"        'Auto Theft': '#ff7f0e',\n",
-"        'Break Enter': '#9467bd',\n",
-"        'Robbery': '#8c564b',\n",
-"        'Theft Over': '#e377c2',\n",
-"        'Homicide': '#1f77b4'\n",
+"        \"Assault\": \"#d62728\",\n",
+"        \"Auto Theft\": \"#ff7f0e\",\n",
+"        \"Break Enter\": \"#9467bd\",\n",
+"        \"Robbery\": \"#8c564b\",\n",
+"        \"Theft Over\": \"#e377c2\",\n",
+"        \"Homicide\": \"#1f77b4\",\n",
 "    },\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -47,8 +52,8 @@
 "    safety_tier,\n",
 "    total_incidents,\n",
 "    population\n",
-"FROM mart_neighbourhood_safety\n",
-"WHERE year = (SELECT MAX(year) FROM mart_neighbourhood_safety)\n",
+"FROM public_marts.mart_neighbourhood_safety\n",
+"WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_safety)\n",
 "ORDER BY crime_rate_per_100k DESC\n",
 "\"\"\"\n",
 "\n",
@@ -73,17 +78,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import geopandas as gpd\n",
 "import json\n",
 "\n",
+"import geopandas as gpd\n",
+"\n",
 "gdf = gpd.GeoDataFrame(\n",
-"    df,\n",
-"    geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n",
-"    crs='EPSG:4326'\n",
+"    df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n",
 ")\n",
 "\n",
 "geojson = json.loads(gdf.to_json())\n",
-"data = df.drop(columns=['geometry']).to_dict('records')"
+"data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")"
 ]
 },
 {
@@ -99,7 +103,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['neighbourhood_name', 'crime_rate_per_100k', 'crime_index', 'safety_tier', 'total_incidents']].head(10)"
+"df[\n",
+"    [\n",
+"        \"neighbourhood_name\",\n",
+"        \"crime_rate_per_100k\",\n",
+"        \"crime_index\",\n",
+"        \"safety_tier\",\n",
+"        \"total_incidents\",\n",
+"    ]\n",
+"].head(10)"
 ]
 },
 {
@@ -110,7 +122,7 @@
 "\n",
 "### Figure Factory\n",
 "\n",
-"Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n",
+"Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n",
 "\n",
 "**Key Parameters:**\n",
 "- `color_column`: 'crime_rate_per_100k'\n",
@@ -124,18 +136,19 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.choropleth import create_choropleth_figure\n",
+"sys.path.insert(0, \"../..\")\n",
+"\n",
+"from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n",
 "\n",
 "fig = create_choropleth_figure(\n",
 "    geojson=geojson,\n",
 "    data=data,\n",
-"    location_key='neighbourhood_id',\n",
-"    color_column='crime_rate_per_100k',\n",
-"    hover_data=['neighbourhood_name', 'crime_index', 'total_incidents'],\n",
-"    color_scale='RdYlGn_r',\n",
-"    title='Toronto Crime Rate per 100,000 Population',\n",
+"    location_key=\"neighbourhood_id\",\n",
+"    color_column=\"crime_rate_per_100k\",\n",
+"    hover_data=[\"neighbourhood_name\", \"crime_index\", \"total_incidents\"],\n",
+"    color_scale=\"RdYlGn_r\",\n",
+"    title=\"Toronto Crime Rate per 100,000 Population\",\n",
 "    zoom=10,\n",
 ")\n",
 "\n",
@@ -30,11 +30,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import pandas as pd\n",
-"from sqlalchemy import create_engine\n",
 "import os\n",
 "\n",
-"engine = create_engine(os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio'))\n",
+"import pandas as pd\n",
+"from dotenv import load_dotenv\n",
+"from sqlalchemy import create_engine\n",
+"\n",
+"# Load .env from project root\n",
+"load_dotenv(\"../../.env\")\n",
+"\n",
+"engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
 "\n",
 "query = \"\"\"\n",
 "SELECT\n",
@@ -45,8 +50,8 @@
 "    AVG(break_enter_rate_per_100k) as avg_break_enter_rate,\n",
 "    SUM(total_incidents) as total_city_incidents,\n",
 "    AVG(crime_yoy_change_pct) as avg_yoy_change\n",
-"FROM mart_neighbourhood_safety\n",
-"WHERE year >= (SELECT MAX(year) - 5 FROM mart_neighbourhood_safety)\n",
+"FROM public_marts.mart_neighbourhood_safety\n",
+"WHERE year >= (SELECT MAX(year) - 5 FROM public_marts.mart_neighbourhood_safety)\n",
 "GROUP BY year\n",
 "ORDER BY year\n",
 "\"\"\"\n",
@@ -72,21 +77,23 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n",
+"df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n",
 "\n",
 "# Melt for multi-line\n",
 "df_melted = df.melt(\n",
-"    id_vars=['year', 'date'],\n",
-"    value_vars=['avg_assault_rate', 'avg_auto_theft_rate', 'avg_break_enter_rate'],\n",
-"    var_name='crime_type',\n",
-"    value_name='rate_per_100k'\n",
+"    id_vars=[\"year\", \"date\"],\n",
+"    value_vars=[\"avg_assault_rate\", \"avg_auto_theft_rate\", \"avg_break_enter_rate\"],\n",
+"    var_name=\"crime_type\",\n",
+"    value_name=\"rate_per_100k\",\n",
 ")\n",
 "\n",
-"df_melted['crime_type'] = df_melted['crime_type'].map({\n",
-"    'avg_assault_rate': 'Assault',\n",
-"    'avg_auto_theft_rate': 'Auto Theft',\n",
-"    'avg_break_enter_rate': 'Break & Enter'\n",
-"})"
+"df_melted[\"crime_type\"] = df_melted[\"crime_type\"].map(\n",
+"    {\n",
+"        \"avg_assault_rate\": \"Assault\",\n",
+"        \"avg_auto_theft_rate\": \"Auto Theft\",\n",
+"        \"avg_break_enter_rate\": \"Break & Enter\",\n",
+"    }\n",
+")"
 ]
 },
 {
@@ -102,7 +109,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df[['year', 'avg_crime_rate', 'total_city_incidents', 'avg_yoy_change']]"
+"df[[\"year\", \"avg_crime_rate\", \"total_city_incidents\", \"avg_yoy_change\"]]"
 ]
 },
 {
@@ -123,22 +130,23 @@
 "outputs": [],
 "source": [
 "import sys\n",
-"sys.path.insert(0, '../..')\n",
 "\n",
-"from portfolio_app.figures.time_series import create_price_time_series\n",
+"sys.path.insert(0, \"../..\")\n",
 "\n",
-"data = df_melted.to_dict('records')\n",
+"from portfolio_app.figures.toronto.time_series import create_price_time_series\n",
+"\n",
+"data = df_melted.to_dict(\"records\")\n",
 "\n",
 "fig = create_price_time_series(\n",
 "    data=data,\n",
-"    date_column='date',\n",
-"    price_column='rate_per_100k',\n",
-"    group_column='crime_type',\n",
-"    title='Toronto Crime Trends by Type (5 Years)',\n",
+"    date_column=\"date\",\n",
+"    price_column=\"rate_per_100k\",\n",
+"    group_column=\"crime_type\",\n",
+"    title=\"Toronto Crime Trends by Type (5 Years)\",\n",
 ")\n",
 "\n",
 "# Remove dollar sign formatting since this is rate data\n",
-"fig.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n",
+"fig.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n",
 "\n",
 "fig.show()"
 ]
@@ -157,15 +165,19 @@
 "outputs": [],
 "source": [
 "# Total crime rate trend\n",
-"total_data = df[['date', 'avg_crime_rate']].rename(columns={'avg_crime_rate': 'total_rate'}).to_dict('records')\n",
+"total_data = (\n",
+"    df[[\"date\", \"avg_crime_rate\"]]\n",
+"    .rename(columns={\"avg_crime_rate\": \"total_rate\"})\n",
+"    .to_dict(\"records\")\n",
+")\n",
 "\n",
 "fig2 = create_price_time_series(\n",
 "    data=total_data,\n",
-"    date_column='date',\n",
-"    price_column='total_rate',\n",
-"    title='Toronto Overall Crime Rate Trend',\n",
+"    date_column=\"date\",\n",
+"    price_column=\"total_rate\",\n",
+"    title=\"Toronto Overall Crime Rate Trend\",\n",
 ")\n",
-"fig2.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n",
+"fig2.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n",
 "fig2.show()"
 ]
 }
@@ -1,5 +1,5 @@
 """Application-level callbacks for the portfolio app."""
 
-from . import sidebar, theme
+from . import contact, sidebar, theme
 
-__all__ = ["sidebar", "theme"]
+__all__ = ["contact", "sidebar", "theme"]
214
portfolio_app/callbacks/contact.py
Normal file
@@ -0,0 +1,214 @@
"""Contact form submission callback with Formspree integration."""

import re
from typing import Any

import dash_mantine_components as dmc
import requests
from dash import Input, Output, State, callback, no_update
from dash_iconify import DashIconify

FORMSPREE_ENDPOINT = "https://formspree.io/f/mqelqzpd"
EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")


def _validate_form(
    name: str | None, email: str | None, message: str | None
) -> str | None:
    """Validate form fields and return error message if invalid."""
    if not name or not name.strip():
        return "Please enter your name."
    if not email or not email.strip():
        return "Please enter your email address."
    if not EMAIL_REGEX.match(email.strip()):
        return "Please enter a valid email address."
    if not message or not message.strip():
        return "Please enter a message."
    return None


def _create_success_alert() -> dmc.Alert:
    """Create success feedback alert."""
    return dmc.Alert(
        "Thank you for your message! I'll get back to you soon.",
        title="Message Sent",
        color="green",
        variant="light",
        icon=DashIconify(icon="tabler:check", width=20),
        withCloseButton=True,
    )


def _create_error_alert(message: str) -> dmc.Alert:
    """Create error feedback alert."""
    return dmc.Alert(
        message,
        title="Error",
        color="red",
        variant="light",
        icon=DashIconify(icon="tabler:alert-circle", width=20),
        withCloseButton=True,
    )


@callback(  # type: ignore[misc]
    Output("contact-feedback", "children"),
    Output("contact-submit", "loading"),
    Output("contact-name", "value"),
    Output("contact-email", "value"),
    Output("contact-subject", "value"),
    Output("contact-message", "value"),
    Output("contact-name", "error"),
    Output("contact-email", "error"),
    Output("contact-message", "error"),
    Input("contact-submit", "n_clicks"),
    State("contact-name", "value"),
    State("contact-email", "value"),
    State("contact-subject", "value"),
    State("contact-message", "value"),
    State("contact-gotcha", "value"),
    prevent_initial_call=True,
)
def submit_contact_form(
    n_clicks: int | None,
    name: str | None,
    email: str | None,
    subject: str | None,
    message: str | None,
    gotcha: str | None,
) -> tuple[Any, ...]:
    """Submit contact form to Formspree.

    Args:
        n_clicks: Button click count.
        name: User's name.
        email: User's email address.
        subject: Message subject (optional).
        message: Message content.
        gotcha: Honeypot field value (should be empty for real users).

    Returns:
        Tuple of (feedback, loading, name, email, subject, message,
        name_error, email_error, message_error).
    """
    if not n_clicks:
        return (no_update,) * 9

    # Check honeypot - if filled, silently "succeed" (it's a bot)
    if gotcha:
        return (
            _create_success_alert(),
            False,
            "",
            "",
            None,
            "",
            None,
            None,
            None,
        )

    # Validate form
    validation_error = _validate_form(name, email, message)
    if validation_error:
        # Determine which field has the error
        name_error = "Required" if not name or not name.strip() else None
        email_error = None
        message_error = "Required" if not message or not message.strip() else None

        if not email or not email.strip():
            email_error = "Required"
        elif not EMAIL_REGEX.match(email.strip()):
            email_error = "Invalid email format"

        return (
            _create_error_alert(validation_error),
            False,
            no_update,
            no_update,
            no_update,
            no_update,
            name_error,
            email_error,
            message_error,
        )

    # Prepare form data (validation passed, so name/email/message are not None)
    assert name is not None
    assert email is not None
    assert message is not None
    form_data = {
        "name": name.strip(),
        "email": email.strip(),
        "subject": subject or "General Inquiry",
        "message": message.strip(),
        "_gotcha": "",  # Formspree honeypot
    }

    # Submit to Formspree
    try:
        response = requests.post(
            FORMSPREE_ENDPOINT,
            json=form_data,
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
            },
            timeout=10,
        )

        if response.status_code == 200:
            # Success - clear form
            return (
                _create_success_alert(),
                False,
                "",
                "",
                None,
                "",
                None,
                None,
                None,
            )
        else:
            # Formspree returned an error
            return (
                _create_error_alert(
                    "Failed to send message. Please try again or use direct contact."
                ),
                False,
                no_update,
                no_update,
                no_update,
                no_update,
                None,
                None,
                None,
            )

    except requests.exceptions.Timeout:
        return (
            _create_error_alert("Request timed out. Please try again."),
            False,
            no_update,
            no_update,
            no_update,
            no_update,
            None,
            None,
            None,
        )
    except requests.exceptions.RequestException:
        return (
            _create_error_alert(
                "Network error. Please check your connection and try again."
            ),
            False,
            no_update,
            no_update,
            no_update,
            no_update,
            None,
            None,
            None,
        )
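Note: a quick ad-hoc check of the validation helper above (a hypothetical REPL session, not part of the committed code):

from portfolio_app.callbacks.contact import _validate_form

assert _validate_form(None, "a@b.co", "hi") == "Please enter your name."
assert _validate_form("Ana", "not-an-email", "hi") == "Please enter a valid email address."
assert _validate_form("Ana", "a@b.co", "hi") is None  # all fields pass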
@@ -28,7 +28,7 @@ def create_metric_selector(
         label=label,
         data=options,
         value=default_value or (options[0]["value"] if options else None),
-        style={"width": "200px"},
+        w=200,
     )
 
 
@@ -64,7 +64,7 @@ def create_map_controls(
             id=f"{id_prefix}-layer-toggle",
             label="Show Boundaries",
             checked=True,
-            style={"marginTop": "10px"},
+            mt="sm",
         )
     )
 
@@ -5,7 +5,7 @@ from typing import Any
 import dash_mantine_components as dmc
 from dash import dcc
 
-from portfolio_app.figures.summary_cards import create_metric_card_figure
+from portfolio_app.figures.toronto.summary_cards import create_metric_card_figure
 
 
 class MetricCard:
@@ -38,7 +38,7 @@ def create_year_selector(
         label=label,
         data=options,
         value=str(default_year),
-        style={"width": "120px"},
+        w=120,
     )
 
 
@@ -83,7 +83,8 @@ def create_time_slider(
             marks=marks,
             step=1,
             minRange=1,
-            style={"marginTop": "20px", "marginBottom": "10px"},
+            mt="md",
+            mb="sm",
         ),
     ],
     p="md",
@@ -131,5 +132,5 @@ def create_month_selector(
         label=label,
         data=options,
         value=str(default_month),
-        style={"width": "140px"},
+        w=140,
     )
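Note: the component changes in this file all swap inline style dicts for dash-mantine-components style props; a before/after sketch of the idiom:

import dash_mantine_components as dmc

# Before: raw CSS through the generic style dict
legacy = dmc.Select(data=[], style={"width": "200px"})
# After: the equivalent Mantine style prop
current = dmc.Select(data=[], w=200)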
48
portfolio_app/design/__init__.py
Normal file
@@ -0,0 +1,48 @@
"""Design system tokens and utilities."""

from .tokens import (
    CHART_PALETTE,
    COLOR_ACCENT,
    COLOR_NEGATIVE,
    COLOR_POSITIVE,
    COLOR_WARNING,
    GRID_COLOR,
    GRID_COLOR_DARK,
    PALETTE_COMPARISON,
    PALETTE_GENDER,
    PALETTE_TREND,
    PAPER_BG,
    PLOT_BG,
    POLICY_COLORS,
    TEXT_MUTED,
    TEXT_PRIMARY,
    TEXT_SECONDARY,
    get_colorbar_defaults,
    get_default_layout,
)

__all__ = [
    # Text colors
    "TEXT_PRIMARY",
    "TEXT_SECONDARY",
    "TEXT_MUTED",
    # Chart backgrounds
    "GRID_COLOR",
    "GRID_COLOR_DARK",
    "PAPER_BG",
    "PLOT_BG",
    # Semantic colors
    "COLOR_POSITIVE",
    "COLOR_NEGATIVE",
    "COLOR_WARNING",
    "COLOR_ACCENT",
    # Palettes
    "CHART_PALETTE",
    "PALETTE_COMPARISON",
    "PALETTE_GENDER",
    "PALETTE_TREND",
    "POLICY_COLORS",
    # Utility functions
    "get_default_layout",
    "get_colorbar_defaults",
]
162
portfolio_app/design/tokens.py
Normal file
@@ -0,0 +1,162 @@
"""Centralized design tokens for consistent styling across the application.

This module provides a single source of truth for colors, ensuring:
- Consistent styling across all Plotly figures and components
- Accessibility compliance (WCAG color contrast)
- Easy theme updates without hunting through multiple files

Usage:
    from portfolio_app.design import TEXT_PRIMARY, CHART_PALETTE
    fig.update_layout(font_color=TEXT_PRIMARY)
"""

from typing import Any

# =============================================================================
# TEXT COLORS (Dark Theme)
# =============================================================================

TEXT_PRIMARY = "#c9c9c9"
"""Primary text color for labels, titles, and body text."""

TEXT_SECONDARY = "#888888"
"""Secondary text color for subtitles, captions, and muted text."""

TEXT_MUTED = "#666666"
"""Muted text color for disabled states and placeholders."""


# =============================================================================
# CHART BACKGROUND & GRID
# =============================================================================

GRID_COLOR = "rgba(128, 128, 128, 0.2)"
"""Standard grid line color with transparency."""

GRID_COLOR_DARK = "rgba(128, 128, 128, 0.3)"
"""Darker grid for radar charts and polar plots."""

PAPER_BG = "rgba(0, 0, 0, 0)"
"""Transparent paper background for charts."""

PLOT_BG = "rgba(0, 0, 0, 0)"
"""Transparent plot background for charts."""


# =============================================================================
# SEMANTIC COLORS
# =============================================================================

COLOR_POSITIVE = "#40c057"
"""Positive/success indicator (Mantine green-6)."""

COLOR_NEGATIVE = "#fa5252"
"""Negative/error indicator (Mantine red-6)."""

COLOR_WARNING = "#fab005"
"""Warning indicator (Mantine yellow-6)."""

COLOR_ACCENT = "#228be6"
"""Primary accent color (Mantine blue-6)."""


# =============================================================================
# ACCESSIBLE CHART PALETTE
# =============================================================================

# Okabe-Ito palette - optimized for all color vision deficiencies
# Reference: https://jfly.uni-koeln.de/color/
CHART_PALETTE = [
    "#0072B2",  # Blue (primary data series)
    "#E69F00",  # Orange
    "#56B4E9",  # Sky blue
    "#009E73",  # Teal/green
    "#F0E442",  # Yellow
    "#D55E00",  # Vermillion
    "#CC79A7",  # Pink
    "#000000",  # Black (use sparingly)
]
"""
Accessible categorical palette (Okabe-Ito).

Distinguishable for deuteranopia, protanopia, and tritanopia.
Use indices 0-6 for most charts; index 7 (black) for emphasis only.
"""

# Semantic subsets for specific use cases
PALETTE_COMPARISON = [CHART_PALETTE[0], CHART_PALETTE[1]]
"""Two-color palette for A/B comparisons."""

PALETTE_GENDER = {
    "male": "#56B4E9",  # Sky blue
    "female": "#CC79A7",  # Pink
}
"""Gender-specific colors (accessible contrast)."""

PALETTE_TREND = {
    "positive": COLOR_POSITIVE,
    "negative": COLOR_NEGATIVE,
    "neutral": TEXT_SECONDARY,
}
"""Trend indicator colors for sparklines and deltas."""


# =============================================================================
# POLICY/EVENT MARKERS (Time Series)
# =============================================================================

POLICY_COLORS = {
    "policy_change": "#E69F00",  # Orange - policy changes
    "major_event": "#D55E00",  # Vermillion - major events
    "data_note": "#56B4E9",  # Sky blue - data annotations
    "forecast": "#009E73",  # Teal - forecast periods
    "highlight": "#F0E442",  # Yellow - highlighted regions
}
"""Colors for policy markers and event annotations on time series."""


# =============================================================================
# CHART LAYOUT DEFAULTS
# =============================================================================


def get_default_layout() -> dict[str, Any]:
    """Return default Plotly layout settings with design tokens.

    Returns:
        dict: Layout configuration for fig.update_layout()

    Example:
        fig.update_layout(**get_default_layout())
    """
    return {
        "paper_bgcolor": PAPER_BG,
        "plot_bgcolor": PLOT_BG,
        "font": {"color": TEXT_PRIMARY},
        "title": {"font": {"color": TEXT_PRIMARY}},
        "legend": {"font": {"color": TEXT_PRIMARY}},
        "xaxis": {
            "gridcolor": GRID_COLOR,
            "linecolor": GRID_COLOR,
            "tickfont": {"color": TEXT_PRIMARY},
            "title": {"font": {"color": TEXT_PRIMARY}},
        },
        "yaxis": {
            "gridcolor": GRID_COLOR,
            "linecolor": GRID_COLOR,
            "tickfont": {"color": TEXT_PRIMARY},
            "title": {"font": {"color": TEXT_PRIMARY}},
        },
    }


def get_colorbar_defaults() -> dict[str, Any]:
    """Return default colorbar settings with design tokens.

    Returns:
        dict: Colorbar configuration for choropleth/heatmap traces
    """
    return {
        "tickfont": {"color": TEXT_PRIMARY},
        "title": {"font": {"color": TEXT_PRIMARY}},
    }
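Note: a minimal usage sketch of the tokens module above, following its own docstring example:

import plotly.graph_objects as go

from portfolio_app.design import CHART_PALETTE, get_default_layout

fig = go.Figure(go.Scatter(y=[1, 3, 2], line={"color": CHART_PALETTE[0]}))
# One call applies the transparent backgrounds, grid colors, and text colors
fig.update_layout(**get_default_layout())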
@@ -1,61 +1,15 @@
-"""Plotly figure factories for data visualization."""
+"""Plotly figure factories for data visualization.
+
+Figure factories are organized by dashboard domain:
+- toronto/ : Toronto Neighbourhood Dashboard figures
+
+Usage:
+    from portfolio_app.figures.toronto import create_choropleth_figure
+    from portfolio_app.figures.toronto import create_ranking_bar
+"""
 
-from .bar_charts import (
-    create_horizontal_bar,
-    create_ranking_bar,
-    create_stacked_bar,
-)
-from .choropleth import (
-    create_choropleth_figure,
-    create_zone_map,
-)
-from .demographics import (
-    create_age_pyramid,
-    create_donut_chart,
-    create_income_distribution,
-)
-from .radar import (
-    create_comparison_radar,
-    create_radar_figure,
-)
-from .scatter import (
-    create_bubble_chart,
-    create_scatter_figure,
-)
-from .summary_cards import create_metric_card_figure, create_summary_metrics
-from .time_series import (
-    add_policy_markers,
-    create_market_comparison_chart,
-    create_price_time_series,
-    create_time_series_with_events,
-    create_volume_time_series,
-)
+from . import toronto
 
 __all__ = [
-    # Choropleth
-    "create_choropleth_figure",
-    "create_zone_map",
-    # Time series
-    "create_price_time_series",
-    "create_volume_time_series",
-    "create_market_comparison_chart",
-    "create_time_series_with_events",
-    "add_policy_markers",
-    # Summary
-    "create_metric_card_figure",
-    "create_summary_metrics",
-    # Bar charts
-    "create_ranking_bar",
-    "create_stacked_bar",
-    "create_horizontal_bar",
-    # Scatter plots
-    "create_scatter_figure",
-    "create_bubble_chart",
-    # Radar charts
-    "create_radar_figure",
-    "create_comparison_radar",
-    # Demographics
-    "create_age_pyramid",
-    "create_donut_chart",
-    "create_income_distribution",
+    "toronto",
 ]
|
|||||||
61
portfolio_app/figures/toronto/__init__.py
Normal file
@@ -0,0 +1,61 @@
"""Plotly figure factories for Toronto dashboard visualizations."""

from .bar_charts import (
    create_horizontal_bar,
    create_ranking_bar,
    create_stacked_bar,
)
from .choropleth import (
    create_choropleth_figure,
    create_zone_map,
)
from .demographics import (
    create_age_pyramid,
    create_donut_chart,
    create_income_distribution,
)
from .radar import (
    create_comparison_radar,
    create_radar_figure,
)
from .scatter import (
    create_bubble_chart,
    create_scatter_figure,
)
from .summary_cards import create_metric_card_figure, create_summary_metrics
from .time_series import (
    add_policy_markers,
    create_market_comparison_chart,
    create_price_time_series,
    create_time_series_with_events,
    create_volume_time_series,
)

__all__ = [
    # Choropleth
    "create_choropleth_figure",
    "create_zone_map",
    # Time series
    "create_price_time_series",
    "create_volume_time_series",
    "create_market_comparison_chart",
    "create_time_series_with_events",
    "add_policy_markers",
    # Summary
    "create_metric_card_figure",
    "create_summary_metrics",
    # Bar charts
    "create_ranking_bar",
    "create_stacked_bar",
    "create_horizontal_bar",
    # Scatter plots
    "create_scatter_figure",
    "create_bubble_chart",
    # Radar charts
    "create_radar_figure",
    "create_comparison_radar",
    # Demographics
    "create_age_pyramid",
    "create_donut_chart",
    "create_income_distribution",
]
@@ -6,6 +6,17 @@ import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    CHART_PALETTE,
+    COLOR_NEGATIVE,
+    COLOR_POSITIVE,
+    GRID_COLOR,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_ranking_bar(
     data: list[dict[str, Any]],
@@ -14,8 +25,8 @@ def create_ranking_bar(
     title: str | None = None,
     top_n: int = 10,
     bottom_n: int = 10,
-    color_top: str = "#4CAF50",
-    color_bottom: str = "#F44336",
+    color_top: str = COLOR_POSITIVE,
+    color_bottom: str = COLOR_NEGATIVE,
     value_format: str = ",.0f",
 ) -> go.Figure:
     """Create horizontal bar chart showing top and bottom rankings.
@@ -87,10 +98,10 @@ def create_ranking_bar(
         barmode="group",
         showlegend=True,
         legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "title": None},
         yaxis={"autorange": "reversed", "title": None},
         margin={"l": 10, "r": 10, "t": 40, "b": 10},
     )
@@ -126,10 +137,10 @@ def create_stacked_bar(
 
     df = pd.DataFrame(data)
 
-    # Default color scheme
+    # Default color scheme using accessible palette
     if color_map is None:
         categories = df[category_column].unique()
-        colors = px.colors.qualitative.Set2[: len(categories)]
+        colors = CHART_PALETTE[: len(categories)]
         color_map = dict(zip(categories, colors, strict=False))
 
     fig = px.bar(
@@ -147,11 +158,11 @@ def create_stacked_bar(
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
-        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "title": None},
+        yaxis={"gridcolor": GRID_COLOR, "title": None},
         legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
         margin={"l": 10, "r": 10, "t": 60, "b": 10},
     )
@@ -164,7 +175,7 @@ def create_horizontal_bar(
     name_column: str,
     value_column: str,
     title: str | None = None,
-    color: str = "#2196F3",
+    color: str = CHART_PALETTE[0],
     value_format: str = ",.0f",
     sort: bool = True,
 ) -> go.Figure:
@@ -204,10 +215,10 @@ def create_horizontal_bar(
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": None},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "title": None},
         yaxis={"title": None},
         margin={"l": 10, "r": 10, "t": 40, "b": 10},
     )
@@ -225,13 +236,13 @@ def _create_empty_figure(title: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={"visible": False},
         yaxis={"visible": False},
     )
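With the defaults now pointing at tokens, call sites can drop their hardcoded colors entirely. An illustrative call (the sample records are invented for the sketch; the parameter names are the ones in the signature above):

```python
from portfolio_app.figures.toronto import create_horizontal_bar

# Illustrative records only; real data comes from the Toronto services layer.
data = [
    {"neighbourhood": "Annex", "avg_rent": 2450},
    {"neighbourhood": "Leslieville", "avg_rent": 2210},
]
# Omitting `color` picks up CHART_PALETTE[0] from the design tokens.
fig = create_horizontal_bar(
    data=data,
    name_column="neighbourhood",
    value_column="avg_rent",
    title="Average Rent (illustrative)",
)
```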
@@ -5,6 +5,13 @@ from typing import Any
 import plotly.express as px
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_choropleth_figure(
     geojson: dict[str, Any] | None,
@@ -55,9 +62,9 @@ def create_choropleth_figure(
         margin={"l": 0, "r": 0, "t": 40, "b": 0},
         title=title or "Toronto Housing Map",
         height=500,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
     )
     fig.add_annotation(
         text="No geometry data available. Complete QGIS digitization to enable map.",
@@ -66,7 +73,7 @@ def create_choropleth_figure(
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     return fig
 
@@ -98,17 +105,17 @@ def create_choropleth_figure(
         margin={"l": 0, "r": 0, "t": 40, "b": 0},
         title=title,
         height=500,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         coloraxis_colorbar={
             "title": {
                 "text": color_column.replace("_", " ").title(),
-                "font": {"color": "#c9c9c9"},
+                "font": {"color": TEXT_PRIMARY},
             },
             "thickness": 15,
             "len": 0.7,
-            "tickfont": {"color": "#c9c9c9"},
+            "tickfont": {"color": TEXT_PRIMARY},
         },
     )
 
@@ -5,6 +5,16 @@ from typing import Any
 import pandas as pd
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    CHART_PALETTE,
+    GRID_COLOR,
+    PALETTE_GENDER,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_age_pyramid(
     data: list[dict[str, Any]],
@@ -52,7 +62,7 @@ def create_age_pyramid(
             x=male_values_neg,
             orientation="h",
             name="Male",
-            marker_color="#2196F3",
+            marker_color=PALETTE_GENDER["male"],
            hovertemplate="%{y}<br>Male: %{customdata:,}<extra></extra>",
            customdata=male_values,
        )
@@ -65,7 +75,7 @@ def create_age_pyramid(
             x=female_values,
             orientation="h",
             name="Female",
-            marker_color="#E91E63",
+            marker_color=PALETTE_GENDER["female"],
             hovertemplate="%{y}<br>Female: %{x:,}<extra></extra>",
         )
     )
@@ -77,12 +87,12 @@ def create_age_pyramid(
         title=title,
         barmode="overlay",
         bargap=0.1,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={
             "title": "Population",
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "range": [-max_val * 1.1, max_val * 1.1],
             "tickvals": [-max_val, -max_val / 2, 0, max_val / 2, max_val],
             "ticktext": [
@@ -93,7 +103,7 @@ def create_age_pyramid(
                 f"{max_val:,.0f}",
             ],
         },
-        yaxis={"title": None, "gridcolor": "rgba(128,128,128,0.2)"},
+        yaxis={"title": None, "gridcolor": GRID_COLOR},
         legend={"orientation": "h", "yanchor": "bottom", "y": 1.02},
         margin={"l": 10, "r": 10, "t": 60, "b": 10},
     )
@@ -127,17 +137,9 @@ def create_donut_chart(
 
     df = pd.DataFrame(data)
 
+    # Use accessible palette by default
     if colors is None:
-        colors = [
-            "#2196F3",
-            "#4CAF50",
-            "#FF9800",
-            "#E91E63",
-            "#9C27B0",
-            "#00BCD4",
-            "#FFC107",
-            "#795548",
-        ]
+        colors = CHART_PALETTE
 
     fig = go.Figure(
         go.Pie(
@@ -153,8 +155,8 @@ def create_donut_chart(
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        font_color=TEXT_PRIMARY,
         showlegend=False,
         margin={"l": 10, "r": 10, "t": 60, "b": 10},
     )
@@ -167,7 +169,7 @@ def create_income_distribution(
     bracket_column: str,
     count_column: str,
     title: str | None = None,
-    color: str = "#4CAF50",
+    color: str = CHART_PALETTE[3],  # Teal
 ) -> go.Figure:
     """Create histogram-style bar chart for income distribution.
 
@@ -199,17 +201,17 @@ def create_income_distribution(
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={
             "title": "Income Bracket",
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "tickangle": -45,
         },
         yaxis={
             "title": "Households",
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
         },
         margin={"l": 10, "r": 10, "t": 60, "b": 80},
     )
@@ -227,13 +229,13 @@ def _create_empty_figure(title: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={"visible": False},
         yaxis={"visible": False},
    )
@@ -4,6 +4,14 @@ from typing import Any
 
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    CHART_PALETTE,
+    GRID_COLOR_DARK,
+    PAPER_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_radar_figure(
     data: list[dict[str, Any]],
@@ -32,16 +40,9 @@ def create_radar_figure(
     if not data or not metrics:
         return _create_empty_figure(title or "Radar Chart")
 
-    # Default colors
+    # Use accessible palette by default
     if colors is None:
-        colors = [
-            "#2196F3",
-            "#4CAF50",
-            "#FF9800",
-            "#E91E63",
-            "#9C27B0",
-            "#00BCD4",
-        ]
+        colors = CHART_PALETTE
 
     fig = go.Figure()
 
@@ -78,19 +79,19 @@ def create_radar_figure(
         polar={
             "radialaxis": {
                 "visible": True,
-                "gridcolor": "rgba(128,128,128,0.3)",
-                "linecolor": "rgba(128,128,128,0.3)",
-                "tickfont": {"color": "#c9c9c9"},
+                "gridcolor": GRID_COLOR_DARK,
+                "linecolor": GRID_COLOR_DARK,
+                "tickfont": {"color": TEXT_PRIMARY},
             },
             "angularaxis": {
-                "gridcolor": "rgba(128,128,128,0.3)",
-                "linecolor": "rgba(128,128,128,0.3)",
-                "tickfont": {"color": "#c9c9c9"},
+                "gridcolor": GRID_COLOR_DARK,
+                "linecolor": GRID_COLOR_DARK,
+                "tickfont": {"color": TEXT_PRIMARY},
             },
-            "bgcolor": "rgba(0,0,0,0)",
+            "bgcolor": PAPER_BG,
         },
-        paper_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        font_color=TEXT_PRIMARY,
         showlegend=len(data) > 1,
         legend={"orientation": "h", "yanchor": "bottom", "y": -0.2},
         margin={"l": 40, "r": 40, "t": 60, "b": 40},
@@ -133,7 +134,7 @@ def create_comparison_radar(
         metrics=metrics,
         name_column="__name__",
         title=title,
-        colors=["#4CAF50", "#9E9E9E"],
+        colors=[CHART_PALETTE[3], TEXT_SECONDARY],  # Teal for selected, gray for avg
     )
 
 
@@ -156,11 +157,11 @@ def _create_empty_figure(title: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        font_color=TEXT_PRIMARY,
     )
     return fig
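One technique worth calling out from the demographics hunks above: the age pyramid uses the standard negated-x trick. Male counts are plotted as negative values so the two series mirror around zero, `customdata` keeps the hover text positive, and symmetric `tickvals`/`ticktext` relabel the axis so both sides read as positive counts. A stripped-down sketch of the same idea, with invented values:

```python
import plotly.graph_objects as go

age_groups = ["0-14", "15-64", "65+"]
male = [100, 400, 120]
female = [95, 410, 140]

fig = go.Figure()
# Negate male counts so their bars extend left of zero.
fig.add_trace(go.Bar(y=age_groups, x=[-v for v in male], orientation="h",
                     name="Male", customdata=male,
                     hovertemplate="%{y}<br>Male: %{customdata:,}<extra></extra>"))
fig.add_trace(go.Bar(y=age_groups, x=female, orientation="h", name="Female"))

max_val = max(*male, *female)
fig.update_layout(
    barmode="overlay",
    xaxis={
        "range": [-max_val * 1.1, max_val * 1.1],
        "tickvals": [-max_val, 0, max_val],
        # Relabel the negative ticks so both halves show positive counts.
        "ticktext": [f"{max_val:,}", "0", f"{max_val:,}"],
    },
)
```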
@@ -6,6 +6,15 @@ import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    CHART_PALETTE,
+    GRID_COLOR,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_scatter_figure(
     data: list[dict[str, Any]],
@@ -72,21 +81,21 @@ def create_scatter_figure(
     if trendline:
         fig.update_traces(
             selector={"mode": "lines"},
-            line={"color": "#FF9800", "dash": "dash", "width": 2},
+            line={"color": CHART_PALETTE[1], "dash": "dash", "width": 2},
         )
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "title": x_title or x_column.replace("_", " ").title(),
             "zeroline": False,
         },
         yaxis={
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "title": y_title or y_column.replace("_", " ").title(),
             "zeroline": False,
         },
@@ -140,19 +149,20 @@ def create_bubble_chart(
         hover_name=name_column,
         size_max=size_max,
         opacity=0.7,
+        color_discrete_sequence=CHART_PALETTE,
     )
 
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "title": x_title or x_column.replace("_", " ").title(),
         },
         yaxis={
-            "gridcolor": "rgba(128,128,128,0.2)",
+            "gridcolor": GRID_COLOR,
             "title": y_title or y_column.replace("_", " ").title(),
         },
         margin={"l": 10, "r": 10, "t": 40, "b": 10},
@@ -171,13 +181,13 @@ def _create_empty_figure(title: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={"visible": False},
         yaxis={"visible": False},
     )
@@ -4,6 +4,14 @@ from typing import Any
 
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    COLOR_NEGATIVE,
+    COLOR_POSITIVE,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+)
+
 
 def create_metric_card_figure(
     value: float | int | str,
@@ -59,8 +67,12 @@ def create_metric_card_figure(
         "relative": False,
         "valueformat": ".1f",
         "suffix": delta_suffix,
-        "increasing": {"color": "green" if positive_is_good else "red"},
-        "decreasing": {"color": "red" if positive_is_good else "green"},
+        "increasing": {
+            "color": COLOR_POSITIVE if positive_is_good else COLOR_NEGATIVE
+        },
+        "decreasing": {
+            "color": COLOR_NEGATIVE if positive_is_good else COLOR_POSITIVE
+        },
     }
 
     fig.add_trace(go.Indicator(**indicator_config))
@@ -68,9 +80,9 @@ def create_metric_card_figure(
     fig.update_layout(
         height=120,
         margin={"l": 20, "r": 20, "t": 40, "b": 20},
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font={"family": "Inter, sans-serif", "color": "#c9c9c9"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font={"family": "Inter, sans-serif", "color": TEXT_PRIMARY},
     )
 
     return fig
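The `positive_is_good` flag exists because a rising delta is not always good news (rents up is bad, vacancy up may be good). In `go.Indicator` terms the swap simply flips which arrow direction gets the positive color. A compact sketch of the same logic (the token hex values here are assumed for illustration; the real ones live in `portfolio_app.design`):

```python
import plotly.graph_objects as go

COLOR_POSITIVE = "#009E73"  # assumed token value for the sketch
COLOR_NEGATIVE = "#D55E00"  # assumed token value for the sketch

def delta_colors(positive_is_good: bool) -> dict:
    """Delta color config mirroring create_metric_card_figure's swap."""
    return {
        "increasing": {"color": COLOR_POSITIVE if positive_is_good else COLOR_NEGATIVE},
        "decreasing": {"color": COLOR_NEGATIVE if positive_is_good else COLOR_POSITIVE},
    }

fig = go.Figure(go.Indicator(
    mode="number+delta",
    value=2450,
    # Rent went up, so the increase should render in the negative color.
    delta={"reference": 2300, **delta_colors(positive_is_good=False)},
))
```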
@@ -5,6 +5,15 @@ from typing import Any
 import plotly.express as px
 import plotly.graph_objects as go
 
+from portfolio_app.design import (
+    CHART_PALETTE,
+    GRID_COLOR,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+
 
 def create_price_time_series(
     data: list[dict[str, Any]],
@@ -38,14 +47,14 @@ def create_price_time_series(
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"color": "#888888"},
+        font={"color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
         height=350,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
     )
     return fig
 
@@ -59,6 +68,7 @@ def create_price_time_series(
             y=price_column,
             color=group_column,
             title=title,
+            color_discrete_sequence=CHART_PALETTE,
         )
     else:
         fig = px.line(
@@ -67,6 +77,7 @@ def create_price_time_series(
             y=price_column,
             title=title,
         )
+        fig.update_traces(line_color=CHART_PALETTE[0])
 
     fig.update_layout(
         height=350,
@@ -76,11 +87,11 @@ def create_price_time_series(
         yaxis_tickprefix="$",
         yaxis_tickformat=",",
         hovermode="x unified",
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "#333333", "linecolor": "#444444"},
-        yaxis={"gridcolor": "#333333", "linecolor": "#444444"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
+        yaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
     )
 
     return fig
@@ -118,14 +129,14 @@ def create_volume_time_series(
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"color": "#888888"},
+        font={"color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
         height=350,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
     )
     return fig
 
@@ -140,6 +151,7 @@ def create_volume_time_series(
             y=volume_column,
             color=group_column,
             title=title,
+            color_discrete_sequence=CHART_PALETTE,
         )
     else:
         fig = px.bar(
@@ -148,6 +160,7 @@ def create_volume_time_series(
             y=volume_column,
             title=title,
         )
+        fig.update_traces(marker_color=CHART_PALETTE[0])
     else:
         if group_column and group_column in df.columns:
             fig = px.line(
@@ -156,6 +169,7 @@ def create_volume_time_series(
                 y=volume_column,
                 color=group_column,
                 title=title,
+                color_discrete_sequence=CHART_PALETTE,
             )
         else:
             fig = px.line(
@@ -164,6 +178,7 @@ def create_volume_time_series(
                 y=volume_column,
                 title=title,
             )
+            fig.update_traces(line_color=CHART_PALETTE[0])
 
     fig.update_layout(
         height=350,
@@ -172,11 +187,11 @@ def create_volume_time_series(
         yaxis_title=volume_column.replace("_", " ").title(),
         yaxis_tickformat=",",
         hovermode="x unified",
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "#333333", "linecolor": "#444444"},
-        yaxis={"gridcolor": "#333333", "linecolor": "#444444"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
+        yaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
     )
 
     return fig
@@ -211,14 +226,14 @@ def create_market_comparison_chart(
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"color": "#888888"},
+        font={"color": TEXT_SECONDARY},
     )
     fig.update_layout(
         title=title,
         height=400,
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
     )
     return fig
 
@@ -230,8 +245,6 @@ def create_market_comparison_chart(
 
     fig = make_subplots(specs=[[{"secondary_y": True}]])
 
-    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]
-
     for i, metric in enumerate(metrics[:4]):
         if metric not in df.columns:
             continue
@@ -242,7 +255,7 @@ def create_market_comparison_chart(
                 x=df[date_column],
                 y=df[metric],
                 name=metric.replace("_", " ").title(),
-                line={"color": colors[i % len(colors)]},
+                line={"color": CHART_PALETTE[i % len(CHART_PALETTE)]},
             ),
             secondary_y=secondary,
         )
@@ -252,18 +265,18 @@ def create_market_comparison_chart(
         height=400,
         margin={"l": 40, "r": 40, "t": 50, "b": 40},
         hovermode="x unified",
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "#333333", "linecolor": "#444444"},
-        yaxis={"gridcolor": "#333333", "linecolor": "#444444"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
+        yaxis={"gridcolor": GRID_COLOR, "linecolor": GRID_COLOR},
         legend={
             "orientation": "h",
             "yanchor": "bottom",
             "y": 1.02,
             "xanchor": "right",
             "x": 1,
-            "font": {"color": "#c9c9c9"},
+            "font": {"color": TEXT_PRIMARY},
         },
     )
 
@@ -290,13 +303,13 @@ def add_policy_markers(
     if not policy_events:
         return fig
 
-    # Color mapping for policy categories
+    # Color mapping for policy categories using design tokens
     category_colors = {
-        "monetary": "#1f77b4",  # Blue
-        "tax": "#2ca02c",  # Green
-        "regulatory": "#ff7f0e",  # Orange
-        "supply": "#9467bd",  # Purple
-        "economic": "#d62728",  # Red
+        "monetary": CHART_PALETTE[0],  # Blue
+        "tax": CHART_PALETTE[3],  # Teal/green
+        "regulatory": CHART_PALETTE[1],  # Orange
+        "supply": CHART_PALETTE[6],  # Pink
+        "economic": CHART_PALETTE[5],  # Vermillion
     }
 
     # Symbol mapping for expected direction
@@ -313,7 +326,7 @@ def add_policy_markers(
         title = event.get("title", "Policy Event")
         level = event.get("level", "federal")
 
-        color = category_colors.get(category, "#666666")
+        color = category_colors.get(category, TEXT_SECONDARY)
         symbol = direction_symbols.get(direction, "circle")
 
         # Add vertical line for the event
@@ -335,7 +348,7 @@ def add_policy_markers(
                 "symbol": symbol,
                 "size": 12,
                 "color": color,
-                "line": {"width": 1, "color": "white"},
+                "line": {"width": 1, "color": TEXT_PRIMARY},
             },
             name=title,
             hovertemplate=(
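Downstream callers build the base series first and then splice in the annotations. A hedged sketch of the call pattern (the record keys `category`, `title`, and `level` appear in the loop above; the event date key and the factory's column parameter names are assumptions for illustration):

```python
from portfolio_app.figures.toronto import add_policy_markers, create_price_time_series

prices = [
    {"date": "2022-01-01", "avg_rent": 2300},
    {"date": "2022-06-01", "avg_rent": 2390},
]
events = [
    {
        "date": "2022-03-01",              # assumed key name
        "category": "tax",                 # colored with CHART_PALETTE[3]
        "title": "Illustrative tax change",
        "level": "provincial",
        "direction": "up",                 # picks the marker symbol
    }
]

fig = create_price_time_series(data=prices, date_column="date", price_column="avg_rent")
fig = add_policy_markers(fig, policy_events=events)
```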
@@ -2,6 +2,7 @@
 
 import dash
 import dash_mantine_components as dmc
+from dash import html
 from dash_iconify import DashIconify
 
 dash.register_page(__name__, path="/contact", name="Contact")
@@ -51,51 +52,57 @@ def create_intro_section() -> dmc.Stack:
 
 
 def create_contact_form() -> dmc.Paper:
-    """Create the contact form (disabled in Phase 1)."""
+    """Create the contact form with Formspree integration."""
     return dmc.Paper(
         dmc.Stack(
             [
                 dmc.Title("Send a Message", order=2, size="h4"),
-                dmc.Alert(
-                    "Contact form submission is coming soon. Please use the direct contact "
-                    "methods below for now.",
-                    title="Form Coming Soon",
-                    color="blue",
-                    variant="light",
-                ),
+                # Feedback container for success/error messages
+                html.Div(id="contact-feedback"),
                 dmc.TextInput(
+                    id="contact-name",
                     label="Name",
                     placeholder="Your name",
                     leftSection=DashIconify(icon="tabler:user", width=18),
-                    disabled=True,
+                    required=True,
                 ),
                 dmc.TextInput(
+                    id="contact-email",
                     label="Email",
                     placeholder="your.email@example.com",
                     leftSection=DashIconify(icon="tabler:mail", width=18),
-                    disabled=True,
+                    required=True,
                 ),
                 dmc.Select(
+                    id="contact-subject",
                     label="Subject",
                     placeholder="Select a subject",
                     data=SUBJECT_OPTIONS,
                     leftSection=DashIconify(icon="tabler:tag", width=18),
-                    disabled=True,
                 ),
                 dmc.Textarea(
+                    id="contact-message",
                     label="Message",
                     placeholder="Your message...",
                     minRows=4,
-                    disabled=True,
+                    required=True,
+                ),
+                # Honeypot field for spam protection (hidden from users)
+                dmc.TextInput(
+                    id="contact-gotcha",
+                    style={"position": "absolute", "left": "-9999px"},
+                    tabIndex=-1,
+                    autoComplete="off",
                 ),
                 dmc.Button(
                     "Send Message",
+                    id="contact-submit",
                     fullWidth=True,
                     leftSection=DashIconify(icon="tabler:send", width=18),
-                    disabled=True,
                 ),
             ],
             gap="md",
+            style={"position": "relative"},
         ),
         p="xl",
         radius="md",
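The form itself only declares inputs and the honeypot; the submit wiring lives in a callback that is not shown in this diff. A hypothetical sketch of what such a callback could look like (the Formspree endpoint, the `requests` dependency, and the feedback contract are all assumptions, not the repo's actual code):

```python
import requests
import dash_mantine_components as dmc
from dash import Input, Output, State, callback, no_update

FORMSPREE_URL = "https://formspree.io/f/your-form-id"  # hypothetical endpoint


@callback(
    Output("contact-feedback", "children"),
    Input("contact-submit", "n_clicks"),
    State("contact-name", "value"),
    State("contact-email", "value"),
    State("contact-subject", "value"),
    State("contact-message", "value"),
    State("contact-gotcha", "value"),
    prevent_initial_call=True,
)
def submit_contact(n_clicks, name, email, subject, message, gotcha):
    if gotcha:  # honeypot filled => almost certainly a bot; drop silently
        return no_update
    resp = requests.post(
        FORMSPREE_URL,
        data={"name": name, "email": email, "subject": subject, "message": message},
        headers={"Accept": "application/json"},
        timeout=10,
    )
    if resp.ok:
        return dmc.Alert("Message sent, thank you!", color="green", variant="light")
    return dmc.Alert("Something went wrong; please try again.", color="red", variant="light")
```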
@@ -1,10 +1,19 @@
 """Chart callbacks for supporting visualizations."""
 # mypy: disable-error-code="misc,no-untyped-def,arg-type"
 
+import pandas as pd
 import plotly.graph_objects as go
 from dash import Input, Output, callback
 
-from portfolio_app.figures import (
+from portfolio_app.design import (
+    CHART_PALETTE,
+    GRID_COLOR,
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+from portfolio_app.figures.toronto import (
     create_donut_chart,
     create_horizontal_bar,
     create_radar_figure,
@@ -43,7 +52,24 @@ def update_overview_scatter(year: str) -> go.Figure:
     # Compute safety score (inverse of crime rate)
     if "total_crime_rate" in merged.columns:
         max_crime = merged["total_crime_rate"].max()
-        merged["safety_score"] = 100 - (merged["total_crime_rate"] / max_crime * 100)
+        if max_crime and max_crime > 0:
+            merged["safety_score"] = 100 - (
+                merged["total_crime_rate"] / max_crime * 100
+            )
+        else:
+            merged["safety_score"] = 50  # Default if no crime data
+
+    # Fill NULL population with median or default value for sizing
+    if "population" in merged.columns:
+        median_pop = merged["population"].median()
+        default_pop = median_pop if pd.notna(median_pop) else 10000
+        merged["population"] = merged["population"].fillna(default_pop)
+
+    # Filter rows with required data for scatter plot
+    merged = merged.dropna(subset=["median_household_income", "safety_score"])
+
+    if merged.empty:
+        return _empty_chart("Insufficient data for scatter plot")
+
     data = merged.to_dict("records")
 
@@ -76,12 +102,13 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
         return _empty_chart("No trend data available")
 
     # Placeholder for trend data - would be historical
+    base_rent = averages.get("avg_rent_2bed") or 2000
     data = [
-        {"year": "2019", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.85},
-        {"year": "2020", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.88},
-        {"year": "2021", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.92},
-        {"year": "2022", "avg_rent": averages.get("avg_rent_2bed", 2000) * 0.96},
-        {"year": "2023", "avg_rent": averages.get("avg_rent_2bed", 2000)},
+        {"year": "2019", "avg_rent": base_rent * 0.85},
+        {"year": "2020", "avg_rent": base_rent * 0.88},
+        {"year": "2021", "avg_rent": base_rent * 0.92},
+        {"year": "2022", "avg_rent": base_rent * 0.96},
+        {"year": "2023", "avg_rent": base_rent},
     ]
 
     fig = go.Figure()
@@ -90,18 +117,18 @@ def update_housing_trend(year: str, neighbourhood_id: int | None) -> go.Figure:
             x=[d["year"] for d in data],
             y=[d["avg_rent"] for d in data],
             mode="lines+markers",
-            line={"color": "#2196F3", "width": 2},
+            line={"color": CHART_PALETTE[0], "width": 2},
             marker={"size": 8},
             name="City Average",
         )
     )
 
     fig.update_layout(
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "rgba(128,128,128,0.2)"},
-        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": "Avg Rent (2BR)"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR},
+        yaxis={"gridcolor": GRID_COLOR, "title": "Avg Rent (2BR)"},
         showlegend=False,
         margin={"l": 40, "r": 10, "t": 10, "b": 30},
     )
@@ -134,7 +161,7 @@ def update_housing_types(year: str) -> go.Figure:
         data=data,
         name_column="type",
         value_column="percentage",
-        colors=["#4CAF50", "#2196F3"],
+        colors=[CHART_PALETTE[3], CHART_PALETTE[0]],  # Teal for owner, blue for renter
     )
 
 
@@ -159,19 +186,19 @@ def update_safety_trend(year: str) -> go.Figure:
             x=[d["year"] for d in data],
             y=[d["crime_rate"] for d in data],
             mode="lines+markers",
-            line={"color": "#FF5722", "width": 2},
+            line={"color": CHART_PALETTE[5], "width": 2},  # Vermillion
             marker={"size": 8},
             fill="tozeroy",
-            fillcolor="rgba(255,87,34,0.1)",
+            fillcolor="rgba(213, 94, 0, 0.1)",  # Vermillion with opacity
         )
     )
 
     fig.update_layout(
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
-        xaxis={"gridcolor": "rgba(128,128,128,0.2)"},
-        yaxis={"gridcolor": "rgba(128,128,128,0.2)", "title": "Crime Rate per 100K"},
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
+        xaxis={"gridcolor": GRID_COLOR},
+        yaxis={"gridcolor": GRID_COLOR, "title": "Crime Rate per 100K"},
         showlegend=False,
         margin={"l": 40, "r": 10, "t": 10, "b": 30},
     )
@@ -214,7 +241,7 @@ def update_safety_types(year: str) -> go.Figure:
         data=data,
         name_column="category",
         value_column="count",
-        color="#FF5722",
+        color=CHART_PALETTE[5],  # Vermillion for crime
     )
 
 
@@ -245,7 +272,11 @@ def update_demographics_age(year: str) -> go.Figure:
         data=data,
         name_column="age_group",
         value_column="percentage",
-        colors=["#9C27B0", "#673AB7", "#3F51B5"],
+        colors=[
+            CHART_PALETTE[2],
+            CHART_PALETTE[0],
+            CHART_PALETTE[4],
+        ],  # Sky, Blue, Yellow
     )
 
 
@@ -282,7 +313,7 @@ def update_demographics_income(year: str) -> go.Figure:
         data=data,
         name_column="bracket",
         value_column="count",
-        color="#4CAF50",
+        color=CHART_PALETTE[3],  # Teal
         sort=False,
     )
 
@@ -314,7 +345,7 @@ def update_amenities_breakdown(year: str) -> go.Figure:
         data=data,
         name_column="type",
         value_column="count",
-        color="#4CAF50",
+        color=CHART_PALETTE[3],  # Teal
     )
 
 
@@ -330,10 +361,11 @@ def update_amenities_radar(year: str, neighbourhood_id: int | None) -> go.Figure
     # Get city averages
     averages = get_city_averages(year_int)
 
+    amenity_score = averages.get("avg_amenity_score") or 50
     city_data = {
-        "parks_per_1000": averages.get("avg_amenity_score", 50) / 100 * 10,
-        "schools_per_1000": averages.get("avg_amenity_score", 50) / 100 * 5,
-        "childcare_per_1000": averages.get("avg_amenity_score", 50) / 100 * 3,
+        "parks_per_1000": amenity_score / 100 * 10,
+        "schools_per_1000": amenity_score / 100 * 5,
+        "childcare_per_1000": amenity_score / 100 * 3,
         "transit_access": 70,
     }
 
@@ -367,9 +399,9 @@ def _empty_chart(message: str) -> go.Figure:
     """Create an empty chart with a message."""
     fig = go.Figure()
     fig.update_layout(
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={"visible": False},
         yaxis={"visible": False},
     )
@@ -380,6 +412,6 @@ def _empty_chart(message: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     return fig
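The guard added to the scatter callback matters because `max()` over an all-NULL column returns NaN, and dividing by zero or NaN silently poisons every derived score. The same defensive pattern in isolation, with illustrative values:

```python
import pandas as pd

df = pd.DataFrame({"total_crime_rate": [float("nan"), float("nan")]})
max_crime = df["total_crime_rate"].max()  # NaN when the column is entirely NULL

# NaN is truthy, but `NaN > 0` is False, so the guard still routes to the default.
if max_crime and max_crime > 0:
    df["safety_score"] = 100 - (df["total_crime_rate"] / max_crime * 100)
else:
    df["safety_score"] = 50  # neutral default, as in the callback above
```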
@@ -4,7 +4,13 @@
 import plotly.graph_objects as go
 from dash import Input, Output, State, callback, no_update
 
-from portfolio_app.figures import create_choropleth_figure, create_ranking_bar
+from portfolio_app.design import (
+    PAPER_BG,
+    PLOT_BG,
+    TEXT_PRIMARY,
+    TEXT_SECONDARY,
+)
+from portfolio_app.figures.toronto import create_choropleth_figure, create_ranking_bar
 from portfolio_app.toronto.services import (
     get_amenities_data,
     get_demographics_data,
@@ -267,8 +273,8 @@ def _empty_map(message: str) -> go.Figure:
             "zoom": 9.5,
         },
         margin={"l": 0, "r": 0, "t": 0, "b": 0},
-        paper_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        font_color=TEXT_PRIMARY,
     )
     fig.add_annotation(
         text=message,
@@ -277,7 +283,7 @@ def _empty_map(message: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     return fig
 
@@ -286,9 +292,9 @@ def _empty_chart(message: str) -> go.Figure:
     """Create an empty chart with a message."""
     fig = go.Figure()
     fig.update_layout(
-        paper_bgcolor="rgba(0,0,0,0)",
-        plot_bgcolor="rgba(0,0,0,0)",
-        font_color="#c9c9c9",
+        paper_bgcolor=PAPER_BG,
+        plot_bgcolor=PLOT_BG,
+        font_color=TEXT_PRIMARY,
         xaxis={"visible": False},
         yaxis={"visible": False},
     )
@@ -299,6 +305,6 @@ def _empty_chart(message: str) -> go.Figure:
         x=0.5,
         y=0.5,
         showarrow=False,
-        font={"size": 14, "color": "#888888"},
+        font={"size": 14, "color": TEXT_SECONDARY},
     )
     return fig
@@ -3,7 +3,12 @@
 from .amenities import load_amenities, load_amenity_counts
 from .base import bulk_insert, get_session, upsert_by_key
 from .census import load_census_data
-from .cmhc import load_cmhc_record, load_cmhc_rentals
+from .cmhc import (
+    ensure_toronto_cma_zone,
+    load_cmhc_record,
+    load_cmhc_rentals,
+    load_statcan_cmhc_data,
+)
 from .cmhc_crosswalk import (
     build_cmhc_neighbourhood_crosswalk,
     disaggregate_zone_value,
@@ -32,6 +37,8 @@ __all__ = [
     # Fact loaders
     "load_cmhc_rentals",
     "load_cmhc_record",
+    "load_statcan_cmhc_data",
+    "ensure_toronto_cma_zone",
     # Phase 3 loaders
     "load_census_data",
     "load_crime_data",
@@ -1,5 +1,9 @@
|
|||||||
"""Loader for CMHC rental data into fact_rentals."""
|
"""Loader for CMHC rental data into fact_rentals."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import date
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from portfolio_app.toronto.models import DimCMHCZone, DimTime, FactRentals
|
from portfolio_app.toronto.models import DimCMHCZone, DimTime, FactRentals
|
||||||
@@ -8,6 +12,12 @@ from portfolio_app.toronto.schemas import CMHCAnnualSurvey, CMHCRentalRecord
|
|||||||
from .base import get_session, upsert_by_key
|
from .base import get_session, upsert_by_key
|
||||||
from .dimensions import generate_date_key
|
from .dimensions import generate_date_key
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Toronto CMA zone code for CMA-level data
|
||||||
|
TORONTO_CMA_ZONE_CODE = "TORCMA"
|
||||||
|
TORONTO_CMA_ZONE_NAME = "Toronto CMA"
|
||||||
|
|
||||||
|
|
||||||
def load_cmhc_rentals(
|
def load_cmhc_rentals(
|
||||||
survey: CMHCAnnualSurvey,
|
survey: CMHCAnnualSurvey,
|
||||||
@@ -135,3 +145,117 @@ def load_cmhc_record(
|
|||||||
return _load(session)
|
return _load(session)
|
||||||
with get_session() as sess:
|
    with get_session() as sess:
        return _load(sess)


def ensure_toronto_cma_zone(session: Session | None = None) -> int:
    """Ensure Toronto CMA zone exists in dim_cmhc_zone.

    Creates the zone if it doesn't exist.

    Args:
        session: Optional existing session.

    Returns:
        The zone_key for Toronto CMA.
    """

    def _ensure(sess: Session) -> int:
        zone = (
            sess.query(DimCMHCZone).filter_by(zone_code=TORONTO_CMA_ZONE_CODE).first()
        )
        if zone:
            return int(zone.zone_key)

        # Create new zone
        new_zone = DimCMHCZone(
            zone_code=TORONTO_CMA_ZONE_CODE,
            zone_name=TORONTO_CMA_ZONE_NAME,
            geometry=None,  # CMA-level doesn't need geometry
        )
        sess.add(new_zone)
        sess.flush()
        logger.info(f"Created Toronto CMA zone with zone_key={new_zone.zone_key}")
        return int(new_zone.zone_key)

    if session:
        return _ensure(session)
    with get_session() as sess:
        result = _ensure(sess)
        sess.commit()
        return result


def load_statcan_cmhc_data(
    records: list[Any],  # List of CMHCRentalRecord from statcan_cmhc parser
    session: Session | None = None,
) -> int:
    """Load CMHC rental data from StatCan parser into fact_rentals.

    This function handles CMA-level data from the StatCan API, which provides
    aggregate Toronto data rather than zone-level HMIP data.

    Args:
        records: List of CMHCRentalRecord dataclass instances from statcan_cmhc parser.
        session: Optional existing session.

    Returns:
        Number of records loaded.
    """
    from portfolio_app.toronto.parsers.statcan_cmhc import (
        CMHCRentalRecord as StatCanRecord,
    )

    def _load(sess: Session) -> int:
        # Ensure Toronto CMA zone exists
        zone_key = ensure_toronto_cma_zone(sess)

        loaded = 0
        for record in records:
            if not isinstance(record, StatCanRecord):
                logger.warning(f"Skipping invalid record type: {type(record)}")
                continue

            # Generate date key for this record's survey date
            survey_date = date(record.year, record.month, 1)
            date_key = generate_date_key(survey_date)

            # Verify time dimension exists
            time_dim = sess.query(DimTime).filter_by(date_key=date_key).first()
            if not time_dim:
                logger.warning(
                    f"Time dimension not found for {survey_date}, skipping record"
                )
                continue

            # Create fact record
            fact = FactRentals(
                date_key=date_key,
                zone_key=zone_key,
                bedroom_type=record.bedroom_type,
                universe=record.universe,
                avg_rent=float(record.avg_rent) if record.avg_rent else None,
                median_rent=None,  # StatCan doesn't provide median
                vacancy_rate=float(record.vacancy_rate)
                if record.vacancy_rate
                else None,
                availability_rate=None,
                turnover_rate=None,
                rent_change_pct=None,
                reliability_code=None,
            )

            # Upsert
            inserted, updated = upsert_by_key(
                sess, FactRentals, [fact], ["date_key", "zone_key", "bedroom_type"]
            )
            loaded += inserted + updated

        logger.info(f"Loaded {loaded} CMHC rental records from StatCan")
        return loaded

    if session:
        return _load(session)
    with get_session() as sess:
        result = _load(sess)
        sess.commit()
        return result
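A minimal usage sketch for the loader above (assuming a configured database and dim_time already populated via load_time_dimension); when no session is passed, the loader opens its own session and commits:

    from portfolio_app.toronto.loaders import load_statcan_cmhc_data
    from portfolio_app.toronto.parsers.statcan_cmhc import fetch_toronto_rental_data

    records = fetch_toronto_rental_data(start_year=2014)
    count = load_statcan_cmhc_data(records)  # upserts on (date_key, zone_key, bedroom_type)
    print(f"{count} rows upserted into fact_rentals")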
@@ -8,11 +8,18 @@ from sqlalchemy.orm import Mapped, mapped_column

 from .base import Base

+# Schema constants
+RAW_TORONTO_SCHEMA = "raw_toronto"
+
+
 class DimTime(Base):
-    """Time dimension table."""
+    """Time dimension table (shared across all projects).
+
+    Note: Stays in public schema as it's a shared dimension.
+    """

     __tablename__ = "dim_time"
+    __table_args__ = {"schema": "public"}

     date_key: Mapped[int] = mapped_column(Integer, primary_key=True)
     full_date: Mapped[date] = mapped_column(Date, nullable=False, unique=True)
@@ -27,6 +34,7 @@ class DimCMHCZone(Base):
     """CMHC zone dimension table with PostGIS geometry."""

     __tablename__ = "dim_cmhc_zone"
+    __table_args__ = {"schema": RAW_TORONTO_SCHEMA}

     zone_key: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     zone_code: Mapped[str] = mapped_column(String(10), nullable=False, unique=True)
@@ -41,6 +49,7 @@ class DimNeighbourhood(Base):
     """

     __tablename__ = "dim_neighbourhood"
+    __table_args__ = {"schema": RAW_TORONTO_SCHEMA}

     neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True)
     name: Mapped[str] = mapped_column(String(100), nullable=False)
@@ -69,6 +78,7 @@ class DimPolicyEvent(Base):
     """Policy event dimension for time-series annotation."""

     __tablename__ = "dim_policy_event"
+    __table_args__ = {"schema": RAW_TORONTO_SCHEMA}

     event_id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     event_date: Mapped[date] = mapped_column(Date, nullable=False)

@@ -4,6 +4,7 @@ from sqlalchemy import ForeignKey, Index, Integer, Numeric, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship

 from .base import Base
+from .dimensions import RAW_TORONTO_SCHEMA


 class BridgeCMHCNeighbourhood(Base):
@@ -14,6 +15,11 @@ class BridgeCMHCNeighbourhood(Base):
     """

     __tablename__ = "bridge_cmhc_neighbourhood"
+    __table_args__ = (
+        Index("ix_bridge_cmhc_zone", "cmhc_zone_code"),
+        Index("ix_bridge_neighbourhood", "neighbourhood_id"),
+        {"schema": RAW_TORONTO_SCHEMA},
+    )

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     cmhc_zone_code: Mapped[str] = mapped_column(String(10), nullable=False)
@@ -22,11 +28,6 @@ class BridgeCMHCNeighbourhood(Base):
         Numeric(5, 4), nullable=False
     )  # 0.0000 to 1.0000

-    __table_args__ = (
-        Index("ix_bridge_cmhc_zone", "cmhc_zone_code"),
-        Index("ix_bridge_neighbourhood", "neighbourhood_id"),
-    )
-

 class FactCensus(Base):
     """Census statistics by neighbourhood and year.
@@ -35,6 +36,10 @@ class FactCensus(Base):
     """

     __tablename__ = "fact_census"
+    __table_args__ = (
+        Index("ix_fact_census_neighbourhood_year", "neighbourhood_id", "census_year"),
+        {"schema": RAW_TORONTO_SCHEMA},
+    )

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
@@ -66,10 +71,6 @@ class FactCensus(Base):
         Numeric(12, 2), nullable=True
     )

-    __table_args__ = (
-        Index("ix_fact_census_neighbourhood_year", "neighbourhood_id", "census_year"),
-    )
-

 class FactCrime(Base):
     """Crime statistics by neighbourhood and year.
@@ -78,6 +79,11 @@ class FactCrime(Base):
     """

     __tablename__ = "fact_crime"
+    __table_args__ = (
+        Index("ix_fact_crime_neighbourhood_year", "neighbourhood_id", "year"),
+        Index("ix_fact_crime_type", "crime_type"),
+        {"schema": RAW_TORONTO_SCHEMA},
+    )

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
@@ -86,11 +92,6 @@ class FactCrime(Base):
     count: Mapped[int] = mapped_column(Integer, nullable=False)
     rate_per_100k: Mapped[float | None] = mapped_column(Numeric(10, 2), nullable=True)

-    __table_args__ = (
-        Index("ix_fact_crime_neighbourhood_year", "neighbourhood_id", "year"),
-        Index("ix_fact_crime_type", "crime_type"),
-    )
-

 class FactAmenities(Base):
     """Amenity counts by neighbourhood.
@@ -99,6 +100,11 @@ class FactAmenities(Base):
     """

     __tablename__ = "fact_amenities"
+    __table_args__ = (
+        Index("ix_fact_amenities_neighbourhood_year", "neighbourhood_id", "year"),
+        Index("ix_fact_amenities_type", "amenity_type"),
+        {"schema": RAW_TORONTO_SCHEMA},
+    )

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
@@ -106,11 +112,6 @@ class FactAmenities(Base):
     count: Mapped[int] = mapped_column(Integer, nullable=False)
     year: Mapped[int] = mapped_column(Integer, nullable=False)

-    __table_args__ = (
-        Index("ix_fact_amenities_neighbourhood_year", "neighbourhood_id", "year"),
-        Index("ix_fact_amenities_type", "amenity_type"),
-    )
-

 class FactRentals(Base):
     """Fact table for CMHC rental market data.
@@ -119,13 +120,16 @@ class FactRentals(Base):
     """

     __tablename__ = "fact_rentals"
+    __table_args__ = {"schema": RAW_TORONTO_SCHEMA}

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
     date_key: Mapped[int] = mapped_column(
-        Integer, ForeignKey("dim_time.date_key"), nullable=False
+        Integer, ForeignKey("public.dim_time.date_key"), nullable=False
     )
     zone_key: Mapped[int] = mapped_column(
-        Integer, ForeignKey("dim_cmhc_zone.zone_key"), nullable=False
+        Integer,
+        ForeignKey(f"{RAW_TORONTO_SCHEMA}.dim_cmhc_zone.zone_key"),
+        nullable=False,
     )
     bedroom_type: Mapped[str] = mapped_column(String(20), nullable=False)
     universe: Mapped[int | None] = mapped_column(Integer, nullable=True)
@@ -139,6 +143,6 @@ class FactRentals(Base):
     rent_change_pct: Mapped[float | None] = mapped_column(Numeric(5, 2), nullable=True)
     reliability_code: Mapped[str | None] = mapped_column(String(2), nullable=True)

-    # Relationships
-    time = relationship("DimTime", backref="rentals")
-    zone = relationship("DimCMHCZone", backref="rentals")
+    # Relationships - explicit foreign_keys needed for cross-schema joins
+    time = relationship("DimTime", foreign_keys=[date_key], backref="rentals")
+    zone = relationship("DimCMHCZone", foreign_keys=[zone_key], backref="rentals")
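One consequence of moving tables into raw_toronto, shown as a minimal sketch (the Base import path is an assumption; it is not shown in these hunks): create_all() only qualifies table names with the schema from __table_args__, so the schema itself must exist first.

    from sqlalchemy import text

    from portfolio_app.toronto.models import get_engine
    # Base is assumed to be the declarative base from the models package

    engine = get_engine()
    with engine.begin() as conn:
        # create_all() does not create schemas, only tables
        conn.execute(text("CREATE SCHEMA IF NOT EXISTS raw_toronto"))
    Base.metadata.create_all(engine)  # dim_time lands in public, the rest in raw_toronto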
383
portfolio_app/toronto/parsers/statcan_cmhc.py
Normal file
@@ -0,0 +1,383 @@
"""Parser for CMHC rental data via Statistics Canada API.

Downloads rental market data (average rent, vacancy rates, universe)
from Statistics Canada's Web Data Service.

Data Sources:
- Table 34-10-0127: Vacancy rates
- Table 34-10-0129: Rental universe (total units)
- Table 34-10-0133: Average rent by bedroom type
"""

import contextlib
import io
import logging
import zipfile
from dataclasses import dataclass
from decimal import Decimal
from pathlib import Path
from typing import Any

import httpx
import pandas as pd

logger = logging.getLogger(__name__)

# StatCan Web Data Service endpoints
STATCAN_API_BASE = "https://www150.statcan.gc.ca/t1/wds/rest"
STATCAN_DOWNLOAD_BASE = "https://www150.statcan.gc.ca/n1/tbl/csv"

# CMHC table IDs
CMHC_TABLES = {
    "vacancy": "34100127",
    "universe": "34100129",
    "rent": "34100133",
}

# Toronto CMA identifier in StatCan data
TORONTO_DGUID = "2011S0503535"
TORONTO_GEO_NAME = "Toronto, Ontario"


@dataclass
class CMHCRentalRecord:
    """Rental market record for database loading."""

    year: int
    month: int  # CMHC surveys in October, so month=10
    zone_name: str
    bedroom_type: str
    avg_rent: Decimal | None
    vacancy_rate: Decimal | None
    universe: int | None


class StatCanCMHCParser:
    """Parser for CMHC rental data from Statistics Canada.

    Downloads and processes rental market survey data including:
    - Average rents by bedroom type
    - Vacancy rates
    - Rental universe (total units)

    Data is available from 1987 to present, updated annually in January.
    """

    BEDROOM_TYPE_MAP = {
        "Bachelor units": "bachelor",
        "One bedroom units": "1bed",
        "Two bedroom units": "2bed",
        "Three bedroom units": "3bed",
        "Total": "total",
    }

    STRUCTURE_FILTER = "Apartment structures of six units and over"

    def __init__(
        self,
        cache_dir: Path | None = None,
        timeout: float = 60.0,
    ) -> None:
        """Initialize parser.

        Args:
            cache_dir: Optional directory for caching downloaded files.
            timeout: HTTP request timeout in seconds.
        """
        self._cache_dir = cache_dir
        self._timeout = timeout
        self._client: httpx.Client | None = None

    @property
    def client(self) -> httpx.Client:
        """Lazy-initialize HTTP client."""
        if self._client is None:
            self._client = httpx.Client(
                timeout=self._timeout,
                follow_redirects=True,
            )
        return self._client

    def close(self) -> None:
        """Close HTTP client."""
        if self._client is not None:
            self._client.close()
            self._client = None

    def __enter__(self) -> "StatCanCMHCParser":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()

    def _get_download_url(self, table_id: str) -> str:
        """Get CSV download URL for a StatCan table.

        Args:
            table_id: StatCan table ID (e.g., "34100133").

        Returns:
            Direct download URL for the CSV zip file.
        """
        api_url = f"{STATCAN_API_BASE}/getFullTableDownloadCSV/{table_id}/en"
        response = self.client.get(api_url)
        response.raise_for_status()

        data = response.json()
        if data.get("status") != "SUCCESS":
            raise ValueError(f"StatCan API error: {data}")

        return str(data["object"])

    def _download_table(self, table_id: str) -> pd.DataFrame:
        """Download and extract a StatCan table as DataFrame.

        Args:
            table_id: StatCan table ID.

        Returns:
            DataFrame with table data.
        """
        # Check cache first
        if self._cache_dir:
            cache_file = self._cache_dir / f"{table_id}.csv"
            if cache_file.exists():
                logger.debug(f"Loading {table_id} from cache")
                return pd.read_csv(cache_file)

        # Get download URL and fetch
        download_url = self._get_download_url(table_id)
        logger.info(f"Downloading StatCan table {table_id}...")

        response = self.client.get(download_url)
        response.raise_for_status()

        # Extract CSV from zip
        with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
            csv_name = f"{table_id}.csv"
            with zf.open(csv_name) as f:
                df = pd.read_csv(f)

        # Cache if directory specified
        if self._cache_dir:
            self._cache_dir.mkdir(parents=True, exist_ok=True)
            df.to_csv(self._cache_dir / f"{table_id}.csv", index=False)

        logger.info(f"Downloaded {len(df)} records from table {table_id}")
        return df

    def _filter_toronto(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filter DataFrame to Toronto CMA only.

        Args:
            df: Full StatCan DataFrame.

        Returns:
            DataFrame filtered to Toronto.
        """
        # Try DGUID first, then GEO name
        if "DGUID" in df.columns:
            toronto_df = df[df["DGUID"] == TORONTO_DGUID]
            if len(toronto_df) > 0:
                return toronto_df

        if "GEO" in df.columns:
            return df[df["GEO"] == TORONTO_GEO_NAME]

        raise ValueError("Could not identify Toronto data in DataFrame")

    def get_vacancy_rates(
        self,
        years: list[int] | None = None,
    ) -> dict[int, Decimal]:
        """Fetch Toronto vacancy rates by year.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping year to vacancy rate.
        """
        df = self._download_table(CMHC_TABLES["vacancy"])
        df = self._filter_toronto(df)

        # Filter years if specified
        if years:
            df = df[df["REF_DATE"].isin(years)]

        # Extract year -> rate mapping
        rates = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            value = row.get("VALUE")
            if pd.notna(value):
                rates[year] = Decimal(str(value))

        logger.info(f"Fetched vacancy rates for {len(rates)} years")
        return rates

    def get_rental_universe(
        self,
        years: list[int] | None = None,
    ) -> dict[tuple[int, str], int]:
        """Fetch Toronto rental universe (total units) by year and bedroom type.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping (year, bedroom_type) to unit count.
        """
        df = self._download_table(CMHC_TABLES["universe"])
        df = self._filter_toronto(df)

        # Filter to standard apartment structures
        if "Type of structure" in df.columns:
            df = df[df["Type of structure"] == self.STRUCTURE_FILTER]

        if years:
            df = df[df["REF_DATE"].isin(years)]

        universe = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            bedroom_raw = row.get("Type of unit", "Total")
            bedroom = self.BEDROOM_TYPE_MAP.get(bedroom_raw, "other")
            value = row.get("VALUE")

            if pd.notna(value) and value is not None:
                universe[(year, bedroom)] = int(str(value))

        logger.info(
            f"Fetched rental universe for {len(universe)} year/bedroom combinations"
        )
        return universe

    def get_average_rents(
        self,
        years: list[int] | None = None,
    ) -> dict[tuple[int, str], Decimal]:
        """Fetch Toronto average rents by year and bedroom type.

        Args:
            years: Optional list of years to filter.

        Returns:
            Dictionary mapping (year, bedroom_type) to average rent.
        """
        df = self._download_table(CMHC_TABLES["rent"])
        df = self._filter_toronto(df)

        # Filter to standard apartment structures (most reliable data)
        if "Type of structure" in df.columns:
            df = df[df["Type of structure"] == self.STRUCTURE_FILTER]

        if years:
            df = df[df["REF_DATE"].isin(years)]

        rents = {}
        for _, row in df.iterrows():
            year = int(row["REF_DATE"])
            bedroom_raw = row.get("Type of unit", "Total")
            bedroom = self.BEDROOM_TYPE_MAP.get(bedroom_raw, "other")
            value = row.get("VALUE")

            if pd.notna(value) and str(value) not in ("F", ".."):
                with contextlib.suppress(Exception):
                    rents[(year, bedroom)] = Decimal(str(value))

        logger.info(f"Fetched average rents for {len(rents)} year/bedroom combinations")
        return rents

    def get_all_rental_data(
        self,
        start_year: int = 2014,
        end_year: int | None = None,
    ) -> list[CMHCRentalRecord]:
        """Fetch all Toronto rental data and combine into records.

        Args:
            start_year: First year to include.
            end_year: Last year to include (defaults to current year + 1).

        Returns:
            List of CMHCRentalRecord objects ready for database loading.
        """
        import datetime

        if end_year is None:
            end_year = datetime.date.today().year + 1

        years = list(range(start_year, end_year + 1))

        logger.info(
            f"Fetching CMHC rental data for Toronto ({start_year}-{end_year})..."
        )

        # Fetch all data types
        vacancy_rates = self.get_vacancy_rates(years)
        rents = self.get_average_rents(years)
        universe = self.get_rental_universe(years)

        # Combine into records
        records = []
        bedroom_types = ["bachelor", "1bed", "2bed", "3bed"]

        for year in years:
            vacancy = vacancy_rates.get(year)

            for bedroom in bedroom_types:
                avg_rent = rents.get((year, bedroom))
                units = universe.get((year, bedroom))

                # Skip if no rent data for this year/bedroom
                if avg_rent is None:
                    continue

                records.append(
                    CMHCRentalRecord(
                        year=year,
                        month=10,  # CMHC surveys in October
                        zone_name="Toronto CMA",
                        bedroom_type=bedroom,
                        avg_rent=avg_rent,
                        vacancy_rate=vacancy,
                        universe=units,
                    )
                )

        logger.info(f"Created {len(records)} CMHC rental records")
        return records


def fetch_toronto_rental_data(
    start_year: int = 2014,
    end_year: int | None = None,
    cache_dir: Path | None = None,
) -> list[CMHCRentalRecord]:
    """Convenience function to fetch Toronto rental data.

    Args:
        start_year: First year to include.
        end_year: Last year to include.
        cache_dir: Optional cache directory.

    Returns:
        List of CMHCRentalRecord objects.
    """
    with StatCanCMHCParser(cache_dir=cache_dir) as parser:
        return parser.get_all_rental_data(start_year, end_year)


if __name__ == "__main__":
    # Test the parser
    logging.basicConfig(level=logging.INFO)

    records = fetch_toronto_rental_data(start_year=2020)

    print(f"\nFetched {len(records)} records")
    print("\nSample records:")
    for r in records[:10]:
        print(
            f"  {r.year} {r.bedroom_type}: ${r.avg_rent} rent, {r.vacancy_rate}% vacancy"
        )
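A short usage sketch for the parser above (the cache path is illustrative): the first call downloads and unzips the three StatCan tables; subsequent calls read the cached CSVs instead.

    from pathlib import Path

    records = fetch_toronto_rental_data(start_year=2018, cache_dir=Path("data/cache"))
    for r in records:
        if r.bedroom_type == "2bed":
            print(f"{r.year}: avg rent ${r.avg_rent}, vacancy {r.vacancy_rate}%")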
@@ -6,6 +6,7 @@ from the City of Toronto's Open Data Portal.
 API Documentation: https://open.toronto.ca/dataset/
 """

+import contextlib
 import json
 import logging
 from decimal import Decimal
@@ -193,6 +194,9 @@ class TorontoOpenDataParser:
     def _fetch_geojson(self, package_id: str) -> dict[str, Any]:
         """Fetch GeoJSON data from a package.

+        Handles both pure GeoJSON responses and CSV responses with embedded
+        geometry columns (common in Toronto Open Data).
+
         Args:
             package_id: The package/dataset ID.

@@ -212,16 +216,65 @@ class TorontoOpenDataParser:

         response = self.client.get(url)
         response.raise_for_status()
-        data = response.json()

-        # Cache the response
+        # Try to parse as JSON first
+        try:
+            data = response.json()
+            # If it's already a valid GeoJSON FeatureCollection, return it
+            if isinstance(data, dict) and data.get("type") == "FeatureCollection":
+                if self._cache_dir:
+                    self._cache_dir.mkdir(parents=True, exist_ok=True)
+                    cache_file = self._cache_dir / f"{package_id}.geojson"
+                    with open(cache_file, "w", encoding="utf-8") as f:
+                        json.dump(data, f)
+                return dict(data)
+        except json.JSONDecodeError:
+            pass
+
+        # If JSON parsing failed, it's likely CSV with embedded geometry
+        # Parse CSV and convert to GeoJSON FeatureCollection
+        logger.info("Response is CSV format, converting to GeoJSON...")
+        import csv
+        import io
+
+        # Increase field size limit for large geometry columns
+        csv.field_size_limit(10 * 1024 * 1024)  # 10 MB
+
+        csv_text = response.text
+        reader = csv.DictReader(io.StringIO(csv_text))
+
+        features = []
+        for row in reader:
+            # Extract geometry from the 'geometry' column if present
+            geometry = None
+            if "geometry" in row and row["geometry"]:
+                with contextlib.suppress(json.JSONDecodeError):
+                    geometry = json.loads(row["geometry"])
+
+            # Build properties from all other columns
+            properties = {k: v for k, v in row.items() if k != "geometry"}
+
+            features.append(
+                {
+                    "type": "Feature",
+                    "geometry": geometry,
+                    "properties": properties,
+                }
+            )
+
+        geojson_data: dict[str, Any] = {
+            "type": "FeatureCollection",
+            "features": features,
+        }
+
+        # Cache the converted response
         if self._cache_dir:
             self._cache_dir.mkdir(parents=True, exist_ok=True)
             cache_file = self._cache_dir / f"{package_id}.geojson"
             with open(cache_file, "w", encoding="utf-8") as f:
-                json.dump(data, f)
+                json.dump(geojson_data, f)

-        return dict(data)
+        return geojson_data

     def _fetch_csv_as_json(self, package_id: str) -> list[dict[str, Any]]:
         """Fetch CSV data as JSON records via CKAN datastore.
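To make the CSV branch above concrete, here is the row-to-feature conversion applied to one hypothetical row of the kind Toronto Open Data returns:

    import json

    row = {
        "_id": "1",
        "AREA_NAME": "Annex",
        "geometry": '{"type": "Point", "coordinates": [-79.404, 43.670]}',
    }
    feature = {
        "type": "Feature",
        "geometry": json.loads(row["geometry"]),  # embedded GeoJSON string -> dict
        "properties": {k: v for k, v in row.items() if k != "geometry"},
    }
    print(feature["properties"]["AREA_NAME"], feature["geometry"]["type"])  # Annex Point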
@@ -282,29 +335,32 @@ class TorontoOpenDataParser:
             props = feature.get("properties", {})
             geometry = feature.get("geometry")

-            # Extract area_id from various possible property names
-            area_id = props.get("AREA_ID") or props.get("area_id")
-            if area_id is None:
-                # Try AREA_SHORT_CODE as fallback
-                short_code = props.get("AREA_SHORT_CODE", "")
-                if short_code:
-                    # Extract numeric part
-                    area_id = int("".join(c for c in short_code if c.isdigit()) or "0")
+            # Use AREA_SHORT_CODE as the primary ID (1-158 range)
+            # AREA_ID is a large internal identifier not useful for our schema
+            short_code = props.get("AREA_SHORT_CODE") or props.get(
+                "area_short_code", ""
+            )
+            if short_code:
+                area_id = int("".join(c for c in str(short_code) if c.isdigit()) or "0")
+            else:
+                # Fallback to _id (row number) if AREA_SHORT_CODE not available
+                area_id = int(props.get("_id", 0))
+
+            if area_id == 0:
+                logger.warning(f"Skipping neighbourhood with no valid ID: {props}")
+                continue

             area_name = (
                 props.get("AREA_NAME")
                 or props.get("area_name")
                 or f"Neighbourhood {area_id}"
             )
-            area_short_code = props.get("AREA_SHORT_CODE") or props.get(
-                "area_short_code"
-            )

             records.append(
                 NeighbourhoodRecord(
-                    area_id=int(area_id),
+                    area_id=area_id,
                     area_name=str(area_name),
-                    area_short_code=area_short_code,
+                    area_short_code=str(short_code) if short_code else None,
                     geometry=geometry,
                 )
             )
@@ -314,17 +370,17 @@ class TorontoOpenDataParser:

     # Mapping of indicator names to CensusRecord fields
     # Keys are partial matches (case-insensitive) found in the "Characteristic" column
+    # Order matters - first match wins, so more specific patterns come first
+    # Note: owner/renter counts are raw numbers, not percentages - calculated in dbt
     CENSUS_INDICATOR_MAPPING: dict[str, str] = {
         "population, 2021": "population",
         "population, 2016": "population",
         "population density per square kilometre": "population_density",
-        "median total income of household": "median_household_income",
-        "average total income of household": "average_household_income",
+        "median total income of households in": "median_household_income",
+        "average total income of households in": "average_household_income",
         "unemployment rate": "unemployment_rate",
         "bachelor's degree or higher": "pct_bachelors_or_higher",
-        "owner": "pct_owner_occupied",
-        "renter": "pct_renter_occupied",
-        "median age": "median_age",
+        "average age": "median_age",
        "average value of dwellings": "average_dwelling_value",
     }

@@ -358,17 +414,31 @@ class TorontoOpenDataParser:
         logger.info(f"Fetched {len(raw_records)} census profile rows")

         # Find the characteristic/indicator column name
+        # Prioritize "Characteristic" over "Category" since both may exist
         sample_row = raw_records[0]
         char_col = None
-        for col in sample_row:
-            col_lower = col.lower()
-            if "characteristic" in col_lower or "category" in col_lower:
-                char_col = col
-                break
+        # First try exact match for Characteristic
+        if "Characteristic" in sample_row:
+            char_col = "Characteristic"
+        else:
+            # Fall back to pattern matching
+            for col in sample_row:
+                col_lower = col.lower()
+                if "characteristic" in col_lower:
+                    char_col = col
+                    break
+
+        # Last resort: try Category
+        if not char_col:
+            for col in sample_row:
+                if "category" in col.lower():
+                    char_col = col
+                    break

         if not char_col:
-            # Try common column names
-            for candidate in ["Characteristic", "Category", "Topic", "_id"]:
+            # Try other common column names
+            for candidate in ["Topic", "_id"]:
                 if candidate in sample_row:
                     char_col = candidate
                     break
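The digit-extraction in the hunk above normalizes zero-padded short codes into small integer IDs; a quick illustration (the input values are hypothetical):

    for short_code in ("097", "001", "158"):
        area_id = int("".join(c for c in str(short_code) if c.isdigit()) or "0")
        print(short_code, "->", area_id)  # 097 -> 97, 001 -> 1, 158 -> 158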
@@ -37,7 +37,7 @@ def get_neighbourhoods_geojson(year: int = 2021) -> dict[str, Any]:
             ST_AsGeoJSON(geometry)::json as geom,
             population,
             livability_score
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
             AND geometry IS NOT NULL
     """

@@ -1,5 +1,6 @@
 """Service layer for querying neighbourhood data from dbt marts."""

+import logging
 from functools import lru_cache
 from typing import Any

@@ -8,6 +9,8 @@ from sqlalchemy import text

 from portfolio_app.toronto.models import get_engine

+logger = logging.getLogger(__name__)
+

 def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
     """Execute SQL query and return DataFrame.
@@ -23,8 +26,10 @@ def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
         engine = get_engine()
         with engine.connect() as conn:
             return pd.read_sql(text(sql), conn, params=params)
-    except Exception:
-        # Return empty DataFrame on connection or query error
+    except Exception as e:
+        logger.error(f"Query failed: {e}")
+        logger.debug(f"Failed SQL: {sql}")
+        logger.debug(f"Params: {params}")
         return pd.DataFrame()

@@ -56,7 +61,7 @@ def get_overview_data(year: int = 2021) -> pd.DataFrame:
             rent_to_income_pct,
             avg_rent_2bed,
             total_amenities_per_1000
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
         ORDER BY livability_score DESC NULLS LAST
     """
@@ -95,7 +100,7 @@ def get_housing_data(year: int = 2021) -> pd.DataFrame:
             affordability_index,
             rent_yoy_change_pct,
             income_quintile
-        FROM mart_neighbourhood_housing
+        FROM public_marts.mart_neighbourhood_housing
         WHERE year = :year
         ORDER BY affordability_index ASC NULLS LAST
     """
@@ -112,26 +117,22 @@ def get_safety_data(year: int = 2021) -> pd.DataFrame:

     Returns:
         DataFrame with columns: neighbourhood_id, neighbourhood_name,
-        total_crime_rate, violent_crime_rate, property_crime_rate, etc.
+        total_crime_rate, violent_crimes, property_crimes, etc.
     """
     sql = """
         SELECT
             neighbourhood_id,
             neighbourhood_name,
             year,
-            total_crimes,
+            total_incidents as total_crimes,
             crime_rate_per_100k as total_crime_rate,
-            violent_crimes,
-            violent_crime_rate,
-            property_crimes,
-            property_crime_rate,
-            theft_crimes,
-            theft_rate,
-            crime_yoy_change_pct,
-            crime_trend
-        FROM mart_neighbourhood_safety
+            assault_count + robbery_count + homicide_count as violent_crimes,
+            break_enter_count + auto_theft_count as property_crimes,
+            theft_over_count as theft_crimes,
+            crime_yoy_change_pct
+        FROM public_marts.mart_neighbourhood_safety
         WHERE year = :year
-        ORDER BY total_crime_rate ASC NULLS LAST
+        ORDER BY crime_rate_per_100k ASC NULLS LAST
     """
     return _execute_query(sql, {"year": year})

@@ -152,22 +153,22 @@ def get_demographics_data(year: int = 2021) -> pd.DataFrame:
         SELECT
             neighbourhood_id,
             neighbourhood_name,
-            census_year as year,
+            year,
             population,
             population_density,
-            population_change_pct,
             median_household_income,
             average_household_income,
             income_quintile,
+            income_index,
             median_age,
-            pct_under_18,
-            pct_18_to_64,
-            pct_65_plus,
-            pct_bachelors_or_higher,
+            age_index,
+            pct_owner_occupied,
+            pct_renter_occupied,
+            education_bachelors_pct as pct_bachelors_or_higher,
             unemployment_rate,
-            diversity_index
-        FROM mart_neighbourhood_demographics
-        WHERE census_year = :year
+            tenure_diversity_index as diversity_index
+        FROM public_marts.mart_neighbourhood_demographics
+        WHERE year = :year
         ORDER BY population DESC NULLS LAST
     """
     return _execute_query(sql, {"year": year})
@@ -183,26 +184,26 @@ def get_amenities_data(year: int = 2021) -> pd.DataFrame:

     Returns:
         DataFrame with columns: neighbourhood_id, neighbourhood_name,
-        amenity_score, parks_per_capita, schools_per_capita, transit_score, etc.
+        amenity_score, parks_per_1000, schools_per_1000, etc.
     """
     sql = """
         SELECT
             neighbourhood_id,
             neighbourhood_name,
             year,
-            park_count,
+            parks_count as park_count,
             parks_per_1000,
-            school_count,
+            schools_count as school_count,
             schools_per_1000,
-            childcare_count,
-            childcare_per_1000,
+            transit_count as childcare_count,
+            transit_per_1000 as childcare_per_1000,
             total_amenities,
             total_amenities_per_1000,
-            amenity_score,
-            amenity_rank
-        FROM mart_neighbourhood_amenities
+            amenity_index as amenity_score,
+            amenity_tier as amenity_rank
+        FROM public_marts.mart_neighbourhood_amenities
         WHERE year = :year
-        ORDER BY amenity_score DESC NULLS LAST
+        ORDER BY amenity_index DESC NULLS LAST
     """
     return _execute_query(sql, {"year": year})

@@ -249,17 +250,17 @@ def get_neighbourhood_details(
             a.park_count,
             a.school_count,
             a.total_amenities
-        FROM mart_neighbourhood_overview o
-        LEFT JOIN mart_neighbourhood_safety s
+        FROM public_marts.mart_neighbourhood_overview o
+        LEFT JOIN public_marts.mart_neighbourhood_safety s
             ON o.neighbourhood_id = s.neighbourhood_id
             AND o.year = s.year
-        LEFT JOIN mart_neighbourhood_housing h
+        LEFT JOIN public_marts.mart_neighbourhood_housing h
             ON o.neighbourhood_id = h.neighbourhood_id
             AND o.year = h.year
-        LEFT JOIN mart_neighbourhood_demographics d
+        LEFT JOIN public_marts.mart_neighbourhood_demographics d
             ON o.neighbourhood_id = d.neighbourhood_id
             AND o.year = d.census_year
-        LEFT JOIN mart_neighbourhood_amenities a
+        LEFT JOIN public_marts.mart_neighbourhood_amenities a
             ON o.neighbourhood_id = a.neighbourhood_id
             AND o.year = a.year
         WHERE o.neighbourhood_id = :neighbourhood_id
@@ -288,7 +289,7 @@ def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
             neighbourhood_id,
             neighbourhood_name,
             population
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
         ORDER BY neighbourhood_name
     """
@@ -317,19 +318,19 @@ def get_rankings(
     """
     # Map metrics to their source tables
     table_map = {
-        "livability_score": "mart_neighbourhood_overview",
-        "safety_score": "mart_neighbourhood_overview",
-        "affordability_score": "mart_neighbourhood_overview",
-        "amenity_score": "mart_neighbourhood_overview",
-        "crime_rate_per_100k": "mart_neighbourhood_safety",
-        "total_crime_rate": "mart_neighbourhood_safety",
-        "avg_rent_2bed": "mart_neighbourhood_housing",
-        "affordability_index": "mart_neighbourhood_housing",
-        "population": "mart_neighbourhood_demographics",
-        "median_household_income": "mart_neighbourhood_demographics",
+        "livability_score": "public_marts.mart_neighbourhood_overview",
+        "safety_score": "public_marts.mart_neighbourhood_overview",
+        "affordability_score": "public_marts.mart_neighbourhood_overview",
+        "amenity_score": "public_marts.mart_neighbourhood_overview",
+        "crime_rate_per_100k": "public_marts.mart_neighbourhood_safety",
+        "total_crime_rate": "public_marts.mart_neighbourhood_safety",
+        "avg_rent_2bed": "public_marts.mart_neighbourhood_housing",
+        "affordability_index": "public_marts.mart_neighbourhood_housing",
+        "population": "public_marts.mart_neighbourhood_demographics",
+        "median_household_income": "public_marts.mart_neighbourhood_demographics",
     }

-    table = table_map.get(metric, "mart_neighbourhood_overview")
+    table = table_map.get(metric, "public_marts.mart_neighbourhood_overview")
     year_col = "census_year" if "demographics" in table else "year"

     order = "ASC" if ascending else "DESC"
@@ -375,7 +376,7 @@ def get_city_averages(year: int = 2021) -> dict[str, Any]:
             AVG(crime_rate_per_100k) as avg_crime_rate,
             AVG(avg_rent_2bed) as avg_rent_2bed,
             AVG(rent_to_income_pct) as avg_rent_to_income
-        FROM mart_neighbourhood_overview
+        FROM public_marts.mart_neighbourhood_overview
         WHERE year = :year
     """
     df = _execute_query(sql, {"year": year})
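A usage sketch for the service layer above (assumes the public_marts schema has been built by dbt; on any failure _execute_query now logs the error and returns an empty frame instead of raising):

    df = get_overview_data(year=2021)
    if df.empty:
        print("No mart data - check the connection and run dbt first")
    else:
        top = df.nlargest(5, "livability_score")
        print(top[["neighbourhood_name", "livability_score"]])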
@@ -34,6 +34,8 @@ dependencies = [
     "pandas>=2.3",
     "geopandas>=1.1",
     "shapely>=2.0",
+    "pyproj>=3.6",
+    "statsmodels>=0.14",

     # Visualization
     "dash>=3.3",
@@ -69,6 +71,10 @@ dev = [
     # Pre-commit
     "pre-commit>=4.0",

+    # Jupyter
+    "jupyter>=1.0",
+    "ipykernel>=6.29",
+
     # Type stubs
     "pandas-stubs",
     "types-requests",
@@ -28,8 +28,13 @@ from datetime import date
 from pathlib import Path
 from typing import Any

+from dotenv import load_dotenv
+
 # Add project root to path
 PROJECT_ROOT = Path(__file__).parent.parent.parent
+
+# Load .env file so dbt can access POSTGRES_* environment variables
+load_dotenv(PROJECT_ROOT / ".env")
 sys.path.insert(0, str(PROJECT_ROOT))

 from portfolio_app.toronto.loaders import (  # noqa: E402
@@ -38,12 +43,16 @@ from portfolio_app.toronto.loaders import (  # noqa: E402
     load_census_data,
     load_crime_data,
     load_neighbourhoods,
+    load_statcan_cmhc_data,
     load_time_dimension,
 )
 from portfolio_app.toronto.parsers import (  # noqa: E402
     TorontoOpenDataParser,
     TorontoPoliceParser,
 )
+from portfolio_app.toronto.parsers.statcan_cmhc import (  # noqa: E402
+    fetch_toronto_rental_data,
+)
 from portfolio_app.toronto.schemas import Neighbourhood  # noqa: E402

 # Configure logging
@@ -91,6 +100,9 @@ class DataPipeline:
         # 5. Load amenities
         self._load_amenities(session)

+        # 6. Load CMHC rental data from StatCan
+        self._load_rentals(session)
+
         session.commit()
         logger.info("All data committed to database")
@@ -241,6 +253,32 @@ class DataPipeline:

         self.stats["amenities"] = total_count

+    def _load_rentals(self, session: Any) -> None:
+        """Fetch and load CMHC rental data from StatCan."""
+        logger.info("Fetching CMHC rental data from Statistics Canada...")
+
+        if self.dry_run:
+            logger.info("  [DRY RUN] Would fetch and load CMHC rental data")
+            return
+
+        try:
+            # Fetch rental data (2014-present)
+            rental_records = fetch_toronto_rental_data(start_year=2014)
+
+            if not rental_records:
+                logger.warning("  No rental records fetched")
+                return
+
+            count = load_statcan_cmhc_data(rental_records, session)
+            self.stats["rentals"] = count
+            logger.info(f"  Loaded {count} CMHC rental records")
+        except Exception as e:
+            logger.warning(f"  Failed to load CMHC rental data: {e}")
+            if self.verbose:
+                import traceback
+
+                traceback.print_exc()
+
     def run_dbt(self) -> bool:
         """Run dbt to transform data.

@@ -250,30 +288,46 @@ class DataPipeline:
         logger.info("Running dbt transformations...")

         dbt_project_dir = PROJECT_ROOT / "dbt"
+        venv_dbt = PROJECT_ROOT / ".venv" / "bin" / "dbt"
+
+        # Use venv dbt if available, otherwise fall back to system dbt
+        dbt_cmd = str(venv_dbt) if venv_dbt.exists() else "dbt"
+
         if not dbt_project_dir.exists():
             logger.error(f"dbt project directory not found: {dbt_project_dir}")
             return False

         if self.dry_run:
+            logger.info("  [DRY RUN] Would run: dbt deps")
             logger.info("  [DRY RUN] Would run: dbt run")
             logger.info("  [DRY RUN] Would run: dbt test")
             return True

         try:
-            # Run dbt models
-            logger.info("  Running dbt run...")
+            # Install dbt packages if needed
+            logger.info("  Running dbt deps...")
             result = subprocess.run(
-                ["dbt", "run"],
+                [dbt_cmd, "deps", "--profiles-dir", str(dbt_project_dir)],
                 cwd=dbt_project_dir,
                 capture_output=True,
                 text=True,
             )

             if result.returncode != 0:
-                logger.error(f"dbt run failed:\n{result.stderr}")
-                if self.verbose:
-                    logger.debug(f"dbt output:\n{result.stdout}")
+                logger.error(f"dbt deps failed:\n{result.stdout}\n{result.stderr}")
+                return False
+
+            # Run dbt models
+            logger.info("  Running dbt run...")
+            result = subprocess.run(
+                [dbt_cmd, "run", "--profiles-dir", str(dbt_project_dir)],
+                cwd=dbt_project_dir,
+                capture_output=True,
+                text=True,
+            )
+
+            if result.returncode != 0:
+                logger.error(f"dbt run failed:\n{result.stdout}\n{result.stderr}")
                 return False

             logger.info("  dbt run completed successfully")
@@ -281,14 +335,16 @@ class DataPipeline:
             # Run dbt tests
             logger.info("  Running dbt test...")
             result = subprocess.run(
-                ["dbt", "test"],
+                [dbt_cmd, "test", "--profiles-dir", str(dbt_project_dir)],
                 cwd=dbt_project_dir,
                 capture_output=True,
                 text=True,
             )

             if result.returncode != 0:
-                logger.warning(f"dbt test had failures:\n{result.stderr}")
+                logger.warning(
+                    f"dbt test had failures:\n{result.stdout}\n{result.stderr}"
+                )
                 # Don't fail on test failures, just warn
             else:
                 logger.info("  dbt test completed successfully")
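The run_dbt() changes above boil down to the command sequence below, shown as a minimal sketch (profiles.yml is assumed to live inside the dbt project directory, which is why --profiles-dir points there):

    import subprocess
    from pathlib import Path

    dbt_project_dir = Path("dbt")  # relative to the repo root
    dbt_cmd = ".venv/bin/dbt"  # the pipeline falls back to "dbt" on PATH if missing

    for step in ("deps", "run", "test"):
        subprocess.run(
            [dbt_cmd, step, "--profiles-dir", str(dbt_project_dir)],
            cwd=dbt_project_dir,
            check=False,  # the pipeline only warns on test failures
        )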
Some files were not shown because too many files have changed in this diff.