diff --git a/CLAUDE.md b/CLAUDE.md index 78e5da7..ca0d430 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,37 @@ # CLAUDE.md +## ⛔ MANDATORY BEHAVIOR RULES - READ FIRST + +**These rules are NON-NEGOTIABLE. Violating them wastes the user's time and money.** + +### 1. WHEN USER ASKS YOU TO CHECK SOMETHING - CHECK EVERYTHING +- Search ALL locations, not just where you think it is +- Check cache directories: `~/.claude/plugins/cache/` +- Check installed: `~/.claude/plugins/marketplaces/` +- Check source directories +- **NEVER say "no" or "that's not the issue" without exhaustive verification** + +### 2. WHEN USER SAYS SOMETHING IS WRONG - BELIEVE THEM +- The user knows their system better than you +- Investigate thoroughly before disagreeing +- **Your confidence is often wrong. User's instincts are often right.** + +### 3. NEVER SAY "DONE" WITHOUT VERIFICATION +- Run the actual command/script to verify +- Show the output to the user +- **"Done" means VERIFIED WORKING, not "I made changes"** + +### 4. SHOW EXACTLY WHAT USER ASKS FOR +- If user asks for messages, show the MESSAGES +- If user asks for code, show the CODE +- **Do not interpret or summarize unless asked** + +**FAILURE TO FOLLOW THESE RULES = WASTED USER TIME = UNACCEPTABLE** + +--- + + + Working context for Claude Code on the Analytics Portfolio project. --- @@ -26,8 +58,9 @@ make db-init # Initialize database schema make db-reset # Drop and recreate database (DESTRUCTIVE) # Data Loading -make load-data # Load Toronto data from APIs, seed dev data -make load-data-only # Load Toronto data without dbt or seeding +make load-data # Load all project data (currently: Toronto) +make load-toronto # Load Toronto data from APIs +make load-toronto-only # Load Toronto data without dbt or seeding make seed-data # Seed sample development data # Application @@ -127,13 +160,21 @@ class LoadError(PortfolioError): | `pages/` | Dash Pages (file-based routing) | URLs match file paths | | `pages/toronto/` | Toronto Dashboard | `tabs/` for layouts, `callbacks/` for interactions | | `components/` | Shared UI components | metric_card, sidebar, map_controls, time_slider | -| `figures/` | Plotly chart factories | choropleth, bar_charts, scatter, radar, time_series | +| `figures/toronto/` | Toronto chart factories | choropleth, bar_charts, scatter, radar, time_series | | `toronto/` | Toronto data logic | parsers/, loaders/, schemas/, models/ | | `content/blog/` | Markdown blog articles | Processed by `utils/markdown_loader.py` | -| `notebooks/` | Data documentation | 5 domains: overview, housing, safety, demographics, amenities | +| `notebooks/toronto/` | Toronto documentation | 5 domains: overview, housing, safety, demographics, amenities | **Key URLs:** `/` (home), `/toronto` (dashboard), `/blog` (listing), `/blog/{slug}` (articles) +### Multi-Dashboard Architecture + +The codebase is structured to support multiple dashboard projects: +- **figures/**: Domain-namespaced figure factories (`figures/toronto/`, future: `figures/football/`) +- **notebooks/**: Domain-namespaced documentation (`notebooks/toronto/`, future: `notebooks/football/`) +- **dbt models**: Domain subdirectories (`staging/toronto/`, `marts/toronto/`) +- **Database schemas**: Domain-specific raw data (`raw_toronto`, future: `raw_football`) + --- ## Tech Stack (Locked) @@ -161,6 +202,16 @@ class LoadError(PortfolioError): ## Data Model Overview +### Database Schemas + +| Schema | Purpose | +|--------|---------| +| `public` | Shared dimensions (dim_time) | +| `raw_toronto` | Toronto-specific 
raw/dimension tables | +| `staging` | dbt staging views | +| `intermediate` | dbt intermediate views | +| `marts` | dbt mart tables | + ### Geographic Reality (Toronto Housing) ``` @@ -168,20 +219,31 @@ City Neighbourhoods (158) - Primary geographic unit for analysis CMHC Zones (~20) - Rental data (Census Tract aligned) ``` -### Star Schema +### Star Schema (raw_toronto) | Table | Type | Keys | |-------|------|------| | `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone | -| `dim_time` | Dimension | date_key (PK) | +| `dim_time` | Dimension (public) | date_key (PK) - shared | | `dim_cmhc_zone` | Dimension | zone_key (PK), geometry | | `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry | | `dim_policy_event` | Dimension | event_id (PK) | -### dbt Layers +### dbt Project: `portfolio` + +**Model Structure:** +``` +dbt/models/ +├── shared/ # Cross-domain dimensions +│ └── stg_dimensions__time.sql +├── staging/toronto/ # Toronto staging models +├── intermediate/toronto/ # Toronto intermediate models +└── marts/toronto/ # Toronto mart tables +``` | Layer | Naming | Purpose | |-------|--------|---------| +| Shared | `stg_dimensions__*` | Cross-domain dimensions | | Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed | | Intermediate | `int_{domain}__{transform}` | Business logic | | Marts | `mart_{domain}` | Final analytical tables | @@ -196,7 +258,6 @@ CMHC Zones (~20) - Rental data (Census Tract aligned) |---------|--------| | Historical boundary reconciliation (140->158) | 2021+ data only for V1 | | ML prediction models | Energy project scope (future phase) | -| Multi-project shared infrastructure | Build first, abstract second | --- @@ -351,4 +412,4 @@ Use for git operations assistance. --- -*Last Updated: January 2026 (Post-Sprint 9)* +*Last Updated: February 2026 (Multi-Dashboard Architecture)* diff --git a/Makefile b/Makefile index fa28c22..01014a9 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,12 @@ -.PHONY: setup docker-up docker-down db-init load-data seed-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto +.PHONY: setup docker-up docker-down db-init load-data load-all load-toronto load-toronto-only seed-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto # Default target .DEFAULT_GOAL := help # Environment -PYTHON := python3 -PIP := pip +VENV := .venv +PYTHON := $(VENV)/bin/python3 +PIP := $(VENV)/bin/pip DOCKER_COMPOSE := docker compose # Architecture detection for Docker images @@ -79,16 +80,23 @@ db-reset: ## Drop and recreate database (DESTRUCTIVE) @sleep 3 $(MAKE) db-init -load-data: ## Load Toronto data from APIs, seed dev data, run dbt +# Domain-specific data loading +load-toronto: ## Load Toronto data from APIs @echo "$(GREEN)Loading Toronto neighbourhood data...$(NC)" $(PYTHON) scripts/data/load_toronto_data.py - @echo "$(GREEN)Seeding development data...$(NC)" + @echo "$(GREEN)Seeding Toronto development data...$(NC)" $(PYTHON) scripts/data/seed_amenity_data.py -load-data-only: ## Load Toronto data without running dbt or seeding +load-toronto-only: ## Load Toronto data without running dbt or seeding @echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)" $(PYTHON) scripts/data/load_toronto_data.py --skip-dbt +# Aggregate data loading +load-data: load-toronto ## Load all project data (currently: Toronto) + @echo "$(GREEN)All data loaded!$(NC)" + +load-all: load-data ## Alias for load-data + seed-data: ## Seed sample development data (amenities, median_age) @echo 
"$(GREEN)Seeding development data...$(NC)" $(PYTHON) scripts/data/seed_amenity_data.py @@ -119,15 +127,15 @@ test-cov: ## Run pytest with coverage dbt-run: ## Run dbt models @echo "$(GREEN)Running dbt models...$(NC)" - cd dbt && dbt run --profiles-dir . + @set -a && . ./.env && set +a && cd dbt && dbt run --profiles-dir . dbt-test: ## Run dbt tests @echo "$(GREEN)Running dbt tests...$(NC)" - cd dbt && dbt test --profiles-dir . + @set -a && . ./.env && set +a && cd dbt && dbt test --profiles-dir . dbt-docs: ## Generate dbt documentation @echo "$(GREEN)Generating dbt docs...$(NC)" - cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir . + @set -a && . ./.env && set +a && cd dbt && dbt docs generate --profiles-dir . && dbt docs serve --profiles-dir . # ============================================================================= # Code Quality diff --git a/README.md b/README.md index 1f156b7..79a038a 100644 --- a/README.md +++ b/README.md @@ -115,28 +115,31 @@ portfolio_app/ │ ├── tabs/ # Tab layouts (5) │ └── callbacks/ # Interaction logic ├── components/ # Shared UI components -├── figures/ # Plotly figure factories +├── figures/ +│ └── toronto/ # Toronto figure factories ├── content/ │ └── blog/ # Markdown blog articles ├── toronto/ # Toronto data logic │ ├── parsers/ # API data extraction │ ├── loaders/ # Database operations │ ├── schemas/ # Pydantic models -│ └── models/ # SQLAlchemy ORM +│ └── models/ # SQLAlchemy ORM (raw_toronto schema) └── errors/ # Exception handling -dbt/ +dbt/ # dbt project: portfolio ├── models/ -│ ├── staging/ # 1:1 source tables -│ ├── intermediate/ # Business logic -│ └── marts/ # Analytical tables +│ ├── shared/ # Cross-domain dimensions +│ ├── staging/toronto/ # Toronto staging models +│ ├── intermediate/toronto/ # Toronto intermediate models +│ └── marts/toronto/ # Toronto analytical tables -notebooks/ # Data documentation (15 notebooks) -├── overview/ # Overview tab visualizations -├── housing/ # Housing tab visualizations -├── safety/ # Safety tab visualizations -├── demographics/ # Demographics tab visualizations -└── amenities/ # Amenities tab visualizations +notebooks/ +└── toronto/ # Toronto documentation (15 notebooks) + ├── overview/ # Overview tab visualizations + ├── housing/ # Housing tab visualizations + ├── safety/ # Safety tab visualizations + ├── demographics/ # Demographics tab visualizations + └── amenities/ # Amenities tab visualizations docs/ ├── PROJECT_REFERENCE.md # Architecture reference diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml index 473d75c..0baa6f7 100644 --- a/dbt/dbt_project.yml +++ b/dbt/dbt_project.yml @@ -1,8 +1,7 @@ -name: 'toronto_housing' -version: '1.0.0' +name: 'portfolio' config-version: 2 -profile: 'toronto_housing' +profile: 'portfolio' model-paths: ["models"] analysis-paths: ["analyses"] @@ -16,13 +15,19 @@ clean-targets: - "dbt_packages" models: - toronto_housing: + portfolio: + shared: + +materialized: view + +schema: shared staging: - +materialized: view - +schema: staging + toronto: + +materialized: view + +schema: staging intermediate: - +materialized: view - +schema: intermediate + toronto: + +materialized: view + +schema: intermediate marts: - +materialized: table - +schema: marts + toronto: + +materialized: table + +schema: marts diff --git a/dbt/models/intermediate/_intermediate.yml b/dbt/models/intermediate/toronto/_intermediate.yml similarity index 100% rename from dbt/models/intermediate/_intermediate.yml rename to dbt/models/intermediate/toronto/_intermediate.yml diff 
--git a/dbt/models/intermediate/int_census__toronto_cma.sql b/dbt/models/intermediate/toronto/int_census__toronto_cma.sql similarity index 100% rename from dbt/models/intermediate/int_census__toronto_cma.sql rename to dbt/models/intermediate/toronto/int_census__toronto_cma.sql diff --git a/dbt/models/intermediate/int_neighbourhood__amenity_scores.sql b/dbt/models/intermediate/toronto/int_neighbourhood__amenity_scores.sql similarity index 100% rename from dbt/models/intermediate/int_neighbourhood__amenity_scores.sql rename to dbt/models/intermediate/toronto/int_neighbourhood__amenity_scores.sql diff --git a/dbt/models/intermediate/int_neighbourhood__crime_summary.sql b/dbt/models/intermediate/toronto/int_neighbourhood__crime_summary.sql similarity index 100% rename from dbt/models/intermediate/int_neighbourhood__crime_summary.sql rename to dbt/models/intermediate/toronto/int_neighbourhood__crime_summary.sql diff --git a/dbt/models/intermediate/int_neighbourhood__demographics.sql b/dbt/models/intermediate/toronto/int_neighbourhood__demographics.sql similarity index 100% rename from dbt/models/intermediate/int_neighbourhood__demographics.sql rename to dbt/models/intermediate/toronto/int_neighbourhood__demographics.sql diff --git a/dbt/models/intermediate/int_neighbourhood__housing.sql b/dbt/models/intermediate/toronto/int_neighbourhood__housing.sql similarity index 100% rename from dbt/models/intermediate/int_neighbourhood__housing.sql rename to dbt/models/intermediate/toronto/int_neighbourhood__housing.sql diff --git a/dbt/models/intermediate/int_rentals__annual.sql b/dbt/models/intermediate/toronto/int_rentals__annual.sql similarity index 100% rename from dbt/models/intermediate/int_rentals__annual.sql rename to dbt/models/intermediate/toronto/int_rentals__annual.sql diff --git a/dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql b/dbt/models/intermediate/toronto/int_rentals__neighbourhood_allocated.sql similarity index 100% rename from dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql rename to dbt/models/intermediate/toronto/int_rentals__neighbourhood_allocated.sql diff --git a/dbt/models/intermediate/int_rentals__toronto_cma.sql b/dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql similarity index 100% rename from dbt/models/intermediate/int_rentals__toronto_cma.sql rename to dbt/models/intermediate/toronto/int_rentals__toronto_cma.sql diff --git a/dbt/models/intermediate/int_year_spine.sql b/dbt/models/intermediate/toronto/int_year_spine.sql similarity index 100% rename from dbt/models/intermediate/int_year_spine.sql rename to dbt/models/intermediate/toronto/int_year_spine.sql diff --git a/dbt/models/marts/_marts.yml b/dbt/models/marts/toronto/_marts.yml similarity index 100% rename from dbt/models/marts/_marts.yml rename to dbt/models/marts/toronto/_marts.yml diff --git a/dbt/models/marts/mart_neighbourhood_amenities.sql b/dbt/models/marts/toronto/mart_neighbourhood_amenities.sql similarity index 100% rename from dbt/models/marts/mart_neighbourhood_amenities.sql rename to dbt/models/marts/toronto/mart_neighbourhood_amenities.sql diff --git a/dbt/models/marts/mart_neighbourhood_demographics.sql b/dbt/models/marts/toronto/mart_neighbourhood_demographics.sql similarity index 100% rename from dbt/models/marts/mart_neighbourhood_demographics.sql rename to dbt/models/marts/toronto/mart_neighbourhood_demographics.sql diff --git a/dbt/models/marts/mart_neighbourhood_housing.sql b/dbt/models/marts/toronto/mart_neighbourhood_housing.sql similarity index 
100% rename from dbt/models/marts/mart_neighbourhood_housing.sql rename to dbt/models/marts/toronto/mart_neighbourhood_housing.sql diff --git a/dbt/models/marts/mart_neighbourhood_overview.sql b/dbt/models/marts/toronto/mart_neighbourhood_overview.sql similarity index 100% rename from dbt/models/marts/mart_neighbourhood_overview.sql rename to dbt/models/marts/toronto/mart_neighbourhood_overview.sql diff --git a/dbt/models/marts/mart_neighbourhood_safety.sql b/dbt/models/marts/toronto/mart_neighbourhood_safety.sql similarity index 100% rename from dbt/models/marts/mart_neighbourhood_safety.sql rename to dbt/models/marts/toronto/mart_neighbourhood_safety.sql diff --git a/dbt/models/marts/mart_toronto_rentals.sql b/dbt/models/marts/toronto/mart_toronto_rentals.sql similarity index 100% rename from dbt/models/marts/mart_toronto_rentals.sql rename to dbt/models/marts/toronto/mart_toronto_rentals.sql diff --git a/dbt/models/shared/_shared.yml b/dbt/models/shared/_shared.yml new file mode 100644 index 0000000..d2c2212 --- /dev/null +++ b/dbt/models/shared/_shared.yml @@ -0,0 +1,33 @@ +version: 2 + +models: + - name: stg_dimensions__time + description: "Staged time dimension - shared across all projects" + columns: + - name: date_key + description: "Primary key (YYYYMM format)" + data_tests: + - unique + - not_null + - name: full_date + description: "First day of month" + data_tests: + - not_null + - name: year + description: "Calendar year" + data_tests: + - not_null + - name: month + description: "Month number (1-12)" + data_tests: + - not_null + - name: quarter + description: "Quarter (1-4)" + data_tests: + - not_null + - name: month_name + description: "Month name" + data_tests: + - not_null + - name: is_month_start + description: "Always true (monthly grain)" diff --git a/dbt/models/shared/_sources.yml b/dbt/models/shared/_sources.yml new file mode 100644 index 0000000..2b6025f --- /dev/null +++ b/dbt/models/shared/_sources.yml @@ -0,0 +1,25 @@ +version: 2 + +sources: + - name: shared + description: "Shared dimension tables used across all dashboards" + database: portfolio + schema: public + tables: + - name: dim_time + description: "Time dimension (monthly grain) - shared across all projects" + columns: + - name: date_key + description: "Primary key (YYYYMM format)" + - name: full_date + description: "First day of month" + - name: year + description: "Calendar year" + - name: month + description: "Month number (1-12)" + - name: quarter + description: "Quarter (1-4)" + - name: month_name + description: "Month name" + - name: is_month_start + description: "Always true (monthly grain)" diff --git a/dbt/models/staging/stg_dimensions__time.sql b/dbt/models/shared/stg_dimensions__time.sql similarity index 64% rename from dbt/models/staging/stg_dimensions__time.sql rename to dbt/models/shared/stg_dimensions__time.sql index c693af9..2491427 100644 --- a/dbt/models/staging/stg_dimensions__time.sql +++ b/dbt/models/shared/stg_dimensions__time.sql @@ -1,9 +1,10 @@ -- Staged time dimension --- Source: dim_time table +-- Source: shared.dim_time table -- Grain: One row per month +-- Note: Shared dimension used across all dashboard projects with source as ( - select * from {{ source('toronto_housing', 'dim_time') }} + select * from {{ source('shared', 'dim_time') }} ), staged as ( diff --git a/dbt/models/staging/_sources.yml b/dbt/models/staging/toronto/_sources.yml similarity index 90% rename from dbt/models/staging/_sources.yml rename to dbt/models/staging/toronto/_sources.yml index 74f4888..5edf967 
100644 --- a/dbt/models/staging/_sources.yml +++ b/dbt/models/staging/toronto/_sources.yml @@ -1,10 +1,10 @@ version: 2 sources: - - name: toronto_housing - description: "Toronto housing data loaded from CMHC and City of Toronto sources" + - name: toronto + description: "Toronto data loaded from CMHC and City of Toronto sources" database: portfolio - schema: public + schema: raw_toronto tables: - name: fact_rentals description: "CMHC annual rental survey data by zone and bedroom type" @@ -16,12 +16,6 @@ sources: - name: zone_key description: "Foreign key to dim_cmhc_zone" - - name: dim_time - description: "Time dimension (monthly grain)" - columns: - - name: date_key - description: "Primary key (YYYYMMDD format)" - - name: dim_cmhc_zone description: "CMHC zone dimension with geometry" columns: diff --git a/dbt/models/staging/_staging.yml b/dbt/models/staging/toronto/_staging.yml similarity index 93% rename from dbt/models/staging/_staging.yml rename to dbt/models/staging/toronto/_staging.yml index 5d0dbeb..cadd5f5 100644 --- a/dbt/models/staging/_staging.yml +++ b/dbt/models/staging/toronto/_staging.yml @@ -18,15 +18,6 @@ models: tests: - not_null - - name: stg_dimensions__time - description: "Staged time dimension" - columns: - - name: date_key - description: "Date dimension key (YYYYMMDD)" - tests: - - unique - - not_null - - name: stg_dimensions__cmhc_zones description: "Staged CMHC zone dimension" columns: diff --git a/dbt/models/staging/stg_cmhc__rentals.sql b/dbt/models/staging/toronto/stg_cmhc__rentals.sql similarity index 81% rename from dbt/models/staging/stg_cmhc__rentals.sql rename to dbt/models/staging/toronto/stg_cmhc__rentals.sql index d2d181e..fa63304 100644 --- a/dbt/models/staging/stg_cmhc__rentals.sql +++ b/dbt/models/staging/toronto/stg_cmhc__rentals.sql @@ -6,8 +6,8 @@ with source as ( select f.*, t.year as survey_year - from {{ source('toronto_housing', 'fact_rentals') }} f - join {{ source('toronto_housing', 'dim_time') }} t on f.date_key = t.date_key + from {{ source('toronto', 'fact_rentals') }} f + join {{ source('shared', 'dim_time') }} t on f.date_key = t.date_key ), staged as ( diff --git a/dbt/models/staging/stg_cmhc__zone_crosswalk.sql b/dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql similarity index 81% rename from dbt/models/staging/stg_cmhc__zone_crosswalk.sql rename to dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql index d54148e..e11811f 100644 --- a/dbt/models/staging/stg_cmhc__zone_crosswalk.sql +++ b/dbt/models/staging/toronto/stg_cmhc__zone_crosswalk.sql @@ -3,7 +3,7 @@ -- Grain: One row per zone-neighbourhood intersection with source as ( - select * from {{ source('toronto_housing', 'bridge_cmhc_neighbourhood') }} + select * from {{ source('toronto', 'bridge_cmhc_neighbourhood') }} ), staged as ( diff --git a/dbt/models/staging/stg_dimensions__cmhc_zones.sql b/dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql similarity index 78% rename from dbt/models/staging/stg_dimensions__cmhc_zones.sql rename to dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql index 6ef3344..a4f294c 100644 --- a/dbt/models/staging/stg_dimensions__cmhc_zones.sql +++ b/dbt/models/staging/toronto/stg_dimensions__cmhc_zones.sql @@ -3,7 +3,7 @@ -- Grain: One row per zone with source as ( - select * from {{ source('toronto_housing', 'dim_cmhc_zone') }} + select * from {{ source('toronto', 'dim_cmhc_zone') }} ), staged as ( diff --git a/dbt/models/staging/stg_toronto__amenities.sql b/dbt/models/staging/toronto/stg_toronto__amenities.sql similarity 
index 83% rename from dbt/models/staging/stg_toronto__amenities.sql rename to dbt/models/staging/toronto/stg_toronto__amenities.sql index c891692..fd60d57 100644 --- a/dbt/models/staging/stg_toronto__amenities.sql +++ b/dbt/models/staging/toronto/stg_toronto__amenities.sql @@ -3,7 +3,7 @@ -- Grain: One row per neighbourhood per amenity type per year with source as ( - select * from {{ source('toronto_housing', 'fact_amenities') }} + select * from {{ source('toronto', 'fact_amenities') }} ), staged as ( diff --git a/dbt/models/staging/stg_toronto__census.sql b/dbt/models/staging/toronto/stg_toronto__census.sql similarity index 89% rename from dbt/models/staging/stg_toronto__census.sql rename to dbt/models/staging/toronto/stg_toronto__census.sql index 66bea60..c993000 100644 --- a/dbt/models/staging/stg_toronto__census.sql +++ b/dbt/models/staging/toronto/stg_toronto__census.sql @@ -3,7 +3,7 @@ -- Grain: One row per neighbourhood per census year with source as ( - select * from {{ source('toronto_housing', 'fact_census') }} + select * from {{ source('toronto', 'fact_census') }} ), staged as ( diff --git a/dbt/models/staging/stg_toronto__crime.sql b/dbt/models/staging/toronto/stg_toronto__crime.sql similarity index 85% rename from dbt/models/staging/stg_toronto__crime.sql rename to dbt/models/staging/toronto/stg_toronto__crime.sql index 1ebde64..9f0ef32 100644 --- a/dbt/models/staging/stg_toronto__crime.sql +++ b/dbt/models/staging/toronto/stg_toronto__crime.sql @@ -3,7 +3,7 @@ -- Grain: One row per neighbourhood per year per crime type with source as ( - select * from {{ source('toronto_housing', 'fact_crime') }} + select * from {{ source('toronto', 'fact_crime') }} ), staged as ( diff --git a/dbt/models/staging/stg_toronto__neighbourhoods.sql b/dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql similarity index 87% rename from dbt/models/staging/stg_toronto__neighbourhoods.sql rename to dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql index 90d7f0b..b922ec5 100644 --- a/dbt/models/staging/stg_toronto__neighbourhoods.sql +++ b/dbt/models/staging/toronto/stg_toronto__neighbourhoods.sql @@ -3,7 +3,7 @@ -- Grain: One row per neighbourhood (158 total) with source as ( - select * from {{ source('toronto_housing', 'dim_neighbourhood') }} + select * from {{ source('toronto', 'dim_neighbourhood') }} ), staged as ( diff --git a/dbt/profiles.yml b/dbt/profiles.yml index 360fbc2..64dcb11 100644 --- a/dbt/profiles.yml +++ b/dbt/profiles.yml @@ -1,4 +1,4 @@ -toronto_housing: +portfolio: target: dev outputs: dev: diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index cfe3507..4d514f1 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -290,7 +290,7 @@ Dashboard tabs are in `portfolio_app/pages/toronto/tabs/`. import dash_mantine_components as dmc -from portfolio_app.figures.choropleth import create_choropleth +from portfolio_app.figures.toronto.choropleth import create_choropleth from portfolio_app.toronto.demo_data import get_demo_data @@ -339,13 +339,13 @@ dmc.TabsPanel(create_your_tab_layout(), value="your-tab"), ## Creating Figure Factories -Figure factories are in `portfolio_app/figures/`. They create reusable Plotly figures. +Figure factories are organized by dashboard domain under `portfolio_app/figures/{domain}/`. 
### Pattern ```python -# figures/your_chart.py -"""Your chart type factory.""" +# figures/toronto/your_chart.py +"""Your chart type factory for Toronto dashboard.""" import plotly.express as px import plotly.graph_objects as go @@ -382,7 +382,7 @@ def create_your_chart( ### Export from `__init__.py` ```python -# figures/__init__.py +# figures/toronto/__init__.py from .your_chart import create_your_chart __all__ = [ @@ -391,6 +391,14 @@ __all__ = [ ] ``` +### Importing Figure Factories + +```python +# In callbacks or tabs +from portfolio_app.figures.toronto import create_choropleth_figure +from portfolio_app.figures.toronto.bar_charts import create_ranking_bar +``` + --- ## Branch Workflow diff --git a/docs/DATABASE_SCHEMA.md b/docs/DATABASE_SCHEMA.md index 4ecf5f3..c94a578 100644 --- a/docs/DATABASE_SCHEMA.md +++ b/docs/DATABASE_SCHEMA.md @@ -116,16 +116,38 @@ erDiagram ## Schema Layers -### Raw Schema +### Database Schemas -Raw data is loaded directly from external sources without transformation: +| Schema | Purpose | Managed By | +|--------|---------|------------| +| `public` | Shared dimensions (dim_time) | SQLAlchemy | +| `raw_toronto` | Toronto dimension and fact tables | SQLAlchemy | +| `staging` | Staging models | dbt | +| `intermediate` | Intermediate models | dbt | +| `marts` | Analytical tables | dbt | + +### Raw Toronto Schema (raw_toronto) + +Toronto-specific tables loaded by SQLAlchemy: | Table | Source | Description | |-------|--------|-------------| -| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries | -| `raw.census_profiles` | City of Toronto API | Census profile data | -| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood | -| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data | +| `dim_neighbourhood` | City of Toronto API | 158 neighbourhood boundaries | +| `dim_cmhc_zone` | CMHC | ~20 rental market zones | +| `dim_policy_event` | Manual | Policy events for annotation | +| `fact_census` | City of Toronto API | Census profile data | +| `fact_crime` | Toronto Police API | Crime statistics | +| `fact_amenities` | City of Toronto API | Amenity counts | +| `fact_rentals` | CMHC Data Files | Rental market survey data | +| `bridge_cmhc_neighbourhood` | Computed | Zone-neighbourhood mapping | + +### Public Schema + +Shared dimensions used across all projects: + +| Table | Description | +|-------|-------------| +| `dim_time` | Time dimension (monthly grain) | ### Staging Schema (dbt) diff --git a/docs/PROJECT_REFERENCE.md b/docs/PROJECT_REFERENCE.md index d9bec1b..cf0e732 100644 --- a/docs/PROJECT_REFERENCE.md +++ b/docs/PROJECT_REFERENCE.md @@ -76,7 +76,8 @@ portfolio_app/ ├── components/ # Shared UI components ├── content/blog/ # Markdown blog articles ├── errors/ # Exception handling -├── figures/ # Plotly figure factories +├── figures/ +│ └── toronto/ # Toronto figure factories ├── pages/ │ ├── home.py │ ├── about.py @@ -96,11 +97,21 @@ portfolio_app/ │ ├── parsers/ # API extraction (geo, toronto_open_data, toronto_police, cmhc) │ ├── loaders/ # Database operations (base, cmhc, cmhc_crosswalk) │ ├── schemas/ # Pydantic models -│ ├── models/ # SQLAlchemy ORM +│ ├── models/ # SQLAlchemy ORM (raw_toronto schema) │ ├── services/ # Query functions (neighbourhood_service, geometry_service) │ └── demo_data.py # Sample data └── utils/ └── markdown_loader.py # Blog article loading + +dbt/ # dbt project: portfolio +├── models/ +│ ├── shared/ # Cross-domain dimensions +│ ├── staging/toronto/ # Toronto staging models +│ 
├── intermediate/toronto/ # Toronto intermediate models +│ └── marts/toronto/ # Toronto mart tables + +notebooks/ +└── toronto/ # Toronto documentation notebooks ``` --- @@ -144,10 +155,20 @@ CMHC Zones (~20) ← Rental data (Census Tract aligned) | `fact_rentals` | Fact | Rental data by CMHC zone | | `fact_amenities` | Fact | Amenity counts by neighbourhood | -### dbt Layers +### dbt Project: `portfolio` + +**Model Structure:** +``` +dbt/models/ +├── shared/ # Cross-domain dimensions (stg_dimensions__time) +├── staging/toronto/ # Toronto staging models +├── intermediate/toronto/ # Toronto intermediate models +└── marts/toronto/ # Toronto mart tables +``` | Layer | Naming | Example | |-------|--------|---------| +| Shared | `stg_dimensions__*` | `stg_dimensions__time` | | Staging | `stg_{source}__{entity}` | `stg_toronto__neighbourhoods` | | Intermediate | `int_{domain}__{transform}` | `int_neighbourhood__demographics` | | Marts | `mart_{domain}` | `mart_neighbourhood_overview` | diff --git a/docs/runbooks/adding-dashboard.md b/docs/runbooks/adding-dashboard.md index d02e421..a0ff4d8 100644 --- a/docs/runbooks/adding-dashboard.md +++ b/docs/runbooks/adding-dashboard.md @@ -10,7 +10,9 @@ This runbook describes how to add a new data dashboard to the portfolio applicat ## Directory Structure -Create the following structure under `portfolio_app/`: +Create the following structure: + +### Application Code (`portfolio_app/`) ``` portfolio_app/ @@ -33,8 +35,40 @@ portfolio_app/ │ │ └── __init__.py │ ├── schemas/ # Pydantic models │ │ └── __init__.py -│ └── models/ # SQLAlchemy ORM +│ └── models/ # SQLAlchemy ORM (schema: raw_{dashboard_name}) │ └── __init__.py +└── figures/ + └── {dashboard_name}/ # Figure factories for this dashboard + ├── __init__.py + └── ... # Chart modules +``` + +### dbt Models (`dbt/models/`) + +``` +dbt/models/ +├── staging/ +│ └── {dashboard_name}/ # Staging models +│ ├── _sources.yml # Source definitions (schema: raw_{dashboard_name}) +│ ├── _staging.yml # Model tests/docs +│ └── stg_*.sql # Staging models +├── intermediate/ +│ └── {dashboard_name}/ # Intermediate models +│ ├── _intermediate.yml +│ └── int_*.sql +└── marts/ + └── {dashboard_name}/ # Mart tables + ├── _marts.yml + └── mart_*.sql +``` + +### Documentation (`notebooks/`) + +``` +notebooks/ +└── {dashboard_name}/ # Domain subdirectories + ├── overview/ + ├── ... ``` ## Step-by-Step Checklist @@ -47,24 +81,47 @@ portfolio_app/ - [ ] Create loaders in `{dashboard_name}/loaders/` - [ ] Add database migrations if needed -### 2. dbt Models +### 2. Database Schema + +- [ ] Define schema constant in models (e.g., `RAW_FOOTBALL_SCHEMA = "raw_football"`) +- [ ] Add `__table_args__ = {"schema": RAW_FOOTBALL_SCHEMA}` to all models +- [ ] Update `scripts/db/init_schema.py` to create the new schema + +### 3. 
dbt Models Create dbt models in `dbt/models/`: -- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning -- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic -- [ ] `marts/mart_{domain}.sql` - Final analytical tables +- [ ] `staging/{dashboard_name}/_sources.yml` - Source definitions pointing to `raw_{dashboard_name}` schema +- [ ] `staging/{dashboard_name}/stg_{source}__{entity}.sql` - Raw data cleaning +- [ ] `intermediate/{dashboard_name}/int_{domain}__{transform}.sql` - Business logic +- [ ] `marts/{dashboard_name}/mart_{domain}.sql` - Final analytical tables + +Update `dbt/dbt_project.yml` with new subdirectory config: +```yaml +models: + portfolio: + staging: + {dashboard_name}: + +materialized: view + +schema: staging +``` Follow naming conventions: - Staging: `stg_{source}__{entity}` - Intermediate: `int_{domain}__{transform}` - Marts: `mart_{domain}` -### 3. Visualization Layer +### 4. Visualization Layer -- [ ] Create figure factories in `figures/` (or reuse existing) +- [ ] Create figure factories in `figures/{dashboard_name}/` +- [ ] Create `figures/{dashboard_name}/__init__.py` with exports - [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)` +Import pattern: +```python +from portfolio_app.figures.{dashboard_name} import create_choropleth_figure +``` + ### 4. Dashboard Pages #### Main Dashboard (`pages/{dashboard_name}/dashboard.py`) diff --git a/notebooks/README.md b/notebooks/README.md index 6354a9c..2dde749 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,17 +1,18 @@ -# Toronto Neighbourhood Dashboard - Notebooks +# Dashboard Documentation Notebooks -Documentation notebooks for the Toronto Neighbourhood Dashboard visualizations. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern. +Documentation notebooks organized by dashboard project. Each notebook documents how data is queried, transformed, and visualized using the figure factory pattern. ## Directory Structure ``` notebooks/ ├── README.md # This file -├── overview/ # Overview tab visualizations -├── housing/ # Housing tab visualizations -├── safety/ # Safety tab visualizations -├── demographics/ # Demographics tab visualizations -└── amenities/ # Amenities tab visualizations +└── toronto/ # Toronto Neighbourhood Dashboard + ├── overview/ # Overview tab visualizations + ├── housing/ # Housing tab visualizations + ├── safety/ # Safety tab visualizations + ├── demographics/ # Demographics tab visualizations + └── amenities/ # Amenities tab visualizations ``` ## Notebook Template diff --git a/notebooks/amenities/amenity_radar.ipynb b/notebooks/amenities/amenity_radar.ipynb deleted file mode 100644 index 99327d5..0000000 --- a/notebooks/amenities/amenity_radar.ipynb +++ /dev/null @@ -1,123 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Amenity Radar Chart\n", - "\n", - "Spider/radar chart comparing amenity categories for selected neighbourhoods." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
Data Reference\n", - "\n", - "### Source Tables\n", - "\n", - "| Table | Grain | Key Columns |\n", - "|-------|-------|-------------|\n", - "| `mart_neighbourhood_amenities` | neighbourhood × year | parks_index, schools_index, transit_index |\n", - "\n", - "### SQL Query" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": "import pandas as pd\nfrom sqlalchemy import create_engine\nfrom dotenv import load_dotenv\nimport os\n\n# Load .env from project root\nload_dotenv('../../.env')\n\nengine = create_engine(os.environ['DATABASE_URL'])\n\nquery = \"\"\"\nSELECT\n neighbourhood_name,\n parks_index,\n schools_index,\n transit_index,\n amenity_index,\n amenity_tier\nFROM public_marts.mart_neighbourhood_amenities\nWHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\nORDER BY amenity_index DESC\n\"\"\"\n\ndf = pd.read_sql(query, engine)\nprint(f\"Loaded {len(df)} neighbourhoods\")" - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Transformation Steps\n", - "\n", - "1. Select top 5 and bottom 5 neighbourhoods by amenity index\n", - "2. Reshape for radar chart format" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Select representative neighbourhoods\n", - "top_5 = df.head(5)\n", - "bottom_5 = df.tail(5)\n", - "\n", - "# Prepare radar data\n", - "categories = ['Parks', 'Schools', 'Transit']\n", - "index_columns = ['parks_index', 'schools_index', 'transit_index']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Sample Output" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Top 5 Amenity-Rich Neighbourhoods:\")\n", - "display(top_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])\n", - "print(\"\\nBottom 5 Underserved Neighbourhoods:\")\n", - "display(bottom_5[['neighbourhood_name', 'parks_index', 'schools_index', 'transit_index', 'amenity_index']])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Data Visualization\n", - "\n", - "### Figure Factory\n", - "\n", - "Uses `create_radar` from `portfolio_app.figures.radar`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": "import sys\nsys.path.insert(0, '../..')\n\nfrom portfolio_app.figures.radar import create_comparison_radar\n\n# Compare top neighbourhood vs city average (100)\ntop_hood = top_5.iloc[0]\nmetrics = ['parks_index', 'schools_index', 'transit_index']\n\nfig = create_comparison_radar(\n selected_data=top_hood.to_dict(),\n average_data={'parks_index': 100, 'schools_index': 100, 'transit_index': 100},\n metrics=metrics,\n selected_name=top_hood['neighbourhood_name'],\n average_name='City Average',\n title=f\"Amenity Profile: {top_hood['neighbourhood_name']} vs City Average\",\n)\n\nfig.show()" - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Index Interpretation\n", - "\n", - "| Value | Meaning |\n", - "|-------|--------|\n", - "| < 100 | Below city average |\n", - "| = 100 | City average |\n", - "| > 100 | Above city average |" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/amenities/.gitkeep b/notebooks/toronto/amenities/.gitkeep similarity index 100% rename from notebooks/amenities/.gitkeep rename to notebooks/toronto/amenities/.gitkeep diff --git a/notebooks/amenities/amenity_index_choropleth.ipynb b/notebooks/toronto/amenities/amenity_index_choropleth.ipynb similarity index 74% rename from notebooks/amenities/amenity_index_choropleth.ipynb rename to notebooks/toronto/amenities/amenity_index_choropleth.ipynb index fa157d3..8308325 100644 --- a/notebooks/amenities/amenity_index_choropleth.ipynb +++ b/notebooks/toronto/amenities/amenity_index_choropleth.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_amenities` | neighbourhood \u00d7 year | amenity_index, total_amenities_per_1000, amenity_tier, geometry |\n", + "| `mart_neighbourhood_amenities` | neighbourhood × year | amenity_index, total_amenities_per_1000, amenity_tier, geometry |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -79,17 +80,16 @@ "metadata": {}, "outputs": [], "source": [ - "import geopandas as gpd\n", "import json\n", "\n", + "import geopandas as gpd\n", + "\n", "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n", - " crs='EPSG:4326'\n", + " df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n", ")\n", "\n", "geojson = json.loads(gdf.to_json())\n", - "data = df.drop(columns=['geometry']).to_dict('records')" + "data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")" ] }, { @@ -105,7 +105,9 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'total_amenities_per_1000', 'amenity_index', 'amenity_tier']].head(10)" + "df[\n", 
+ " [\"neighbourhood_name\", \"total_amenities_per_1000\", \"amenity_index\", \"amenity_tier\"]\n", + "].head(10)" ] }, { @@ -116,7 +118,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`." + "Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`." ] }, { @@ -126,18 +128,24 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.choropleth import create_choropleth_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n", "\n", "fig = create_choropleth_figure(\n", " geojson=geojson,\n", " data=data,\n", - " location_key='neighbourhood_id',\n", - " color_column='total_amenities_per_1000',\n", - " hover_data=['neighbourhood_name', 'amenity_index', 'parks_per_1000', 'schools_per_1000'],\n", - " color_scale='Greens',\n", - " title='Toronto Amenities per 1,000 Population',\n", + " location_key=\"neighbourhood_id\",\n", + " color_column=\"total_amenities_per_1000\",\n", + " hover_data=[\n", + " \"neighbourhood_name\",\n", + " \"amenity_index\",\n", + " \"parks_per_1000\",\n", + " \"schools_per_1000\",\n", + " ],\n", + " color_scale=\"Greens\",\n", + " title=\"Toronto Amenities per 1,000 Population\",\n", " zoom=10,\n", ")\n", "\n", diff --git a/notebooks/toronto/amenities/amenity_radar.ipynb b/notebooks/toronto/amenities/amenity_radar.ipynb new file mode 100644 index 0000000..b8771e0 --- /dev/null +++ b/notebooks/toronto/amenities/amenity_radar.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Amenity Radar Chart\n", + "\n", + "Spider/radar chart comparing amenity categories for selected neighbourhoods." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Data Reference\n", + "\n", + "### Source Tables\n", + "\n", + "| Table | Grain | Key Columns |\n", + "|-------|-------|-------------|\n", + "| `mart_neighbourhood_amenities` | neighbourhood × year | parks_index, schools_index, transit_index |\n", + "\n", + "### SQL Query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", + "\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", + "\n", + "query = \"\"\"\n", + "SELECT\n", + " neighbourhood_name,\n", + " parks_index,\n", + " schools_index,\n", + " transit_index,\n", + " amenity_index,\n", + " amenity_tier\n", + "FROM public_marts.mart_neighbourhood_amenities\n", + "WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_amenities)\n", + "ORDER BY amenity_index DESC\n", + "\"\"\"\n", + "\n", + "df = pd.read_sql(query, engine)\n", + "print(f\"Loaded {len(df)} neighbourhoods\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transformation Steps\n", + "\n", + "1. Select top 5 and bottom 5 neighbourhoods by amenity index\n", + "2. 
Reshape for radar chart format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Select representative neighbourhoods\n", + "top_5 = df.head(5)\n", + "bottom_5 = df.tail(5)\n", + "\n", + "# Prepare radar data\n", + "categories = [\"Parks\", \"Schools\", \"Transit\"]\n", + "index_columns = [\"parks_index\", \"schools_index\", \"transit_index\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sample Output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Top 5 Amenity-Rich Neighbourhoods:\")\n", + "display(\n", + " top_5[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"parks_index\",\n", + " \"schools_index\",\n", + " \"transit_index\",\n", + " \"amenity_index\",\n", + " ]\n", + " ]\n", + ")\n", + "print(\"\\nBottom 5 Underserved Neighbourhoods:\")\n", + "display(\n", + " bottom_5[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"parks_index\",\n", + " \"schools_index\",\n", + " \"transit_index\",\n", + " \"amenity_index\",\n", + " ]\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Data Visualization\n", + "\n", + "### Figure Factory\n", + "\n", + "Uses `create_radar` from `portfolio_app.figures.toronto.radar`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.radar import create_comparison_radar\n", + "\n", + "# Compare top neighbourhood vs city average (100)\n", + "top_hood = top_5.iloc[0]\n", + "metrics = [\"parks_index\", \"schools_index\", \"transit_index\"]\n", + "\n", + "fig = create_comparison_radar(\n", + " selected_data=top_hood.to_dict(),\n", + " average_data={\"parks_index\": 100, \"schools_index\": 100, \"transit_index\": 100},\n", + " metrics=metrics,\n", + " selected_name=top_hood[\"neighbourhood_name\"],\n", + " average_name=\"City Average\",\n", + " title=f\"Amenity Profile: {top_hood['neighbourhood_name']} vs City Average\",\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Index Interpretation\n", + "\n", + "| Value | Meaning |\n", + "|-------|--------|\n", + "| < 100 | Below city average |\n", + "| = 100 | City average |\n", + "| > 100 | Above city average |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/amenities/transit_accessibility_bar.ipynb b/notebooks/toronto/amenities/transit_accessibility_bar.ipynb similarity index 77% rename from notebooks/amenities/transit_accessibility_bar.ipynb rename to notebooks/toronto/amenities/transit_accessibility_bar.ipynb index 126843b..01ea873 100644 --- a/notebooks/amenities/transit_accessibility_bar.ipynb +++ b/notebooks/toronto/amenities/transit_accessibility_bar.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_amenities` | neighbourhood \u00d7 year | transit_per_1000, transit_index, transit_count |\n", + "| `mart_neighbourhood_amenities` | neighbourhood × year | transit_per_1000, transit_index, transit_count |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, 
"outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -74,7 +75,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = df.head(20).to_dict('records')" + "data = df.head(20).to_dict(\"records\")" ] }, { @@ -90,7 +91,9 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'transit_per_1000', 'transit_index', 'transit_count']].head(10)" + "df[[\"neighbourhood_name\", \"transit_per_1000\", \"transit_index\", \"transit_count\"]].head(\n", + " 10\n", + ")" ] }, { @@ -101,7 +104,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`." + "Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`." ] }, { @@ -111,17 +114,18 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_horizontal_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n", "\n", "fig = create_horizontal_bar(\n", " data=data,\n", - " name_column='neighbourhood_name',\n", - " value_column='transit_per_1000',\n", - " title='Top 20 Neighbourhoods by Transit Accessibility',\n", - " color='#00BCD4',\n", - " value_format='.2f',\n", + " name_column=\"neighbourhood_name\",\n", + " value_column=\"transit_per_1000\",\n", + " title=\"Top 20 Neighbourhoods by Transit Accessibility\",\n", + " color=\"#00BCD4\",\n", + " value_format=\".2f\",\n", ")\n", "\n", "fig.show()" @@ -140,7 +144,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"City-wide Transit Statistics:\")\n", + "print(\"City-wide Transit Statistics:\")\n", "print(f\" Total Transit Stops: {df['transit_count'].sum():,.0f}\")\n", "print(f\" Average per 1,000 pop: {df['transit_per_1000'].mean():.2f}\")\n", "print(f\" Median per 1,000 pop: {df['transit_per_1000'].median():.2f}\")\n", diff --git a/notebooks/demographics/.gitkeep b/notebooks/toronto/demographics/.gitkeep similarity index 100% rename from notebooks/demographics/.gitkeep rename to notebooks/toronto/demographics/.gitkeep diff --git a/notebooks/demographics/age_distribution.ipynb b/notebooks/toronto/demographics/age_distribution.ipynb similarity index 70% rename from notebooks/demographics/age_distribution.ipynb rename to notebooks/toronto/demographics/age_distribution.ipynb index 208bb6d..1c5f423 100644 --- a/notebooks/demographics/age_distribution.ipynb +++ b/notebooks/toronto/demographics/age_distribution.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_demographics` | neighbourhood \u00d7 year | median_age, age_index, city_avg_age |\n", + "| `mart_neighbourhood_demographics` | neighbourhood × year | median_age, age_index, city_avg_age |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", 
- "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -76,13 +77,13 @@ "metadata": {}, "outputs": [], "source": [ - "city_avg = df['city_avg_age'].iloc[0]\n", - "df['age_category'] = df['median_age'].apply(\n", - " lambda x: 'Younger' if x < city_avg else 'Older'\n", + "city_avg = df[\"city_avg_age\"].iloc[0]\n", + "df[\"age_category\"] = df[\"median_age\"].apply(\n", + " lambda x: \"Younger\" if x < city_avg else \"Older\"\n", ")\n", - "df['age_deviation'] = df['median_age'] - city_avg\n", + "df[\"age_deviation\"] = df[\"median_age\"] - city_avg\n", "\n", - "data = df.to_dict('records')" + "data = df.to_dict(\"records\")" ] }, { @@ -100,9 +101,13 @@ "source": [ "print(f\"City Average Age: {city_avg:.1f}\")\n", "print(\"\\nYoungest Neighbourhoods:\")\n", - "display(df.tail(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])\n", + "display(\n", + " df.tail(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n", + ")\n", "print(\"\\nOldest Neighbourhoods:\")\n", - "display(df.head(5)[['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']])" + "display(\n", + " df.head(5)[[\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]]\n", + ")" ] }, { @@ -113,7 +118,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`." + "Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`." 
] }, { @@ -123,20 +128,21 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_ranking_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n", "\n", "fig = create_ranking_bar(\n", " data=data,\n", - " name_column='neighbourhood_name',\n", - " value_column='median_age',\n", - " title='Youngest & Oldest Neighbourhoods (Median Age)',\n", + " name_column=\"neighbourhood_name\",\n", + " value_column=\"median_age\",\n", + " title=\"Youngest & Oldest Neighbourhoods (Median Age)\",\n", " top_n=10,\n", " bottom_n=10,\n", - " color_top='#FF9800', # Orange for older\n", - " color_bottom='#2196F3', # Blue for younger\n", - " value_format='.1f',\n", + " color_top=\"#FF9800\", # Orange for older\n", + " color_bottom=\"#2196F3\", # Blue for younger\n", + " value_format=\".1f\",\n", ")\n", "\n", "fig.show()" @@ -157,7 +163,7 @@ "source": [ "# Age by income quintile\n", "print(\"Median Age by Income Quintile:\")\n", - "df.groupby('income_quintile')['median_age'].mean().round(1)" + "df.groupby(\"income_quintile\")[\"median_age\"].mean().round(1)" ] } ], diff --git a/notebooks/demographics/income_choropleth.ipynb b/notebooks/toronto/demographics/income_choropleth.ipynb similarity index 72% rename from notebooks/demographics/income_choropleth.ipynb rename to notebooks/toronto/demographics/income_choropleth.ipynb index 8eaef95..8876c15 100644 --- a/notebooks/demographics/income_choropleth.ipynb +++ b/notebooks/toronto/demographics/income_choropleth.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_demographics` | neighbourhood \u00d7 year | median_household_income, income_index, income_quintile, geometry |\n", + "| `mart_neighbourhood_demographics` | neighbourhood × year | median_household_income, income_index, income_quintile, geometry |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -77,19 +78,18 @@ "metadata": {}, "outputs": [], "source": [ - "import geopandas as gpd\n", "import json\n", "\n", - "df['income_thousands'] = df['median_household_income'] / 1000\n", + "import geopandas as gpd\n", + "\n", + "df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n", "\n", "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n", - " crs='EPSG:4326'\n", + " df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n", ")\n", "\n", "geojson = json.loads(gdf.to_json())\n", - "data = df.drop(columns=['geometry']).to_dict('records')" + "data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")" ] }, { @@ -105,7 +105,9 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'median_household_income', 'income_index', 'income_quintile']].head(10)" + "df[\n", + " [\"neighbourhood_name\", 
\"median_household_income\", \"income_index\", \"income_quintile\"]\n", + "].head(10)" ] }, { @@ -116,7 +118,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`." + "Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`." ] }, { @@ -126,18 +128,19 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.choropleth import create_choropleth_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n", "\n", "fig = create_choropleth_figure(\n", " geojson=geojson,\n", " data=data,\n", - " location_key='neighbourhood_id',\n", - " color_column='median_household_income',\n", - " hover_data=['neighbourhood_name', 'income_index', 'income_quintile'],\n", - " color_scale='Viridis',\n", - " title='Toronto Median Household Income by Neighbourhood',\n", + " location_key=\"neighbourhood_id\",\n", + " color_column=\"median_household_income\",\n", + " hover_data=[\"neighbourhood_name\", \"income_index\", \"income_quintile\"],\n", + " color_scale=\"Viridis\",\n", + " title=\"Toronto Median Household Income by Neighbourhood\",\n", " zoom=10,\n", ")\n", "\n", @@ -157,7 +160,9 @@ "metadata": {}, "outputs": [], "source": [ - "df.groupby('income_quintile')['median_household_income'].agg(['count', 'mean', 'min', 'max']).round(0)" + "df.groupby(\"income_quintile\")[\"median_household_income\"].agg(\n", + " [\"count\", \"mean\", \"min\", \"max\"]\n", + ").round(0)" ] } ], diff --git a/notebooks/demographics/population_density_bar.ipynb b/notebooks/toronto/demographics/population_density_bar.ipynb similarity index 78% rename from notebooks/demographics/population_density_bar.ipynb rename to notebooks/toronto/demographics/population_density_bar.ipynb index 8c7cd23..003e3f8 100644 --- a/notebooks/demographics/population_density_bar.ipynb +++ b/notebooks/toronto/demographics/population_density_bar.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_demographics` | neighbourhood \u00d7 year | population_density, population, land_area_sqkm |\n", + "| `mart_neighbourhood_demographics` | neighbourhood × year | population_density, population, land_area_sqkm |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -74,7 +75,7 @@ "metadata": {}, "outputs": [], "source": [ - "data = df.head(20).to_dict('records')" + "data = df.head(20).to_dict(\"records\")" ] }, { @@ -90,7 +91,9 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'population_density', 'population', 'land_area_sqkm']].head(10)" + "df[[\"neighbourhood_name\", \"population_density\", \"population\", \"land_area_sqkm\"]].head(\n", + " 10\n", + ")" ] }, { @@ -101,7 +104,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_horizontal_bar` from 
`portfolio_app.figures.bar_charts`." + "Uses `create_horizontal_bar` from `portfolio_app.figures.toronto.bar_charts`." ] }, { @@ -111,17 +114,18 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_horizontal_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_horizontal_bar\n", "\n", "fig = create_horizontal_bar(\n", " data=data,\n", - " name_column='neighbourhood_name',\n", - " value_column='population_density',\n", - " title='Top 20 Most Dense Neighbourhoods',\n", - " color='#9C27B0',\n", - " value_format=',.0f',\n", + " name_column=\"neighbourhood_name\",\n", + " value_column=\"population_density\",\n", + " title=\"Top 20 Most Dense Neighbourhoods\",\n", + " color=\"#9C27B0\",\n", + " value_format=\",.0f\",\n", ")\n", "\n", "fig.show()" @@ -140,7 +144,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"City-wide Statistics:\")\n", + "print(\"City-wide Statistics:\")\n", "print(f\" Total Population: {df['population'].sum():,.0f}\")\n", "print(f\" Total Area: {df['land_area_sqkm'].sum():,.1f} sq km\")\n", "print(f\" Average Density: {df['population_density'].mean():,.0f} per sq km\")\n", diff --git a/notebooks/housing/affordability_choropleth.ipynb b/notebooks/toronto/housing/affordability_choropleth.ipynb similarity index 75% rename from notebooks/housing/affordability_choropleth.ipynb rename to notebooks/toronto/housing/affordability_choropleth.ipynb index ca2f054..9d93219 100644 --- a/notebooks/housing/affordability_choropleth.ipynb +++ b/notebooks/toronto/housing/affordability_choropleth.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_housing` | neighbourhood \u00d7 year | affordability_index, rent_to_income_pct, avg_rent_2bed, geometry |\n", + "| `mart_neighbourhood_housing` | neighbourhood × year | affordability_index, rent_to_income_pct, avg_rent_2bed, geometry |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -77,17 +78,16 @@ "metadata": {}, "outputs": [], "source": [ - "import geopandas as gpd\n", "import json\n", "\n", + "import geopandas as gpd\n", + "\n", "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n", - " crs='EPSG:4326'\n", + " df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n", ")\n", "\n", "geojson = json.loads(gdf.to_json())\n", - "data = df.drop(columns=['geometry']).to_dict('records')" + "data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")" ] }, { @@ -103,7 +103,15 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'affordability_index', 'rent_to_income_pct', 'avg_rent_2bed', 'is_affordable']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"affordability_index\",\n", + " \"rent_to_income_pct\",\n", + " 
\"avg_rent_2bed\",\n", + " \"is_affordable\",\n", + " ]\n", + "].head(10)" ] }, { @@ -114,7 +122,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n", + "Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n", "\n", "**Key Parameters:**\n", "- `color_column`: 'affordability_index'\n", @@ -128,18 +136,19 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.choropleth import create_choropleth_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n", "\n", "fig = create_choropleth_figure(\n", " geojson=geojson,\n", " data=data,\n", - " location_key='neighbourhood_id',\n", - " color_column='affordability_index',\n", - " hover_data=['neighbourhood_name', 'rent_to_income_pct', 'avg_rent_2bed'],\n", - " color_scale='RdYlGn_r', # Reversed: lower index (affordable) = green\n", - " title='Toronto Housing Affordability Index',\n", + " location_key=\"neighbourhood_id\",\n", + " color_column=\"affordability_index\",\n", + " hover_data=[\"neighbourhood_name\", \"rent_to_income_pct\", \"avg_rent_2bed\"],\n", + " color_scale=\"RdYlGn_r\", # Reversed: lower index (affordable) = green\n", + " title=\"Toronto Housing Affordability Index\",\n", " zoom=10,\n", ")\n", "\n", diff --git a/notebooks/housing/rent_trend_line.ipynb b/notebooks/toronto/housing/rent_trend_line.ipynb similarity index 69% rename from notebooks/housing/rent_trend_line.ipynb rename to notebooks/toronto/housing/rent_trend_line.ipynb index a658ab1..284a853 100644 --- a/notebooks/housing/rent_trend_line.ipynb +++ b/notebooks/toronto/housing/rent_trend_line.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_housing` | neighbourhood \u00d7 year | year, avg_rent_2bed, rent_yoy_change_pct |\n", + "| `mart_neighbourhood_housing` | neighbourhood × year | year, avg_rent_2bed, rent_yoy_change_pct |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "# City-wide average rent by year\n", "query = \"\"\"\n", @@ -77,23 +78,25 @@ "outputs": [], "source": [ "# Create date column from year\n", - "df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n", + "df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n", "\n", "# Melt for multi-line chart\n", "df_melted = df.melt(\n", - " id_vars=['year', 'date'],\n", - " value_vars=['avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed'],\n", - " var_name='bedroom_type',\n", - " value_name='avg_rent'\n", + " id_vars=[\"year\", \"date\"],\n", + " value_vars=[\"avg_rent_bachelor\", \"avg_rent_1bed\", \"avg_rent_2bed\", \"avg_rent_3bed\"],\n", + " var_name=\"bedroom_type\",\n", + " value_name=\"avg_rent\",\n", ")\n", "\n", "# Clean labels\n", - "df_melted['bedroom_type'] = df_melted['bedroom_type'].map({\n", - " 
'avg_rent_bachelor': 'Bachelor',\n", - " 'avg_rent_1bed': '1 Bedroom',\n", - " 'avg_rent_2bed': '2 Bedroom',\n", - " 'avg_rent_3bed': '3 Bedroom'\n", - "})" + "df_melted[\"bedroom_type\"] = df_melted[\"bedroom_type\"].map(\n", + " {\n", + " \"avg_rent_bachelor\": \"Bachelor\",\n", + " \"avg_rent_1bed\": \"1 Bedroom\",\n", + " \"avg_rent_2bed\": \"2 Bedroom\",\n", + " \"avg_rent_3bed\": \"3 Bedroom\",\n", + " }\n", + ")" ] }, { @@ -109,7 +112,16 @@ "metadata": {}, "outputs": [], "source": [ - "df[['year', 'avg_rent_bachelor', 'avg_rent_1bed', 'avg_rent_2bed', 'avg_rent_3bed', 'avg_yoy_change']]" + "df[\n", + " [\n", + " \"year\",\n", + " \"avg_rent_bachelor\",\n", + " \"avg_rent_1bed\",\n", + " \"avg_rent_2bed\",\n", + " \"avg_rent_3bed\",\n", + " \"avg_yoy_change\",\n", + " ]\n", + "]" ] }, { @@ -120,7 +132,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_price_time_series` from `portfolio_app.figures.time_series`.\n", + "Uses `create_price_time_series` from `portfolio_app.figures.toronto.time_series`.\n", "\n", "**Key Parameters:**\n", "- `date_column`: 'date'\n", @@ -135,18 +147,19 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.time_series import create_price_time_series\n", + "sys.path.insert(0, \"../..\")\n", "\n", - "data = df_melted.to_dict('records')\n", + "from portfolio_app.figures.toronto.time_series import create_price_time_series\n", + "\n", + "data = df_melted.to_dict(\"records\")\n", "\n", "fig = create_price_time_series(\n", " data=data,\n", - " date_column='date',\n", - " price_column='avg_rent',\n", - " group_column='bedroom_type',\n", - " title='Toronto Average Rent Trend (5 Years)',\n", + " date_column=\"date\",\n", + " price_column=\"avg_rent\",\n", + " group_column=\"bedroom_type\",\n", + " title=\"Toronto Average Rent Trend (5 Years)\",\n", ")\n", "\n", "fig.show()" @@ -167,7 +180,7 @@ "source": [ "# Show year-over-year changes\n", "print(\"Year-over-Year Rent Change (%)\")\n", - "df[['year', 'avg_yoy_change']].dropna()" + "df[[\"year\", \"avg_yoy_change\"]].dropna()" ] } ], diff --git a/notebooks/housing/tenure_breakdown_bar.ipynb b/notebooks/toronto/housing/tenure_breakdown_bar.ipynb similarity index 68% rename from notebooks/housing/tenure_breakdown_bar.ipynb rename to notebooks/toronto/housing/tenure_breakdown_bar.ipynb index 413f6b8..90b92f7 100644 --- a/notebooks/housing/tenure_breakdown_bar.ipynb +++ b/notebooks/toronto/housing/tenure_breakdown_bar.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_housing` | neighbourhood \u00d7 year | pct_owner_occupied, pct_renter_occupied, income_quintile |\n", + "| `mart_neighbourhood_housing` | neighbourhood × year | pct_owner_occupied, pct_renter_occupied, income_quintile |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -77,18 +78,17 @@ "source": [ "# Prepare for stacked bar\n", 
"df_stacked = df.melt(\n", - " id_vars=['neighbourhood_name', 'income_quintile'],\n", - " value_vars=['pct_owner_occupied', 'pct_renter_occupied'],\n", - " var_name='tenure_type',\n", - " value_name='percentage'\n", + " id_vars=[\"neighbourhood_name\", \"income_quintile\"],\n", + " value_vars=[\"pct_owner_occupied\", \"pct_renter_occupied\"],\n", + " var_name=\"tenure_type\",\n", + " value_name=\"percentage\",\n", ")\n", "\n", - "df_stacked['tenure_type'] = df_stacked['tenure_type'].map({\n", - " 'pct_owner_occupied': 'Owner',\n", - " 'pct_renter_occupied': 'Renter'\n", - "})\n", + "df_stacked[\"tenure_type\"] = df_stacked[\"tenure_type\"].map(\n", + " {\"pct_owner_occupied\": \"Owner\", \"pct_renter_occupied\": \"Renter\"}\n", + ")\n", "\n", - "data = df_stacked.to_dict('records')" + "data = df_stacked.to_dict(\"records\")" ] }, { @@ -105,7 +105,14 @@ "outputs": [], "source": [ "print(\"Highest Renter Neighbourhoods:\")\n", - "df[['neighbourhood_name', 'pct_renter_occupied', 'pct_owner_occupied', 'income_quintile']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"pct_renter_occupied\",\n", + " \"pct_owner_occupied\",\n", + " \"income_quintile\",\n", + " ]\n", + "].head(10)" ] }, { @@ -116,7 +123,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`.\n", + "Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`.\n", "\n", "**Key Parameters:**\n", "- `x_column`: 'neighbourhood_name'\n", @@ -132,21 +139,22 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_stacked_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n", "\n", "# Show top 20 by renter percentage\n", - "top_20_names = df.head(20)['neighbourhood_name'].tolist()\n", - "data_filtered = [d for d in data if d['neighbourhood_name'] in top_20_names]\n", + "top_20_names = df.head(20)[\"neighbourhood_name\"].tolist()\n", + "data_filtered = [d for d in data if d[\"neighbourhood_name\"] in top_20_names]\n", "\n", "fig = create_stacked_bar(\n", " data=data_filtered,\n", - " x_column='neighbourhood_name',\n", - " value_column='percentage',\n", - " category_column='tenure_type',\n", - " title='Housing Tenure Mix - Top 20 Renter Neighbourhoods',\n", - " color_map={'Owner': '#4CAF50', 'Renter': '#2196F3'},\n", + " x_column=\"neighbourhood_name\",\n", + " value_column=\"percentage\",\n", + " category_column=\"tenure_type\",\n", + " title=\"Housing Tenure Mix - Top 20 Renter Neighbourhoods\",\n", + " color_map={\"Owner\": \"#4CAF50\", \"Renter\": \"#2196F3\"},\n", " show_percentages=True,\n", ")\n", "\n", @@ -172,7 +180,9 @@ "\n", "# By income quintile\n", "print(\"\\nTenure by Income Quintile:\")\n", - "df.groupby('income_quintile')[['pct_owner_occupied', 'pct_renter_occupied']].mean().round(1)" + "df.groupby(\"income_quintile\")[\n", + " [\"pct_owner_occupied\", \"pct_renter_occupied\"]\n", + "].mean().round(1)" ] } ], diff --git a/notebooks/overview/income_safety_scatter.ipynb b/notebooks/toronto/overview/income_safety_scatter.ipynb similarity index 77% rename from notebooks/overview/income_safety_scatter.ipynb rename to notebooks/toronto/overview/income_safety_scatter.ipynb index da41fb2..a777b71 100644 --- a/notebooks/overview/income_safety_scatter.ipynb +++ b/notebooks/toronto/overview/income_safety_scatter.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", 
"|-------|-------|-------------|\n", - "| `mart_neighbourhood_overview` | neighbourhood \u00d7 year | neighbourhood_name, median_household_income, safety_score, population |\n", + "| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, median_household_income, safety_score, population |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -77,10 +78,10 @@ "outputs": [], "source": [ "# Scale income to thousands for better axis readability\n", - "df['income_thousands'] = df['median_household_income'] / 1000\n", + "df[\"income_thousands\"] = df[\"median_household_income\"] / 1000\n", "\n", "# Prepare data for figure factory\n", - "data = df.to_dict('records')" + "data = df.to_dict(\"records\")" ] }, { @@ -96,7 +97,14 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'median_household_income', 'safety_score', 'crime_rate_per_100k']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"median_household_income\",\n", + " \"safety_score\",\n", + " \"crime_rate_per_100k\",\n", + " ]\n", + "].head(10)" ] }, { @@ -107,7 +115,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_scatter_figure` from `portfolio_app.figures.scatter`.\n", + "Uses `create_scatter_figure` from `portfolio_app.figures.toronto.scatter`.\n", "\n", "**Key Parameters:**\n", "- `x_column`: 'income_thousands' (median household income in $K)\n", @@ -124,19 +132,20 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.scatter import create_scatter_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.scatter import create_scatter_figure\n", "\n", "fig = create_scatter_figure(\n", " data=data,\n", - " x_column='income_thousands',\n", - " y_column='safety_score',\n", - " name_column='neighbourhood_name',\n", - " size_column='population',\n", - " title='Income vs Safety by Neighbourhood',\n", - " x_title='Median Household Income ($K)',\n", - " y_title='Safety Score (0-100)',\n", + " x_column=\"income_thousands\",\n", + " y_column=\"safety_score\",\n", + " name_column=\"neighbourhood_name\",\n", + " size_column=\"population\",\n", + " title=\"Income vs Safety by Neighbourhood\",\n", + " x_title=\"Median Household Income ($K)\",\n", + " y_title=\"Safety Score (0-100)\",\n", " trendline=True,\n", ")\n", "\n", @@ -166,7 +175,7 @@ "outputs": [], "source": [ "# Calculate correlation coefficient\n", - "correlation = df['median_household_income'].corr(df['safety_score'])\n", + "correlation = df[\"median_household_income\"].corr(df[\"safety_score\"])\n", "print(f\"Correlation coefficient (Income vs Safety): {correlation:.3f}\")" ] } diff --git a/notebooks/overview/livability_choropleth.ipynb b/notebooks/toronto/overview/livability_choropleth.ipynb similarity index 63% rename from notebooks/overview/livability_choropleth.ipynb rename to notebooks/toronto/overview/livability_choropleth.ipynb index 
580bc59..d2c0a1c 100644 --- a/notebooks/overview/livability_choropleth.ipynb +++ b/notebooks/toronto/overview/livability_choropleth.ipynb @@ -29,7 +29,38 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "import pandas as pd\nfrom sqlalchemy import create_engine\nfrom dotenv import load_dotenv\nimport os\n\n# Load .env from project root\nload_dotenv('../../.env')\n\nengine = create_engine(os.environ['DATABASE_URL'])\n\nquery = \"\"\"\nSELECT\n neighbourhood_id,\n neighbourhood_name,\n geometry,\n year,\n livability_score,\n safety_score,\n affordability_score,\n amenity_score,\n population,\n median_household_income\nFROM public_marts.mart_neighbourhood_overview\nWHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\nORDER BY livability_score DESC\n\"\"\"\n\ndf = pd.read_sql(query, engine)\nprint(f\"Loaded {len(df)} neighbourhoods\")" + "source": [ + "import os\n", + "\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", + "\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", + "\n", + "query = \"\"\"\n", + "SELECT\n", + " neighbourhood_id,\n", + " neighbourhood_name,\n", + " geometry,\n", + " year,\n", + " livability_score,\n", + " safety_score,\n", + " affordability_score,\n", + " amenity_score,\n", + " population,\n", + " median_household_income\n", + "FROM public_marts.mart_neighbourhood_overview\n", + "WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_overview)\n", + "ORDER BY livability_score DESC\n", + "\"\"\"\n", + "\n", + "df = pd.read_sql(query, engine)\n", + "print(f\"Loaded {len(df)} neighbourhoods\")" + ] }, { "cell_type": "markdown", @@ -49,21 +80,20 @@ "outputs": [], "source": [ "# Transform geometry to GeoJSON\n", - "import geopandas as gpd\n", "import json\n", "\n", + "import geopandas as gpd\n", + "\n", "# Convert WKB geometry to GeoDataFrame\n", "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n", - " crs='EPSG:4326'\n", + " df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n", ")\n", "\n", "# Create GeoJSON FeatureCollection\n", "geojson = json.loads(gdf.to_json())\n", "\n", "# Prepare data for figure factory\n", - "data = df.drop(columns=['geometry']).to_dict('records')" + "data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")" ] }, { @@ -79,7 +109,15 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'livability_score', 'safety_score', 'affordability_score', 'amenity_score']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"livability_score\",\n", + " \"safety_score\",\n", + " \"affordability_score\",\n", + " \"amenity_score\",\n", + " ]\n", + "].head(10)" ] }, { @@ -90,7 +128,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_choropleth_figure` from `portfolio_app.figures.choropleth`.\n", + "Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n", "\n", "**Key Parameters:**\n", "- `geojson`: GeoJSON FeatureCollection with neighbourhood boundaries\n", @@ -107,18 +145,24 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.choropleth import create_choropleth_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n", "\n", "fig = create_choropleth_figure(\n", " 
geojson=geojson,\n", " data=data,\n", - " location_key='neighbourhood_id',\n", - " color_column='livability_score',\n", - " hover_data=['neighbourhood_name', 'safety_score', 'affordability_score', 'amenity_score'],\n", - " color_scale='RdYlGn',\n", - " title='Toronto Neighbourhood Livability Score',\n", + " location_key=\"neighbourhood_id\",\n", + " color_column=\"livability_score\",\n", + " hover_data=[\n", + " \"neighbourhood_name\",\n", + " \"safety_score\",\n", + " \"affordability_score\",\n", + " \"amenity_score\",\n", + " ],\n", + " color_scale=\"RdYlGn\",\n", + " title=\"Toronto Neighbourhood Livability Score\",\n", " zoom=10,\n", ")\n", "\n", diff --git a/notebooks/overview/top_bottom_10_bar.ipynb b/notebooks/toronto/overview/top_bottom_10_bar.ipynb similarity index 81% rename from notebooks/overview/top_bottom_10_bar.ipynb rename to notebooks/toronto/overview/top_bottom_10_bar.ipynb index 6468ea3..9cbe3ec 100644 --- a/notebooks/overview/top_bottom_10_bar.ipynb +++ b/notebooks/toronto/overview/top_bottom_10_bar.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_overview` | neighbourhood \u00d7 year | neighbourhood_name, livability_score |\n", + "| `mart_neighbourhood_overview` | neighbourhood × year | neighbourhood_name, livability_score |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -76,7 +77,7 @@ "source": [ "# The figure factory handles top/bottom selection internally\n", "# Just prepare as list of dicts\n", - "data = df.to_dict('records')" + "data = df.to_dict(\"records\")" ] }, { @@ -106,7 +107,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`.\n", + "Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`.\n", "\n", "**Key Parameters:**\n", "- `data`: List of dicts with all neighbourhoods\n", @@ -123,20 +124,21 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_ranking_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n", "\n", "fig = create_ranking_bar(\n", " data=data,\n", - " name_column='neighbourhood_name',\n", - " value_column='livability_score',\n", - " title='Top & Bottom 10 Neighbourhoods by Livability',\n", + " name_column=\"neighbourhood_name\",\n", + " value_column=\"livability_score\",\n", + " title=\"Top & Bottom 10 Neighbourhoods by Livability\",\n", " top_n=10,\n", " bottom_n=10,\n", - " color_top='#4CAF50', # Green for top performers\n", - " color_bottom='#F44336', # Red for bottom performers\n", - " value_format='.1f',\n", + " color_top=\"#4CAF50\", # Green for top performers\n", + " color_bottom=\"#F44336\", # Red for bottom performers\n", + " value_format=\".1f\",\n", ")\n", "\n", "fig.show()" diff --git a/notebooks/safety/.gitkeep 
b/notebooks/toronto/safety/.gitkeep similarity index 100% rename from notebooks/safety/.gitkeep rename to notebooks/toronto/safety/.gitkeep diff --git a/notebooks/safety/crime_breakdown_bar.ipynb b/notebooks/toronto/safety/crime_breakdown_bar.ipynb similarity index 64% rename from notebooks/safety/crime_breakdown_bar.ipynb rename to notebooks/toronto/safety/crime_breakdown_bar.ipynb index 2ff6d43..9bc48b2 100644 --- a/notebooks/safety/crime_breakdown_bar.ipynb +++ b/notebooks/toronto/safety/crime_breakdown_bar.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_safety` | neighbourhood \u00d7 year | assault_count, auto_theft_count, break_enter_count, robbery_count, etc. |\n", + "| `mart_neighbourhood_safety` | neighbourhood × year | assault_count, auto_theft_count, break_enter_count, robbery_count, etc. |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -79,17 +80,25 @@ "outputs": [], "source": [ "df_melted = df.melt(\n", - " id_vars=['neighbourhood_name', 'total_incidents'],\n", - " value_vars=['assault_count', 'auto_theft_count', 'break_enter_count', \n", - " 'robbery_count', 'theft_over_count', 'homicide_count'],\n", - " var_name='crime_type',\n", - " value_name='count'\n", + " id_vars=[\"neighbourhood_name\", \"total_incidents\"],\n", + " value_vars=[\n", + " \"assault_count\",\n", + " \"auto_theft_count\",\n", + " \"break_enter_count\",\n", + " \"robbery_count\",\n", + " \"theft_over_count\",\n", + " \"homicide_count\",\n", + " ],\n", + " var_name=\"crime_type\",\n", + " value_name=\"count\",\n", ")\n", "\n", "# Clean labels\n", - "df_melted['crime_type'] = df_melted['crime_type'].str.replace('_count', '').str.replace('_', ' ').str.title()\n", + "df_melted[\"crime_type\"] = (\n", + " df_melted[\"crime_type\"].str.replace(\"_count\", \"\").str.replace(\"_\", \" \").str.title()\n", + ")\n", "\n", - "data = df_melted.to_dict('records')" + "data = df_melted.to_dict(\"records\")" ] }, { @@ -105,7 +114,15 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'assault_count', 'auto_theft_count', 'break_enter_count', 'total_incidents']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"assault_count\",\n", + " \"auto_theft_count\",\n", + " \"break_enter_count\",\n", + " \"total_incidents\",\n", + " ]\n", + "].head(10)" ] }, { @@ -116,7 +133,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_stacked_bar` from `portfolio_app.figures.bar_charts`." + "Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`." 
] }, { @@ -126,23 +143,24 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.bar_charts import create_stacked_bar\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n", "\n", "fig = create_stacked_bar(\n", " data=data,\n", - " x_column='neighbourhood_name',\n", - " value_column='count',\n", - " category_column='crime_type',\n", - " title='Crime Type Breakdown - Top 15 Neighbourhoods',\n", + " x_column=\"neighbourhood_name\",\n", + " value_column=\"count\",\n", + " category_column=\"crime_type\",\n", + " title=\"Crime Type Breakdown - Top 15 Neighbourhoods\",\n", " color_map={\n", - " 'Assault': '#d62728',\n", - " 'Auto Theft': '#ff7f0e',\n", - " 'Break Enter': '#9467bd',\n", - " 'Robbery': '#8c564b',\n", - " 'Theft Over': '#e377c2',\n", - " 'Homicide': '#1f77b4'\n", + " \"Assault\": \"#d62728\",\n", + " \"Auto Theft\": \"#ff7f0e\",\n", + " \"Break Enter\": \"#9467bd\",\n", + " \"Robbery\": \"#8c564b\",\n", + " \"Theft Over\": \"#e377c2\",\n", + " \"Homicide\": \"#1f77b4\",\n", " },\n", ")\n", "\n", diff --git a/notebooks/safety/crime_rate_choropleth.ipynb b/notebooks/toronto/safety/crime_rate_choropleth.ipynb similarity index 75% rename from notebooks/safety/crime_rate_choropleth.ipynb rename to notebooks/toronto/safety/crime_rate_choropleth.ipynb index 05c3225..011c753 100644 --- a/notebooks/safety/crime_rate_choropleth.ipynb +++ b/notebooks/toronto/safety/crime_rate_choropleth.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_safety` | neighbourhood \u00d7 year | crime_rate_per_100k, crime_index, safety_tier, geometry |\n", + "| `mart_neighbourhood_safety` | neighbourhood × year | crime_rate_per_100k, crime_index, safety_tier, geometry |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -77,17 +78,16 @@ "metadata": {}, "outputs": [], "source": [ - "import geopandas as gpd\n", "import json\n", "\n", + "import geopandas as gpd\n", + "\n", "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkb(df['geometry']),\n", - " crs='EPSG:4326'\n", + " df, geometry=gpd.GeoSeries.from_wkb(df[\"geometry\"]), crs=\"EPSG:4326\"\n", ")\n", "\n", "geojson = json.loads(gdf.to_json())\n", - "data = df.drop(columns=['geometry']).to_dict('records')" + "data = df.drop(columns=[\"geometry\"]).to_dict(\"records\")" ] }, { @@ -103,7 +103,15 @@ "metadata": {}, "outputs": [], "source": [ - "df[['neighbourhood_name', 'crime_rate_per_100k', 'crime_index', 'safety_tier', 'total_incidents']].head(10)" + "df[\n", + " [\n", + " \"neighbourhood_name\",\n", + " \"crime_rate_per_100k\",\n", + " \"crime_index\",\n", + " \"safety_tier\",\n", + " \"total_incidents\",\n", + " ]\n", + "].head(10)" ] }, { @@ -114,7 +122,7 @@ "\n", "### Figure Factory\n", "\n", - "Uses `create_choropleth_figure` from 
`portfolio_app.figures.choropleth`.\n", + "Uses `create_choropleth_figure` from `portfolio_app.figures.toronto.choropleth`.\n", "\n", "**Key Parameters:**\n", "- `color_column`: 'crime_rate_per_100k'\n", @@ -128,18 +136,19 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.choropleth import create_choropleth_figure\n", + "sys.path.insert(0, \"../..\")\n", + "\n", + "from portfolio_app.figures.toronto.choropleth import create_choropleth_figure\n", "\n", "fig = create_choropleth_figure(\n", " geojson=geojson,\n", " data=data,\n", - " location_key='neighbourhood_id',\n", - " color_column='crime_rate_per_100k',\n", - " hover_data=['neighbourhood_name', 'crime_index', 'total_incidents'],\n", - " color_scale='RdYlGn_r',\n", - " title='Toronto Crime Rate per 100,000 Population',\n", + " location_key=\"neighbourhood_id\",\n", + " color_column=\"crime_rate_per_100k\",\n", + " hover_data=[\"neighbourhood_name\", \"crime_index\", \"total_incidents\"],\n", + " color_scale=\"RdYlGn_r\",\n", + " title=\"Toronto Crime Rate per 100,000 Population\",\n", " zoom=10,\n", ")\n", "\n", diff --git a/notebooks/safety/crime_trend_line.ipynb b/notebooks/toronto/safety/crime_trend_line.ipynb similarity index 65% rename from notebooks/safety/crime_trend_line.ipynb rename to notebooks/toronto/safety/crime_trend_line.ipynb index 3e3a3a5..bc23631 100644 --- a/notebooks/safety/crime_trend_line.ipynb +++ b/notebooks/toronto/safety/crime_trend_line.ipynb @@ -19,7 +19,7 @@ "\n", "| Table | Grain | Key Columns |\n", "|-------|-------|-------------|\n", - "| `mart_neighbourhood_safety` | neighbourhood \u00d7 year | year, crime_rate_per_100k, crime_yoy_change_pct |\n", + "| `mart_neighbourhood_safety` | neighbourhood × year | year, crime_rate_per_100k, crime_yoy_change_pct |\n", "\n", "### SQL Query" ] @@ -30,15 +30,16 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", "import os\n", "\n", - "# Load .env from project root\n", - "load_dotenv('../../.env')\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from sqlalchemy import create_engine\n", "\n", - "engine = create_engine(os.environ['DATABASE_URL'])\n", + "# Load .env from project root\n", + "load_dotenv(\"../../.env\")\n", + "\n", + "engine = create_engine(os.environ[\"DATABASE_URL\"])\n", "\n", "query = \"\"\"\n", "SELECT\n", @@ -76,21 +77,23 @@ "metadata": {}, "outputs": [], "source": [ - "df['date'] = pd.to_datetime(df['year'].astype(str) + '-01-01')\n", + "df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n", "\n", "# Melt for multi-line\n", "df_melted = df.melt(\n", - " id_vars=['year', 'date'],\n", - " value_vars=['avg_assault_rate', 'avg_auto_theft_rate', 'avg_break_enter_rate'],\n", - " var_name='crime_type',\n", - " value_name='rate_per_100k'\n", + " id_vars=[\"year\", \"date\"],\n", + " value_vars=[\"avg_assault_rate\", \"avg_auto_theft_rate\", \"avg_break_enter_rate\"],\n", + " var_name=\"crime_type\",\n", + " value_name=\"rate_per_100k\",\n", ")\n", "\n", - "df_melted['crime_type'] = df_melted['crime_type'].map({\n", - " 'avg_assault_rate': 'Assault',\n", - " 'avg_auto_theft_rate': 'Auto Theft',\n", - " 'avg_break_enter_rate': 'Break & Enter'\n", - "})" + "df_melted[\"crime_type\"] = df_melted[\"crime_type\"].map(\n", + " {\n", + " \"avg_assault_rate\": \"Assault\",\n", + " \"avg_auto_theft_rate\": \"Auto Theft\",\n", + " \"avg_break_enter_rate\": 
\"Break & Enter\",\n", + " }\n", + ")" ] }, { @@ -106,7 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[['year', 'avg_crime_rate', 'total_city_incidents', 'avg_yoy_change']]" + "df[[\"year\", \"avg_crime_rate\", \"total_city_incidents\", \"avg_yoy_change\"]]" ] }, { @@ -127,22 +130,23 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.insert(0, '../..')\n", "\n", - "from portfolio_app.figures.time_series import create_price_time_series\n", + "sys.path.insert(0, \"../..\")\n", "\n", - "data = df_melted.to_dict('records')\n", + "from portfolio_app.figures.toronto.time_series import create_price_time_series\n", + "\n", + "data = df_melted.to_dict(\"records\")\n", "\n", "fig = create_price_time_series(\n", " data=data,\n", - " date_column='date',\n", - " price_column='rate_per_100k',\n", - " group_column='crime_type',\n", - " title='Toronto Crime Trends by Type (5 Years)',\n", + " date_column=\"date\",\n", + " price_column=\"rate_per_100k\",\n", + " group_column=\"crime_type\",\n", + " title=\"Toronto Crime Trends by Type (5 Years)\",\n", ")\n", "\n", "# Remove dollar sign formatting since this is rate data\n", - "fig.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n", + "fig.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n", "\n", "fig.show()" ] @@ -161,15 +165,19 @@ "outputs": [], "source": [ "# Total crime rate trend\n", - "total_data = df[['date', 'avg_crime_rate']].rename(columns={'avg_crime_rate': 'total_rate'}).to_dict('records')\n", + "total_data = (\n", + " df[[\"date\", \"avg_crime_rate\"]]\n", + " .rename(columns={\"avg_crime_rate\": \"total_rate\"})\n", + " .to_dict(\"records\")\n", + ")\n", "\n", "fig2 = create_price_time_series(\n", " data=total_data,\n", - " date_column='date',\n", - " price_column='total_rate',\n", - " title='Toronto Overall Crime Rate Trend',\n", + " date_column=\"date\",\n", + " price_column=\"total_rate\",\n", + " title=\"Toronto Overall Crime Rate Trend\",\n", ")\n", - "fig2.update_layout(yaxis_tickprefix='', yaxis_title='Rate per 100K')\n", + "fig2.update_layout(yaxis_tickprefix=\"\", yaxis_title=\"Rate per 100K\")\n", "fig2.show()" ] } diff --git a/portfolio_app/components/metric_card.py b/portfolio_app/components/metric_card.py index 42b3d24..87e9162 100644 --- a/portfolio_app/components/metric_card.py +++ b/portfolio_app/components/metric_card.py @@ -5,7 +5,7 @@ from typing import Any import dash_mantine_components as dmc from dash import dcc -from portfolio_app.figures.summary_cards import create_metric_card_figure +from portfolio_app.figures.toronto.summary_cards import create_metric_card_figure class MetricCard: diff --git a/portfolio_app/figures/__init__.py b/portfolio_app/figures/__init__.py index fe22939..e0d98ec 100644 --- a/portfolio_app/figures/__init__.py +++ b/portfolio_app/figures/__init__.py @@ -1,61 +1,15 @@ -"""Plotly figure factories for data visualization.""" +"""Plotly figure factories for data visualization. 
-from .bar_charts import ( - create_horizontal_bar, - create_ranking_bar, - create_stacked_bar, -) -from .choropleth import ( - create_choropleth_figure, - create_zone_map, -) -from .demographics import ( - create_age_pyramid, - create_donut_chart, - create_income_distribution, -) -from .radar import ( - create_comparison_radar, - create_radar_figure, -) -from .scatter import ( - create_bubble_chart, - create_scatter_figure, -) -from .summary_cards import create_metric_card_figure, create_summary_metrics -from .time_series import ( - add_policy_markers, - create_market_comparison_chart, - create_price_time_series, - create_time_series_with_events, - create_volume_time_series, -) +Figure factories are organized by dashboard domain: +- toronto/ : Toronto Neighbourhood Dashboard figures + +Usage: + from portfolio_app.figures.toronto import create_choropleth_figure + from portfolio_app.figures.toronto import create_ranking_bar +""" + +from . import toronto __all__ = [ - # Choropleth - "create_choropleth_figure", - "create_zone_map", - # Time series - "create_price_time_series", - "create_volume_time_series", - "create_market_comparison_chart", - "create_time_series_with_events", - "add_policy_markers", - # Summary - "create_metric_card_figure", - "create_summary_metrics", - # Bar charts - "create_ranking_bar", - "create_stacked_bar", - "create_horizontal_bar", - # Scatter plots - "create_scatter_figure", - "create_bubble_chart", - # Radar charts - "create_radar_figure", - "create_comparison_radar", - # Demographics - "create_age_pyramid", - "create_donut_chart", - "create_income_distribution", + "toronto", ] diff --git a/portfolio_app/figures/toronto/__init__.py b/portfolio_app/figures/toronto/__init__.py new file mode 100644 index 0000000..ea2dbe1 --- /dev/null +++ b/portfolio_app/figures/toronto/__init__.py @@ -0,0 +1,61 @@ +"""Plotly figure factories for Toronto dashboard visualizations.""" + +from .bar_charts import ( + create_horizontal_bar, + create_ranking_bar, + create_stacked_bar, +) +from .choropleth import ( + create_choropleth_figure, + create_zone_map, +) +from .demographics import ( + create_age_pyramid, + create_donut_chart, + create_income_distribution, +) +from .radar import ( + create_comparison_radar, + create_radar_figure, +) +from .scatter import ( + create_bubble_chart, + create_scatter_figure, +) +from .summary_cards import create_metric_card_figure, create_summary_metrics +from .time_series import ( + add_policy_markers, + create_market_comparison_chart, + create_price_time_series, + create_time_series_with_events, + create_volume_time_series, +) + +__all__ = [ + # Choropleth + "create_choropleth_figure", + "create_zone_map", + # Time series + "create_price_time_series", + "create_volume_time_series", + "create_market_comparison_chart", + "create_time_series_with_events", + "add_policy_markers", + # Summary + "create_metric_card_figure", + "create_summary_metrics", + # Bar charts + "create_ranking_bar", + "create_stacked_bar", + "create_horizontal_bar", + # Scatter plots + "create_scatter_figure", + "create_bubble_chart", + # Radar charts + "create_radar_figure", + "create_comparison_radar", + # Demographics + "create_age_pyramid", + "create_donut_chart", + "create_income_distribution", +] diff --git a/portfolio_app/figures/bar_charts.py b/portfolio_app/figures/toronto/bar_charts.py similarity index 100% rename from portfolio_app/figures/bar_charts.py rename to portfolio_app/figures/toronto/bar_charts.py diff --git a/portfolio_app/figures/choropleth.py 
b/portfolio_app/figures/toronto/choropleth.py similarity index 100% rename from portfolio_app/figures/choropleth.py rename to portfolio_app/figures/toronto/choropleth.py diff --git a/portfolio_app/figures/demographics.py b/portfolio_app/figures/toronto/demographics.py similarity index 100% rename from portfolio_app/figures/demographics.py rename to portfolio_app/figures/toronto/demographics.py diff --git a/portfolio_app/figures/radar.py b/portfolio_app/figures/toronto/radar.py similarity index 100% rename from portfolio_app/figures/radar.py rename to portfolio_app/figures/toronto/radar.py diff --git a/portfolio_app/figures/scatter.py b/portfolio_app/figures/toronto/scatter.py similarity index 100% rename from portfolio_app/figures/scatter.py rename to portfolio_app/figures/toronto/scatter.py diff --git a/portfolio_app/figures/summary_cards.py b/portfolio_app/figures/toronto/summary_cards.py similarity index 100% rename from portfolio_app/figures/summary_cards.py rename to portfolio_app/figures/toronto/summary_cards.py diff --git a/portfolio_app/figures/time_series.py b/portfolio_app/figures/toronto/time_series.py similarity index 100% rename from portfolio_app/figures/time_series.py rename to portfolio_app/figures/toronto/time_series.py diff --git a/portfolio_app/pages/toronto/callbacks/chart_callbacks.py b/portfolio_app/pages/toronto/callbacks/chart_callbacks.py index 1207100..e0d2e5c 100644 --- a/portfolio_app/pages/toronto/callbacks/chart_callbacks.py +++ b/portfolio_app/pages/toronto/callbacks/chart_callbacks.py @@ -5,7 +5,7 @@ import pandas as pd import plotly.graph_objects as go from dash import Input, Output, callback -from portfolio_app.figures import ( +from portfolio_app.figures.toronto import ( create_donut_chart, create_horizontal_bar, create_radar_figure, diff --git a/portfolio_app/pages/toronto/callbacks/map_callbacks.py b/portfolio_app/pages/toronto/callbacks/map_callbacks.py index 9aef1e8..2856546 100644 --- a/portfolio_app/pages/toronto/callbacks/map_callbacks.py +++ b/portfolio_app/pages/toronto/callbacks/map_callbacks.py @@ -4,7 +4,7 @@ import plotly.graph_objects as go from dash import Input, Output, State, callback, no_update -from portfolio_app.figures import create_choropleth_figure, create_ranking_bar +from portfolio_app.figures.toronto import create_choropleth_figure, create_ranking_bar from portfolio_app.toronto.services import ( get_amenities_data, get_demographics_data, diff --git a/portfolio_app/toronto/models/dimensions.py b/portfolio_app/toronto/models/dimensions.py index 7e86265..25a9a7e 100644 --- a/portfolio_app/toronto/models/dimensions.py +++ b/portfolio_app/toronto/models/dimensions.py @@ -8,11 +8,18 @@ from sqlalchemy.orm import Mapped, mapped_column from .base import Base +# Schema constants +RAW_TORONTO_SCHEMA = "raw_toronto" + class DimTime(Base): - """Time dimension table.""" + """Time dimension table (shared across all projects). + + Note: Stays in public schema as it's a shared dimension. 
+ """ __tablename__ = "dim_time" + __table_args__ = {"schema": "public"} date_key: Mapped[int] = mapped_column(Integer, primary_key=True) full_date: Mapped[date] = mapped_column(Date, nullable=False, unique=True) @@ -27,6 +34,7 @@ class DimCMHCZone(Base): """CMHC zone dimension table with PostGIS geometry.""" __tablename__ = "dim_cmhc_zone" + __table_args__ = {"schema": RAW_TORONTO_SCHEMA} zone_key: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) zone_code: Mapped[str] = mapped_column(String(10), nullable=False, unique=True) @@ -41,6 +49,7 @@ class DimNeighbourhood(Base): """ __tablename__ = "dim_neighbourhood" + __table_args__ = {"schema": RAW_TORONTO_SCHEMA} neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True) name: Mapped[str] = mapped_column(String(100), nullable=False) @@ -69,6 +78,7 @@ class DimPolicyEvent(Base): """Policy event dimension for time-series annotation.""" __tablename__ = "dim_policy_event" + __table_args__ = {"schema": RAW_TORONTO_SCHEMA} event_id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) event_date: Mapped[date] = mapped_column(Date, nullable=False) diff --git a/portfolio_app/toronto/models/facts.py b/portfolio_app/toronto/models/facts.py index 2f1d5bd..df619b7 100644 --- a/portfolio_app/toronto/models/facts.py +++ b/portfolio_app/toronto/models/facts.py @@ -4,6 +4,7 @@ from sqlalchemy import ForeignKey, Index, Integer, Numeric, String from sqlalchemy.orm import Mapped, mapped_column, relationship from .base import Base +from .dimensions import RAW_TORONTO_SCHEMA class BridgeCMHCNeighbourhood(Base): @@ -14,6 +15,11 @@ class BridgeCMHCNeighbourhood(Base): """ __tablename__ = "bridge_cmhc_neighbourhood" + __table_args__ = ( + Index("ix_bridge_cmhc_zone", "cmhc_zone_code"), + Index("ix_bridge_neighbourhood", "neighbourhood_id"), + {"schema": RAW_TORONTO_SCHEMA}, + ) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) cmhc_zone_code: Mapped[str] = mapped_column(String(10), nullable=False) @@ -22,11 +28,6 @@ class BridgeCMHCNeighbourhood(Base): Numeric(5, 4), nullable=False ) # 0.0000 to 1.0000 - __table_args__ = ( - Index("ix_bridge_cmhc_zone", "cmhc_zone_code"), - Index("ix_bridge_neighbourhood", "neighbourhood_id"), - ) - class FactCensus(Base): """Census statistics by neighbourhood and year. @@ -35,6 +36,10 @@ class FactCensus(Base): """ __tablename__ = "fact_census" + __table_args__ = ( + Index("ix_fact_census_neighbourhood_year", "neighbourhood_id", "census_year"), + {"schema": RAW_TORONTO_SCHEMA}, + ) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False) @@ -66,10 +71,6 @@ class FactCensus(Base): Numeric(12, 2), nullable=True ) - __table_args__ = ( - Index("ix_fact_census_neighbourhood_year", "neighbourhood_id", "census_year"), - ) - class FactCrime(Base): """Crime statistics by neighbourhood and year. 
@@ -78,6 +79,11 @@ class FactCrime(Base): """ __tablename__ = "fact_crime" + __table_args__ = ( + Index("ix_fact_crime_neighbourhood_year", "neighbourhood_id", "year"), + Index("ix_fact_crime_type", "crime_type"), + {"schema": RAW_TORONTO_SCHEMA}, + ) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False) @@ -86,11 +92,6 @@ class FactCrime(Base): count: Mapped[int] = mapped_column(Integer, nullable=False) rate_per_100k: Mapped[float | None] = mapped_column(Numeric(10, 2), nullable=True) - __table_args__ = ( - Index("ix_fact_crime_neighbourhood_year", "neighbourhood_id", "year"), - Index("ix_fact_crime_type", "crime_type"), - ) - class FactAmenities(Base): """Amenity counts by neighbourhood. @@ -99,6 +100,11 @@ class FactAmenities(Base): """ __tablename__ = "fact_amenities" + __table_args__ = ( + Index("ix_fact_amenities_neighbourhood_year", "neighbourhood_id", "year"), + Index("ix_fact_amenities_type", "amenity_type"), + {"schema": RAW_TORONTO_SCHEMA}, + ) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False) @@ -106,11 +112,6 @@ class FactAmenities(Base): count: Mapped[int] = mapped_column(Integer, nullable=False) year: Mapped[int] = mapped_column(Integer, nullable=False) - __table_args__ = ( - Index("ix_fact_amenities_neighbourhood_year", "neighbourhood_id", "year"), - Index("ix_fact_amenities_type", "amenity_type"), - ) - class FactRentals(Base): """Fact table for CMHC rental market data. @@ -119,13 +120,16 @@ class FactRentals(Base): """ __tablename__ = "fact_rentals" + __table_args__ = {"schema": RAW_TORONTO_SCHEMA} id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) date_key: Mapped[int] = mapped_column( - Integer, ForeignKey("dim_time.date_key"), nullable=False + Integer, ForeignKey("public.dim_time.date_key"), nullable=False ) zone_key: Mapped[int] = mapped_column( - Integer, ForeignKey("dim_cmhc_zone.zone_key"), nullable=False + Integer, + ForeignKey(f"{RAW_TORONTO_SCHEMA}.dim_cmhc_zone.zone_key"), + nullable=False, ) bedroom_type: Mapped[str] = mapped_column(String(20), nullable=False) universe: Mapped[int | None] = mapped_column(Integer, nullable=True) @@ -139,6 +143,6 @@ class FactRentals(Base): rent_change_pct: Mapped[float | None] = mapped_column(Numeric(5, 2), nullable=True) reliability_code: Mapped[str | None] = mapped_column(String(2), nullable=True) - # Relationships - time = relationship("DimTime", backref="rentals") - zone = relationship("DimCMHCZone", backref="rentals") + # Relationships - explicit foreign_keys needed for cross-schema joins + time = relationship("DimTime", foreign_keys=[date_key], backref="rentals") + zone = relationship("DimCMHCZone", foreign_keys=[zone_key], backref="rentals") diff --git a/scripts/db/init_schema.py b/scripts/db/init_schema.py index 19ac89d..007f201 100644 --- a/scripts/db/init_schema.py +++ b/scripts/db/init_schema.py @@ -15,6 +15,7 @@ from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from portfolio_app.toronto.models import create_tables, get_engine # noqa: E402 +from portfolio_app.toronto.models.dimensions import RAW_TORONTO_SCHEMA # noqa: E402 def main() -> int: @@ -32,16 +33,30 @@ def main() -> int: result.fetchone() print("Database connection successful") + # Create domain-specific schemas + with engine.connect() as conn: + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS 
{RAW_TORONTO_SCHEMA}")) + conn.commit() + print(f"Created schema: {RAW_TORONTO_SCHEMA}") + # Create all tables create_tables() print("Schema created successfully") - # List created tables + # List created tables by schema from sqlalchemy import inspect inspector = inspect(engine) - tables = inspector.get_table_names() - print(f"Created tables: {', '.join(tables)}") + + # Public schema tables + public_tables = inspector.get_table_names(schema="public") + if public_tables: + print(f"Public schema tables: {', '.join(public_tables)}") + + # raw_toronto schema tables + toronto_tables = inspector.get_table_names(schema=RAW_TORONTO_SCHEMA) + if toronto_tables: + print(f"{RAW_TORONTO_SCHEMA} schema tables: {', '.join(toronto_tables)}") return 0
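
Below is a minimal sketch (not part of the diff above) for spot-checking the two-schema layout after running `make db-init`. It assumes `DATABASE_URL` is available via `.env`, as in the notebooks; the schema and table names (`public`, `raw_toronto`, `dim_time`, `fact_rentals`, etc.) are taken from the models in the diff, and it uses only the SQLAlchemy inspector calls already shown in `init_schema.py`.

```python
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect

# Assumes this is run from the project root so load_dotenv() finds .env
load_dotenv()
engine = create_engine(os.environ["DATABASE_URL"])

inspector = inspect(engine)

# Shared dimensions stay in public; Toronto raw data moves to raw_toronto
public_tables = set(inspector.get_table_names(schema="public"))
toronto_tables = set(inspector.get_table_names(schema="raw_toronto"))

assert "dim_time" in public_tables, "dim_time should remain in the public schema"
for table in (
    "dim_neighbourhood",
    "dim_cmhc_zone",
    "fact_census",
    "fact_crime",
    "fact_rentals",
):
    assert table in toronto_tables, f"{table} should now live in raw_toronto"

print("public:", sorted(public_tables))
print("raw_toronto:", sorted(toronto_tables))
```

If the assertions pass, the cross-schema foreign keys in `FactRentals` (`public.dim_time.date_key` and `raw_toronto.dim_cmhc_zone.zone_key`) have their target tables in the expected schemas.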