refactor: multi-dashboard structural migration

- Rename dbt project from toronto_housing to portfolio - Restructure dbt models into domain subdirectories: - shared/ for cross-domain dimensions (dim_time) - staging/toronto/, intermediate/toronto/, marts/toronto/ - Update SQLAlchemy models for raw_toronto schema - Add explicit cross-schema FK relationships for FactRentals - Namespace figure factories under figures/toronto/ - Namespace notebooks under notebooks/toronto/ - Update Makefile with domain-specific targets and env loading - Update all documentation for multi-dashboard structure This enables adding new dashboard projects (e.g., /football, /energy) without structural conflicts or naming collisions. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 19:08:20 -05:00
parent a5d6866d63
commit 62d1a52eed
73 changed files with 1114 additions and 623 deletions
--- a/dbt/models/marts/toronto/mart_neighbourhood_overview.sql
+++ b/dbt/models/marts/toronto/mart_neighbourhood_overview.sql
@@ -0,0 +1,152 @@
+-- Mart: Neighbourhood Overview with Composite Livability Score
+-- Dashboard Tab: Overview
+-- Grain: One row per neighbourhood per year
+-- Time spine: Years 2014-2025 (driven by crime/rental data availability)
+
+with years as (
+    select * from {{ ref('int_year_spine') }}
+),
+
+neighbourhoods as (
+    select * from {{ ref('stg_toronto__neighbourhoods') }}
+),
+
+-- Create base: all neighbourhoods × all years
+neighbourhood_years as (
+    select
+        n.neighbourhood_id,
+        n.neighbourhood_name,
+        n.geometry,
+        y.year
+    from neighbourhoods n
+    cross join years y
+),
+
+-- Census data (available for 2016, 2021)
+-- For each year, use the most recent census data available
+census as (
+    select * from {{ ref('stg_toronto__census') }}
+),
+
+census_mapped as (
+    select
+        ny.neighbourhood_id,
+        ny.year,
+        c.population,
+        c.unemployment_rate,
+        c.pct_bachelors_or_higher as education_bachelors_pct
+    from neighbourhood_years ny
+    left join census c on ny.neighbourhood_id = c.neighbourhood_id
+        -- Use census year <= analysis year, prefer most recent
+        and c.census_year = (
+            select max(c2.census_year)
+            from {{ ref('stg_toronto__census') }} c2
+            where c2.neighbourhood_id = ny.neighbourhood_id
+            and c2.census_year <= ny.year
+        )
+),
+
+-- CMA-level census data (for income - not available at neighbourhood level)
+cma_census as (
+    select * from {{ ref('int_census__toronto_cma') }}
+),
+
+-- Crime data (2014-2024)
+crime as (
+    select * from {{ ref('int_neighbourhood__crime_summary') }}
+),
+
+-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
+rentals as (
+    select * from {{ ref('int_rentals__toronto_cma') }}
+),
+
+-- Compute scores
+scored as (
+    select
+        ny.neighbourhood_id,
+        ny.neighbourhood_name,
+        ny.geometry,
+        ny.year,
+        cm.population,
+        -- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
+        cma.median_household_income,
+
+        -- Safety score: inverse of crime rate (higher = safer)
+        case
+            when cr.crime_rate_per_100k is not null
+            then 100 - percent_rank() over (
+                partition by ny.year
+                order by cr.crime_rate_per_100k
+            ) * 100
+            else null
+        end as safety_score,
+
+        -- Affordability score: inverse of rent-to-income ratio
+        -- Using CMA-level income since neighbourhood-level not available
+        case
+            when cma.median_household_income > 0 and r.avg_rent_standard > 0
+            then 100 - percent_rank() over (
+                partition by ny.year
+                order by (r.avg_rent_standard * 12 / cma.median_household_income)
+            ) * 100
+            else null
+        end as affordability_score,
+
+        -- Raw metrics
+        cr.crime_rate_per_100k,
+        case
+            when cma.median_household_income > 0 and r.avg_rent_standard > 0
+            then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
+            else null
+        end as rent_to_income_pct,
+        r.avg_rent_standard as avg_rent_2bed,
+        r.vacancy_rate
+
+    from neighbourhood_years ny
+    left join census_mapped cm
+        on ny.neighbourhood_id = cm.neighbourhood_id
+        and ny.year = cm.year
+    left join cma_census cma
+        on ny.year = cma.year
+    left join crime cr
+        on ny.neighbourhood_id = cr.neighbourhood_id
+        and ny.year = cr.year
+    left join rentals r
+        on ny.year = r.year
+),
+
+final as (
+    select
+        neighbourhood_id,
+        neighbourhood_name,
+        geometry,
+        year,
+        population,
+        median_household_income,
+
+        -- Component scores (0-100)
+        round(safety_score::numeric, 1) as safety_score,
+        round(affordability_score::numeric, 1) as affordability_score,
+        -- Amenity score not available at this level, use placeholder
+        50.0 as amenity_score,
+
+        -- Composite livability score: safety (40%), affordability (40%), amenities (20%)
+        round(
+            (coalesce(safety_score, 50) * 0.40 +
+             coalesce(affordability_score, 50) * 0.40 +
+             50 * 0.20)::numeric,
+            1
+        ) as livability_score,
+
+        -- Raw metrics
+        crime_rate_per_100k,
+        rent_to_income_pct,
+        avg_rent_2bed,
+        vacancy_rate,
+        null::numeric as total_amenities_per_1000
+
+    from scored
+)
+
+select * from final