refactor: multi-dashboard structural migration
Some checks failed
CI / lint-and-test (pull_request) Has been cancelled

- Rename dbt project from toronto_housing to portfolio
- Restructure dbt models into domain subdirectories:
  - shared/ for cross-domain dimensions (dim_time)
  - staging/toronto/, intermediate/toronto/, marts/toronto/
- Update SQLAlchemy models for raw_toronto schema
- Add explicit cross-schema FK relationships for FactRentals
- Namespace figure factories under figures/toronto/
- Namespace notebooks under notebooks/toronto/
- Update Makefile with domain-specific targets and env loading
- Update all documentation for multi-dashboard structure

This enables adding new dashboard projects (e.g., /football, /energy)
without structural conflicts or naming collisions.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-01 19:08:20 -05:00
parent a5d6866d63
commit 62d1a52eed
73 changed files with 1114 additions and 623 deletions

View File

@@ -0,0 +1,135 @@
version: 2
models:
- name: mart_toronto_rentals
description: "Final mart for Toronto rental market analysis by zone and time"
columns:
- name: rental_id
description: "Unique rental record identifier"
tests:
- unique
- not_null
- name: mart_neighbourhood_overview
description: "Neighbourhood overview with composite livability score"
meta:
dashboard_tab: Overview
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: neighbourhood_name
description: "Official neighbourhood name"
tests:
- not_null
- name: geometry
description: "PostGIS geometry for mapping"
- name: livability_score
description: "Composite score: safety (30%), affordability (40%), amenities (30%)"
- name: safety_score
description: "Safety component score (0-100)"
- name: affordability_score
description: "Affordability component score (0-100)"
- name: amenity_score
description: "Amenity component score (0-100)"
- name: mart_neighbourhood_housing
description: "Housing and affordability metrics by neighbourhood"
meta:
dashboard_tab: Housing
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: neighbourhood_name
description: "Official neighbourhood name"
tests:
- not_null
- name: geometry
description: "PostGIS geometry for mapping"
- name: rent_to_income_pct
description: "Rent as percentage of median income"
- name: affordability_index
description: "100 = city average affordability"
- name: rent_yoy_change_pct
description: "Year-over-year rent change"
- name: mart_neighbourhood_safety
description: "Crime rates and safety metrics by neighbourhood"
meta:
dashboard_tab: Safety
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: neighbourhood_name
description: "Official neighbourhood name"
tests:
- not_null
- name: geometry
description: "PostGIS geometry for mapping"
- name: crime_rate_per_100k
description: "Total crime rate per 100K population"
- name: crime_index
description: "100 = city average crime rate"
- name: safety_tier
description: "Safety tier (1=safest, 5=highest crime)"
tests:
- accepted_values:
arguments:
values: [1, 2, 3, 4, 5]
- name: mart_neighbourhood_demographics
description: "Demographics and income metrics by neighbourhood"
meta:
dashboard_tab: Demographics
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: neighbourhood_name
description: "Official neighbourhood name"
tests:
- not_null
- name: geometry
description: "PostGIS geometry for mapping"
- name: median_household_income
description: "Median household income"
- name: income_index
description: "100 = city average income"
- name: income_quintile
description: "Income quintile (1-5)"
tests:
- accepted_values:
arguments:
values: [1, 2, 3, 4, 5]
- name: mart_neighbourhood_amenities
description: "Amenity access metrics by neighbourhood"
meta:
dashboard_tab: Amenities
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: neighbourhood_name
description: "Official neighbourhood name"
tests:
- not_null
- name: geometry
description: "PostGIS geometry for mapping"
- name: total_amenities_per_1000
description: "Total amenities per 1000 population"
- name: amenity_index
description: "100 = city average amenities"
- name: amenity_tier
description: "Amenity tier (1=best, 5=lowest)"
tests:
- accepted_values:
arguments:
values: [1, 2, 3, 4, 5]

View File

@@ -0,0 +1,89 @@
-- Mart: Neighbourhood Amenities Analysis
-- Dashboard Tab: Amenities
-- Grain: One row per neighbourhood per year
with amenities as (
select * from {{ ref('int_neighbourhood__amenity_scores') }}
),
-- City-wide averages for comparison
city_avg as (
select
year,
avg(parks_per_1000) as city_avg_parks,
avg(schools_per_1000) as city_avg_schools,
avg(transit_per_1000) as city_avg_transit,
avg(total_amenities_per_1000) as city_avg_total_amenities
from amenities
group by year
),
final as (
select
a.neighbourhood_id,
a.neighbourhood_name,
a.geometry,
a.population,
a.land_area_sqkm,
a.year,
-- Raw counts
a.parks_count,
a.schools_count,
a.transit_count,
a.libraries_count,
a.community_centres_count,
a.recreation_count,
a.total_amenities,
-- Per 1000 population
a.parks_per_1000,
a.schools_per_1000,
a.transit_per_1000,
a.total_amenities_per_1000,
-- Per square km
a.amenities_per_sqkm,
-- City averages
round(ca.city_avg_parks::numeric, 3) as city_avg_parks_per_1000,
round(ca.city_avg_schools::numeric, 3) as city_avg_schools_per_1000,
round(ca.city_avg_transit::numeric, 3) as city_avg_transit_per_1000,
-- Amenity index (100 = city average)
case
when ca.city_avg_total_amenities > 0
then round(a.total_amenities_per_1000 / ca.city_avg_total_amenities * 100, 1)
else null
end as amenity_index,
-- Category indices
case
when ca.city_avg_parks > 0
then round(a.parks_per_1000 / ca.city_avg_parks * 100, 1)
else null
end as parks_index,
case
when ca.city_avg_schools > 0
then round(a.schools_per_1000 / ca.city_avg_schools * 100, 1)
else null
end as schools_index,
case
when ca.city_avg_transit > 0
then round(a.transit_per_1000 / ca.city_avg_transit * 100, 1)
else null
end as transit_index,
-- Amenity tier (1 = best, 5 = lowest)
ntile(5) over (
partition by a.year
order by a.total_amenities_per_1000 desc
) as amenity_tier
from amenities a
left join city_avg ca on a.year = ca.year
)
select * from final

View File

@@ -0,0 +1,81 @@
-- Mart: Neighbourhood Demographics Analysis
-- Dashboard Tab: Demographics
-- Grain: One row per neighbourhood per census year
with demographics as (
select * from {{ ref('int_neighbourhood__demographics') }}
),
-- City-wide averages for comparison
city_avg as (
select
census_year,
avg(median_household_income) as city_avg_income,
avg(median_age) as city_avg_age,
avg(unemployment_rate) as city_avg_unemployment,
avg(education_bachelors_pct) as city_avg_education,
avg(population_density) as city_avg_density
from demographics
group by census_year
),
final as (
select
d.neighbourhood_id,
d.neighbourhood_name,
d.geometry,
d.census_year as year,
-- Population
d.population,
d.land_area_sqkm,
d.population_density,
-- Income
d.median_household_income,
d.average_household_income,
d.income_quintile,
-- Income index (100 = city average)
case
when ca.city_avg_income > 0
then round(d.median_household_income / ca.city_avg_income * 100, 1)
else null
end as income_index,
-- Demographics
d.median_age,
d.unemployment_rate,
d.education_bachelors_pct,
-- Age index (100 = city average)
case
when ca.city_avg_age > 0
then round(d.median_age / ca.city_avg_age * 100, 1)
else null
end as age_index,
-- Housing tenure
d.pct_owner_occupied,
d.pct_renter_occupied,
d.average_dwelling_value,
-- Diversity index (using tenure mix as proxy - higher rental = more diverse typically)
round(
1 - (
power(d.pct_owner_occupied / 100, 2) +
power(d.pct_renter_occupied / 100, 2)
),
3
) * 100 as tenure_diversity_index,
-- City comparisons
round(ca.city_avg_income::numeric, 2) as city_avg_income,
round(ca.city_avg_age::numeric, 1) as city_avg_age,
round(ca.city_avg_unemployment::numeric, 2) as city_avg_unemployment
from demographics d
left join city_avg ca on d.census_year = ca.census_year
)
select * from final

View File

@@ -0,0 +1,93 @@
-- Mart: Neighbourhood Housing Analysis
-- Dashboard Tab: Housing
-- Grain: One row per neighbourhood per year
with housing as (
select * from {{ ref('int_neighbourhood__housing') }}
),
rentals as (
select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),
demographics as (
select * from {{ ref('int_neighbourhood__demographics') }}
),
-- Add year-over-year rent changes
with_yoy as (
select
h.*,
r.avg_rent_bachelor,
r.avg_rent_1bed,
r.avg_rent_3bed,
r.total_rental_units,
d.income_quintile,
-- Previous year rent for YoY calculation
lag(h.avg_rent_2bed, 1) over (
partition by h.neighbourhood_id
order by h.year
) as prev_year_rent_2bed
from housing h
left join rentals r
on h.neighbourhood_id = r.neighbourhood_id
and h.year = r.year
left join demographics d
on h.neighbourhood_id = d.neighbourhood_id
and h.year = d.census_year
),
final as (
select
neighbourhood_id,
neighbourhood_name,
geometry,
year,
-- Tenure mix
pct_owner_occupied,
pct_renter_occupied,
-- Housing values
average_dwelling_value,
median_household_income,
-- Rental metrics
avg_rent_bachelor,
avg_rent_1bed,
avg_rent_2bed,
avg_rent_3bed,
vacancy_rate,
total_rental_units,
-- Affordability
rent_to_income_pct,
is_affordable,
-- Affordability index (100 = city average)
round(
rent_to_income_pct / nullif(
avg(rent_to_income_pct) over (partition by year),
0
) * 100,
1
) as affordability_index,
-- Year-over-year rent change
case
when prev_year_rent_2bed > 0
then round(
(avg_rent_2bed - prev_year_rent_2bed) / prev_year_rent_2bed * 100,
2
)
else null
end as rent_yoy_change_pct,
income_quintile
from with_yoy
)
select * from final

View File

@@ -0,0 +1,152 @@
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)
with years as (
select * from {{ ref('int_year_spine') }}
),
neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
-- Create base: all neighbourhoods × all years
neighbourhood_years as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
y.year
from neighbourhoods n
cross join years y
),
-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
select * from {{ ref('stg_toronto__census') }}
),
census_mapped as (
select
ny.neighbourhood_id,
ny.year,
c.population,
c.unemployment_rate,
c.pct_bachelors_or_higher as education_bachelors_pct
from neighbourhood_years ny
left join census c on ny.neighbourhood_id = c.neighbourhood_id
-- Use census year <= analysis year, prefer most recent
and c.census_year = (
select max(c2.census_year)
from {{ ref('stg_toronto__census') }} c2
where c2.neighbourhood_id = ny.neighbourhood_id
and c2.census_year <= ny.year
)
),
-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
select * from {{ ref('int_census__toronto_cma') }}
),
-- Crime data (2014-2024)
crime as (
select * from {{ ref('int_neighbourhood__crime_summary') }}
),
-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
select * from {{ ref('int_rentals__toronto_cma') }}
),
-- Compute scores
scored as (
select
ny.neighbourhood_id,
ny.neighbourhood_name,
ny.geometry,
ny.year,
cm.population,
-- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
cma.median_household_income,
-- Safety score: inverse of crime rate (higher = safer)
case
when cr.crime_rate_per_100k is not null
then 100 - percent_rank() over (
partition by ny.year
order by cr.crime_rate_per_100k
) * 100
else null
end as safety_score,
-- Affordability score: inverse of rent-to-income ratio
-- Using CMA-level income since neighbourhood-level not available
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then 100 - percent_rank() over (
partition by ny.year
order by (r.avg_rent_standard * 12 / cma.median_household_income)
) * 100
else null
end as affordability_score,
-- Raw metrics
cr.crime_rate_per_100k,
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
else null
end as rent_to_income_pct,
r.avg_rent_standard as avg_rent_2bed,
r.vacancy_rate
from neighbourhood_years ny
left join census_mapped cm
on ny.neighbourhood_id = cm.neighbourhood_id
and ny.year = cm.year
left join cma_census cma
on ny.year = cma.year
left join crime cr
on ny.neighbourhood_id = cr.neighbourhood_id
and ny.year = cr.year
left join rentals r
on ny.year = r.year
),
final as (
select
neighbourhood_id,
neighbourhood_name,
geometry,
year,
population,
median_household_income,
-- Component scores (0-100)
round(safety_score::numeric, 1) as safety_score,
round(affordability_score::numeric, 1) as affordability_score,
-- Amenity score not available at this level, use placeholder
50.0 as amenity_score,
-- Composite livability score: safety (40%), affordability (40%), amenities (20%)
round(
(coalesce(safety_score, 50) * 0.40 +
coalesce(affordability_score, 50) * 0.40 +
50 * 0.20)::numeric,
1
) as livability_score,
-- Raw metrics
crime_rate_per_100k,
rent_to_income_pct,
avg_rent_2bed,
vacancy_rate,
null::numeric as total_amenities_per_1000
from scored
)
select * from final

View File

@@ -0,0 +1,78 @@
-- Mart: Neighbourhood Safety Analysis
-- Dashboard Tab: Safety
-- Grain: One row per neighbourhood per year
with crime as (
select * from {{ ref('int_neighbourhood__crime_summary') }}
),
-- City-wide averages for comparison
city_avg as (
select
year,
avg(crime_rate_per_100k) as city_avg_crime_rate,
avg(assault_count) as city_avg_assault,
avg(auto_theft_count) as city_avg_auto_theft,
avg(break_enter_count) as city_avg_break_enter
from crime
group by year
),
final as (
select
c.neighbourhood_id,
c.neighbourhood_name,
c.geometry,
c.population,
c.year,
-- Total crime
c.total_incidents,
c.crime_rate_per_100k,
c.yoy_change_pct as crime_yoy_change_pct,
-- Crime breakdown
c.assault_count,
c.auto_theft_count,
c.break_enter_count,
c.robbery_count,
c.theft_over_count,
c.homicide_count,
-- Per 100K rates by type
case when c.population > 0
then round(c.assault_count::numeric / c.population * 100000, 2)
else null
end as assault_rate_per_100k,
case when c.population > 0
then round(c.auto_theft_count::numeric / c.population * 100000, 2)
else null
end as auto_theft_rate_per_100k,
case when c.population > 0
then round(c.break_enter_count::numeric / c.population * 100000, 2)
else null
end as break_enter_rate_per_100k,
-- Comparison to city average
round(ca.city_avg_crime_rate::numeric, 2) as city_avg_crime_rate,
-- Crime index (100 = city average)
case
when ca.city_avg_crime_rate > 0
then round(c.crime_rate_per_100k / ca.city_avg_crime_rate * 100, 1)
else null
end as crime_index,
-- Safety tier based on crime rate percentile
ntile(5) over (
partition by c.year
order by c.crime_rate_per_100k desc
) as safety_tier
from crime c
left join city_avg ca on c.year = ca.year
)
select * from final

View File

@@ -0,0 +1,64 @@
-- Mart: Toronto Rental Market Analysis
-- Final analytical table for rental market visualization
-- Grain: One row per zone per bedroom type per survey year
with rentals as (
select * from {{ ref('int_rentals__annual') }}
),
-- Add year-over-year calculations
with_yoy as (
select
r.*,
-- Previous year values
lag(r.avg_rent, 1) over (
partition by r.zone_code, r.bedroom_type
order by r.year
) as avg_rent_prev_year,
lag(r.vacancy_rate, 1) over (
partition by r.zone_code, r.bedroom_type
order by r.year
) as vacancy_rate_prev_year
from rentals r
),
final as (
select
rental_id,
date_key,
full_date,
year,
quarter,
zone_key,
zone_code,
zone_name,
bedroom_type,
rental_universe,
avg_rent,
median_rent,
vacancy_rate,
availability_rate,
turnover_rate,
year_over_year_rent_change,
reliability_code,
vacant_units_estimate,
-- Calculated year-over-year (if not provided)
coalesce(
year_over_year_rent_change,
case
when avg_rent_prev_year > 0
then round(((avg_rent - avg_rent_prev_year) / avg_rent_prev_year) * 100, 2)
else null
end
) as rent_change_pct,
vacancy_rate - vacancy_rate_prev_year as vacancy_rate_change
from with_yoy
)
select * from final