feat: Implement Phase 4 dbt model restructuring

Create neighbourhood-centric dbt transformation layer:

Staging (5 models):
- stg_toronto__neighbourhoods - Neighbourhood dimension
- stg_toronto__census - Census demographics
- stg_toronto__crime - Crime statistics
- stg_toronto__amenities - Amenity counts
- stg_cmhc__zone_crosswalk - Zone-to-neighbourhood weights

Intermediate (5 models):
- int_neighbourhood__demographics - Combined census with quintiles
- int_neighbourhood__housing - Housing + affordability indicators
- int_neighbourhood__crime_summary - Aggregated crime with YoY
- int_neighbourhood__amenity_scores - Per-capita amenity metrics
- int_rentals__neighbourhood_allocated - CMHC via area weights

Marts (5 models):
- mart_neighbourhood_overview - Composite livability score
- mart_neighbourhood_housing - Affordability index
- mart_neighbourhood_safety - Crime rates per 100K
- mart_neighbourhood_demographics - Income/age indices
- mart_neighbourhood_amenities - Amenity index

Closes #60, #61, #62, #63

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-16 11:41:27 -05:00
parent 053acf6436
commit b6d210ec6b
20 changed files with 1245 additions and 0 deletions

View File

@@ -11,3 +11,77 @@ models:
- name: zone_code
tests:
- not_null
- name: int_neighbourhood__demographics
description: "Combined census demographics with neighbourhood attributes"
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: census_year
description: "Census year"
tests:
- not_null
- name: income_quintile
description: "Income quintile (1-5, city-wide)"
- name: int_neighbourhood__housing
description: "Housing indicators combining census and rental data"
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: year
description: "Reference year"
- name: rent_to_income_pct
description: "Rent as percentage of median income"
- name: is_affordable
description: "Boolean: rent <= 30% of income"
- name: int_neighbourhood__crime_summary
description: "Aggregated crime with year-over-year trends"
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: year
description: "Statistics year"
tests:
- not_null
- name: crime_rate_per_100k
description: "Total crime rate per 100K population"
- name: yoy_change_pct
description: "Year-over-year change percentage"
- name: int_neighbourhood__amenity_scores
description: "Normalized amenities per capita and per area"
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: year
description: "Reference year"
- name: total_amenities_per_1000
description: "Total amenities per 1000 population"
- name: amenities_per_sqkm
description: "Total amenities per square km"
- name: int_rentals__neighbourhood_allocated
description: "CMHC rental data allocated to neighbourhoods via area weights"
columns:
- name: neighbourhood_id
description: "Neighbourhood identifier"
tests:
- not_null
- name: year
description: "Survey year"
tests:
- not_null
- name: avg_rent_2bed
description: "Weighted average 2-bedroom rent"
- name: vacancy_rate
description: "Weighted average vacancy rate"

View File

@@ -0,0 +1,79 @@
-- Intermediate: Normalized amenities per 1000 population
-- Pivots amenity types and calculates per-capita metrics
-- Grain: One row per neighbourhood per year
with neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
amenities as (
select * from {{ ref('stg_toronto__amenities') }}
),
-- Aggregate amenity types
amenities_by_year as (
select
neighbourhood_id,
amenity_year as year,
sum(case when amenity_type = 'Parks' then amenity_count else 0 end) as parks_count,
sum(case when amenity_type = 'Schools' then amenity_count else 0 end) as schools_count,
sum(case when amenity_type = 'Transit Stops' then amenity_count else 0 end) as transit_count,
sum(case when amenity_type = 'Libraries' then amenity_count else 0 end) as libraries_count,
sum(case when amenity_type = 'Community Centres' then amenity_count else 0 end) as community_centres_count,
sum(case when amenity_type = 'Recreation' then amenity_count else 0 end) as recreation_count,
sum(amenity_count) as total_amenities
from amenities
group by neighbourhood_id, amenity_year
),
amenity_scores as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
n.population,
n.land_area_sqkm,
a.year,
-- Raw counts
a.parks_count,
a.schools_count,
a.transit_count,
a.libraries_count,
a.community_centres_count,
a.recreation_count,
a.total_amenities,
-- Per 1000 population
case when n.population > 0
then round(a.parks_count::numeric / n.population * 1000, 3)
else null
end as parks_per_1000,
case when n.population > 0
then round(a.schools_count::numeric / n.population * 1000, 3)
else null
end as schools_per_1000,
case when n.population > 0
then round(a.transit_count::numeric / n.population * 1000, 3)
else null
end as transit_per_1000,
case when n.population > 0
then round(a.total_amenities::numeric / n.population * 1000, 3)
else null
end as total_amenities_per_1000,
-- Per square km
case when n.land_area_sqkm > 0
then round(a.total_amenities::numeric / n.land_area_sqkm, 2)
else null
end as amenities_per_sqkm
from neighbourhoods n
left join amenities_by_year a on n.neighbourhood_id = a.neighbourhood_id
)
select * from amenity_scores

View File

@@ -0,0 +1,81 @@
-- Intermediate: Aggregated crime by neighbourhood with YoY change
-- Pivots crime types and calculates year-over-year trends
-- Grain: One row per neighbourhood per year
with neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
crime as (
select * from {{ ref('stg_toronto__crime') }}
),
-- Aggregate crime types
crime_by_year as (
select
neighbourhood_id,
crime_year as year,
sum(incident_count) as total_incidents,
sum(case when crime_type = 'Assault' then incident_count else 0 end) as assault_count,
sum(case when crime_type = 'Auto Theft' then incident_count else 0 end) as auto_theft_count,
sum(case when crime_type = 'Break and Enter' then incident_count else 0 end) as break_enter_count,
sum(case when crime_type = 'Robbery' then incident_count else 0 end) as robbery_count,
sum(case when crime_type = 'Theft Over' then incident_count else 0 end) as theft_over_count,
sum(case when crime_type = 'Homicide' then incident_count else 0 end) as homicide_count,
avg(rate_per_100k) as avg_rate_per_100k
from crime
group by neighbourhood_id, crime_year
),
-- Add year-over-year changes
with_yoy as (
select
c.*,
lag(c.total_incidents, 1) over (
partition by c.neighbourhood_id
order by c.year
) as prev_year_incidents,
round(
(c.total_incidents - lag(c.total_incidents, 1) over (
partition by c.neighbourhood_id
order by c.year
))::numeric /
nullif(lag(c.total_incidents, 1) over (
partition by c.neighbourhood_id
order by c.year
), 0) * 100,
2
) as yoy_change_pct
from crime_by_year c
),
crime_summary as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
n.population,
w.year,
w.total_incidents,
w.assault_count,
w.auto_theft_count,
w.break_enter_count,
w.robbery_count,
w.theft_over_count,
w.homicide_count,
w.avg_rate_per_100k,
w.yoy_change_pct,
-- Crime rate per 100K population
case
when n.population > 0
then round(w.total_incidents::numeric / n.population * 100000, 2)
else null
end as crime_rate_per_100k
from neighbourhoods n
inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id
)
select * from crime_summary

View File

@@ -0,0 +1,44 @@
-- Intermediate: Combined census demographics by neighbourhood
-- Joins neighbourhoods with census data for demographic analysis
-- Grain: One row per neighbourhood per census year
with neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
census as (
select * from {{ ref('stg_toronto__census') }}
),
demographics as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
n.land_area_sqkm,
c.census_year,
c.population,
c.population_density,
c.median_household_income,
c.average_household_income,
c.median_age,
c.unemployment_rate,
c.pct_bachelors_or_higher as education_bachelors_pct,
c.average_dwelling_value,
-- Tenure mix
c.pct_owner_occupied,
c.pct_renter_occupied,
-- Income quintile (city-wide comparison)
ntile(5) over (
partition by c.census_year
order by c.median_household_income
) as income_quintile
from neighbourhoods n
left join census c on n.neighbourhood_id = c.neighbourhood_id
)
select * from demographics

View File

@@ -0,0 +1,56 @@
-- Intermediate: Housing indicators by neighbourhood
-- Combines census housing data with allocated CMHC rental data
-- Grain: One row per neighbourhood per year
with neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
census as (
select * from {{ ref('stg_toronto__census') }}
),
allocated_rentals as (
select * from {{ ref('int_rentals__neighbourhood_allocated') }}
),
housing as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
coalesce(r.year, c.census_year) as year,
-- Census housing metrics
c.pct_owner_occupied,
c.pct_renter_occupied,
c.average_dwelling_value,
c.median_household_income,
-- Allocated rental metrics (weighted average from CMHC zones)
r.avg_rent_2bed,
r.vacancy_rate,
-- Affordability calculations
case
when c.median_household_income > 0 and r.avg_rent_2bed > 0
then round((r.avg_rent_2bed * 12 / c.median_household_income) * 100, 2)
else null
end as rent_to_income_pct,
-- Affordability threshold (30% of income)
case
when c.median_household_income > 0 and r.avg_rent_2bed > 0
then r.avg_rent_2bed * 12 <= c.median_household_income * 0.30
else null
end as is_affordable
from neighbourhoods n
left join census c on n.neighbourhood_id = c.neighbourhood_id
left join allocated_rentals r
on n.neighbourhood_id = r.neighbourhood_id
and r.year = c.census_year
)
select * from housing

View File

@@ -0,0 +1,73 @@
-- Intermediate: CMHC rentals allocated to neighbourhoods via area weights
-- Disaggregates zone-level rental data to neighbourhood level
-- Grain: One row per neighbourhood per year
with crosswalk as (
select * from {{ ref('stg_cmhc__zone_crosswalk') }}
),
rentals as (
select * from {{ ref('int_rentals__annual') }}
),
neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
-- Allocate rental metrics to neighbourhoods using area weights
allocated as (
select
c.neighbourhood_id,
r.year,
r.bedroom_type,
-- Weighted average rent (using area weight)
sum(r.avg_rent * c.area_weight) as weighted_avg_rent,
sum(r.median_rent * c.area_weight) as weighted_median_rent,
sum(c.area_weight) as total_weight,
-- Weighted vacancy rate
sum(r.vacancy_rate * c.area_weight) / nullif(sum(c.area_weight), 0) as vacancy_rate,
-- Weighted rental universe
sum(r.rental_universe * c.area_weight) as rental_units_estimate
from crosswalk c
inner join rentals r on c.cmhc_zone_code = r.zone_code
group by c.neighbourhood_id, r.year, r.bedroom_type
),
-- Pivot to get 2-bedroom as primary metric
pivoted as (
select
neighbourhood_id,
year,
max(case when bedroom_type = 'Two Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed,
max(case when bedroom_type = 'One Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed,
max(case when bedroom_type = 'Bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor,
max(case when bedroom_type = 'Three Bedroom +' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed,
avg(vacancy_rate) as vacancy_rate,
sum(rental_units_estimate) as total_rental_units
from allocated
group by neighbourhood_id, year
),
final as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
p.year,
round(p.avg_rent_bachelor::numeric, 2) as avg_rent_bachelor,
round(p.avg_rent_1bed::numeric, 2) as avg_rent_1bed,
round(p.avg_rent_2bed::numeric, 2) as avg_rent_2bed,
round(p.avg_rent_3bed::numeric, 2) as avg_rent_3bed,
round(p.vacancy_rate::numeric, 2) as vacancy_rate,
round(p.total_rental_units::numeric, 0) as total_rental_units
from neighbourhoods n
inner join pivoted p on n.neighbourhood_id = p.neighbourhood_id
)
select * from final