Files
personal-portfolio/dbt/models/marts/mart_neighbourhood_overview.sql
lmiranda d0f32edba7 fix: Repair data pipeline with StatCan CMHC rental data
- Add StatCan CMHC parser to fetch rental data from Statistics Canada API
- Create year spine (2014-2025) as time dimension driver instead of census
- Add CMA-level rental and income intermediate models
- Update mart_neighbourhood_overview to use rental years as base
- Fix neighbourhood_service queries to match dbt schema
- Add CMHC data loading to pipeline script

Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records
Rent data available 2019-2025, crime data 2014-2024

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-17 15:38:31 -05:00

153 lines
4.4 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)
with years as (
select * from {{ ref('int_year_spine') }}
),
neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
-- Create base: all neighbourhoods × all years
neighbourhood_years as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
y.year
from neighbourhoods n
cross join years y
),
-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
select * from {{ ref('stg_toronto__census') }}
),
census_mapped as (
select
ny.neighbourhood_id,
ny.year,
c.population,
c.unemployment_rate,
c.pct_bachelors_or_higher as education_bachelors_pct
from neighbourhood_years ny
left join census c on ny.neighbourhood_id = c.neighbourhood_id
-- Use census year <= analysis year, prefer most recent
and c.census_year = (
select max(c2.census_year)
from {{ ref('stg_toronto__census') }} c2
where c2.neighbourhood_id = ny.neighbourhood_id
and c2.census_year <= ny.year
)
),
-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
select * from {{ ref('int_census__toronto_cma') }}
),
-- Crime data (2014-2024)
crime as (
select * from {{ ref('int_neighbourhood__crime_summary') }}
),
-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
select * from {{ ref('int_rentals__toronto_cma') }}
),
-- Compute scores
scored as (
select
ny.neighbourhood_id,
ny.neighbourhood_name,
ny.geometry,
ny.year,
cm.population,
-- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
cma.median_household_income,
-- Safety score: inverse of crime rate (higher = safer)
case
when cr.crime_rate_per_100k is not null
then 100 - percent_rank() over (
partition by ny.year
order by cr.crime_rate_per_100k
) * 100
else null
end as safety_score,
-- Affordability score: inverse of rent-to-income ratio
-- Using CMA-level income since neighbourhood-level not available
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then 100 - percent_rank() over (
partition by ny.year
order by (r.avg_rent_standard * 12 / cma.median_household_income)
) * 100
else null
end as affordability_score,
-- Raw metrics
cr.crime_rate_per_100k,
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
else null
end as rent_to_income_pct,
r.avg_rent_standard as avg_rent_2bed,
r.vacancy_rate
from neighbourhood_years ny
left join census_mapped cm
on ny.neighbourhood_id = cm.neighbourhood_id
and ny.year = cm.year
left join cma_census cma
on ny.year = cma.year
left join crime cr
on ny.neighbourhood_id = cr.neighbourhood_id
and ny.year = cr.year
left join rentals r
on ny.year = r.year
),
final as (
select
neighbourhood_id,
neighbourhood_name,
geometry,
year,
population,
median_household_income,
-- Component scores (0-100)
round(safety_score::numeric, 1) as safety_score,
round(affordability_score::numeric, 1) as affordability_score,
-- Amenity score not available at this level, use placeholder
50.0 as amenity_score,
-- Composite livability score: safety (40%), affordability (40%), amenities (20%)
round(
(coalesce(safety_score, 50) * 0.40 +
coalesce(affordability_score, 50) * 0.40 +
50 * 0.20)::numeric,
1
) as livability_score,
-- Raw metrics
crime_rate_per_100k,
rent_to_income_pct,
avg_rent_2bed,
vacancy_rate,
null::numeric as total_amenities_per_1000
from scored
)
select * from final