Files
personal-portfolio/dbt/models/marts/toronto/mart_neighbourhood_overview.sql
l3ocho dfa5f92d8a
Some checks failed
CI / lint-and-test (pull_request) Has been cancelled
refactor: update app code for domain-scoped schema migration
- Update dbt model references to use new schema naming (stg_toronto, int_toronto, mart_toronto)
- Refactor figure factories to use consistent column naming from new schema
- Update callbacks to work with refactored data structures
- Add centralized design tokens module for consistent styling
- Streamline CLAUDE.md documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:00:30 -05:00

154 lines
4.5 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Mart: Neighbourhood Overview with Composite Livability Score
-- Dashboard Tab: Overview
-- Grain: One row per neighbourhood per year
-- Time spine: Years 2014-2025 (driven by crime/rental data availability)
with years as (
select * from {{ ref('int_year_spine') }}
),
neighbourhoods as (
select * from {{ ref('stg_toronto__neighbourhoods') }}
),
-- Create base: all neighbourhoods × all years
neighbourhood_years as (
select
n.neighbourhood_id,
n.neighbourhood_name,
n.geometry,
y.year
from neighbourhoods n
cross join years y
),
-- Census data (available for 2016, 2021)
-- For each year, use the most recent census data available
census as (
select * from {{ ref('stg_toronto__census') }}
),
census_mapped as (
select
ny.neighbourhood_id,
ny.year,
c.population,
c.unemployment_rate,
c.pct_bachelors_or_higher as education_bachelors_pct
from neighbourhood_years ny
left join census c on ny.neighbourhood_id = c.neighbourhood_id
-- Use census year <= analysis year, prefer most recent
and c.census_year = (
select max(c2.census_year)
from {{ ref('stg_toronto__census') }} c2
where c2.neighbourhood_id = ny.neighbourhood_id
and c2.census_year <= ny.year
)
),
-- CMA-level census data (for income - not available at neighbourhood level)
cma_census as (
select * from {{ ref('int_census__toronto_cma') }}
),
-- Crime data (2014-2024)
crime as (
select * from {{ ref('int_neighbourhood__crime_summary') }}
),
-- Rentals (2019-2025) - CMA level applied to all neighbourhoods
rentals as (
select * from {{ ref('int_rentals__toronto_cma') }}
),
-- Compute scores
scored as (
select
ny.neighbourhood_id,
ny.neighbourhood_name,
ny.geometry,
ny.year,
cm.population,
-- Use CMA-level income (neighbourhood-level not available in Toronto Open Data)
cma.median_household_income,
-- Safety score: inverse of crime rate (higher = safer)
case
when cr.crime_rate_per_100k is not null
then 100 - percent_rank() over (
partition by ny.year
order by cr.crime_rate_per_100k
) * 100
else null
end as safety_score,
-- Affordability score: inverse of rent-to-income ratio
-- Using CMA-level income since neighbourhood-level not available
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then 100 - percent_rank() over (
partition by ny.year
order by (r.avg_rent_standard * 12 / cma.median_household_income)
) * 100
else null
end as affordability_score,
-- Raw metrics
cr.crime_rate_per_100k,
case
when cma.median_household_income > 0 and r.avg_rent_standard > 0
then round((r.avg_rent_standard * 12 / cma.median_household_income) * 100, 2)
else null
end as rent_to_income_pct,
r.avg_rent_standard as avg_rent_2bed,
r.vacancy_rate
from neighbourhood_years ny
left join census_mapped cm
on ny.neighbourhood_id = cm.neighbourhood_id
and ny.year = cm.year
left join cma_census cma
on ny.year = cma.year
left join crime cr
on ny.neighbourhood_id = cr.neighbourhood_id
and ny.year = cr.year
left join rentals r
on ny.year = r.year
),
final as (
select
neighbourhood_id,
neighbourhood_name,
geometry,
year,
population,
median_household_income,
-- Component scores (0-100)
round(safety_score::numeric, 1) as safety_score,
round(affordability_score::numeric, 1) as affordability_score,
-- TODO: Replace with actual amenity score when fact_amenities is populated
-- Currently uses neutral placeholder (50.0) which affects livability_score accuracy
50.0 as amenity_score,
-- Composite livability score: safety (40%), affordability (40%), amenities (20%)
round(
(coalesce(safety_score, 50) * 0.40 +
coalesce(affordability_score, 50) * 0.40 +
50 * 0.20)::numeric,
1
) as livability_score,
-- Raw metrics
crime_rate_per_100k,
rent_to_income_pct,
avg_rent_2bed,
vacancy_rate,
null::numeric as total_amenities_per_1000
from scored
)
select * from final