diff --git a/dbt/models/intermediate/_intermediate.yml b/dbt/models/intermediate/_intermediate.yml index 9fd7fd9..908da4a 100644 --- a/dbt/models/intermediate/_intermediate.yml +++ b/dbt/models/intermediate/_intermediate.yml @@ -11,3 +11,77 @@ models: - name: zone_code tests: - not_null + + - name: int_neighbourhood__demographics + description: "Combined census demographics with neighbourhood attributes" + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: census_year + description: "Census year" + tests: + - not_null + - name: income_quintile + description: "Income quintile (1-5, city-wide)" + + - name: int_neighbourhood__housing + description: "Housing indicators combining census and rental data" + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: year + description: "Reference year" + - name: rent_to_income_pct + description: "Rent as percentage of median income" + - name: is_affordable + description: "Boolean: rent <= 30% of income" + + - name: int_neighbourhood__crime_summary + description: "Aggregated crime with year-over-year trends" + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: year + description: "Statistics year" + tests: + - not_null + - name: crime_rate_per_100k + description: "Total crime rate per 100K population" + - name: yoy_change_pct + description: "Year-over-year change percentage" + + - name: int_neighbourhood__amenity_scores + description: "Normalized amenities per capita and per area" + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: year + description: "Reference year" + - name: total_amenities_per_1000 + description: "Total amenities per 1000 population" + - name: amenities_per_sqkm + description: "Total amenities per square km" + + - name: int_rentals__neighbourhood_allocated + description: "CMHC rental data allocated to neighbourhoods via area weights" + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: year + description: "Survey year" + tests: + - not_null + - name: avg_rent_2bed + description: "Weighted average 2-bedroom rent" + - name: vacancy_rate + description: "Weighted average vacancy rate" diff --git a/dbt/models/intermediate/int_neighbourhood__amenity_scores.sql b/dbt/models/intermediate/int_neighbourhood__amenity_scores.sql new file mode 100644 index 0000000..8890177 --- /dev/null +++ b/dbt/models/intermediate/int_neighbourhood__amenity_scores.sql @@ -0,0 +1,79 @@ +-- Intermediate: Normalized amenities per 1000 population +-- Pivots amenity types and calculates per-capita metrics +-- Grain: One row per neighbourhood per year + +with neighbourhoods as ( + select * from {{ ref('stg_toronto__neighbourhoods') }} +), + +amenities as ( + select * from {{ ref('stg_toronto__amenities') }} +), + +-- Aggregate amenity types +amenities_by_year as ( + select + neighbourhood_id, + amenity_year as year, + sum(case when amenity_type = 'Parks' then amenity_count else 0 end) as parks_count, + sum(case when amenity_type = 'Schools' then amenity_count else 0 end) as schools_count, + sum(case when amenity_type = 'Transit Stops' then amenity_count else 0 end) as transit_count, + sum(case when amenity_type = 'Libraries' then amenity_count else 0 end) as libraries_count, + sum(case when amenity_type = 'Community Centres' then amenity_count else 0 end) as community_centres_count, + sum(case when amenity_type = 'Recreation' then amenity_count else 0 end) as recreation_count, + sum(amenity_count) as total_amenities + from amenities + group by neighbourhood_id, amenity_year +), + +amenity_scores as ( + select + n.neighbourhood_id, + n.neighbourhood_name, + n.geometry, + n.population, + n.land_area_sqkm, + + a.year, + + -- Raw counts + a.parks_count, + a.schools_count, + a.transit_count, + a.libraries_count, + a.community_centres_count, + a.recreation_count, + a.total_amenities, + + -- Per 1000 population + case when n.population > 0 + then round(a.parks_count::numeric / n.population * 1000, 3) + else null + end as parks_per_1000, + + case when n.population > 0 + then round(a.schools_count::numeric / n.population * 1000, 3) + else null + end as schools_per_1000, + + case when n.population > 0 + then round(a.transit_count::numeric / n.population * 1000, 3) + else null + end as transit_per_1000, + + case when n.population > 0 + then round(a.total_amenities::numeric / n.population * 1000, 3) + else null + end as total_amenities_per_1000, + + -- Per square km + case when n.land_area_sqkm > 0 + then round(a.total_amenities::numeric / n.land_area_sqkm, 2) + else null + end as amenities_per_sqkm + + from neighbourhoods n + left join amenities_by_year a on n.neighbourhood_id = a.neighbourhood_id +) + +select * from amenity_scores diff --git a/dbt/models/intermediate/int_neighbourhood__crime_summary.sql b/dbt/models/intermediate/int_neighbourhood__crime_summary.sql new file mode 100644 index 0000000..bd20997 --- /dev/null +++ b/dbt/models/intermediate/int_neighbourhood__crime_summary.sql @@ -0,0 +1,81 @@ +-- Intermediate: Aggregated crime by neighbourhood with YoY change +-- Pivots crime types and calculates year-over-year trends +-- Grain: One row per neighbourhood per year + +with neighbourhoods as ( + select * from {{ ref('stg_toronto__neighbourhoods') }} +), + +crime as ( + select * from {{ ref('stg_toronto__crime') }} +), + +-- Aggregate crime types +crime_by_year as ( + select + neighbourhood_id, + crime_year as year, + sum(incident_count) as total_incidents, + sum(case when crime_type = 'Assault' then incident_count else 0 end) as assault_count, + sum(case when crime_type = 'Auto Theft' then incident_count else 0 end) as auto_theft_count, + sum(case when crime_type = 'Break and Enter' then incident_count else 0 end) as break_enter_count, + sum(case when crime_type = 'Robbery' then incident_count else 0 end) as robbery_count, + sum(case when crime_type = 'Theft Over' then incident_count else 0 end) as theft_over_count, + sum(case when crime_type = 'Homicide' then incident_count else 0 end) as homicide_count, + avg(rate_per_100k) as avg_rate_per_100k + from crime + group by neighbourhood_id, crime_year +), + +-- Add year-over-year changes +with_yoy as ( + select + c.*, + lag(c.total_incidents, 1) over ( + partition by c.neighbourhood_id + order by c.year + ) as prev_year_incidents, + round( + (c.total_incidents - lag(c.total_incidents, 1) over ( + partition by c.neighbourhood_id + order by c.year + ))::numeric / + nullif(lag(c.total_incidents, 1) over ( + partition by c.neighbourhood_id + order by c.year + ), 0) * 100, + 2 + ) as yoy_change_pct + from crime_by_year c +), + +crime_summary as ( + select + n.neighbourhood_id, + n.neighbourhood_name, + n.geometry, + n.population, + + w.year, + w.total_incidents, + w.assault_count, + w.auto_theft_count, + w.break_enter_count, + w.robbery_count, + w.theft_over_count, + w.homicide_count, + w.avg_rate_per_100k, + w.yoy_change_pct, + + -- Crime rate per 100K population + case + when n.population > 0 + then round(w.total_incidents::numeric / n.population * 100000, 2) + else null + end as crime_rate_per_100k + + from neighbourhoods n + inner join with_yoy w on n.neighbourhood_id = w.neighbourhood_id +) + +select * from crime_summary diff --git a/dbt/models/intermediate/int_neighbourhood__demographics.sql b/dbt/models/intermediate/int_neighbourhood__demographics.sql new file mode 100644 index 0000000..2316f6a --- /dev/null +++ b/dbt/models/intermediate/int_neighbourhood__demographics.sql @@ -0,0 +1,44 @@ +-- Intermediate: Combined census demographics by neighbourhood +-- Joins neighbourhoods with census data for demographic analysis +-- Grain: One row per neighbourhood per census year + +with neighbourhoods as ( + select * from {{ ref('stg_toronto__neighbourhoods') }} +), + +census as ( + select * from {{ ref('stg_toronto__census') }} +), + +demographics as ( + select + n.neighbourhood_id, + n.neighbourhood_name, + n.geometry, + n.land_area_sqkm, + + c.census_year, + c.population, + c.population_density, + c.median_household_income, + c.average_household_income, + c.median_age, + c.unemployment_rate, + c.pct_bachelors_or_higher as education_bachelors_pct, + c.average_dwelling_value, + + -- Tenure mix + c.pct_owner_occupied, + c.pct_renter_occupied, + + -- Income quintile (city-wide comparison) + ntile(5) over ( + partition by c.census_year + order by c.median_household_income + ) as income_quintile + + from neighbourhoods n + left join census c on n.neighbourhood_id = c.neighbourhood_id +) + +select * from demographics diff --git a/dbt/models/intermediate/int_neighbourhood__housing.sql b/dbt/models/intermediate/int_neighbourhood__housing.sql new file mode 100644 index 0000000..9d7f705 --- /dev/null +++ b/dbt/models/intermediate/int_neighbourhood__housing.sql @@ -0,0 +1,56 @@ +-- Intermediate: Housing indicators by neighbourhood +-- Combines census housing data with allocated CMHC rental data +-- Grain: One row per neighbourhood per year + +with neighbourhoods as ( + select * from {{ ref('stg_toronto__neighbourhoods') }} +), + +census as ( + select * from {{ ref('stg_toronto__census') }} +), + +allocated_rentals as ( + select * from {{ ref('int_rentals__neighbourhood_allocated') }} +), + +housing as ( + select + n.neighbourhood_id, + n.neighbourhood_name, + n.geometry, + + coalesce(r.year, c.census_year) as year, + + -- Census housing metrics + c.pct_owner_occupied, + c.pct_renter_occupied, + c.average_dwelling_value, + c.median_household_income, + + -- Allocated rental metrics (weighted average from CMHC zones) + r.avg_rent_2bed, + r.vacancy_rate, + + -- Affordability calculations + case + when c.median_household_income > 0 and r.avg_rent_2bed > 0 + then round((r.avg_rent_2bed * 12 / c.median_household_income) * 100, 2) + else null + end as rent_to_income_pct, + + -- Affordability threshold (30% of income) + case + when c.median_household_income > 0 and r.avg_rent_2bed > 0 + then r.avg_rent_2bed * 12 <= c.median_household_income * 0.30 + else null + end as is_affordable + + from neighbourhoods n + left join census c on n.neighbourhood_id = c.neighbourhood_id + left join allocated_rentals r + on n.neighbourhood_id = r.neighbourhood_id + and r.year = c.census_year +) + +select * from housing diff --git a/dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql b/dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql new file mode 100644 index 0000000..aa04534 --- /dev/null +++ b/dbt/models/intermediate/int_rentals__neighbourhood_allocated.sql @@ -0,0 +1,73 @@ +-- Intermediate: CMHC rentals allocated to neighbourhoods via area weights +-- Disaggregates zone-level rental data to neighbourhood level +-- Grain: One row per neighbourhood per year + +with crosswalk as ( + select * from {{ ref('stg_cmhc__zone_crosswalk') }} +), + +rentals as ( + select * from {{ ref('int_rentals__annual') }} +), + +neighbourhoods as ( + select * from {{ ref('stg_toronto__neighbourhoods') }} +), + +-- Allocate rental metrics to neighbourhoods using area weights +allocated as ( + select + c.neighbourhood_id, + r.year, + r.bedroom_type, + + -- Weighted average rent (using area weight) + sum(r.avg_rent * c.area_weight) as weighted_avg_rent, + sum(r.median_rent * c.area_weight) as weighted_median_rent, + sum(c.area_weight) as total_weight, + + -- Weighted vacancy rate + sum(r.vacancy_rate * c.area_weight) / nullif(sum(c.area_weight), 0) as vacancy_rate, + + -- Weighted rental universe + sum(r.rental_universe * c.area_weight) as rental_units_estimate + + from crosswalk c + inner join rentals r on c.cmhc_zone_code = r.zone_code + group by c.neighbourhood_id, r.year, r.bedroom_type +), + +-- Pivot to get 2-bedroom as primary metric +pivoted as ( + select + neighbourhood_id, + year, + max(case when bedroom_type = 'Two Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_2bed, + max(case when bedroom_type = 'One Bedroom' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_1bed, + max(case when bedroom_type = 'Bachelor' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_bachelor, + max(case when bedroom_type = 'Three Bedroom +' then weighted_avg_rent / nullif(total_weight, 0) end) as avg_rent_3bed, + avg(vacancy_rate) as vacancy_rate, + sum(rental_units_estimate) as total_rental_units + from allocated + group by neighbourhood_id, year +), + +final as ( + select + n.neighbourhood_id, + n.neighbourhood_name, + n.geometry, + + p.year, + round(p.avg_rent_bachelor::numeric, 2) as avg_rent_bachelor, + round(p.avg_rent_1bed::numeric, 2) as avg_rent_1bed, + round(p.avg_rent_2bed::numeric, 2) as avg_rent_2bed, + round(p.avg_rent_3bed::numeric, 2) as avg_rent_3bed, + round(p.vacancy_rate::numeric, 2) as vacancy_rate, + round(p.total_rental_units::numeric, 0) as total_rental_units + + from neighbourhoods n + inner join pivoted p on n.neighbourhood_id = p.neighbourhood_id +) + +select * from final diff --git a/dbt/models/marts/_marts.yml b/dbt/models/marts/_marts.yml index 1f89efe..37170ca 100644 --- a/dbt/models/marts/_marts.yml +++ b/dbt/models/marts/_marts.yml @@ -9,3 +9,127 @@ models: tests: - unique - not_null + + - name: mart_neighbourhood_overview + description: "Neighbourhood overview with composite livability score" + meta: + dashboard_tab: Overview + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry for mapping" + - name: livability_score + description: "Composite score: safety (30%), affordability (40%), amenities (30%)" + - name: safety_score + description: "Safety component score (0-100)" + - name: affordability_score + description: "Affordability component score (0-100)" + - name: amenity_score + description: "Amenity component score (0-100)" + + - name: mart_neighbourhood_housing + description: "Housing and affordability metrics by neighbourhood" + meta: + dashboard_tab: Housing + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry for mapping" + - name: rent_to_income_pct + description: "Rent as percentage of median income" + - name: affordability_index + description: "100 = city average affordability" + - name: rent_yoy_change_pct + description: "Year-over-year rent change" + + - name: mart_neighbourhood_safety + description: "Crime rates and safety metrics by neighbourhood" + meta: + dashboard_tab: Safety + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry for mapping" + - name: crime_rate_per_100k + description: "Total crime rate per 100K population" + - name: crime_index + description: "100 = city average crime rate" + - name: safety_tier + description: "Safety tier (1=safest, 5=highest crime)" + tests: + - accepted_values: + arguments: + values: [1, 2, 3, 4, 5] + + - name: mart_neighbourhood_demographics + description: "Demographics and income metrics by neighbourhood" + meta: + dashboard_tab: Demographics + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry for mapping" + - name: median_household_income + description: "Median household income" + - name: income_index + description: "100 = city average income" + - name: income_quintile + description: "Income quintile (1-5)" + tests: + - accepted_values: + arguments: + values: [1, 2, 3, 4, 5] + + - name: mart_neighbourhood_amenities + description: "Amenity access metrics by neighbourhood" + meta: + dashboard_tab: Amenities + columns: + - name: neighbourhood_id + description: "Neighbourhood identifier" + tests: + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry for mapping" + - name: total_amenities_per_1000 + description: "Total amenities per 1000 population" + - name: amenity_index + description: "100 = city average amenities" + - name: amenity_tier + description: "Amenity tier (1=best, 5=lowest)" + tests: + - accepted_values: + arguments: + values: [1, 2, 3, 4, 5] diff --git a/dbt/models/marts/mart_neighbourhood_amenities.sql b/dbt/models/marts/mart_neighbourhood_amenities.sql new file mode 100644 index 0000000..76f92f9 --- /dev/null +++ b/dbt/models/marts/mart_neighbourhood_amenities.sql @@ -0,0 +1,89 @@ +-- Mart: Neighbourhood Amenities Analysis +-- Dashboard Tab: Amenities +-- Grain: One row per neighbourhood per year + +with amenities as ( + select * from {{ ref('int_neighbourhood__amenity_scores') }} +), + +-- City-wide averages for comparison +city_avg as ( + select + year, + avg(parks_per_1000) as city_avg_parks, + avg(schools_per_1000) as city_avg_schools, + avg(transit_per_1000) as city_avg_transit, + avg(total_amenities_per_1000) as city_avg_total_amenities + from amenities + group by year +), + +final as ( + select + a.neighbourhood_id, + a.neighbourhood_name, + a.geometry, + a.population, + a.land_area_sqkm, + a.year, + + -- Raw counts + a.parks_count, + a.schools_count, + a.transit_count, + a.libraries_count, + a.community_centres_count, + a.recreation_count, + a.total_amenities, + + -- Per 1000 population + a.parks_per_1000, + a.schools_per_1000, + a.transit_per_1000, + a.total_amenities_per_1000, + + -- Per square km + a.amenities_per_sqkm, + + -- City averages + round(ca.city_avg_parks::numeric, 3) as city_avg_parks_per_1000, + round(ca.city_avg_schools::numeric, 3) as city_avg_schools_per_1000, + round(ca.city_avg_transit::numeric, 3) as city_avg_transit_per_1000, + + -- Amenity index (100 = city average) + case + when ca.city_avg_total_amenities > 0 + then round(a.total_amenities_per_1000 / ca.city_avg_total_amenities * 100, 1) + else null + end as amenity_index, + + -- Category indices + case + when ca.city_avg_parks > 0 + then round(a.parks_per_1000 / ca.city_avg_parks * 100, 1) + else null + end as parks_index, + + case + when ca.city_avg_schools > 0 + then round(a.schools_per_1000 / ca.city_avg_schools * 100, 1) + else null + end as schools_index, + + case + when ca.city_avg_transit > 0 + then round(a.transit_per_1000 / ca.city_avg_transit * 100, 1) + else null + end as transit_index, + + -- Amenity tier (1 = best, 5 = lowest) + ntile(5) over ( + partition by a.year + order by a.total_amenities_per_1000 desc + ) as amenity_tier + + from amenities a + left join city_avg ca on a.year = ca.year +) + +select * from final diff --git a/dbt/models/marts/mart_neighbourhood_demographics.sql b/dbt/models/marts/mart_neighbourhood_demographics.sql new file mode 100644 index 0000000..9a58e92 --- /dev/null +++ b/dbt/models/marts/mart_neighbourhood_demographics.sql @@ -0,0 +1,81 @@ +-- Mart: Neighbourhood Demographics Analysis +-- Dashboard Tab: Demographics +-- Grain: One row per neighbourhood per census year + +with demographics as ( + select * from {{ ref('int_neighbourhood__demographics') }} +), + +-- City-wide averages for comparison +city_avg as ( + select + census_year, + avg(median_household_income) as city_avg_income, + avg(median_age) as city_avg_age, + avg(unemployment_rate) as city_avg_unemployment, + avg(education_bachelors_pct) as city_avg_education, + avg(population_density) as city_avg_density + from demographics + group by census_year +), + +final as ( + select + d.neighbourhood_id, + d.neighbourhood_name, + d.geometry, + d.census_year as year, + + -- Population + d.population, + d.land_area_sqkm, + d.population_density, + + -- Income + d.median_household_income, + d.average_household_income, + d.income_quintile, + + -- Income index (100 = city average) + case + when ca.city_avg_income > 0 + then round(d.median_household_income / ca.city_avg_income * 100, 1) + else null + end as income_index, + + -- Demographics + d.median_age, + d.unemployment_rate, + d.education_bachelors_pct, + + -- Age index (100 = city average) + case + when ca.city_avg_age > 0 + then round(d.median_age / ca.city_avg_age * 100, 1) + else null + end as age_index, + + -- Housing tenure + d.pct_owner_occupied, + d.pct_renter_occupied, + d.average_dwelling_value, + + -- Diversity index (using tenure mix as proxy - higher rental = more diverse typically) + round( + 1 - ( + power(d.pct_owner_occupied / 100, 2) + + power(d.pct_renter_occupied / 100, 2) + ), + 3 + ) * 100 as tenure_diversity_index, + + -- City comparisons + round(ca.city_avg_income::numeric, 2) as city_avg_income, + round(ca.city_avg_age::numeric, 1) as city_avg_age, + round(ca.city_avg_unemployment::numeric, 2) as city_avg_unemployment + + from demographics d + left join city_avg ca on d.census_year = ca.census_year +) + +select * from final diff --git a/dbt/models/marts/mart_neighbourhood_housing.sql b/dbt/models/marts/mart_neighbourhood_housing.sql new file mode 100644 index 0000000..6b047ee --- /dev/null +++ b/dbt/models/marts/mart_neighbourhood_housing.sql @@ -0,0 +1,93 @@ +-- Mart: Neighbourhood Housing Analysis +-- Dashboard Tab: Housing +-- Grain: One row per neighbourhood per year + +with housing as ( + select * from {{ ref('int_neighbourhood__housing') }} +), + +rentals as ( + select * from {{ ref('int_rentals__neighbourhood_allocated') }} +), + +demographics as ( + select * from {{ ref('int_neighbourhood__demographics') }} +), + +-- Add year-over-year rent changes +with_yoy as ( + select + h.*, + r.avg_rent_bachelor, + r.avg_rent_1bed, + r.avg_rent_3bed, + r.total_rental_units, + d.income_quintile, + + -- Previous year rent for YoY calculation + lag(h.avg_rent_2bed, 1) over ( + partition by h.neighbourhood_id + order by h.year + ) as prev_year_rent_2bed + + from housing h + left join rentals r + on h.neighbourhood_id = r.neighbourhood_id + and h.year = r.year + left join demographics d + on h.neighbourhood_id = d.neighbourhood_id + and h.year = d.census_year +), + +final as ( + select + neighbourhood_id, + neighbourhood_name, + geometry, + year, + + -- Tenure mix + pct_owner_occupied, + pct_renter_occupied, + + -- Housing values + average_dwelling_value, + median_household_income, + + -- Rental metrics + avg_rent_bachelor, + avg_rent_1bed, + avg_rent_2bed, + avg_rent_3bed, + vacancy_rate, + total_rental_units, + + -- Affordability + rent_to_income_pct, + is_affordable, + + -- Affordability index (100 = city average) + round( + rent_to_income_pct / nullif( + avg(rent_to_income_pct) over (partition by year), + 0 + ) * 100, + 1 + ) as affordability_index, + + -- Year-over-year rent change + case + when prev_year_rent_2bed > 0 + then round( + (avg_rent_2bed - prev_year_rent_2bed) / prev_year_rent_2bed * 100, + 2 + ) + else null + end as rent_yoy_change_pct, + + income_quintile + + from with_yoy +) + +select * from final diff --git a/dbt/models/marts/mart_neighbourhood_overview.sql b/dbt/models/marts/mart_neighbourhood_overview.sql new file mode 100644 index 0000000..a43e681 --- /dev/null +++ b/dbt/models/marts/mart_neighbourhood_overview.sql @@ -0,0 +1,110 @@ +-- Mart: Neighbourhood Overview with Composite Livability Score +-- Dashboard Tab: Overview +-- Grain: One row per neighbourhood per year + +with demographics as ( + select * from {{ ref('int_neighbourhood__demographics') }} +), + +housing as ( + select * from {{ ref('int_neighbourhood__housing') }} +), + +crime as ( + select * from {{ ref('int_neighbourhood__crime_summary') }} +), + +amenities as ( + select * from {{ ref('int_neighbourhood__amenity_scores') }} +), + +-- Compute percentile ranks for scoring components +percentiles as ( + select + d.neighbourhood_id, + d.neighbourhood_name, + d.geometry, + d.census_year as year, + d.population, + d.median_household_income, + + -- Safety score: inverse of crime rate (higher = safer) + case + when c.crime_rate_per_100k is not null + then 100 - percent_rank() over ( + partition by d.census_year + order by c.crime_rate_per_100k + ) * 100 + else null + end as safety_score, + + -- Affordability score: inverse of rent-to-income ratio + case + when h.rent_to_income_pct is not null + then 100 - percent_rank() over ( + partition by d.census_year + order by h.rent_to_income_pct + ) * 100 + else null + end as affordability_score, + + -- Amenity score: based on amenities per capita + case + when a.total_amenities_per_1000 is not null + then percent_rank() over ( + partition by d.census_year + order by a.total_amenities_per_1000 + ) * 100 + else null + end as amenity_score, + + -- Raw metrics for reference + c.crime_rate_per_100k, + h.rent_to_income_pct, + h.avg_rent_2bed, + a.total_amenities_per_1000 + + from demographics d + left join housing h + on d.neighbourhood_id = h.neighbourhood_id + and d.census_year = h.year + left join crime c + on d.neighbourhood_id = c.neighbourhood_id + and d.census_year = c.year + left join amenities a + on d.neighbourhood_id = a.neighbourhood_id + and d.census_year = a.year +), + +final as ( + select + neighbourhood_id, + neighbourhood_name, + geometry, + year, + population, + median_household_income, + + -- Component scores (0-100) + round(safety_score::numeric, 1) as safety_score, + round(affordability_score::numeric, 1) as affordability_score, + round(amenity_score::numeric, 1) as amenity_score, + + -- Composite livability score: safety (30%), affordability (40%), amenities (30%) + round( + (coalesce(safety_score, 50) * 0.30 + + coalesce(affordability_score, 50) * 0.40 + + coalesce(amenity_score, 50) * 0.30)::numeric, + 1 + ) as livability_score, + + -- Raw metrics + crime_rate_per_100k, + rent_to_income_pct, + avg_rent_2bed, + total_amenities_per_1000 + + from percentiles +) + +select * from final diff --git a/dbt/models/marts/mart_neighbourhood_safety.sql b/dbt/models/marts/mart_neighbourhood_safety.sql new file mode 100644 index 0000000..fbd0d2c --- /dev/null +++ b/dbt/models/marts/mart_neighbourhood_safety.sql @@ -0,0 +1,78 @@ +-- Mart: Neighbourhood Safety Analysis +-- Dashboard Tab: Safety +-- Grain: One row per neighbourhood per year + +with crime as ( + select * from {{ ref('int_neighbourhood__crime_summary') }} +), + +-- City-wide averages for comparison +city_avg as ( + select + year, + avg(crime_rate_per_100k) as city_avg_crime_rate, + avg(assault_count) as city_avg_assault, + avg(auto_theft_count) as city_avg_auto_theft, + avg(break_enter_count) as city_avg_break_enter + from crime + group by year +), + +final as ( + select + c.neighbourhood_id, + c.neighbourhood_name, + c.geometry, + c.population, + c.year, + + -- Total crime + c.total_incidents, + c.crime_rate_per_100k, + c.yoy_change_pct as crime_yoy_change_pct, + + -- Crime breakdown + c.assault_count, + c.auto_theft_count, + c.break_enter_count, + c.robbery_count, + c.theft_over_count, + c.homicide_count, + + -- Per 100K rates by type + case when c.population > 0 + then round(c.assault_count::numeric / c.population * 100000, 2) + else null + end as assault_rate_per_100k, + + case when c.population > 0 + then round(c.auto_theft_count::numeric / c.population * 100000, 2) + else null + end as auto_theft_rate_per_100k, + + case when c.population > 0 + then round(c.break_enter_count::numeric / c.population * 100000, 2) + else null + end as break_enter_rate_per_100k, + + -- Comparison to city average + round(ca.city_avg_crime_rate::numeric, 2) as city_avg_crime_rate, + + -- Crime index (100 = city average) + case + when ca.city_avg_crime_rate > 0 + then round(c.crime_rate_per_100k / ca.city_avg_crime_rate * 100, 1) + else null + end as crime_index, + + -- Safety tier based on crime rate percentile + ntile(5) over ( + partition by c.year + order by c.crime_rate_per_100k desc + ) as safety_tier + + from crime c + left join city_avg ca on c.year = ca.year +) + +select * from final diff --git a/dbt/models/staging/_sources.yml b/dbt/models/staging/_sources.yml index 1fffd9a..74f4888 100644 --- a/dbt/models/staging/_sources.yml +++ b/dbt/models/staging/_sources.yml @@ -41,3 +41,59 @@ sources: columns: - name: event_id description: "Primary key" + + - name: fact_census + description: "Census demographics by neighbourhood and year" + columns: + - name: id + description: "Primary key" + - name: neighbourhood_id + description: "Foreign key to dim_neighbourhood" + - name: census_year + description: "Census year (2016, 2021, etc.)" + - name: population + description: "Total population" + - name: median_household_income + description: "Median household income" + + - name: fact_crime + description: "Crime statistics by neighbourhood, year, and type" + columns: + - name: id + description: "Primary key" + - name: neighbourhood_id + description: "Foreign key to dim_neighbourhood" + - name: year + description: "Statistics year" + - name: crime_type + description: "Type of crime" + - name: count + description: "Number of incidents" + - name: rate_per_100k + description: "Rate per 100,000 population" + + - name: fact_amenities + description: "Amenity counts by neighbourhood and type" + columns: + - name: id + description: "Primary key" + - name: neighbourhood_id + description: "Foreign key to dim_neighbourhood" + - name: amenity_type + description: "Type of amenity (parks, schools, transit)" + - name: count + description: "Number of amenities" + - name: year + description: "Reference year" + + - name: bridge_cmhc_neighbourhood + description: "CMHC zone to neighbourhood mapping with area weights" + columns: + - name: id + description: "Primary key" + - name: cmhc_zone_code + description: "CMHC zone code" + - name: neighbourhood_id + description: "Neighbourhood ID" + - name: weight + description: "Proportional area weight (0-1)" diff --git a/dbt/models/staging/_staging.yml b/dbt/models/staging/_staging.yml index 1fb83f4..5d0dbeb 100644 --- a/dbt/models/staging/_staging.yml +++ b/dbt/models/staging/_staging.yml @@ -40,3 +40,90 @@ models: tests: - unique - not_null + + - name: stg_toronto__neighbourhoods + description: "Staged Toronto neighbourhood dimension (158 official boundaries)" + columns: + - name: neighbourhood_id + description: "Neighbourhood primary key" + tests: + - unique + - not_null + - name: neighbourhood_name + description: "Official neighbourhood name" + tests: + - not_null + - name: geometry + description: "PostGIS geometry (POLYGON)" + + - name: stg_toronto__census + description: "Staged census demographics by neighbourhood" + columns: + - name: census_id + description: "Census record identifier" + tests: + - unique + - not_null + - name: neighbourhood_id + description: "Neighbourhood foreign key" + tests: + - not_null + - name: census_year + description: "Census year (2016, 2021)" + tests: + - not_null + + - name: stg_toronto__crime + description: "Staged crime statistics by neighbourhood" + columns: + - name: crime_id + description: "Crime record identifier" + tests: + - unique + - not_null + - name: neighbourhood_id + description: "Neighbourhood foreign key" + tests: + - not_null + - name: crime_type + description: "Type of crime" + tests: + - not_null + + - name: stg_toronto__amenities + description: "Staged amenity counts by neighbourhood" + columns: + - name: amenity_id + description: "Amenity record identifier" + tests: + - unique + - not_null + - name: neighbourhood_id + description: "Neighbourhood foreign key" + tests: + - not_null + - name: amenity_type + description: "Type of amenity" + tests: + - not_null + + - name: stg_cmhc__zone_crosswalk + description: "Staged CMHC zone to neighbourhood crosswalk with area weights" + columns: + - name: crosswalk_id + description: "Crosswalk record identifier" + tests: + - unique + - not_null + - name: cmhc_zone_code + description: "CMHC zone code" + tests: + - not_null + - name: neighbourhood_id + description: "Neighbourhood foreign key" + tests: + - not_null + - name: area_weight + description: "Proportional area weight (0-1)" + tests: + - not_null diff --git a/dbt/models/staging/stg_cmhc__zone_crosswalk.sql b/dbt/models/staging/stg_cmhc__zone_crosswalk.sql new file mode 100644 index 0000000..d54148e --- /dev/null +++ b/dbt/models/staging/stg_cmhc__zone_crosswalk.sql @@ -0,0 +1,18 @@ +-- Staged CMHC zone to neighbourhood crosswalk +-- Source: bridge_cmhc_neighbourhood table +-- Grain: One row per zone-neighbourhood intersection + +with source as ( + select * from {{ source('toronto_housing', 'bridge_cmhc_neighbourhood') }} +), + +staged as ( + select + id as crosswalk_id, + cmhc_zone_code, + neighbourhood_id, + weight as area_weight + from source +) + +select * from staged diff --git a/dbt/models/staging/stg_toronto__amenities.sql b/dbt/models/staging/stg_toronto__amenities.sql new file mode 100644 index 0000000..c891692 --- /dev/null +++ b/dbt/models/staging/stg_toronto__amenities.sql @@ -0,0 +1,19 @@ +-- Staged amenity counts by neighbourhood +-- Source: fact_amenities table +-- Grain: One row per neighbourhood per amenity type per year + +with source as ( + select * from {{ source('toronto_housing', 'fact_amenities') }} +), + +staged as ( + select + id as amenity_id, + neighbourhood_id, + amenity_type, + count as amenity_count, + year as amenity_year + from source +) + +select * from staged diff --git a/dbt/models/staging/stg_toronto__census.sql b/dbt/models/staging/stg_toronto__census.sql new file mode 100644 index 0000000..66bea60 --- /dev/null +++ b/dbt/models/staging/stg_toronto__census.sql @@ -0,0 +1,27 @@ +-- Staged census demographics by neighbourhood +-- Source: fact_census table +-- Grain: One row per neighbourhood per census year + +with source as ( + select * from {{ source('toronto_housing', 'fact_census') }} +), + +staged as ( + select + id as census_id, + neighbourhood_id, + census_year, + population, + population_density, + median_household_income, + average_household_income, + unemployment_rate, + pct_bachelors_or_higher, + pct_owner_occupied, + pct_renter_occupied, + median_age, + average_dwelling_value + from source +) + +select * from staged diff --git a/dbt/models/staging/stg_toronto__crime.sql b/dbt/models/staging/stg_toronto__crime.sql new file mode 100644 index 0000000..1ebde64 --- /dev/null +++ b/dbt/models/staging/stg_toronto__crime.sql @@ -0,0 +1,20 @@ +-- Staged crime statistics by neighbourhood +-- Source: fact_crime table +-- Grain: One row per neighbourhood per year per crime type + +with source as ( + select * from {{ source('toronto_housing', 'fact_crime') }} +), + +staged as ( + select + id as crime_id, + neighbourhood_id, + year as crime_year, + crime_type, + count as incident_count, + rate_per_100k + from source +) + +select * from staged diff --git a/dbt/models/staging/stg_toronto__neighbourhoods.sql b/dbt/models/staging/stg_toronto__neighbourhoods.sql new file mode 100644 index 0000000..90d7f0b --- /dev/null +++ b/dbt/models/staging/stg_toronto__neighbourhoods.sql @@ -0,0 +1,25 @@ +-- Staged Toronto neighbourhood dimension +-- Source: dim_neighbourhood table +-- Grain: One row per neighbourhood (158 total) + +with source as ( + select * from {{ source('toronto_housing', 'dim_neighbourhood') }} +), + +staged as ( + select + neighbourhood_id, + name as neighbourhood_name, + geometry, + population, + land_area_sqkm, + pop_density_per_sqkm, + pct_bachelors_or_higher, + median_household_income, + pct_owner_occupied, + pct_renter_occupied, + census_year + from source +) + +select * from staged diff --git a/dbt/package-lock.yml b/dbt/package-lock.yml new file mode 100644 index 0000000..28a757f --- /dev/null +++ b/dbt/package-lock.yml @@ -0,0 +1,11 @@ +packages: + - name: dbt_utils + package: dbt-labs/dbt_utils + version: 1.3.3 + - name: dbt_expectations + package: calogica/dbt_expectations + version: 0.10.4 + - name: dbt_date + package: calogica/dbt_date + version: 0.10.1 +sha1_hash: 51a51ab489f7b302c8745ae3c3781271816b01be