- Add StatCan CMHC parser to fetch rental data from Statistics Canada API - Create year spine (2014-2025) as time dimension driver instead of census - Add CMA-level rental and income intermediate models - Update mart_neighbourhood_overview to use rental years as base - Fix neighbourhood_service queries to match dbt schema - Add CMHC data loading to pipeline script Data now flows correctly: 158 neighbourhoods × 12 years = 1,896 records Rent data available 2019-2025, crime data 2014-2024 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
84 lines
2.6 KiB
SQL
84 lines
2.6 KiB
SQL
-- Intermediate: Aggregated crime by neighbourhood with YoY change
-- Pivots crime types into per-type count columns and calculates year-over-year trends
-- Grain: One row per neighbourhood per year

-- Import CTEs: each upstream staging model is pulled in once, unfiltered.

-- Neighbourhood attributes (id, name, geometry and population are read from
-- here further down in this model).
with neighbourhoods as (

    select *
    from {{ ref('stg_toronto__neighbourhoods') }}

),

-- Crime rows; these are aggregated by neighbourhood and year further down.
crime as (

    select *
    from {{ ref('stg_toronto__crime') }}

),

-- Collapse the crime rows to one row per neighbourhood and year, pivoting the
-- six listed crime types into their own incident-count columns.
crime_by_year as (

    select
        neighbourhood_id,
        crime_year as year,

        -- Incidents summed over every row in the group, whatever its type.
        sum(incident_count) as total_incidents,

        -- Per-type counts: rows of any other type contribute 0 to the sum.
        sum(case crime_type when 'Assault' then incident_count else 0 end) as assault_count,
        sum(case crime_type when 'Auto Theft' then incident_count else 0 end) as auto_theft_count,
        sum(case crime_type when 'Break and Enter' then incident_count else 0 end) as break_enter_count,
        sum(case crime_type when 'Robbery' then incident_count else 0 end) as robbery_count,
        sum(case crime_type when 'Theft Over' then incident_count else 0 end) as theft_over_count,
        sum(case crime_type when 'Homicide' then incident_count else 0 end) as homicide_count,

        -- Mean of the row-level rates within the group.
        -- NOTE(review): if the staging model carries one rate per crime type,
        -- this is the mean per-type rate, not the combined rate -- confirm
        -- that is what consumers expect.
        avg(rate_per_100k) as avg_rate_per_100k

    from crime
    group by
        neighbourhood_id,
        crime_year

),

-- Helper for with_yoy: attach the previous calendar year's incident total.
-- lag() returns the previous *row* in the partition, which is only the
-- previous *year* when no year is missing for that neighbourhood. The value is
-- therefore kept only when the preceding row is exactly one year earlier;
-- otherwise it is null.
-- Assumes `year` is an integer (it is compared against year - 1) -- TODO confirm
-- against stg_toronto__crime.
crime_with_prev as (

    select
        c.*,
        case
            when lag(c.year) over by_neighbourhood_year = c.year - 1
                then lag(c.total_incidents) over by_neighbourhood_year
        end as prev_year_incidents
    from crime_by_year as c
    -- Named once so both lag() calls are guaranteed to use the same window.
    window by_neighbourhood_year as (
        partition by c.neighbourhood_id
        order by c.year
    )

),

-- Add year-over-year changes.
-- Output: every crime_by_year column, plus prev_year_incidents and
-- yoy_change_pct (percentage change versus the previous year, 2 decimals).
-- yoy_change_pct is null when there is no previous-year row (first year or a
-- gap in the series) or when the previous year had zero incidents, since the
-- change is undefined in both cases.
with_yoy as (

    select
        p.*,
        round(
            (p.total_incidents - p.prev_year_incidents)::numeric
                / nullif(p.prev_year_incidents, 0) * 100,
            2
        ) as yoy_change_pct
    from crime_with_prev as p

),

-- Final shape: neighbourhood attributes joined to their yearly crime metrics.
-- The inner join keeps only neighbourhood-years present on both sides, so a
-- neighbourhood with no aggregated crime rows does not appear in the output.
crime_summary as (

    select
        -- Neighbourhood attributes
        nbhd.neighbourhood_id,
        nbhd.neighbourhood_name,
        nbhd.geometry,
        nbhd.population,

        -- Yearly crime metrics
        yoy.year,
        yoy.total_incidents,
        yoy.assault_count,
        yoy.auto_theft_count,
        yoy.break_enter_count,
        yoy.robbery_count,
        yoy.theft_over_count,
        yoy.homicide_count,
        yoy.yoy_change_pct,

        -- Crime rate per 100K population: prefer the averaged source rate and
        -- fall back to total incidents / population when the source rate is
        -- null and a positive population is available (null otherwise).
        -- NOTE(review): the fallback is computed from total incidents while
        -- the preferred value is an average of row-level rates; confirm the
        -- two are meant to be interchangeable.
        coalesce(
            yoy.avg_rate_per_100k,
            case
                when nbhd.population > 0
                    then round(yoy.total_incidents::numeric / nbhd.population * 100000, 2)
            end
        ) as crime_rate_per_100k

    from with_yoy as yoy
    inner join neighbourhoods as nbhd
        on yoy.neighbourhood_id = nbhd.neighbourhood_id

)

select * from crime_summary