feat: add loaders and dbt models for Toronto housing data

Sprint 4 implementation:

Loaders:
- base.py: Session management, bulk insert, upsert utilities
- dimensions.py: Load time, district, zone, neighbourhood, policy dimensions
- trreb.py: Load TRREB purchase data to fact_purchases
- cmhc.py: Load CMHC rental data to fact_rentals

dbt Project:
- Project configuration (dbt_project.yml, packages.yml)
- Staging models for all fact and dimension tables
- Intermediate models with dimension enrichment
- Marts: purchase analysis, rental analysis, market summary

Closes #16

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 16:07:30 -05:00
parent 88e23674a8
commit 457bb49395
22 changed files with 1320 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
# Schema tests for the intermediate models.
version: 2

models:
  # Purchases joined to the time and district dimensions.
  - name: int_purchases__monthly
    description: "Purchase data enriched with time and district dimensions"
    columns:
      # Tested as unique and non-null.
      - name: purchase_id
        tests: [unique, not_null]
      # Must always be populated.
      - name: district_code
        tests: [not_null]

  # Rentals joined to the time and zone dimensions.
  - name: int_rentals__annual
    description: "Rental data enriched with time and zone dimensions"
    columns:
      # Tested as unique and non-null.
      - name: rental_id
        tests: [unique, not_null]
      # Must always be populated.
      - name: zone_code
        tests: [not_null]

View File

@@ -0,0 +1,62 @@
-- Intermediate model: purchase rows combined with the attributes of their
-- time and district dimension rows, plus two derived inventory ratios.
-- Both joins are inner joins, so a purchase row with no match in either
-- dimension does not appear in the output.
with

src_purchases as (
    select * from {{ ref('stg_trreb__purchases') }}
),

dim_time as (
    select * from {{ ref('stg_dimensions__time') }}
),

dim_district as (
    select * from {{ ref('stg_dimensions__trreb_districts') }}
),

joined as (
    select
        pur.purchase_id,

        -- Columns taken from the time dimension
        tm.date_key,
        tm.full_date,
        tm.year,
        tm.month,
        tm.quarter,
        tm.month_name,

        -- Columns taken from the district dimension
        dist.district_key,
        dist.district_code,
        dist.district_name,
        dist.area_type,

        -- Measures passed through from the purchases staging model
        pur.sales_count,
        pur.dollar_volume,
        pur.avg_price,
        pur.median_price,
        pur.new_listings,
        pur.active_listings,
        pur.days_on_market,
        pur.sale_to_list_ratio,

        -- Sales divided by active listings, 3 decimals.
        -- Null unless active_listings is greater than zero.
        case
            when pur.active_listings > 0
                then round(cast(pur.sales_count as numeric) / pur.active_listings, 3)
        end as absorption_rate,

        -- Active listings divided by sales, 1 decimal.
        -- Null unless sales_count is greater than zero.
        case
            when pur.sales_count > 0
                then round(cast(pur.active_listings as numeric) / pur.sales_count, 1)
        end as months_of_inventory

    from src_purchases as pur
    inner join dim_time as tm
        on tm.date_key = pur.date_key
    inner join dim_district as dist
        on dist.district_key = pur.district_key
)

select * from joined

View File

@@ -0,0 +1,57 @@
-- Intermediate: Annual rental data enriched with dimensions
-- Joins rentals with time and zone dimensions for analysis.
-- Both joins are inner joins, so a rental row with no match in either
-- dimension does not appear in the output.
with rentals as (
    select * from {{ ref('stg_cmhc__rentals') }}
),

time_dim as (
    select * from {{ ref('stg_dimensions__time') }}
),

zone_dim as (
    select * from {{ ref('stg_dimensions__cmhc_zones') }}
),

enriched as (
    select
        r.rental_id,

        -- Time attributes
        t.date_key,
        t.full_date,
        t.year,
        t.month,
        t.quarter,

        -- Zone attributes
        z.zone_key,
        z.zone_code,
        z.zone_name,

        -- Bedroom type
        r.bedroom_type,

        -- Metrics (passed through unchanged from staging)
        r.rental_universe,
        r.avg_rent,
        r.median_rent,
        r.vacancy_rate,
        r.availability_rate,
        r.turnover_rate,
        r.year_over_year_rent_change,
        r.reliability_code,

        -- Calculated metrics
        -- Estimated vacant units: rental_universe * vacancy_rate / 100
        -- (vacancy_rate is divided by 100, i.e. treated as a percentage),
        -- rounded to a whole number. Null when rental_universe is not
        -- positive or vacancy_rate is null.
        -- Both operands are cast to numeric, as int_purchases__monthly does
        -- for its ratios, so the result does not depend on the staging
        -- column types: integer inputs would truncate in the division, and
        -- Postgres has no round(double precision, integer).
        case
            when r.rental_universe > 0 and r.vacancy_rate is not null
                then round(
                    r.rental_universe::numeric * (r.vacancy_rate::numeric / 100),
                    0
                )
            else null
        end as vacant_units_estimate

    from rentals r
    inner join time_dim t on r.date_key = t.date_key
    inner join zone_dim z on r.zone_key = z.zone_key
)

select * from enriched