feat: add loaders and dbt models for Toronto housing data
Sprint 4 implementation: Loaders: - base.py: Session management, bulk insert, upsert utilities - dimensions.py: Load time, district, zone, neighbourhood, policy dimensions - trreb.py: Load TRREB purchase data to fact_purchases - cmhc.py: Load CMHC rental data to fact_rentals dbt Project: - Project configuration (dbt_project.yml, packages.yml) - Staging models for all fact and dimension tables - Intermediate models with dimension enrichment - Marts: purchase analysis, rental analysis, market summary Closes #16 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
24
dbt/models/intermediate/_intermediate.yml
Normal file
24
dbt/models/intermediate/_intermediate.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: int_purchases__monthly
|
||||
description: "Purchase data enriched with time and district dimensions"
|
||||
columns:
|
||||
- name: purchase_id
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: district_code
|
||||
tests:
|
||||
- not_null
|
||||
|
||||
- name: int_rentals__annual
|
||||
description: "Rental data enriched with time and zone dimensions"
|
||||
columns:
|
||||
- name: rental_id
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: zone_code
|
||||
tests:
|
||||
- not_null
|
||||
62
dbt/models/intermediate/int_purchases__monthly.sql
Normal file
62
dbt/models/intermediate/int_purchases__monthly.sql
Normal file
@@ -0,0 +1,62 @@
|
||||
-- Intermediate: Monthly purchase data enriched with dimensions
|
||||
-- Joins purchases with time and district dimensions for analysis
|
||||
|
||||
with purchases as (
|
||||
select * from {{ ref('stg_trreb__purchases') }}
|
||||
),
|
||||
|
||||
time_dim as (
|
||||
select * from {{ ref('stg_dimensions__time') }}
|
||||
),
|
||||
|
||||
district_dim as (
|
||||
select * from {{ ref('stg_dimensions__trreb_districts') }}
|
||||
),
|
||||
|
||||
enriched as (
|
||||
select
|
||||
p.purchase_id,
|
||||
|
||||
-- Time attributes
|
||||
t.date_key,
|
||||
t.full_date,
|
||||
t.year,
|
||||
t.month,
|
||||
t.quarter,
|
||||
t.month_name,
|
||||
|
||||
-- District attributes
|
||||
d.district_key,
|
||||
d.district_code,
|
||||
d.district_name,
|
||||
d.area_type,
|
||||
|
||||
-- Metrics
|
||||
p.sales_count,
|
||||
p.dollar_volume,
|
||||
p.avg_price,
|
||||
p.median_price,
|
||||
p.new_listings,
|
||||
p.active_listings,
|
||||
p.days_on_market,
|
||||
p.sale_to_list_ratio,
|
||||
|
||||
-- Calculated metrics
|
||||
case
|
||||
when p.active_listings > 0
|
||||
then round(p.sales_count::numeric / p.active_listings, 3)
|
||||
else null
|
||||
end as absorption_rate,
|
||||
|
||||
case
|
||||
when p.sales_count > 0
|
||||
then round(p.active_listings::numeric / p.sales_count, 1)
|
||||
else null
|
||||
end as months_of_inventory
|
||||
|
||||
from purchases p
|
||||
inner join time_dim t on p.date_key = t.date_key
|
||||
inner join district_dim d on p.district_key = d.district_key
|
||||
)
|
||||
|
||||
select * from enriched
|
||||
57
dbt/models/intermediate/int_rentals__annual.sql
Normal file
57
dbt/models/intermediate/int_rentals__annual.sql
Normal file
@@ -0,0 +1,57 @@
|
||||
-- Intermediate: Annual rental data enriched with dimensions
|
||||
-- Joins rentals with time and zone dimensions for analysis
|
||||
|
||||
with rentals as (
|
||||
select * from {{ ref('stg_cmhc__rentals') }}
|
||||
),
|
||||
|
||||
time_dim as (
|
||||
select * from {{ ref('stg_dimensions__time') }}
|
||||
),
|
||||
|
||||
zone_dim as (
|
||||
select * from {{ ref('stg_dimensions__cmhc_zones') }}
|
||||
),
|
||||
|
||||
enriched as (
|
||||
select
|
||||
r.rental_id,
|
||||
|
||||
-- Time attributes
|
||||
t.date_key,
|
||||
t.full_date,
|
||||
t.year,
|
||||
t.month,
|
||||
t.quarter,
|
||||
|
||||
-- Zone attributes
|
||||
z.zone_key,
|
||||
z.zone_code,
|
||||
z.zone_name,
|
||||
|
||||
-- Bedroom type
|
||||
r.bedroom_type,
|
||||
|
||||
-- Metrics
|
||||
r.rental_universe,
|
||||
r.avg_rent,
|
||||
r.median_rent,
|
||||
r.vacancy_rate,
|
||||
r.availability_rate,
|
||||
r.turnover_rate,
|
||||
r.year_over_year_rent_change,
|
||||
r.reliability_code,
|
||||
|
||||
-- Calculated metrics
|
||||
case
|
||||
when r.rental_universe > 0 and r.vacancy_rate is not null
|
||||
then round(r.rental_universe * (r.vacancy_rate / 100), 0)
|
||||
else null
|
||||
end as vacant_units_estimate
|
||||
|
||||
from rentals r
|
||||
inner join time_dim t on r.date_key = t.date_key
|
||||
inner join zone_dim z on r.zone_key = z.zone_key
|
||||
)
|
||||
|
||||
select * from enriched
|
||||
23
dbt/models/marts/_marts.yml
Normal file
23
dbt/models/marts/_marts.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: mart_toronto_purchases
|
||||
description: "Final mart for Toronto purchase/sales analysis by district and time"
|
||||
columns:
|
||||
- name: purchase_id
|
||||
description: "Unique purchase record identifier"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
|
||||
- name: mart_toronto_rentals
|
||||
description: "Final mart for Toronto rental market analysis by zone and time"
|
||||
columns:
|
||||
- name: rental_id
|
||||
description: "Unique rental record identifier"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
|
||||
- name: mart_toronto_market_summary
|
||||
description: "Combined market summary aggregating purchases and rentals at Toronto level"
|
||||
81
dbt/models/marts/mart_toronto_market_summary.sql
Normal file
81
dbt/models/marts/mart_toronto_market_summary.sql
Normal file
@@ -0,0 +1,81 @@
|
||||
-- Mart: Toronto Market Summary
|
||||
-- Aggregated view combining purchase and rental market indicators
|
||||
-- Grain: One row per year-month
|
||||
|
||||
with purchases_agg as (
|
||||
select
|
||||
year,
|
||||
month,
|
||||
month_name,
|
||||
quarter,
|
||||
|
||||
-- Aggregate purchase metrics across all districts
|
||||
sum(sales_count) as total_sales,
|
||||
sum(dollar_volume) as total_dollar_volume,
|
||||
round(avg(avg_price), 0) as avg_price_all_districts,
|
||||
round(avg(median_price), 0) as median_price_all_districts,
|
||||
sum(new_listings) as total_new_listings,
|
||||
sum(active_listings) as total_active_listings,
|
||||
round(avg(days_on_market), 0) as avg_days_on_market,
|
||||
round(avg(sale_to_list_ratio), 2) as avg_sale_to_list_ratio,
|
||||
round(avg(absorption_rate), 3) as avg_absorption_rate,
|
||||
round(avg(months_of_inventory), 1) as avg_months_of_inventory,
|
||||
round(avg(avg_price_yoy_pct), 2) as avg_price_yoy_pct
|
||||
|
||||
from {{ ref('mart_toronto_purchases') }}
|
||||
group by year, month, month_name, quarter
|
||||
),
|
||||
|
||||
rentals_agg as (
|
||||
select
|
||||
year,
|
||||
|
||||
-- Aggregate rental metrics across all zones (all bedroom types)
|
||||
round(avg(avg_rent), 0) as avg_rent_all_zones,
|
||||
round(avg(vacancy_rate), 2) as avg_vacancy_rate,
|
||||
round(avg(rent_change_pct), 2) as avg_rent_change_pct,
|
||||
sum(rental_universe) as total_rental_universe
|
||||
|
||||
from {{ ref('mart_toronto_rentals') }}
|
||||
group by year
|
||||
),
|
||||
|
||||
final as (
|
||||
select
|
||||
p.year,
|
||||
p.month,
|
||||
p.month_name,
|
||||
p.quarter,
|
||||
|
||||
-- Purchase market indicators
|
||||
p.total_sales,
|
||||
p.total_dollar_volume,
|
||||
p.avg_price_all_districts,
|
||||
p.median_price_all_districts,
|
||||
p.total_new_listings,
|
||||
p.total_active_listings,
|
||||
p.avg_days_on_market,
|
||||
p.avg_sale_to_list_ratio,
|
||||
p.avg_absorption_rate,
|
||||
p.avg_months_of_inventory,
|
||||
p.avg_price_yoy_pct,
|
||||
|
||||
-- Rental market indicators (annual, so join on year)
|
||||
r.avg_rent_all_zones,
|
||||
r.avg_vacancy_rate,
|
||||
r.avg_rent_change_pct,
|
||||
r.total_rental_universe,
|
||||
|
||||
-- Affordability indicator (price to rent ratio)
|
||||
case
|
||||
when r.avg_rent_all_zones > 0
|
||||
then round(p.avg_price_all_districts / (r.avg_rent_all_zones * 12), 1)
|
||||
else null
|
||||
end as price_to_annual_rent_ratio
|
||||
|
||||
from purchases_agg p
|
||||
left join rentals_agg r on p.year = r.year
|
||||
)
|
||||
|
||||
select * from final
|
||||
order by year desc, month desc
|
||||
79
dbt/models/marts/mart_toronto_purchases.sql
Normal file
79
dbt/models/marts/mart_toronto_purchases.sql
Normal file
@@ -0,0 +1,79 @@
|
||||
-- Mart: Toronto Purchase Market Analysis
|
||||
-- Final analytical table for purchase/sales data visualization
|
||||
-- Grain: One row per district per month
|
||||
|
||||
with purchases as (
|
||||
select * from {{ ref('int_purchases__monthly') }}
|
||||
),
|
||||
|
||||
-- Add year-over-year calculations
|
||||
with_yoy as (
|
||||
select
|
||||
p.*,
|
||||
|
||||
-- Previous year same month values
|
||||
lag(p.avg_price, 12) over (
|
||||
partition by p.district_code
|
||||
order by p.date_key
|
||||
) as avg_price_prev_year,
|
||||
|
||||
lag(p.sales_count, 12) over (
|
||||
partition by p.district_code
|
||||
order by p.date_key
|
||||
) as sales_count_prev_year,
|
||||
|
||||
lag(p.median_price, 12) over (
|
||||
partition by p.district_code
|
||||
order by p.date_key
|
||||
) as median_price_prev_year
|
||||
|
||||
from purchases p
|
||||
),
|
||||
|
||||
final as (
|
||||
select
|
||||
purchase_id,
|
||||
date_key,
|
||||
full_date,
|
||||
year,
|
||||
month,
|
||||
quarter,
|
||||
month_name,
|
||||
district_key,
|
||||
district_code,
|
||||
district_name,
|
||||
area_type,
|
||||
sales_count,
|
||||
dollar_volume,
|
||||
avg_price,
|
||||
median_price,
|
||||
new_listings,
|
||||
active_listings,
|
||||
days_on_market,
|
||||
sale_to_list_ratio,
|
||||
absorption_rate,
|
||||
months_of_inventory,
|
||||
|
||||
-- Year-over-year changes
|
||||
case
|
||||
when avg_price_prev_year > 0
|
||||
then round(((avg_price - avg_price_prev_year) / avg_price_prev_year) * 100, 2)
|
||||
else null
|
||||
end as avg_price_yoy_pct,
|
||||
|
||||
case
|
||||
when sales_count_prev_year > 0
|
||||
then round(((sales_count - sales_count_prev_year)::numeric / sales_count_prev_year) * 100, 2)
|
||||
else null
|
||||
end as sales_count_yoy_pct,
|
||||
|
||||
case
|
||||
when median_price_prev_year > 0
|
||||
then round(((median_price - median_price_prev_year) / median_price_prev_year) * 100, 2)
|
||||
else null
|
||||
end as median_price_yoy_pct
|
||||
|
||||
from with_yoy
|
||||
)
|
||||
|
||||
select * from final
|
||||
64
dbt/models/marts/mart_toronto_rentals.sql
Normal file
64
dbt/models/marts/mart_toronto_rentals.sql
Normal file
@@ -0,0 +1,64 @@
|
||||
-- Mart: Toronto Rental Market Analysis
|
||||
-- Final analytical table for rental market visualization
|
||||
-- Grain: One row per zone per bedroom type per survey year
|
||||
|
||||
with rentals as (
|
||||
select * from {{ ref('int_rentals__annual') }}
|
||||
),
|
||||
|
||||
-- Add year-over-year calculations
|
||||
with_yoy as (
|
||||
select
|
||||
r.*,
|
||||
|
||||
-- Previous year values
|
||||
lag(r.avg_rent, 1) over (
|
||||
partition by r.zone_code, r.bedroom_type
|
||||
order by r.year
|
||||
) as avg_rent_prev_year,
|
||||
|
||||
lag(r.vacancy_rate, 1) over (
|
||||
partition by r.zone_code, r.bedroom_type
|
||||
order by r.year
|
||||
) as vacancy_rate_prev_year
|
||||
|
||||
from rentals r
|
||||
),
|
||||
|
||||
final as (
|
||||
select
|
||||
rental_id,
|
||||
date_key,
|
||||
full_date,
|
||||
year,
|
||||
quarter,
|
||||
zone_key,
|
||||
zone_code,
|
||||
zone_name,
|
||||
bedroom_type,
|
||||
rental_universe,
|
||||
avg_rent,
|
||||
median_rent,
|
||||
vacancy_rate,
|
||||
availability_rate,
|
||||
turnover_rate,
|
||||
year_over_year_rent_change,
|
||||
reliability_code,
|
||||
vacant_units_estimate,
|
||||
|
||||
-- Calculated year-over-year (if not provided)
|
||||
coalesce(
|
||||
year_over_year_rent_change,
|
||||
case
|
||||
when avg_rent_prev_year > 0
|
||||
then round(((avg_rent - avg_rent_prev_year) / avg_rent_prev_year) * 100, 2)
|
||||
else null
|
||||
end
|
||||
) as rent_change_pct,
|
||||
|
||||
vacancy_rate - vacancy_rate_prev_year as vacancy_rate_change
|
||||
|
||||
from with_yoy
|
||||
)
|
||||
|
||||
select * from final
|
||||
61
dbt/models/staging/_sources.yml
Normal file
61
dbt/models/staging/_sources.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
version: 2
|
||||
|
||||
sources:
|
||||
- name: toronto_housing
|
||||
description: "Toronto housing data loaded from TRREB and CMHC sources"
|
||||
database: portfolio
|
||||
schema: public
|
||||
tables:
|
||||
- name: fact_purchases
|
||||
description: "TRREB monthly purchase/sales statistics by district"
|
||||
columns:
|
||||
- name: id
|
||||
description: "Primary key"
|
||||
- name: date_key
|
||||
description: "Foreign key to dim_time"
|
||||
- name: district_key
|
||||
description: "Foreign key to dim_trreb_district"
|
||||
|
||||
- name: fact_rentals
|
||||
description: "CMHC annual rental survey data by zone and bedroom type"
|
||||
columns:
|
||||
- name: id
|
||||
description: "Primary key"
|
||||
- name: date_key
|
||||
description: "Foreign key to dim_time"
|
||||
- name: zone_key
|
||||
description: "Foreign key to dim_cmhc_zone"
|
||||
|
||||
- name: dim_time
|
||||
description: "Time dimension (monthly grain)"
|
||||
columns:
|
||||
- name: date_key
|
||||
description: "Primary key (YYYYMMDD format)"
|
||||
|
||||
- name: dim_trreb_district
|
||||
description: "TRREB district dimension with geometry"
|
||||
columns:
|
||||
- name: district_key
|
||||
description: "Primary key"
|
||||
- name: district_code
|
||||
description: "TRREB district code"
|
||||
|
||||
- name: dim_cmhc_zone
|
||||
description: "CMHC zone dimension with geometry"
|
||||
columns:
|
||||
- name: zone_key
|
||||
description: "Primary key"
|
||||
- name: zone_code
|
||||
description: "CMHC zone code"
|
||||
|
||||
- name: dim_neighbourhood
|
||||
description: "City of Toronto neighbourhoods (reference only)"
|
||||
columns:
|
||||
- name: neighbourhood_id
|
||||
description: "Primary key"
|
||||
|
||||
- name: dim_policy_event
|
||||
description: "Housing policy events for annotation"
|
||||
columns:
|
||||
- name: event_id
|
||||
description: "Primary key"
|
||||
73
dbt/models/staging/_staging.yml
Normal file
73
dbt/models/staging/_staging.yml
Normal file
@@ -0,0 +1,73 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: stg_trreb__purchases
|
||||
description: "Staged TRREB purchase/sales data from fact_purchases"
|
||||
columns:
|
||||
- name: purchase_id
|
||||
description: "Unique identifier for purchase record"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: date_key
|
||||
description: "Date dimension key (YYYYMMDD)"
|
||||
tests:
|
||||
- not_null
|
||||
- name: district_key
|
||||
description: "TRREB district dimension key"
|
||||
tests:
|
||||
- not_null
|
||||
|
||||
- name: stg_cmhc__rentals
|
||||
description: "Staged CMHC rental market data from fact_rentals"
|
||||
columns:
|
||||
- name: rental_id
|
||||
description: "Unique identifier for rental record"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: date_key
|
||||
description: "Date dimension key (YYYYMMDD)"
|
||||
tests:
|
||||
- not_null
|
||||
- name: zone_key
|
||||
description: "CMHC zone dimension key"
|
||||
tests:
|
||||
- not_null
|
||||
|
||||
- name: stg_dimensions__time
|
||||
description: "Staged time dimension"
|
||||
columns:
|
||||
- name: date_key
|
||||
description: "Date dimension key (YYYYMMDD)"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
|
||||
- name: stg_dimensions__trreb_districts
|
||||
description: "Staged TRREB district dimension"
|
||||
columns:
|
||||
- name: district_key
|
||||
description: "District dimension key"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: district_code
|
||||
description: "TRREB district code (e.g., W01, C01)"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
|
||||
- name: stg_dimensions__cmhc_zones
|
||||
description: "Staged CMHC zone dimension"
|
||||
columns:
|
||||
- name: zone_key
|
||||
description: "Zone dimension key"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
- name: zone_code
|
||||
description: "CMHC zone code"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
26
dbt/models/staging/stg_cmhc__rentals.sql
Normal file
26
dbt/models/staging/stg_cmhc__rentals.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
-- Staged CMHC rental market survey data
|
||||
-- Source: fact_rentals table loaded from CMHC CSV exports
|
||||
-- Grain: One row per zone per bedroom type per survey year
|
||||
|
||||
with source as (
|
||||
select * from {{ source('toronto_housing', 'fact_rentals') }}
|
||||
),
|
||||
|
||||
staged as (
|
||||
select
|
||||
id as rental_id,
|
||||
date_key,
|
||||
zone_key,
|
||||
bedroom_type,
|
||||
universe as rental_universe,
|
||||
avg_rent,
|
||||
median_rent,
|
||||
vacancy_rate,
|
||||
availability_rate,
|
||||
turnover_rate,
|
||||
rent_change_pct as year_over_year_rent_change,
|
||||
reliability_code
|
||||
from source
|
||||
)
|
||||
|
||||
select * from staged
|
||||
18
dbt/models/staging/stg_dimensions__cmhc_zones.sql
Normal file
18
dbt/models/staging/stg_dimensions__cmhc_zones.sql
Normal file
@@ -0,0 +1,18 @@
|
||||
-- Staged CMHC zone dimension
|
||||
-- Source: dim_cmhc_zone table
|
||||
-- Grain: One row per zone
|
||||
|
||||
with source as (
|
||||
select * from {{ source('toronto_housing', 'dim_cmhc_zone') }}
|
||||
),
|
||||
|
||||
staged as (
|
||||
select
|
||||
zone_key,
|
||||
zone_code,
|
||||
zone_name,
|
||||
geometry
|
||||
from source
|
||||
)
|
||||
|
||||
select * from staged
|
||||
21
dbt/models/staging/stg_dimensions__time.sql
Normal file
21
dbt/models/staging/stg_dimensions__time.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- Staged time dimension
|
||||
-- Source: dim_time table
|
||||
-- Grain: One row per month
|
||||
|
||||
with source as (
|
||||
select * from {{ source('toronto_housing', 'dim_time') }}
|
||||
),
|
||||
|
||||
staged as (
|
||||
select
|
||||
date_key,
|
||||
full_date,
|
||||
year,
|
||||
month,
|
||||
quarter,
|
||||
month_name,
|
||||
is_month_start
|
||||
from source
|
||||
)
|
||||
|
||||
select * from staged
|
||||
19
dbt/models/staging/stg_dimensions__trreb_districts.sql
Normal file
19
dbt/models/staging/stg_dimensions__trreb_districts.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- Staged TRREB district dimension
|
||||
-- Source: dim_trreb_district table
|
||||
-- Grain: One row per district
|
||||
|
||||
with source as (
|
||||
select * from {{ source('toronto_housing', 'dim_trreb_district') }}
|
||||
),
|
||||
|
||||
staged as (
|
||||
select
|
||||
district_key,
|
||||
district_code,
|
||||
district_name,
|
||||
area_type,
|
||||
geometry
|
||||
from source
|
||||
)
|
||||
|
||||
select * from staged
|
||||
25
dbt/models/staging/stg_trreb__purchases.sql
Normal file
25
dbt/models/staging/stg_trreb__purchases.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- Staged TRREB purchase/sales data
|
||||
-- Source: fact_purchases table loaded from TRREB Market Watch PDFs
|
||||
-- Grain: One row per district per month
|
||||
|
||||
with source as (
|
||||
select * from {{ source('toronto_housing', 'fact_purchases') }}
|
||||
),
|
||||
|
||||
staged as (
|
||||
select
|
||||
id as purchase_id,
|
||||
date_key,
|
||||
district_key,
|
||||
sales_count,
|
||||
dollar_volume,
|
||||
avg_price,
|
||||
median_price,
|
||||
new_listings,
|
||||
active_listings,
|
||||
avg_dom as days_on_market,
|
||||
avg_sp_lp as sale_to_list_ratio
|
||||
from source
|
||||
)
|
||||
|
||||
select * from staged
|
||||
Reference in New Issue
Block a user