Files
personal-portfolio/portfolio_app/toronto/loaders/cmhc_crosswalk.py
lmiranda 053acf6436 feat: Implement Phase 3 neighbourhood data model
Add schemas, parsers, loaders, and models for Toronto neighbourhood-centric
data including census profiles, crime statistics, and amenities.

Schemas:
- NeighbourhoodRecord, CensusRecord, CrimeRecord, CrimeType
- AmenityType, AmenityRecord, AmenityCount

Models:
- BridgeCMHCNeighbourhood (zone-to-neighbourhood mapping with weights)
- FactCensus, FactCrime, FactAmenities

Parsers:
- TorontoOpenDataParser (CKAN API for neighbourhoods, census, amenities)
- TorontoPoliceParser (crime rates, MCI data)

Loaders:
- load_census_data, load_crime_data, load_amenities
- build_cmhc_neighbourhood_crosswalk (PostGIS area weights)

Also updates CLAUDE.md with projman plugin workflow documentation.

Closes #53, #54, #55, #56, #57, #58, #59

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 11:07:13 -05:00

132 lines
4.0 KiB
Python

"""Loader for CMHC zone to neighbourhood crosswalk with area weights."""
from sqlalchemy import text
from sqlalchemy.orm import Session
from .base import get_session
def build_cmhc_neighbourhood_crosswalk(
session: Session | None = None,
) -> int:
"""Calculate area overlap weights between CMHC zones and neighbourhoods.
Uses PostGIS ST_Intersection and ST_Area functions to compute the
proportion of each CMHC zone that overlaps with each neighbourhood.
This enables disaggregation of CMHC zone-level data to neighbourhood level.
The function is idempotent - it clears existing crosswalk data before
rebuilding.
Args:
session: Optional existing session.
Returns:
Number of bridge records created.
Note:
Requires both dim_cmhc_zone and dim_neighbourhood tables to have
geometry columns populated with valid PostGIS geometries.
"""
def _build(sess: Session) -> int:
# Clear existing crosswalk data
sess.execute(text("DELETE FROM bridge_cmhc_neighbourhood"))
# Calculate overlap weights using PostGIS
# Weight = area of intersection / total area of CMHC zone
crosswalk_query = text(
"""
INSERT INTO bridge_cmhc_neighbourhood (cmhc_zone_code, neighbourhood_id, weight)
SELECT
z.zone_code,
n.neighbourhood_id,
CASE
WHEN ST_Area(z.geometry::geography) > 0 THEN
ST_Area(ST_Intersection(z.geometry, n.geometry)::geography) /
ST_Area(z.geometry::geography)
ELSE 0
END as weight
FROM dim_cmhc_zone z
JOIN dim_neighbourhood n
ON ST_Intersects(z.geometry, n.geometry)
WHERE
z.geometry IS NOT NULL
AND n.geometry IS NOT NULL
AND ST_Area(ST_Intersection(z.geometry, n.geometry)::geography) > 0
"""
)
sess.execute(crosswalk_query)
# Count records created
count_result = sess.execute(
text("SELECT COUNT(*) FROM bridge_cmhc_neighbourhood")
)
count = count_result.scalar() or 0
return int(count)
if session:
return _build(session)
with get_session() as sess:
return _build(sess)
def get_neighbourhood_weights_for_zone(
zone_code: str,
session: Session | None = None,
) -> list[tuple[int, float]]:
"""Get neighbourhood weights for a specific CMHC zone.
Args:
zone_code: CMHC zone code.
session: Optional existing session.
Returns:
List of (neighbourhood_id, weight) tuples.
"""
def _get(sess: Session) -> list[tuple[int, float]]:
result = sess.execute(
text(
"""
SELECT neighbourhood_id, weight
FROM bridge_cmhc_neighbourhood
WHERE cmhc_zone_code = :zone_code
ORDER BY weight DESC
"""
),
{"zone_code": zone_code},
)
return [(int(row[0]), float(row[1])) for row in result]
if session:
return _get(session)
with get_session() as sess:
return _get(sess)
def disaggregate_zone_value(
zone_code: str,
value: float,
session: Session | None = None,
) -> dict[int, float]:
"""Disaggregate a CMHC zone value to neighbourhoods using weights.
Args:
zone_code: CMHC zone code.
value: Value to disaggregate (e.g., average rent).
session: Optional existing session.
Returns:
Dictionary mapping neighbourhood_id to weighted value.
Note:
For averages (like rent), the weighted value represents the
contribution from this zone. To get a neighbourhood's total,
sum contributions from all overlapping zones.
"""
weights = get_neighbourhood_weights_for_zone(zone_code, session)
return {neighbourhood_id: value * weight for neighbourhood_id, weight in weights}