Add schemas, parsers, loaders, and models for Toronto neighbourhood-centric data including census profiles, crime statistics, and amenities. Schemas: - NeighbourhoodRecord, CensusRecord, CrimeRecord, CrimeType - AmenityType, AmenityRecord, AmenityCount Models: - BridgeCMHCNeighbourhood (zone-to-neighbourhood mapping with weights) - FactCensus, FactCrime, FactAmenities Parsers: - TorontoOpenDataParser (CKAN API for neighbourhoods, census, amenities) - TorontoPoliceParser (crime rates, MCI data) Loaders: - load_census_data, load_crime_data, load_amenities - build_cmhc_neighbourhood_crosswalk (PostGIS area weights) Also updates CLAUDE.md with projman plugin workflow documentation. Closes #53, #54, #55, #56, #57, #58, #59 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
132 lines
4.0 KiB
Python
132 lines
4.0 KiB
Python
"""Loader for CMHC zone to neighbourhood crosswalk with area weights."""
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
from .base import get_session
|
|
|
|
|
|
def build_cmhc_neighbourhood_crosswalk(
    session: Session | None = None,
) -> int:
    """Rebuild the CMHC zone to neighbourhood crosswalk with area weights.

    Every (CMHC zone, neighbourhood) pair whose geometries intersect
    (``ST_Intersects``) and whose overlap has a non-zero area produces one row
    in ``bridge_cmhc_neighbourhood``. The weight stored for the pair is::

        area(intersection of zone and neighbourhood) / area(zone)

    i.e. the proportion of the *zone* that lies inside the neighbourhood,
    computed with PostGIS ``ST_Intersection`` and ``ST_Area`` on
    ``geography`` casts. This enables disaggregation of CMHC zone-level data
    to neighbourhood level.

    The function is idempotent: all existing rows of
    ``bridge_cmhc_neighbourhood`` are deleted before the new ones are
    inserted.

    Args:
        session: Optional existing session. When provided, every statement
            runs on it. When omitted, a session is obtained from
            ``get_session()`` for the duration of the call.

    Returns:
        Number of rows in ``bridge_cmhc_neighbourhood`` after the rebuild.

    Note:
        Requires both ``dim_cmhc_zone`` and ``dim_neighbourhood`` to have
        geometry columns populated with valid PostGIS geometries; rows whose
        geometry is NULL are skipped. This function never calls ``commit``
        itself, so making the change durable is up to the caller (or to
        whatever ``get_session()`` does on exit).
    """

    def _build(sess: Session) -> int:
        # Clear existing crosswalk data so a re-run never duplicates rows.
        sess.execute(text("DELETE FROM bridge_cmhc_neighbourhood"))

        # Calculate overlap weights using PostGIS.
        # Weight = area of intersection / total area of CMHC zone.
        # The CASE guards the division against a zero-area zone; the WHERE
        # clause drops pairs that merely touch (zero-area intersection).
        crosswalk_query = text(
            """
            INSERT INTO bridge_cmhc_neighbourhood (cmhc_zone_code, neighbourhood_id, weight)
            SELECT
                z.zone_code,
                n.neighbourhood_id,
                CASE
                    WHEN ST_Area(z.geometry::geography) > 0 THEN
                        ST_Area(ST_Intersection(z.geometry, n.geometry)::geography) /
                        ST_Area(z.geometry::geography)
                    ELSE 0
                END as weight
            FROM dim_cmhc_zone z
            JOIN dim_neighbourhood n
                ON ST_Intersects(z.geometry, n.geometry)
            WHERE
                z.geometry IS NOT NULL
                AND n.geometry IS NOT NULL
                AND ST_Area(ST_Intersection(z.geometry, n.geometry)::geography) > 0
            """
        )

        sess.execute(crosswalk_query)

        # Count records created (the table was emptied above, so the total
        # row count equals the number of rows just inserted).
        count_result = sess.execute(
            text("SELECT COUNT(*) FROM bridge_cmhc_neighbourhood")
        )
        # Guard against a None scalar so the return value is always an int.
        count = count_result.scalar() or 0

        return int(count)

    # Compare against None explicitly: a caller-supplied session must always
    # be used, even if the object happens to evaluate as falsy; otherwise the
    # rebuild would silently run on a different session/transaction.
    if session is not None:
        return _build(session)
    with get_session() as sess:
        return _build(sess)
|
|
|
|
|
|
def get_neighbourhood_weights_for_zone(
    zone_code: str,
    session: Session | None = None,
) -> list[tuple[int, float]]:
    """Get neighbourhood weights for a specific CMHC zone.

    Reads the rows of ``bridge_cmhc_neighbourhood`` whose ``cmhc_zone_code``
    equals ``zone_code``. The zone code is passed as a bound parameter, never
    interpolated into the SQL text.

    Args:
        zone_code: CMHC zone code.
        session: Optional existing session. When provided, the query runs on
            it. When omitted, a session is obtained from ``get_session()``
            for the duration of the call.

    Returns:
        List of (neighbourhood_id, weight) tuples, ordered by weight from
        largest to smallest. Empty when the zone has no crosswalk rows.
    """

    def _get(sess: Session) -> list[tuple[int, float]]:
        result = sess.execute(
            text(
                """
                SELECT neighbourhood_id, weight
                FROM bridge_cmhc_neighbourhood
                WHERE cmhc_zone_code = :zone_code
                ORDER BY weight DESC
                """
            ),
            {"zone_code": zone_code},
        )
        # Coerce driver-level values to plain Python int/float so callers get
        # exactly the types promised by the return annotation.
        return [(int(row[0]), float(row[1])) for row in result]

    # Compare against None explicitly: a caller-supplied session must always
    # be used, even if the object happens to evaluate as falsy.
    if session is not None:
        return _get(session)
    with get_session() as sess:
        return _get(sess)
|
|
|
|
|
|
def disaggregate_zone_value(
    zone_code: str,
    value: float,
    session: Session | None = None,
) -> dict[int, float]:
    """Spread a CMHC zone-level value across the zone's neighbourhoods.

    Fetches the (neighbourhood_id, weight) pairs for ``zone_code`` through
    ``get_neighbourhood_weights_for_zone`` and scales ``value`` by each
    weight.

    Args:
        zone_code: CMHC zone code.
        value: Value to disaggregate (e.g., average rent).
        session: Optional existing session, forwarded unchanged to the
            weight lookup.

    Returns:
        Dictionary mapping neighbourhood_id to ``value * weight``. Empty when
        the lookup yields no weights for the zone.

    Note:
        For averages (like rent), each entry is only this zone's
        contribution. To get a neighbourhood's total, sum the contributions
        from all zones that overlap it.

        NOTE(review): the weights appear to be shares of the *zone's* area
        (see ``build_cmhc_neighbourhood_crosswalk``), so the weights a single
        neighbourhood receives from different zones need not sum to 1 --
        confirm that summing is the intended treatment for averages.
    """
    contributions: dict[int, float] = {}
    for nbhd_id, share in get_neighbourhood_weights_for_zone(zone_code, session):
        contributions[nbhd_id] = value * share
    return contributions
|