feat: Implement Phase 3 neighbourhood data model

Add schemas, parsers, loaders, and models for Toronto neighbourhood-centric
data including census profiles, crime statistics, and amenities.

Schemas:
- NeighbourhoodRecord, CensusRecord, CrimeRecord, CrimeType
- AmenityType, AmenityRecord, AmenityCount

Models:
- BridgeCMHCNeighbourhood (zone-to-neighbourhood mapping with weights)
- FactCensus, FactCrime, FactAmenities

Parsers:
- TorontoOpenDataParser (CKAN API for neighbourhoods, census, amenities)
- TorontoPoliceParser (crime rates, MCI data)

Loaders:
- load_census_data, load_crime_data, load_amenities
- build_cmhc_neighbourhood_crosswalk (PostGIS area weights)

Also updates CLAUDE.md with projman plugin workflow documentation.

Closes #53, #54, #55, #56, #57, #58, #59

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-16 11:07:13 -05:00
parent f69d0c15a7
commit 053acf6436
14 changed files with 1466 additions and 2 deletions

View File

@@ -1,11 +1,117 @@
"""SQLAlchemy models for fact tables."""
from sqlalchemy import ForeignKey, Integer, Numeric, String
from sqlalchemy import ForeignKey, Index, Integer, Numeric, String
from sqlalchemy.orm import Mapped, mapped_column, relationship
from .base import Base
class BridgeCMHCNeighbourhood(Base):
"""Bridge table for CMHC zone to neighbourhood mapping with area weights.
Enables disaggregation of CMHC zone-level rental data to neighbourhood level
using area-based proportional weights computed via PostGIS.
"""
__tablename__ = "bridge_cmhc_neighbourhood"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
cmhc_zone_code: Mapped[str] = mapped_column(String(10), nullable=False)
neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
weight: Mapped[float] = mapped_column(
Numeric(5, 4), nullable=False
) # 0.0000 to 1.0000
__table_args__ = (
Index("ix_bridge_cmhc_zone", "cmhc_zone_code"),
Index("ix_bridge_neighbourhood", "neighbourhood_id"),
)
class FactCensus(Base):
"""Census statistics by neighbourhood and year.
Grain: One row per neighbourhood per census year.
"""
__tablename__ = "fact_census"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
census_year: Mapped[int] = mapped_column(Integer, nullable=False)
population: Mapped[int | None] = mapped_column(Integer, nullable=True)
population_density: Mapped[float | None] = mapped_column(
Numeric(10, 2), nullable=True
)
median_household_income: Mapped[float | None] = mapped_column(
Numeric(12, 2), nullable=True
)
average_household_income: Mapped[float | None] = mapped_column(
Numeric(12, 2), nullable=True
)
unemployment_rate: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
pct_bachelors_or_higher: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
pct_owner_occupied: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
pct_renter_occupied: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
median_age: Mapped[float | None] = mapped_column(Numeric(5, 2), nullable=True)
average_dwelling_value: Mapped[float | None] = mapped_column(
Numeric(12, 2), nullable=True
)
__table_args__ = (
Index("ix_fact_census_neighbourhood_year", "neighbourhood_id", "census_year"),
)
class FactCrime(Base):
"""Crime statistics by neighbourhood and year.
Grain: One row per neighbourhood per year per crime type.
"""
__tablename__ = "fact_crime"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
year: Mapped[int] = mapped_column(Integer, nullable=False)
crime_type: Mapped[str] = mapped_column(String(50), nullable=False)
count: Mapped[int] = mapped_column(Integer, nullable=False)
rate_per_100k: Mapped[float | None] = mapped_column(Numeric(10, 2), nullable=True)
__table_args__ = (
Index("ix_fact_crime_neighbourhood_year", "neighbourhood_id", "year"),
Index("ix_fact_crime_type", "crime_type"),
)
class FactAmenities(Base):
"""Amenity counts by neighbourhood.
Grain: One row per neighbourhood per amenity type per year.
"""
__tablename__ = "fact_amenities"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
neighbourhood_id: Mapped[int] = mapped_column(Integer, nullable=False)
amenity_type: Mapped[str] = mapped_column(String(50), nullable=False)
count: Mapped[int] = mapped_column(Integer, nullable=False)
year: Mapped[int] = mapped_column(Integer, nullable=False)
__table_args__ = (
Index("ix_fact_amenities_neighbourhood_year", "neighbourhood_id", "year"),
Index("ix_fact_amenities_type", "amenity_type"),
)
class FactRentals(Base):
"""Fact table for CMHC rental market data.