Add schemas, parsers, loaders, and models for Toronto neighbourhood-centric data including census profiles, crime statistics, and amenities. Schemas: - NeighbourhoodRecord, CensusRecord, CrimeRecord, CrimeType - AmenityType, AmenityRecord, AmenityCount Models: - BridgeCMHCNeighbourhood (zone-to-neighbourhood mapping with weights) - FactCensus, FactCrime, FactAmenities Parsers: - TorontoOpenDataParser (CKAN API for neighbourhoods, census, amenities) - TorontoPoliceParser (crime rates, MCI data) Loaders: - load_census_data, load_crime_data, load_amenities - build_cmhc_neighbourhood_crosswalk (PostGIS area weights) Also updates CLAUDE.md with projman plugin workflow documentation. Closes #53, #54, #55, #56, #57, #58, #59 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
107 lines
3.7 KiB
Python
107 lines
3.7 KiB
Python
"""Pydantic schemas for Toronto neighbourhood data.
|
|
|
|
Includes schemas for neighbourhood boundaries, census profiles, and crime statistics.
|
|
"""
|
|
|
|
from decimal import Decimal
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class CrimeType(str, Enum):
|
|
"""Major crime indicator types from Toronto Police data."""
|
|
|
|
ASSAULT = "assault"
|
|
AUTO_THEFT = "auto_theft"
|
|
BREAK_AND_ENTER = "break_and_enter"
|
|
HOMICIDE = "homicide"
|
|
ROBBERY = "robbery"
|
|
SHOOTING = "shooting"
|
|
THEFT_OVER = "theft_over"
|
|
THEFT_FROM_MOTOR_VEHICLE = "theft_from_motor_vehicle"
|
|
OTHER = "other"
|
|
|
|
|
|
class NeighbourhoodRecord(BaseModel):
|
|
"""Schema for Toronto neighbourhood boundary data.
|
|
|
|
Based on City of Toronto's 158 neighbourhoods dataset.
|
|
AREA_ID maps to neighbourhood_id for consistency with police data (Hood_ID).
|
|
"""
|
|
|
|
area_id: int = Field(description="AREA_ID from Toronto Open Data (1-158)")
|
|
area_name: str = Field(max_length=100, description="Official neighbourhood name")
|
|
area_short_code: str | None = Field(
|
|
default=None, max_length=10, description="Short code (e.g., 'E01')"
|
|
)
|
|
geometry: dict[str, Any] | None = Field(
|
|
default=None, description="GeoJSON geometry object"
|
|
)
|
|
|
|
model_config = {"str_strip_whitespace": True}
|
|
|
|
|
|
class CensusRecord(BaseModel):
|
|
"""Census profile data for a neighbourhood.
|
|
|
|
Contains demographic and socioeconomic indicators from Statistics Canada
|
|
census data, aggregated to the neighbourhood level.
|
|
"""
|
|
|
|
neighbourhood_id: int = Field(
|
|
ge=1, le=200, description="Neighbourhood ID (AREA_ID)"
|
|
)
|
|
census_year: int = Field(ge=2016, le=2030, description="Census year")
|
|
population: int | None = Field(default=None, ge=0, description="Total population")
|
|
population_density: Decimal | None = Field(
|
|
default=None, ge=0, description="Population per square kilometre"
|
|
)
|
|
median_household_income: Decimal | None = Field(
|
|
default=None, ge=0, description="Median household income (CAD)"
|
|
)
|
|
average_household_income: Decimal | None = Field(
|
|
default=None, ge=0, description="Average household income (CAD)"
|
|
)
|
|
unemployment_rate: Decimal | None = Field(
|
|
default=None, ge=0, le=100, description="Unemployment rate percentage"
|
|
)
|
|
pct_bachelors_or_higher: Decimal | None = Field(
|
|
default=None, ge=0, le=100, description="Percentage with bachelor's degree+"
|
|
)
|
|
pct_owner_occupied: Decimal | None = Field(
|
|
default=None, ge=0, le=100, description="Percentage owner-occupied dwellings"
|
|
)
|
|
pct_renter_occupied: Decimal | None = Field(
|
|
default=None, ge=0, le=100, description="Percentage renter-occupied dwellings"
|
|
)
|
|
median_age: Decimal | None = Field(
|
|
default=None, ge=0, le=120, description="Median age of residents"
|
|
)
|
|
average_dwelling_value: Decimal | None = Field(
|
|
default=None, ge=0, description="Average dwelling value (CAD)"
|
|
)
|
|
|
|
model_config = {"str_strip_whitespace": True}
|
|
|
|
|
|
class CrimeRecord(BaseModel):
|
|
"""Crime statistics for a neighbourhood.
|
|
|
|
Based on Toronto Police neighbourhood crime rates data.
|
|
Hood_ID in source data maps to neighbourhood_id (AREA_ID).
|
|
"""
|
|
|
|
neighbourhood_id: int = Field(
|
|
ge=1, le=200, description="Neighbourhood ID (Hood_ID -> AREA_ID)"
|
|
)
|
|
year: int = Field(ge=2014, le=2030, description="Year of crime statistics")
|
|
crime_type: CrimeType = Field(description="Type of crime (MCI category)")
|
|
count: int = Field(ge=0, description="Number of incidents")
|
|
rate_per_100k: Decimal | None = Field(
|
|
default=None, ge=0, description="Rate per 100,000 population"
|
|
)
|
|
|
|
model_config = {"str_strip_whitespace": True}
|