Files
personal-portfolio/portfolio_app/toronto/schemas/dimensions.py
lmiranda ead6d91a28 feat: add Pydantic schemas, SQLAlchemy models, and parser structure
Sprint 3 implementation:
- Pydantic schemas for TRREB, CMHC, and dimension data validation
- SQLAlchemy models with PostGIS geometry for fact and dimension tables
- Parser structure (stubs) for TRREB PDF and CMHC CSV processing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 14:58:31 -05:00

122 lines
3.5 KiB
Python

"""Pydantic schemas for dimension tables."""
from datetime import date
from decimal import Decimal
from enum import Enum
from pydantic import BaseModel, Field, HttpUrl
class PolicyLevel(str, Enum):
"""Government level for policy events."""
FEDERAL = "federal"
PROVINCIAL = "provincial"
MUNICIPAL = "municipal"
class PolicyCategory(str, Enum):
"""Policy event category."""
MONETARY = "monetary"
TAX = "tax"
REGULATORY = "regulatory"
SUPPLY = "supply"
ECONOMIC = "economic"
class ExpectedDirection(str, Enum):
"""Expected price impact direction."""
BULLISH = "bullish" # Expected to increase prices
BEARISH = "bearish" # Expected to decrease prices
NEUTRAL = "neutral" # Uncertain or mixed impact
class Confidence(str, Enum):
"""Confidence level in policy event data."""
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
class AreaType(str, Enum):
"""TRREB area type."""
WEST = "West"
CENTRAL = "Central"
EAST = "East"
NORTH = "North"
class TimeDimension(BaseModel):
"""Schema for time dimension record."""
date_key: int = Field(description="Date key in YYYYMMDD format")
full_date: date
year: int = Field(ge=2000, le=2100)
month: int = Field(ge=1, le=12)
quarter: int = Field(ge=1, le=4)
month_name: str = Field(max_length=20)
is_month_start: bool = True
class TRREBDistrict(BaseModel):
"""Schema for TRREB district dimension."""
district_code: str = Field(max_length=3, description="W01, C01, E01, etc.")
district_name: str = Field(max_length=100)
area_type: AreaType
geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
class CMHCZone(BaseModel):
"""Schema for CMHC zone dimension."""
zone_code: str = Field(max_length=10)
zone_name: str = Field(max_length=100)
geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
class Neighbourhood(BaseModel):
"""Schema for City of Toronto neighbourhood dimension.
Note: No FK to fact tables in V1 - reference overlay only.
"""
neighbourhood_id: int = Field(ge=1, le=200)
name: str = Field(max_length=100)
geometry_wkt: str | None = Field(default=None)
population: int | None = Field(default=None, ge=0)
land_area_sqkm: Decimal | None = Field(default=None, ge=0)
pop_density_per_sqkm: Decimal | None = Field(default=None, ge=0)
pct_bachelors_or_higher: Decimal | None = Field(default=None, ge=0, le=100)
median_household_income: Decimal | None = Field(default=None, ge=0)
pct_owner_occupied: Decimal | None = Field(default=None, ge=0, le=100)
pct_renter_occupied: Decimal | None = Field(default=None, ge=0, le=100)
census_year: int = Field(default=2021, description="Census year for SCD tracking")
class PolicyEvent(BaseModel):
"""Schema for policy event dimension.
Used for time-series annotation. No causation claims.
"""
event_date: date = Field(description="Date event was announced/occurred")
effective_date: date | None = Field(
default=None, description="Date policy took effect"
)
level: PolicyLevel
category: PolicyCategory
title: str = Field(max_length=200, description="Short event title for display")
description: str | None = Field(
default=None, description="Longer description for tooltip"
)
expected_direction: ExpectedDirection
source_url: HttpUrl | None = Field(default=None)
confidence: Confidence = Field(default=Confidence.MEDIUM)
model_config = {"str_strip_whitespace": True}