feat: add Pydantic schemas, SQLAlchemy models, and parser structure
Sprint 3 implementation: - Pydantic schemas for TRREB, CMHC, and dimension data validation - SQLAlchemy models with PostGIS geometry for fact and dimension tables - Parser structure (stubs) for TRREB PDF and CMHC CSV processing Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
121
portfolio_app/toronto/schemas/dimensions.py
Normal file
121
portfolio_app/toronto/schemas/dimensions.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Pydantic schemas for dimension tables."""
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
|
||||
class PolicyLevel(str, Enum):
|
||||
"""Government level for policy events."""
|
||||
|
||||
FEDERAL = "federal"
|
||||
PROVINCIAL = "provincial"
|
||||
MUNICIPAL = "municipal"
|
||||
|
||||
|
||||
class PolicyCategory(str, Enum):
|
||||
"""Policy event category."""
|
||||
|
||||
MONETARY = "monetary"
|
||||
TAX = "tax"
|
||||
REGULATORY = "regulatory"
|
||||
SUPPLY = "supply"
|
||||
ECONOMIC = "economic"
|
||||
|
||||
|
||||
class ExpectedDirection(str, Enum):
|
||||
"""Expected price impact direction."""
|
||||
|
||||
BULLISH = "bullish" # Expected to increase prices
|
||||
BEARISH = "bearish" # Expected to decrease prices
|
||||
NEUTRAL = "neutral" # Uncertain or mixed impact
|
||||
|
||||
|
||||
class Confidence(str, Enum):
|
||||
"""Confidence level in policy event data."""
|
||||
|
||||
HIGH = "high"
|
||||
MEDIUM = "medium"
|
||||
LOW = "low"
|
||||
|
||||
|
||||
class AreaType(str, Enum):
|
||||
"""TRREB area type."""
|
||||
|
||||
WEST = "West"
|
||||
CENTRAL = "Central"
|
||||
EAST = "East"
|
||||
NORTH = "North"
|
||||
|
||||
|
||||
class TimeDimension(BaseModel):
|
||||
"""Schema for time dimension record."""
|
||||
|
||||
date_key: int = Field(description="Date key in YYYYMMDD format")
|
||||
full_date: date
|
||||
year: int = Field(ge=2000, le=2100)
|
||||
month: int = Field(ge=1, le=12)
|
||||
quarter: int = Field(ge=1, le=4)
|
||||
month_name: str = Field(max_length=20)
|
||||
is_month_start: bool = True
|
||||
|
||||
|
||||
class TRREBDistrict(BaseModel):
|
||||
"""Schema for TRREB district dimension."""
|
||||
|
||||
district_code: str = Field(max_length=3, description="W01, C01, E01, etc.")
|
||||
district_name: str = Field(max_length=100)
|
||||
area_type: AreaType
|
||||
geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
|
||||
|
||||
|
||||
class CMHCZone(BaseModel):
|
||||
"""Schema for CMHC zone dimension."""
|
||||
|
||||
zone_code: str = Field(max_length=10)
|
||||
zone_name: str = Field(max_length=100)
|
||||
geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
|
||||
|
||||
|
||||
class Neighbourhood(BaseModel):
|
||||
"""Schema for City of Toronto neighbourhood dimension.
|
||||
|
||||
Note: No FK to fact tables in V1 - reference overlay only.
|
||||
"""
|
||||
|
||||
neighbourhood_id: int = Field(ge=1, le=200)
|
||||
name: str = Field(max_length=100)
|
||||
geometry_wkt: str | None = Field(default=None)
|
||||
population: int | None = Field(default=None, ge=0)
|
||||
land_area_sqkm: Decimal | None = Field(default=None, ge=0)
|
||||
pop_density_per_sqkm: Decimal | None = Field(default=None, ge=0)
|
||||
pct_bachelors_or_higher: Decimal | None = Field(default=None, ge=0, le=100)
|
||||
median_household_income: Decimal | None = Field(default=None, ge=0)
|
||||
pct_owner_occupied: Decimal | None = Field(default=None, ge=0, le=100)
|
||||
pct_renter_occupied: Decimal | None = Field(default=None, ge=0, le=100)
|
||||
census_year: int = Field(default=2021, description="Census year for SCD tracking")
|
||||
|
||||
|
||||
class PolicyEvent(BaseModel):
|
||||
"""Schema for policy event dimension.
|
||||
|
||||
Used for time-series annotation. No causation claims.
|
||||
"""
|
||||
|
||||
event_date: date = Field(description="Date event was announced/occurred")
|
||||
effective_date: date | None = Field(
|
||||
default=None, description="Date policy took effect"
|
||||
)
|
||||
level: PolicyLevel
|
||||
category: PolicyCategory
|
||||
title: str = Field(max_length=200, description="Short event title for display")
|
||||
description: str | None = Field(
|
||||
default=None, description="Longer description for tooltip"
|
||||
)
|
||||
expected_direction: ExpectedDirection
|
||||
source_url: HttpUrl | None = Field(default=None)
|
||||
confidence: Confidence = Field(default=Confidence.MEDIUM)
|
||||
|
||||
model_config = {"str_strip_whitespace": True}
|
||||
Reference in New Issue
Block a user