feat: add Pydantic schemas, SQLAlchemy models, and parser structure

Sprint 3 implementation:
- Pydantic schemas for TRREB, CMHC, and dimension data validation
- SQLAlchemy models with PostGIS geometry for fact and dimension tables
- Parser structure (stubs) for TRREB PDF and CMHC CSV processing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 14:58:31 -05:00
parent 549e1fcbaf
commit ead6d91a28
11 changed files with 760 additions and 1 deletions
--- a/portfolio_app/toronto/schemas/cmhc.py
+++ b/portfolio_app/toronto/schemas/cmhc.py
@@ -0,0 +1,81 @@
+"""Pydantic schemas for CMHC rental market data."""
+
+from decimal import Decimal
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+class BedroomType(str, Enum):
+    """CMHC bedroom type categories.
+
+    Members also subclass ``str``, so each one compares equal to its label
+    (``BedroomType.ONE_BED == "1 Bedroom"``) and can be looked up by value
+    with ``BedroomType("1 Bedroom")``.
+    """
+
+    BACHELOR = "Bachelor"
+    ONE_BED = "1 Bedroom"
+    TWO_BED = "2 Bedroom"
+    THREE_BED_PLUS = "3 Bedroom+"
+    TOTAL = "Total"  # presumably the all-bedroom-types aggregate row; confirm against the CMHC export
+
+
+class ReliabilityCode(str, Enum):
+    """CMHC data reliability codes.
+
+    Based on coefficient of variation (CV). Each member's value is the
+    marker string that accompanies a published estimate.
+
+    NOTE(review): the CV thresholds and grade names in the member comments
+    are reproduced as written in this file. CMHC's published methodology
+    appears to label the grades a=Excellent, b=Very good, c=Good, d=Fair
+    and to suppress estimates once CV exceeds roughly 10% -- confirm
+    against the Rental Market Survey methodology before relying on them.
+    """
+
+    EXCELLENT = "a"  # CV <= 2.5%
+    GOOD = "b"  # 2.5% < CV <= 5%
+    FAIR = "c"  # 5% < CV <= 10%
+    POOR = "d"  # CV > 10%
+    SUPPRESSED = "**"  # Sample too small
+
+
+class CMHCRentalRecord(BaseModel):
+    """Schema for a single CMHC rental survey record.
+
+    Represents rental data for one zone and bedroom type in one survey year.
+
+    ``survey_year``, ``zone_code``, ``zone_name`` and ``bedroom_type`` are
+    required. Every other field is optional and defaults to ``None``.
+    Rate fields are constrained to the range 0-100, ``universe`` and the
+    rent amounts must be non-negative, and ``rent_change_pct`` carries no
+    bounds.
+    """
+
+    # --- Identification (required) ---
+    survey_year: int = Field(ge=1990, description="Survey year (October snapshot)")
+    zone_code: str = Field(max_length=10, description="CMHC zone identifier")
+    zone_name: str = Field(max_length=100, description="Zone name")
+    bedroom_type: BedroomType = Field(description="Bedroom category")
+
+    # --- Measures (optional; None when the value is absent) ---
+    universe: int | None = Field(
+        default=None, ge=0, description="Total rental units in zone"
+    )
+    vacancy_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Vacancy rate (%)"
+    )
+    # Reliability marker paired with vacancy_rate.
+    vacancy_rate_reliability: ReliabilityCode | None = Field(default=None)
+    availability_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Availability rate (%)"
+    )
+    average_rent: Decimal | None = Field(
+        default=None, ge=0, description="Average monthly rent ($)"
+    )
+    # Reliability marker paired with average_rent.
+    average_rent_reliability: ReliabilityCode | None = Field(default=None)
+    median_rent: Decimal | None = Field(
+        default=None, ge=0, description="Median monthly rent ($)"
+    )
+    # No ge/le bounds here, so negative (falling-rent) values are accepted.
+    rent_change_pct: Decimal | None = Field(
+        default=None, description="YoY rent change (%)"
+    )
+    turnover_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Unit turnover rate (%)"
+    )
+
+    # Pydantic setting: strip leading/trailing whitespace from str values.
+    model_config = {"str_strip_whitespace": True}
+
+
+class CMHCAnnualSurvey(BaseModel):
+    """Schema for a complete CMHC annual survey for Toronto.
+
+    Contains all zone and bedroom type combinations for one survey year.
+    """
+
+    survey_year: int
+    records: list[CMHCRentalRecord]
+
+    @property
+    def zone_count(self) -> int:
+        """Number of unique zones in survey."""
+        return len({r.zone_code for r in self.records})