diff --git a/portfolio_app/toronto/models/__init__.py b/portfolio_app/toronto/models/__init__.py
index 65e6290..e09a8a7 100644
--- a/portfolio_app/toronto/models/__init__.py
+++ b/portfolio_app/toronto/models/__init__.py
@@ -1 +1,28 @@
 """SQLAlchemy models for Toronto housing data."""
+
+from .base import Base, create_tables, get_engine, get_session_factory
+from .dimensions import (
+    DimCMHCZone,
+    DimNeighbourhood,
+    DimPolicyEvent,
+    DimTime,
+    DimTRREBDistrict,
+)
+from .facts import FactPurchases, FactRentals
+
+__all__ = [
+    # Base
+    "Base",
+    "get_engine",
+    "get_session_factory",
+    "create_tables",
+    # Dimensions
+    "DimTime",
+    "DimTRREBDistrict",
+    "DimCMHCZone",
+    "DimNeighbourhood",
+    "DimPolicyEvent",
+    # Facts
+    "FactPurchases",
+    "FactRentals",
+]
diff --git a/portfolio_app/toronto/models/base.py b/portfolio_app/toronto/models/base.py
new file mode 100644
index 0000000..7998726
--- /dev/null
+++ b/portfolio_app/toronto/models/base.py
@@ -0,0 +1,38 @@
+"""SQLAlchemy base configuration and engine setup."""
+
+from functools import lru_cache
+
+from sqlalchemy import Engine, create_engine
+from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
+
+from portfolio_app.config import get_settings
+
+
+class Base(DeclarativeBase):  # type: ignore[misc]
+    """Base class for all SQLAlchemy models."""
+
+    pass
+
+
+@lru_cache(maxsize=1)
+def get_engine() -> Engine:
+    """Return the shared database engine built from settings.
+
+    The engine is created on first use and cached, so every caller reuses
+    one engine (and its connection pool) instead of building a new one per
+    call. Use ``get_engine.cache_clear()`` to force a rebuild.
+    """
+    settings = get_settings()
+    return create_engine(settings.database_url, echo=False)
+
+
+def get_session_factory() -> sessionmaker[Session]:
+    """Create a session factory bound to the shared engine."""
+    engine = get_engine()
+    return sessionmaker(bind=engine)
+
+
+def create_tables() -> None:
+    """Create all tables registered on ``Base.metadata`` in the database."""
+    engine = get_engine()
+    Base.metadata.create_all(engine)
diff --git a/portfolio_app/toronto/models/dimensions.py b/portfolio_app/toronto/models/dimensions.py
new file mode 100644
index 0000000..a8f8bef
--- /dev/null
+++ b/portfolio_app/toronto/models/dimensions.py
@@ -0,0 +1,109 @@
+"""SQLAlchemy models for dimension tables."""
+
+from datetime import date
+from decimal import Decimal
+
+from geoalchemy2 import Geometry
+from sqlalchemy import Boolean, Date, Integer, Numeric, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from .base import Base
+
+
+class DimTime(Base):
+    """Time dimension table."""
+
+    __tablename__ = "dim_time"
+
+    date_key: Mapped[int] = mapped_column(Integer, primary_key=True)
+    full_date: Mapped[date] = mapped_column(Date, nullable=False, unique=True)
+    year: Mapped[int] = mapped_column(Integer, nullable=False)
+    month: Mapped[int] = mapped_column(Integer, nullable=False)
+    quarter: Mapped[int] = mapped_column(Integer, nullable=False)
+    month_name: Mapped[str] = mapped_column(String(20), nullable=False)
+    is_month_start: Mapped[bool] = mapped_column(Boolean, default=True)
+
+
+class DimTRREBDistrict(Base):
+    """TRREB district dimension table with PostGIS geometry."""
+
+    __tablename__ = "dim_trreb_district"
+
+    district_key: Mapped[int] = mapped_column(
+        Integer, primary_key=True, autoincrement=True
+    )
+    district_code: Mapped[str] = mapped_column(String(3), nullable=False, unique=True)
+    district_name: Mapped[str] = mapped_column(String(100), nullable=False)
+    area_type: Mapped[str] = mapped_column(String(10), nullable=False)
+    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
+
+
+class DimCMHCZone(Base):
+    """CMHC zone dimension table with PostGIS geometry."""
+
+    __tablename__ = "dim_cmhc_zone"
+
+    zone_key: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    zone_code: Mapped[str] = mapped_column(String(10), nullable=False, unique=True)
+    zone_name: Mapped[str] = mapped_column(String(100), nullable=False)
+    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
+
+
+class DimNeighbourhood(Base):
+    """City of Toronto neighbourhood dimension.
+
+    Note: No FK to fact tables in V1 - reference overlay only.
+    """
+
+    __tablename__ = "dim_neighbourhood"
+
+    neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    name: Mapped[str] = mapped_column(String(100), nullable=False)
+    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
+    population: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    # Numeric columns use SQLAlchemy's default asdecimal=True, so values load
+    # as Decimal (not float); the annotations below reflect that.
+    land_area_sqkm: Mapped[Decimal | None] = mapped_column(
+        Numeric(10, 4), nullable=True
+    )
+    pop_density_per_sqkm: Mapped[Decimal | None] = mapped_column(
+        Numeric(10, 2), nullable=True
+    )
+    pct_bachelors_or_higher: Mapped[Decimal | None] = mapped_column(
+        Numeric(5, 2), nullable=True
+    )
+    median_household_income: Mapped[Decimal | None] = mapped_column(
+        Numeric(12, 2), nullable=True
+    )
+    pct_owner_occupied: Mapped[Decimal | None] = mapped_column(
+        Numeric(5, 2), nullable=True
+    )
+    pct_renter_occupied: Mapped[Decimal | None] = mapped_column(
+        Numeric(5, 2), nullable=True
+    )
+    census_year: Mapped[int] = mapped_column(Integer, default=2021)
+
+
+class DimPolicyEvent(Base):
+    """Policy event dimension for time-series annotation."""
+
+    __tablename__ = "dim_policy_event"
+
+    event_id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    event_date: Mapped[date] = mapped_column(Date, nullable=False)
+    effective_date: Mapped[date | None] = mapped_column(Date, nullable=True)
+    level: Mapped[str] = mapped_column(
+        String(20), nullable=False
+    )  # federal/provincial/municipal
+    category: Mapped[str] = mapped_column(
+        String(20), nullable=False
+    )  # monetary/tax/regulatory/supply/economic
+    title: Mapped[str] = mapped_column(String(200), nullable=False)
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    expected_direction: Mapped[str] = mapped_column(
+        String(10), nullable=False
+    )  # bearish/bullish/neutral
+    source_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
+    confidence: Mapped[str] = mapped_column(
+        String(10), default="medium"
+    )  # high/medium/low
diff --git a/portfolio_app/toronto/models/facts.py b/portfolio_app/toronto/models/facts.py
new file mode 100644
index 0000000..3a072a8
--- /dev/null
+++ b/portfolio_app/toronto/models/facts.py
@@ -0,0 +1,75 @@
+"""SQLAlchemy models for fact tables."""
+
+from decimal import Decimal
+
+from sqlalchemy import ForeignKey, Integer, Numeric, String
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from .base import Base
+
+
+class FactPurchases(Base):
+    """Fact table for TRREB purchase/sales data.
+
+    Grain: One row per district per month.
+    """
+
+    __tablename__ = "fact_purchases"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    date_key: Mapped[int] = mapped_column(
+        Integer, ForeignKey("dim_time.date_key"), nullable=False
+    )
+    district_key: Mapped[int] = mapped_column(
+        Integer, ForeignKey("dim_trreb_district.district_key"), nullable=False
+    )
+    sales_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    # Numeric columns load as Decimal (SQLAlchemy's default asdecimal=True).
+    dollar_volume: Mapped[Decimal] = mapped_column(Numeric(15, 2), nullable=False)
+    avg_price: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
+    median_price: Mapped[Decimal] = mapped_column(Numeric(12, 2), nullable=False)
+    new_listings: Mapped[int] = mapped_column(Integer, nullable=False)
+    active_listings: Mapped[int] = mapped_column(Integer, nullable=False)
+    avg_dom: Mapped[int] = mapped_column(Integer, nullable=False)  # Days on market
+    avg_sp_lp: Mapped[Decimal] = mapped_column(
+        Numeric(5, 2), nullable=False
+    )  # Sale/List ratio
+
+    # Relationships
+    time = relationship("DimTime", backref="purchases")
+    district = relationship("DimTRREBDistrict", backref="purchases")
+
+
+class FactRentals(Base):
+    """Fact table for CMHC rental market data.
+
+    Grain: One row per zone per bedroom type per survey year.
+    """
+
+    __tablename__ = "fact_rentals"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    date_key: Mapped[int] = mapped_column(
+        Integer, ForeignKey("dim_time.date_key"), nullable=False
+    )
+    zone_key: Mapped[int] = mapped_column(
+        Integer, ForeignKey("dim_cmhc_zone.zone_key"), nullable=False
+    )
+    bedroom_type: Mapped[str] = mapped_column(String(20), nullable=False)
+    universe: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    # Numeric columns load as Decimal (SQLAlchemy's default asdecimal=True).
+    avg_rent: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
+    median_rent: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
+    vacancy_rate: Mapped[Decimal | None] = mapped_column(Numeric(5, 2), nullable=True)
+    availability_rate: Mapped[Decimal | None] = mapped_column(
+        Numeric(5, 2), nullable=True
+    )
+    turnover_rate: Mapped[Decimal | None] = mapped_column(Numeric(5, 2), nullable=True)
+    rent_change_pct: Mapped[Decimal | None] = mapped_column(
+        Numeric(5, 2), nullable=True
+    )
+    reliability_code: Mapped[str | None] = mapped_column(String(2), nullable=True)
+
+    # Relationships
+    time = relationship("DimTime", backref="rentals")
+    zone = relationship("DimCMHCZone", backref="rentals")
diff --git a/portfolio_app/toronto/parsers/__init__.py b/portfolio_app/toronto/parsers/__init__.py
index add9fab..30f1919 100644
--- a/portfolio_app/toronto/parsers/__init__.py
+++ b/portfolio_app/toronto/parsers/__init__.py
@@ -1 +1,9 @@
-"""Data parsers for Toronto housing data sources."""
+"""Parsers for Toronto housing data sources."""
+
+from .cmhc import CMHCParser
+from .trreb import TRREBParser
+
+__all__ = [
+    "TRREBParser",
+    "CMHCParser",
+]
diff --git a/portfolio_app/toronto/parsers/cmhc.py b/portfolio_app/toronto/parsers/cmhc.py
new file mode 100644
index 0000000..52ac3ad
--- /dev/null
+++ b/portfolio_app/toronto/parsers/cmhc.py
@@ -0,0 +1,166 @@
+"""CMHC CSV processor for rental market survey data.
+
+This module provides the structure for processing CMHC (Canada Mortgage and Housing
+Corporation) rental market survey data from CSV exports.
+"""
+
+from pathlib import Path
+from typing import Any, cast
+
+import pandas as pd
+
+from portfolio_app.toronto.schemas import CMHCAnnualSurvey, CMHCRentalRecord
+
+
+class CMHCParser:
+    """Parser for CMHC Rental Market Survey CSV data.
+
+    CMHC conducts annual rental market surveys and publishes data including:
+    - Average and median rents by zone and bedroom type
+    - Vacancy rates
+    - Universe (total rental units)
+    - Year-over-year rent changes
+
+    Data is available via the Housing Market Information Portal as CSV exports.
+    """
+
+    # Expected columns in CMHC CSV exports
+    REQUIRED_COLUMNS = {
+        "zone_code",
+        "zone_name",
+        "bedroom_type",
+        "survey_year",
+    }
+
+    # Column name mappings from CMHC export format. Targets must be
+    # CMHCRentalRecord field names: pydantic ignores undeclared keys by
+    # default, so a mismatched target silently drops that column.
+    COLUMN_MAPPINGS = {
+        "Zone Code": "zone_code",
+        "Zone Name": "zone_name",
+        "Bedroom Type": "bedroom_type",
+        "Survey Year": "survey_year",
+        "Universe": "universe",
+        "Average Rent ($)": "average_rent",
+        "Median Rent ($)": "median_rent",
+        "Vacancy Rate (%)": "vacancy_rate",
+        "Availability Rate (%)": "availability_rate",
+        "Turnover Rate (%)": "turnover_rate",
+        "% Change in Rent": "rent_change_pct",
+        # NOTE(review): CMHCRentalRecord has no `reliability_code` field (it has
+        # vacancy_rate_reliability / average_rent_reliability) - confirm which
+        # one this export column should feed.
+        "Reliability Code": "reliability_code",
+    }
+
+    def __init__(self, csv_path: Path) -> None:
+        """Initialize parser with path to CSV file.
+
+        Args:
+            csv_path: Path to the CMHC CSV export file.
+
+        Raises:
+            FileNotFoundError: If ``csv_path`` does not exist.
+            ValueError: If ``csv_path`` does not have a ``.csv`` suffix.
+        """
+        self.csv_path = csv_path
+        self._validate_path()
+
+    def _validate_path(self) -> None:
+        """Validate that the path exists and has a ``.csv`` suffix.
+
+        Raises:
+            FileNotFoundError: If the path does not exist.
+            ValueError: If the suffix is not ``.csv`` (case-insensitive).
+        """
+        if not self.csv_path.exists():
+            raise FileNotFoundError(f"CSV not found: {self.csv_path}")
+        if self.csv_path.suffix.lower() != ".csv":
+            raise ValueError(f"Expected CSV file, got: {self.csv_path.suffix}")
+
+    def parse(self) -> CMHCAnnualSurvey:
+        """Parse the CSV and return structured data.
+
+        Returns:
+            CMHCAnnualSurvey containing all extracted records.
+
+        Raises:
+            ValueError: If required columns are missing or the file has no
+                data rows to infer the survey year from.
+        """
+        df = self._load_csv()
+        df = self._normalize_columns(df)
+        self._validate_columns(df)
+        records = self._convert_to_records(df)
+        survey_year = self._infer_survey_year(df)
+
+        return CMHCAnnualSurvey(survey_year=survey_year, records=records)
+
+    def _load_csv(self) -> pd.DataFrame:
+        """Load CSV file into DataFrame.
+
+        Returns:
+            Raw DataFrame from CSV.
+        """
+        return pd.read_csv(self.csv_path)
+
+    def _normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Normalize column names to standard format.
+
+        Args:
+            df: DataFrame with original column names.
+
+        Returns:
+            DataFrame with normalized column names.
+        """
+        rename_map = {k: v for k, v in self.COLUMN_MAPPINGS.items() if k in df.columns}
+        return df.rename(columns=rename_map)
+
+    def _validate_columns(self, df: pd.DataFrame) -> None:
+        """Validate that all required columns are present.
+
+        Args:
+            df: DataFrame to validate.
+
+        Raises:
+            ValueError: If required columns are missing.
+        """
+        missing = self.REQUIRED_COLUMNS - set(df.columns)
+        if missing:
+            raise ValueError(f"Missing required columns: {missing}")
+
+    def _convert_to_records(self, df: pd.DataFrame) -> list[CMHCRentalRecord]:
+        """Convert DataFrame rows to validated schema records.
+
+        Args:
+            df: Normalized DataFrame.
+
+        Returns:
+            List of validated CMHCRentalRecord objects.
+        """
+        records = []
+        for _, row in df.iterrows():
+            # pandas marks missing cells as NaN; the schema expects None.
+            record_data = {
+                k: (None if pd.isna(v) else v) for k, v in row.to_dict().items()
+            }
+            records.append(CMHCRentalRecord(**cast(dict[str, Any], record_data)))
+        return records
+
+    def _infer_survey_year(self, df: pd.DataFrame) -> int:
+        """Infer survey year from the first data row.
+
+        Args:
+            df: DataFrame with survey_year column.
+
+        Returns:
+            Survey year as integer.
+
+        Raises:
+            ValueError: If the column is absent or the DataFrame has no rows.
+        """
+        # Guard the empty case: ``iloc[0]`` on zero rows raises IndexError,
+        # which callers of ``parse`` are not told to expect.
+        if "survey_year" not in df.columns or df.empty:
+            raise ValueError("Cannot infer survey year from data.")
+        return int(df["survey_year"].iloc[0])
diff --git a/portfolio_app/toronto/parsers/trreb.py b/portfolio_app/toronto/parsers/trreb.py
new file mode 100644
index 0000000..fad5869
--- /dev/null
+++ b/portfolio_app/toronto/parsers/trreb.py
@@ -0,0 +1,103 @@
+"""TRREB PDF parser for monthly market watch reports.
+
+This module provides the structure for parsing TRREB (Toronto Regional Real Estate Board)
+monthly Market Watch PDF reports into structured data.
+"""
+
+from pathlib import Path
+from typing import Any
+
+from portfolio_app.toronto.schemas import TRREBMonthlyRecord, TRREBMonthlyReport
+
+
+class TRREBParser:
+    """Parser for TRREB Market Watch PDF reports.
+
+    TRREB publishes monthly Market Watch reports as PDFs containing:
+    - Summary statistics by area (416, 905, Total)
+    - District-level breakdowns
+    - Year-over-year comparisons
+
+    The parser extracts tabular data from these PDFs and validates
+    against the TRREBMonthlyRecord schema.
+
+    Extraction is not implemented yet: ``parse`` and the private helpers
+    currently raise NotImplementedError; only path validation is functional.
+    """
+
+    def __init__(self, pdf_path: Path) -> None:
+        """Initialize parser with path to PDF file.
+
+        Args:
+            pdf_path: Path to the TRREB Market Watch PDF file.
+
+        Raises:
+            FileNotFoundError: If ``pdf_path`` does not exist.
+            ValueError: If ``pdf_path`` does not have a ``.pdf`` suffix.
+        """
+        self.pdf_path = pdf_path
+        self._validate_path()
+
+    def _validate_path(self) -> None:
+        """Validate that the path exists and has a ``.pdf`` suffix.
+
+        Raises:
+            FileNotFoundError: If the path does not exist.
+            ValueError: If the suffix is not ``.pdf`` (case-insensitive).
+        """
+        if not self.pdf_path.exists():
+            raise FileNotFoundError(f"PDF not found: {self.pdf_path}")
+        if self.pdf_path.suffix.lower() != ".pdf":
+            raise ValueError(f"Expected PDF file, got: {self.pdf_path.suffix}")
+
+    def parse(self) -> TRREBMonthlyReport:
+        """Parse the PDF and return structured data.
+
+        Returns:
+            TRREBMonthlyReport containing all extracted records.
+
+        Raises:
+            NotImplementedError: PDF parsing not yet implemented.
+        """
+        raise NotImplementedError(
+            "PDF parsing requires pdfplumber/tabula-py. "
+            "Implementation pending Sprint 4 data ingestion."
+        )
+
+    def _extract_tables(self) -> list[dict[str, Any]]:
+        """Extract raw tables from PDF pages.
+
+        Returns:
+            List of dictionaries representing table data.
+
+        Raises:
+            NotImplementedError: Table extraction not yet implemented.
+        """
+        raise NotImplementedError("Table extraction not yet implemented.")
+
+    def _parse_district_table(
+        self, table_data: list[dict[str, Any]]
+    ) -> list[TRREBMonthlyRecord]:
+        """Parse district-level statistics table.
+
+        Args:
+            table_data: Raw table data extracted from PDF.
+
+        Returns:
+            List of validated TRREBMonthlyRecord objects.
+
+        Raises:
+            NotImplementedError: District table parsing not yet implemented.
+        """
+        raise NotImplementedError("District table parsing not yet implemented.")
+
+    def _infer_report_date(self) -> tuple[int, int]:
+        """Infer report year and month from PDF filename or content.
+
+        Returns:
+            Tuple of (year, month).
+
+        Raises:
+            NotImplementedError: Date inference not yet implemented.
+        """
+        raise NotImplementedError("Date inference not yet implemented.")
diff --git a/portfolio_app/toronto/schemas/__init__.py b/portfolio_app/toronto/schemas/__init__.py
index 5c60cc7..1d33f3e 100644
--- a/portfolio_app/toronto/schemas/__init__.py
+++ b/portfolio_app/toronto/schemas/__init__.py
@@ -1 +1,39 @@
 """Pydantic schemas for Toronto housing data validation."""
+
+from .cmhc import BedroomType, CMHCAnnualSurvey, CMHCRentalRecord, ReliabilityCode
+from .dimensions import (
+    AreaType,
+    CMHCZone,
+    Confidence,
+    ExpectedDirection,
+    Neighbourhood,
+    PolicyCategory,
+    PolicyEvent,
+    PolicyLevel,
+    TimeDimension,
+    TRREBDistrict,
+)
+from .trreb import TRREBMonthlyRecord, TRREBMonthlyReport
+
+__all__ = [
+    # TRREB
+    "TRREBMonthlyRecord",
+    "TRREBMonthlyReport",
+    # CMHC
+    "CMHCRentalRecord",
+    "CMHCAnnualSurvey",
+    "BedroomType",
+    "ReliabilityCode",
+    # Dimensions
+    "TimeDimension",
+    "TRREBDistrict",
+    "CMHCZone",
+    "Neighbourhood",
+    "PolicyEvent",
+    # Enums
+    "AreaType",
+    "PolicyLevel",
+    "PolicyCategory",
+    "ExpectedDirection",
+    "Confidence",
+]
diff --git a/portfolio_app/toronto/schemas/cmhc.py b/portfolio_app/toronto/schemas/cmhc.py
new file mode 100644
index 0000000..edd388b
--- /dev/null
+++ b/portfolio_app/toronto/schemas/cmhc.py
@@ -0,0 +1,81 @@
+"""Pydantic schemas for CMHC rental market data."""
+
+from decimal import Decimal
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+class BedroomType(str, Enum):
+    """CMHC bedroom type categories."""
+
+    BACHELOR = "Bachelor"
+    ONE_BED = "1 Bedroom"
+    TWO_BED = "2 Bedroom"
+    THREE_BED_PLUS = "3 Bedroom+"
+    TOTAL = "Total"
+
+
+class ReliabilityCode(str, Enum):
+    """CMHC data reliability codes.
+
+    Based on coefficient of variation (CV).
+    """
+
+    EXCELLENT = "a"  # CV <= 2.5%
+    GOOD = "b"  # 2.5% < CV <= 5%
+    FAIR = "c"  # 5% < CV <= 10%
+    POOR = "d"  # CV > 10%
+    SUPPRESSED = "**"  # Sample too small
+
+
+class CMHCRentalRecord(BaseModel):
+    """Schema for a single CMHC rental survey record.
+
+    Represents rental data for one zone and bedroom type in one survey year.
+    """
+
+    survey_year: int = Field(ge=1990, description="Survey year (October snapshot)")
+    zone_code: str = Field(max_length=10, description="CMHC zone identifier")
+    zone_name: str = Field(max_length=100, description="Zone name")
+    bedroom_type: BedroomType = Field(description="Bedroom category")
+    universe: int | None = Field(
+        default=None, ge=0, description="Total rental units in zone"
+    )
+    vacancy_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Vacancy rate (%)"
+    )
+    vacancy_rate_reliability: ReliabilityCode | None = Field(default=None)
+    availability_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Availability rate (%)"
+    )
+    average_rent: Decimal | None = Field(
+        default=None, ge=0, description="Average monthly rent ($)"
+    )
+    average_rent_reliability: ReliabilityCode | None = Field(default=None)
+    median_rent: Decimal | None = Field(
+        default=None, ge=0, description="Median monthly rent ($)"
+    )
+    rent_change_pct: Decimal | None = Field(
+        default=None, description="YoY rent change (%)"
+    )
+    turnover_rate: Decimal | None = Field(
+        default=None, ge=0, le=100, description="Unit turnover rate (%)"
+    )
+
+    model_config = {"str_strip_whitespace": True}
+
+
+class CMHCAnnualSurvey(BaseModel):
+    """Schema for a complete CMHC annual survey for Toronto.
+
+    Contains all zone and bedroom type combinations for one survey year.
+    """
+
+    survey_year: int
+    records: list[CMHCRentalRecord]
+
+    @property
+    def zone_count(self) -> int:
+        """Number of unique zones in survey."""
+        return len({r.zone_code for r in self.records})
diff --git a/portfolio_app/toronto/schemas/dimensions.py b/portfolio_app/toronto/schemas/dimensions.py
new file mode 100644
index 0000000..66fd509
--- /dev/null
+++ b/portfolio_app/toronto/schemas/dimensions.py
@@ -0,0 +1,121 @@
+"""Pydantic schemas for dimension tables."""
+
+from datetime import date
+from decimal import Decimal
+from enum import Enum
+
+from pydantic import BaseModel, Field, HttpUrl
+
+
+class PolicyLevel(str, Enum):
+    """Government level for policy events."""
+
+    FEDERAL = "federal"
+    PROVINCIAL = "provincial"
+    MUNICIPAL = "municipal"
+
+
+class PolicyCategory(str, Enum):
+    """Policy event category."""
+
+    MONETARY = "monetary"
+    TAX = "tax"
+    REGULATORY = "regulatory"
+    SUPPLY = "supply"
+    ECONOMIC = "economic"
+
+
+class ExpectedDirection(str, Enum):
+    """Expected price impact direction."""
+
+    BULLISH = "bullish"  # Expected to increase prices
+    BEARISH = "bearish"  # Expected to decrease prices
+    NEUTRAL = "neutral"  # Uncertain or mixed impact
+
+
+class Confidence(str, Enum):
+    """Confidence level in policy event data."""
+
+    HIGH = "high"
+    MEDIUM = "medium"
+    LOW = "low"
+
+
+class AreaType(str, Enum):
+    """TRREB area type."""
+
+    WEST = "West"
+    CENTRAL = "Central"
+    EAST = "East"
+    NORTH = "North"
+
+
+class TimeDimension(BaseModel):
+    """Schema for time dimension record."""
+
+    date_key: int = Field(description="Date key in YYYYMMDD format")
+    full_date: date
+    year: int = Field(ge=2000, le=2100)
+    month: int = Field(ge=1, le=12)
+    quarter: int = Field(ge=1, le=4)
+    month_name: str = Field(max_length=20)
+    is_month_start: bool = True
+
+
+class TRREBDistrict(BaseModel):
+    """Schema for TRREB district dimension."""
+
+    district_code: str = Field(max_length=3, description="W01, C01, E01, etc.")
+    district_name: str = Field(max_length=100)
+    area_type: AreaType
+    geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
+
+
+class CMHCZone(BaseModel):
+    """Schema for CMHC zone dimension."""
+
+    zone_code: str = Field(max_length=10)
+    zone_name: str = Field(max_length=100)
+    geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
+
+
+class Neighbourhood(BaseModel):
+    """Schema for City of Toronto neighbourhood dimension.
+
+    Note: No FK to fact tables in V1 - reference overlay only.
+    """
+
+    neighbourhood_id: int = Field(ge=1, le=200)
+    name: str = Field(max_length=100)
+    geometry_wkt: str | None = Field(default=None)
+    population: int | None = Field(default=None, ge=0)
+    land_area_sqkm: Decimal | None = Field(default=None, ge=0)
+    pop_density_per_sqkm: Decimal | None = Field(default=None, ge=0)
+    pct_bachelors_or_higher: Decimal | None = Field(default=None, ge=0, le=100)
+    median_household_income: Decimal | None = Field(default=None, ge=0)
+    pct_owner_occupied: Decimal | None = Field(default=None, ge=0, le=100)
+    pct_renter_occupied: Decimal | None = Field(default=None, ge=0, le=100)
+    census_year: int = Field(default=2021, description="Census year for SCD tracking")
+
+
+class PolicyEvent(BaseModel):
+    """Schema for policy event dimension.
+
+    Used for time-series annotation. No causation claims.
+    """
+
+    event_date: date = Field(description="Date event was announced/occurred")
+    effective_date: date | None = Field(
+        default=None, description="Date policy took effect"
+    )
+    level: PolicyLevel
+    category: PolicyCategory
+    title: str = Field(max_length=200, description="Short event title for display")
+    description: str | None = Field(
+        default=None, description="Longer description for tooltip"
+    )
+    expected_direction: ExpectedDirection
+    source_url: HttpUrl | None = Field(default=None)
+    confidence: Confidence = Field(default=Confidence.MEDIUM)
+
+    model_config = {"str_strip_whitespace": True}
diff --git a/portfolio_app/toronto/schemas/trreb.py b/portfolio_app/toronto/schemas/trreb.py
new file mode 100644
index 0000000..e972ff6
--- /dev/null
+++ b/portfolio_app/toronto/schemas/trreb.py
@@ -0,0 +1,52 @@
+"""Pydantic schemas for TRREB monthly market data."""
+
+from datetime import date
+from decimal import Decimal
+
+from pydantic import BaseModel, Field
+
+
+class TRREBMonthlyRecord(BaseModel):
+    """Schema for a single TRREB monthly summary record.
+
+    Represents aggregated sales data for one district in one month.
+    """
+
+    report_date: date = Field(description="First of month (YYYY-MM-01)")
+    area_code: str = Field(
+        max_length=3, description="District code (W01, C01, E01, etc.)"
+    )
+    area_name: str = Field(max_length=100, description="District name")
+    area_type: str = Field(max_length=10, description="West / Central / East / North")
+    sales: int = Field(ge=0, description="Number of transactions")
+    dollar_volume: Decimal = Field(ge=0, description="Total sales volume ($)")
+    avg_price: Decimal = Field(ge=0, description="Average sale price ($)")
+    median_price: Decimal = Field(ge=0, description="Median sale price ($)")
+    new_listings: int = Field(ge=0, description="New listings count")
+    active_listings: int = Field(ge=0, description="Active listings at month end")
+    avg_sp_lp: Decimal = Field(
+        ge=0, le=200, description="Avg sale price / list price ratio (%)"
+    )
+    avg_dom: int = Field(ge=0, description="Average days on market")
+
+    model_config = {"str_strip_whitespace": True}
+
+
+class TRREBMonthlyReport(BaseModel):
+    """Schema for a complete TRREB monthly report.
+
+    Contains all district records for a single month.
+    """
+
+    report_date: date
+    records: list[TRREBMonthlyRecord]
+
+    @property
+    def total_sales(self) -> int:
+        """Total sales across all districts."""
+        return sum(r.sales for r in self.records)
+
+    @property
+    def district_count(self) -> int:
+        """Number of districts in report."""
+        return len(self.records)