feat: add Pydantic schemas, SQLAlchemy models, and parser structure
Sprint 3 implementation:
- Pydantic schemas for TRREB, CMHC, and dimension data validation
- SQLAlchemy models with PostGIS geometry for fact and dimension tables
- Parser structure (stubs) for TRREB PDF and CMHC CSV processing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1 +1,28 @@
|
||||
"""SQLAlchemy models for Toronto housing data."""
|
||||
|
||||
from .base import Base, create_tables, get_engine, get_session_factory
|
||||
from .dimensions import (
|
||||
DimCMHCZone,
|
||||
DimNeighbourhood,
|
||||
DimPolicyEvent,
|
||||
DimTime,
|
||||
DimTRREBDistrict,
|
||||
)
|
||||
from .facts import FactPurchases, FactRentals
|
||||
|
||||
# Names re-exported by the models package, grouped by originating module.
__all__ = [
    # Engine / session plumbing (.base)
    "Base",
    "get_engine",
    "get_session_factory",
    "create_tables",
    # Dimension tables (.dimensions)
    "DimTime",
    "DimTRREBDistrict",
    "DimCMHCZone",
    "DimNeighbourhood",
    "DimPolicyEvent",
    # Fact tables (.facts)
    "FactPurchases",
    "FactRentals",
]
|
||||
|
||||
30
portfolio_app/toronto/models/base.py
Normal file
30
portfolio_app/toronto/models/base.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""SQLAlchemy base configuration and engine setup."""
|
||||
|
||||
from sqlalchemy import Engine, create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from portfolio_app.config import get_settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):  # type: ignore[misc]
    """Declarative base class that every ORM model in this package extends.

    It defines no columns of its own; it exists so that all mapped classes
    share a single ``Base.metadata`` registry.
    """
|
||||
|
||||
|
||||
# Lazily created, process-wide engine. An Engine owns a connection pool, so
# building a new one per call would leave each caller with its own pool.
_engine: Engine | None = None


def get_engine() -> Engine:
    """Return the shared database engine, creating it on first use.

    The engine is built from ``settings.database_url`` with SQL echo turned
    off and is then reused by every subsequent call, so session factories and
    table creation all share one connection pool.

    Returns:
        The process-wide SQLAlchemy ``Engine``.
    """
    global _engine
    if _engine is None:
        settings = get_settings()
        _engine = create_engine(settings.database_url, echo=False)
    return _engine
|
||||
|
||||
|
||||
def get_session_factory() -> sessionmaker[Session]:
    """Build a ``sessionmaker`` bound to the engine from ``get_engine()``.

    Returns:
        A session factory; call it to obtain a new ``Session``.
    """
    return sessionmaker(bind=get_engine())
|
||||
|
||||
|
||||
def create_tables() -> None:
    """Issue ``create_all`` for every table registered on ``Base.metadata``."""
    Base.metadata.create_all(bind=get_engine())
|
||||
104
portfolio_app/toronto/models/dimensions.py
Normal file
104
portfolio_app/toronto/models/dimensions.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""SQLAlchemy models for dimension tables."""
|
||||
|
||||
from datetime import date
|
||||
|
||||
from geoalchemy2 import Geometry
|
||||
from sqlalchemy import Boolean, Date, Integer, Numeric, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class DimTime(Base):
    """ORM mapping for the ``dim_time`` calendar dimension."""

    __tablename__ = "dim_time"

    # Integer primary key referenced by the fact tables.
    date_key: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
    )
    # Calendar date; unique, so each date appears at most once.
    full_date: Mapped[date] = mapped_column(
        Date,
        unique=True,
        nullable=False,
    )
    # Pre-computed calendar attributes.
    year: Mapped[int] = mapped_column(Integer, nullable=False)
    month: Mapped[int] = mapped_column(Integer, nullable=False)
    quarter: Mapped[int] = mapped_column(Integer, nullable=False)
    month_name: Mapped[str] = mapped_column(String(20), nullable=False)
    # Defaults to True when no value is supplied on insert.
    is_month_start: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
|
||||
|
||||
class DimTRREBDistrict(Base):
    """ORM mapping for ``dim_trreb_district`` (TRREB districts with geometry)."""

    __tablename__ = "dim_trreb_district"

    # Auto-incrementing surrogate key referenced by fact_purchases.
    district_key: Mapped[int] = mapped_column(
        Integer,
        autoincrement=True,
        primary_key=True,
    )
    # Short district code (at most 3 characters); unique per district.
    district_code: Mapped[str] = mapped_column(
        String(3),
        unique=True,
        nullable=False,
    )
    district_name: Mapped[str] = mapped_column(String(100), nullable=False)
    area_type: Mapped[str] = mapped_column(String(10), nullable=False)
    # Optional PostGIS POLYGON stored with SRID 4326.
    geometry = mapped_column(
        Geometry("POLYGON", srid=4326),
        nullable=True,
    )
|
||||
|
||||
|
||||
class DimCMHCZone(Base):
    """ORM mapping for ``dim_cmhc_zone`` (CMHC zones with geometry)."""

    __tablename__ = "dim_cmhc_zone"

    # Auto-incrementing surrogate key referenced by fact_rentals.
    zone_key: Mapped[int] = mapped_column(
        Integer,
        autoincrement=True,
        primary_key=True,
    )
    # Zone identifier (at most 10 characters); unique per zone.
    zone_code: Mapped[str] = mapped_column(
        String(10),
        unique=True,
        nullable=False,
    )
    zone_name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Optional PostGIS POLYGON stored with SRID 4326.
    geometry = mapped_column(
        Geometry("POLYGON", srid=4326),
        nullable=True,
    )
|
||||
|
||||
|
||||
class DimNeighbourhood(Base):
    """ORM mapping for ``dim_neighbourhood`` (City of Toronto neighbourhoods).

    Note: No FK to fact tables in V1 - reference overlay only.
    """

    __tablename__ = "dim_neighbourhood"

    # Identity and shape.
    neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(100), nullable=False)
    geometry = mapped_column(
        Geometry("POLYGON", srid=4326),
        nullable=True,
    )

    # Optional descriptive statistics; every column below may be NULL.
    population: Mapped[int | None] = mapped_column(Integer, nullable=True)
    land_area_sqkm: Mapped[float | None] = mapped_column(
        Numeric(10, 4),
        nullable=True,
    )
    pop_density_per_sqkm: Mapped[float | None] = mapped_column(
        Numeric(10, 2),
        nullable=True,
    )
    pct_bachelors_or_higher: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    median_household_income: Mapped[float | None] = mapped_column(
        Numeric(12, 2),
        nullable=True,
    )
    pct_owner_occupied: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    pct_renter_occupied: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )

    # Falls back to 2021 when no census year is supplied on insert.
    census_year: Mapped[int] = mapped_column(Integer, default=2021)
|
||||
|
||||
|
||||
class DimPolicyEvent(Base):
    """ORM mapping for ``dim_policy_event``, used to annotate time series."""

    __tablename__ = "dim_policy_event"

    event_id: Mapped[int] = mapped_column(
        Integer,
        autoincrement=True,
        primary_key=True,
    )
    event_date: Mapped[date] = mapped_column(Date, nullable=False)
    effective_date: Mapped[date | None] = mapped_column(Date, nullable=True)

    # federal/provincial/municipal
    level: Mapped[str] = mapped_column(String(20), nullable=False)
    # monetary/tax/regulatory/supply/economic
    category: Mapped[str] = mapped_column(String(20), nullable=False)

    title: Mapped[str] = mapped_column(String(200), nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)

    # bearish/bullish/neutral
    expected_direction: Mapped[str] = mapped_column(String(10), nullable=False)
    source_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
    # high/medium/low; "medium" is used when no value is supplied on insert.
    confidence: Mapped[str] = mapped_column(String(10), default="medium")
|
||||
69
portfolio_app/toronto/models/facts.py
Normal file
69
portfolio_app/toronto/models/facts.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""SQLAlchemy models for fact tables."""
|
||||
|
||||
from sqlalchemy import ForeignKey, Integer, Numeric, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class FactPurchases(Base):
    """TRREB purchase/sales facts stored in ``fact_purchases``.

    Grain: One row per district per month.
    """

    __tablename__ = "fact_purchases"

    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Foreign keys into the time and TRREB district dimensions.
    date_key: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("dim_time.date_key"),
        nullable=False,
    )
    district_key: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("dim_trreb_district.district_key"),
        nullable=False,
    )

    # Measures; all are required.
    sales_count: Mapped[int] = mapped_column(Integer, nullable=False)
    dollar_volume: Mapped[float] = mapped_column(Numeric(15, 2), nullable=False)
    avg_price: Mapped[float] = mapped_column(Numeric(12, 2), nullable=False)
    median_price: Mapped[float] = mapped_column(Numeric(12, 2), nullable=False)
    new_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    active_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    # Days on market
    avg_dom: Mapped[int] = mapped_column(Integer, nullable=False)
    # Sale/List ratio
    avg_sp_lp: Mapped[float] = mapped_column(Numeric(5, 2), nullable=False)

    # Navigation to the dimensions; each backref adds a ``purchases``
    # attribute on the dimension class.
    time = relationship("DimTime", backref="purchases")
    district = relationship("DimTRREBDistrict", backref="purchases")
|
||||
|
||||
|
||||
class FactRentals(Base):
    """CMHC rental market facts stored in ``fact_rentals``.

    Grain: One row per zone per bedroom type per survey year.
    """

    __tablename__ = "fact_rentals"

    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Foreign keys into the time and CMHC zone dimensions.
    date_key: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("dim_time.date_key"),
        nullable=False,
    )
    zone_key: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("dim_cmhc_zone.zone_key"),
        nullable=False,
    )
    bedroom_type: Mapped[str] = mapped_column(String(20), nullable=False)

    # Measures; every one of them may be NULL.
    universe: Mapped[int | None] = mapped_column(Integer, nullable=True)
    avg_rent: Mapped[float | None] = mapped_column(
        Numeric(10, 2),
        nullable=True,
    )
    median_rent: Mapped[float | None] = mapped_column(
        Numeric(10, 2),
        nullable=True,
    )
    vacancy_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    availability_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    turnover_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    rent_change_pct: Mapped[float | None] = mapped_column(
        Numeric(5, 2),
        nullable=True,
    )
    reliability_code: Mapped[str | None] = mapped_column(String(2), nullable=True)

    # Navigation to the dimensions; each backref adds a ``rentals``
    # attribute on the dimension class.
    time = relationship("DimTime", backref="rentals")
    zone = relationship("DimCMHCZone", backref="rentals")
|
||||
@@ -1 +1,9 @@
|
||||
"""Data parsers for Toronto housing data sources."""
|
||||
"""Parsers for Toronto housing data sources."""
|
||||
|
||||
from .cmhc import CMHCParser
|
||||
from .trreb import TRREBParser
|
||||
|
||||
# Parser classes re-exported by the parsers package.
__all__ = ["TRREBParser", "CMHCParser"]
|
||||
|
||||
147
portfolio_app/toronto/parsers/cmhc.py
Normal file
147
portfolio_app/toronto/parsers/cmhc.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""CMHC CSV processor for rental market survey data.
|
||||
|
||||
This module provides the structure for processing CMHC (Canada Mortgage and Housing
|
||||
Corporation) rental market survey data from CSV exports.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, cast
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from portfolio_app.toronto.schemas import CMHCAnnualSurvey, CMHCRentalRecord
|
||||
|
||||
|
||||
class CMHCParser:
    """Parser for CMHC Rental Market Survey CSV data.

    CMHC conducts annual rental market surveys and publishes data including:
    - Average and median rents by zone and bedroom type
    - Vacancy rates
    - Universe (total rental units)
    - Year-over-year rent changes

    Data is available via the Housing Market Information Portal as CSV exports.
    """

    # Columns that must be present once headers have been normalized.
    REQUIRED_COLUMNS = {
        "zone_code",
        "zone_name",
        "bedroom_type",
        "survey_year",
    }

    # CMHC export header -> CMHCRentalRecord field name.
    COLUMN_MAPPINGS = {
        "Zone Code": "zone_code",
        "Zone Name": "zone_name",
        "Bedroom Type": "bedroom_type",
        "Survey Year": "survey_year",
        "Universe": "universe",
        # The schema field is ``average_rent``; mapping to any other name
        # would leave the rent unset on every validated record.
        "Average Rent ($)": "average_rent",
        "Median Rent ($)": "median_rent",
        "Vacancy Rate (%)": "vacancy_rate",
        "Availability Rate (%)": "availability_rate",
        "Turnover Rate (%)": "turnover_rate",
        "% Change in Rent": "rent_change_pct",
        # NOTE(review): CMHCRentalRecord declares vacancy_rate_reliability and
        # average_rent_reliability but no ``reliability_code`` field, so this
        # column presumably does not reach the record - confirm the target.
        "Reliability Code": "reliability_code",
    }

    def __init__(self, csv_path: Path | str) -> None:
        """Initialize parser with path to CSV file.

        Args:
            csv_path: Path to the CMHC CSV export file. Plain strings are
                accepted and converted to ``Path``.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the path does not have a ``.csv`` suffix.
        """
        self.csv_path = Path(csv_path)
        self._validate_path()

    def _validate_path(self) -> None:
        """Validate that the CSV path exists and has a ``.csv`` suffix.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the suffix is not ``.csv`` (case-insensitive).
        """
        if not self.csv_path.exists():
            raise FileNotFoundError(f"CSV not found: {self.csv_path}")
        if self.csv_path.suffix.lower() != ".csv":
            raise ValueError(f"Expected CSV file, got: {self.csv_path.suffix}")

    def parse(self) -> CMHCAnnualSurvey:
        """Parse the CSV and return structured data.

        Returns:
            CMHCAnnualSurvey containing all extracted records.

        Raises:
            ValueError: If required columns are missing or the file has no
                data rows to take the survey year from.
        """
        df = self._load_csv()
        df = self._normalize_columns(df)
        self._validate_columns(df)
        records = self._convert_to_records(df)
        survey_year = self._infer_survey_year(df)

        return CMHCAnnualSurvey(survey_year=survey_year, records=records)

    def _load_csv(self) -> pd.DataFrame:
        """Load CSV file into DataFrame.

        Returns:
            Raw DataFrame from CSV.
        """
        return pd.read_csv(self.csv_path)

    def _normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename known CMHC export headers to schema field names.

        Columns without an entry in ``COLUMN_MAPPINGS`` are left untouched.

        Args:
            df: DataFrame with original column names.

        Returns:
            DataFrame with normalized column names.
        """
        rename_map = {
            source: target
            for source, target in self.COLUMN_MAPPINGS.items()
            if source in df.columns
        }
        return df.rename(columns=rename_map)

    def _validate_columns(self, df: pd.DataFrame) -> None:
        """Validate that all required columns are present.

        Args:
            df: DataFrame to validate.

        Raises:
            ValueError: If required columns are missing.
        """
        missing = self.REQUIRED_COLUMNS - set(df.columns)
        if missing:
            raise ValueError(f"Missing required columns: {missing}")

    def _convert_to_records(self, df: pd.DataFrame) -> list[CMHCRentalRecord]:
        """Convert DataFrame rows to validated schema records.

        Args:
            df: Normalized DataFrame.

        Returns:
            List of validated CMHCRentalRecord objects, one per row.
        """
        records: list[CMHCRentalRecord] = []
        for _, row in df.iterrows():
            # Missing cells arrive as NaN; the schema expects None instead.
            record_data = {
                key: (None if pd.isna(value) else value)
                for key, value in row.to_dict().items()
            }
            records.append(CMHCRentalRecord(**cast(dict[str, Any], record_data)))
        return records

    def _infer_survey_year(self, df: pd.DataFrame) -> int:
        """Infer survey year from the first row of the data.

        Args:
            df: DataFrame with survey_year column.

        Returns:
            Survey year as integer.

        Raises:
            ValueError: If there is no ``survey_year`` column or no rows.
        """
        # Checking for emptiness first keeps the failure a ValueError rather
        # than the IndexError that ``iloc[0]`` raises on an empty column.
        if "survey_year" not in df.columns or df.empty:
            raise ValueError("Cannot infer survey year from data.")
        return int(df["survey_year"].iloc[0])
|
||||
82
portfolio_app/toronto/parsers/trreb.py
Normal file
82
portfolio_app/toronto/parsers/trreb.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""TRREB PDF parser for monthly market watch reports.
|
||||
|
||||
This module provides the structure for parsing TRREB (Toronto Regional Real Estate Board)
|
||||
monthly Market Watch PDF reports into structured data.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from portfolio_app.toronto.schemas import TRREBMonthlyRecord, TRREBMonthlyReport
|
||||
|
||||
|
||||
class TRREBParser:
    """Parser for TRREB Market Watch PDF reports.

    TRREB publishes monthly Market Watch reports as PDFs containing:
    - Summary statistics by area (416, 905, Total)
    - District-level breakdowns
    - Year-over-year comparisons

    The parser extracts tabular data from these PDFs and validates
    against the TRREBMonthlyRecord schema. Only path validation is
    implemented so far; every extraction method raises NotImplementedError.
    """

    def __init__(self, pdf_path: Path | str) -> None:
        """Initialize parser with path to PDF file.

        Args:
            pdf_path: Path to the TRREB Market Watch PDF file. Plain strings
                are accepted and converted to ``Path``.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the path does not have a ``.pdf`` suffix.
        """
        self.pdf_path = Path(pdf_path)
        self._validate_path()

    def _validate_path(self) -> None:
        """Validate that the PDF path exists and has a ``.pdf`` suffix.

        Raises:
            FileNotFoundError: If the path does not exist.
            ValueError: If the suffix is not ``.pdf`` (case-insensitive).
        """
        if not self.pdf_path.exists():
            raise FileNotFoundError(f"PDF not found: {self.pdf_path}")
        if self.pdf_path.suffix.lower() != ".pdf":
            raise ValueError(f"Expected PDF file, got: {self.pdf_path.suffix}")

    def parse(self) -> TRREBMonthlyReport:
        """Parse the PDF and return structured data.

        Returns:
            TRREBMonthlyReport containing all extracted records.

        Raises:
            NotImplementedError: PDF parsing not yet implemented.
        """
        raise NotImplementedError(
            "PDF parsing requires pdfplumber/tabula-py. "
            "Implementation pending Sprint 4 data ingestion."
        )

    def _extract_tables(self) -> list[dict[str, Any]]:
        """Extract raw tables from PDF pages.

        Returns:
            List of dictionaries representing table data.

        Raises:
            NotImplementedError: Always; this is a stub.
        """
        raise NotImplementedError("Table extraction not yet implemented.")

    def _parse_district_table(
        self, table_data: list[dict[str, Any]]
    ) -> list[TRREBMonthlyRecord]:
        """Parse district-level statistics table.

        Args:
            table_data: Raw table data extracted from PDF.

        Returns:
            List of validated TRREBMonthlyRecord objects.

        Raises:
            NotImplementedError: Always; this is a stub.
        """
        raise NotImplementedError("District table parsing not yet implemented.")

    def _infer_report_date(self) -> tuple[int, int]:
        """Infer report year and month from PDF filename or content.

        Returns:
            Tuple of (year, month).

        Raises:
            NotImplementedError: Always; this is a stub.
        """
        raise NotImplementedError("Date inference not yet implemented.")
|
||||
@@ -1 +1,39 @@
|
||||
"""Pydantic schemas for Toronto housing data validation."""
|
||||
|
||||
from .cmhc import BedroomType, CMHCAnnualSurvey, CMHCRentalRecord, ReliabilityCode
|
||||
from .dimensions import (
|
||||
AreaType,
|
||||
CMHCZone,
|
||||
Confidence,
|
||||
ExpectedDirection,
|
||||
Neighbourhood,
|
||||
PolicyCategory,
|
||||
PolicyEvent,
|
||||
PolicyLevel,
|
||||
TimeDimension,
|
||||
TRREBDistrict,
|
||||
)
|
||||
from .trreb import TRREBMonthlyRecord, TRREBMonthlyReport
|
||||
|
||||
# Names re-exported by the schemas package, grouped by topic.
__all__ = [
    # TRREB monthly market data
    "TRREBMonthlyRecord",
    "TRREBMonthlyReport",
    # CMHC rental survey data
    "CMHCRentalRecord",
    "CMHCAnnualSurvey",
    "BedroomType",
    "ReliabilityCode",
    # Dimension records
    "TimeDimension",
    "TRREBDistrict",
    "CMHCZone",
    "Neighbourhood",
    "PolicyEvent",
    # Enumerations used by the dimension records
    "AreaType",
    "PolicyLevel",
    "PolicyCategory",
    "ExpectedDirection",
    "Confidence",
]
|
||||
|
||||
81
portfolio_app/toronto/schemas/cmhc.py
Normal file
81
portfolio_app/toronto/schemas/cmhc.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Pydantic schemas for CMHC rental market data."""
|
||||
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class BedroomType(str, Enum):
    """Bedroom categories used in CMHC rental data.

    Members are also ``str`` instances, so they compare equal to their
    label text.
    """

    BACHELOR = "Bachelor"
    ONE_BED = "1 Bedroom"
    TWO_BED = "2 Bedroom"
    THREE_BED_PLUS = "3 Bedroom+"
    # Aggregate across all bedroom categories.
    TOTAL = "Total"
|
||||
|
||||
|
||||
class ReliabilityCode(str, Enum):
    """Reliability grades attached to CMHC estimates.

    Grades are based on the coefficient of variation (CV); the threshold
    for each grade is given next to its member.
    """

    # CV <= 2.5%
    EXCELLENT = "a"
    # 2.5% < CV <= 5%
    GOOD = "b"
    # 5% < CV <= 10%
    FAIR = "c"
    # CV > 10%
    POOR = "d"
    # Sample too small
    SUPPRESSED = "**"
|
||||
|
||||
|
||||
class CMHCRentalRecord(BaseModel):
    """One CMHC rental survey row.

    Holds the rental figures for a single zone and bedroom type in a single
    survey year. Only the year, zone and bedroom type are required; every
    measure defaults to ``None``.
    """

    # Strip leading/trailing whitespace from string inputs before validation.
    model_config = {"str_strip_whitespace": True}

    # Identifying fields (required).
    survey_year: int = Field(
        ge=1990,
        description="Survey year (October snapshot)",
    )
    zone_code: str = Field(
        max_length=10,
        description="CMHC zone identifier",
    )
    zone_name: str = Field(
        max_length=100,
        description="Zone name",
    )
    bedroom_type: BedroomType = Field(description="Bedroom category")

    # Optional measures; percentages are constrained to 0-100.
    universe: int | None = Field(
        default=None,
        ge=0,
        description="Total rental units in zone",
    )
    vacancy_rate: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
        description="Vacancy rate (%)",
    )
    vacancy_rate_reliability: ReliabilityCode | None = Field(default=None)
    availability_rate: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
        description="Availability rate (%)",
    )
    average_rent: Decimal | None = Field(
        default=None,
        ge=0,
        description="Average monthly rent ($)",
    )
    average_rent_reliability: ReliabilityCode | None = Field(default=None)
    median_rent: Decimal | None = Field(
        default=None,
        ge=0,
        description="Median monthly rent ($)",
    )
    # No bounds: a year-over-year change may be negative.
    rent_change_pct: Decimal | None = Field(
        default=None,
        description="YoY rent change (%)",
    )
    turnover_rate: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
        description="Unit turnover rate (%)",
    )
|
||||
|
||||
|
||||
class CMHCAnnualSurvey(BaseModel):
    """A full CMHC annual survey for Toronto.

    Bundles every zone / bedroom-type record belonging to one survey year.
    """

    survey_year: int
    records: list[CMHCRentalRecord]

    @property
    def zone_count(self) -> int:
        """Count of distinct ``zone_code`` values among the records."""
        distinct_zones = set()
        for record in self.records:
            distinct_zones.add(record.zone_code)
        return len(distinct_zones)
|
||||
121
portfolio_app/toronto/schemas/dimensions.py
Normal file
121
portfolio_app/toronto/schemas/dimensions.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Pydantic schemas for dimension tables."""
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
|
||||
class PolicyLevel(str, Enum):
    """Level of government a policy event belongs to.

    Members are also ``str`` instances.
    """

    FEDERAL = "federal"
    PROVINCIAL = "provincial"
    MUNICIPAL = "municipal"
|
||||
|
||||
|
||||
class PolicyCategory(str, Enum):
    """Category a policy event is filed under.

    Members are also ``str`` instances.
    """

    MONETARY = "monetary"
    TAX = "tax"
    REGULATORY = "regulatory"
    SUPPLY = "supply"
    ECONOMIC = "economic"
|
||||
|
||||
|
||||
class ExpectedDirection(str, Enum):
    """Direction in which an event is expected to move prices."""

    # Expected to increase prices
    BULLISH = "bullish"
    # Expected to decrease prices
    BEARISH = "bearish"
    # Uncertain or mixed impact
    NEUTRAL = "neutral"
|
||||
|
||||
|
||||
class Confidence(str, Enum):
    """How much confidence is placed in a policy event's data.

    Members are also ``str`` instances.
    """

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
||||
|
||||
|
||||
class AreaType(str, Enum):
    """Area grouping of a TRREB district.

    Members are also ``str`` instances; values are capitalized.
    """

    WEST = "West"
    CENTRAL = "Central"
    EAST = "East"
    NORTH = "North"
|
||||
|
||||
|
||||
class TimeDimension(BaseModel):
    """Validated form of one time-dimension row."""

    date_key: int = Field(
        description="Date key in YYYYMMDD format",
    )
    full_date: date
    # Calendar parts, each range-checked.
    year: int = Field(ge=2000, le=2100)
    month: int = Field(ge=1, le=12)
    quarter: int = Field(ge=1, le=4)
    month_name: str = Field(max_length=20)
    # True unless the caller says otherwise.
    is_month_start: bool = True
|
||||
|
||||
|
||||
class TRREBDistrict(BaseModel):
    """Validated form of one TRREB district dimension row."""

    district_code: str = Field(
        max_length=3,
        description="W01, C01, E01, etc.",
    )
    district_name: str = Field(max_length=100)
    area_type: AreaType
    # Geometry is optional and carried as text.
    geometry_wkt: str | None = Field(
        default=None,
        description="WKT geometry string",
    )
|
||||
|
||||
|
||||
class CMHCZone(BaseModel):
    """Validated form of one CMHC zone dimension row."""

    zone_code: str = Field(max_length=10)
    zone_name: str = Field(max_length=100)
    # Geometry is optional and carried as text.
    geometry_wkt: str | None = Field(
        default=None,
        description="WKT geometry string",
    )
|
||||
|
||||
|
||||
class Neighbourhood(BaseModel):
    """Validated form of one City of Toronto neighbourhood row.

    Note: No FK to fact tables in V1 - reference overlay only.
    """

    # Identity; the id must lie between 1 and 200 inclusive.
    neighbourhood_id: int = Field(ge=1, le=200)
    name: str = Field(max_length=100)
    geometry_wkt: str | None = Field(default=None)

    # Optional statistics: non-negative, and percentages capped at 100.
    population: int | None = Field(default=None, ge=0)
    land_area_sqkm: Decimal | None = Field(default=None, ge=0)
    pop_density_per_sqkm: Decimal | None = Field(default=None, ge=0)
    pct_bachelors_or_higher: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
    )
    median_household_income: Decimal | None = Field(default=None, ge=0)
    pct_owner_occupied: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
    )
    pct_renter_occupied: Decimal | None = Field(
        default=None,
        ge=0,
        le=100,
    )

    census_year: int = Field(
        default=2021,
        description="Census year for SCD tracking",
    )
|
||||
|
||||
|
||||
class PolicyEvent(BaseModel):
    """Validated form of one policy event row.

    Used for time-series annotation. No causation claims.
    """

    # Strip leading/trailing whitespace from string inputs before validation.
    model_config = {"str_strip_whitespace": True}

    event_date: date = Field(
        description="Date event was announced/occurred",
    )
    effective_date: date | None = Field(
        default=None,
        description="Date policy took effect",
    )
    level: PolicyLevel
    category: PolicyCategory
    title: str = Field(
        max_length=200,
        description="Short event title for display",
    )
    description: str | None = Field(
        default=None,
        description="Longer description for tooltip",
    )
    expected_direction: ExpectedDirection
    source_url: HttpUrl | None = Field(default=None)
    # Medium confidence unless stated otherwise.
    confidence: Confidence = Field(default=Confidence.MEDIUM)
|
||||
52
portfolio_app/toronto/schemas/trreb.py
Normal file
52
portfolio_app/toronto/schemas/trreb.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Pydantic schemas for TRREB monthly market data."""
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TRREBMonthlyRecord(BaseModel):
    """One TRREB monthly summary row.

    Carries the aggregated sales figures for a single district in a single
    month. Every field is required.
    """

    # Strip leading/trailing whitespace from string inputs before validation.
    model_config = {"str_strip_whitespace": True}

    # Which month and district the row describes.
    report_date: date = Field(
        description="First of month (YYYY-MM-01)",
    )
    area_code: str = Field(
        max_length=3,
        description="District code (W01, C01, E01, etc.)",
    )
    area_name: str = Field(
        max_length=100,
        description="District name",
    )
    area_type: str = Field(
        max_length=10,
        description="West / Central / East / North",
    )

    # Measures; all must be non-negative.
    sales: int = Field(ge=0, description="Number of transactions")
    dollar_volume: Decimal = Field(
        ge=0,
        description="Total sales volume ($)",
    )
    avg_price: Decimal = Field(
        ge=0,
        description="Average sale price ($)",
    )
    median_price: Decimal = Field(
        ge=0,
        description="Median sale price ($)",
    )
    new_listings: int = Field(ge=0, description="New listings count")
    active_listings: int = Field(
        ge=0,
        description="Active listings at month end",
    )
    # Additionally capped at 200.
    avg_sp_lp: Decimal = Field(
        ge=0,
        le=200,
        description="Avg sale price / list price ratio (%)",
    )
    avg_dom: int = Field(ge=0, description="Average days on market")
|
||||
|
||||
|
||||
class TRREBMonthlyReport(BaseModel):
    """Schema for a complete TRREB monthly report.

    Contains all district records for a single month.
    """

    report_date: date
    records: list[TRREBMonthlyRecord]

    @property
    def total_sales(self) -> int:
        """Total sales across all records (0 when there are none)."""
        return sum(r.sales for r in self.records)

    @property
    def district_count(self) -> int:
        """Number of distinct districts in report.

        Counts unique ``area_code`` values rather than rows, so a district
        that appears more than once is only counted once.
        """
        return len({r.area_code for r in self.records})
|
||||
Reference in New Issue
Block a user