feat: add Pydantic schemas, SQLAlchemy models, and parser structure

Sprint 3 implementation:
- Pydantic schemas for TRREB, CMHC, and dimension data validation
- SQLAlchemy models with PostGIS geometry for fact and dimension tables
- Parser structure (stubs) for TRREB PDF and CMHC CSV processing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 14:58:31 -05:00
parent 549e1fcbaf
commit ead6d91a28
11 changed files with 760 additions and 1 deletions

View File

@@ -0,0 +1,104 @@
"""SQLAlchemy models for dimension tables."""
from datetime import date
from geoalchemy2 import Geometry
from sqlalchemy import Boolean, Date, Integer, Numeric, String, Text
from sqlalchemy.orm import Mapped, mapped_column
from .base import Base
class DimTime(Base):
"""Time dimension table."""
__tablename__ = "dim_time"
date_key: Mapped[int] = mapped_column(Integer, primary_key=True)
full_date: Mapped[date] = mapped_column(Date, nullable=False, unique=True)
year: Mapped[int] = mapped_column(Integer, nullable=False)
month: Mapped[int] = mapped_column(Integer, nullable=False)
quarter: Mapped[int] = mapped_column(Integer, nullable=False)
month_name: Mapped[str] = mapped_column(String(20), nullable=False)
is_month_start: Mapped[bool] = mapped_column(Boolean, default=True)
class DimTRREBDistrict(Base):
"""TRREB district dimension table with PostGIS geometry."""
__tablename__ = "dim_trreb_district"
district_key: Mapped[int] = mapped_column(
Integer, primary_key=True, autoincrement=True
)
district_code: Mapped[str] = mapped_column(String(3), nullable=False, unique=True)
district_name: Mapped[str] = mapped_column(String(100), nullable=False)
area_type: Mapped[str] = mapped_column(String(10), nullable=False)
geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
class DimCMHCZone(Base):
"""CMHC zone dimension table with PostGIS geometry."""
__tablename__ = "dim_cmhc_zone"
zone_key: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
zone_code: Mapped[str] = mapped_column(String(10), nullable=False, unique=True)
zone_name: Mapped[str] = mapped_column(String(100), nullable=False)
geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
class DimNeighbourhood(Base):
"""City of Toronto neighbourhood dimension.
Note: No FK to fact tables in V1 - reference overlay only.
"""
__tablename__ = "dim_neighbourhood"
neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True)
name: Mapped[str] = mapped_column(String(100), nullable=False)
geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
population: Mapped[int | None] = mapped_column(Integer, nullable=True)
land_area_sqkm: Mapped[float | None] = mapped_column(Numeric(10, 4), nullable=True)
pop_density_per_sqkm: Mapped[float | None] = mapped_column(
Numeric(10, 2), nullable=True
)
pct_bachelors_or_higher: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
median_household_income: Mapped[float | None] = mapped_column(
Numeric(12, 2), nullable=True
)
pct_owner_occupied: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
pct_renter_occupied: Mapped[float | None] = mapped_column(
Numeric(5, 2), nullable=True
)
census_year: Mapped[int] = mapped_column(Integer, default=2021)
class DimPolicyEvent(Base):
"""Policy event dimension for time-series annotation."""
__tablename__ = "dim_policy_event"
event_id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
event_date: Mapped[date] = mapped_column(Date, nullable=False)
effective_date: Mapped[date | None] = mapped_column(Date, nullable=True)
level: Mapped[str] = mapped_column(
String(20), nullable=False
) # federal/provincial/municipal
category: Mapped[str] = mapped_column(
String(20), nullable=False
) # monetary/tax/regulatory/supply/economic
title: Mapped[str] = mapped_column(String(200), nullable=False)
description: Mapped[str | None] = mapped_column(Text, nullable=True)
expected_direction: Mapped[str] = mapped_column(
String(10), nullable=False
) # bearish/bullish/neutral
source_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
confidence: Mapped[str] = mapped_column(
String(10), default="medium"
) # high/medium/low