feat: add Pydantic schemas, SQLAlchemy models, and parser structure
Sprint 3 implementation:
- Pydantic schemas for TRREB, CMHC, and dimension data validation
- SQLAlchemy models with PostGIS geometry for fact and dimension tables
- Parser structure (stubs) for TRREB PDF and CMHC CSV processing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1 +1,28 @@
|
||||
"""SQLAlchemy models for Toronto housing data."""
|
||||
|
||||
from .base import Base, create_tables, get_engine, get_session_factory
|
||||
from .dimensions import (
|
||||
DimCMHCZone,
|
||||
DimNeighbourhood,
|
||||
DimPolicyEvent,
|
||||
DimTime,
|
||||
DimTRREBDistrict,
|
||||
)
|
||||
from .facts import FactPurchases, FactRentals
|
||||
|
||||
# Names re-exported by this package, grouped by the submodule they come from.
__all__ = [
    # .base -- declarative base and engine/session/table helpers
    "Base",
    "get_engine",
    "get_session_factory",
    "create_tables",
    # .dimensions -- dimension tables
    "DimTime",
    "DimTRREBDistrict",
    "DimCMHCZone",
    "DimNeighbourhood",
    "DimPolicyEvent",
    # .facts -- fact tables
    "FactPurchases",
    "FactRentals",
]
|
||||
|
||||
30
portfolio_app/toronto/models/base.py
Normal file
30
portfolio_app/toronto/models/base.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""SQLAlchemy base configuration and engine setup."""
|
||||
|
||||
from sqlalchemy import Engine, create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from portfolio_app.config import get_settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):  # type: ignore[misc]
    """Declarative base that every SQLAlchemy model in this package inherits.

    Models subclassing ``Base`` register their tables on ``Base.metadata``.
    """
|
||||
|
||||
|
||||
# One Engine per database URL.  An Engine owns its own connection pool, so
# building a fresh one on every call (as get_session_factory() and
# create_tables() would otherwise trigger) multiplies pools and connections.
_ENGINES: dict[str, Engine] = {}


def get_engine() -> Engine:
    """Return the database engine for the configured URL, creating it once.

    The URL is read from ``get_settings().database_url`` on every call, and
    the engine is cached under the string form of that URL.  Repeated calls
    with the same configuration therefore return the same ``Engine`` object,
    while a changed URL (e.g. in tests) still yields a matching engine.

    Returns:
        The cached or newly created ``Engine`` (created with ``echo=False``).
    """
    database_url = get_settings().database_url
    cache_key = str(database_url)

    engine = _ENGINES.get(cache_key)
    if engine is None:
        engine = create_engine(database_url, echo=False)
        _ENGINES[cache_key] = engine
    return engine
|
||||
|
||||
|
||||
def get_session_factory() -> sessionmaker[Session]:
    """Return a ``sessionmaker`` bound to the engine from ``get_engine()``."""
    return sessionmaker(bind=get_engine())
|
||||
|
||||
|
||||
def create_tables() -> None:
    """Create every table registered on ``Base.metadata`` in the database.

    The engine is obtained from ``get_engine()``.
    """
    target = get_engine()
    Base.metadata.create_all(bind=target)
|
||||
104
portfolio_app/toronto/models/dimensions.py
Normal file
104
portfolio_app/toronto/models/dimensions.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""SQLAlchemy models for dimension tables."""
|
||||
|
||||
from datetime import date
|
||||
|
||||
from geoalchemy2 import Geometry
|
||||
from sqlalchemy import Boolean, Date, Integer, Numeric, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class DimTime(Base):
    """Time dimension (table ``dim_time``).

    Referenced by the fact tables through ``date_key``.
    """

    __tablename__ = "dim_time"

    # Integer primary key that the fact tables point at.
    date_key: Mapped[int] = mapped_column(Integer, primary_key=True)
    # Calendar date; unique, so each date appears at most once.
    full_date: Mapped[date] = mapped_column(
        Date,
        nullable=False,
        unique=True,
    )
    # Denormalised calendar parts of the date.
    year: Mapped[int] = mapped_column(Integer, nullable=False)
    month: Mapped[int] = mapped_column(Integer, nullable=False)
    quarter: Mapped[int] = mapped_column(Integer, nullable=False)
    month_name: Mapped[str] = mapped_column(String(20), nullable=False)
    # Defaults to True when no value is supplied on insert.
    is_month_start: Mapped[bool] = mapped_column(Boolean, default=True)
|
||||
|
||||
|
||||
class DimTRREBDistrict(Base):
    """TRREB district dimension (table ``dim_trreb_district``).

    Carries an optional PostGIS polygon for the district boundary.
    """

    __tablename__ = "dim_trreb_district"

    # Surrogate key, generated by the database.
    district_key: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    # District code (at most 3 characters); unique across the table.
    district_code: Mapped[str] = mapped_column(
        String(3),
        nullable=False,
        unique=True,
    )
    district_name: Mapped[str] = mapped_column(String(100), nullable=False)
    area_type: Mapped[str] = mapped_column(String(10), nullable=False)
    # Boundary polygon in SRID 4326; may be NULL.
    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
|
||||
|
||||
|
||||
class DimCMHCZone(Base):
    """CMHC zone dimension (table ``dim_cmhc_zone``).

    Carries an optional PostGIS polygon for the zone boundary.
    """

    __tablename__ = "dim_cmhc_zone"

    # Surrogate key, generated by the database.
    zone_key: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    # Zone code (at most 10 characters); unique across the table.
    zone_code: Mapped[str] = mapped_column(
        String(10),
        nullable=False,
        unique=True,
    )
    zone_name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Boundary polygon in SRID 4326; may be NULL.
    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
|
||||
|
||||
|
||||
class DimNeighbourhood(Base):
    """City of Toronto neighbourhood dimension (table ``dim_neighbourhood``).

    Note: No FK to fact tables in V1 - reference overlay only.

    The ``Numeric`` columns are declared with ``asdecimal=False`` so that the
    values loaded from the database are Python ``float`` objects, matching the
    ``Mapped[float | None]`` annotations.  With SQLAlchemy's default
    (``asdecimal=True``) they would come back as ``decimal.Decimal``.
    """

    __tablename__ = "dim_neighbourhood"

    # Primary key, followed by the display name.
    neighbourhood_id: Mapped[int] = mapped_column(Integer, primary_key=True)
    name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Boundary polygon in SRID 4326; may be NULL.
    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)

    # Optional descriptive attributes; every one of them may be NULL.
    population: Mapped[int | None] = mapped_column(Integer, nullable=True)
    land_area_sqkm: Mapped[float | None] = mapped_column(
        Numeric(10, 4, asdecimal=False), nullable=True
    )
    pop_density_per_sqkm: Mapped[float | None] = mapped_column(
        Numeric(10, 2, asdecimal=False), nullable=True
    )
    pct_bachelors_or_higher: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    median_household_income: Mapped[float | None] = mapped_column(
        Numeric(12, 2, asdecimal=False), nullable=True
    )
    pct_owner_occupied: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    pct_renter_occupied: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    # Census vintage of the attributes above; defaults to 2021 on insert.
    census_year: Mapped[int] = mapped_column(Integer, default=2021)
|
||||
|
||||
|
||||
class DimPolicyEvent(Base):
    """Policy event dimension (table ``dim_policy_event``).

    Used for time-series annotation.  The value sets listed in the comments
    below are conventions; no constraint in this model enforces them.
    """

    __tablename__ = "dim_policy_event"

    # Surrogate key, generated by the database.
    event_id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    event_date: Mapped[date] = mapped_column(Date, nullable=False)
    effective_date: Mapped[date | None] = mapped_column(Date, nullable=True)
    # federal / provincial / municipal
    level: Mapped[str] = mapped_column(String(20), nullable=False)
    # monetary / tax / regulatory / supply / economic
    category: Mapped[str] = mapped_column(String(20), nullable=False)
    title: Mapped[str] = mapped_column(String(200), nullable=False)
    description: Mapped[str | None] = mapped_column(Text, nullable=True)
    # bearish / bullish / neutral
    expected_direction: Mapped[str] = mapped_column(String(10), nullable=False)
    source_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
    # high / medium / low; defaults to "medium" on insert
    confidence: Mapped[str] = mapped_column(String(10), default="medium")
|
||||
69
portfolio_app/toronto/models/facts.py
Normal file
69
portfolio_app/toronto/models/facts.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""SQLAlchemy models for fact tables."""
|
||||
|
||||
from sqlalchemy import ForeignKey, Integer, Numeric, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class FactPurchases(Base):
    """Fact table for TRREB purchase/sales data (table ``fact_purchases``).

    Grain: One row per district per month.

    The ``Numeric`` measures are declared with ``asdecimal=False`` so that the
    values loaded from the database are Python ``float`` objects, matching the
    ``Mapped[float]`` annotations.  With SQLAlchemy's default
    (``asdecimal=True``) they would come back as ``decimal.Decimal``.
    """

    __tablename__ = "fact_purchases"

    # Surrogate key, generated by the database.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Dimension foreign keys.
    date_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_time.date_key"), nullable=False
    )
    district_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_trreb_district.district_key"), nullable=False
    )

    # Measures (all required).
    sales_count: Mapped[int] = mapped_column(Integer, nullable=False)
    dollar_volume: Mapped[float] = mapped_column(
        Numeric(15, 2, asdecimal=False), nullable=False
    )
    avg_price: Mapped[float] = mapped_column(
        Numeric(12, 2, asdecimal=False), nullable=False
    )
    median_price: Mapped[float] = mapped_column(
        Numeric(12, 2, asdecimal=False), nullable=False
    )
    new_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    active_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    avg_dom: Mapped[int] = mapped_column(Integer, nullable=False)  # Days on market
    avg_sp_lp: Mapped[float] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=False
    )  # Sale/List ratio

    # Relationships; each also adds a ``purchases`` backref on the dimension.
    time = relationship("DimTime", backref="purchases")
    district = relationship("DimTRREBDistrict", backref="purchases")
|
||||
|
||||
|
||||
class FactRentals(Base):
    """Fact table for CMHC rental market data (table ``fact_rentals``).

    Grain: One row per zone per bedroom type per survey year.

    The ``Numeric`` measures are declared with ``asdecimal=False`` so that the
    values loaded from the database are Python ``float`` objects, matching the
    ``Mapped[float | None]`` annotations.  With SQLAlchemy's default
    (``asdecimal=True``) they would come back as ``decimal.Decimal``.
    """

    __tablename__ = "fact_rentals"

    # Surrogate key, generated by the database.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Dimension foreign keys.
    date_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_time.date_key"), nullable=False
    )
    zone_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_cmhc_zone.zone_key"), nullable=False
    )
    bedroom_type: Mapped[str] = mapped_column(String(20), nullable=False)

    # Measures; every one of them may be NULL.
    universe: Mapped[int | None] = mapped_column(Integer, nullable=True)
    avg_rent: Mapped[float | None] = mapped_column(
        Numeric(10, 2, asdecimal=False), nullable=True
    )
    median_rent: Mapped[float | None] = mapped_column(
        Numeric(10, 2, asdecimal=False), nullable=True
    )
    vacancy_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    availability_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    turnover_rate: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    rent_change_pct: Mapped[float | None] = mapped_column(
        Numeric(5, 2, asdecimal=False), nullable=True
    )
    reliability_code: Mapped[str | None] = mapped_column(String(2), nullable=True)

    # Relationships; each also adds a ``rentals`` backref on the dimension.
    time = relationship("DimTime", backref="rentals")
    zone = relationship("DimCMHCZone", backref="rentals")
|
||||
Reference in New Issue
Block a user