staging #96

Merged
lmiranda merged 90 commits from staging into main 2026-02-01 21:33:13 +00:00
15 changed files with 16 additions and 464 deletions
Showing only changes of commit fcaefabce8 - Show all commits

View File

@@ -2,7 +2,6 @@
from .choropleth import (
create_choropleth_figure,
create_district_map,
create_zone_map,
)
from .summary_cards import create_metric_card_figure, create_summary_metrics
@@ -17,7 +16,6 @@ from .time_series import (
__all__ = [
# Choropleth
"create_choropleth_figure",
"create_district_map",
"create_zone_map",
# Time series
"create_price_time_series",

View File

@@ -115,34 +115,6 @@ def create_choropleth_figure(
return fig
def create_district_map(
    districts_geojson: dict[str, Any] | None,
    purchase_data: list[dict[str, Any]],
    metric: str = "avg_price",
) -> go.Figure:
    """Build the TRREB district choropleth for one purchase metric.

    Delegates to ``create_choropleth_figure`` with the district-specific
    join key, hover columns, colour scale and title filled in.

    Args:
        districts_geojson: GeoJSON for TRREB district boundaries, or None.
        purchase_data: Purchase statistics by district.
        metric: Column used to colour the map (avg_price, sales_count, etc.).

    Returns:
        Plotly Figure object.
    """
    # The column being coloured is left out of the hover list.
    hover_fields = [
        column
        for column in ("district_name", "sales_count", "avg_price", "median_price")
        if column != metric
    ]

    # Price-like metrics use the blue scale; everything else uses green.
    if "price" in metric:
        palette = "Blues"
    else:
        palette = "Greens"

    return create_choropleth_figure(
        geojson=districts_geojson,
        data=purchase_data,
        location_key="district_code",
        color_column=metric,
        hover_data=hover_fields,
        color_scale=palette,
        title="Toronto Purchase Market by District",
    )
def create_zone_map(
zones_geojson: dict[str, Any] | None,
rental_data: list[dict[str, Any]],

View File

@@ -18,8 +18,7 @@ _CMHC_ZONES_PATH = Path("data/toronto/raw/geo/cmhc_zones.geojson")
_cmhc_parser = CMHCZoneParser(_CMHC_ZONES_PATH) if _CMHC_ZONES_PATH.exists() else None
CMHC_ZONES_GEOJSON = _cmhc_parser.get_geojson_for_choropleth() if _cmhc_parser else None
# Load Toronto neighbourhoods GeoJSON for purchase choropleth maps
# Note: This is a temporary proxy until TRREB district boundaries are digitized
# Load Toronto neighbourhoods GeoJSON for choropleth maps
_NEIGHBOURHOODS_PATH = Path("data/toronto/raw/geo/toronto_neighbourhoods.geojson")
_neighbourhood_parser = (
NeighbourhoodParser(_NEIGHBOURHOODS_PATH) if _NEIGHBOURHOODS_PATH.exists() else None
@@ -30,9 +29,7 @@ NEIGHBOURHOODS_GEOJSON = (
else None
)
# Sample purchase data for all 158 City of Toronto neighbourhoods
# Note: This is SAMPLE DATA until TRREB district boundaries are digitized (Issue #25)
# Once TRREB boundaries are available, this will be replaced with real TRREB data by district
# Sample data for all 158 City of Toronto neighbourhoods
SAMPLE_PURCHASE_DATA = [
{
"neighbourhood_id": 1,
@@ -1486,11 +1483,7 @@ SAMPLE_TIME_SERIES_DATA = [
Input("toronto-year-selector", "value"),
)
def update_purchase_choropleth(metric: str, year: str) -> go.Figure:
"""Update the purchase market choropleth map.
Note: Currently using City of Toronto neighbourhoods as a proxy.
Will switch to TRREB districts when boundaries are digitized.
"""
"""Update the neighbourhood choropleth map."""
return create_choropleth_figure(
geojson=NEIGHBOURHOODS_GEOJSON,
data=SAMPLE_PURCHASE_DATA,

View File

@@ -257,9 +257,8 @@ def create_data_notice() -> dmc.Alert:
return dmc.Alert(
children=[
dmc.Text(
"This dashboard uses TRREB and CMHC data. "
"Geographic boundaries require QGIS digitization to enable choropleth maps. "
"Sample data is shown below.",
"This dashboard displays Toronto neighbourhood and CMHC rental data. "
"Sample data is shown for demonstration purposes.",
size="sm",
),
],

View File

@@ -46,42 +46,8 @@ def layout() -> dmc.Container:
mb="lg",
children=[
dmc.Title("Data Sources", order=2, mb="md"),
# TRREB
dmc.Title("Purchase Data: TRREB", order=3, size="h4", mb="sm"),
dmc.Text(
[
"The Toronto Regional Real Estate Board (TRREB) publishes monthly ",
html.Strong("Market Watch"),
" reports containing aggregate statistics for residential real estate "
"transactions across the Greater Toronto Area.",
],
mb="sm",
),
dmc.List(
[
dmc.ListItem("Source: TRREB Market Watch Reports (PDF)"),
dmc.ListItem("Geographic granularity: ~35 TRREB Districts"),
dmc.ListItem("Temporal granularity: Monthly"),
dmc.ListItem("Coverage: 2021-present"),
dmc.ListItem(
[
"Metrics: Sales count, average/median price, new listings, ",
"active listings, days on market, sale-to-list ratio",
]
),
],
mb="md",
),
dmc.Anchor(
"TRREB Market Watch Archive",
href="https://trreb.ca/market-data/market-watch/market-watch-archive/",
target="_blank",
mb="lg",
),
# CMHC
dmc.Title(
"Rental Data: CMHC", order=3, size="h4", mb="sm", mt="md"
),
dmc.Title("Rental Data: CMHC", order=3, size="h4", mb="sm"),
dmc.Text(
[
"Canada Mortgage and Housing Corporation (CMHC) conducts the annual ",
@@ -124,28 +90,17 @@ def layout() -> dmc.Container:
mb="lg",
children=[
dmc.Title("Geographic Considerations", order=2, mb="md"),
dmc.Alert(
title="Important: Non-Aligned Geographies",
color="yellow",
mb="md",
children=[
"TRREB Districts and CMHC Zones do ",
html.Strong("not"),
" align geographically. They are displayed as separate layers and "
"should not be directly compared at the sub-regional level.",
],
),
dmc.Text(
"The dashboard presents three geographic layers:",
"The dashboard presents two geographic layers:",
mb="sm",
),
dmc.List(
[
dmc.ListItem(
[
html.Strong("TRREB Districts (~35): "),
"Used for purchase/sales data visualization. "
"Districts are defined by TRREB and labeled with codes like W01, C01, E01.",
html.Strong("City Neighbourhoods (158): "),
"Official City of Toronto neighbourhood boundaries, "
"used for neighbourhood-level analysis.",
]
),
dmc.ListItem(
@@ -155,13 +110,6 @@ def layout() -> dmc.Container:
"Zones are aligned with Census Tract boundaries.",
]
),
dmc.ListItem(
[
html.Strong("City Neighbourhoods (158): "),
"Reference overlay only. "
"These are official City of Toronto neighbourhood boundaries.",
]
),
],
),
],
@@ -212,22 +160,15 @@ def layout() -> dmc.Container:
dmc.ListItem(
[
html.Strong("Reporting Lag: "),
"TRREB data reflects closed transactions, which may lag market "
"conditions by 1-3 months. CMHC data is annual.",
]
),
dmc.ListItem(
[
html.Strong("Geographic Boundaries: "),
"TRREB district boundaries were manually digitized from reference maps "
"and may contain minor inaccuracies.",
"CMHC rental data is annual (October survey). "
"Other data sources may have different update frequencies.",
]
),
dmc.ListItem(
[
html.Strong("Data Suppression: "),
"Some cells may be suppressed for confidentiality when transaction "
"counts are below thresholds.",
"Some cells may be suppressed for confidentiality when counts "
"are below thresholds.",
]
),
],

View File

@@ -8,98 +8,6 @@ from datetime import date
from typing import Any
def get_demo_districts() -> list[dict[str, Any]]:
    """Return sample TRREB district data.

    Returns:
        A new list with one dict per demo district, each holding the keys
        ``district_code``, ``district_name`` and ``area_type`` (in that order).
    """
    # (district_code, district_name, area_type) for every demo district.
    rows = (
        ("W01", "Long Branch", "West"),
        ("W02", "Mimico", "West"),
        ("W03", "Kingsway South", "West"),
        ("W04", "Edenbridge", "West"),
        ("W05", "Islington", "West"),
        ("W06", "Rexdale", "West"),
        ("W07", "Willowdale", "West"),
        ("W08", "York", "West"),
        ("C01", "Downtown Core", "Central"),
        ("C02", "Annex", "Central"),
        ("C03", "Forest Hill", "Central"),
        ("C04", "Lawrence Park", "Central"),
        ("C06", "Willowdale East", "Central"),
        ("C07", "Thornhill", "Central"),
        ("C08", "Waterfront", "Central"),
        ("E01", "Leslieville", "East"),
        ("E02", "The Beaches", "East"),
        ("E03", "Danforth", "East"),
        ("E04", "Birch Cliff", "East"),
        ("E05", "Scarborough", "East"),
    )
    return [
        {"district_code": code, "district_name": name, "area_type": area}
        for code, name, area in rows
    ]
def get_demo_purchase_data() -> list[dict[str, Any]]:
    """Return sample purchase data for time series visualization.

    Generates one record per district per month for 2024 and 2025 for three
    demo districts (W01, C01, E01). Values are pseudo-random but fully
    deterministic: a private generator seeded with 42 is used, so the
    process-wide ``random`` state is left untouched.

    Returns:
        A list of 72 dicts with the keys ``district_code``, ``full_date``,
        ``year``, ``month``, ``avg_price``, ``median_price``, ``sales_count``,
        ``new_listings``, ``active_listings``, ``days_on_market`` and
        ``sale_to_list_ratio``.
    """
    import random

    # Private RNG: same sequence as seeding the global generator with 42,
    # without reseeding it for every other caller in the process.
    rng = random.Random(42)

    base_prices = {
        "W01": 850000,
        "C01": 1200000,
        "E01": 950000,
    }

    data: list[dict[str, Any]] = []
    for year in (2024, 2025):
        for month in range(1, 13):
            for district, base_price in base_prices.items():
                # Months elapsed since the start of 2024 drive a slight
                # upward trend; random noise is layered on top.
                trend = (year - 2024) * 12 + month
                price_variation = rng.uniform(-0.05, 0.05)
                trend_factor = 1 + (trend * 0.002)
                avg_price = int(base_price * trend_factor * (1 + price_variation))
                sales = rng.randint(50, 200)

                # The draw order below must stay fixed to keep the generated
                # values stable.
                data.append(
                    {
                        "district_code": district,
                        "full_date": date(year, month, 1),
                        "year": year,
                        "month": month,
                        "avg_price": avg_price,
                        "median_price": int(avg_price * 0.95),
                        "sales_count": sales,
                        "new_listings": int(sales * rng.uniform(1.2, 1.8)),
                        "active_listings": int(sales * rng.uniform(2.0, 3.5)),
                        "days_on_market": rng.randint(15, 45),
                        "sale_to_list_ratio": round(rng.uniform(0.95, 1.05), 2),
                    }
                )
    return data
def get_demo_rental_data() -> list[dict[str, Any]]:
"""Return sample rental data for visualization."""
data = []
@@ -219,23 +127,6 @@ def get_demo_policy_events() -> list[dict[str, Any]]:
def get_demo_summary_metrics() -> dict[str, dict[str, Any]]:
"""Return summary metrics for KPI cards."""
return {
"avg_price": {
"value": 1067968,
"title": "Avg. Price (2025)",
"delta": -4.7,
"delta_suffix": "%",
"prefix": "$",
"format_spec": ",.0f",
"positive_is_good": True,
},
"total_sales": {
"value": 67610,
"title": "Total Sales (2024)",
"delta": 2.6,
"delta_suffix": "%",
"format_spec": ",.0f",
"positive_is_good": True,
},
"avg_rent": {
"value": 2450,
"title": "Avg. Rent (2025)",

View File

@@ -8,7 +8,6 @@ from .dimensions import (
load_neighbourhoods,
load_policy_events,
load_time_dimension,
load_trreb_districts,
)
__all__ = [
@@ -19,7 +18,6 @@ __all__ = [
# Dimension loaders
"generate_date_key",
"load_time_dimension",
"load_trreb_districts",
"load_cmhc_zones",
"load_neighbourhoods",
"load_policy_events",

View File

@@ -9,13 +9,11 @@ from portfolio_app.toronto.models import (
DimNeighbourhood,
DimPolicyEvent,
DimTime,
DimTRREBDistrict,
)
from portfolio_app.toronto.schemas import (
CMHCZone,
Neighbourhood,
PolicyEvent,
TRREBDistrict,
)
from .base import get_session, upsert_by_key
@@ -97,42 +95,6 @@ def load_time_dimension(
return _load(sess)
def load_trreb_districts(
    districts: list[TRREBDistrict],
    session: Session | None = None,
) -> int:
    """Upsert the TRREB district dimension from validated schemas.

    Args:
        districts: List of validated district schemas.
        session: Optional existing session. When omitted, a session is
            obtained from ``get_session()`` for the duration of the load.

    Returns:
        Number of records loaded (inserted plus updated).
    """

    def _load(sess: Session) -> int:
        # Convert each schema into its ORM row, then upsert on district_code.
        rows = [
            DimTRREBDistrict(
                district_code=item.district_code,
                district_name=item.district_name,
                area_type=item.area_type.value,
                geometry=item.geometry_wkt,
            )
            for item in districts
        ]
        inserted, updated = upsert_by_key(
            sess, DimTRREBDistrict, rows, ["district_code"]
        )
        return inserted + updated

    if not session:
        with get_session() as sess:
            return _load(sess)
    return _load(session)
def load_cmhc_zones(
zones: list[CMHCZone],
session: Session | None = None,

View File

@@ -6,9 +6,8 @@ from .dimensions import (
DimNeighbourhood,
DimPolicyEvent,
DimTime,
DimTRREBDistrict,
)
from .facts import FactPurchases, FactRentals
from .facts import FactRentals
__all__ = [
# Base
@@ -18,11 +17,9 @@ __all__ = [
"create_tables",
# Dimensions
"DimTime",
"DimTRREBDistrict",
"DimCMHCZone",
"DimNeighbourhood",
"DimPolicyEvent",
# Facts
"FactPurchases",
"FactRentals",
]

View File

@@ -23,20 +23,6 @@ class DimTime(Base):
is_month_start: Mapped[bool] = mapped_column(Boolean, default=True)
class DimTRREBDistrict(Base):
    """TRREB district dimension table with PostGIS geometry.

    Mapped to the ``dim_trreb_district`` table. ``district_code`` is unique,
    so each district code appears at most once.
    """

    __tablename__ = "dim_trreb_district"

    # Surrogate primary key, auto-incremented.
    district_key: Mapped[int] = mapped_column(
        Integer, primary_key=True, autoincrement=True
    )
    # Required, unique district code of at most 3 characters.
    district_code: Mapped[str] = mapped_column(String(3), nullable=False, unique=True)
    # Required display name, up to 100 characters.
    district_name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Required area label, up to 10 characters.
    area_type: Mapped[str] = mapped_column(String(10), nullable=False)
    # Optional POLYGON geometry with SRID 4326.
    geometry = mapped_column(Geometry("POLYGON", srid=4326), nullable=True)
class DimCMHCZone(Base):
"""CMHC zone dimension table with PostGIS geometry."""

View File

@@ -6,37 +6,6 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
from .base import Base
class FactPurchases(Base):
    """Fact table for TRREB purchase/sales data.

    Grain: One row per district per month.

    Mapped to the ``fact_purchases`` table. Each row references ``dim_time``
    and ``dim_trreb_district`` through foreign keys, and every measure column
    is declared NOT NULL.
    """

    __tablename__ = "fact_purchases"

    # Surrogate primary key, auto-incremented.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Foreign key to dim_time.date_key.
    date_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_time.date_key"), nullable=False
    )
    # Foreign key to dim_trreb_district.district_key.
    district_key: Mapped[int] = mapped_column(
        Integer, ForeignKey("dim_trreb_district.district_key"), nullable=False
    )
    # Measures.
    # NOTE(review): the Numeric columns below are annotated Mapped[float];
    # SQLAlchemy's Numeric type returns Decimal by default - confirm which
    # Python type callers actually expect.
    sales_count: Mapped[int] = mapped_column(Integer, nullable=False)
    dollar_volume: Mapped[float] = mapped_column(Numeric(15, 2), nullable=False)
    avg_price: Mapped[float] = mapped_column(Numeric(12, 2), nullable=False)
    median_price: Mapped[float] = mapped_column(Numeric(12, 2), nullable=False)
    new_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    active_listings: Mapped[int] = mapped_column(Integer, nullable=False)
    avg_dom: Mapped[int] = mapped_column(Integer, nullable=False)  # Days on market
    avg_sp_lp: Mapped[float] = mapped_column(
        Numeric(5, 2), nullable=False
    )  # Sale/List ratio
    # Relationships; each one also registers a ``purchases`` backref on the
    # related dimension class.
    time = relationship("DimTime", backref="purchases")
    district = relationship("DimTRREBDistrict", backref="purchases")
class FactRentals(Base):
"""Fact table for CMHC rental market data.

View File

@@ -4,7 +4,6 @@ from .cmhc import CMHCParser
from .geo import (
CMHCZoneParser,
NeighbourhoodParser,
TRREBDistrictParser,
load_geojson,
)
@@ -12,7 +11,6 @@ __all__ = [
"CMHCParser",
# GeoJSON parsers
"CMHCZoneParser",
"TRREBDistrictParser",
"NeighbourhoodParser",
"load_geojson",
]

View File

@@ -13,8 +13,7 @@ from pyproj import Transformer
from shapely.geometry import mapping, shape
from shapely.ops import transform
from portfolio_app.toronto.schemas import CMHCZone, Neighbourhood, TRREBDistrict
from portfolio_app.toronto.schemas.dimensions import AreaType
from portfolio_app.toronto.schemas import CMHCZone, Neighbourhood
# Transformer for reprojecting from Web Mercator to WGS84
_TRANSFORMER_3857_TO_4326 = Transformer.from_crs(
@@ -221,135 +220,6 @@ class CMHCZoneParser:
return {"type": "FeatureCollection", "features": features}
class TRREBDistrictParser:
    """Parser for TRREB district boundary GeoJSON files.

    TRREB district boundaries are manually digitized from the TRREB PDF map
    using QGIS.

    Expected GeoJSON properties:
    - district_code: District code (W01, C01, E01, etc.)
    - district_name: District name
    - area_type: West, Central, East, or North

    Property names are matched against several spelling variants (see the
    ``*_PROPERTIES`` lists); the first candidate with a non-null value wins.
    """

    # Candidate property names, checked in order.
    CODE_PROPERTIES = [
        "district_code",
        "District_Code",
        "DISTRICT_CODE",
        "districtcode",
        "code",
    ]
    NAME_PROPERTIES = [
        "district_name",
        "District_Name",
        "DISTRICT_NAME",
        "districtname",
        "name",
        "NAME",
    ]
    AREA_PROPERTIES = [
        "area_type",
        "Area_Type",
        "AREA_TYPE",
        "areatype",
        "area",
        "type",
    ]

    def __init__(self, geojson_path: Path) -> None:
        """Initialize parser with path to GeoJSON file.

        The file is not read here; it is loaded on first access to
        ``geojson``.
        """
        self.geojson_path = geojson_path
        self._geojson: dict[str, Any] | None = None

    @property
    def geojson(self) -> dict[str, Any]:
        """Lazy-load and return raw GeoJSON data."""
        if self._geojson is None:
            self._geojson = load_geojson(self.geojson_path)
        return self._geojson

    def _find_property(
        self, properties: dict[str, Any], candidates: list[str]
    ) -> str | None:
        """Find a property value by checking multiple candidate names.

        Returns:
            The first non-null candidate value converted to ``str``, or None
            when no candidate is present.
        """
        for name in candidates:
            if name in properties and properties[name] is not None:
                return str(properties[name])
        return None

    def _infer_area_type(self, district_code: str) -> AreaType:
        """Infer area type from district code prefix.

        Codes starting with W, C or E (case-insensitive) map to West, Central
        and East; any other prefix falls back to North.
        """
        prefix = district_code[0].upper()
        # Named to avoid shadowing the ``mapping`` helper imported from
        # shapely.geometry at module level.
        area_by_prefix = {
            "W": AreaType.WEST,
            "C": AreaType.CENTRAL,
            "E": AreaType.EAST,
        }
        return area_by_prefix.get(prefix, AreaType.NORTH)

    def parse(self) -> list[TRREBDistrict]:
        """Parse GeoJSON and return list of TRREBDistrict schemas.

        Returns:
            One ``TRREBDistrict`` per feature, in file order.

        Raises:
            ValueError: If a feature has no recognizable district code.
        """
        districts = []
        for feature in self.geojson.get("features", []):
            # GeoJSON allows "properties": null, so fall back to an empty dict.
            props = feature.get("properties") or {}
            geom = feature.get("geometry")

            district_code = self._find_property(props, self.CODE_PROPERTIES)
            district_name = self._find_property(props, self.NAME_PROPERTIES)
            area_type_str = self._find_property(props, self.AREA_PROPERTIES)

            if not district_code:
                raise ValueError(
                    f"District code not found in properties: {list(props.keys())}"
                )
            if not district_name:
                district_name = district_code

            # Use the explicit area type when it is a valid AreaType value;
            # otherwise infer it from the code prefix.
            area_type = None
            if area_type_str:
                try:
                    area_type = AreaType(area_type_str)
                except ValueError:
                    area_type = None
            if area_type is None:
                area_type = self._infer_area_type(district_code)

            geometry_wkt = geometry_to_wkt(geom) if geom else None
            districts.append(
                TRREBDistrict(
                    district_code=district_code,
                    district_name=district_name,
                    area_type=area_type,
                    geometry_wkt=geometry_wkt,
                )
            )
        return districts

    def get_geojson_for_choropleth(
        self, key_property: str = "district_code"
    ) -> dict[str, Any]:
        """Get GeoJSON formatted for Plotly choropleth maps.

        Every feature keeps its original properties and additionally gets
        normalized ``district_code`` and ``district_name`` entries (the name
        falls back to the code when missing).

        Args:
            key_property: Property name under which the district code is
                exposed for joining against data. With the default this is
                the normalized ``district_code`` entry itself.

        Returns:
            A GeoJSON FeatureCollection dict.
        """
        features = []
        for feature in self.geojson.get("features", []):
            # GeoJSON allows "properties": null, so fall back to an empty dict.
            props = feature.get("properties") or {}
            district_code = self._find_property(props, self.CODE_PROPERTIES)
            district_name = self._find_property(props, self.NAME_PROPERTIES)

            new_props = dict(props)
            # Set the caller-chosen join key first so the normalized fields
            # below always keep their documented meaning.
            new_props[key_property] = district_code
            new_props["district_code"] = district_code
            new_props["district_name"] = district_name or district_code

            features.append(
                {
                    "type": "Feature",
                    "properties": new_props,
                    "geometry": feature.get("geometry"),
                }
            )
        return {"type": "FeatureCollection", "features": features}
class NeighbourhoodParser:
"""Parser for City of Toronto neighbourhood boundary GeoJSON files.

View File

@@ -2,7 +2,6 @@
from .cmhc import BedroomType, CMHCAnnualSurvey, CMHCRentalRecord, ReliabilityCode
from .dimensions import (
AreaType,
CMHCZone,
Confidence,
ExpectedDirection,
@@ -11,7 +10,6 @@ from .dimensions import (
PolicyEvent,
PolicyLevel,
TimeDimension,
TRREBDistrict,
)
__all__ = [
@@ -22,12 +20,10 @@ __all__ = [
"ReliabilityCode",
# Dimensions
"TimeDimension",
"TRREBDistrict",
"CMHCZone",
"Neighbourhood",
"PolicyEvent",
# Enums
"AreaType",
"PolicyLevel",
"PolicyCategory",
"ExpectedDirection",

View File

@@ -41,15 +41,6 @@ class Confidence(str, Enum):
LOW = "low"
class AreaType(str, Enum):
    """TRREB area type.

    Members subclass ``str``, so each one compares equal to its string value
    (for example ``AreaType.WEST == "West"``).
    """

    WEST = "West"
    CENTRAL = "Central"
    EAST = "East"
    NORTH = "North"
class TimeDimension(BaseModel):
"""Schema for time dimension record."""
@@ -62,15 +53,6 @@ class TimeDimension(BaseModel):
is_month_start: bool = True
class TRREBDistrict(BaseModel):
    """Schema for TRREB district dimension.

    ``district_code``, ``district_name`` and ``area_type`` are required;
    ``geometry_wkt`` is optional and defaults to None.
    """

    # District code, at most 3 characters (W01, C01, E01, etc.).
    district_code: str = Field(max_length=3, description="W01, C01, E01, etc.")
    # District name, at most 100 characters.
    district_name: str = Field(max_length=100)
    # Area the district belongs to.
    area_type: AreaType
    # Geometry as a WKT string, when available.
    geometry_wkt: str | None = Field(default=None, description="WKT geometry string")
class CMHCZone(BaseModel):
"""Schema for CMHC zone dimension."""