feat: Sprint 6 polish - methodology, demo data, deployment prep

- Add policy event markers to time series charts
- Create methodology page (/toronto/methodology) with data sources
- Add demo data module for testing without full pipeline
- Update README with project documentation
- Add health check endpoint (/health)
- Add database initialization script
- Export new figure factory functions

Closes #21

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 21:50:45 -05:00
parent ad6ee3d37f
commit 140d3085bf
7 changed files with 849 additions and 4 deletions

View File

@@ -1,12 +1,31 @@
"""Plotly figure factories for data visualization."""
from .choropleth import create_choropleth_figure
from .summary_cards import create_metric_card_figure
from .time_series import create_price_time_series, create_volume_time_series
from .choropleth import (
create_choropleth_figure,
create_district_map,
create_zone_map,
)
from .summary_cards import create_metric_card_figure, create_summary_metrics
from .time_series import (
add_policy_markers,
create_market_comparison_chart,
create_price_time_series,
create_time_series_with_events,
create_volume_time_series,
)
# Public API of the figures package: every name listed here is imported above
# from one of the sibling modules (choropleth, time_series, summary_cards).
__all__ = [
    # Choropleth
    "create_choropleth_figure",
    "create_district_map",
    "create_zone_map",
    # Time series
    "create_price_time_series",
    "create_volume_time_series",
    "create_market_comparison_chart",
    "create_time_series_with_events",
    "add_policy_markers",
    # Summary
    "create_metric_card_figure",
    "create_summary_metrics",
]

View File

@@ -231,3 +231,119 @@ def create_market_comparison_chart(
)
return fig
def add_policy_markers(
    fig: go.Figure,
    policy_events: list[dict[str, Any]],
    date_column: str = "event_date",
    y_position: float | None = None,
) -> go.Figure:
    """Add policy event markers to an existing time series figure.

    Each event contributes a dotted vertical line at its date plus a single
    marker carrying the hover text (title, date, level, category). Line and
    marker colour follow the event category; the marker symbol follows the
    expected market direction.

    Args:
        fig: Existing Plotly figure to add markers to. It is modified in place
            and also returned.
        policy_events: List of policy event dicts. Keys read are
            ``date_column``, ``category``, ``expected_direction``, ``title``
            and ``level``; missing or ``None`` values fall back to defaults.
        date_column: Key holding the event date in each event dict.
        y_position: Y position for markers. If None, the highest y value
            already plotted in ``fig`` is used so the markers sit at the top
            of the existing data. ``0`` is honoured as an explicit position.

    Returns:
        The same Figure object with policy markers added.
    """
    if not policy_events:
        return fig

    # Color mapping for policy categories
    category_colors = {
        "monetary": "#1f77b4",  # Blue
        "tax": "#2ca02c",  # Green
        "regulatory": "#ff7f0e",  # Orange
        "supply": "#9467bd",  # Purple
        "economic": "#d62728",  # Red
    }
    # Symbol mapping for expected direction
    direction_symbols = {
        "bullish": "triangle-up",
        "bearish": "triangle-down",
        "neutral": "circle",
    }

    # Compare against None rather than truthiness so y_position=0 is kept.
    # Resolved once, before any marker trace is added, so the markers
    # themselves never influence the fallback.
    marker_y = y_position if y_position is not None else _highest_plotted_y(fig)

    for event in policy_events:
        event_date = event.get(date_column)
        if event_date is None:
            # An event without a date cannot be positioned on the time axis.
            continue

        # `or` also covers keys that are present but explicitly set to None.
        category = event.get("category") or "economic"
        direction = event.get("expected_direction") or "neutral"
        title = event.get("title") or "Policy Event"
        level = event.get("level") or "federal"

        color = category_colors.get(category, "#666666")
        symbol = direction_symbols.get(direction, "circle")

        # Add vertical line for the event. No annotation_* argument is passed:
        # the line is unlabeled, and an empty annotation only adds an
        # invisible layout object.
        # NOTE(review): annotation arguments on add_vline have been reported
        # to fail for date-typed x in some Plotly releases - confirm.
        fig.add_vline(
            x=event_date,
            line_dash="dot",
            line_color=color,
            opacity=0.5,
        )

        # Add marker with hover info
        fig.add_trace(
            go.Scatter(
                x=[event_date],
                y=[marker_y],
                mode="markers",
                marker={
                    "symbol": symbol,
                    "size": 12,
                    "color": color,
                    "line": {"width": 1, "color": "white"},
                },
                name=title,
                hovertemplate=(
                    f"<b>{title}</b><br>"
                    f"Date: %{{x}}<br>"
                    f"Level: {str(level).title()}<br>"
                    f"Category: {str(category).title()}<br>"
                    f"<extra></extra>"
                ),
                showlegend=False,
            )
        )
    return fig


def _highest_plotted_y(fig: go.Figure) -> Any:
    """Return the largest non-None y value across the figure's traces, or None."""
    plotted: list[Any] = []
    for trace in fig.data:
        ys = getattr(trace, "y", None)
        if ys is None:
            continue
        plotted.extend(value for value in ys if value is not None)
    try:
        return max(plotted, default=None)
    except TypeError:
        # Values that cannot be ordered against each other: no usable top.
        return None
def create_time_series_with_events(
    data: list[dict[str, Any]],
    policy_events: list[dict[str, Any]],
    date_column: str = "full_date",
    value_column: str = "avg_price",
    title: str = "Price Trend with Policy Events",
) -> go.Figure:
    """Create a time series chart with policy event markers.

    The base chart comes from ``create_price_time_series``; the events are
    then overlaid with ``add_policy_markers``, anchored at the highest value
    found in ``data`` so the markers sit at the top of the plotted series.

    Args:
        data: Time series data, one dict per observation.
        policy_events: Policy events to overlay. When empty, the base chart is
            returned unchanged.
        date_column: Column name for dates.
        value_column: Column name for values.
        title: Chart title.

    Returns:
        Plotly Figure with time series and policy markers.
    """
    # Create base time series
    fig = create_price_time_series(
        data=data,
        date_column=date_column,
        price_column=value_column,
        title=title,
    )
    if not policy_events:
        return fig

    # Pass an explicit y position: a marker without a y value is not drawn,
    # which would leave only the vertical lines and no hover text.
    observed = [
        value for row in data if (value := row.get(value_column)) is not None
    ]
    return add_policy_markers(
        fig,
        policy_events,
        y_position=max(observed, default=None),
    )

View File

@@ -0,0 +1,20 @@
"""Health check endpoint for deployment monitoring."""
import dash
from dash import html
# Register this module as a Dash page served at /health; the page content is
# produced by the module-level layout() function.
dash.register_page(
    __name__,
    path="/health",
    title="Health Check",
)
def layout() -> html.Div:
    """Build the health check page: a container holding one status line."""
    status_line = html.Pre("status: ok")
    return html.Div(children=[status_line], id="health-check")

View File

@@ -0,0 +1,263 @@
"""Methodology page for Toronto Housing Dashboard."""
import dash
import dash_mantine_components as dmc
from dash import html
# Register this module as a Dash page served at /toronto/methodology; the
# title and description are the page metadata passed to Dash.
dash.register_page(
    __name__,
    path="/toronto/methodology",
    title="Methodology | Toronto Housing Dashboard",
    description="Data sources, methodology, and limitations for the Toronto Housing Dashboard",
)
def layout() -> dmc.Container:
    """Build the static methodology page.

    The page is a medium-width container holding a heading, an introduction,
    five bordered cards (data sources, geographic considerations, policy event
    annotations, limitations, technical implementation) and a link back to
    the dashboard.
    """
    # Props shared by every bordered section card.
    card = {"p": "lg", "radius": "md", "withBorder": True}

    def labelled(label: str, text: str) -> dmc.ListItem:
        """Return a list item that opens with a bold label."""
        return dmc.ListItem([html.Strong(label), text])

    # --- Header -----------------------------------------------------------
    heading = dmc.Title("Methodology", order=1, mb="lg")
    intro = dmc.Text(
        "This page documents the data sources, processing methodology, "
        "and known limitations of the Toronto Housing Dashboard.",
        size="lg",
        c="dimmed",
        mb="xl",
    )

    # --- Data sources: TRREB (purchases) ----------------------------------
    trreb_summary = dmc.Text(
        [
            "The Toronto Regional Real Estate Board (TRREB) publishes monthly ",
            html.Strong("Market Watch"),
            " reports containing aggregate statistics for residential real estate "
            "transactions across the Greater Toronto Area.",
        ],
        mb="sm",
    )
    trreb_facts = dmc.List(
        [
            dmc.ListItem("Source: TRREB Market Watch Reports (PDF)"),
            dmc.ListItem("Geographic granularity: ~35 TRREB Districts"),
            dmc.ListItem("Temporal granularity: Monthly"),
            dmc.ListItem("Coverage: 2021-present"),
            dmc.ListItem(
                [
                    "Metrics: Sales count, average/median price, new listings, ",
                    "active listings, days on market, sale-to-list ratio",
                ]
            ),
        ],
        mb="md",
    )
    trreb_link = dmc.Anchor(
        "TRREB Market Watch Archive",
        href="https://trreb.ca/market-data/market-watch/market-watch-archive/",
        target="_blank",
        mb="lg",
    )

    # --- Data sources: CMHC (rentals) -------------------------------------
    cmhc_summary = dmc.Text(
        [
            "Canada Mortgage and Housing Corporation (CMHC) conducts the annual ",
            html.Strong("Rental Market Survey"),
            " providing rental market statistics for major urban centres.",
        ],
        mb="sm",
    )
    cmhc_facts = dmc.List(
        [
            dmc.ListItem("Source: CMHC Rental Market Survey (Excel)"),
            dmc.ListItem(
                "Geographic granularity: ~20 CMHC Zones (Census Tract aligned)"
            ),
            dmc.ListItem("Temporal granularity: Annual (October survey)"),
            dmc.ListItem("Coverage: 2021-present"),
            dmc.ListItem(
                [
                    "Metrics: Average/median rent, vacancy rate, universe count, ",
                    "turnover rate, year-over-year rent change",
                ]
            ),
        ],
        mb="md",
    )
    cmhc_link = dmc.Anchor(
        "CMHC Housing Market Information Portal",
        href="https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market",
        target="_blank",
    )

    data_sources = dmc.Paper(
        **card,
        mb="lg",
        children=[
            dmc.Title("Data Sources", order=2, mb="md"),
            dmc.Title("Purchase Data: TRREB", order=3, size="h4", mb="sm"),
            trreb_summary,
            trreb_facts,
            trreb_link,
            dmc.Title("Rental Data: CMHC", order=3, size="h4", mb="sm", mt="md"),
            cmhc_summary,
            cmhc_facts,
            cmhc_link,
        ],
    )

    # --- Geographic considerations ----------------------------------------
    alignment_warning = dmc.Alert(
        title="Important: Non-Aligned Geographies",
        color="yellow",
        mb="md",
        children=[
            "TRREB Districts and CMHC Zones do ",
            html.Strong("not"),
            " align geographically. They are displayed as separate layers and "
            "should not be directly compared at the sub-regional level.",
        ],
    )
    geographic_layers = dmc.List(
        [
            labelled(
                "TRREB Districts (~35): ",
                "Used for purchase/sales data visualization. "
                "Districts are defined by TRREB and labeled with codes like W01, C01, E01.",
            ),
            labelled(
                "CMHC Zones (~20): ",
                "Used for rental data visualization. "
                "Zones are aligned with Census Tract boundaries.",
            ),
            labelled(
                "City Neighbourhoods (158): ",
                "Reference overlay only. "
                "These are official City of Toronto neighbourhood boundaries.",
            ),
        ],
    )
    geography = dmc.Paper(
        **card,
        mb="lg",
        children=[
            dmc.Title("Geographic Considerations", order=2, mb="md"),
            alignment_warning,
            dmc.Text("The dashboard presents three geographic layers:", mb="sm"),
            geographic_layers,
        ],
    )

    # --- Policy event annotations -----------------------------------------
    causation_notice = dmc.Alert(
        title="No Causation Claims",
        color="blue",
        children=[
            "The presence of a policy marker near a market trend change does ",
            html.Strong("not"),
            " imply causation. Housing markets are influenced by numerous factors "
            "beyond policy interventions.",
        ],
    )
    policy_annotations = dmc.Paper(
        **card,
        mb="lg",
        children=[
            dmc.Title("Policy Event Annotations", order=2, mb="md"),
            dmc.Text(
                "The time series charts include markers for significant policy events "
                "that may have influenced housing market conditions. These annotations are "
                "for contextual reference only.",
                mb="md",
            ),
            causation_notice,
        ],
    )

    # --- Limitations --------------------------------------------------------
    limitations = dmc.Paper(
        **card,
        mb="lg",
        children=[
            dmc.Title("Limitations", order=2, mb="md"),
            dmc.List(
                [
                    labelled(
                        "Aggregate Data: ",
                        "All statistics are aggregates. Individual property characteristics, "
                        "condition, and micro-location are not reflected.",
                    ),
                    labelled(
                        "Reporting Lag: ",
                        "TRREB data reflects closed transactions, which may lag market "
                        "conditions by 1-3 months. CMHC data is annual.",
                    ),
                    labelled(
                        "Geographic Boundaries: ",
                        "TRREB district boundaries were manually digitized from reference maps "
                        "and may contain minor inaccuracies.",
                    ),
                    labelled(
                        "Data Suppression: ",
                        "Some cells may be suppressed for confidentiality when transaction "
                        "counts are below thresholds.",
                    ),
                ],
            ),
        ],
    )

    # --- Technical implementation (last card: no bottom margin) ------------
    stack = (
        "Python 3.11+ with Dash and Plotly",
        "PostgreSQL with PostGIS for geospatial data",
        "dbt for data transformation",
        "Pydantic for data validation",
        "SQLAlchemy 2.0 for database operations",
    )
    technical = dmc.Paper(
        **card,
        children=[
            dmc.Title("Technical Implementation", order=2, mb="md"),
            dmc.Text("This dashboard is built with:", mb="sm"),
            dmc.List([dmc.ListItem(entry) for entry in stack], mb="md"),
            dmc.Anchor(
                "View source code on GitHub",
                href="https://github.com/lmiranda/personal-portfolio",
                target="_blank",
            ),
        ],
    )

    # --- Back link ----------------------------------------------------------
    back_link = dmc.Group(
        mt="xl",
        children=[
            dmc.Anchor("← Back to Dashboard", href="/toronto", size="lg"),
        ],
    )

    return dmc.Container(
        size="md",
        py="xl",
        children=[
            heading,
            intro,
            data_sources,
            geography,
            policy_annotations,
            limitations,
            technical,
            back_link,
        ],
    )

View File

@@ -0,0 +1,257 @@
"""Demo/sample data for testing the Toronto Housing Dashboard without full pipeline.
This module provides synthetic data for development and demonstration purposes.
Replace with real data from the database in production.
"""
from datetime import date
from typing import Any
def get_demo_districts() -> list[dict[str, Any]]:
    """Return the sample TRREB districts used by the demo dashboard.

    Returns:
        One dict per district with the keys ``district_code``,
        ``district_name`` and ``area_type``, ordered West, Central, East.
    """
    # (code, name) pairs grouped by area; dict order fixes the output order.
    districts_by_area = {
        "West": [
            ("W01", "Long Branch"),
            ("W02", "Mimico"),
            ("W03", "Kingsway South"),
            ("W04", "Edenbridge"),
            ("W05", "Islington"),
            ("W06", "Rexdale"),
            ("W07", "Willowdale"),
            ("W08", "York"),
        ],
        "Central": [
            ("C01", "Downtown Core"),
            ("C02", "Annex"),
            ("C03", "Forest Hill"),
            ("C04", "Lawrence Park"),
            ("C06", "Willowdale East"),
            ("C07", "Thornhill"),
            ("C08", "Waterfront"),
        ],
        "East": [
            ("E01", "Leslieville"),
            ("E02", "The Beaches"),
            ("E03", "Danforth"),
            ("E04", "Birch Cliff"),
            ("E05", "Scarborough"),
        ],
    }
    return [
        {"district_code": code, "district_name": name, "area_type": area}
        for area, members in districts_by_area.items()
        for code, name in members
    ]
def get_demo_purchase_data() -> list[dict[str, Any]]:
    """Return sample purchase data for time series visualization.

    Generates one row per month of 2024 and 2025 for each of three demo
    districts (W01, C01, E01). Prices follow a slight upward trend with up to
    5% of random variation either way; the remaining metrics are drawn from
    fixed ranges.

    Returns:
        72 dicts with the keys ``district_code``, ``full_date``, ``year``,
        ``month``, ``avg_price``, ``median_price``, ``sales_count``,
        ``new_listings``, ``active_listings``, ``days_on_market`` and
        ``sale_to_list_ratio``. The output is identical on every call.
    """
    import random

    # A private generator seeded with 42 yields the same sequence as seeding
    # the module-level functions, without resetting the process-wide RNG that
    # other code may rely on.
    rng = random.Random(42)

    base_prices = {
        "W01": 850000,
        "C01": 1200000,
        "E01": 950000,
    }
    data: list[dict[str, Any]] = []
    for year in (2024, 2025):
        for month in range(1, 13):
            # Months since the start of 2024 drive a slight upward trend.
            trend = (year - 2024) * 12 + month
            trend_factor = 1 + (trend * 0.002)
            for district, base_price in base_prices.items():
                # The order of the rng calls below determines the generated
                # values; keep it stable.
                price_variation = rng.uniform(-0.05, 0.05)
                avg_price = int(base_price * trend_factor * (1 + price_variation))
                sales = rng.randint(50, 200)
                data.append(
                    {
                        "district_code": district,
                        "full_date": date(year, month, 1),
                        "year": year,
                        "month": month,
                        "avg_price": avg_price,
                        "median_price": int(avg_price * 0.95),
                        "sales_count": sales,
                        "new_listings": int(sales * rng.uniform(1.2, 1.8)),
                        "active_listings": int(sales * rng.uniform(2.0, 3.5)),
                        "days_on_market": rng.randint(15, 45),
                        "sale_to_list_ratio": round(rng.uniform(0.95, 1.05), 2),
                    }
                )
    return data
def get_demo_rental_data() -> list[dict[str, Any]]:
    """Return sample rental survey rows for the demo dashboard.

    Produces one row per survey year (2021-2025), zone and bedroom type.
    Rents grow by 5% of the base rent per year, the vacancy rate drops by 0.3
    per year from 2.5, and the universe grows by 200 per year from 5000.

    Returns:
        100 dicts with the keys ``zone_code``, ``zone_name``,
        ``survey_year``, ``full_date``, ``bedroom_type``, ``average_rent``,
        ``median_rent``, ``vacancy_rate`` and ``universe``.
    """
    first_year = 2021
    zones = [
        ("Zone01", "Downtown"),
        ("Zone02", "Midtown"),
        ("Zone03", "North York"),
        ("Zone04", "Scarborough"),
        ("Zone05", "Etobicoke"),
    ]
    # Base monthly rent per bedroom type; dict order is the output order.
    base_rents = {
        "bachelor": 1800,
        "1_bedroom": 2200,
        "2_bedroom": 2800,
        "3_bedroom": 3400,
    }

    rows: list[dict[str, Any]] = []
    for survey_year in range(first_year, 2026):
        # Everything below depends only on the year, so compute it once.
        years_elapsed = survey_year - first_year
        growth = 1 + (years_elapsed * 0.05)
        vacancy = round(2.5 - years_elapsed * 0.3, 1)
        universe = 5000 + years_elapsed * 200
        survey_date = date(survey_year, 10, 1)
        for code, name in zones:
            for bedroom, rent in base_rents.items():
                rows.append(
                    {
                        "zone_code": code,
                        "zone_name": name,
                        "survey_year": survey_year,
                        "full_date": survey_date,
                        "bedroom_type": bedroom,
                        "average_rent": int(rent * growth),
                        "median_rent": int(rent * growth * 0.98),
                        "vacancy_rate": vacancy,
                        "universe": universe,
                    }
                )
    return rows
def get_demo_policy_events() -> list[dict[str, Any]]:
    """Return the sample policy events used to annotate demo charts.

    Returns:
        Seven federal events, all marked bullish: five Bank of Canada rate
        cuts (category ``monetary``) followed by two mortgage rule changes
        (category ``regulatory``). Each dict has the keys ``event_date``,
        ``effective_date``, ``level``, ``category``, ``title``,
        ``description`` and ``expected_direction``.
    """
    # (announcement date, size of the cut in basis points, resulting rate)
    rate_cuts = [
        (date(2024, 6, 5), 25, "4.75"),
        (date(2024, 7, 24), 25, "4.50"),
        (date(2024, 9, 4), 25, "4.25"),
        (date(2024, 10, 23), 50, "3.75"),
        (date(2024, 12, 11), 50, "3.25"),
    ]
    # (title, description); both share one announcement and effective date.
    mortgage_rules = [
        (
            "CMHC 30-Year Amortization",
            "30-year amortization extended to all first-time buyers and new builds",
        ),
        (
            "Insured Mortgage Cap $1.5M",
            "Insured mortgage cap raised from $1M to $1.5M",
        ),
    ]

    events: list[dict[str, Any]] = [
        {
            # Rate cuts take effect on the day they are announced.
            "event_date": announced,
            "effective_date": announced,
            "level": "federal",
            "category": "monetary",
            "title": f"BoC Rate Cut ({basis_points}bp)",
            "description": (
                f"Bank of Canada cuts overnight rate by {basis_points} "
                f"basis points to {new_rate}%"
            ),
            "expected_direction": "bullish",
        }
        for announced, basis_points, new_rate in rate_cuts
    ]
    events.extend(
        {
            "event_date": date(2024, 9, 16),
            "effective_date": date(2024, 12, 15),
            "level": "federal",
            "category": "regulatory",
            "title": rule_title,
            "description": rule_description,
            "expected_direction": "bullish",
        }
        for rule_title, rule_description in mortgage_rules
    )
    return events
def get_demo_summary_metrics() -> dict[str, dict[str, Any]]:
    """Return the fixed KPI card settings shown on the demo dashboard.

    Returns:
        A dict keyed by metric name (``avg_price``, ``total_sales``,
        ``avg_rent``, ``vacancy_rate``). Each value holds the card's value,
        title, delta and formatting options; ``prefix`` and ``suffix`` are
        present only on the cards that use them.
    """
    price_card: dict[str, Any] = {
        "value": 1067968,
        "title": "Avg. Price (2025)",
        "delta": -4.7,
        "delta_suffix": "%",
        "prefix": "$",
        "format_spec": ",.0f",
        "positive_is_good": True,
    }
    sales_card: dict[str, Any] = {
        "value": 67610,
        "title": "Total Sales (2024)",
        "delta": 2.6,
        "delta_suffix": "%",
        "format_spec": ",.0f",
        "positive_is_good": True,
    }
    rent_card: dict[str, Any] = {
        "value": 2450,
        "title": "Avg. Rent (2025)",
        "delta": 3.2,
        "delta_suffix": "%",
        "prefix": "$",
        "format_spec": ",.0f",
        "positive_is_good": False,
    }
    # The vacancy delta is expressed in percentage points ("pp"), not percent.
    vacancy_card: dict[str, Any] = {
        "value": 1.8,
        "title": "Vacancy Rate",
        "delta": -0.4,
        "delta_suffix": "pp",
        "suffix": "%",
        "format_spec": ".1f",
        "positive_is_good": False,
    }
    return {
        "avg_price": price_card,
        "total_sales": sales_card,
        "avg_rent": rent_card,
        "vacancy_rate": vacancy_card,
    }