staging #96

Merged
lmiranda merged 90 commits from staging into main 2026-02-01 21:33:13 +00:00
7 changed files with 849 additions and 4 deletions
Showing only changes of commit 140d3085bf - Show all commits

120
README.md
View File

@@ -1,2 +1,120 @@
# personal-portfolio
# Analytics Portfolio
A data analytics portfolio showcasing end-to-end data engineering, visualization, and analysis capabilities.
## Projects
### Toronto Housing Dashboard
An interactive choropleth dashboard analyzing Toronto's housing market using multi-source data integration.
**Features:**
- Purchase market analysis from TRREB monthly reports
- Rental market analysis from CMHC annual surveys
- Interactive choropleth maps by district/zone
- Time series visualization with policy event annotations
- Purchase/Rental mode toggle
**Data Sources:**
- [TRREB Market Watch](https://trreb.ca/market-data/market-watch/) - Monthly purchase statistics
- [CMHC Rental Market Survey](https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market) - Annual rental data
**Tech Stack:**
- Python 3.11+ / Dash / Plotly
- PostgreSQL + PostGIS
- dbt for data transformation
- Pydantic for validation
- SQLAlchemy 2.0
## Quick Start
```bash
# Clone and setup
git clone https://github.com/lmiranda/personal-portfolio.git
cd personal-portfolio
# Install dependencies and configure environment
make setup
# Start database
make docker-up
# Initialize database schema
make db-init
# Run development server
make run
```
Visit `http://localhost:8050` to view the portfolio.
## Project Structure
```
portfolio_app/
├── app.py # Dash app factory
├── config.py # Pydantic settings
├── pages/
│ ├── home.py # Bio landing page (/)
│ └── toronto/ # Toronto dashboard (/toronto)
├── components/ # Shared UI components
├── figures/ # Plotly figure factories
└── toronto/ # Toronto data logic
├── parsers/ # PDF/CSV extraction
├── loaders/ # Database operations
├── schemas/ # Pydantic models
└── models/ # SQLAlchemy ORM
dbt/
├── models/
│ ├── staging/ # 1:1 source tables
│ ├── intermediate/ # Business logic
│ └── marts/ # Analytical tables
```
## Development
```bash
make test # Run tests
make lint # Run linter
make format # Format code
make ci # Run all checks
```
## Data Pipeline
```
Raw Files (PDF/Excel)
  → Parsers (pdfplumber, pandas)
  → Pydantic Validation
  → SQLAlchemy Loaders
  → PostgreSQL + PostGIS
  → dbt Transformations
  → Dash Visualization
```
## Environment Variables
Copy `.env.example` to `.env` and configure:
```bash
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
POSTGRES_USER=portfolio
POSTGRES_PASSWORD=<secure>
POSTGRES_DB=portfolio
DASH_DEBUG=true
```
## License
MIT
## Author
Leo Miranda - [GitHub](https://github.com/lmiranda) | [LinkedIn](https://linkedin.com/in/yourprofile)

View File

@@ -1,12 +1,31 @@
"""Plotly figure factories for data visualization."""
from .choropleth import create_choropleth_figure
from .summary_cards import create_metric_card_figure
from .time_series import create_price_time_series, create_volume_time_series
from .choropleth import (
create_choropleth_figure,
create_district_map,
create_zone_map,
)
from .summary_cards import create_metric_card_figure, create_summary_metrics
from .time_series import (
add_policy_markers,
create_market_comparison_chart,
create_price_time_series,
create_time_series_with_events,
create_volume_time_series,
)
# Names re-exported by this package; each entry corresponds to one of the
# factory functions imported from the submodules above, grouped by figure type.
__all__ = [
    # Choropleth
    "create_choropleth_figure",
    "create_district_map",
    "create_zone_map",
    # Time series
    "create_price_time_series",
    "create_volume_time_series",
    "create_market_comparison_chart",
    "create_time_series_with_events",
    "add_policy_markers",
    # Summary
    "create_metric_card_figure",
    "create_summary_metrics",
]

View File

@@ -231,3 +231,119 @@ def create_market_comparison_chart(
)
return fig
def _chart_top_y(fig: go.Figure) -> float | None:
    """Return the largest numeric y value already plotted on *fig*, or None.

    Traces without y data, gaps (``None``), NaN and non-numeric entries are
    ignored. ``None`` is returned when the figure holds no numeric y values.
    """
    import numbers

    highest: float | None = None
    for trace in fig.data:
        values = getattr(trace, "y", None)
        if values is None:
            continue
        try:
            candidates = iter(values)
        except TypeError:
            # Not a sequence of points; nothing to measure on this trace.
            continue
        for value in candidates:
            if (
                isinstance(value, numbers.Real)
                and not isinstance(value, bool)
                and value == value  # NaN never equals itself
                and (highest is None or value > highest)
            ):
                highest = float(value)
    return highest


def add_policy_markers(
    fig: go.Figure,
    policy_events: list[dict[str, Any]],
    date_column: str = "event_date",
    y_position: float | None = None,
) -> go.Figure:
    """Add policy event markers to an existing time series figure.

    For every event a dotted vertical line is drawn at the event date and a
    marker carrying the hover text (title, date, level, category) is added.

    Args:
        fig: Existing Plotly figure to add markers to. It is modified in place.
        policy_events: List of policy event dicts. Recognised keys are the
            date key named by ``date_column`` plus ``category``,
            ``expected_direction``, ``title`` and ``level``. Missing or
            ``None`` metadata falls back to a default.
        date_column: Key in each event dict that holds the event date.
        y_position: Y position for markers. ``0`` is a valid position. If
            None, the highest y value already plotted on ``fig`` is used so
            the markers sit at the top of the data.

    Returns:
        The same Plotly Figure object with policy markers added.
    """
    if not policy_events:
        return fig

    # Color mapping for policy categories
    category_colors = {
        "monetary": "#1f77b4",  # Blue
        "tax": "#2ca02c",  # Green
        "regulatory": "#ff7f0e",  # Orange
        "supply": "#9467bd",  # Purple
        "economic": "#d62728",  # Red
    }

    # Symbol mapping for expected direction
    direction_symbols = {
        "bullish": "triangle-up",
        "bearish": "triangle-down",
        "neutral": "circle",
    }

    # Resolve the marker height once, before any marker trace is added.
    # ``is not None`` (rather than truthiness) keeps an explicit 0 usable.
    marker_y = y_position if y_position is not None else _chart_top_y(fig)

    for event in policy_events:
        event_date = event.get(date_column)
        if event_date is None:
            # An undated event cannot be placed on the time axis.
            continue

        # ``or`` also covers keys that are present but explicitly None, which
        # would otherwise break the ``.title()`` calls below.
        category = event.get("category") or "economic"
        direction = event.get("expected_direction") or "neutral"
        title = event.get("title") or "Policy Event"
        level = event.get("level") or "federal"

        color = category_colors.get(category, "#666666")
        symbol = direction_symbols.get(direction, "circle")

        # Add vertical line for the event
        fig.add_vline(
            x=event_date,
            line_dash="dot",
            line_color=color,
            opacity=0.5,
            annotation_text="",
        )

        # Add marker with hover info
        fig.add_trace(
            go.Scatter(
                x=[event_date],
                y=[marker_y],  # type: ignore[list-item]
                mode="markers",
                marker={
                    "symbol": symbol,
                    "size": 12,
                    "color": color,
                    "line": {"width": 1, "color": "white"},
                },
                name=title,
                hovertemplate=(
                    f"<b>{title}</b><br>"
                    "Date: %{x}<br>"
                    f"Level: {str(level).title()}<br>"
                    f"Category: {str(category).title()}<br>"
                    "<extra></extra>"
                ),
                showlegend=False,
            )
        )

    return fig
def create_time_series_with_events(
    data: list[dict[str, Any]],
    policy_events: list[dict[str, Any]],
    date_column: str = "full_date",
    value_column: str = "avg_price",
    title: str = "Price Trend with Policy Events",
) -> go.Figure:
    """Build a price time series and overlay policy event markers on it.

    Args:
        data: Time series records.
        policy_events: Policy events to overlay; may be empty.
        date_column: Key in ``data`` holding the dates.
        value_column: Key in ``data`` holding the plotted values.
        title: Chart title.

    Returns:
        The figure from ``create_price_time_series``, passed through
        ``add_policy_markers`` when at least one policy event is supplied.
    """
    base_figure = create_price_time_series(
        data=data,
        date_column=date_column,
        price_column=value_column,
        title=title,
    )

    # Nothing to overlay: hand back the plain time series untouched.
    if not policy_events:
        return base_figure

    return add_policy_markers(base_figure, policy_events)

View File

@@ -0,0 +1,20 @@
"""Health check endpoint for deployment monitoring."""
import dash
from dash import html
# Register this module as a Dash page at the /health route, with the
# browser title "Health Check".
dash.register_page(
    __name__,
    path="/health",
    title="Health Check",
)
def layout() -> html.Div:
    """Build the health-check page: a single preformatted ``status: ok`` line.

    Returns:
        A ``html.Div`` with id ``health-check`` wrapping the status text.
    """
    status_line = html.Pre("status: ok")
    return html.Div([status_line], id="health-check")

View File

@@ -0,0 +1,263 @@
"""Methodology page for Toronto Housing Dashboard."""
import dash
import dash_mantine_components as dmc
from dash import html
# Register this module as a Dash page at /toronto/methodology, supplying the
# page title and description metadata.
dash.register_page(
    __name__,
    path="/toronto/methodology",
    title="Methodology | Toronto Housing Dashboard",
    description="Data sources, methodology, and limitations for the Toronto Housing Dashboard",
)
def _card(heading: str, body: list, **paper_props) -> dmc.Paper:
    """Wrap *body* in a bordered, padded card headed by a level-2 title."""
    return dmc.Paper(
        p="lg",
        radius="md",
        withBorder=True,
        children=[dmc.Title(heading, order=2, mb="md"), *body],
        **paper_props,
    )


def _labelled_item(label: str, text: str) -> dmc.ListItem:
    """Return a list item made of a bold label followed by plain text."""
    return dmc.ListItem([html.Strong(label), text])


def _data_sources_card() -> dmc.Paper:
    """Build the "Data Sources" card covering the TRREB and CMHC feeds."""
    trreb_facts = [
        dmc.ListItem(fact)
        for fact in (
            "Source: TRREB Market Watch Reports (PDF)",
            "Geographic granularity: ~35 TRREB Districts",
            "Temporal granularity: Monthly",
            "Coverage: 2021-present",
        )
    ]
    trreb_facts.append(
        dmc.ListItem(
            [
                "Metrics: Sales count, average/median price, new listings, ",
                "active listings, days on market, sale-to-list ratio",
            ]
        )
    )

    cmhc_facts = [
        dmc.ListItem(fact)
        for fact in (
            "Source: CMHC Rental Market Survey (Excel)",
            "Geographic granularity: ~20 CMHC Zones (Census Tract aligned)",
            "Temporal granularity: Annual (October survey)",
            "Coverage: 2021-present",
        )
    ]
    cmhc_facts.append(
        dmc.ListItem(
            [
                "Metrics: Average/median rent, vacancy rate, universe count, ",
                "turnover rate, year-over-year rent change",
            ]
        )
    )

    body = [
        # TRREB
        dmc.Title("Purchase Data: TRREB", order=3, size="h4", mb="sm"),
        dmc.Text(
            [
                "The Toronto Regional Real Estate Board (TRREB) publishes monthly ",
                html.Strong("Market Watch"),
                " reports containing aggregate statistics for residential real estate "
                "transactions across the Greater Toronto Area.",
            ],
            mb="sm",
        ),
        dmc.List(trreb_facts, mb="md"),
        dmc.Anchor(
            "TRREB Market Watch Archive",
            href="https://trreb.ca/market-data/market-watch/market-watch-archive/",
            target="_blank",
            mb="lg",
        ),
        # CMHC
        dmc.Title("Rental Data: CMHC", order=3, size="h4", mb="sm", mt="md"),
        dmc.Text(
            [
                "Canada Mortgage and Housing Corporation (CMHC) conducts the annual ",
                html.Strong("Rental Market Survey"),
                " providing rental market statistics for major urban centres.",
            ],
            mb="sm",
        ),
        dmc.List(cmhc_facts, mb="md"),
        dmc.Anchor(
            "CMHC Housing Market Information Portal",
            href="https://www.cmhc-schl.gc.ca/professionals/housing-markets-data-and-research/housing-data/data-tables/rental-market",
            target="_blank",
        ),
    ]
    return _card("Data Sources", body, mb="lg")


def _geography_card() -> dmc.Paper:
    """Build the "Geographic Considerations" card and its three-layer list."""
    misalignment_warning = dmc.Alert(
        title="Important: Non-Aligned Geographies",
        color="yellow",
        mb="md",
        children=[
            "TRREB Districts and CMHC Zones do ",
            html.Strong("not"),
            " align geographically. They are displayed as separate layers and "
            "should not be directly compared at the sub-regional level.",
        ],
    )
    layers = dmc.List(
        [
            _labelled_item(
                "TRREB Districts (~35): ",
                "Used for purchase/sales data visualization. "
                "Districts are defined by TRREB and labeled with codes like W01, C01, E01.",
            ),
            _labelled_item(
                "CMHC Zones (~20): ",
                "Used for rental data visualization. "
                "Zones are aligned with Census Tract boundaries.",
            ),
            _labelled_item(
                "City Neighbourhoods (158): ",
                "Reference overlay only. "
                "These are official City of Toronto neighbourhood boundaries.",
            ),
        ],
    )
    body = [
        misalignment_warning,
        dmc.Text(
            "The dashboard presents three geographic layers:",
            mb="sm",
        ),
        layers,
    ]
    return _card("Geographic Considerations", body, mb="lg")


def _policy_events_card() -> dmc.Paper:
    """Build the "Policy Event Annotations" card with its causation caveat."""
    body = [
        dmc.Text(
            "The time series charts include markers for significant policy events "
            "that may have influenced housing market conditions. These annotations are "
            "for contextual reference only.",
            mb="md",
        ),
        dmc.Alert(
            title="No Causation Claims",
            color="blue",
            children=[
                "The presence of a policy marker near a market trend change does ",
                html.Strong("not"),
                " imply causation. Housing markets are influenced by numerous factors "
                "beyond policy interventions.",
            ],
        ),
    ]
    return _card("Policy Event Annotations", body, mb="lg")


def _limitations_card() -> dmc.Paper:
    """Build the "Limitations" card listing known data caveats."""
    caveats = dmc.List(
        [
            _labelled_item(
                "Aggregate Data: ",
                "All statistics are aggregates. Individual property characteristics, "
                "condition, and micro-location are not reflected.",
            ),
            _labelled_item(
                "Reporting Lag: ",
                "TRREB data reflects closed transactions, which may lag market "
                "conditions by 1-3 months. CMHC data is annual.",
            ),
            _labelled_item(
                "Geographic Boundaries: ",
                "TRREB district boundaries were manually digitized from reference maps "
                "and may contain minor inaccuracies.",
            ),
            _labelled_item(
                "Data Suppression: ",
                "Some cells may be suppressed for confidentiality when transaction "
                "counts are below thresholds.",
            ),
        ],
    )
    return _card("Limitations", [caveats], mb="lg")


def _technical_card() -> dmc.Paper:
    """Build the "Technical Implementation" card (no bottom margin)."""
    stack = dmc.List(
        [
            dmc.ListItem(entry)
            for entry in (
                "Python 3.11+ with Dash and Plotly",
                "PostgreSQL with PostGIS for geospatial data",
                "dbt for data transformation",
                "Pydantic for data validation",
                "SQLAlchemy 2.0 for database operations",
            )
        ],
        mb="md",
    )
    body = [
        dmc.Text("This dashboard is built with:", mb="sm"),
        stack,
        dmc.Anchor(
            "View source code on GitHub",
            href="https://github.com/lmiranda/personal-portfolio",
            target="_blank",
        ),
    ]
    return _card("Technical Implementation", body)


def layout() -> dmc.Container:
    """Assemble the methodology page.

    The page is a medium-width container holding, in order: the page header
    and introduction, the data sources, geographic considerations, policy
    event annotations, limitations and technical implementation cards, and a
    link back to the dashboard.
    """
    header = dmc.Title("Methodology", order=1, mb="lg")
    introduction = dmc.Text(
        "This page documents the data sources, processing methodology, "
        "and known limitations of the Toronto Housing Dashboard.",
        size="lg",
        c="dimmed",
        mb="xl",
    )
    back_link = dmc.Group(
        mt="xl",
        children=[
            dmc.Anchor(
                "← Back to Dashboard",
                href="/toronto",
                size="lg",
            ),
        ],
    )

    return dmc.Container(
        size="md",
        py="xl",
        children=[
            header,
            introduction,
            _data_sources_card(),
            _geography_card(),
            _policy_events_card(),
            _limitations_card(),
            _technical_card(),
            back_link,
        ],
    )

View File

@@ -0,0 +1,257 @@
"""Demo/sample data for testing the Toronto Housing Dashboard without full pipeline.
This module provides synthetic data for development and demonstration purposes.
Replace with real data from the database in production.
"""
from datetime import date
from typing import Any
def get_demo_districts() -> list[dict[str, Any]]:
    """Return the sample TRREB districts as a list of dicts.

    Each dict has the keys ``district_code``, ``district_name`` and
    ``area_type``, in that order.
    """
    # (code, name, area) rows, kept in display order.
    district_rows = (
        ("W01", "Long Branch", "West"),
        ("W02", "Mimico", "West"),
        ("W03", "Kingsway South", "West"),
        ("W04", "Edenbridge", "West"),
        ("W05", "Islington", "West"),
        ("W06", "Rexdale", "West"),
        ("W07", "Willowdale", "West"),
        ("W08", "York", "West"),
        ("C01", "Downtown Core", "Central"),
        ("C02", "Annex", "Central"),
        ("C03", "Forest Hill", "Central"),
        ("C04", "Lawrence Park", "Central"),
        ("C06", "Willowdale East", "Central"),
        ("C07", "Thornhill", "Central"),
        ("C08", "Waterfront", "Central"),
        ("E01", "Leslieville", "East"),
        ("E02", "The Beaches", "East"),
        ("E03", "Danforth", "East"),
        ("E04", "Birch Cliff", "East"),
        ("E05", "Scarborough", "East"),
    )
    return [
        {"district_code": code, "district_name": name, "area_type": area}
        for code, name, area in district_rows
    ]
def get_demo_purchase_data() -> list[dict[str, Any]]:
    """Return sample purchase data for time series visualization.

    One record is produced per district (W01, C01, E01) for every month of
    2024 and 2025, 72 records in total, ordered by year, month, district.
    Prices follow a slight upward trend with up to +/-5% random variation.

    The values are reproducible: a private generator seeded with 42 is used,
    so calling this function never disturbs the process-wide ``random`` state.

    Returns:
        List of dicts with keys ``district_code``, ``full_date``, ``year``,
        ``month``, ``avg_price``, ``median_price``, ``sales_count``,
        ``new_listings``, ``active_listings``, ``days_on_market`` and
        ``sale_to_list_ratio``.
    """
    import random

    # A dedicated generator yields the same sequence as reseeding the global
    # one with 42, without the side effect on other users of ``random``.
    rng = random.Random(42)

    data = []
    base_prices = {
        "W01": 850000,
        "C01": 1200000,
        "E01": 950000,
    }

    for year in [2024, 2025]:
        for month in range(1, 13):
            for district, base_price in base_prices.items():
                # Months elapsed since the end of 2023 drive the trend.
                trend = (year - 2024) * 12 + month
                price_variation = rng.uniform(-0.05, 0.05)
                trend_factor = 1 + (trend * 0.002)  # Slight upward trend
                avg_price = int(base_price * trend_factor * (1 + price_variation))
                sales = rng.randint(50, 200)

                # The order of the rng calls below is part of the output.
                data.append(
                    {
                        "district_code": district,
                        "full_date": date(year, month, 1),
                        "year": year,
                        "month": month,
                        "avg_price": avg_price,
                        "median_price": int(avg_price * 0.95),
                        "sales_count": sales,
                        "new_listings": int(sales * rng.uniform(1.2, 1.8)),
                        "active_listings": int(sales * rng.uniform(2.0, 3.5)),
                        "days_on_market": rng.randint(15, 45),
                        "sale_to_list_ratio": round(rng.uniform(0.95, 1.05), 2),
                    }
                )

    return data
def get_demo_rental_data() -> list[dict[str, Any]]:
    """Return sample rental records for visualization.

    One record is produced per survey year (2021-2025), zone and bedroom
    type, ordered by year, then zone, then bedroom type. Rents grow by 5% of
    the base rent per year, vacancy falls by 0.3 per year and the universe
    grows by 200 units per year.
    """
    zone_names = {
        "Zone01": "Downtown",
        "Zone02": "Midtown",
        "Zone03": "North York",
        "Zone04": "Scarborough",
        "Zone05": "Etobicoke",
    }
    # Insertion order doubles as the bedroom-type ordering of the output.
    starting_rents = {
        "bachelor": 1800,
        "1_bedroom": 2200,
        "2_bedroom": 2800,
        "3_bedroom": 3400,
    }

    def build_row(survey_year: int, zone_code: str, bedroom: str) -> dict[str, Any]:
        """Compute the record for one year/zone/bedroom combination."""
        years_elapsed = survey_year - 2021
        growth = 1 + (years_elapsed * 0.05)
        rent = starting_rents[bedroom]
        return {
            "zone_code": zone_code,
            "zone_name": zone_names[zone_code],
            "survey_year": survey_year,
            "full_date": date(survey_year, 10, 1),
            "bedroom_type": bedroom,
            "average_rent": int(rent * growth),
            "median_rent": int(rent * growth * 0.98),
            # Decreasing vacancy
            "vacancy_rate": round(2.5 - years_elapsed * 0.3, 1),
            "universe": 5000 + years_elapsed * 200,
        }

    return [
        build_row(survey_year, zone_code, bedroom)
        for survey_year in range(2021, 2026)
        for zone_code in zone_names
        for bedroom in starting_rents
    ]
def get_demo_policy_events() -> list[dict[str, Any]]:
    """Return the sample policy events used to annotate charts.

    The list holds five Bank of Canada rate cuts followed by two regulatory
    changes. Every event is federal and marked ``bullish``, and carries the
    keys ``event_date``, ``effective_date``, ``level``, ``category``,
    ``title``, ``description`` and ``expected_direction``.
    """
    # (announcement date, size of the cut in basis points, resulting rate)
    rate_cuts = (
        (date(2024, 6, 5), 25, "4.75"),
        (date(2024, 7, 24), 25, "4.50"),
        (date(2024, 9, 4), 25, "4.25"),
        (date(2024, 10, 23), 50, "3.75"),
        (date(2024, 12, 11), 50, "3.25"),
    )
    # (title, description); both share the same announcement/effective dates.
    mortgage_rule_changes = (
        (
            "CMHC 30-Year Amortization",
            "30-year amortization extended to all first-time buyers and new builds",
        ),
        (
            "Insured Mortgage Cap $1.5M",
            "Insured mortgage cap raised from $1M to $1.5M",
        ),
    )

    def rate_cut_event(day: date, basis_points: int, new_rate: str) -> dict[str, Any]:
        """Describe one rate cut, effective on the day it was announced."""
        return {
            "event_date": day,
            "effective_date": day,
            "level": "federal",
            "category": "monetary",
            "title": f"BoC Rate Cut ({basis_points}bp)",
            "description": (
                f"Bank of Canada cuts overnight rate by {basis_points} "
                f"basis points to {new_rate}%"
            ),
            "expected_direction": "bullish",
        }

    def rule_change_event(headline: str, details: str) -> dict[str, Any]:
        """Describe one regulatory change announced 2024-09-16."""
        return {
            "event_date": date(2024, 9, 16),
            "effective_date": date(2024, 12, 15),
            "level": "federal",
            "category": "regulatory",
            "title": headline,
            "description": details,
            "expected_direction": "bullish",
        }

    events = [rate_cut_event(*cut) for cut in rate_cuts]
    events.extend(rule_change_event(*change) for change in mortgage_rule_changes)
    return events
def get_demo_summary_metrics() -> dict[str, dict[str, Any]]:
    """Return the sample KPI card definitions, keyed by metric name.

    The mapping contains ``avg_price``, ``total_sales``, ``avg_rent`` and
    ``vacancy_rate``. Each value holds the figure to display, its title, the
    delta and its suffix, a format spec, whether an increase counts as good,
    and optionally a ``prefix`` or ``suffix`` for the value.
    """
    average_price_card = {
        "value": 1067968,
        "title": "Avg. Price (2025)",
        "delta": -4.7,
        "delta_suffix": "%",
        "prefix": "$",
        "format_spec": ",.0f",
        "positive_is_good": True,
    }
    total_sales_card = {
        "value": 67610,
        "title": "Total Sales (2024)",
        "delta": 2.6,
        "delta_suffix": "%",
        "format_spec": ",.0f",
        "positive_is_good": True,
    }
    average_rent_card = {
        "value": 2450,
        "title": "Avg. Rent (2025)",
        "delta": 3.2,
        "delta_suffix": "%",
        "prefix": "$",
        "format_spec": ",.0f",
        "positive_is_good": False,
    }
    # Delta is expressed in percentage points ("pp"), the value in percent.
    vacancy_rate_card = {
        "value": 1.8,
        "title": "Vacancy Rate",
        "delta": -0.4,
        "delta_suffix": "pp",
        "suffix": "%",
        "format_spec": ".1f",
        "positive_is_good": False,
    }

    cards: dict[str, dict[str, Any]] = {}
    cards["avg_price"] = average_price_card
    cards["total_sales"] = total_sales_card
    cards["avg_rent"] = average_rent_card
    cards["vacancy_rate"] = vacancy_rate_card
    return cards

52
scripts/db/init_schema.py Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python3
"""Initialize database schema.
Usage:
python scripts/db/init_schema.py
This script creates all SQLAlchemy tables in the database.
Run this after docker-compose up to initialize the schema.
"""
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from portfolio_app.toronto.models import create_tables, get_engine # noqa: E402
def main() -> int:
    """Initialize the database schema.

    Checks that the database is reachable, creates the tables through
    ``create_tables()``, then prints the table names reported by the
    database.

    Returns:
        0 on success, 1 if any step raised (the error is printed to stderr).
    """
    print("Initializing database schema...")

    try:
        from sqlalchemy import inspect, text

        engine = get_engine()

        # Test connection. SQLAlchemy 2.0 no longer accepts a raw SQL string
        # in Connection.execute(); it must be wrapped in text().
        with engine.connect() as conn:
            conn.execute(text("SELECT 1")).fetchone()
        print("Database connection successful")

        # Create all tables
        create_tables()
        print("Schema created successfully")

        # List the tables now present in the database
        tables = inspect(engine).get_table_names()
        print(f"Created tables: {', '.join(tables)}")

        return 0

    except Exception as e:
        # Top-level script boundary: report the failure and signal it through
        # the exit code instead of a traceback.
        print(f"Error: {e}", file=sys.stderr)
        return 1
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())