feat: Sprint 10 - Architecture docs, CI/CD, operational scripts
Some checks failed
CI / lint-and-test (push) Has been cancelled
Some checks failed
CI / lint-and-test (push) Has been cancelled
Phase 1 - Architecture Documentation: - Add Architecture section with Mermaid flowchart to README - Create docs/DATABASE_SCHEMA.md with full ERD Phase 2 - CI/CD: - Add CI badge to README - Create .gitea/workflows/ci.yml for linting and tests - Create .gitea/workflows/deploy-staging.yml - Create .gitea/workflows/deploy-production.yml Phase 3 - Operational Scripts: - Create scripts/logs.sh for docker compose log following - Create scripts/run-detached.sh with health check loop - Create scripts/etl/toronto.sh for Toronto data pipeline - Add Makefile targets: logs, run-detached, etl-toronto Phase 4 - Runbooks: - Create docs/runbooks/adding-dashboard.md - Create docs/runbooks/deployment.md Phase 5 - Hygiene: - Create MIT LICENSE file Phase 6 - Production: - Add live demo link to README (leodata.science) Closes #78, #79, #80, #81, #82, #83, #84, #85, #86, #87, #88, #89, #91 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
35
.gitea/workflows/ci.yml
Normal file
35
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,35 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- development
|
||||
- staging
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- development
|
||||
|
||||
jobs:
|
||||
lint-and-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install ruff pytest
|
||||
|
||||
- name: Run linter
|
||||
run: ruff check .
|
||||
|
||||
- name: Run tests
|
||||
run: pytest tests/ -v --tb=short
|
||||
44
.gitea/workflows/deploy-production.yml
Normal file
44
.gitea/workflows/deploy-production.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Deploy to Production
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Deploy to Production Server
|
||||
uses: appleboy/ssh-action@v1.0.3
|
||||
with:
|
||||
host: ${{ secrets.PROD_HOST }}
|
||||
username: ${{ secrets.PROD_USER }}
|
||||
key: ${{ secrets.PROD_SSH_KEY }}
|
||||
script: |
|
||||
set -euo pipefail
|
||||
|
||||
cd ~/apps/personal-portfolio
|
||||
|
||||
echo "Pulling latest changes..."
|
||||
git fetch origin main
|
||||
git reset --hard origin/main
|
||||
|
||||
echo "Activating virtual environment..."
|
||||
source .venv/bin/activate
|
||||
|
||||
echo "Installing dependencies..."
|
||||
pip install -r requirements.txt --quiet
|
||||
|
||||
echo "Running dbt models..."
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
echo "Restarting application..."
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
echo "Waiting for health check..."
|
||||
sleep 10
|
||||
curl -f http://localhost:8050/health || exit 1
|
||||
|
||||
echo "Production deployment complete!"
|
||||
44
.gitea/workflows/deploy-staging.yml
Normal file
44
.gitea/workflows/deploy-staging.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Deploy to Staging
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- staging
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Deploy to Staging Server
|
||||
uses: appleboy/ssh-action@v1.0.3
|
||||
with:
|
||||
host: ${{ secrets.STAGING_HOST }}
|
||||
username: ${{ secrets.STAGING_USER }}
|
||||
key: ${{ secrets.STAGING_SSH_KEY }}
|
||||
script: |
|
||||
set -euo pipefail
|
||||
|
||||
cd ~/apps/personal-portfolio
|
||||
|
||||
echo "Pulling latest changes..."
|
||||
git fetch origin staging
|
||||
git reset --hard origin/staging
|
||||
|
||||
echo "Activating virtual environment..."
|
||||
source .venv/bin/activate
|
||||
|
||||
echo "Installing dependencies..."
|
||||
pip install -r requirements.txt --quiet
|
||||
|
||||
echo "Running dbt models..."
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
echo "Restarting application..."
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
echo "Waiting for health check..."
|
||||
sleep 10
|
||||
curl -f http://localhost:8050/health || exit 1
|
||||
|
||||
echo "Staging deployment complete!"
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024-2025 Leo Miranda
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
15
Makefile
15
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help
|
||||
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
|
||||
|
||||
# Default target
|
||||
.DEFAULT_GOAL := help
|
||||
@@ -151,6 +151,19 @@ ci: ## Run all checks (lint, typecheck, test)
|
||||
$(MAKE) test
|
||||
@echo "$(GREEN)All checks passed!$(NC)"
|
||||
|
||||
# =============================================================================
|
||||
# Operations
|
||||
# =============================================================================
|
||||
|
||||
logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
|
||||
@./scripts/logs.sh $(SERVICE)
|
||||
|
||||
run-detached: ## Start containers and wait for health check
|
||||
@./scripts/run-detached.sh
|
||||
|
||||
etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
|
||||
@./scripts/etl/toronto.sh $(MODE)
|
||||
|
||||
# =============================================================================
|
||||
# Deployment
|
||||
# =============================================================================
|
||||
|
||||
40
README.md
40
README.md
@@ -1,5 +1,9 @@
|
||||
# Analytics Portfolio
|
||||
|
||||
[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
|
||||
|
||||
**Live Demo:** [leodata.science](https://leodata.science)
|
||||
|
||||
A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
|
||||
|
||||
## Live Pages
|
||||
@@ -32,6 +36,42 @@ An interactive choropleth dashboard analyzing Toronto's 158 official neighbourho
|
||||
- Toronto Police Service (crime statistics)
|
||||
- CMHC Rental Market Survey (rental data by zone)
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Sources
|
||||
A1[City of Toronto API]
|
||||
A2[Toronto Police API]
|
||||
A3[CMHC Data]
|
||||
end
|
||||
|
||||
subgraph ETL
|
||||
B1[Parsers]
|
||||
B2[Loaders]
|
||||
end
|
||||
|
||||
subgraph Database
|
||||
C1[(PostgreSQL/PostGIS)]
|
||||
C2[dbt Models]
|
||||
end
|
||||
|
||||
subgraph Application
|
||||
D1[Dash App]
|
||||
D2[Plotly Figures]
|
||||
end
|
||||
|
||||
A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
|
||||
```
|
||||
|
||||
**Pipeline Stages:**
|
||||
- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
|
||||
- **ETL**: Python parsers extract and validate data; loaders persist to database
|
||||
- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
|
||||
- **Application**: Dash serves interactive dashboards with Plotly visualizations
|
||||
|
||||
For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
|
||||
307
docs/DATABASE_SCHEMA.md
Normal file
307
docs/DATABASE_SCHEMA.md
Normal file
@@ -0,0 +1,307 @@
|
||||
# Database Schema
|
||||
|
||||
This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.
|
||||
|
||||
## Entity Relationship Diagram
|
||||
|
||||
```mermaid
|
||||
erDiagram
|
||||
dim_time {
|
||||
int date_key PK
|
||||
date full_date UK
|
||||
int year
|
||||
int month
|
||||
int quarter
|
||||
string month_name
|
||||
bool is_month_start
|
||||
}
|
||||
|
||||
dim_cmhc_zone {
|
||||
int zone_key PK
|
||||
string zone_code UK
|
||||
string zone_name
|
||||
geometry geometry
|
||||
}
|
||||
|
||||
dim_neighbourhood {
|
||||
int neighbourhood_id PK
|
||||
string name
|
||||
geometry geometry
|
||||
int population
|
||||
numeric land_area_sqkm
|
||||
numeric pop_density_per_sqkm
|
||||
numeric pct_bachelors_or_higher
|
||||
numeric median_household_income
|
||||
numeric pct_owner_occupied
|
||||
numeric pct_renter_occupied
|
||||
int census_year
|
||||
}
|
||||
|
||||
dim_policy_event {
|
||||
int event_id PK
|
||||
date event_date
|
||||
date effective_date
|
||||
string level
|
||||
string category
|
||||
string title
|
||||
text description
|
||||
string expected_direction
|
||||
string source_url
|
||||
string confidence
|
||||
}
|
||||
|
||||
fact_rentals {
|
||||
int id PK
|
||||
int date_key FK
|
||||
int zone_key FK
|
||||
string bedroom_type
|
||||
int universe
|
||||
numeric avg_rent
|
||||
numeric median_rent
|
||||
numeric vacancy_rate
|
||||
numeric availability_rate
|
||||
numeric turnover_rate
|
||||
numeric rent_change_pct
|
||||
string reliability_code
|
||||
}
|
||||
|
||||
fact_census {
|
||||
int id PK
|
||||
int neighbourhood_id FK
|
||||
int census_year
|
||||
int population
|
||||
numeric population_density
|
||||
numeric median_household_income
|
||||
numeric average_household_income
|
||||
numeric unemployment_rate
|
||||
numeric pct_bachelors_or_higher
|
||||
numeric pct_owner_occupied
|
||||
numeric pct_renter_occupied
|
||||
numeric median_age
|
||||
numeric average_dwelling_value
|
||||
}
|
||||
|
||||
fact_crime {
|
||||
int id PK
|
||||
int neighbourhood_id FK
|
||||
int year
|
||||
string crime_type
|
||||
int count
|
||||
numeric rate_per_100k
|
||||
}
|
||||
|
||||
fact_amenities {
|
||||
int id PK
|
||||
int neighbourhood_id FK
|
||||
string amenity_type
|
||||
int count
|
||||
int year
|
||||
}
|
||||
|
||||
bridge_cmhc_neighbourhood {
|
||||
int id PK
|
||||
string cmhc_zone_code FK
|
||||
int neighbourhood_id FK
|
||||
numeric weight
|
||||
}
|
||||
|
||||
dim_time ||--o{ fact_rentals : "date_key"
|
||||
dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
|
||||
dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
|
||||
dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
|
||||
dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
|
||||
dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
|
||||
dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
|
||||
```
|
||||
|
||||
## Schema Layers
|
||||
|
||||
### Raw Schema
|
||||
|
||||
Raw data is loaded directly from external sources without transformation:
|
||||
|
||||
| Table | Source | Description |
|
||||
|-------|--------|-------------|
|
||||
| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries |
|
||||
| `raw.census_profiles` | City of Toronto API | Census profile data |
|
||||
| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood |
|
||||
| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data |
|
||||
|
||||
### Staging Schema (dbt)
|
||||
|
||||
Staging models provide 1:1 cleaned representations of source data:
|
||||
|
||||
| Model | Source Table | Purpose |
|
||||
|-------|-------------|---------|
|
||||
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
|
||||
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
|
||||
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
|
||||
| `stg_police__crimes` | raw.crime_data | Standardized crime categories |
|
||||
|
||||
### Marts Schema (dbt)
|
||||
|
||||
Analytical tables ready for dashboard consumption:
|
||||
|
||||
| Model | Grain | Purpose |
|
||||
|-------|-------|---------|
|
||||
| `mart_neighbourhood_summary` | neighbourhood | Composite livability scores |
|
||||
| `mart_rental_trends` | zone × month | Time-series rental analysis |
|
||||
| `mart_crime_rates` | neighbourhood × year | Crime rate calculations |
|
||||
| `mart_amenity_density` | neighbourhood | Amenity accessibility scores |
|
||||
|
||||
## Table Details
|
||||
|
||||
### Dimension Tables
|
||||
|
||||
#### dim_time
|
||||
Time dimension for date-based analysis. Grain: one row per month.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
|
||||
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
|
||||
| year | INTEGER | NOT NULL | Calendar year |
|
||||
| month | INTEGER | NOT NULL | Month number (1-12) |
|
||||
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
|
||||
| month_name | VARCHAR(20) | NOT NULL | Month name |
|
||||
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
|
||||
|
||||
#### dim_cmhc_zone
|
||||
CMHC rental market zones (~20 zones covering Toronto).
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| zone_key | INTEGER | PK, AUTO | Surrogate key |
|
||||
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
|
||||
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
|
||||
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |
|
||||
|
||||
#### dim_neighbourhood
|
||||
Toronto's 158 official neighbourhoods.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| neighbourhood_id | INTEGER | PK | City-assigned ID |
|
||||
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
|
||||
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
|
||||
| population | INTEGER | | Total population |
|
||||
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
|
||||
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
|
||||
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
|
||||
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
|
||||
| census_year | INTEGER | DEFAULT 2021 | Census reference year |
|
||||
|
||||
#### dim_policy_event
|
||||
Policy events for time-series annotation (rent control, interest rates, etc.).
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| event_id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| event_date | DATE | NOT NULL | Announcement date |
|
||||
| effective_date | DATE | | Implementation date |
|
||||
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
|
||||
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
|
||||
| title | VARCHAR(200) | NOT NULL | Event title |
|
||||
| description | TEXT | | Detailed description |
|
||||
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
|
||||
| source_url | VARCHAR(500) | | Reference link |
|
||||
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |
|
||||
|
||||
### Fact Tables
|
||||
|
||||
#### fact_rentals
|
||||
CMHC rental market survey data. Grain: zone × bedroom type × survey date.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| date_key | INTEGER | FK → dim_time | Survey date reference |
|
||||
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
|
||||
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
|
||||
| universe | INTEGER | | Total rental units |
|
||||
| avg_rent | NUMERIC(10,2) | | Average rent |
|
||||
| median_rent | NUMERIC(10,2) | | Median rent |
|
||||
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
|
||||
| availability_rate | NUMERIC(5,2) | | Availability percentage |
|
||||
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
|
||||
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
|
||||
| reliability_code | VARCHAR(2) | | CMHC data quality code |
|
||||
|
||||
#### fact_census
|
||||
Census statistics. Grain: neighbourhood × census year.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
|
||||
| population | INTEGER | | Total population |
|
||||
| population_density | NUMERIC(10,2) | | People per km² |
|
||||
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||
| average_household_income | NUMERIC(12,2) | | Average income |
|
||||
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
|
||||
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
|
||||
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
|
||||
| median_age | NUMERIC(5,2) | | Median resident age |
|
||||
| average_dwelling_value | NUMERIC(12,2) | | Average home value |
|
||||
|
||||
#### fact_crime
|
||||
Crime statistics. Grain: neighbourhood × year × crime type.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||
| year | INTEGER | NOT NULL | Calendar year |
|
||||
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
|
||||
| count | INTEGER | NOT NULL | Number of incidents |
|
||||
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |
|
||||
|
||||
#### fact_amenities
|
||||
Amenity counts. Grain: neighbourhood × amenity type × year.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
|
||||
| count | INTEGER | NOT NULL | Number of amenities |
|
||||
| year | INTEGER | NOT NULL | Reference year |
|
||||
|
||||
### Bridge Tables
|
||||
|
||||
#### bridge_cmhc_neighbourhood
|
||||
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.
|
||||
|
||||
| Column | Type | Constraints | Description |
|
||||
|--------|------|-------------|-------------|
|
||||
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
|
||||
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
|
||||
|
||||
## Indexes
|
||||
|
||||
| Table | Index | Columns | Purpose |
|
||||
|-------|-------|---------|---------|
|
||||
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
|
||||
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
|
||||
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
|
||||
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
|
||||
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
|
||||
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
|
||||
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
|
||||
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
|
||||
|
||||
## PostGIS Extensions
|
||||
|
||||
The database requires PostGIS for geospatial operations:
|
||||
|
||||
```sql
|
||||
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||
```
|
||||
|
||||
All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
|
||||
200
docs/runbooks/adding-dashboard.md
Normal file
200
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,200 @@
|
||||
# Runbook: Adding a New Dashboard
|
||||
|
||||
This runbook describes how to add a new data dashboard to the portfolio application.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [ ] Data sources identified and accessible
|
||||
- [ ] Database schema designed
|
||||
- [ ] Basic Dash/Plotly familiarity
|
||||
|
||||
## Directory Structure
|
||||
|
||||
Create the following structure under `portfolio_app/`:
|
||||
|
||||
```
|
||||
portfolio_app/
|
||||
├── pages/
|
||||
│ └── {dashboard_name}/
|
||||
│ ├── dashboard.py # Main layout with tabs
|
||||
│ ├── methodology.py # Data sources and methods page
|
||||
│ ├── tabs/
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── overview.py # Overview tab layout
|
||||
│ │ └── ... # Additional tab layouts
|
||||
│ └── callbacks/
|
||||
│ ├── __init__.py
|
||||
│ └── ... # Callback modules
|
||||
├── {dashboard_name}/ # Data logic (outside pages/)
|
||||
│ ├── __init__.py
|
||||
│ ├── parsers/ # API/CSV extraction
|
||||
│ │ └── __init__.py
|
||||
│ ├── loaders/ # Database operations
|
||||
│ │ └── __init__.py
|
||||
│ ├── schemas/ # Pydantic models
|
||||
│ │ └── __init__.py
|
||||
│ └── models/ # SQLAlchemy ORM
|
||||
│ └── __init__.py
|
||||
```
|
||||
|
||||
## Step-by-Step Checklist
|
||||
|
||||
### 1. Data Layer
|
||||
|
||||
- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/`
|
||||
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
|
||||
- [ ] Create parsers in `{dashboard_name}/parsers/`
|
||||
- [ ] Create loaders in `{dashboard_name}/loaders/`
|
||||
- [ ] Add database migrations if needed
|
||||
|
||||
### 2. dbt Models
|
||||
|
||||
Create dbt models in `dbt/models/`:
|
||||
|
||||
- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning
|
||||
- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic
|
||||
- [ ] `marts/mart_{domain}.sql` - Final analytical tables
|
||||
|
||||
Follow naming conventions:
|
||||
- Staging: `stg_{source}__{entity}`
|
||||
- Intermediate: `int_{domain}__{transform}`
|
||||
- Marts: `mart_{domain}`
|
||||
|
||||
### 3. Visualization Layer
|
||||
|
||||
- [ ] Create figure factories in `figures/` (or reuse existing)
|
||||
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`
|
||||
|
||||
### 4. Dashboard Pages
|
||||
|
||||
#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)
|
||||
|
||||
```python
|
||||
import dash
|
||||
from dash import html, dcc
|
||||
import dash_mantine_components as dmc
|
||||
|
||||
dash.register_page(
|
||||
__name__,
|
||||
path="/{dashboard_name}",
|
||||
title="{Dashboard Title}",
|
||||
description="{Description}"
|
||||
)
|
||||
|
||||
def layout():
|
||||
return dmc.Container([
|
||||
# Header
|
||||
dmc.Title("{Dashboard Title}", order=1),
|
||||
|
||||
# Tabs
|
||||
dmc.Tabs([
|
||||
dmc.TabsList([
|
||||
dmc.TabsTab("Overview", value="overview"),
|
||||
# Add more tabs
|
||||
]),
|
||||
dmc.TabsPanel(overview_tab(), value="overview"),
|
||||
# Add more panels
|
||||
], value="overview"),
|
||||
])
|
||||
```
|
||||
|
||||
#### Tab Layouts (`pages/{dashboard_name}/tabs/`)
|
||||
|
||||
- [ ] Create one file per tab
|
||||
- [ ] Export layout function from each
|
||||
|
||||
#### Callbacks (`pages/{dashboard_name}/callbacks/`)
|
||||
|
||||
- [ ] Create callback modules for interactivity
|
||||
- [ ] Import and register in dashboard.py
|
||||
|
||||
### 5. Navigation
|
||||
|
||||
Add to sidebar in `components/sidebar.py`:
|
||||
|
||||
```python
|
||||
dmc.NavLink(
|
||||
label="{Dashboard Name}",
|
||||
href="/{dashboard_name}",
|
||||
icon=DashIconify(icon="..."),
|
||||
)
|
||||
```
|
||||
|
||||
### 6. Documentation
|
||||
|
||||
- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
|
||||
- [ ] Document data sources
|
||||
- [ ] Document transformation logic
|
||||
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed
|
||||
|
||||
### 7. Testing
|
||||
|
||||
- [ ] Add unit tests for parsers
|
||||
- [ ] Add unit tests for loaders
|
||||
- [ ] Add integration tests for callbacks
|
||||
- [ ] Run `make test`
|
||||
|
||||
### 8. Final Verification
|
||||
|
||||
- [ ] All pages render without errors
|
||||
- [ ] All callbacks respond correctly
|
||||
- [ ] Data loads successfully
|
||||
- [ ] dbt models run cleanly (`make dbt-run`)
|
||||
- [ ] Linting passes (`make lint`)
|
||||
- [ ] Tests pass (`make test`)
|
||||
|
||||
## Example: Toronto Dashboard
|
||||
|
||||
Reference implementation: `portfolio_app/pages/toronto/`
|
||||
|
||||
Key files:
|
||||
- `dashboard.py` - Main layout with 5 tabs
|
||||
- `tabs/overview.py` - Livability scores, scatter plots
|
||||
- `callbacks/map_callbacks.py` - Choropleth interactions
|
||||
- `toronto/models/dimensions.py` - Dimension tables
|
||||
- `toronto/models/facts.py` - Fact tables
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Figure Factories
|
||||
|
||||
```python
|
||||
# figures/choropleth.py
|
||||
def create_choropleth_figure(
|
||||
gdf: gpd.GeoDataFrame,
|
||||
value_column: str,
|
||||
title: str,
|
||||
**kwargs
|
||||
) -> go.Figure:
|
||||
...
|
||||
```
|
||||
|
||||
### Callbacks
|
||||
|
||||
```python
|
||||
# callbacks/map_callbacks.py
|
||||
@callback(
|
||||
Output("neighbourhood-details", "children"),
|
||||
Input("choropleth-map", "clickData"),
|
||||
)
|
||||
def update_details(click_data):
|
||||
...
|
||||
```
|
||||
|
||||
### Data Loading
|
||||
|
||||
```python
|
||||
# {dashboard_name}/loaders/load.py
|
||||
def load_data(session: Session) -> None:
|
||||
# Parse from source
|
||||
records = parse_source_data()
|
||||
|
||||
# Validate with Pydantic
|
||||
validated = [Schema(**r) for r in records]
|
||||
|
||||
# Load to database
|
||||
for record in validated:
|
||||
session.add(Model(**record.model_dump()))
|
||||
|
||||
session.commit()
|
||||
```
|
||||
232
docs/runbooks/deployment.md
Normal file
232
docs/runbooks/deployment.md
Normal file
@@ -0,0 +1,232 @@
|
||||
# Runbook: Deployment
|
||||
|
||||
This runbook covers deployment procedures for the Analytics Portfolio application.
|
||||
|
||||
## Environments
|
||||
|
||||
| Environment | Branch | Server | URL |
|
||||
|-------------|--------|--------|-----|
|
||||
| Development | `development` | Local | http://localhost:8050 |
|
||||
| Staging | `staging` | Homelab (hotserv) | Internal |
|
||||
| Production | `main` | Bandit Labs VPS | https://leodata.science |
|
||||
|
||||
## CI/CD Pipeline
|
||||
|
||||
### Automatic Deployment
|
||||
|
||||
Deployments are triggered automatically via Gitea Actions:
|
||||
|
||||
1. **Push to `staging`** → Deploys to staging server
|
||||
2. **Push to `main`** → Deploys to production server
|
||||
|
||||
### Workflow Files
|
||||
|
||||
- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
|
||||
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
|
||||
- `.gitea/workflows/deploy-production.yml` - Production deployment
|
||||
|
||||
### Required Secrets
|
||||
|
||||
Configure these in Gitea repository settings:
|
||||
|
||||
| Secret | Description |
|
||||
|--------|-------------|
|
||||
| `STAGING_HOST` | Staging server hostname/IP |
|
||||
| `STAGING_USER` | SSH username for staging |
|
||||
| `STAGING_SSH_KEY` | Private key for staging SSH |
|
||||
| `PROD_HOST` | Production server hostname/IP |
|
||||
| `PROD_USER` | SSH username for production |
|
||||
| `PROD_SSH_KEY` | Private key for production SSH |
|
||||
|
||||
## Manual Deployment
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- SSH access to target server
|
||||
- Repository cloned at `~/apps/personal-portfolio`
|
||||
- Virtual environment created at `.venv`
|
||||
- Docker and Docker Compose installed
|
||||
- PostgreSQL container running
|
||||
|
||||
### Steps
|
||||
|
||||
```bash
|
||||
# 1. SSH to server
|
||||
ssh user@server
|
||||
|
||||
# 2. Navigate to app directory
|
||||
cd ~/apps/personal-portfolio
|
||||
|
||||
# 3. Pull latest changes
|
||||
git fetch origin {branch}
|
||||
git reset --hard origin/{branch}
|
||||
|
||||
# 4. Activate virtual environment
|
||||
source .venv/bin/activate
|
||||
|
||||
# 5. Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 6. Run database migrations (if any)
|
||||
# python -m alembic upgrade head
|
||||
|
||||
# 7. Run dbt models
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
# 8. Restart application
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
# 9. Verify health
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
## Rollback Procedure
|
||||
|
||||
### Quick Rollback
|
||||
|
||||
If deployment fails, rollback to previous commit:
|
||||
|
||||
```bash
|
||||
# 1. Find previous working commit
|
||||
git log --oneline -10
|
||||
|
||||
# 2. Reset to that commit
|
||||
git reset --hard {commit_hash}
|
||||
|
||||
# 3. Restart services
|
||||
docker compose down
|
||||
docker compose up -d
|
||||
|
||||
# 4. Verify
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
### Full Rollback (Database)
|
||||
|
||||
If database changes need to be reverted:
|
||||
|
||||
```bash
|
||||
# 1. Stop application
|
||||
docker compose down
|
||||
|
||||
# 2. Restore database backup
|
||||
pg_restore -h localhost -U portfolio -d portfolio backup.dump
|
||||
|
||||
# 3. Revert code
|
||||
git reset --hard {commit_hash}
|
||||
|
||||
# 4. Run dbt at that version
|
||||
cd dbt && dbt run --profiles-dir . && cd ..
|
||||
|
||||
# 5. Restart
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Health Checks
|
||||
|
||||
### Application Health
|
||||
|
||||
```bash
|
||||
curl http://localhost:8050/health
|
||||
```
|
||||
|
||||
Expected response:
|
||||
```json
|
||||
{"status": "healthy"}
|
||||
```
|
||||
|
||||
### Database Health
|
||||
|
||||
```bash
|
||||
docker compose exec postgres pg_isready -U portfolio
|
||||
```
|
||||
|
||||
### Container Status
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
### View Logs
|
||||
|
||||
```bash
|
||||
# All services
|
||||
make logs
|
||||
|
||||
# Specific service
|
||||
make logs SERVICE=postgres
|
||||
|
||||
# Or directly
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
### Check Resource Usage
|
||||
|
||||
```bash
|
||||
docker stats
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Application Won't Start
|
||||
|
||||
1. Check container logs: `docker compose logs app`
|
||||
2. Verify environment variables: `cat .env`
|
||||
3. Check database connectivity: `docker compose exec postgres pg_isready`
|
||||
4. Verify port availability: `lsof -i :8050`
|
||||
|
||||
### Database Connection Errors
|
||||
|
||||
1. Check postgres container: `docker compose ps postgres`
|
||||
2. Verify DATABASE_URL in `.env`
|
||||
3. Check postgres logs: `docker compose logs postgres`
|
||||
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`
|
||||
|
||||
### dbt Failures
|
||||
|
||||
1. Check dbt logs: `cd dbt && dbt debug`
|
||||
2. Verify profiles.yml: `cat dbt/profiles.yml`
|
||||
3. Run with verbose output: `dbt run --debug`
|
||||
|
||||
### Out of Memory
|
||||
|
||||
1. Check memory usage: `free -h`
|
||||
2. Review container limits in docker-compose.yml
|
||||
3. Consider increasing swap or server resources
|
||||
|
||||
## Backup Procedures
|
||||
|
||||
### Database Backup
|
||||
|
||||
```bash
|
||||
# Create backup
|
||||
docker compose exec postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql
|
||||
|
||||
# Compressed backup
|
||||
docker compose exec postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
|
||||
```
|
||||
|
||||
### Restore from Backup
|
||||
|
||||
```bash
|
||||
# From SQL file
|
||||
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql
|
||||
|
||||
# From dump file
|
||||
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
|
||||
```
|
||||
|
||||
## Deployment Checklist
|
||||
|
||||
Before deploying to production:
|
||||
|
||||
- [ ] All tests pass (`make test`)
|
||||
- [ ] Linting passes (`make lint`)
|
||||
- [ ] Staging deployment successful
|
||||
- [ ] Manual testing on staging complete
|
||||
- [ ] Database backup taken
|
||||
- [ ] Rollback plan confirmed
|
||||
- [ ] Team notified of deployment window
|
||||
72
scripts/etl/toronto.sh
Executable file
72
scripts/etl/toronto.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env bash
# scripts/etl/toronto.sh - Run Toronto data pipeline
#
# Usage:
#   ./scripts/etl/toronto.sh --full         # Complete reload of all data
#   ./scripts/etl/toronto.sh --incremental  # Only new data since last run
#   ./scripts/etl/toronto.sh                # Default: incremental
#
# Logs are written to .dev/logs/etl/

set -euo pipefail

# Resolve all paths relative to this script so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
readonly LOG_DIR="$PROJECT_ROOT/.dev/logs/etl"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
readonly LOG_FILE="$LOG_DIR/toronto_${TIMESTAMP}.log"

MODE="${1:---incremental}"

mkdir -p "$LOG_DIR"

# log MESSAGE - print a timestamped line to stdout and append it to $LOG_FILE.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# run_dbt [EXTRA_DBT_ARGS...] - run dbt transformations from the dbt/ directory.
# Runs in a subshell so the caller's working directory is never left changed,
# even when dbt fails (the original `cd dbt && ... && cd ..` chain skipped the
# trailing `cd ..` on failure). With pipefail, a dbt failure propagates through
# the tee and aborts the script under `set -e`.
run_dbt() {
  (cd "$PROJECT_ROOT/dbt" && dbt run "$@" --profiles-dir . 2>&1 | tee -a "$LOG_FILE")
}

log "Starting Toronto ETL pipeline (mode: $MODE)"
log "Log file: $LOG_FILE"

cd "$PROJECT_ROOT"

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
  # shellcheck disable=SC1091 -- venv path only exists at runtime
  source .venv/bin/activate
  log "Activated virtual environment"
fi

case "$MODE" in
  --full)
    log "Running FULL data reload..."

    log "Step 1/4: Parsing neighbourhood data..."
    python -m portfolio_app.toronto.parsers.neighbourhoods 2>&1 | tee -a "$LOG_FILE"

    log "Step 2/4: Parsing census data..."
    python -m portfolio_app.toronto.parsers.census 2>&1 | tee -a "$LOG_FILE"

    log "Step 3/4: Parsing crime data..."
    python -m portfolio_app.toronto.parsers.crime 2>&1 | tee -a "$LOG_FILE"

    log "Step 4/4: Running dbt transformations..."
    run_dbt --full-refresh
    ;;

  --incremental)
    log "Running INCREMENTAL update..."

    log "Step 1/2: Checking for new data..."
    # Add incremental logic here when implemented

    log "Step 2/2: Running dbt transformations..."
    run_dbt
    ;;

  *)
    log "ERROR: Unknown mode '$MODE'. Use --full or --incremental"
    exit 1
    ;;
esac

log "Toronto ETL pipeline completed successfully"
log "Full log available at: $LOG_FILE"
|
||||
20
scripts/logs.sh
Executable file
20
scripts/logs.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
# scripts/logs.sh - Follow docker compose logs
#
# Usage:
#   ./scripts/logs.sh            # All services
#   ./scripts/logs.sh postgres   # Specific service
#   ./scripts/logs.sh -n 100     # Last 100 lines

set -euo pipefail

SERVICE="${1:-}"
# Keep the remaining arguments as an array (not a flattened string, SC2124)
# so flags with spaces in their values survive re-expansion intact.
EXTRA_ARGS=("${@:2}")

# A first argument starting with '-' is a flag for `docker compose logs`,
# not a service name, so it falls through to the all-services branch.
if [[ -n "$SERVICE" && "$SERVICE" != -* ]]; then
  echo "Following logs for service: $SERVICE"
  docker compose logs -f "$SERVICE" "${EXTRA_ARGS[@]}"
else
  echo "Following logs for all services"
  docker compose logs -f "$@"
fi
|
||||
38
scripts/run-detached.sh
Executable file
38
scripts/run-detached.sh
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# scripts/run-detached.sh - Start containers and wait for health
#
# Usage:
#   ./scripts/run-detached.sh

set -euo pipefail

# Overall wait budget and polling cadence, in seconds.
readonly TIMEOUT=60
readonly INTERVAL=5

echo "Starting containers in detached mode..."
docker compose up -d

echo "Waiting for services to become healthy..."
elapsed=0

while (( elapsed < TIMEOUT )); do
  # PostgreSQL must accept connections first.
  if docker compose exec -T postgres pg_isready -U portfolio > /dev/null 2>&1; then
    echo "PostgreSQL is ready!"

    # Then the app's HTTP health endpoint must answer (if running).
    if curl -sf http://localhost:8050/health > /dev/null 2>&1; then
      echo "Application health check passed!"
      echo "All services are healthy."
      exit 0
    fi
  fi

  echo "Waiting... ($elapsed/$TIMEOUT seconds)"
  sleep "$INTERVAL"
  elapsed=$(( elapsed + INTERVAL ))
done

# Neither check passed within the budget; dump container state for triage.
echo "ERROR: Health check timed out after $TIMEOUT seconds"
docker compose ps
exit 1
|
||||
Reference in New Issue
Block a user