feat: Sprint 10 - Architecture docs, CI/CD, operational scripts
Some checks failed
CI / lint-and-test (push) Has been cancelled
Some checks failed
CI / lint-and-test (push) Has been cancelled
Phase 1 - Architecture Documentation: - Add Architecture section with Mermaid flowchart to README - Create docs/DATABASE_SCHEMA.md with full ERD Phase 2 - CI/CD: - Add CI badge to README - Create .gitea/workflows/ci.yml for linting and tests - Create .gitea/workflows/deploy-staging.yml - Create .gitea/workflows/deploy-production.yml Phase 3 - Operational Scripts: - Create scripts/logs.sh for docker compose log following - Create scripts/run-detached.sh with health check loop - Create scripts/etl/toronto.sh for Toronto data pipeline - Add Makefile targets: logs, run-detached, etl-toronto Phase 4 - Runbooks: - Create docs/runbooks/adding-dashboard.md - Create docs/runbooks/deployment.md Phase 5 - Hygiene: - Create MIT LICENSE file Phase 6 - Production: - Add live demo link to README (leodata.science) Closes #78, #79, #80, #81, #82, #83, #84, #85, #86, #87, #88, #89, #91 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
35
.gitea/workflows/ci.yml
Normal file
35
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- development
|
||||||
|
- staging
|
||||||
|
- main
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- development
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
lint-and-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pip install ruff pytest
|
||||||
|
|
||||||
|
- name: Run linter
|
||||||
|
run: ruff check .
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
run: pytest tests/ -v --tb=short
|
||||||
44
.gitea/workflows/deploy-production.yml
Normal file
44
.gitea/workflows/deploy-production.yml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
name: Deploy to Production
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Deploy to Production Server
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.PROD_HOST }}
|
||||||
|
username: ${{ secrets.PROD_USER }}
|
||||||
|
key: ${{ secrets.PROD_SSH_KEY }}
|
||||||
|
script: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
echo "Pulling latest changes..."
|
||||||
|
git fetch origin main
|
||||||
|
git reset --hard origin/main
|
||||||
|
|
||||||
|
echo "Activating virtual environment..."
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
echo "Installing dependencies..."
|
||||||
|
pip install -r requirements.txt --quiet
|
||||||
|
|
||||||
|
echo "Running dbt models..."
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
echo "Restarting application..."
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
echo "Waiting for health check..."
|
||||||
|
sleep 10
|
||||||
|
curl -f http://localhost:8050/health || exit 1
|
||||||
|
|
||||||
|
echo "Production deployment complete!"
|
||||||
44
.gitea/workflows/deploy-staging.yml
Normal file
44
.gitea/workflows/deploy-staging.yml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
name: Deploy to Staging
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- staging
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Deploy to Staging Server
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.STAGING_HOST }}
|
||||||
|
username: ${{ secrets.STAGING_USER }}
|
||||||
|
key: ${{ secrets.STAGING_SSH_KEY }}
|
||||||
|
script: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
echo "Pulling latest changes..."
|
||||||
|
git fetch origin staging
|
||||||
|
git reset --hard origin/staging
|
||||||
|
|
||||||
|
echo "Activating virtual environment..."
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
echo "Installing dependencies..."
|
||||||
|
pip install -r requirements.txt --quiet
|
||||||
|
|
||||||
|
echo "Running dbt models..."
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
echo "Restarting application..."
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
echo "Waiting for health check..."
|
||||||
|
sleep 10
|
||||||
|
curl -f http://localhost:8050/health || exit 1
|
||||||
|
|
||||||
|
echo "Staging deployment complete!"
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024-2025 Leo Miranda
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
15
Makefile
15
Makefile
@@ -1,4 +1,4 @@
|
|||||||
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help
|
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
|
||||||
|
|
||||||
# Default target
|
# Default target
|
||||||
.DEFAULT_GOAL := help
|
.DEFAULT_GOAL := help
|
||||||
@@ -151,6 +151,19 @@ ci: ## Run all checks (lint, typecheck, test)
|
|||||||
$(MAKE) test
|
$(MAKE) test
|
||||||
@echo "$(GREEN)All checks passed!$(NC)"
|
@echo "$(GREEN)All checks passed!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Operations
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
logs: ## Follow docker compose logs (usage: make logs or make logs SERVICE=postgres)
|
||||||
|
@./scripts/logs.sh $(SERVICE)
|
||||||
|
|
||||||
|
run-detached: ## Start containers and wait for health check
|
||||||
|
@./scripts/run-detached.sh
|
||||||
|
|
||||||
|
etl-toronto: ## Run Toronto ETL pipeline (usage: make etl-toronto MODE=--full)
|
||||||
|
@./scripts/etl/toronto.sh $(MODE)
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Deployment
|
# Deployment
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
40
README.md
40
README.md
@@ -1,5 +1,9 @@
|
|||||||
# Analytics Portfolio
|
# Analytics Portfolio
|
||||||
|
|
||||||
|
[](https://gitea.hotserv.cloud/lmiranda/personal-portfolio/actions)
|
||||||
|
|
||||||
|
**Live Demo:** [leodata.science](https://leodata.science)
|
||||||
|
|
||||||
A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
|
A personal portfolio website showcasing data engineering and visualization capabilities, featuring an interactive Toronto Neighbourhood Dashboard.
|
||||||
|
|
||||||
## Live Pages
|
## Live Pages
|
||||||
@@ -32,6 +36,42 @@ An interactive choropleth dashboard analyzing Toronto's 158 official neighbourho
|
|||||||
- Toronto Police Service (crime statistics)
|
- Toronto Police Service (crime statistics)
|
||||||
- CMHC Rental Market Survey (rental data by zone)
|
- CMHC Rental Market Survey (rental data by zone)
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
subgraph Sources
|
||||||
|
A1[City of Toronto API]
|
||||||
|
A2[Toronto Police API]
|
||||||
|
A3[CMHC Data]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph ETL
|
||||||
|
B1[Parsers]
|
||||||
|
B2[Loaders]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Database
|
||||||
|
C1[(PostgreSQL/PostGIS)]
|
||||||
|
C2[dbt Models]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Application
|
||||||
|
D1[Dash App]
|
||||||
|
D2[Plotly Figures]
|
||||||
|
end
|
||||||
|
|
||||||
|
A1 & A2 & A3 --> B1 --> B2 --> C1 --> C2 --> D1 --> D2
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pipeline Stages:**
|
||||||
|
- **Sources**: External APIs and data files (City of Toronto, Toronto Police, CMHC)
|
||||||
|
- **ETL**: Python parsers extract and validate data; loaders persist to database
|
||||||
|
- **Database**: PostgreSQL with PostGIS for geospatial; dbt transforms raw → staging → marts
|
||||||
|
- **Application**: Dash serves interactive dashboards with Plotly visualizations
|
||||||
|
|
||||||
|
For detailed database schema, see [docs/DATABASE_SCHEMA.md](docs/DATABASE_SCHEMA.md).
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
307
docs/DATABASE_SCHEMA.md
Normal file
307
docs/DATABASE_SCHEMA.md
Normal file
@@ -0,0 +1,307 @@
|
|||||||
|
# Database Schema
|
||||||
|
|
||||||
|
This document describes the PostgreSQL/PostGIS database schema for the Toronto Neighbourhood Dashboard.
|
||||||
|
|
||||||
|
## Entity Relationship Diagram
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
dim_time {
|
||||||
|
int date_key PK
|
||||||
|
date full_date UK
|
||||||
|
int year
|
||||||
|
int month
|
||||||
|
int quarter
|
||||||
|
string month_name
|
||||||
|
bool is_month_start
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_cmhc_zone {
|
||||||
|
int zone_key PK
|
||||||
|
string zone_code UK
|
||||||
|
string zone_name
|
||||||
|
geometry geometry
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_neighbourhood {
|
||||||
|
int neighbourhood_id PK
|
||||||
|
string name
|
||||||
|
geometry geometry
|
||||||
|
int population
|
||||||
|
numeric land_area_sqkm
|
||||||
|
numeric pop_density_per_sqkm
|
||||||
|
numeric pct_bachelors_or_higher
|
||||||
|
numeric median_household_income
|
||||||
|
numeric pct_owner_occupied
|
||||||
|
numeric pct_renter_occupied
|
||||||
|
int census_year
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_policy_event {
|
||||||
|
int event_id PK
|
||||||
|
date event_date
|
||||||
|
date effective_date
|
||||||
|
string level
|
||||||
|
string category
|
||||||
|
string title
|
||||||
|
text description
|
||||||
|
string expected_direction
|
||||||
|
string source_url
|
||||||
|
string confidence
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_rentals {
|
||||||
|
int id PK
|
||||||
|
int date_key FK
|
||||||
|
int zone_key FK
|
||||||
|
string bedroom_type
|
||||||
|
int universe
|
||||||
|
numeric avg_rent
|
||||||
|
numeric median_rent
|
||||||
|
numeric vacancy_rate
|
||||||
|
numeric availability_rate
|
||||||
|
numeric turnover_rate
|
||||||
|
numeric rent_change_pct
|
||||||
|
string reliability_code
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_census {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
int census_year
|
||||||
|
int population
|
||||||
|
numeric population_density
|
||||||
|
numeric median_household_income
|
||||||
|
numeric average_household_income
|
||||||
|
numeric unemployment_rate
|
||||||
|
numeric pct_bachelors_or_higher
|
||||||
|
numeric pct_owner_occupied
|
||||||
|
numeric pct_renter_occupied
|
||||||
|
numeric median_age
|
||||||
|
numeric average_dwelling_value
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_crime {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
int year
|
||||||
|
string crime_type
|
||||||
|
int count
|
||||||
|
numeric rate_per_100k
|
||||||
|
}
|
||||||
|
|
||||||
|
fact_amenities {
|
||||||
|
int id PK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
string amenity_type
|
||||||
|
int count
|
||||||
|
int year
|
||||||
|
}
|
||||||
|
|
||||||
|
bridge_cmhc_neighbourhood {
|
||||||
|
int id PK
|
||||||
|
string cmhc_zone_code FK
|
||||||
|
int neighbourhood_id FK
|
||||||
|
numeric weight
|
||||||
|
}
|
||||||
|
|
||||||
|
dim_time ||--o{ fact_rentals : "date_key"
|
||||||
|
dim_cmhc_zone ||--o{ fact_rentals : "zone_key"
|
||||||
|
dim_neighbourhood ||--o{ fact_census : "neighbourhood_id"
|
||||||
|
dim_neighbourhood ||--o{ fact_crime : "neighbourhood_id"
|
||||||
|
dim_neighbourhood ||--o{ fact_amenities : "neighbourhood_id"
|
||||||
|
dim_cmhc_zone ||--o{ bridge_cmhc_neighbourhood : "zone_code"
|
||||||
|
dim_neighbourhood ||--o{ bridge_cmhc_neighbourhood : "neighbourhood_id"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Schema Layers
|
||||||
|
|
||||||
|
### Raw Schema
|
||||||
|
|
||||||
|
Raw data is loaded directly from external sources without transformation:
|
||||||
|
|
||||||
|
| Table | Source | Description |
|
||||||
|
|-------|--------|-------------|
|
||||||
|
| `raw.neighbourhoods` | City of Toronto API | GeoJSON neighbourhood boundaries |
|
||||||
|
| `raw.census_profiles` | City of Toronto API | Census profile data |
|
||||||
|
| `raw.crime_data` | Toronto Police API | Crime statistics by neighbourhood |
|
||||||
|
| `raw.cmhc_rentals` | CMHC Data Files | Rental market survey data |
|
||||||
|
|
||||||
|
### Staging Schema (dbt)
|
||||||
|
|
||||||
|
Staging models provide 1:1 cleaned representations of source data:
|
||||||
|
|
||||||
|
| Model | Source Table | Purpose |
|
||||||
|
|-------|-------------|---------|
|
||||||
|
| `stg_toronto__neighbourhoods` | raw.neighbourhoods | Cleaned boundaries with standardized names |
|
||||||
|
| `stg_toronto__census` | raw.census_profiles | Typed census metrics |
|
||||||
|
| `stg_cmhc__rentals` | raw.cmhc_rentals | Validated rental data |
|
||||||
|
| `stg_police__crimes` | raw.crime_data | Standardized crime categories |
|
||||||
|
|
||||||
|
### Marts Schema (dbt)
|
||||||
|
|
||||||
|
Analytical tables ready for dashboard consumption:
|
||||||
|
|
||||||
|
| Model | Grain | Purpose |
|
||||||
|
|-------|-------|---------|
|
||||||
|
| `mart_neighbourhood_summary` | neighbourhood | Composite livability scores |
|
||||||
|
| `mart_rental_trends` | zone × month | Time-series rental analysis |
|
||||||
|
| `mart_crime_rates` | neighbourhood × year | Crime rate calculations |
|
||||||
|
| `mart_amenity_density` | neighbourhood | Amenity accessibility scores |
|
||||||
|
|
||||||
|
## Table Details
|
||||||
|
|
||||||
|
### Dimension Tables
|
||||||
|
|
||||||
|
#### dim_time
|
||||||
|
Time dimension for date-based analysis. Grain: one row per month.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| date_key | INTEGER | PK | Surrogate key (YYYYMM format) |
|
||||||
|
| full_date | DATE | UNIQUE, NOT NULL | First day of month |
|
||||||
|
| year | INTEGER | NOT NULL | Calendar year |
|
||||||
|
| month | INTEGER | NOT NULL | Month number (1-12) |
|
||||||
|
| quarter | INTEGER | NOT NULL | Quarter (1-4) |
|
||||||
|
| month_name | VARCHAR(20) | NOT NULL | Month name |
|
||||||
|
| is_month_start | BOOLEAN | DEFAULT TRUE | Always true (monthly grain) |
|
||||||
|
|
||||||
|
#### dim_cmhc_zone
|
||||||
|
CMHC rental market zones (~20 zones covering Toronto).
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| zone_key | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| zone_code | VARCHAR(10) | UNIQUE, NOT NULL | CMHC zone identifier |
|
||||||
|
| zone_name | VARCHAR(100) | NOT NULL | Zone display name |
|
||||||
|
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS zone boundary |
|
||||||
|
|
||||||
|
#### dim_neighbourhood
|
||||||
|
Toronto's 158 official neighbourhoods.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| neighbourhood_id | INTEGER | PK | City-assigned ID |
|
||||||
|
| name | VARCHAR(100) | NOT NULL | Neighbourhood name |
|
||||||
|
| geometry | GEOMETRY(POLYGON) | SRID 4326 | PostGIS boundary |
|
||||||
|
| population | INTEGER | | Total population |
|
||||||
|
| land_area_sqkm | NUMERIC(10,4) | | Area in km² |
|
||||||
|
| pop_density_per_sqkm | NUMERIC(10,2) | | Population density |
|
||||||
|
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||||
|
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||||
|
| pct_owner_occupied | NUMERIC(5,2) | | Owner occupancy rate |
|
||||||
|
| pct_renter_occupied | NUMERIC(5,2) | | Renter occupancy rate |
|
||||||
|
| census_year | INTEGER | DEFAULT 2021 | Census reference year |
|
||||||
|
|
||||||
|
#### dim_policy_event
|
||||||
|
Policy events for time-series annotation (rent control, interest rates, etc.).
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| event_id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| event_date | DATE | NOT NULL | Announcement date |
|
||||||
|
| effective_date | DATE | | Implementation date |
|
||||||
|
| level | VARCHAR(20) | NOT NULL | federal/provincial/municipal |
|
||||||
|
| category | VARCHAR(20) | NOT NULL | monetary/tax/regulatory/supply/economic |
|
||||||
|
| title | VARCHAR(200) | NOT NULL | Event title |
|
||||||
|
| description | TEXT | | Detailed description |
|
||||||
|
| expected_direction | VARCHAR(10) | NOT NULL | bearish/bullish/neutral |
|
||||||
|
| source_url | VARCHAR(500) | | Reference link |
|
||||||
|
| confidence | VARCHAR(10) | DEFAULT 'medium' | high/medium/low |
|
||||||
|
|
||||||
|
### Fact Tables
|
||||||
|
|
||||||
|
#### fact_rentals
|
||||||
|
CMHC rental market survey data. Grain: zone × bedroom type × survey date.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| date_key | INTEGER | FK → dim_time | Survey date reference |
|
||||||
|
| zone_key | INTEGER | FK → dim_cmhc_zone | CMHC zone reference |
|
||||||
|
| bedroom_type | VARCHAR(20) | NOT NULL | bachelor/1-bed/2-bed/3+bed/total |
|
||||||
|
| universe | INTEGER | | Total rental units |
|
||||||
|
| avg_rent | NUMERIC(10,2) | | Average rent |
|
||||||
|
| median_rent | NUMERIC(10,2) | | Median rent |
|
||||||
|
| vacancy_rate | NUMERIC(5,2) | | Vacancy percentage |
|
||||||
|
| availability_rate | NUMERIC(5,2) | | Availability percentage |
|
||||||
|
| turnover_rate | NUMERIC(5,2) | | Turnover percentage |
|
||||||
|
| rent_change_pct | NUMERIC(5,2) | | Year-over-year change |
|
||||||
|
| reliability_code | VARCHAR(2) | | CMHC data quality code |
|
||||||
|
|
||||||
|
#### fact_census
|
||||||
|
Census statistics. Grain: neighbourhood × census year.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| census_year | INTEGER | NOT NULL | 2016, 2021, etc. |
|
||||||
|
| population | INTEGER | | Total population |
|
||||||
|
| population_density | NUMERIC(10,2) | | People per km² |
|
||||||
|
| median_household_income | NUMERIC(12,2) | | Median income |
|
||||||
|
| average_household_income | NUMERIC(12,2) | | Average income |
|
||||||
|
| unemployment_rate | NUMERIC(5,2) | | Unemployment % |
|
||||||
|
| pct_bachelors_or_higher | NUMERIC(5,2) | | Education rate |
|
||||||
|
| pct_owner_occupied | NUMERIC(5,2) | | Owner rate |
|
||||||
|
| pct_renter_occupied | NUMERIC(5,2) | | Renter rate |
|
||||||
|
| median_age | NUMERIC(5,2) | | Median resident age |
|
||||||
|
| average_dwelling_value | NUMERIC(12,2) | | Average home value |
|
||||||
|
|
||||||
|
#### fact_crime
|
||||||
|
Crime statistics. Grain: neighbourhood × year × crime type.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| year | INTEGER | NOT NULL | Calendar year |
|
||||||
|
| crime_type | VARCHAR(50) | NOT NULL | Crime category |
|
||||||
|
| count | INTEGER | NOT NULL | Number of incidents |
|
||||||
|
| rate_per_100k | NUMERIC(10,2) | | Rate per 100k population |
|
||||||
|
|
||||||
|
#### fact_amenities
|
||||||
|
Amenity counts. Grain: neighbourhood × amenity type × year.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| amenity_type | VARCHAR(50) | NOT NULL | parks/schools/transit/etc. |
|
||||||
|
| count | INTEGER | NOT NULL | Number of amenities |
|
||||||
|
| year | INTEGER | NOT NULL | Reference year |
|
||||||
|
|
||||||
|
### Bridge Tables
|
||||||
|
|
||||||
|
#### bridge_cmhc_neighbourhood
|
||||||
|
Maps CMHC zones to neighbourhoods with area-based weights for data disaggregation.
|
||||||
|
|
||||||
|
| Column | Type | Constraints | Description |
|
||||||
|
|--------|------|-------------|-------------|
|
||||||
|
| id | INTEGER | PK, AUTO | Surrogate key |
|
||||||
|
| cmhc_zone_code | VARCHAR(10) | FK → dim_cmhc_zone | Zone reference |
|
||||||
|
| neighbourhood_id | INTEGER | FK → dim_neighbourhood | Neighbourhood reference |
|
||||||
|
| weight | NUMERIC(5,4) | NOT NULL | Proportional weight (0-1) |
|
||||||
|
|
||||||
|
## Indexes
|
||||||
|
|
||||||
|
| Table | Index | Columns | Purpose |
|
||||||
|
|-------|-------|---------|---------|
|
||||||
|
| fact_rentals | ix_fact_rentals_date_zone | date_key, zone_key | Time-series queries |
|
||||||
|
| fact_census | ix_fact_census_neighbourhood_year | neighbourhood_id, census_year | Census lookups |
|
||||||
|
| fact_crime | ix_fact_crime_neighbourhood_year | neighbourhood_id, year | Crime trends |
|
||||||
|
| fact_crime | ix_fact_crime_type | crime_type | Crime filtering |
|
||||||
|
| fact_amenities | ix_fact_amenities_neighbourhood_year | neighbourhood_id, year | Amenity queries |
|
||||||
|
| fact_amenities | ix_fact_amenities_type | amenity_type | Amenity filtering |
|
||||||
|
| bridge_cmhc_neighbourhood | ix_bridge_cmhc_zone | cmhc_zone_code | Zone lookups |
|
||||||
|
| bridge_cmhc_neighbourhood | ix_bridge_neighbourhood | neighbourhood_id | Neighbourhood lookups |
|
||||||
|
|
||||||
|
## PostGIS Extensions
|
||||||
|
|
||||||
|
The database requires PostGIS for geospatial operations:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||||
|
```
|
||||||
|
|
||||||
|
All geometry columns use SRID 4326 (WGS84) for compatibility with web mapping libraries.
|
||||||
200
docs/runbooks/adding-dashboard.md
Normal file
200
docs/runbooks/adding-dashboard.md
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
# Runbook: Adding a New Dashboard
|
||||||
|
|
||||||
|
This runbook describes how to add a new data dashboard to the portfolio application.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- [ ] Data sources identified and accessible
|
||||||
|
- [ ] Database schema designed
|
||||||
|
- [ ] Basic Dash/Plotly familiarity
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
|
||||||
|
Create the following structure under `portfolio_app/`:
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── pages/
|
||||||
|
│ └── {dashboard_name}/
|
||||||
|
│ ├── dashboard.py # Main layout with tabs
|
||||||
|
│ ├── methodology.py # Data sources and methods page
|
||||||
|
│ ├── tabs/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── overview.py # Overview tab layout
|
||||||
|
│ │ └── ... # Additional tab layouts
|
||||||
|
│ └── callbacks/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ └── ... # Callback modules
|
||||||
|
├── {dashboard_name}/ # Data logic (outside pages/)
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── parsers/ # API/CSV extraction
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ ├── loaders/ # Database operations
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ ├── schemas/ # Pydantic models
|
||||||
|
│ │ └── __init__.py
|
||||||
|
│ └── models/ # SQLAlchemy ORM
|
||||||
|
│ └── __init__.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step-by-Step Checklist
|
||||||
|
|
||||||
|
### 1. Data Layer
|
||||||
|
|
||||||
|
- [ ] Create Pydantic schemas in `{dashboard_name}/schemas/`
|
||||||
|
- [ ] Create SQLAlchemy models in `{dashboard_name}/models/`
|
||||||
|
- [ ] Create parsers in `{dashboard_name}/parsers/`
|
||||||
|
- [ ] Create loaders in `{dashboard_name}/loaders/`
|
||||||
|
- [ ] Add database migrations if needed
|
||||||
|
|
||||||
|
### 2. dbt Models
|
||||||
|
|
||||||
|
Create dbt models in `dbt/models/`:
|
||||||
|
|
||||||
|
- [ ] `staging/stg_{source}__{entity}.sql` - Raw data cleaning
|
||||||
|
- [ ] `intermediate/int_{domain}__{transform}.sql` - Business logic
|
||||||
|
- [ ] `marts/mart_{domain}.sql` - Final analytical tables
|
||||||
|
|
||||||
|
Follow naming conventions:
|
||||||
|
- Staging: `stg_{source}__{entity}`
|
||||||
|
- Intermediate: `int_{domain}__{transform}`
|
||||||
|
- Marts: `mart_{domain}`
|
||||||
|
|
||||||
|
### 3. Visualization Layer
|
||||||
|
|
||||||
|
- [ ] Create figure factories in `figures/` (or reuse existing)
|
||||||
|
- [ ] Follow the factory pattern: `create_{chart_type}_figure(data, **kwargs)`
|
||||||
|
|
||||||
|
### 4. Dashboard Pages
|
||||||
|
|
||||||
|
#### Main Dashboard (`pages/{dashboard_name}/dashboard.py`)
|
||||||
|
|
||||||
|
```python
|
||||||
|
import dash
|
||||||
|
from dash import html, dcc
|
||||||
|
import dash_mantine_components as dmc
|
||||||
|
|
||||||
|
dash.register_page(
|
||||||
|
__name__,
|
||||||
|
path="/{dashboard_name}",
|
||||||
|
title="{Dashboard Title}",
|
||||||
|
description="{Description}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def layout():
|
||||||
|
return dmc.Container([
|
||||||
|
# Header
|
||||||
|
dmc.Title("{Dashboard Title}", order=1),
|
||||||
|
|
||||||
|
# Tabs
|
||||||
|
dmc.Tabs([
|
||||||
|
dmc.TabsList([
|
||||||
|
dmc.TabsTab("Overview", value="overview"),
|
||||||
|
# Add more tabs
|
||||||
|
]),
|
||||||
|
dmc.TabsPanel(overview_tab(), value="overview"),
|
||||||
|
# Add more panels
|
||||||
|
], value="overview"),
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Tab Layouts (`pages/{dashboard_name}/tabs/`)
|
||||||
|
|
||||||
|
- [ ] Create one file per tab
|
||||||
|
- [ ] Export layout function from each
|
||||||
|
|
||||||
|
#### Callbacks (`pages/{dashboard_name}/callbacks/`)
|
||||||
|
|
||||||
|
- [ ] Create callback modules for interactivity
|
||||||
|
- [ ] Import and register in dashboard.py
|
||||||
|
|
||||||
|
### 5. Navigation
|
||||||
|
|
||||||
|
Add to sidebar in `components/sidebar.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
dmc.NavLink(
|
||||||
|
label="{Dashboard Name}",
|
||||||
|
href="/{dashboard_name}",
|
||||||
|
icon=DashIconify(icon="..."),
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Documentation
|
||||||
|
|
||||||
|
- [ ] Create methodology page (`pages/{dashboard_name}/methodology.py`)
|
||||||
|
- [ ] Document data sources
|
||||||
|
- [ ] Document transformation logic
|
||||||
|
- [ ] Add notebooks to `notebooks/{dashboard_name}/` if needed
|
||||||
|
|
||||||
|
### 7. Testing
|
||||||
|
|
||||||
|
- [ ] Add unit tests for parsers
|
||||||
|
- [ ] Add unit tests for loaders
|
||||||
|
- [ ] Add integration tests for callbacks
|
||||||
|
- [ ] Run `make test`
|
||||||
|
|
||||||
|
### 8. Final Verification
|
||||||
|
|
||||||
|
- [ ] All pages render without errors
|
||||||
|
- [ ] All callbacks respond correctly
|
||||||
|
- [ ] Data loads successfully
|
||||||
|
- [ ] dbt models run cleanly (`make dbt-run`)
|
||||||
|
- [ ] Linting passes (`make lint`)
|
||||||
|
- [ ] Tests pass (`make test`)
|
||||||
|
|
||||||
|
## Example: Toronto Dashboard
|
||||||
|
|
||||||
|
Reference implementation: `portfolio_app/pages/toronto/`
|
||||||
|
|
||||||
|
Key files:
|
||||||
|
- `dashboard.py` - Main layout with 5 tabs
|
||||||
|
- `tabs/overview.py` - Livability scores, scatter plots
|
||||||
|
- `callbacks/map_callbacks.py` - Choropleth interactions
|
||||||
|
- `toronto/models/dimensions.py` - Dimension tables
|
||||||
|
- `toronto/models/facts.py` - Fact tables
|
||||||
|
|
||||||
|
## Common Patterns
|
||||||
|
|
||||||
|
### Figure Factories
|
||||||
|
|
||||||
|
```python
|
||||||
|
# figures/choropleth.py
|
||||||
|
def create_choropleth_figure(
|
||||||
|
gdf: gpd.GeoDataFrame,
|
||||||
|
value_column: str,
|
||||||
|
title: str,
|
||||||
|
**kwargs
|
||||||
|
) -> go.Figure:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Callbacks
|
||||||
|
|
||||||
|
```python
|
||||||
|
# callbacks/map_callbacks.py
|
||||||
|
@callback(
|
||||||
|
Output("neighbourhood-details", "children"),
|
||||||
|
Input("choropleth-map", "clickData"),
|
||||||
|
)
|
||||||
|
def update_details(click_data):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Loading
|
||||||
|
|
||||||
|
```python
|
||||||
|
# {dashboard_name}/loaders/load.py
|
||||||
|
def load_data(session: Session) -> None:
|
||||||
|
# Parse from source
|
||||||
|
records = parse_source_data()
|
||||||
|
|
||||||
|
# Validate with Pydantic
|
||||||
|
validated = [Schema(**r) for r in records]
|
||||||
|
|
||||||
|
# Load to database
|
||||||
|
for record in validated:
|
||||||
|
session.add(Model(**record.model_dump()))
|
||||||
|
|
||||||
|
session.commit()
|
||||||
|
```
|
||||||
232
docs/runbooks/deployment.md
Normal file
232
docs/runbooks/deployment.md
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
# Runbook: Deployment
|
||||||
|
|
||||||
|
This runbook covers deployment procedures for the Analytics Portfolio application.
|
||||||
|
|
||||||
|
## Environments
|
||||||
|
|
||||||
|
| Environment | Branch | Server | URL |
|
||||||
|
|-------------|--------|--------|-----|
|
||||||
|
| Development | `development` | Local | http://localhost:8050 |
|
||||||
|
| Staging | `staging` | Homelab (hotserv) | Internal |
|
||||||
|
| Production | `main` | Bandit Labs VPS | https://leodata.science |
|
||||||
|
|
||||||
|
## CI/CD Pipeline
|
||||||
|
|
||||||
|
### Automatic Deployment
|
||||||
|
|
||||||
|
Deployments are triggered automatically via Gitea Actions:
|
||||||
|
|
||||||
|
1. **Push to `staging`** → Deploys to staging server
|
||||||
|
2. **Push to `main`** → Deploys to production server
|
||||||
|
|
||||||
|
### Workflow Files
|
||||||
|
|
||||||
|
- `.gitea/workflows/ci.yml` - Runs linting and tests on all branches
|
||||||
|
- `.gitea/workflows/deploy-staging.yml` - Staging deployment
|
||||||
|
- `.gitea/workflows/deploy-production.yml` - Production deployment
|
||||||
|
|
||||||
|
### Required Secrets
|
||||||
|
|
||||||
|
Configure these in Gitea repository settings:
|
||||||
|
|
||||||
|
| Secret | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `STAGING_HOST` | Staging server hostname/IP |
|
||||||
|
| `STAGING_USER` | SSH username for staging |
|
||||||
|
| `STAGING_SSH_KEY` | Private key for staging SSH |
|
||||||
|
| `PROD_HOST` | Production server hostname/IP |
|
||||||
|
| `PROD_USER` | SSH username for production |
|
||||||
|
| `PROD_SSH_KEY` | Private key for production SSH |
|
||||||
|
|
||||||
|
## Manual Deployment
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- SSH access to target server
|
||||||
|
- Repository cloned at `~/apps/personal-portfolio`
|
||||||
|
- Virtual environment created at `.venv`
|
||||||
|
- Docker and Docker Compose installed
|
||||||
|
- PostgreSQL container running
|
||||||
|
|
||||||
|
### Steps
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. SSH to server
|
||||||
|
ssh user@server
|
||||||
|
|
||||||
|
# 2. Navigate to app directory
|
||||||
|
cd ~/apps/personal-portfolio
|
||||||
|
|
||||||
|
# 3. Pull latest changes
|
||||||
|
git fetch origin {branch}
|
||||||
|
git reset --hard origin/{branch}
|
||||||
|
|
||||||
|
# 4. Activate virtual environment
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
# 5. Install dependencies
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 6. Run database migrations (if any)
|
||||||
|
# python -m alembic upgrade head
|
||||||
|
|
||||||
|
# 7. Run dbt models
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
# 8. Restart application
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 9. Verify health
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rollback Procedure
|
||||||
|
|
||||||
|
### Quick Rollback
|
||||||
|
|
||||||
|
If deployment fails, rollback to previous commit:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Find previous working commit
|
||||||
|
git log --oneline -10
|
||||||
|
|
||||||
|
# 2. Reset to that commit
|
||||||
|
git reset --hard {commit_hash}
|
||||||
|
|
||||||
|
# 3. Restart services
|
||||||
|
docker compose down
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 4. Verify
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
### Full Rollback (Database)
|
||||||
|
|
||||||
|
If database changes need to be reverted:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Stop application
|
||||||
|
docker compose down
|
||||||
|
|
||||||
|
# 2. Restore database backup
|
||||||
|
pg_restore -h localhost -U portfolio -d portfolio backup.dump
|
||||||
|
|
||||||
|
# 3. Revert code
|
||||||
|
git reset --hard {commit_hash}
|
||||||
|
|
||||||
|
# 4. Run dbt at that version
|
||||||
|
cd dbt && dbt run --profiles-dir . && cd ..
|
||||||
|
|
||||||
|
# 5. Restart
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Health Checks
|
||||||
|
|
||||||
|
### Application Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8050/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response:
|
||||||
|
```json
|
||||||
|
{"status": "healthy"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec postgres pg_isready -U portfolio
|
||||||
|
```
|
||||||
|
|
||||||
|
### Container Status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose ps
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
### View Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# All services
|
||||||
|
make logs
|
||||||
|
|
||||||
|
# Specific service
|
||||||
|
make logs SERVICE=postgres
|
||||||
|
|
||||||
|
# Or directly
|
||||||
|
docker compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Resource Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker stats
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Application Won't Start
|
||||||
|
|
||||||
|
1. Check container logs: `docker compose logs app`
|
||||||
|
2. Verify environment variables: `cat .env`
|
||||||
|
3. Check database connectivity: `docker compose exec postgres pg_isready`
|
||||||
|
4. Verify port availability: `lsof -i :8050`
|
||||||
|
|
||||||
|
### Database Connection Errors
|
||||||
|
|
||||||
|
1. Check postgres container: `docker compose ps postgres`
|
||||||
|
2. Verify DATABASE_URL in `.env`
|
||||||
|
3. Check postgres logs: `docker compose logs postgres`
|
||||||
|
4. Test connection: `docker compose exec postgres psql -U portfolio -c '\l'`
|
||||||
|
|
||||||
|
### dbt Failures
|
||||||
|
|
||||||
|
1. Check dbt configuration and connectivity: `cd dbt && dbt debug`
|
||||||
|
2. Verify profiles.yml: `cat dbt/profiles.yml`
|
||||||
|
3. Run with verbose output: `dbt run --debug`
|
||||||
|
|
||||||
|
### Out of Memory
|
||||||
|
|
||||||
|
1. Check memory usage: `free -h`
|
||||||
|
2. Review container limits in docker-compose.yml
|
||||||
|
3. Consider increasing swap or server resources
|
||||||
|
|
||||||
|
## Backup Procedures
|
||||||
|
|
||||||
|
### Database Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create backup
|
||||||
|
docker compose exec postgres pg_dump -U portfolio portfolio > backup_$(date +%Y%m%d).sql
|
||||||
|
|
||||||
|
# Compressed backup
|
||||||
|
docker compose exec postgres pg_dump -U portfolio -Fc portfolio > backup_$(date +%Y%m%d).dump
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore from Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From SQL file
|
||||||
|
docker compose exec -T postgres psql -U portfolio portfolio < backup.sql
|
||||||
|
|
||||||
|
# From dump file
|
||||||
|
docker compose exec -T postgres pg_restore -U portfolio -d portfolio < backup.dump
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Checklist
|
||||||
|
|
||||||
|
Before deploying to production:
|
||||||
|
|
||||||
|
- [ ] All tests pass (`make test`)
|
||||||
|
- [ ] Linting passes (`make lint`)
|
||||||
|
- [ ] Staging deployment successful
|
||||||
|
- [ ] Manual testing on staging complete
|
||||||
|
- [ ] Database backup taken
|
||||||
|
- [ ] Rollback plan confirmed
|
||||||
|
- [ ] Team notified of deployment window
|
||||||
72
scripts/etl/toronto.sh
Executable file
72
scripts/etl/toronto.sh
Executable file
@@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env bash
# scripts/etl/toronto.sh - Run Toronto data pipeline
#
# Usage:
#   ./scripts/etl/toronto.sh --full         # Complete reload of all data
#   ./scripts/etl/toronto.sh --incremental  # Only new data since last run
#   ./scripts/etl/toronto.sh                # Default: incremental
#
# Logs are written to .dev/logs/etl/

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
readonly SCRIPT_DIR PROJECT_ROOT
readonly LOG_DIR="$PROJECT_ROOT/.dev/logs/etl"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly LOG_FILE="$LOG_DIR/toronto_${TIMESTAMP}.log"

MODE="${1:---incremental}"

mkdir -p "$LOG_DIR"

# Print a timestamped message to stdout and append it to the log file.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

# Run one pipeline step, teeing its combined stdout/stderr into the log.
# pipefail (set above) makes a failing step abort the script even though
# its output goes through tee.
run_step() {
  "$@" 2>&1 | tee -a "$LOG_FILE"
}

log "Starting Toronto ETL pipeline (mode: $MODE)"
log "Log file: $LOG_FILE"

cd "$PROJECT_ROOT"

# Activate virtual environment if it exists
if [ -d ".venv" ]; then
  # shellcheck disable=SC1091
  source .venv/bin/activate
  log "Activated virtual environment"
fi

case "$MODE" in
  --full)
    log "Running FULL data reload..."

    log "Step 1/4: Parsing neighbourhood data..."
    run_step python -m portfolio_app.toronto.parsers.neighbourhoods

    log "Step 2/4: Parsing census data..."
    run_step python -m portfolio_app.toronto.parsers.census

    log "Step 3/4: Parsing crime data..."
    run_step python -m portfolio_app.toronto.parsers.crime

    log "Step 4/4: Running dbt transformations..."
    # Subshell keeps the working directory stable even if dbt fails
    # (the original `cd dbt && ... && cd ..` skipped the final `cd ..`
    # on failure, exiting with the cwd changed).
    (cd dbt && dbt run --full-refresh --profiles-dir .) 2>&1 | tee -a "$LOG_FILE"
    ;;

  --incremental)
    log "Running INCREMENTAL update..."

    log "Step 1/2: Checking for new data..."
    # Add incremental logic here when implemented

    log "Step 2/2: Running dbt transformations..."
    (cd dbt && dbt run --profiles-dir .) 2>&1 | tee -a "$LOG_FILE"
    ;;

  *)
    log "ERROR: Unknown mode '$MODE'. Use --full or --incremental"
    exit 1
    ;;
esac

log "Toronto ETL pipeline completed successfully"
log "Full log available at: $LOG_FILE"
20
scripts/logs.sh
Executable file
20
scripts/logs.sh
Executable file
@@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
# scripts/logs.sh - Follow docker compose logs
#
# Usage:
#   ./scripts/logs.sh              # All services
#   ./scripts/logs.sh postgres     # Specific service
#   ./scripts/logs.sh -n 100      # Last 100 lines

set -euo pipefail

SERVICE="${1:-}"

# If the first argument names a service (i.e. is not a flag), follow that
# service and pass any remaining arguments through. Otherwise treat every
# argument as a flag for `docker compose logs`.
#
# Arguments are forwarded with "${@:2}" / "$@" rather than collected into a
# scalar: the original EXTRA_ARGS="${@:2}" flattened the argument list into
# one string and expanded it unquoted, which word-split and glob-expanded
# any flag value containing spaces or wildcards (ShellCheck SC2124/SC2086).
if [[ -n "$SERVICE" && "$SERVICE" != -* ]]; then
  echo "Following logs for service: $SERVICE"
  docker compose logs -f "$SERVICE" "${@:2}"
else
  echo "Following logs for all services"
  docker compose logs -f "$@"
fi
38
scripts/run-detached.sh
Executable file
38
scripts/run-detached.sh
Executable file
@@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env bash
# scripts/run-detached.sh - Start containers and wait for health
#
# Usage:
#   ./scripts/run-detached.sh

set -euo pipefail

TIMEOUT=60
INTERVAL=5

# Succeeds once PostgreSQL accepts connections inside the compose stack.
postgres_ready() {
  docker compose exec -T postgres pg_isready -U portfolio > /dev/null 2>&1
}

# Succeeds once the application answers its health endpoint.
app_healthy() {
  curl -sf http://localhost:8050/health > /dev/null 2>&1
}

echo "Starting containers in detached mode..."
docker compose up -d

echo "Waiting for services to become healthy..."
elapsed=0

# Poll every INTERVAL seconds until both checks pass or TIMEOUT elapses.
until [ "$elapsed" -ge "$TIMEOUT" ]; do
  if postgres_ready; then
    echo "PostgreSQL is ready!"

    if app_healthy; then
      echo "Application health check passed!"
      echo "All services are healthy."
      exit 0
    fi
  fi

  echo "Waiting... ($elapsed/$TIMEOUT seconds)"
  sleep "$INTERVAL"
  elapsed=$((elapsed + INTERVAL))
done

echo "ERROR: Health check timed out after $TIMEOUT seconds"
docker compose ps
exit 1
Reference in New Issue
Block a user