Merge pull request 'feat: project bootstrap and structure' (#4) from feature/sprint1-bootstrap into development
This commit was merged in pull request #4.
This commit is contained in:
15
.env.example
Normal file
15
.env.example
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Database Configuration
|
||||||
|
DATABASE_URL=postgresql://portfolio:portfolio_dev@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=portfolio_dev
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
|
||||||
|
# Application Settings
|
||||||
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=change-me-in-production
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# Optional: dbt profile (defaults to profiles.yml)
|
||||||
|
# DBT_PROFILES_DIR=.
|
||||||
26
.gitignore
vendored
26
.gitignore
vendored
@@ -1,4 +1,28 @@
|
|||||||
# ---> Python
|
# ====================
|
||||||
|
# Project-Specific
|
||||||
|
# ====================
|
||||||
|
|
||||||
|
# Processed data (generated, not source)
|
||||||
|
data/*/processed/
|
||||||
|
|
||||||
|
# Reports (generated)
|
||||||
|
reports/
|
||||||
|
|
||||||
|
# Backups
|
||||||
|
backups/
|
||||||
|
|
||||||
|
# Notebook exports
|
||||||
|
notebooks/*.html
|
||||||
|
|
||||||
|
# dbt
|
||||||
|
dbt/target/
|
||||||
|
dbt/dbt_packages/
|
||||||
|
dbt/logs/
|
||||||
|
|
||||||
|
# ====================
|
||||||
|
# Python
|
||||||
|
# ====================
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|||||||
32
.pre-commit-config.yaml
Normal file
32
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.5.0
|
||||||
|
hooks:
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: ['--maxkb=1000']
|
||||||
|
- id: check-merge-conflict
|
||||||
|
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.1.9
|
||||||
|
hooks:
|
||||||
|
- id: ruff
|
||||||
|
args: [--fix, --exit-non-zero-on-fix]
|
||||||
|
- id: ruff-format
|
||||||
|
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
rev: v1.8.0
|
||||||
|
hooks:
|
||||||
|
- id: mypy
|
||||||
|
additional_dependencies:
|
||||||
|
- pydantic>=2.0
|
||||||
|
- pandas-stubs
|
||||||
|
- types-requests
|
||||||
|
args: [--ignore-missing-imports]
|
||||||
|
exclude: ^(tests/|dbt/)
|
||||||
|
|
||||||
|
ci:
|
||||||
|
autofix_commit_msg: "style: auto-fix by pre-commit hooks"
|
||||||
|
autoupdate_commit_msg: "chore: update pre-commit hooks"
|
||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.11
|
||||||
258
CLAUDE.md
Normal file
258
CLAUDE.md
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
Working context for Claude Code on the Analytics Portfolio project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Status
|
||||||
|
|
||||||
|
**Current Sprint**: 1 (Project Bootstrap)
|
||||||
|
**Phase**: 1 - Toronto Housing Dashboard
|
||||||
|
**Branch**: `development` (feature branches merge here)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
### Run Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make setup # Install deps, create .env, init pre-commit
|
||||||
|
make docker-up # Start PostgreSQL + PostGIS
|
||||||
|
make docker-down # Stop containers
|
||||||
|
make db-init # Initialize database schema
|
||||||
|
make run # Start Dash dev server
|
||||||
|
make test # Run pytest
|
||||||
|
make lint # Run ruff linter
|
||||||
|
make format # Run ruff formatter
|
||||||
|
make ci # Run all checks
|
||||||
|
```
|
||||||
|
|
||||||
|
### Branch Workflow
|
||||||
|
|
||||||
|
1. Create feature branch FROM `development`: `git checkout -b feature/{sprint}-{description}`
|
||||||
|
2. Work and commit on feature branch
|
||||||
|
3. Merge INTO `development` when complete
|
||||||
|
4. `development` -> `staging` -> `main` for releases
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code Conventions
|
||||||
|
|
||||||
|
### Import Style
|
||||||
|
|
||||||
|
| Context | Style | Example |
|
||||||
|
|---------|-------|---------|
|
||||||
|
| Same directory | Single dot | `from .trreb import TRREBParser` |
|
||||||
|
| Sibling directory | Double dot | `from ..schemas.trreb import TRREBRecord` |
|
||||||
|
| External packages | Absolute | `import pandas as pd` |
|
||||||
|
|
||||||
|
### Module Responsibilities
|
||||||
|
|
||||||
|
| Directory | Contains | Purpose |
|
||||||
|
|-----------|----------|---------|
|
||||||
|
| `schemas/` | Pydantic models | Data validation |
|
||||||
|
| `models/` | SQLAlchemy ORM | Database persistence |
|
||||||
|
| `parsers/` | PDF/CSV extraction | Raw data ingestion |
|
||||||
|
| `loaders/` | Database operations | Data loading |
|
||||||
|
| `figures/` | Chart factories | Plotly figure generation |
|
||||||
|
| `callbacks/` | Dash callbacks | In `pages/{dashboard}/callbacks/` |
|
||||||
|
| `errors/` | Exceptions + handlers | Error handling |
|
||||||
|
|
||||||
|
### Type Hints
|
||||||
|
|
||||||
|
Use Python 3.10+ style:
|
||||||
|
```python
|
||||||
|
def process(items: list[str], config: dict[str, int] | None = None) -> bool:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
# errors/exceptions.py
|
||||||
|
class PortfolioError(Exception):
|
||||||
|
"""Base exception."""
|
||||||
|
|
||||||
|
class ParseError(PortfolioError):
|
||||||
|
"""PDF/CSV parsing failed."""
|
||||||
|
|
||||||
|
class ValidationError(PortfolioError):
|
||||||
|
"""Pydantic or business rule validation failed."""
|
||||||
|
|
||||||
|
class LoadError(PortfolioError):
|
||||||
|
"""Database load operation failed."""
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Standards
|
||||||
|
|
||||||
|
- Single responsibility functions with verb naming
|
||||||
|
- Early returns over deep nesting
|
||||||
|
- Google-style docstrings only for non-obvious behavior
|
||||||
|
- Module-level constants for magic values
|
||||||
|
- Pydantic BaseSettings for runtime config
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Application Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
portfolio_app/
|
||||||
|
├── app.py # Dash app factory with Pages routing
|
||||||
|
├── config.py # Pydantic BaseSettings
|
||||||
|
├── assets/ # CSS, images (auto-served)
|
||||||
|
├── pages/
|
||||||
|
│ ├── home.py # Bio landing page -> /
|
||||||
|
│ └── toronto/
|
||||||
|
│ ├── dashboard.py # Layout only -> /toronto
|
||||||
|
│ └── callbacks/ # Interaction logic
|
||||||
|
├── components/ # Shared UI (navbar, footer, cards)
|
||||||
|
├── figures/ # Shared chart factories
|
||||||
|
├── toronto/ # Toronto data logic
|
||||||
|
│ ├── parsers/
|
||||||
|
│ ├── loaders/
|
||||||
|
│ ├── schemas/ # Pydantic
|
||||||
|
│ └── models/ # SQLAlchemy
|
||||||
|
└── errors/
|
||||||
|
```
|
||||||
|
|
||||||
|
### URL Routing
|
||||||
|
|
||||||
|
| URL | Page | Sprint |
|
||||||
|
|-----|------|--------|
|
||||||
|
| `/` | Bio landing page | 2 |
|
||||||
|
| `/toronto` | Toronto Housing Dashboard | 6 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tech Stack (Locked)
|
||||||
|
|
||||||
|
| Layer | Technology | Version |
|
||||||
|
|-------|------------|---------|
|
||||||
|
| Database | PostgreSQL + PostGIS | 16.x |
|
||||||
|
| Validation | Pydantic | >=2.0 |
|
||||||
|
| ORM | SQLAlchemy | >=2.0 (2.0-style API only) |
|
||||||
|
| Transformation | dbt-postgres | >=1.7 |
|
||||||
|
| Data Processing | Pandas | >=2.1 |
|
||||||
|
| Geospatial | GeoPandas + Shapely | GeoPandas >=0.14, Shapely >=2.0 |
|
||||||
|
| Visualization | Dash + Plotly | Dash >=2.14, Plotly >=5.18 |
|
||||||
|
| UI Components | dash-mantine-components | Latest stable (>=0.14) |
|
||||||
|
| Testing | pytest | >=7.0 |
|
||||||
|
| Python | 3.11+ | Via pyenv |
|
||||||
|
|
||||||
|
**Notes**:
|
||||||
|
- SQLAlchemy 2.0 + Pydantic 2.0 only (never mix 1.x APIs)
|
||||||
|
- PostGIS extension required in database
|
||||||
|
- Docker Compose V2 format (no `version` field)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Data Model Overview
|
||||||
|
|
||||||
|
### Geographic Reality (Toronto Housing)
|
||||||
|
|
||||||
|
```
|
||||||
|
TRREB Districts (~35) - Purchase data (W01, C01, E01...)
|
||||||
|
CMHC Zones (~20) - Rental data (Census Tract aligned)
|
||||||
|
City Neighbourhoods (158) - Enrichment/overlay only
|
||||||
|
```
|
||||||
|
|
||||||
|
**Critical**: These geographies do NOT align. Display as separate layers—do not force crosswalks.
|
||||||
|
|
||||||
|
### Star Schema
|
||||||
|
|
||||||
|
| Table | Type | Keys |
|
||||||
|
|-------|------|------|
|
||||||
|
| `fact_purchases` | Fact | -> dim_time, dim_trreb_district |
|
||||||
|
| `fact_rentals` | Fact | -> dim_time, dim_cmhc_zone |
|
||||||
|
| `dim_time` | Dimension | date_key (PK) |
|
||||||
|
| `dim_trreb_district` | Dimension | district_key (PK), geometry |
|
||||||
|
| `dim_cmhc_zone` | Dimension | zone_key (PK), geometry |
|
||||||
|
| `dim_neighbourhood` | Dimension | neighbourhood_id (PK), geometry |
|
||||||
|
| `dim_policy_event` | Dimension | event_id (PK) |
|
||||||
|
|
||||||
|
**V1 Rule**: `dim_neighbourhood` has NO FK to fact tables—reference overlay only.
|
||||||
|
|
||||||
|
### dbt Layers
|
||||||
|
|
||||||
|
| Layer | Naming | Purpose |
|
||||||
|
|-------|--------|---------|
|
||||||
|
| Staging | `stg_{source}__{entity}` | 1:1 source, cleaned, typed |
|
||||||
|
| Intermediate | `int_{domain}__{transform}` | Business logic |
|
||||||
|
| Marts | `mart_{domain}` | Final analytical tables |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## DO NOT BUILD (Phase 1)
|
||||||
|
|
||||||
|
**Stop and flag if a task seems to require these**:
|
||||||
|
|
||||||
|
| Feature | Reason |
|
||||||
|
|---------|--------|
|
||||||
|
| `bridge_district_neighbourhood` table | Area-weighted aggregation is Phase 4 |
|
||||||
|
| Crime data integration | Deferred to Phase 4 |
|
||||||
|
| Historical boundary reconciliation (140->158) | 2021+ data only for V1 |
|
||||||
|
| ML prediction models | Energy project scope (Phase 3) |
|
||||||
|
| Multi-project shared infrastructure | Build first, abstract second (Phase 2) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sprint 1 Deliverables
|
||||||
|
|
||||||
|
| Category | Tasks |
|
||||||
|
|----------|-------|
|
||||||
|
| **Bootstrap** | Git init, pyproject.toml, .env.example, Makefile, CLAUDE.md |
|
||||||
|
| **Infrastructure** | Docker Compose (PostgreSQL + PostGIS), scripts/ directory |
|
||||||
|
| **App Foundation** | portfolio_app/ structure, config.py, error handling |
|
||||||
|
| **Tests** | tests/ directory, conftest.py, pytest config |
|
||||||
|
| **Data Acquisition** | Download TRREB PDFs, START boundary digitization (HUMAN task) |
|
||||||
|
|
||||||
|
### Human Tasks (Cannot Automate)
|
||||||
|
|
||||||
|
| Task | Tool | Effort |
|
||||||
|
|------|------|--------|
|
||||||
|
| Digitize TRREB district boundaries | QGIS | 3-4 hours |
|
||||||
|
| Research policy events (10-20) | Manual | 2-3 hours |
|
||||||
|
| Replace social link placeholders | Manual | 5 minutes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Required in `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DATABASE_URL=postgresql://user:pass@localhost:5432/portfolio
|
||||||
|
POSTGRES_USER=portfolio
|
||||||
|
POSTGRES_PASSWORD=<secure>
|
||||||
|
POSTGRES_DB=portfolio
|
||||||
|
DASH_DEBUG=true
|
||||||
|
SECRET_KEY=<random>
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Script Standards
|
||||||
|
|
||||||
|
All scripts in `scripts/`:
|
||||||
|
- Include usage comments at top
|
||||||
|
- Idempotent where possible
|
||||||
|
- Exit codes: 0 = success, 1 = error
|
||||||
|
- Use `set -euo pipefail` for bash
|
||||||
|
- Log to stdout, errors to stderr
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Reference Documents
|
||||||
|
|
||||||
|
| Document | Location | Use When |
|
||||||
|
|----------|----------|----------|
|
||||||
|
| Full specification | `docs/PROJECT_REFERENCE.md` | Architecture decisions |
|
||||||
|
| Data schemas | `docs/toronto_housing_spec.md` | Parser/model tasks |
|
||||||
|
| WBS details | `docs/wbs.md` | Sprint planning |
|
||||||
|
| Bio content | `docs/bio_content.md` | Building home.py |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Last Updated: Sprint 1*
|
||||||
157
Makefile
Normal file
157
Makefile
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
# Every target in this Makefile is a command, not a file to build; declare them
# all phony so a same-named file or directory can never shadow a target.
.PHONY: help setup docker-up docker-down docker-logs db-init db-reset run test test-cov dbt-run dbt-test dbt-docs lint format typecheck ci deploy clean
|
||||||
|
|
||||||
|
# Default target
|
||||||
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
|
# Environment
PYTHON := python3
# Invoke pip through the chosen interpreter so packages are installed into the
# same environment that `make run` executes with.
PIP := $(PYTHON) -m pip
DOCKER_COMPOSE := docker compose
|
||||||
|
|
||||||
|
# Colors for output
|
||||||
|
BLUE := \033[0;34m
|
||||||
|
GREEN := \033[0;32m
|
||||||
|
YELLOW := \033[0;33m
|
||||||
|
NC := \033[0m
|
||||||
|
|
||||||
|
help: ## Show this help message
|
||||||
|
@echo "Usage: make [target]"
|
||||||
|
@echo ""
|
||||||
|
@echo "Targets:"
|
||||||
|
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " $(BLUE)%-15s$(NC) %s\n", $$1, $$2}'
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Setup
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
setup: ## Install dependencies, create .env, init pre-commit
|
||||||
|
@echo "$(GREEN)Installing dependencies...$(NC)"
|
||||||
|
$(PIP) install -e ".[dev,dbt]"
|
||||||
|
@echo "$(GREEN)Setting up environment...$(NC)"
|
||||||
|
@if [ ! -f .env ]; then cp .env.example .env; echo "$(YELLOW)Created .env from .env.example - please update values$(NC)"; fi
|
||||||
|
@echo "$(GREEN)Installing pre-commit hooks...$(NC)"
|
||||||
|
pre-commit install
|
||||||
|
@echo "$(GREEN)Setup complete!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Docker
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
docker-up: ## Start PostgreSQL + PostGIS containers
	@echo "$(GREEN)Starting database containers...$(NC)"
	@echo "$(GREEN)Waiting for database to be ready...$(NC)"
	@# --wait blocks until the services report running/healthy, replacing a
	@# fixed sleep that only guessed at readiness.
	$(DOCKER_COMPOSE) up -d --wait
	@echo "$(GREEN)Database containers started!$(NC)"
|
||||||
|
|
||||||
|
docker-down: ## Stop containers
|
||||||
|
@echo "$(YELLOW)Stopping containers...$(NC)"
|
||||||
|
$(DOCKER_COMPOSE) down
|
||||||
|
|
||||||
|
docker-logs: ## View container logs
|
||||||
|
$(DOCKER_COMPOSE) logs -f
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Database
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
db-init: ## Initialize database schema
|
||||||
|
@echo "$(GREEN)Initializing database schema...$(NC)"
|
||||||
|
@if [ -f scripts/db/init.sh ]; then \
|
||||||
|
bash scripts/db/init.sh; \
|
||||||
|
else \
|
||||||
|
echo "$(YELLOW)scripts/db/init.sh not found - skipping$(NC)"; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
db-reset: ## Drop and recreate database (DESTRUCTIVE)
	@echo "$(YELLOW)WARNING: This will delete all data!$(NC)"
	@# `read -p` is a bash extension; make runs recipes with /bin/sh, so print
	@# the prompt with printf and use plain `read`. Only a literal "y" proceeds.
	@printf "Are you sure? [y/N] "; read confirm; [ "$$confirm" = "y" ] || exit 1
	$(DOCKER_COMPOSE) down -v
	@# --wait blocks until the recreated services are running/healthy before
	@# the schema is initialised.
	$(DOCKER_COMPOSE) up -d --wait
	$(MAKE) db-init
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Application
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
run: ## Start Dash development server
|
||||||
|
@echo "$(GREEN)Starting Dash server...$(NC)"
|
||||||
|
$(PYTHON) -m portfolio_app.app
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Testing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
test: ## Run pytest
|
||||||
|
@echo "$(GREEN)Running tests...$(NC)"
|
||||||
|
pytest
|
||||||
|
|
||||||
|
test-cov: ## Run pytest with coverage
|
||||||
|
@echo "$(GREEN)Running tests with coverage...$(NC)"
|
||||||
|
pytest --cov=portfolio_app --cov-report=html --cov-report=term
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# dbt
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
dbt-run: ## Run dbt models
|
||||||
|
@echo "$(GREEN)Running dbt models...$(NC)"
|
||||||
|
cd dbt && dbt run
|
||||||
|
|
||||||
|
dbt-test: ## Run dbt tests
|
||||||
|
@echo "$(GREEN)Running dbt tests...$(NC)"
|
||||||
|
cd dbt && dbt test
|
||||||
|
|
||||||
|
dbt-docs: ## Generate dbt documentation
|
||||||
|
@echo "$(GREEN)Generating dbt docs...$(NC)"
|
||||||
|
cd dbt && dbt docs generate && dbt docs serve
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Code Quality
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
lint: ## Run ruff linter
|
||||||
|
@echo "$(GREEN)Running linter...$(NC)"
|
||||||
|
ruff check .
|
||||||
|
|
||||||
|
format: ## Run ruff formatter
|
||||||
|
@echo "$(GREEN)Formatting code...$(NC)"
|
||||||
|
ruff format .
|
||||||
|
ruff check --fix .
|
||||||
|
|
||||||
|
typecheck: ## Run mypy type checker
|
||||||
|
@echo "$(GREEN)Running type checker...$(NC)"
|
||||||
|
mypy portfolio_app
|
||||||
|
|
||||||
|
ci: ## Run all checks (lint, typecheck, test)
|
||||||
|
@echo "$(GREEN)Running CI checks...$(NC)"
|
||||||
|
$(MAKE) lint
|
||||||
|
$(MAKE) typecheck
|
||||||
|
$(MAKE) test
|
||||||
|
@echo "$(GREEN)All checks passed!$(NC)"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Deployment
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
deploy: ## Deploy to production
|
||||||
|
@echo "$(YELLOW)Deployment not yet configured$(NC)"
|
||||||
|
@echo "TODO: Add deployment script"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Cleanup
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
clean: ## Remove build artifacts and caches
|
||||||
|
@echo "$(YELLOW)Cleaning up...$(NC)"
|
||||||
|
rm -rf build/
|
||||||
|
rm -rf dist/
|
||||||
|
rm -rf *.egg-info/
|
||||||
|
rm -rf .pytest_cache/
|
||||||
|
rm -rf .ruff_cache/
|
||||||
|
rm -rf .mypy_cache/
|
||||||
|
rm -rf htmlcov/
|
||||||
|
rm -rf .coverage
|
||||||
|
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
||||||
|
@echo "$(GREEN)Clean complete!$(NC)"
|
||||||
0
data/toronto/raw/.gitkeep
Normal file
0
data/toronto/raw/.gitkeep
Normal file
0
data/toronto/reference/.gitkeep
Normal file
0
data/toronto/reference/.gitkeep
Normal file
0
dbt/macros/.gitkeep
Normal file
0
dbt/macros/.gitkeep
Normal file
0
dbt/models/intermediate/.gitkeep
Normal file
0
dbt/models/intermediate/.gitkeep
Normal file
0
dbt/models/marts/.gitkeep
Normal file
0
dbt/models/marts/.gitkeep
Normal file
0
dbt/models/staging/.gitkeep
Normal file
0
dbt/models/staging/.gitkeep
Normal file
0
dbt/tests/.gitkeep
Normal file
0
dbt/tests/.gitkeep
Normal file
22
docker-compose.yml
Normal file
22
docker-compose.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
services:
  db:
    image: postgis/postgis:16-3.4
    container_name: portfolio-db
    restart: unless-stopped
    ports:
      # Host port defaults to 5432. Set POSTGRES_PORT to avoid clashing with a
      # PostgreSQL already listening on the host, and keep DATABASE_URL in sync.
      - "${POSTGRES_PORT:-5432}:5432"
    environment:
      # Defaults match the development values shipped in .env.example.
      POSTGRES_USER: ${POSTGRES_USER:-portfolio}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-portfolio_dev}
      POSTGRES_DB: ${POSTGRES_DB:-portfolio}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Mounted read-only; per the script's own header it runs on first
      # container start and enables the PostGIS extension.
      - ./scripts/db/init-postgis.sql:/docker-entrypoint-initdb.d/init-postgis.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-portfolio} -d ${POSTGRES_DB:-portfolio}"]
      interval: 10s
      timeout: 5s
      retries: 5

volumes:
  postgres_data:
|
||||||
0
notebooks/.gitkeep
Normal file
0
notebooks/.gitkeep
Normal file
3
portfolio_app/__init__.py
Normal file
3
portfolio_app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Analytics Portfolio Application."""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
0
portfolio_app/assets/.gitkeep
Normal file
0
portfolio_app/assets/.gitkeep
Normal file
0
portfolio_app/components/.gitkeep
Normal file
0
portfolio_app/components/.gitkeep
Normal file
5
portfolio_app/errors/__init__.py
Normal file
5
portfolio_app/errors/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Error handling for the portfolio application."""
|
||||||
|
|
||||||
|
from .exceptions import LoadError, ParseError, PortfolioError, ValidationError
|
||||||
|
|
||||||
|
__all__ = ["PortfolioError", "ParseError", "ValidationError", "LoadError"]
|
||||||
17
portfolio_app/errors/exceptions.py
Normal file
17
portfolio_app/errors/exceptions.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
"""Custom exceptions for the portfolio application."""
|
||||||
|
|
||||||
|
|
||||||
|
class PortfolioError(Exception):
    """Root of the portfolio application's exception hierarchy.

    Catching this type handles every exception class defined in this module.
    """


class ParseError(PortfolioError):
    """Raised when parsing a PDF or CSV source fails."""


class ValidationError(PortfolioError):
    """Raised when Pydantic or business-rule validation fails.

    This class derives only from ``PortfolioError``; it is not related by
    inheritance to any third-party exception that shares its name.
    """


class LoadError(PortfolioError):
    """Raised when a database load operation fails."""
|
||||||
0
portfolio_app/figures/.gitkeep
Normal file
0
portfolio_app/figures/.gitkeep
Normal file
1
portfolio_app/pages/__init__.py
Normal file
1
portfolio_app/pages/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Dash pages."""
|
||||||
1
portfolio_app/pages/toronto/__init__.py
Normal file
1
portfolio_app/pages/toronto/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Toronto Housing Dashboard page."""
|
||||||
1
portfolio_app/pages/toronto/callbacks/__init__.py
Normal file
1
portfolio_app/pages/toronto/callbacks/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Toronto dashboard callbacks."""
|
||||||
1
portfolio_app/toronto/__init__.py
Normal file
1
portfolio_app/toronto/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Toronto housing data logic."""
|
||||||
1
portfolio_app/toronto/loaders/__init__.py
Normal file
1
portfolio_app/toronto/loaders/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Database loaders for Toronto housing data."""
|
||||||
1
portfolio_app/toronto/models/__init__.py
Normal file
1
portfolio_app/toronto/models/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""SQLAlchemy models for Toronto housing data."""
|
||||||
1
portfolio_app/toronto/parsers/__init__.py
Normal file
1
portfolio_app/toronto/parsers/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Data parsers for Toronto housing data sources."""
|
||||||
1
portfolio_app/toronto/schemas/__init__.py
Normal file
1
portfolio_app/toronto/schemas/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Pydantic schemas for Toronto housing data validation."""
|
||||||
148
pyproject.toml
Normal file
148
pyproject.toml
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=61.0", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "portfolio"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Analytics Portfolio - Data engineering and visualization showcase"
|
||||||
|
readme = "README.md"
|
||||||
|
license = {text = "MIT"}
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
authors = [
|
||||||
|
{name = "Leo Miranda"}
|
||||||
|
]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 3 - Alpha",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
# Database
|
||||||
|
"sqlalchemy>=2.0",
|
||||||
|
"psycopg2-binary>=2.9",
|
||||||
|
"geoalchemy2>=0.14",
|
||||||
|
|
||||||
|
# Validation
|
||||||
|
"pydantic>=2.0",
|
||||||
|
"pydantic-settings>=2.0",
|
||||||
|
|
||||||
|
# Data Processing
|
||||||
|
"pandas>=2.1",
|
||||||
|
"geopandas>=0.14",
|
||||||
|
"shapely>=2.0",
|
||||||
|
|
||||||
|
# Visualization
|
||||||
|
"dash>=2.14",
|
||||||
|
"plotly>=5.18",
|
||||||
|
"dash-mantine-components>=0.14",
|
||||||
|
|
||||||
|
# PDF Parsing
|
||||||
|
"pdfplumber>=0.10",
|
||||||
|
"tabula-py>=2.9",
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
"python-dotenv>=1.0",
|
||||||
|
"httpx>=0.25",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
# Testing
|
||||||
|
"pytest>=7.0",
|
||||||
|
"pytest-cov>=4.0",
|
||||||
|
"pytest-asyncio>=0.21",
|
||||||
|
|
||||||
|
# Linting & Formatting
|
||||||
|
"ruff>=0.1",
|
||||||
|
"mypy>=1.7",
|
||||||
|
|
||||||
|
# Pre-commit
|
||||||
|
"pre-commit>=3.5",
|
||||||
|
|
||||||
|
# Type stubs
|
||||||
|
"pandas-stubs",
|
||||||
|
"types-requests",
|
||||||
|
]
|
||||||
|
dbt = [
|
||||||
|
"dbt-postgres>=1.7",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
portfolio = "portfolio_app.app:main"
|
||||||
|
|
||||||
|
[tool.setuptools.packages.find]
|
||||||
|
where = ["."]
|
||||||
|
include = ["portfolio_app*"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
python_files = ["test_*.py"]
|
||||||
|
python_functions = ["test_*"]
|
||||||
|
addopts = [
|
||||||
|
"-v",
|
||||||
|
"--tb=short",
|
||||||
|
"--strict-markers",
|
||||||
|
]
|
||||||
|
markers = [
|
||||||
|
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||||
|
"integration: marks tests as integration tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py311"
|
||||||
|
line-length = 88
|
||||||
|
exclude = [
|
||||||
|
".git",
|
||||||
|
".venv",
|
||||||
|
"__pycache__",
|
||||||
|
"build",
|
||||||
|
"dist",
|
||||||
|
".ruff_cache",
|
||||||
|
"dbt/target",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = [
|
||||||
|
"E", # pycodestyle errors
|
||||||
|
"W", # pycodestyle warnings
|
||||||
|
"F", # pyflakes
|
||||||
|
"I", # isort
|
||||||
|
"B", # flake8-bugbear
|
||||||
|
"C4", # flake8-comprehensions
|
||||||
|
"UP", # pyupgrade
|
||||||
|
"SIM", # flake8-simplify
|
||||||
|
]
|
||||||
|
ignore = [
|
||||||
|
"E501", # line too long (handled by formatter)
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.ruff.lint.isort]
|
||||||
|
known-first-party = ["portfolio_app"]
|
||||||
|
|
||||||
|
[tool.ruff.format]
|
||||||
|
quote-style = "double"
|
||||||
|
indent-style = "space"
|
||||||
|
skip-magic-trailing-comma = false
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.11"
|
||||||
|
strict = true
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_ignores = true
|
||||||
|
disallow_untyped_defs = true
|
||||||
|
plugins = ["pydantic.mypy"]
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
module = [
|
||||||
|
"dash.*",
|
||||||
|
"plotly.*",
|
||||||
|
"geopandas.*",
|
||||||
|
"shapely.*",
|
||||||
|
"pdfplumber.*",
|
||||||
|
"tabula.*",
|
||||||
|
]
|
||||||
|
ignore_missing_imports = true
|
||||||
0
scripts/db/.gitkeep
Normal file
0
scripts/db/.gitkeep
Normal file
8
scripts/db/init-postgis.sql
Normal file
8
scripts/db/init-postgis.sql
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
-- Initialize PostGIS extension
|
||||||
|
-- This script runs automatically on first container start
|
||||||
|
|
||||||
|
-- Enable PostGIS extension
|
||||||
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
||||||
|
|
||||||
|
-- Verify installation
|
||||||
|
SELECT PostGIS_Version();
|
||||||
0
scripts/dbt/.gitkeep
Normal file
0
scripts/dbt/.gitkeep
Normal file
0
scripts/deploy/.gitkeep
Normal file
0
scripts/deploy/.gitkeep
Normal file
0
scripts/dev/.gitkeep
Normal file
0
scripts/dev/.gitkeep
Normal file
0
scripts/docker/.gitkeep
Normal file
0
scripts/docker/.gitkeep
Normal file
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Test suite for analytics portfolio."""
|
||||||
9
tests/conftest.py
Normal file
9
tests/conftest.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
"""Pytest configuration and fixtures."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_fixture() -> dict[str, object]:
    """Provide an empty dict as a placeholder fixture.

    Example only; replace with real fixtures as the test suite grows.

    Returns:
        A new, empty dictionary.
    """
    return {}
|
||||||
Reference in New Issue
Block a user