staging #96
6
Makefile
6
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: setup docker-up docker-down db-init load-data run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
|
||||
.PHONY: setup docker-up docker-down db-init load-data seed-amenities run test dbt-run dbt-test lint format ci deploy clean help logs run-detached etl-toronto
|
||||
|
||||
# Default target
|
||||
.DEFAULT_GOAL := help
|
||||
@@ -87,6 +87,10 @@ load-data-only: ## Load Toronto data without running dbt
|
||||
@echo "$(GREEN)Loading Toronto data (skip dbt)...$(NC)"
|
||||
$(PYTHON) scripts/data/load_toronto_data.py --skip-dbt
|
||||
|
||||
# Runs scripts/data/seed_amenity_data.py, which replaces the rows in
# fact_amenities with generated sample counts and then runs dbt.
seed-amenities: ## Seed sample amenity data (run after load-data)
|
||||
@echo "$(GREEN)Seeding amenity data...$(NC)"
|
||||
$(PYTHON) scripts/data/seed_amenity_data.py
|
||||
|
||||
# =============================================================================
|
||||
# Application
|
||||
# =============================================================================
|
||||
|
||||
153
scripts/data/seed_amenity_data.py
Normal file
153
scripts/data/seed_amenity_data.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Seed sample amenity data for development/testing.
|
||||
|
||||
This script populates fact_amenities with sample data and updates
|
||||
dim_neighbourhood with population from fact_census, then runs dbt
|
||||
to rebuild the marts.
|
||||
|
||||
Usage:
|
||||
python scripts/data/seed_amenity_data.py
|
||||
|
||||
Run this after load_toronto_data.py if amenity data is missing.
|
||||
"""
|
||||
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from sqlalchemy import create_engine, text
|
||||
|
||||
# Repository root: three directory levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent.parent

# Pull variables from the project's .env file into the process environment
# before any of them are read below.
load_dotenv(PROJECT_ROOT / ".env")

# Connection string handed to SQLAlchemy's create_engine() by the functions
# in this module; the script cannot do anything useful without it.
DATABASE_URL = os.environ.get("DATABASE_URL")
if not DATABASE_URL:
    # Report the failure on stderr so it is not mixed into piped stdout.
    print("ERROR: DATABASE_URL not set in .env", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
def seed_amenities() -> int:
    """Replace the contents of fact_amenities with random sample counts.

    Reads every ``neighbourhood_id`` from ``public.dim_neighbourhood``,
    deletes all rows from ``public.fact_amenities`` and inserts one row per
    (neighbourhood, amenity type) pair, each with a random count between 1
    and 50 and the year 2024.

    The delete and the inserts run inside a single ``engine.begin()``
    transaction, and the inserts are sent as one batched (executemany)
    statement instead of one round-trip per row.

    Returns:
        The number of amenity rows inserted.
    """
    amenity_types = (
        "Parks",
        "Schools",
        "Transit Stops",
        "Libraries",
        "Community Centres",
        "Recreation",
    )
    year = 2024

    insert_stmt = text(
        """
        INSERT INTO public.fact_amenities
        (neighbourhood_id, amenity_type, count, year)
        VALUES (:neighbourhood_id, :amenity_type, :count, :year)
        """
    )

    engine = create_engine(DATABASE_URL)
    try:
        with engine.connect() as conn:
            result = conn.execute(
                text("SELECT neighbourhood_id FROM public.dim_neighbourhood")
            )
            neighbourhood_ids = [row[0] for row in result]

        print(f"Found {len(neighbourhood_ids)} neighbourhoods")

        # Build every parameter set up front, in the same nested order as a
        # neighbourhood-then-type loop, so the whole batch can be sent at once.
        rows = [
            {
                "neighbourhood_id": n_id,
                "amenity_type": amenity_type,
                "count": random.randint(1, 50),
                "year": year,
            }
            for n_id in neighbourhood_ids
            for amenity_type in amenity_types
        ]

        with engine.begin() as conn:
            conn.execute(text("DELETE FROM public.fact_amenities"))
            # An empty parameter list would not bind the INSERT's parameters,
            # so skip the statement entirely when there is nothing to insert.
            if rows:
                conn.execute(insert_stmt, rows)
    finally:
        # Release pooled connections; each call creates its own engine.
        engine.dispose()

    total = len(rows)
    print(f"Inserted {total} amenity records")
    return total
|
||||
|
||||
|
||||
def update_population() -> int:
    """Copy 2021 census population values onto dim_neighbourhood.

    Runs a single UPDATE that joins ``public.dim_neighbourhood`` to
    ``public.fact_census`` on ``neighbourhood_id`` for census year 2021.

    Returns:
        The row count the database reports for the UPDATE.
    """
    refresh_sql = text(
        """
        UPDATE public.dim_neighbourhood dn
        SET population = fc.population
        FROM public.fact_census fc
        WHERE dn.neighbourhood_id = fc.neighbourhood_id
        AND fc.census_year = 2021
        """
    )

    db = create_engine(DATABASE_URL)
    with db.begin() as txn:
        outcome = txn.execute(refresh_sql)
        updated = int(outcome.rowcount)

    print(f"Updated {updated} neighbourhoods with population")
    return updated
|
||||
|
||||
|
||||
def run_dbt() -> bool:
    """Run dbt to rebuild the amenity mart.

    Executes ``dbt run --select +mart_neighbourhood_amenities`` with the
    project's ``dbt`` directory as both the working directory and the
    profiles directory. The executable at ``.venv/bin/dbt`` is used when it
    exists; otherwise ``dbt`` is looked up on PATH.

    Returns:
        True when dbt exits with status 0. False when dbt exits with a
        non-zero status or the process cannot be started (for example the
        executable or the dbt directory is missing).
    """
    dbt_dir = PROJECT_ROOT / "dbt"
    venv_dbt = PROJECT_ROOT / ".venv" / "bin" / "dbt"
    dbt_cmd = str(venv_dbt) if venv_dbt.exists() else "dbt"

    print("Running dbt to rebuild marts...")

    # Pass the current environment through explicitly to the child process.
    env = os.environ.copy()

    try:
        result = subprocess.run(
            [
                dbt_cmd,
                "run",
                "--profiles-dir",
                str(dbt_dir),
                "--select",
                "+mart_neighbourhood_amenities",
            ],
            cwd=dbt_dir,
            capture_output=True,
            text=True,
            env=env,
        )
    except OSError as exc:
        # Raised when the executable or working directory does not exist;
        # report it like any other dbt failure instead of crashing.
        print(f"dbt failed:\ncould not start '{dbt_cmd}': {exc}")
        return False

    if result.returncode != 0:
        print(f"dbt failed:\n{result.stdout}\n{result.stderr}")
        return False

    print("dbt completed successfully")
    return True
|
||||
|
||||
|
||||
def main() -> int:
    """Seed amenities, refresh populations, then rebuild the marts with dbt.

    Returns:
        Exit status for the process: 0 on success, 1 when the dbt run fails.
    """
    print("Seeding amenity data...")

    # Return values of the two database steps are informational only.
    seed_amenities()
    update_population()

    dbt_ok = run_dbt()
    if dbt_ok:
        print("\nDone! Amenity data is ready.")
        return 0
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s status code back to the calling shell.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user