fix: Add median_age seeding to development data script
Some checks failed
CI / lint-and-test (push) Has been cancelled

Updates seed_amenity_data.py to also seed median_age values in
fact_census where missing, ensuring demographics notebooks work.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-18 22:49:57 -05:00
parent 28f239e8cd
commit 9cc2cf0e00

View File

@@ -1,14 +1,16 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Seed sample amenity data for development/testing. """Seed sample data for development/testing.
This script populates fact_amenities with sample data and updates This script:
dim_neighbourhood with population from fact_census, then runs dbt - Populates fact_amenities with sample data
to rebuild the marts. - Updates dim_neighbourhood with population from fact_census
- Seeds median_age in fact_census where missing
- Runs dbt to rebuild the marts
Usage: Usage:
python scripts/data/seed_amenity_data.py python scripts/data/seed_amenity_data.py
Run this after load_toronto_data.py if amenity data is missing. Run this after load_toronto_data.py to ensure notebooks have data.
""" """
import os import os
@@ -101,8 +103,33 @@ def update_population() -> int:
return count return count
def seed_median_age() -> int:
    """Seed median_age in fact_census where it is currently NULL.

    Every row of ``public.fact_census`` whose ``median_age`` is NULL is
    assigned a random integer between 30 and 50 (both inclusive).

    Returns:
        The number of census rows that were updated; 0 when no row had a
        NULL ``median_age``.
    """
    engine = create_engine(DATABASE_URL)
    try:
        # engine.begin() commits on normal exit and rolls back on error.
        with engine.begin() as conn:
            result = conn.execute(
                text("SELECT id FROM public.fact_census WHERE median_age IS NULL")
            )
            null_ids = [row[0] for row in result]

            if not null_ids:
                print("No NULL median_age values found")
                return 0

            # One parameter set per row, drawn in the same order as the ids.
            params = [
                {"age": random.randint(30, 50), "id": census_id}
                for census_id in null_ids
            ]
            # Passing a list of parameter dicts runs the statement in
            # executemany mode: one call instead of one round trip per row.
            conn.execute(
                text("UPDATE public.fact_census SET median_age = :age WHERE id = :id"),
                params,
            )
    finally:
        # Release the pooled connections held by this short-lived engine.
        engine.dispose()

    print(f"Seeded median_age for {len(null_ids)} census records")
    return len(null_ids)
def run_dbt() -> bool: def run_dbt() -> bool:
"""Run dbt to rebuild amenity marts.""" """Run dbt to rebuild marts."""
dbt_dir = PROJECT_ROOT / "dbt" dbt_dir = PROJECT_ROOT / "dbt"
venv_dbt = PROJECT_ROOT / ".venv" / "bin" / "dbt" venv_dbt = PROJECT_ROOT / ".venv" / "bin" / "dbt"
dbt_cmd = str(venv_dbt) if venv_dbt.exists() else "dbt" dbt_cmd = str(venv_dbt) if venv_dbt.exists() else "dbt"
@@ -118,7 +145,7 @@ def run_dbt() -> bool:
"--profiles-dir", "--profiles-dir",
str(dbt_dir), str(dbt_dir),
"--select", "--select",
"+mart_neighbourhood_amenities", "+mart_neighbourhood_amenities +mart_neighbourhood_demographics",
], ],
cwd=dbt_dir, cwd=dbt_dir,
capture_output=True, capture_output=True,
@@ -136,15 +163,16 @@ def run_dbt() -> bool:
def main() -> int:
    """Run every seeding step, then rebuild the dbt marts.

    Returns:
        Process exit code: 0 when the dbt run succeeds, 1 otherwise.
    """
    print("Seeding development data...")

    # The steps run in this fixed order; their return values are not used.
    for step in (seed_amenities, update_population, seed_median_age):
        step()

    if run_dbt():
        print("\nDone! Development data is ready.")
        return 0

    return 1