From f345d41535971d9d9cbc744f03ff4f6ab38181f5 Mon Sep 17 00:00:00 2001 From: lmiranda Date: Sun, 18 Jan 2026 23:44:08 -0500 Subject: [PATCH] fix: Seed multi-year housing data for rent trend charts The seed script now inserts housing data for the years 2019, 2020, 2022, 2023 and 2024 (2021 is not in the seeded `years` list) to support rent trend line visualizations. Co-Authored-By: Claude Opus 4.5 --- scripts/data/seed_amenity_data.py | 80 ++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/scripts/data/seed_amenity_data.py b/scripts/data/seed_amenity_data.py index 5e26578..cdeb3ea 100644 --- a/scripts/data/seed_amenity_data.py +++ b/scripts/data/seed_amenity_data.py @@ -171,9 +171,11 @@ def seed_census_housing() -> int: def seed_housing_mart() -> int: - """Seed housing mart with rental and affordability data.""" + """Seed housing mart with rental and affordability data for multiple years.""" engine = create_engine(DATABASE_URL) + total = 0 + # First update existing NULL records with engine.begin() as conn: result = conn.execute( text( @@ -186,10 +188,6 @@ def seed_housing_mart() -> int: ) rows = [dict(row._mapping) for row in result] - if not rows: - print("No NULL housing mart values found") - return 0 - for row in rows: avg_rent = random.randint(1800, 3200) income = random.randint(55000, 180000) @@ -230,9 +228,77 @@ def seed_housing_mart() -> int: "renter": round(random.uniform(25, 70), 1), }, ) + total += 1 - print(f"Seeded housing mart data for {len(rows)} records") - return len(rows) + # Then insert multi-year data for trend charts + years = [2019, 2020, 2022, 2023, 2024] + with engine.begin() as conn: + result = conn.execute( + text( + "SELECT neighbourhood_id, name, geometry FROM public.dim_neighbourhood" + ) + ) + neighbourhoods = [dict(row._mapping) for row in result] + + for n in neighbourhoods: + for year in years: + # Check if exists + exists = conn.execute( + text( + """ + SELECT 1 FROM public_marts.mart_neighbourhood_housing + WHERE neighbourhood_id = :nid AND year = :year 
""" + ), + {"nid": n["neighbourhood_id"], "year": year}, + ).fetchone() + + if exists: + continue + + base_rent = random.randint(1800, 2800) + year_factor = (year - 2019) * random.randint(50, 150) + avg_rent = base_rent + year_factor + income = random.randint(55000, 180000) + rent_to_income = round((avg_rent * 12 / income) * 100, 2) + + conn.execute( + text( + """ + INSERT INTO public_marts.mart_neighbourhood_housing + (neighbourhood_id, neighbourhood_name, geometry, year, + avg_rent_bachelor, avg_rent_1bed, avg_rent_2bed, avg_rent_3bed, + vacancy_rate, rent_to_income_pct, affordability_index, is_affordable, + median_household_income, pct_owner_occupied, pct_renter_occupied) + VALUES + (:nid, :name, :geom, :year, + :bachelor, :onebed, :twobed, :threebed, + :vacancy, :rent_income, :afford_idx, :is_afford, + :income, :owner, :renter) + """ + ), + { + "nid": n["neighbourhood_id"], + "name": n["name"], + "geom": n["geometry"], + "year": year, + "bachelor": avg_rent - 500, + "onebed": avg_rent - 300, + "twobed": avg_rent, + "threebed": avg_rent + 400, + "vacancy": round(random.uniform(0.5, 4.5), 1), + "rent_income": rent_to_income, + "afford_idx": round(rent_to_income / 30 * 100, 1), + "is_afford": rent_to_income <= 30, + "income": income, + "owner": round(random.uniform(30, 75), 1), + "renter": round(random.uniform(25, 70), 1), + }, + ) + total += 1 + + print(f"Seeded housing mart data for {total} records") + return total def seed_overview_mart() -> int: