staging #96
1
.gitignore
vendored
1
.gitignore
vendored
@@ -198,3 +198,4 @@ cython_debug/
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
dbt/.user.yml
|
||||
|
||||
@@ -35,6 +35,7 @@ dependencies = [
|
||||
"geopandas>=1.1",
|
||||
"shapely>=2.0",
|
||||
"pyproj>=3.6",
|
||||
"statsmodels>=0.14",
|
||||
|
||||
# Visualization
|
||||
"dash>=3.3",
|
||||
|
||||
@@ -5,6 +5,9 @@ This script:
|
||||
- Populates fact_amenities with sample data
|
||||
- Updates dim_neighbourhood with population from fact_census
|
||||
- Seeds median_age in fact_census where missing
|
||||
- Seeds census housing columns (tenure, income, dwelling value)
|
||||
- Seeds housing mart data (rent, affordability)
|
||||
- Seeds overview mart data (safety_score, population)
|
||||
- Runs dbt to rebuild the marts
|
||||
|
||||
Usage:
|
||||
@@ -128,6 +131,178 @@ def seed_median_age() -> int:
|
||||
return len(null_ids)
|
||||
|
||||
|
||||
def seed_census_housing() -> int:
    """Seed the housing columns of ``public.fact_census`` where they are missing.

    Every row whose ``pct_owner_occupied`` is NULL receives randomly generated
    development values for ``pct_owner_occupied``, ``pct_renter_occupied``,
    ``average_dwelling_value`` and ``median_household_income``. All updates run
    in a single transaction.

    Returns:
        The number of census rows that were updated (0 when no row had a NULL
        ``pct_owner_occupied``).
    """
    engine = create_engine(DATABASE_URL)

    with engine.begin() as conn:
        result = conn.execute(
            text("SELECT id FROM public.fact_census WHERE pct_owner_occupied IS NULL")
        )
        null_ids = [row[0] for row in result]

        if not null_ids:
            print("No NULL census housing values found")
            return 0

        # Build the statement once; only the bound parameters change per row.
        update_stmt = text(
            """
            UPDATE public.fact_census SET
                pct_owner_occupied = :owner,
                pct_renter_occupied = :renter,
                average_dwelling_value = :dwelling,
                median_household_income = :income
            WHERE id = :id
            """
        )

        for census_id in null_ids:
            owner_pct = round(random.uniform(30, 80), 1)
            # Derive the renter share from the owner share so the two tenure
            # percentages always sum to 100 (owner in [30, 80] gives renter in
            # [20, 70]). Drawing both independently could yield e.g. 80 + 70.
            renter_pct = round(100 - owner_pct, 1)

            conn.execute(
                update_stmt,
                {
                    "id": census_id,
                    "owner": owner_pct,
                    "renter": renter_pct,
                    "dwelling": random.randint(400000, 1500000),
                    "income": random.randint(50000, 180000),
                },
            )

    print(f"Seeded census housing data for {len(null_ids)} records")
    return len(null_ids)
|
||||
|
||||
|
||||
def seed_housing_mart() -> int:
    """Seed the housing mart with rental and affordability data.

    Every ``(neighbourhood_id, year)`` row of
    ``public_marts.mart_neighbourhood_housing`` whose ``avg_rent_2bed`` is NULL
    receives randomly generated development values: rents for four unit sizes
    (all offset from one random 2-bed rent), a vacancy rate, a rent-to-income
    percentage with the derived affordability index and flag, a median
    household income, and an owner/renter tenure split. All updates run in a
    single transaction.

    Returns:
        The number of mart rows that were updated (0 when no row had a NULL
        ``avg_rent_2bed``).
    """
    # Rent-to-income percentage at or below which a row is flagged affordable;
    # the affordability index is expressed relative to this same value.
    affordable_rent_to_income_pct = 30

    engine = create_engine(DATABASE_URL)

    with engine.begin() as conn:
        result = conn.execute(
            text(
                """
                SELECT neighbourhood_id, year
                FROM public_marts.mart_neighbourhood_housing
                WHERE avg_rent_2bed IS NULL
                """
            )
        )
        rows = [dict(row._mapping) for row in result]

        if not rows:
            print("No NULL housing mart values found")
            return 0

        # Build the statement once; only the bound parameters change per row.
        update_stmt = text(
            """
            UPDATE public_marts.mart_neighbourhood_housing SET
                avg_rent_bachelor = :bachelor,
                avg_rent_1bed = :onebed,
                avg_rent_2bed = :twobed,
                avg_rent_3bed = :threebed,
                vacancy_rate = :vacancy,
                rent_to_income_pct = :rent_income,
                affordability_index = :afford_idx,
                is_affordable = :is_afford,
                median_household_income = :income,
                pct_owner_occupied = :owner,
                pct_renter_occupied = :renter
            WHERE neighbourhood_id = :nid AND year = :year
            """
        )

        for row in rows:
            avg_rent = random.randint(1800, 3200)
            income = random.randint(55000, 180000)
            # Annual 2-bed rent as a percentage of household income.
            rent_to_income = round((avg_rent * 12 / income) * 100, 2)
            # Index of 100 means rent-to-income sits exactly on the threshold.
            affordability = round(
                rent_to_income / affordable_rent_to_income_pct * 100, 1
            )

            owner_pct = round(random.uniform(30, 75), 1)
            # Derive the renter share from the owner share so the two tenure
            # percentages always sum to 100 (owner in [30, 75] gives renter in
            # [25, 70]). Drawing both independently could yield e.g. 75 + 70.
            renter_pct = round(100 - owner_pct, 1)

            conn.execute(
                update_stmt,
                {
                    "nid": row["neighbourhood_id"],
                    "year": row["year"],
                    "bachelor": avg_rent - 500,
                    "onebed": avg_rent - 300,
                    "twobed": avg_rent,
                    "threebed": avg_rent + 400,
                    "vacancy": round(random.uniform(0.5, 4.5), 1),
                    "rent_income": rent_to_income,
                    "afford_idx": affordability,
                    "is_afford": rent_to_income <= affordable_rent_to_income_pct,
                    "income": income,
                    "owner": owner_pct,
                    "renter": renter_pct,
                },
            )

    print(f"Seeded housing mart data for {len(rows)} records")
    return len(rows)
|
||||
|
||||
|
||||
def seed_overview_mart() -> int:
    """Seed the overview mart with ``safety_score`` and ``population``.

    Rows of ``public_marts.mart_neighbourhood_overview`` with a NULL
    ``safety_score`` get a random score, then rows with a NULL ``population``
    get a random population. Both passes share one transaction.

    Returns:
        The total number of UPDATE statements issued. A row that was missing
        both columns is updated in each pass and therefore counted twice.
    """
    engine = create_engine(DATABASE_URL)
    total = 0

    with engine.begin() as conn:
        total += _seed_overview_column(
            conn, "safety_score", lambda: round(random.uniform(40, 95), 1)
        )
        total += _seed_overview_column(
            conn, "population", lambda: random.randint(8000, 45000)
        )

    print(f"Seeded overview mart data for {total} records")
    return total


# Columns that _seed_overview_column is allowed to interpolate into SQL.
_OVERVIEW_SEED_COLUMNS = frozenset({"safety_score", "population"})


def _seed_overview_column(conn, column, make_value) -> int:
    """Fill NULLs of one overview-mart column with generated values.

    Args:
        conn: Open SQLAlchemy connection; the caller owns the transaction.
        column: Name of the column to seed. It must be one of
            ``_OVERVIEW_SEED_COLUMNS`` because it is interpolated into the SQL
            text (identifiers cannot be sent as bound parameters).
        make_value: Zero-argument callable invoked once per row to produce the
            value to store.

    Returns:
        The number of rows updated.

    Raises:
        ValueError: If ``column`` is not an allowed column name.
    """
    if column not in _OVERVIEW_SEED_COLUMNS:
        raise ValueError(f"Unsupported overview column: {column!r}")

    result = conn.execute(
        text(
            f"""
            SELECT neighbourhood_id, year
            FROM public_marts.mart_neighbourhood_overview
            WHERE {column} IS NULL
            """
        )
    )
    rows = [dict(row._mapping) for row in result]

    # Build the statement once; only the bound parameters change per row.
    update_stmt = text(
        f"""
        UPDATE public_marts.mart_neighbourhood_overview
        SET {column} = :value
        WHERE neighbourhood_id = :nid AND year = :year
        """
    )

    for row in rows:
        conn.execute(
            update_stmt,
            {
                "nid": row["neighbourhood_id"],
                "year": row["year"],
                "value": make_value(),
            },
        )

    return len(rows)
|
||||
|
||||
|
||||
def run_dbt() -> bool:
|
||||
"""Run dbt to rebuild marts."""
|
||||
dbt_dir = PROJECT_ROOT / "dbt"
|
||||
@@ -137,16 +312,10 @@ def run_dbt() -> bool:
|
||||
print("Running dbt to rebuild marts...")
|
||||
|
||||
env = os.environ.copy()
|
||||
env["POSTGRES_PASSWORD"] = os.environ.get("POSTGRES_PASSWORD", "")
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
dbt_cmd,
|
||||
"run",
|
||||
"--profiles-dir",
|
||||
str(dbt_dir),
|
||||
"--select",
|
||||
"+mart_neighbourhood_amenities +mart_neighbourhood_demographics",
|
||||
],
|
||||
[dbt_cmd, "run", "--profiles-dir", str(dbt_dir)],
|
||||
cwd=dbt_dir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
@@ -168,10 +337,15 @@ def main() -> int:
|
||||
seed_amenities()
|
||||
update_population()
|
||||
seed_median_age()
|
||||
seed_census_housing()
|
||||
|
||||
if not run_dbt():
|
||||
return 1
|
||||
|
||||
# Seed mart tables after dbt rebuild
|
||||
seed_housing_mart()
|
||||
seed_overview_mart()
|
||||
|
||||
print("\nDone! Development data is ready.")
|
||||
return 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user