# personal-portfolio/portfolio_app/toronto/services/neighbourhood_service.py

"""Service layer for querying neighbourhood data from dbt marts."""

import logging
from functools import lru_cache
from typing import Any

import pandas as pd
from sqlalchemy import text

from portfolio_app.toronto.models import get_engine


def _execute_query(sql: str, params: dict[str, Any] | None = None) -> pd.DataFrame:
    """Execute SQL query and return DataFrame.

    Args:
        sql: SQL query string.
        params: Query parameters.

    Returns:
        pandas DataFrame with results, or empty DataFrame on error.
    """
    try:
        engine = get_engine()
        with engine.connect() as conn:
            return pd.read_sql(text(sql), conn, params=params)
    except Exception:
        # Return empty DataFrame on connection or query error
        return pd.DataFrame()


def get_overview_data(year: int = 2021) -> pd.DataFrame:
    """Fetch the livability overview of every neighbourhood for one year.

    Reads mart_neighbourhood_overview, keeps the rows whose ``year`` column
    equals ``year`` and sorts them by livability_score from highest to
    lowest, with NULL scores last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        population, median_household_income, livability_score, safety_score,
        affordability_score, amenity_score, crime_rate_per_100k,
        rent_to_income_pct, avg_rent_2bed, total_amenities_per_1000.
        Empty when the query fails.
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            population,
            median_household_income,
            livability_score,
            safety_score,
            affordability_score,
            amenity_score,
            crime_rate_per_100k,
            rent_to_income_pct,
            avg_rent_2bed,
            total_amenities_per_1000
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY livability_score DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)


def get_housing_data(year: int = 2021) -> pd.DataFrame:
    """Fetch housing and affordability metrics of every neighbourhood.

    Reads mart_neighbourhood_housing, keeps the rows whose ``year`` column
    equals ``year`` and sorts them by affordability_index from lowest to
    highest, with NULL values last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        pct_owner_occupied, pct_renter_occupied, average_dwelling_value,
        median_household_income, avg_rent_bachelor, avg_rent_1bed,
        avg_rent_2bed, avg_rent_3bed, vacancy_rate, total_rental_units,
        rent_to_income_pct, is_affordable, affordability_index,
        rent_yoy_change_pct, income_quintile. Empty when the query fails.
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            pct_owner_occupied,
            pct_renter_occupied,
            average_dwelling_value,
            median_household_income,
            avg_rent_bachelor,
            avg_rent_1bed,
            avg_rent_2bed,
            avg_rent_3bed,
            vacancy_rate,
            total_rental_units,
            rent_to_income_pct,
            is_affordable,
            affordability_index,
            rent_yoy_change_pct,
            income_quintile
        FROM mart_neighbourhood_housing
        WHERE year = :year
        ORDER BY affordability_index ASC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)


def get_safety_data(year: int = 2021) -> pd.DataFrame:
    """Fetch crime statistics of every neighbourhood for one year.

    Reads mart_neighbourhood_safety, keeps the rows whose ``year`` column
    equals ``year`` and sorts them by total_crime_rate from lowest to
    highest, with NULL values last. The mart column crime_rate_per_100k is
    returned under the name total_crime_rate.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        total_crimes, total_crime_rate, violent_crimes, violent_crime_rate,
        property_crimes, property_crime_rate, theft_crimes, theft_rate,
        crime_yoy_change_pct, crime_trend. Empty when the query fails.
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            total_crimes,
            crime_rate_per_100k as total_crime_rate,
            violent_crimes,
            violent_crime_rate,
            property_crimes,
            property_crime_rate,
            theft_crimes,
            theft_rate,
            crime_yoy_change_pct,
            crime_trend
        FROM mart_neighbourhood_safety
        WHERE year = :year
        ORDER BY total_crime_rate ASC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)


def get_demographics_data(year: int = 2021) -> pd.DataFrame:
    """Fetch demographic metrics of every neighbourhood for one census year.

    Reads mart_neighbourhood_demographics, keeps the rows whose
    ``census_year`` column equals ``year`` and sorts them by population from
    largest to smallest, with NULL values last. The census_year column is
    returned under the name ``year``.

    Args:
        year: Value matched against the mart's ``census_year`` column.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        population, population_density, population_change_pct,
        median_household_income, average_household_income, income_quintile,
        median_age, pct_under_18, pct_18_to_64, pct_65_plus,
        pct_bachelors_or_higher, unemployment_rate, diversity_index.
        Empty when the query fails.
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            census_year as year,
            population,
            population_density,
            population_change_pct,
            median_household_income,
            average_household_income,
            income_quintile,
            median_age,
            pct_under_18,
            pct_18_to_64,
            pct_65_plus,
            pct_bachelors_or_higher,
            unemployment_rate,
            diversity_index
        FROM mart_neighbourhood_demographics
        WHERE census_year = :year
        ORDER BY population DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)


def get_amenities_data(year: int = 2021) -> pd.DataFrame:
    """Fetch amenity counts and scores of every neighbourhood for one year.

    Reads mart_neighbourhood_amenities (parks, schools, childcare), keeps the
    rows whose ``year`` column equals ``year`` and sorts them by
    amenity_score from highest to lowest, with NULL values last.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        DataFrame with columns: neighbourhood_id, neighbourhood_name, year,
        park_count, parks_per_1000, school_count, schools_per_1000,
        childcare_count, childcare_per_1000, total_amenities,
        total_amenities_per_1000, amenity_score, amenity_rank.
        Empty when the query fails.
    """
    query = """
        SELECT
            neighbourhood_id,
            neighbourhood_name,
            year,
            park_count,
            parks_per_1000,
            school_count,
            schools_per_1000,
            childcare_count,
            childcare_per_1000,
            total_amenities,
            total_amenities_per_1000,
            amenity_score,
            amenity_rank
        FROM mart_neighbourhood_amenities
        WHERE year = :year
        ORDER BY amenity_score DESC NULLS LAST
    """
    bind_params = {"year": year}
    return _execute_query(query, bind_params)


def get_neighbourhood_details(
    neighbourhood_id: int, year: int = 2021
) -> dict[str, Any]:
    """Build the full profile of one neighbourhood for one year.

    Starts from the neighbourhood's row in mart_neighbourhood_overview and
    LEFT JOINs the safety, housing, demographics and amenities marts on
    neighbourhood id and year (``census_year`` for demographics), so metrics
    missing from a joined mart come back as NULL rather than dropping the row.

    Args:
        neighbourhood_id: Value matched against the overview mart's
            ``neighbourhood_id`` column.
        year: Value matched against the overview mart's ``year`` column.

    Returns:
        Dictionary mapping each selected column name to its value in the
        first returned row, or an empty dict when no row is returned
        (no match, or the query failed).
    """
    query = """
        SELECT
            o.neighbourhood_id,
            o.neighbourhood_name,
            o.year,
            o.population,
            o.median_household_income,
            o.livability_score,
            o.safety_score,
            o.affordability_score,
            o.amenity_score,
            s.total_crimes,
            s.crime_rate_per_100k,
            s.violent_crime_rate,
            s.property_crime_rate,
            h.avg_rent_2bed,
            h.vacancy_rate,
            h.rent_to_income_pct,
            h.affordability_index,
            h.pct_owner_occupied,
            h.pct_renter_occupied,
            d.median_age,
            d.diversity_index,
            d.unemployment_rate,
            d.pct_bachelors_or_higher,
            a.park_count,
            a.school_count,
            a.total_amenities
        FROM mart_neighbourhood_overview o
        LEFT JOIN mart_neighbourhood_safety s
            ON o.neighbourhood_id = s.neighbourhood_id
            AND o.year = s.year
        LEFT JOIN mart_neighbourhood_housing h
            ON o.neighbourhood_id = h.neighbourhood_id
            AND o.year = h.year
        LEFT JOIN mart_neighbourhood_demographics d
            ON o.neighbourhood_id = d.neighbourhood_id
            AND o.year = d.census_year
        LEFT JOIN mart_neighbourhood_amenities a
            ON o.neighbourhood_id = a.neighbourhood_id
            AND o.year = a.year
        WHERE o.neighbourhood_id = :neighbourhood_id
          AND o.year = :year
    """
    bind_params = {"neighbourhood_id": neighbourhood_id, "year": year}
    frame = _execute_query(query, bind_params)
    if frame.empty:
        return {}

    # Only the first row is used; keys are normalised to plain str.
    first_row = frame.iloc[0].to_dict()
    return {str(column): value for column, value in first_row.items()}


class _NoNeighbourhoodRows(Exception):
    """Signals an empty result so the cached loader does not memoise it."""


@lru_cache(maxsize=32)
def _load_neighbourhood_list(year: int) -> tuple[dict[str, Any], ...]:
    """Query and cache the neighbourhood records for one year.

    Raises:
        _NoNeighbourhoodRows: When the query returns no rows. _execute_query
            also returns an empty frame on database errors, and lru_cache
            does not store calls that raise, so a transient failure is
            retried on the next call instead of being cached.
    """
    sql = """
        SELECT DISTINCT
            neighbourhood_id,
            neighbourhood_name,
            population
        FROM mart_neighbourhood_overview
        WHERE year = :year
        ORDER BY neighbourhood_name
    """
    df = _execute_query(sql, {"year": year})
    if df.empty:
        raise _NoNeighbourhoodRows(year)
    return tuple(
        {str(key): value for key, value in record.items()}
        for record in df.to_dict("records")
    )


def get_neighbourhood_list(year: int = 2021) -> list[dict[str, Any]]:
    """Get list of all neighbourhoods for dropdown selectors.

    Non-empty results are cached per year (up to 32 years). Empty results,
    which is also what a failed query looks like, are never cached, so the
    list recovers once the database is reachable again. Each call returns a
    fresh list of fresh dicts, so callers may mutate the result without
    corrupting the cache.

    Args:
        year: Value matched against the overview mart's ``year`` column.

    Returns:
        List of dicts with the keys neighbourhood_id, neighbourhood_name and
        population, ordered by neighbourhood_name; empty list when the query
        returns no rows or fails.
    """
    try:
        cached_rows = _load_neighbourhood_list(year)
    except _NoNeighbourhoodRows:
        return []
    return [dict(row) for row in cached_rows]


# Keep the lru_cache management helpers reachable through the public name,
# as they were when the decorator was applied to it directly.
get_neighbourhood_list.cache_clear = _load_neighbourhood_list.cache_clear  # type: ignore[attr-defined]
get_neighbourhood_list.cache_info = _load_neighbourhood_list.cache_info  # type: ignore[attr-defined]


def get_rankings(
    metric: str,
    year: int = 2021,
    top_n: int = 10,
    ascending: bool = True,
) -> pd.DataFrame:
    """Get top/bottom neighbourhoods for a specific metric.

    Two slices of ``top_n`` rows are returned in one frame and tagged in the
    ``rank_group`` column: ``'bottom'`` rows are the first ``top_n`` in the
    requested sort order, ``'top'`` rows are the first ``top_n`` in the
    opposite order. Rows where the metric is NULL are excluded.

    Args:
        metric: Column name to rank by. Must be a plain ASCII identifier
            because it is placed into the SQL text; known metrics are read
            from their own mart, any other name is looked up in
            mart_neighbourhood_overview.
        year: Year to query.
        top_n: Number of rows in each of the two slices.
        ascending: If True, rank from lowest to highest (good for crime, rent).

    Returns:
        DataFrame with columns neighbourhood_id, neighbourhood_name, the
        metric and rank_group. Empty when ``metric`` is not a plain
        identifier or when the query fails.
    """
    # Column names cannot be sent as bind parameters, so the metric ends up
    # in the SQL string itself. Reject anything that is not a bare identifier
    # to rule out SQL injection; an empty frame matches what a failing query
    # already produces for callers.
    if not (isinstance(metric, str) and metric.isascii() and metric.isidentifier()):
        return pd.DataFrame()

    # Map metrics to their source tables
    table_map = {
        "livability_score": "mart_neighbourhood_overview",
        "safety_score": "mart_neighbourhood_overview",
        "affordability_score": "mart_neighbourhood_overview",
        "amenity_score": "mart_neighbourhood_overview",
        "crime_rate_per_100k": "mart_neighbourhood_safety",
        "total_crime_rate": "mart_neighbourhood_safety",
        "avg_rent_2bed": "mart_neighbourhood_housing",
        "affordability_index": "mart_neighbourhood_housing",
        "population": "mart_neighbourhood_demographics",
        "median_household_income": "mart_neighbourhood_demographics",
    }
    # get_safety_data reads this value as "crime_rate_per_100k as
    # total_crime_rate", which suggests the safety mart has no physical
    # total_crime_rate column; resolve the public name to that column.
    column_map = {"total_crime_rate": "crime_rate_per_100k"}

    table = table_map.get(metric, "mart_neighbourhood_overview")
    column = column_map.get(metric, metric)
    # Keep the caller-facing metric name as the output column name.
    select_expr = column if column == metric else f"{column} AS {metric}"
    year_col = "census_year" if "demographics" in table else "year"

    order = "ASC" if ascending else "DESC"
    reverse_order = "DESC" if ascending else "ASC"

    sql = f"""
        (
            SELECT neighbourhood_id, neighbourhood_name, {select_expr}, 'bottom' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {order}
            LIMIT :top_n
        )
        UNION ALL
        (
            SELECT neighbourhood_id, neighbourhood_name, {select_expr}, 'top' as rank_group
            FROM {table}
            WHERE {year_col} = :year AND {column} IS NOT NULL
            ORDER BY {column} {reverse_order}
            LIMIT :top_n
        )
    """
    return _execute_query(sql, {"year": year, "top_n": top_n})


def get_city_averages(year: int = 2021) -> dict[str, Any]:
    """Compute city-wide aggregates over all neighbourhoods for one year.

    Aggregates mart_neighbourhood_overview in SQL: population is summed,
    every other metric is averaged. Non-missing float values in the result
    are rounded to two decimals.

    Args:
        year: Value matched against the mart's ``year`` column.

    Returns:
        Dictionary with the keys avg_livability_score, avg_safety_score,
        avg_affordability_score, avg_amenity_score, total_population,
        avg_median_income, avg_crime_rate, avg_rent_2bed and
        avg_rent_to_income, or an empty dict when the query returns nothing.
    """
    query = """
        SELECT
            AVG(livability_score) as avg_livability_score,
            AVG(safety_score) as avg_safety_score,
            AVG(affordability_score) as avg_affordability_score,
            AVG(amenity_score) as avg_amenity_score,
            SUM(population) as total_population,
            AVG(median_household_income) as avg_median_income,
            AVG(crime_rate_per_100k) as avg_crime_rate,
            AVG(avg_rent_2bed) as avg_rent_2bed,
            AVG(rent_to_income_pct) as avg_rent_to_income
        FROM mart_neighbourhood_overview
        WHERE year = :year
    """
    frame = _execute_query(query, {"year": year})
    if frame.empty:
        return {}

    averages: dict[str, Any] = {}
    for column, value in frame.iloc[0].to_dict().items():
        # Only real (non-NaN) floats are rounded; everything else passes through.
        roundable = pd.notna(value) and isinstance(value, float)
        averages[str(column)] = round(value, 2) if roundable else value
    return averages