{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Age Distribution Analysis\n",
    "\n",
    "Compares median age and age index across Toronto neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_demographics` | neighbourhood × year | median_age, age_index, city_avg_age |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from dotenv import load_dotenv\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "# Load .env from project root\n",
    "load_dotenv(\"../../.env\")\n",
    "\n",
    "# DATABASE_URL must be set (directly or via .env); a missing key raises KeyError here.\n",
    "engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
    "\n",
    "# Latest available year only, restricted to rows that have a median age.\n",
    "# The ORDER BY (oldest first) is relied on by the head/tail samples further down.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    " neighbourhood_name,\n",
    " median_age,\n",
    " age_index,\n",
    " city_avg_age,\n",
    " population,\n",
    " income_quintile,\n",
    " pct_renter_occupied\n",
    "FROM public_marts.mart_neighbourhood_demographics\n",
    "WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
    " AND median_age IS NOT NULL\n",
    "ORDER BY median_age DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods with age data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent census year\n",
    "2. Calculate deviation from city average\n",
    "3. Classify as younger/older than average"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail with a clear message instead of an opaque IndexError from .iloc[0] below.\n",
    "if df.empty:\n",
    "    raise ValueError(\n",
    "        \"Query returned no neighbourhoods with a median age; \"\n",
    "        \"cannot derive the city average.\"\n",
    "    )\n",
    "\n",
    "# The city-wide average is read from the first row.\n",
    "# NOTE(review): assumes city_avg_age is identical on every row -- confirm against the mart.\n",
    "city_avg = df[\"city_avg_age\"].iloc[0]\n",
    "if pd.isna(city_avg):\n",
    "    # A missing average would silently label every neighbourhood \"Older\".\n",
    "    raise ValueError(\"city_avg_age is missing; cannot classify neighbourhoods.\")\n",
    "\n",
    "# Vectorised classification: strictly below the city average is \"Younger\",\n",
    "# everything else (including an exact match) is \"Older\".\n",
    "df[\"age_category\"] = (\n",
    "    df[\"median_age\"].lt(city_avg).map({True: \"Younger\", False: \"Older\"})\n",
    ")\n",
    "# Positive deviation = median age above the city average.\n",
    "df[\"age_deviation\"] = df[\"median_age\"] - city_avg\n",
    "\n",
    "# List of row dicts; passed to create_ranking_bar in the visualization section.\n",
    "data = df.to_dict(\"records\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summary_columns = [\"neighbourhood_name\", \"median_age\", \"age_index\", \"pct_renter_occupied\"]\n",
    "\n",
    "print(f\"City Average Age: {city_avg:.1f}\")\n",
    "\n",
    "# df is ordered by median_age DESC (see the SQL ORDER BY), so the last rows are\n",
    "# the youngest neighbourhoods and the first rows are the oldest.\n",
    "print(\"\\nYoungest Neighbourhoods:\")\n",
    "display(df.tail(5)[summary_columns])\n",
    "print(\"\\nOldest Neighbourhoods:\")\n",
    "display(df.head(5)[summary_columns])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_ranking_bar` from `portfolio_app.figures.toronto.bar_charts`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Make the project root importable. The membership check keeps re-runs of this\n",
    "# cell from prepending duplicate entries to sys.path.\n",
    "project_root = \"../..\"\n",
    "if project_root not in sys.path:\n",
    "    sys.path.insert(0, project_root)\n",
    "\n",
    "from portfolio_app.figures.toronto.bar_charts import create_ranking_bar\n",
    "\n",
    "fig = create_ranking_bar(\n",
    "    data=data,\n",
    "    name_column=\"neighbourhood_name\",\n",
    "    value_column=\"median_age\",\n",
    "    title=\"Youngest & Oldest Neighbourhoods (Median Age)\",\n",
    "    top_n=10,\n",
    "    bottom_n=10,\n",
    "    color_top=\"#FF9800\",  # Orange for older\n",
    "    color_bottom=\"#2196F3\",  # Blue for younger\n",
    "    value_format=\".1f\",\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Age vs Income Correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age by income quintile: mean of the neighbourhood median ages in each quintile\n",
    "print(\"Median Age by Income Quintile:\")\n",
    "df.groupby(\"income_quintile\")[\"median_age\"].mean().round(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}