{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Age Distribution Analysis\n",
    "\n",
    "Compares median age and age index across Toronto neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_demographics` | neighbourhood × year | median_age, age_index, city_avg_age |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library first, then third-party packages.\n",
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "# DATABASE_URL from the environment takes precedence; otherwise the localhost default is used.\n",
    "db_url = os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio')\n",
    "engine = create_engine(db_url)\n",
    "\n",
    "# Latest available year only, rows with a median age, highest median age first.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    " neighbourhood_name,\n",
    " median_age,\n",
    " age_index,\n",
    " city_avg_age,\n",
    " population,\n",
    " income_quintile,\n",
    " pct_renter_occupied\n",
    "FROM public_marts.mart_neighbourhood_demographics\n",
    "WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
    " AND median_age IS NOT NULL\n",
    "ORDER BY median_age DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(sql=query, con=engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods with age data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent census year\n",
    "2. Calculate deviation from city average\n",
    "3. 
Classify as younger/older than average"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail fast with a clear message: the positional lookup below would otherwise\n",
    "# raise a bare IndexError when the query returned no rows.\n",
    "if df.empty:\n",
    "    raise ValueError(\"No neighbourhood age data returned; cannot derive the city average age\")\n",
    "\n",
    "# The first row's city_avg_age is used as the single city-wide reference value.\n",
    "city_avg = df['city_avg_age'].iloc[0]\n",
    "\n",
    "# Vectorized classification and deviation; a median age equal to the city\n",
    "# average is labelled 'Older', because only strictly lower values are 'Younger'.\n",
    "# assign() returns a new frame, so re-running this cell is idempotent.\n",
    "df = df.assign(\n",
    "    age_category=df['median_age'].lt(city_avg).map({True: 'Younger', False: 'Older'}),\n",
    "    age_deviation=df['median_age'] - city_avg,\n",
    ")\n",
    "\n",
    "# List-of-dicts form passed to the figure factory later in the notebook.\n",
    "data = df.to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns shown in both preview tables.\n",
    "preview_cols = ['neighbourhood_name', 'median_age', 'age_index', 'pct_renter_occupied']\n",
    "\n",
    "print(f\"City Average Age: {city_avg:.1f}\")\n",
    "\n",
    "# The query orders rows by median_age descending, so the youngest sit at the tail\n",
    "# and the oldest at the head.\n",
    "print(\"\\nYoungest Neighbourhoods:\")\n",
    "display(df.tail(5)[preview_cols])\n",
    "print(\"\\nOldest Neighbourhoods:\")\n",
    "display(df.head(5)[preview_cols])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_ranking_bar` from `portfolio_app.figures.bar_charts`."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Make `portfolio_app` importable from two directories up. The membership check\n",
    "# keeps re-runs of this cell from stacking duplicate entries on sys.path.\n",
    "if '../..' not in sys.path:\n",
    "    sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.bar_charts import create_ranking_bar\n",
    "\n",
    "# Ranked bar chart of the 10 highest and 10 lowest median ages.\n",
    "fig = create_ranking_bar(\n",
    "    data=data,\n",
    "    name_column='neighbourhood_name',\n",
    "    value_column='median_age',\n",
    "    title='Youngest & Oldest Neighbourhoods (Median Age)',\n",
    "    top_n=10,\n",
    "    bottom_n=10,\n",
    "    color_top='#FF9800',  # Orange for older\n",
    "    color_bottom='#2196F3',  # Blue for younger\n",
    "    value_format='.1f',\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Age vs Income Correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age by income quintile: mean of the neighbourhood median ages in each quintile,\n",
    "# rounded to one decimal place.\n",
    "print(\"Median Age by Income Quintile:\")\n",
    "df.groupby('income_quintile')['median_age'].mean().round(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}