{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Population Density Bar Chart\n",
    "\n",
    "Shows population density (people per sq km) across Toronto neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_demographics` | neighbourhood × year | population_density, population, land_area_sqkm |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import os\n",
    "\n",
    "# NOTE(review): the fallback URL embeds a username/password and looks like a\n",
    "# local-development default -- TODO confirm. Set DATABASE_URL for any other\n",
    "# environment instead of editing the literal here.\n",
    "database_url = os.environ.get('DATABASE_URL', 'postgresql://portfolio:portfolio@localhost:5432/portfolio')\n",
    "engine = create_engine(database_url)\n",
    "\n",
    "# Latest available year only; rows without a density value are excluded.\n",
    "# Rows come back densest-first, which the top-N selection further down relies on.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_name,\n",
    "    population_density,\n",
    "    population,\n",
    "    land_area_sqkm,\n",
    "    median_household_income,\n",
    "    pct_renter_occupied\n",
    "FROM public_marts.mart_neighbourhood_demographics\n",
    "WHERE year = (SELECT MAX(year) FROM public_marts.mart_neighbourhood_demographics)\n",
    "    AND population_density IS NOT NULL\n",
    "ORDER BY population_density DESC\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Sort by population density (descending; done by the `ORDER BY` in the SQL query)\n",
    "2. Select top 20 most dense neighbourhoods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of neighbourhoods shown in the chart.\n",
    "TOP_N = 20\n",
    "\n",
    "# df is already ordered densest-first by the SQL query, so head() yields the top N.\n",
    "data = df.head(TOP_N).to_dict('records')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[['neighbourhood_name', 'population_density', 'population', 'land_area_sqkm']].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_horizontal_bar` from `portfolio_app.figures.bar_charts`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "# Make the project package importable; the path is relative to the working directory.\n",
    "sys.path.insert(0, '../..')\n",
    "\n",
    "from portfolio_app.figures.bar_charts import create_horizontal_bar\n",
    "\n",
    "fig = create_horizontal_bar(\n",
    "    data=data,\n",
    "    name_column='neighbourhood_name',\n",
    "    value_column='population_density',\n",
    "    # Use the actual row count so the title stays correct if fewer rows were returned.\n",
    "    title=f'Top {len(data)} Most Dense Neighbourhoods',\n",
    "    color='#9C27B0',\n",
    "    value_format=',.0f',\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Density Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df only holds neighbourhoods with a non-null density (see the SQL filter),\n",
    "# so the totals below cover exactly those neighbourhoods.\n",
    "total_population = float(df['population'].sum())\n",
    "total_area_sqkm = float(df['land_area_sqkm'].sum())\n",
    "\n",
    "# City-wide density is total people over total area. The plain mean of the\n",
    "# per-neighbourhood densities weights every neighbourhood equally instead,\n",
    "# so both figures are reported under distinct labels.\n",
    "citywide_density = total_population / total_area_sqkm if total_area_sqkm else float('nan')\n",
    "\n",
    "print(\"City-wide Statistics:\")\n",
    "print(f\" Total Population: {total_population:,.0f}\")\n",
    "print(f\" Total Area: {total_area_sqkm:,.1f} sq km\")\n",
    "print(f\" City-wide Density: {citywide_density:,.0f} per sq km\")\n",
    "print(f\" Mean Neighbourhood Density: {df['population_density'].mean():,.0f} per sq km\")\n",
    "print(f\" Max Density: {df['population_density'].max():,.0f} per sq km\")\n",
    "print(f\" Min Density: {df['population_density'].min():,.0f} per sq km\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}