{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Housing Tenure Breakdown Bar Chart\n",
    "\n",
    "Shows the distribution of owner-occupied vs renter-occupied dwellings across neighbourhoods."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_housing` | neighbourhood × year | pct_owner_occupied, pct_renter_occupied, income_quintile |\n",
    "\n",
    "### SQL Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from dotenv import load_dotenv\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "# Load .env from project root (the path is relative to the kernel's working directory)\n",
    "load_dotenv(\"../../.env\")\n",
    "\n",
    "# The connection string is read from the environment; a missing DATABASE_URL raises KeyError.\n",
    "engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
    "\n",
    "# The MAX(year) subquery carries the same NOT NULL filters as the outer query.\n",
    "# Without them, a newest year that has no tenure values yet would be selected\n",
    "# and the outer filter would then discard every row, loading an empty frame.\n",
    "# pct_renter_occupied is required as well because it drives the ordering:\n",
    "# some databases (e.g. PostgreSQL) sort NULLs first under DESC, which would put\n",
    "# rows without a renter share at the top of the frame.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    neighbourhood_name,\n",
    "    pct_owner_occupied,\n",
    "    pct_renter_occupied,\n",
    "    income_quintile,\n",
    "    total_rental_units,\n",
    "    average_dwelling_value\n",
    "FROM public_marts.mart_neighbourhood_housing\n",
    "WHERE year = (\n",
    "    SELECT MAX(year)\n",
    "    FROM public_marts.mart_neighbourhood_housing\n",
    "    WHERE pct_owner_occupied IS NOT NULL\n",
    "      AND pct_renter_occupied IS NOT NULL\n",
    ")\n",
    "  AND pct_owner_occupied IS NOT NULL\n",
    "  AND pct_renter_occupied IS NOT NULL\n",
    "ORDER BY pct_renter_occupied DESC\n",
    "\"\"\"\n",
    "\n",
    "# One row per neighbourhood, ordered from highest to lowest renter share\n",
    "df = pd.read_sql(query, engine)\n",
    "print(f\"Loaded {len(df)} neighbourhoods with tenure data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Filter to most recent year with tenure data\n",
    "2. Melt owner/renter columns for stacked bar\n",
    "3. 
Sort by renter percentage (highest first)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display labels for the two tenure columns returned by the query\n",
    "tenure_labels = {\"pct_owner_occupied\": \"Owner\", \"pct_renter_occupied\": \"Renter\"}\n",
    "\n",
    "# Wide -> long: one row per (neighbourhood, tenure type) so the bars can be stacked\n",
    "df_stacked = pd.melt(\n",
    "    df,\n",
    "    id_vars=[\"neighbourhood_name\", \"income_quintile\"],\n",
    "    value_vars=list(tenure_labels),\n",
    "    var_name=\"tenure_type\",\n",
    "    value_name=\"percentage\",\n",
    ")\n",
    "\n",
    "# Replace the raw column names with their display labels\n",
    "df_stacked[\"tenure_type\"] = df_stacked[\"tenure_type\"].map(tenure_labels)\n",
    "\n",
    "# List of row dicts, filtered and plotted further down\n",
    "data = df_stacked.to_dict(orient=\"records\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Highest Renter Neighbourhoods:\")\n",
    "\n",
    "preview_columns = [\n",
    "    \"neighbourhood_name\",\n",
    "    \"pct_renter_occupied\",\n",
    "    \"pct_owner_occupied\",\n",
    "    \"income_quintile\",\n",
    "]\n",
    "\n",
    "# The query orders rows by renter share (descending), so the first rows are the highest\n",
    "df[preview_columns].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. 
Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_stacked_bar` from `portfolio_app.figures.toronto.bar_charts`.\n",
    "\n",
    "**Key Parameters:**\n",
    "- `x_column`: 'neighbourhood_name'\n",
    "- `value_column`: 'percentage'\n",
    "- `category_column`: 'tenure_type'\n",
    "- `show_percentages`: True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Put the project root on the import path so `portfolio_app` resolves from this notebook\n",
    "sys.path.insert(0, \"../..\")\n",
    "\n",
    "from portfolio_app.figures.toronto.bar_charts import create_stacked_bar\n",
    "\n",
    "# Show top 20 by renter percentage.\n",
    "# Rank explicitly instead of trusting the row order returned by the query:\n",
    "# rows without a renter share are dropped first, and the stable descending\n",
    "# sort keeps the existing order among ties.\n",
    "top_20_names = (\n",
    "    df.dropna(subset=[\"pct_renter_occupied\"])\n",
    "    .sort_values(\"pct_renter_occupied\", ascending=False, kind=\"stable\")\n",
    "    .head(20)[\"neighbourhood_name\"]\n",
    "    .tolist()\n",
    ")\n",
    "\n",
    "# A set gives constant-time membership checks while filtering the records\n",
    "top_20_lookup = set(top_20_names)\n",
    "data_filtered = [d for d in data if d[\"neighbourhood_name\"] in top_20_lookup]\n",
    "\n",
    "fig = create_stacked_bar(\n",
    "    data=data_filtered,\n",
    "    x_column=\"neighbourhood_name\",\n",
    "    value_column=\"percentage\",\n",
    "    category_column=\"tenure_type\",\n",
    "    title=\"Housing Tenure Mix - Top 20 Renter Neighbourhoods\",\n",
    "    color_map={\"Owner\": \"#4CAF50\", \"Renter\": \"#2196F3\"},\n",
    "    show_percentages=True,\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### City-Wide Distribution\n",
    "\n",
    "The figures below are simple (unweighted) means of the neighbourhood percentages; they are not weighted by the number of dwellings in each neighbourhood."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# City-wide averages: unweighted mean across the loaded neighbourhoods\n",
    "print(f\"City Average Owner-Occupied: {df['pct_owner_occupied'].mean():.1f}%\")\n",
    "print(f\"City Average Renter-Occupied: {df['pct_renter_occupied'].mean():.1f}%\")\n",
    "\n",
    "# By income quintile: mean tenure shares per quintile, rounded to one decimal\n",
    "print(\"\\nTenure by Income Quintile:\")\n",
    "df.groupby(\"income_quintile\")[\n",
    "    [\"pct_owner_occupied\", \"pct_renter_occupied\"]\n",
    "].mean().round(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}