{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Crime Trend Line Chart\n",
    "\n",
    "Shows 5-year crime rate trends across Toronto."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Reference\n",
    "\n",
    "### Source Tables\n",
    "\n",
    "| Table | Grain | Key Columns |\n",
    "|-------|-------|-------------|\n",
    "| `mart_neighbourhood_safety` | neighbourhood × year | year, crime_rate_per_100k, assault_rate_per_100k, auto_theft_rate_per_100k, break_enter_rate_per_100k, total_incidents, crime_yoy_change_pct |\n",
    "\n",
    "### SQL Query\n",
    "\n",
    "City-wide yearly aggregates for the latest `TREND_YEARS` years, counting the most recent year in the table as part of the window."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "from dotenv import load_dotenv\n",
    "from sqlalchemy import create_engine, text\n",
    "\n",
    "# Load .env from project root\n",
    "load_dotenv(\"../../.env\")\n",
    "\n",
    "engine = create_engine(os.environ[\"DATABASE_URL\"])\n",
    "\n",
    "# Number of calendar years covered by the trend charts (also shown in a chart title).\n",
    "TREND_YEARS = 5\n",
    "\n",
    "# The window includes the latest year, so the lower bound is MAX(year) - (TREND_YEARS - 1).\n",
    "# Subtracting the full TREND_YEARS would pull in one year more than the titles state.\n",
    "query = text(\n",
    "    \"\"\"\n",
    "SELECT\n",
    "    year,\n",
    "    AVG(crime_rate_per_100k) as avg_crime_rate,\n",
    "    AVG(assault_rate_per_100k) as avg_assault_rate,\n",
    "    AVG(auto_theft_rate_per_100k) as avg_auto_theft_rate,\n",
    "    AVG(break_enter_rate_per_100k) as avg_break_enter_rate,\n",
    "    SUM(total_incidents) as total_city_incidents,\n",
    "    AVG(crime_yoy_change_pct) as avg_yoy_change\n",
    "FROM public_marts.mart_neighbourhood_safety\n",
    "WHERE year >= (\n",
    "    SELECT MAX(year) - :lookback FROM public_marts.mart_neighbourhood_safety\n",
    ")\n",
    "GROUP BY year\n",
    "ORDER BY year\n",
    "\"\"\"\n",
    ")\n",
    "\n",
    "df = pd.read_sql(query, engine, params={\"lookback\": TREND_YEARS - 1})\n",
    "print(f\"Loaded {len(df)} years of crime data\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transformation Steps\n",
    "\n",
    "1. Aggregate by year (city-wide)\n",
    "2. Convert year to datetime\n",
    "3. Melt for multi-line by crime type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Represent each year as its Jan 1 timestamp; this is the date column passed to the charts.\n",
    "df[\"date\"] = pd.to_datetime(df[\"year\"].astype(str) + \"-01-01\")\n",
    "\n",
    "# Query column -> display label for each crime type that gets its own line.\n",
    "# Single source of truth for both the melt and the relabelling below.\n",
    "CRIME_TYPE_LABELS = {\n",
    "    \"avg_assault_rate\": \"Assault\",\n",
    "    \"avg_auto_theft_rate\": \"Auto Theft\",\n",
    "    \"avg_break_enter_rate\": \"Break & Enter\",\n",
    "}\n",
    "\n",
    "# Melt for multi-line: one row per (year, crime type)\n",
    "df_melted = df.melt(\n",
    "    id_vars=[\"year\", \"date\"],\n",
    "    value_vars=list(CRIME_TYPE_LABELS),\n",
    "    var_name=\"crime_type\",\n",
    "    value_name=\"rate_per_100k\",\n",
    ")\n",
    "df_melted[\"crime_type\"] = df_melted[\"crime_type\"].map(CRIME_TYPE_LABELS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[[\"year\", \"avg_crime_rate\", \"total_city_incidents\", \"avg_yoy_change\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Data Visualization\n",
    "\n",
    "### Figure Factory\n",
    "\n",
    "Uses `create_price_time_series` (reused for any numeric trend)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Make the project root importable; the guard keeps re-runs of this cell\n",
    "# from stacking duplicate entries on sys.path.\n",
    "PROJECT_ROOT = \"../..\"\n",
    "if PROJECT_ROOT not in sys.path:\n",
    "    sys.path.insert(0, PROJECT_ROOT)\n",
    "\n",
    "from portfolio_app.figures.toronto.time_series import create_price_time_series\n",
    "\n",
    "# Remove dollar sign formatting since this is rate data.\n",
    "# Shared by every figure in this notebook.\n",
    "RATE_AXIS_LAYOUT = {\"yaxis_tickprefix\": \"\", \"yaxis_title\": \"Rate per 100K\"}\n",
    "\n",
    "crime_type_records = df_melted.to_dict(\"records\")\n",
    "\n",
    "fig = create_price_time_series(\n",
    "    data=crime_type_records,\n",
    "    date_column=\"date\",\n",
    "    price_column=\"rate_per_100k\",\n",
    "    group_column=\"crime_type\",\n",
    "    title=f\"Toronto Crime Trends by Type ({TREND_YEARS} Years)\",\n",
    ")\n",
    "fig.update_layout(**RATE_AXIS_LAYOUT)\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Overall Trend"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Total crime rate trend\n",
    "total_data = (\n",
    "    df[[\"date\", \"avg_crime_rate\"]]\n",
    "    .rename(columns={\"avg_crime_rate\": \"total_rate\"})\n",
    "    .to_dict(\"records\")\n",
    ")\n",
    "\n",
    "fig2 = create_price_time_series(\n",
    "    data=total_data,\n",
    "    date_column=\"date\",\n",
    "    price_column=\"total_rate\",\n",
    "    title=\"Toronto Overall Crime Rate Trend\",\n",
    ")\n",
    "fig2.update_layout(**RATE_AXIS_LAYOUT)\n",
    "fig2.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}