diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..37dc389 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -127,14 +127,1208 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "449513f4-0459-46a0-a18d-9398d974c9ad",
"metadata": {
"id": "449513f4-0459-46a0-a18d-9398d974c9ad"
},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Unnamed: 0', 'Customer', 'State', 'Customer Lifetime Value',\n",
+ " 'Response', 'Coverage', 'Education', 'Effective To Date',\n",
+ " 'EmploymentStatus', 'Gender', 'Income', 'Location Code',\n",
+ " 'Marital Status', 'Monthly Premium Auto', 'Months Since Last Claim',\n",
+ " 'Months Since Policy Inception', 'Number of Open Complaints',\n",
+ " 'Number of Policies', 'Policy Type', 'Policy', 'Renew Offer Type',\n",
+ " 'Sales Channel', 'Total Claim Amount', 'Vehicle Class', 'Vehicle Size',\n",
+ " 'Vehicle Type'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Source CSV for the lab; kept in a named constant so the location is easy to spot.\n",
+ "DATA_URL = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n",
+ "\n",
+ "df = pd.read_csv(DATA_URL)\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "3ca20992",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " Customer | \n",
+ " State | \n",
+ " Customer Lifetime Value | \n",
+ " Response | \n",
+ " Coverage | \n",
+ " Education | \n",
+ " Effective To Date | \n",
+ " EmploymentStatus | \n",
+ " Gender | \n",
+ " ... | \n",
+ " Number of Open Complaints | \n",
+ " Number of Policies | \n",
+ " Policy Type | \n",
+ " Policy | \n",
+ " Renew Offer Type | \n",
+ " Sales Channel | \n",
+ " Total Claim Amount | \n",
+ " Vehicle Class | \n",
+ " Vehicle Size | \n",
+ " Vehicle Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " XL78013 | \n",
+ " Oregon | \n",
+ " 22332.439460 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/11/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 484.013411 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 8 | \n",
+ " FM55990 | \n",
+ " California | \n",
+ " 5989.773931 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " College | \n",
+ " 1/19/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 739.200000 | \n",
+ " Sports Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 15 | \n",
+ " CW49887 | \n",
+ " California | \n",
+ " 4626.801093 | \n",
+ " Yes | \n",
+ " Basic | \n",
+ " Master | \n",
+ " 1/16/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Special Auto | \n",
+ " Special L1 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 547.200000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 19 | \n",
+ " NJ54277 | \n",
+ " California | \n",
+ " 3746.751625 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/26/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer2 | \n",
+ " Call Center | \n",
+ " 19.575683 | \n",
+ " Two-Door Car | \n",
+ " Large | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 27 | \n",
+ " MQ68407 | \n",
+ " Oregon | \n",
+ " 4376.363592 | \n",
+ " Yes | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/28/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 60.036683 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 Customer State Customer Lifetime Value Response \\\n",
+ "3 3 XL78013 Oregon 22332.439460 Yes \n",
+ "8 8 FM55990 California 5989.773931 Yes \n",
+ "15 15 CW49887 California 4626.801093 Yes \n",
+ "19 19 NJ54277 California 3746.751625 Yes \n",
+ "27 27 MQ68407 Oregon 4376.363592 Yes \n",
+ "\n",
+ " Coverage Education Effective To Date EmploymentStatus Gender ... \\\n",
+ "3 Extended College 1/11/11 Employed M ... \n",
+ "8 Premium College 1/19/11 Employed M ... \n",
+ "15 Basic Master 1/16/11 Employed F ... \n",
+ "19 Extended College 2/26/11 Employed F ... \n",
+ "27 Premium Bachelor 2/28/11 Employed F ... \n",
+ "\n",
+ " Number of Open Complaints Number of Policies Policy Type \\\n",
+ "3 0.0 2 Corporate Auto \n",
+ "8 0.0 1 Personal Auto \n",
+ "15 0.0 1 Special Auto \n",
+ "19 1.0 1 Personal Auto \n",
+ "27 0.0 1 Personal Auto \n",
+ "\n",
+ " Policy Renew Offer Type Sales Channel Total Claim Amount \\\n",
+ "3 Corporate L3 Offer2 Branch 484.013411 \n",
+ "8 Personal L1 Offer2 Branch 739.200000 \n",
+ "15 Special L1 Offer2 Branch 547.200000 \n",
+ "19 Personal L2 Offer2 Call Center 19.575683 \n",
+ "27 Personal L3 Offer2 Agent 60.036683 \n",
+ "\n",
+ " Vehicle Class Vehicle Size Vehicle Type \n",
+ "3 Four-Door Car Medsize A \n",
+ "8 Sports Car Medsize NaN \n",
+ "15 SUV Medsize NaN \n",
+ "19 Two-Door Car Large A \n",
+ "27 Four-Door Car Medsize NaN \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rows whose Response is 'Yes' and whose Total Claim Amount is below 1000.\n",
+ "filtered_df = df.loc[\n",
+ "    df['Total Claim Amount'].lt(1000) & df['Response'].eq('Yes')\n",
+ "]\n",
+ "filtered_df.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "49d44665",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Subset of rows whose Response is 'Yes'; reused by the summary below.\n",
+ "yes_df = df.loc[df['Response'].eq('Yes')]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8c13375a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " Monthly Premium Auto | \n",
+ " Customer Lifetime Value | \n",
+ " Total Claim Amount | \n",
+ "
\n",
+ " \n",
+ " | Policy Type | \n",
+ " Gender | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Corporate Auto | \n",
+ " F | \n",
+ " 94.30 | \n",
+ " 7712.63 | \n",
+ " 433.74 | \n",
+ "
\n",
+ " \n",
+ " | M | \n",
+ " 92.19 | \n",
+ " 7944.47 | \n",
+ " 408.58 | \n",
+ "
\n",
+ " \n",
+ " | Personal Auto | \n",
+ " F | \n",
+ " 99.00 | \n",
+ " 8339.79 | \n",
+ " 452.97 | \n",
+ "
\n",
+ " \n",
+ " | M | \n",
+ " 91.09 | \n",
+ " 7448.38 | \n",
+ " 457.01 | \n",
+ "
\n",
+ " \n",
+ " | Special Auto | \n",
+ " F | \n",
+ " 92.31 | \n",
+ " 7691.58 | \n",
+ " 453.28 | \n",
+ "
\n",
+ " \n",
+ " | M | \n",
+ " 86.34 | \n",
+ " 8247.09 | \n",
+ " 429.53 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Monthly Premium Auto Customer Lifetime Value \\\n",
+ "Policy Type Gender \n",
+ "Corporate Auto F 94.30 7712.63 \n",
+ " M 92.19 7944.47 \n",
+ "Personal Auto F 99.00 8339.79 \n",
+ " M 91.09 7448.38 \n",
+ "Special Auto F 92.31 7691.58 \n",
+ " M 86.34 8247.09 \n",
+ "\n",
+ " Total Claim Amount \n",
+ "Policy Type Gender \n",
+ "Corporate Auto F 433.74 \n",
+ " M 408.58 \n",
+ "Personal Auto F 452.97 \n",
+ " M 457.01 \n",
+ "Special Auto F 453.28 \n",
+ " M 429.53 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean of three numeric columns for 'Yes' responders, per (Policy Type, Gender), rounded to 2 decimals.\n",
+ "summary = (\n",
+ "    yes_df\n",
+ "    .groupby(['Policy Type', 'Gender'])[\n",
+ "        ['Monthly Premium Auto', 'Customer Lifetime Value', 'Total Claim Amount']\n",
+ "    ]\n",
+ "    .mean()\n",
+ "    .round(2)\n",
+ ")\n",
+ "summary\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c80b6891",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " State | \n",
+ " Customer_Count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " California | \n",
+ " 3552 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Oregon | \n",
+ " 2909 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Arizona | \n",
+ " 1937 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Nevada | \n",
+ " 993 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Washington | \n",
+ " 888 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " State Customer_Count\n",
+ "0 California 3552\n",
+ "1 Oregon 2909\n",
+ "2 Arizona 1937\n",
+ "3 Nevada 993\n",
+ "4 Washington 888"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of rows per state, most frequent first, as a two-column frame.\n",
+ "state_counts = (\n",
+ "    df['State']\n",
+ "    .value_counts()\n",
+ "    .rename_axis('State')\n",
+ "    .reset_index(name='Customer_Count')\n",
+ ")\n",
+ "state_counts.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5ab6a288",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " State | \n",
+ " Customer_Count | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " California | \n",
+ " 3552 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Oregon | \n",
+ " 2909 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Arizona | \n",
+ " 1937 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Nevada | \n",
+ " 993 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Washington | \n",
+ " 888 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " State Customer_Count\n",
+ "0 California 3552\n",
+ "1 Oregon 2909\n",
+ "2 Arizona 1937\n",
+ "3 Nevada 993\n",
+ "4 Washington 888"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# States with more than 500 rows in the dataset.\n",
+ "large_states = state_counts.loc[state_counts['Customer_Count'].gt(500)]\n",
+ "large_states\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "e1e85e0f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Education | \n",
+ " Gender | \n",
+ " min | \n",
+ " median | \n",
+ " max | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Bachelor | \n",
+ " F | \n",
+ " 1904.00 | \n",
+ " 5640.51 | \n",
+ " 73225.96 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Bachelor | \n",
+ " M | \n",
+ " 1898.01 | \n",
+ " 5548.03 | \n",
+ " 67907.27 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " College | \n",
+ " F | \n",
+ " 1898.68 | \n",
+ " 5623.61 | \n",
+ " 61850.19 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " College | \n",
+ " M | \n",
+ " 1918.12 | \n",
+ " 6005.85 | \n",
+ " 61134.68 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Doctor | \n",
+ " F | \n",
+ " 2395.57 | \n",
+ " 5332.46 | \n",
+ " 44856.11 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Doctor | \n",
+ " M | \n",
+ " 2267.60 | \n",
+ " 5577.67 | \n",
+ " 32677.34 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " High School or Below | \n",
+ " F | \n",
+ " 2144.92 | \n",
+ " 6039.55 | \n",
+ " 55277.45 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " High School or Below | \n",
+ " M | \n",
+ " 1940.98 | \n",
+ " 6286.73 | \n",
+ " 83325.38 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Master | \n",
+ " F | \n",
+ " 2417.78 | \n",
+ " 5729.86 | \n",
+ " 51016.07 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Master | \n",
+ " M | \n",
+ " 2272.31 | \n",
+ " 5579.10 | \n",
+ " 50568.26 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Education Gender min median max\n",
+ "0 Bachelor F 1904.00 5640.51 73225.96\n",
+ "1 Bachelor M 1898.01 5548.03 67907.27\n",
+ "2 College F 1898.68 5623.61 61850.19\n",
+ "3 College M 1918.12 6005.85 61134.68\n",
+ "4 Doctor F 2395.57 5332.46 44856.11\n",
+ "5 Doctor M 2267.60 5577.67 32677.34\n",
+ "6 High School or Below F 2144.92 6039.55 55277.45\n",
+ "7 High School or Below M 1940.98 6286.73 83325.38\n",
+ "8 Master F 2417.78 5729.86 51016.07\n",
+ "9 Master M 2272.31 5579.10 50568.26"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Min / median / max Customer Lifetime Value per (Education, Gender), rounded to 2 decimals.\n",
+ "clv_by_group = df.groupby(['Education', 'Gender'])['Customer Lifetime Value']\n",
+ "clv_stats = clv_by_group.agg(['min', 'median', 'max']).round(2).reset_index()\n",
+ "\n",
+ "clv_stats\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "3efed35c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\prodd\\AppData\\Local\\Temp\\ipykernel_19776\\1059259904.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+ " df['Effective To Date'] = pd.to_datetime(df['Effective To Date'])\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 'Effective To Date' holds month/day/two-digit-year strings (e.g. 1/19/11, 2/28/11).\n",
+ "# Passing the format explicitly avoids pandas' per-element dateutil fallback (and the\n",
+ "# UserWarning it raises) and guarantees every row is parsed the same way.\n",
+ "df['Effective To Date'] = pd.to_datetime(df['Effective To Date'], format='%m/%d/%y')\n",
+ "\n",
+ "# Full month name (e.g. 'January') used as the column key in the pivots below.\n",
+ "df['Month'] = df['Effective To Date'].dt.month_name()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "fcbd5908",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Policies per (State, Month): 'count' tallies the non-null 'Customer Lifetime Value'\n",
+ "# entries in each cell, and combinations with no rows are shown as 0.\n",
+ "policy_table = df.pivot_table(\n",
+ "    index='State',\n",
+ "    columns='Month',\n",
+ "    values='Customer Lifetime Value',\n",
+ "    aggfunc='count',\n",
+ "    fill_value=0,\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "6b96b7a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calendar order for the month columns (the pivot sorts them alphabetically).\n",
+ "month_order = [\n",
+ "    'January', 'February', 'March', 'April', 'May', 'June',\n",
+ "    'July', 'August', 'September', 'October', 'November', 'December'\n",
+ "]\n",
+ "# reindex adds a column for every month missing from the data; without fill_value those\n",
+ "# columns are all NaN, which undoes the pivot's fill_value=0. Fill them with 0 instead so\n",
+ "# the table stays an integer count table.\n",
+ "policy_table = policy_table.reindex(columns=month_order, fill_value=0)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "0b736494",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Month | \n",
+ " January | \n",
+ " February | \n",
+ " March | \n",
+ " April | \n",
+ " May | \n",
+ " June | \n",
+ " July | \n",
+ " August | \n",
+ " September | \n",
+ " October | \n",
+ " November | \n",
+ " December | \n",
+ "
\n",
+ " \n",
+ " | State | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Arizona | \n",
+ " 1008 | \n",
+ " 929 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | California | \n",
+ " 1918 | \n",
+ " 1634 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | Nevada | \n",
+ " 551 | \n",
+ " 442 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | Oregon | \n",
+ " 1565 | \n",
+ " 1344 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | Washington | \n",
+ " 463 | \n",
+ " 425 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Month January February March April May June July August \\\n",
+ "State \n",
+ "Arizona 1008 929 NaN NaN NaN NaN NaN NaN \n",
+ "California 1918 1634 NaN NaN NaN NaN NaN NaN \n",
+ "Nevada 551 442 NaN NaN NaN NaN NaN NaN \n",
+ "Oregon 1565 1344 NaN NaN NaN NaN NaN NaN \n",
+ "Washington 463 425 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ "Month September October November December \n",
+ "State \n",
+ "Arizona NaN NaN NaN NaN \n",
+ "California NaN NaN NaN NaN \n",
+ "Nevada NaN NaN NaN NaN \n",
+ "Oregon NaN NaN NaN NaN \n",
+ "Washington NaN NaN NaN NaN "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "policy_table.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "c779a9ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Long-format row counts: one row per (State, Month) with the number of policies.\n",
+ "state_month = (\n",
+ "    df.groupby(['State', 'Month'])\n",
+ "    .size()\n",
+ "    .rename('Policies_Sold')\n",
+ "    .reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "84095fc2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Labels of the three states with the most rows (value_counts sorts descending).\n",
+ "top_states = df['State'].value_counts().index[:3]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "b0efa08d",
+ "metadata": {},
"outputs": [],
"source": [
- "# your code goes here"
+ "# Restrict the per-state/per-month counts to the top three states.\n",
+ "top_state_data = state_month.loc[state_month['State'].isin(top_states)]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "429ab106",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Wide table of policies sold: one row per top state, one column per month.\n",
+ "# pivot_table aggregates with the mean by default, which turns the integer counts into\n",
+ "# floats; summing is the right aggregation for counts and keeps them as integers.\n",
+ "top_state_pivot = top_state_data.pivot_table(\n",
+ "    index='State',\n",
+ "    columns='Month',\n",
+ "    values='Policies_Sold',\n",
+ "    aggfunc='sum',\n",
+ "    fill_value=0\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "b66865e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calendar order for the month columns (the pivot sorts them alphabetically).\n",
+ "month_order = [\n",
+ "    'January', 'February', 'March', 'April', 'May', 'June',\n",
+ "    'July', 'August', 'September', 'October', 'November', 'December'\n",
+ "]\n",
+ "# Months absent from the data would otherwise appear as all-NaN columns after reindexing;\n",
+ "# fill them with 0 so a month with no policies reads as zero sold.\n",
+ "top_state_pivot = top_state_pivot.reindex(columns=month_order, fill_value=0)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "148e5b3a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Month | \n",
+ " January | \n",
+ " February | \n",
+ " March | \n",
+ " April | \n",
+ " May | \n",
+ " June | \n",
+ " July | \n",
+ " August | \n",
+ " September | \n",
+ " October | \n",
+ " November | \n",
+ " December | \n",
+ "
\n",
+ " \n",
+ " | State | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Arizona | \n",
+ " 1008.0 | \n",
+ " 929.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | California | \n",
+ " 1918.0 | \n",
+ " 1634.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | Oregon | \n",
+ " 1565.0 | \n",
+ " 1344.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Month January February March April May June July August \\\n",
+ "State \n",
+ "Arizona 1008.0 929.0 NaN NaN NaN NaN NaN NaN \n",
+ "California 1918.0 1634.0 NaN NaN NaN NaN NaN NaN \n",
+ "Oregon 1565.0 1344.0 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ "Month September October November December \n",
+ "State \n",
+ "Arizona NaN NaN NaN NaN \n",
+ "California NaN NaN NaN NaN \n",
+ "Oregon NaN NaN NaN NaN "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "top_state_pivot\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "3ec448c2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Response | \n",
+ " Sales Channel | \n",
+ " No | \n",
+ " Yes | \n",
+ " Total_Customers | \n",
+ " Response_Rate_% | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Agent | \n",
+ " 3148 | \n",
+ " 742 | \n",
+ " 3890 | \n",
+ " 19.07 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Web | \n",
+ " 1334 | \n",
+ " 177 | \n",
+ " 1511 | \n",
+ " 11.71 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Branch | \n",
+ " 2539 | \n",
+ " 326 | \n",
+ " 2865 | \n",
+ " 11.38 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Call Center | \n",
+ " 1792 | \n",
+ " 221 | \n",
+ " 2013 | \n",
+ " 10.98 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Response Sales Channel No Yes Total_Customers Response_Rate_%\n",
+ "0 Agent 3148 742 3890 19.07\n",
+ "3 Web 1334 177 1511 11.71\n",
+ "1 Branch 2539 326 2865 11.38\n",
+ "2 Call Center 1792 221 2013 10.98"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Yes/No response counts per sales channel, one row per channel.\n",
+ "response_by_channel = (\n",
+ "    df.groupby('Sales Channel')['Response']\n",
+ "    .value_counts()\n",
+ "    .unstack(fill_value=0)\n",
+ "    .reset_index()\n",
+ ")\n",
+ "\n",
+ "# Total customers per channel, and the share of them (in percent) who responded 'Yes'.\n",
+ "response_by_channel['Total_Customers'] = response_by_channel[['Yes', 'No']].sum(axis=1)\n",
+ "response_by_channel['Response_Rate_%'] = (\n",
+ "    response_by_channel['Yes']\n",
+ "    .div(response_by_channel['Total_Customers'])\n",
+ "    .mul(100)\n",
+ "    .round(2)\n",
+ ")\n",
+ "\n",
+ "# Highest response rate first (display only; the stored frame keeps its original order).\n",
+ "response_by_channel.sort_values('Response_Rate_%', ascending=False)\n"
]
}
],
@@ -143,7 +1337,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -157,7 +1351,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.13.5"
}
},
"nbformat": 4,