diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..afe3670 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -127,14 +127,702 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "449513f4-0459-46a0-a18d-9398d974c9ad", "metadata": { "id": "449513f4-0459-46a0-a18d-9398d974c9ad" }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
00DK49336Arizona4809.216960NoBasicCollege2/18/11EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege1/18/11UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2/10/11EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
44QA50777Oregon9025.067525NoPremiumBachelor1/17/11Medical LeaveF...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2/18/11 Employed M ... \n", + "1 Basic College 1/18/11 Unemployed F ... \n", + "2 Basic Bachelor 2/10/11 Employed M ... \n", + "3 Extended College 1/11/11 Employed M ... \n", + "4 Premium Bachelor 1/17/11 Medical Leave F ... \n", + "\n", + " number_of_open_complaints number_of_policies policy_type policy \\\n", + "0 0.0 9 Corporate Auto Corporate L3 \n", + "1 0.0 1 Personal Auto Personal L3 \n", + "2 0.0 2 Personal Auto Personal L3 \n", + "3 0.0 2 Corporate Auto Corporate L3 \n", + "4 NaN 7 Personal Auto Personal L2 \n", + "\n", + " renew_offer_type sales_channel total_claim_amount vehicle_class \\\n", + "0 Offer3 Agent 292.800000 Four-Door Car \n", + "1 Offer4 Call Center 744.924331 Four-Door Car \n", + "2 Offer3 Call Center 480.000000 SUV \n", + "3 Offer2 Branch 484.013411 Four-Door Car \n", + "4 Offer1 Branch 707.925645 Four-Door Car \n", + "\n", + " vehicle_size vehicle_type \n", + "0 Medsize NaN \n", + "1 Medsize NaN \n", + "2 Medsize A \n", + "3 Medsize A \n", + "4 Medsize NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"\n", + "df = pd.read_csv(url)\n", + "\n", + "df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c832fd6", + "metadata": {}, + "outputs": [], + "source": [ + 
"new_df = df[(df['total_claim_amount'] < 1000) & (df['response'] == 'Yes')]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "1368a438", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valuetotal_claim_amount
responsegenderpolicy_type
NoFCorporate Auto8025.383273388.104574
Personal Auto8074.641406407.499560
Special Auto8442.125882458.313057
MCorporate Auto7696.807337472.461272
Personal Auto8065.774774462.554331
Special Auto9151.145060425.032393
YesFCorporate Auto7712.628736433.738499
Personal Auto8339.791842452.965929
Special Auto7691.584111453.280164
MCorporate Auto7944.465414408.582459
Personal Auto7448.383281457.010178
Special Auto8247.088702429.527942
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value total_claim_amount\n", + "response gender policy_type \n", + "No F Corporate Auto 8025.383273 388.104574\n", + " Personal Auto 8074.641406 407.499560\n", + " Special Auto 8442.125882 458.313057\n", + " M Corporate Auto 7696.807337 472.461272\n", + " Personal Auto 8065.774774 462.554331\n", + " Special Auto 9151.145060 425.032393\n", + "Yes F Corporate Auto 7712.628736 433.738499\n", + " Personal Auto 8339.791842 452.965929\n", + " Special Auto 7691.584111 453.280164\n", + " M Corporate Auto 7944.465414 408.582459\n", + " Personal Auto 7448.383281 457.010178\n", + " Special Auto 8247.088702 429.527942" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['response', 'gender', 'policy_type']).agg({'customer_lifetime_value': 'mean', 'total_claim_amount': 'mean'})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3ac688e", + "metadata": {}, + "outputs": [], + "source": [ + "# Among customers who responded 'Yes', males appear to be the most profitable for 'Corporate Auto' and 'Special Auto' policies\n", + "# (higher average customer lifetime value), but this changes for 'Personal Auto', where females have the higher average." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1980792a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state
state
Arizona1937
California3552
Nevada993
Oregon2909
Washington888
\n", + "
" + ], + "text/plain": [ + " state\n", + "state \n", + "Arizona 1937\n", + "California 3552\n", + "Nevada 993\n", + "Oregon 2909\n", + "Washington 888" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('state').agg({'state': 'count'}) #all states have more than 500 customers" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "0c12319c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_value
meanmaxmin
gendereducation
FBachelor7874.26947873225.956521904.000852
College7748.82332561850.188031898.683686
Doctor7328.50891644856.113972395.570000
High School or Below8675.22020155277.445892144.921535
Master8157.05315451016.067042417.777032
MBachelor7703.60167567907.270501898.007675
College8052.45928861134.683071918.119700
Doctor7415.33363832677.342842267.604038
High School or Below8149.68778383325.381191940.981221
Master8168.83265950568.259122272.307310
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value \n", + " mean max min\n", + "gender education \n", + "F Bachelor 7874.269478 73225.95652 1904.000852\n", + " College 7748.823325 61850.18803 1898.683686\n", + " Doctor 7328.508916 44856.11397 2395.570000\n", + " High School or Below 8675.220201 55277.44589 2144.921535\n", + " Master 8157.053154 51016.06704 2417.777032\n", + "M Bachelor 7703.601675 67907.27050 1898.007675\n", + " College 8052.459288 61134.68307 1918.119700\n", + " Doctor 7415.333638 32677.34284 2267.604038\n", + " High School or Below 8149.687783 83325.38119 1940.981221\n", + " Master 8168.832659 50568.25912 2272.307310" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['gender','education']).agg({'customer_lifetime_value': ['mean', 'max', 'min']})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98f09f14", + "metadata": {}, "outputs": [], "source": [ - "# your code goes here" + "# You might expect customers with a Master's degree to have the highest customer lifetime value, but this is not the case:\n", + "# for both women and men, customers with only a high school education (or below) have an average customer lifetime value\n", + "# that is higher than or almost equal to that of customers with a Master's degree." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "512dcc65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
effective_to_date
state
Arizona1937
California3552
Nevada993
Oregon2909
Washington888
\n", + "
" + ], + "text/plain": [ + " effective_to_date\n", + "state \n", + "Arizona 1937\n", + "California 3552\n", + "Nevada 993\n", + "Oregon 2909\n", + "Washington 888" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('state').agg({'effective_to_date': 'count'})" ] } ], @@ -143,7 +831,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -157,7 +845,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,