From 224cd39e0df18f989e536e470ef06de7ccbc20b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ant=C3=B3nio=20Gomes?= <antmgomes@outlook.com>
Date: Sun, 19 Oct 2025 19:39:18 +0200
Subject: [PATCH] Done With The Lab Data Filtering

---
 lab-dw-aggregating.ipynb | 202 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 201 insertions(+), 1 deletion(-)

diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb
index fadd718..87c7583 100644
--- a/lab-dw-aggregating.ipynb
+++ b/lab-dw-aggregating.ipynb
@@ -134,7 +134,207 @@
       },
       "outputs": [],
       "source": [
-        "# your code goes here"
+        "# your code goes here\n",
+        "import pandas as pd\n",
+        "\n",
+        "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\")\n",
+        "df.head()\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "db998b3d",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.columns = df.columns.str.lower()\n",
+        "\n",
+        "df.columns = df.columns.str.replace(\" \", \"_\")\n",
+        "\n",
+        "df = df.drop(\"unnamed:_0\", axis = 1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e36e425b",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.drop_duplicates(subset = [\"customer\"], inplace = True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "579503e4",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.duplicated().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "81f91121",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.isnull().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "b492f159",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df[\"state\"].value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "b93581cc",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df['state'] = df['state'].fillna(df['state'].mode()[0])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "23dda840",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df[\"state\"].value_counts()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d4100f7e",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df['response'] = df['response'].fillna(df['response'].mode()[0])\n",
+        "df['vehicle_class'] = df['vehicle_class'].fillna(df['vehicle_class'].mode()[0])\n",
+        "df['vehicle_size'] = df['vehicle_size'].fillna(df['vehicle_size'].mode()[0])\n",
+        "\n",
+        "df['months_since_last_claim'] = df['months_since_last_claim'].fillna(df['months_since_last_claim'].median())\n",
+        "\n",
+        "df['number_of_open_complaints'] = df['number_of_open_complaints'].fillna(0)\n",
+        "\n",
+        "df = df.dropna(subset = [\"vehicle_type\"])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "051109b5",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.isnull().sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e0ffda1c",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df[\"number_of_open_complaints\"] = df[\"number_of_open_complaints\"].astype(int)\n",
+        "df[\"months_since_last_claim\"] = df[\"months_since_last_claim\"].astype(int)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "65633d50",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df.dtypes"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "2c436343",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dfLow = df[(df['total_claim_amount'] < 1000) & (df['response'] == 'Yes')]\n",
+        "dfLow.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "9da5b059",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dfYes = df[(df[\"response\"] == \"Yes\")]\n",
+        "\n",
+        "df_1 = dfYes.pivot_table(index=[\"policy_type\", \"gender\"], values=[\"monthly_premium_auto\", \"customer_lifetime_value\", \"total_claim_amount\"], aggfunc= \"mean\")\n",
+        "\n",
+        "df_1"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d0ba29b4",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Males with Special Auto policies are the most profitable, lowest-risk segment, showing high lifetime value and low claims.\n",
+        "# Gender differences are small in every policy type except Special Auto."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a92df366",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dfStCounts = df[\"state\"].value_counts()\n",
+        "\n",
+        "dfBigSt = dfStCounts < 500\n",
+        "\n",
+        "dfBigSt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d40e0a36",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "df_2 = df.pivot_table(index = [\"education\", \"gender\"], values = \"customer_lifetime_value\", aggfunc = [\"max\", \"min\", \"median\"])\n",
+        "df_2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ca37e533",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# CLV varies widely within each education/gender group, but the medians are similar across genders, which suggests that gender has little impact.\n",
+        "# High School or Below and Master's customers have the highest median CLVs, while Doctorates are lower, so education influences CLV more than gender."
       ]
     }
   ],