diff --git a/Untitled-1.ipynb b/Untitled-1.ipynb new file mode 100644 index 0000000..66562e2 --- /dev/null +++ b/Untitled-1.ipynb @@ -0,0 +1,1411 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "id": "dfddd406", + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "import requests\n", + "import pandas as pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ccf80559", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = \"https://books.toscrape.com/\"\n", + "response = requests.get(url)\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "891f60cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " All products | Books to Scrape - Sandbox\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "
Books to Scrape We love being scraped!\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
    \n", + "
  • \n", + "Home\n", + "
  • \n", + "
  • All products
  • \n", + "
\n", + "
\n", + "\n", + "
\n", + "
\n", + "

All products

\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "1000 results - showing 1 to 20.\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
Warning! This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning.
\n", + "
\n", + "
    \n", + "
  1. \n", + "
    \n", + "
    \n", + "\"A\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    A Light in the ...

    \n", + "
    \n", + "

    £51.77

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  2. \n", + "
  3. \n", + "
    \n", + "
    \n", + "\"Tipping\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Tipping the Velvet

    \n", + "
    \n", + "

    £53.74

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  4. \n", + "
  5. \n", + "
    \n", + "
    \n", + "\"Soumission\"\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Soumission

    \n", + "
    \n", + "

    £50.10

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  6. \n", + "
  7. \n", + "
    \n", + "
    \n", + "\"Sharp\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Sharp Objects

    \n", + "
    \n", + "

    £47.82

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  8. \n", + "
  9. \n", + "
    \n", + "
    \n", + "\"Sapiens:\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Sapiens: A Brief History ...

    \n", + "
    \n", + "

    £54.23

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  10. \n", + "
  11. \n", + "
    \n", + "
    \n", + "\"The\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    The Requiem Red

    \n", + "
    \n", + "

    £22.65

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  12. \n", + "
  13. \n", + "
    \n", + "
    \n", + "\"The\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    The Dirty Little Secrets ...

    \n", + "
    \n", + "

    £33.34

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  14. \n", + "
  15. \n", + "
    \n", + "
    \n", + "\"The\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    The Coming Woman: A ...

    \n", + "
    \n", + "

    £17.93

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  16. \n", + "
  17. \n", + "
    \n", + "
    \n", + "\"The\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    The Boys in the ...

    \n", + "
    \n", + "

    £22.60

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  18. \n", + "
  19. \n", + "
    \n", + "
    \n", + "\"The\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    The Black Maria

    \n", + "
    \n", + "

    £52.15

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  20. \n", + "
  21. \n", + "
    \n", + "
    \n", + "\"Starving\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Starving Hearts (Triangular Trade ...

    \n", + "
    \n", + "

    £13.99

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  22. \n", + "
  23. \n", + "
    \n", + "
    \n", + "\"Shakespeare's\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Shakespeare's Sonnets

    \n", + "
    \n", + "

    £20.66

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  24. \n", + "
  25. \n", + "
    \n", + "
    \n", + "\"Set\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Set Me Free

    \n", + "
    \n", + "

    £17.46

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  26. \n", + "
  27. \n", + "
    \n", + "
    \n", + "\"Scott\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Scott Pilgrim's Precious Little ...

    \n", + "
    \n", + "

    £52.29

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  28. \n", + "
  29. \n", + "
    \n", + "
    \n", + "\"Rip\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Rip it Up and ...

    \n", + "
    \n", + "

    £35.02

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  30. \n", + "
  31. \n", + "
    \n", + "
    \n", + "\"Our\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Our Band Could Be ...

    \n", + "
    \n", + "

    £57.25

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  32. \n", + "
  33. \n", + "
    \n", + "
    \n", + "\"Olio\"\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Olio

    \n", + "
    \n", + "

    £23.88

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  34. \n", + "
  35. \n", + "
    \n", + "
    \n", + "\"Mesaerion:\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Mesaerion: The Best Science ...

    \n", + "
    \n", + "

    £37.59

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  36. \n", + "
  37. \n", + "
    \n", + "
    \n", + "\"Libertarianism\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    Libertarianism for Beginners

    \n", + "
    \n", + "

    £51.33

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  38. \n", + "
  39. \n", + "
    \n", + "
    \n", + "\"It's\n", + "
    \n", + "

    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

    \n", + "

    It's Only the Himalayas

    \n", + "
    \n", + "

    £45.17

    \n", + "

    \n", + "\n", + " \n", + " In stock\n", + " \n", + "

    \n", + "
    \n", + "\n", + "
    \n", + "
    \n", + "
    \n", + "
  40. \n", + "
\n", + "
\n", + "
    \n", + "
  • \n", + " \n", + " Page 1 of 50\n", + " \n", + "
  • \n", + "
  • next
  • \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup= BeautifulSoup(response.content, \"html.parser\")\n", + "soup" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a039380c", + "metadata": {}, + "outputs": [], + "source": [ + "articulos = soup.find_all(\"article\", class_=\"product_pod\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "928f43bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "20" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(articulos)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "62922b1a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
\n", + "
\n", + "\"A\n", + "
\n", + "

\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "

\n", + "

A Light in the ...

\n", + "
\n", + "

£51.77

\n", + "

\n", + "\n", + " \n", + " In stock\n", + " \n", + "

\n", + "
\n", + "\n", + "
\n", + "
\n", + "
" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "articulos[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8f3227fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "

A Light in the ...

" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "articulos[0].find(\"h3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "84059457", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A Light in the ..." + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "articulos[0].find(\"h3\").find(\"a\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c8c48d5a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'A Light in the Attic'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "articulos[0].find(\"h3\").find(\"a\")[\"title\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "cb22a443", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'£51.77'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "articulos[0].find(\"p\", class_=\"price_color\").text" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ffb36ac2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51.77" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "float(articulos[0].find(\"p\", class_=\"price_color\").text.replace(\"£\", \"\").strip())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "88a91dcf", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "could not convert string to float: '£51.77'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[40], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m 
titulo\u001b[38;5;241m=\u001b[39m art\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mh3\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ma\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtitle\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m titulos\u001b[38;5;241m.\u001b[39mappend(titulo)\n\u001b[0;32m----> 7\u001b[0m precio\u001b[38;5;241m-\u001b[39m\u001b[38;5;28mfloat\u001b[39m(art\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp\u001b[39m\u001b[38;5;124m\"\u001b[39m, class_\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprice_color\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mE\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 8\u001b[0m precios\u001b[38;5;241m.\u001b[39mappend\n", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: '£51.77'" + ] + } + ], + "source": [
+ "# Collect one value per product card; the lists are rebuilt from scratch so the cell is safe to re-run.\n", + "titulos = []\n", + "precios = []\n", + "disponibilidades = []\n", + "ratings = []\n", + "\n", + "for art in articulos:\n", + "    titulos.append(art.find(\"h3\").find(\"a\")[\"title\"])\n", + "    # Prices are rendered like '£51.77': drop the pound sign before converting to float.\n", + "    precio = float(art.find(\"p\", class_=\"price_color\").text.replace(\"£\", \"\").strip())\n", + "    precios.append(precio)\n", + "    # Assumes the books.toscrape.com markup <p class=\"instock availability\">In stock</p> -- confirm if the site changes.\n", + "    disponibilidades.append(art.find(\"p\", class_=\"availability\").get_text(strip=True))\n", + "    # Assumes <p class=\"star-rating Three\">: the rating word is the second CSS class.\n", + "    ratings.append(art.find(\"p\", class_=\"star-rating\")[\"class\"][1])\n", + "\n", + "# Every list needs one entry per product, otherwise the DataFrame build below fails.\n", + "assert len(titulos) == len(precios) == len(disponibilidades) == len(ratings) == len(articulos)"
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "60a61d69", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['A Light in the Attic']" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titulos" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "faa4151b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "precios" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "c11f8b04", + "metadata": {}, + "outputs": [], + "source": [ + "# Column names must match what later cells read: the rating-mapping cell uses \"Rating\".\n", + "dict_books = {\n", + "    \"Titulo\": titulos,\n", + "    \"Precio\": precios,\n", + "    \"Disponibilidad\": disponibilidades,\n", + "    \"Rating\": ratings,\n", + "}\n", + "\n", + "df_books = pd.DataFrame(dict_books)\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "163ac9d8", + "metadata": {}, + "outputs": [], + "source": [ + "# Star ratings are scraped as English words; translate them to integers.\n", + "map_rating = {\n", + "    \"One\": 1,\n", + "    \"Two\": 2,\n", + "    \"Three\": 3,\n", + "    \"Four\": 4,\n", + "    \"Five\": 5\n", + "}\n", + "\n", + "# Values that are not keys of map_rating (e.g. already-converted integers) pass through unchanged,\n", + "# so re-running this cell does not turn the column into NaN.\n", + "df_books[\"Rating\"] = df_books[\"Rating\"].map(lambda valor: map_rating.get(valor, valor))\n", + "\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "ea74b498", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-dw-aggregating.ipynb b/lab-dw-aggregating.ipynb index fadd718..3882462 100644 --- a/lab-dw-aggregating.ipynb +++ b/lab-dw-aggregating.ipynb @@ -36,6 +36,262 @@ " - have a response \"Yes\" to the last marketing campaign." 
] }, + { + "cell_type": "code", + "execution_count": 7, + "id": "024eaa11", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "url = \"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/marketing_customer_analysis.csv\"  # CSV served from GitHub\n", + "df = pd.read_csv(filepath_or_buffer=url)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8ea335fa", + "metadata": {}, + "outputs": [], + "source": [ + "df.columns = [header.strip().lower().replace(' ', '_') for header in df.columns]  # snake_case column names" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "60615f2e", + "metadata": {}, + "outputs": [], + "source": [ + "filtered_df = df.loc[df['total_claim_amount'].lt(1000) & df['response'].eq('Yes')]  # claim below 1000 AND responded Yes" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b0f833cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
33XL78013Oregon22332.439460YesExtendedCollege1/11/11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
88FM55990California5989.773931YesPremiumCollege1/19/11EmployedM...0.01Personal AutoPersonal L1Offer2Branch739.200000Sports CarMedsizeNaN
1515CW49887California4626.801093YesBasicMaster1/16/11EmployedF...0.01Special AutoSpecial L1Offer2Branch547.200000SUVMedsizeNaN
1919NJ54277California3746.751625YesExtendedCollege2/26/11EmployedF...1.01Personal AutoPersonal L2Offer2Call Center19.575683Two-Door CarLargeA
2727MQ68407Oregon4376.363592YesPremiumBachelor2/28/11EmployedF...0.01Personal AutoPersonal L3Offer2Agent60.036683Four-Door CarMedsizeNaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "8 8 FM55990 California 5989.773931 Yes \n", + "15 15 CW49887 California 4626.801093 Yes \n", + "19 19 NJ54277 California 3746.751625 Yes \n", + "27 27 MQ68407 Oregon 4376.363592 Yes \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "3 Extended College 1/11/11 Employed M ... \n", + "8 Premium College 1/19/11 Employed M ... \n", + "15 Basic Master 1/16/11 Employed F ... \n", + "19 Extended College 2/26/11 Employed F ... \n", + "27 Premium Bachelor 2/28/11 Employed F ... \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "3 0.0 2 Corporate Auto \n", + "8 0.0 1 Personal Auto \n", + "15 0.0 1 Special Auto \n", + "19 1.0 1 Personal Auto \n", + "27 0.0 1 Personal Auto \n", + "\n", + " policy renew_offer_type sales_channel total_claim_amount \\\n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "8 Personal L1 Offer2 Branch 739.200000 \n", + "15 Special L1 Offer2 Branch 547.200000 \n", + "19 Personal L2 Offer2 Call Center 19.575683 \n", + "27 Personal L3 Offer2 Agent 60.036683 \n", + "\n", + " vehicle_class vehicle_size vehicle_type \n", + "3 Four-Door Car Medsize A \n", + "8 Sports Car Medsize NaN \n", + "15 SUV Medsize NaN \n", + "19 Two-Door Car Large A \n", + "27 Four-Door Car Medsize NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df.head()" + ] + }, { "cell_type": "markdown", "id": "b9be383e-5165-436e-80c8-57d4c757c8c3", @@ -48,6 +304,108 @@ " - compare these insights to `total_claim_amount` patterns, and discuss which segments appear most profitable or low-risk for the company." ] }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5b3ccad8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valuetotal_claim_amount
policy_typegender
Corporate AutoF7712.628736433.738499
M7944.465414408.582459
Personal AutoF8339.791842452.965929
M7448.383281457.010178
Special AutoF7691.584111453.280164
M8247.088702429.527942
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value total_claim_amount\n", + "policy_type gender \n", + "Corporate Auto F 7712.628736 433.738499\n", + " M 7944.465414 408.582459\n", + "Personal Auto F 8339.791842 452.965929\n", + " M 7448.383281 457.010178\n", + "Special Auto F 7691.584111 453.280164\n", + " M 8247.088702 429.527942" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Mean lifetime value and claim amount per policy type and gender, restricted to 'Yes' responders.\n", + "yes_response = df.loc[df['response'].eq('Yes')]\n", + "\n", + "metric_cols = ['customer_lifetime_value', 'total_claim_amount']\n", + "avg_values = yes_response.groupby(['policy_type', 'gender'])[metric_cols].agg('mean')\n", + "\n", + "avg_values\n" + ] + }, { "cell_type": "markdown", "id": "7050f4ac-53c5-4193-a3c0-8699b87196f0", @@ -58,6 +416,60 @@ "3. Analyze the total number of customers who have policies in each state, and then filter the results to only include states where there are more than 500 customers." ] }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ddfb35f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state\n", + "California 3552\n", + "Oregon 2909\n", + "Arizona 1937\n", + "Nevada 993\n", + "Washington 888\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Number of rows per state; value_counts() returns them sorted from largest to smallest.\n", + "customers_by_state = df.loc[:, 'state'].value_counts()\n", + "\n", + "\n", + "print(customers_by_state)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e6bd505e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state\n", + "California 3552\n", + "Oregon 2909\n", + "Arizona 1937\n", + "Nevada 993\n", + "Washington 888\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "states_over_500 = customers_by_state.loc[customers_by_state.gt(500)]  # keep only states with more than 500 rows\n", + "\n", + "print(states_over_500)" + ] + }, { "cell_type": "markdown", "id": "b60a4443-a1a7-4bbf-b78e-9ccdf9895e0d", @@ -68,6 +480,38 @@ "4. 
Find the maximum, minimum, and median customer lifetime value by education level and gender. Write your conclusions." ] }, + { + "cell_type": "code", + "execution_count": 23, + "id": "53cc97a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " max min median\n", + "education gender \n", + "Bachelor F 73225.95652 1904.000852 5640.505303\n", + " M 67907.27050 1898.007675 5548.031892\n", + "College F 61850.18803 1898.683686 5623.611187\n", + " M 61134.68307 1918.119700 6005.847375\n", + "Doctor F 44856.11397 2395.570000 5332.462694\n", + " M 32677.34284 2267.604038 5577.669457\n", + "High School or Below F 55277.44589 2144.921535 6039.553187\n", + " M 83325.38119 1940.981221 6286.731006\n", + "Master F 51016.06704 2417.777032 5729.855012\n", + " M 50568.25912 2272.307310 5579.099207\n" + ] + } + ], + "source": [ + "clv_by_group = df.groupby(['education', 'gender'])['customer_lifetime_value']  # lifetime value split by education level and gender\n", + "clv_stats = clv_by_group.agg(['max', 'min', 'median'])\n", + "\n", + "print(clv_stats)" + ] + }, { "cell_type": "markdown", "id": "b42999f9-311f-481e-ae63-40a5577072c5", @@ -143,7 +587,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -157,7 +601,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.13.5" } }, "nbformat": 4,