diff --git a/lab-web-scraping.ipynb b/lab-web-scraping.ipynb
index e552783..d0ee0d0 100644
--- a/lab-web-scraping.ipynb
+++ b/lab-web-scraping.ipynb
@@ -110,14 +110,181 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "id": "40359eee-9cd7-4884-bfa4-83344c222305",
    "metadata": {
     "id": "40359eee-9cd7-4884-bfa4-83344c222305"
    },
    "outputs": [],
    "source": [
-    "# Your solution goes here"
+    "import requests\n",
+    "from bs4 import BeautifulSoup\n",
+    "\n",
+    "url = \"https://books.toscrape.com/\"\n",
+    "\n",
+    "# Time out on stalls and raise on HTTP errors instead of parsing an error page.\n",
+    "response = requests.get(url, timeout=10)\n",
+    "response.raise_for_status()\n",
+    "\n",
+    "# Name the parser explicitly so parsing does not depend on which parsers are installed.\n",
+    "soup = BeautifulSoup(response.content, \"html.parser\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "362a4ecc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "A Light in the ... £51.77 In stock Add to basket\n",
+      "Tipping the Velvet £53.74 In stock Add to basket\n",
+      "Soumission £50.10 In stock Add to basket\n",
+      "Sharp Objects £47.82 In stock Add to basket\n",
+      "Sapiens: A Brief History ... £54.23 In stock Add to basket\n",
+      "The Requiem Red £22.65 In stock Add to basket\n",
+      "The Dirty Little Secrets ... £33.34 In stock Add to basket\n",
+      "The Coming Woman: A ... £17.93 In stock Add to basket\n",
+      "The Boys in the ... £22.60 In stock Add to basket\n",
+      "The Black Maria £52.15 In stock Add to basket\n",
+      "Starving Hearts (Triangular Trade ... £13.99 In stock Add to basket\n",
+      "Shakespeare's Sonnets £20.66 In stock Add to basket\n",
+      "Set Me Free £17.46 In stock Add to basket\n",
+      "Scott Pilgrim's Precious Little ... £52.29 In stock Add to basket\n",
+      "Rip it Up and ... £35.02 In stock Add to basket\n",
+      "Our Band Could Be ... £57.25 In stock Add to basket\n",
+      "Olio £23.88 In stock Add to basket\n",
+      "Mesaerion: The Best Science ... £37.59 In stock Add to basket\n",
+      "Libertarianism for Beginners £51.33 In stock Add to basket\n",
+      "It's Only the Himalayas £45.17 In stock Add to basket\n"
+     ]
+    }
+   ],
+   "source": [
+    "grid = soup.find('ol', attrs={\"class\":'row'})\n",
+    "\n",
+    "books = grid.find_all(\"li\", attrs={\"class\":\"col-xs-6 col-sm-4 col-md-3 col-lg-3\"})\n",
+    "\n",
+    "for book in books:\n",
+    "    print(book.get_text().replace('\\n',' ').strip())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5f0934e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Set Me Free'"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# TITLE\n",
+    "\n",
+    "def get_title(book):\n",
+    "    \"\"\"Return the full title of a book listing.\n",
+    "\n",
+    "    The visible link text is cut short with '...' for long titles, so read the\n",
+    "    anchor's `title` attribute, which on this site carries the untruncated title.\n",
+    "    \"\"\"\n",
+    "    return book.find(\"a\", attrs={\"title\": True})[\"title\"]\n",
+    "\n",
+    "get_title(books[12])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5dea3c59",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'£17.46'"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# PRICE\n",
+    "\n",
+    "def get_price(book):\n",
+    "    \"\"\"Return the displayed price text (e.g. '£17.46') of a book listing.\"\"\"\n",
+    "    return book.find(\"p\", attrs={\"class\": \"price_color\"}).get_text().strip()\n",
+    "\n",
+    "get_price(books[12])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8cad30d7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Five'"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# RATING\n",
+    "\n",
+    "def get_rating(book):\n",
+    "    \"\"\"Return the star rating of a book listing as a word (e.g. 'Five').\n",
+    "\n",
+    "    The rating word is read from the second CSS class of the 'star-rating' paragraph.\n",
+    "    \"\"\"\n",
+    "    return book.find(\"p\", attrs={\"class\": \"star-rating\"})[\"class\"][1]\n",
+    "\n",
+    "get_rating(books[12])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a56a3078",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'In stock'"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# STOCK\n",
+    "\n",
+    "def get_stock(book):\n",
+    "    \"\"\"Return the availability text (e.g. 'In stock') of a book listing.\"\"\"\n",
+    "    return book.find(\"p\", attrs={\"class\": \"availability\"}).get_text().strip()\n",
+    "\n",
+    "get_stock(books[12])"
    ]
   }
  ],
@@ -126,7 +293,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -140,7 +307,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,