Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 171 additions & 4 deletions lab-web-scraping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,181 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 32,
"id": "40359eee-9cd7-4884-bfa4-83344c222305",
"metadata": {
"id": "40359eee-9cd7-4884-bfa4-83344c222305"
},
"outputs": [],
"source": [
"# Your solution goes here"
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"url = \"https://books.toscrape.com/\"\n",
"\n",
"# Fetch the catalogue page. The timeout stops the cell from hanging forever on a\n",
"# stalled connection, and raise_for_status() surfaces HTTP errors here instead of\n",
"# letting an error page be parsed as if it were the catalogue.\n",
"response = requests.get(url, timeout=10)\n",
"response.raise_for_status()\n",
"\n",
"# Name the parser explicitly: when it is omitted bs4 picks whichever parser is\n",
"# installed (lxml, html5lib or html.parser), so the tree can differ per machine.\n",
"soup = BeautifulSoup(response.content, \"html.parser\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "362a4ecc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A Light in the ... £51.77 In stock Add to basket\n",
"Tipping the Velvet £53.74 In stock Add to basket\n",
"Soumission £50.10 In stock Add to basket\n",
"Sharp Objects £47.82 In stock Add to basket\n",
"Sapiens: A Brief History ... £54.23 In stock Add to basket\n",
"The Requiem Red £22.65 In stock Add to basket\n",
"The Dirty Little Secrets ... £33.34 In stock Add to basket\n",
"The Coming Woman: A ... £17.93 In stock Add to basket\n",
"The Boys in the ... £22.60 In stock Add to basket\n",
"The Black Maria £52.15 In stock Add to basket\n",
"Starving Hearts (Triangular Trade ... £13.99 In stock Add to basket\n",
"Shakespeare's Sonnets £20.66 In stock Add to basket\n",
"Set Me Free £17.46 In stock Add to basket\n",
"Scott Pilgrim's Precious Little ... £52.29 In stock Add to basket\n",
"Rip it Up and ... £35.02 In stock Add to basket\n",
"Our Band Could Be ... £57.25 In stock Add to basket\n",
"Olio £23.88 In stock Add to basket\n",
"Mesaerion: The Best Science ... £37.59 In stock Add to basket\n",
"Libertarianism for Beginners £51.33 In stock Add to basket\n",
"It's Only the Himalayas £45.17 In stock Add to basket\n"
]
}
],
"source": [
"# Find the <ol> element with class 'row', then collect the <li> tiles inside it.\n",
"grid = soup.find(\"ol\", class_=\"row\")\n",
"books = grid.find_all(\"li\", class_=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\")\n",
"\n",
"# Print the text of each tile on a single line as a quick sanity check.\n",
"for book in books:\n",
"    flattened = book.get_text().replace(\"\\n\", \" \")\n",
"    print(flattened.strip())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5f0934e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Set Me Free'"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TITLE\n",
"\n",
"def get_title(book):\n",
"    \"\"\"Return the full title of a book tile.\n",
"\n",
"    The visible link text is shortened by the site (e.g. 'A Light in the ...'),\n",
"    so the title is read from the anchor's `title` attribute instead of its text.\n",
"\n",
"    Args:\n",
"        book: a parsed tile (bs4 Tag) containing an `<a title=...>` link.\n",
"\n",
"    Returns:\n",
"        The value of the `title` attribute with surrounding whitespace removed.\n",
"\n",
"    Raises:\n",
"        TypeError: if the tile has no anchor carrying a `title` attribute\n",
"            (`find` returns None).\n",
"    \"\"\"\n",
"    link = book.find(\"a\", attrs={\"title\": True})\n",
"    return link[\"title\"].strip()\n",
"\n",
"get_title(books[12])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dea3c59",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'£17.46'"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# PRICE\n",
"\n",
"# Exploratory probe on the first tile; its value is discarded because it is not\n",
"# the last expression of the cell.\n",
"books[0].find(\"p\", class_=\"price_color\").get_text().strip()\n",
"\n",
"def get_price(book):\n",
"    \"\"\"Return the displayed price text of a book tile, e.g. '£17.46'.\n",
"\n",
"    The text is taken from the first <p> carrying the `price_color` class and is\n",
"    returned as a string (currency symbol included), stripped of whitespace.\n",
"    \"\"\"\n",
"    price_tag = book.find(\"p\", class_=\"price_color\")\n",
"    return price_tag.get_text().strip()\n",
"\n",
"get_price(books[12])\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cad30d7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Five'"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# RATING\n",
"\n",
"# Exploratory probe on the second tile; its value is discarded because it is not\n",
"# the last expression of the cell.\n",
"books[1].find(\"p\", class_=\"star-rating\")[\"class\"][1]\n",
"\n",
"def get_rating(book):\n",
"    \"\"\"Return the rating word of a book tile, e.g. 'Five'.\n",
"\n",
"    The rating is the second entry in the class list of the first <p> that\n",
"    carries the `star-rating` class.\n",
"    \"\"\"\n",
"    rating_classes = book.find(\"p\", class_=\"star-rating\")[\"class\"]\n",
"    return rating_classes[1]\n",
"\n",
"get_rating(books[12])\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a56a3078",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'In stock'"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# STOCK\n",
"\n",
"def get_stock(book):\n",
"    \"\"\"Return the availability text of a book tile, e.g. 'In stock'.\n",
"\n",
"    The lookup matches the single `availability` class. Searching for the\n",
"    two-class string 'instock availability' only succeeds when the class\n",
"    attribute is exactly that string, so a different class order or state\n",
"    class would make `find` return None.\n",
"    NOTE(review): assumes tiles that are not in stock still carry the\n",
"    `availability` class - confirm against the site markup.\n",
"\n",
"    Args:\n",
"        book: a parsed tile (bs4 Tag).\n",
"\n",
"    Returns:\n",
"        The availability paragraph's text with surrounding whitespace removed.\n",
"    \"\"\"\n",
"    availability = book.find(\"p\", class_=\"availability\")\n",
"    return availability.get_text().strip()\n",
"\n",
"get_stock(books[12])"
]
}
],
Expand All @@ -126,7 +293,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -140,7 +307,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down