-
Notifications
You must be signed in to change notification settings - Fork 1
/
Trip Advisor Hotel Data Scraper.py
73 lines (43 loc) · 1.29 KB
/
Trip Advisor Hotel Data Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# EmreYbs
# converted this Jupyter Notebook file via Visual Studio Code to .py format.
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
import requests
from bs4 import BeautifulSoup as soup
# %%
# You may encounter an issue here. I get a successful status code 200 from my country,
# but for some foreign destinations I need to rerun this step a couple of times. I'll try to fix it.
# Download the TripAdvisor hotel listing page for the Marmaris district.
# An explicit timeout is passed because requests waits forever by default,
# which would hang this cell if the server never answers; on expiry a
# requests.exceptions.Timeout is raised instead.
html = requests.get(
    'https://www.tripadvisor.com.tr/Hotels-g14984534-Marmaris_District_Mugla_Province_Turkish_Aegean_Coast-Hotels.html',
    timeout=30,
)
# Shown as the cell output; 200 means the page was fetched successfully.
html.status_code
# %%
# Parse the raw response bytes into a searchable tree using the lxml parser.
bsobj = soup(markup=html.content, features='lxml')
# %%
# Whitespace-stripped text of every <div class="listing_title"> on the page,
# in document order. find_all is the current name of the deprecated findAll.
hotel = [
    title.text.strip()
    for title in bsobj.find_all('div', {'class': 'listing_title'})
]
# Shown as the cell output.
hotel
# %%
# Value of the `alt` attribute of every <a class="ui_bubble_rating"> on the
# page, in document order. Indexing with ['alt'] raises KeyError if a matched
# tag has no `alt` attribute. find_all is the current name of the deprecated
# findAll.
ratings = [
    bubble['alt']
    for bubble in bsobj.find_all('a', {'class': 'ui_bubble_rating'})
]
# Shown as the cell output.
ratings
# %%
# Whitespace-stripped text of every <a class="review_count"> on the page,
# in document order. find_all is the current name of the deprecated findAll.
reviews = [
    link.text.strip()
    for link in bsobj.find_all('a', {'class': 'review_count'})
]
# Shown as the cell output.
reviews
# %%
# Text of every <div class="price-wrap"> on the page with each 'TL' occurrence
# removed (presumably the Turkish lira marker - confirm against the page) and
# surrounding whitespace stripped. find_all is the current name of the
# deprecated findAll.
price = [
    tag.text.replace('TL', '').strip()
    for tag in bsobj.find_all('div', {'class': 'price-wrap'})
]
# Preview only the first five entries as the cell output.
price[:5]
# %%
# Number of prices scraped. The lists combined into the DataFrame further down
# must all have the same length, otherwise pandas raises ValueError.
len(price)
# %%
# Pair each output column name with the list scraped for it, keeping the
# column order Hotel, Ratings, No_of_Reviews, Price.
d1 = dict(
    zip(
        ('Hotel', 'Ratings', 'No_of_Reviews', 'Price'),
        (hotel, ratings, reviews, price),
    )
)
# %%
import pandas as pd
# %%
# Build the result table: one column per key of d1, one row per list position.
df = pd.DataFrame(d1)
# Shown as the cell output.
df