-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
62 lines (52 loc) · 2.05 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
import time
import credentials
import message_strings as loginfo
from logger import Logger
class Scraper:
    """
    Thin wrapper around a Selenium WebDriver that logs each step it performs.

    The driver is not started by the constructor; call create_driver() first,
    then fetch_page() / run_javascript(), and finally kill_driver().
    """

    # Log type name handed to the project Logger.
    logType = 'ScraperLog'

    def __init__(self, arguments='-headless'):
        """
        Prepare the browser options and the logger; no driver is started yet.

        arguments: a single command-line argument passed to the browser,
        by default headless mode is enabled.
        """
        self.options = Options()
        self.options.add_argument(arguments)
        self.Logger = Logger(Scraper.logType)
        self.driver = None

    def kill_driver(self):
        """
        Kills the Selenium driver. Does nothing when no driver is running.
        """
        if self.driver is None:
            # Nothing was created (or it was already killed); avoid the
            # AttributeError that calling quit() on None would raise.
            return
        self.Logger.log(loginfo.DRIVER_KILL)
        try:
            self.driver.quit()
        finally:
            # Drop the reference so a second call is a harmless no-op.
            self.driver = None

    def create_driver(self, driver_type='Firefox'):
        """
        Creates the driver for Selenium, by default it will use Firefox.

        driver_type: 'Firefox' or 'Chrome'.
        Raises ValueError for any other value instead of logging success
        without a driver.
        """
        # Path to your geckodriver, you can download it from here https://github.com/mozilla/geckodriver/releases
        # NOTE(review): the same credentials.driver_path is used for both
        # browsers; confirm it points at the matching driver binary.
        if driver_type == 'Firefox':
            self.driver = Firefox(executable_path=credentials.driver_path, options=self.options)
        elif driver_type == 'Chrome':
            # Imported here because the module only imports the Firefox
            # classes at the top of the file.
            from selenium.webdriver import Chrome
            from selenium.webdriver.chrome.options import Options as ChromeOptions

            # self.options is a Firefox Options object; carry its arguments
            # over to a Chrome-specific options object.
            chrome_options = ChromeOptions()
            for argument in self.options.arguments:
                chrome_options.add_argument(argument)
            self.driver = Chrome(executable_path=credentials.driver_path, options=chrome_options)
        else:
            raise ValueError('Unsupported driver type: {!r}'.format(driver_type))
        self.Logger.log(driver_type+loginfo.DRIVER_OK)

    def fetch_page(self, url, sleeptime=5):
        """
        Fetches the page through the get method, and sleeps before reading its
        source to wait for JS execution on the target page.

        sleeptime: seconds to wait after loading, 5 by default.
        Returns the page source, or None when fetching failed (the failure is
        logged).
        """
        try:
            self.driver.get(url)
            time.sleep(sleeptime)
            page_source = self.driver.page_source
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate during the sleep.
            self.Logger.log(loginfo.FETCH_ERROR)
            return None
        self.Logger.log(loginfo.FETCH_OK)
        return page_source

    def run_javascript(self, javascript, return_result=True):
        """
        Method to run JavaScript in the target page, by default it will return the result.

        Returns the script result when return_result is true, otherwise None.
        """
        result = self.driver.execute_script(javascript)
        # Log success only after the script actually ran without raising.
        self.Logger.log(loginfo.JS_OK)
        return result if return_result else None