diff --git a/README.md b/README.md index b642de9..c61dd37 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ def parse_result(self, response): ### Additional arguments The `scrapy_selenium.SeleniumRequest` accept 4 additional arguments: -#### `wait_time` / `wait_until` +#### `timeout` / `until` When used, selenium will perform an [Explicit wait](http://selenium-python.readthedocs.io/waits.html#explicit-waits) before returning the response to the spider. ```python @@ -69,8 +69,8 @@ from selenium.webdriver.support import expected_conditions as EC yield SeleniumRequest( url=url, callback=self.parse_result, - wait_time=10, - wait_until=EC.element_to_be_clickable((By.ID, 'someid')) + timeout=10, + until=EC.element_to_be_clickable((By.ID, 'someid')) ) ``` diff --git a/scrapy_selenium/http.py b/scrapy_selenium/http.py index cddf7bf..7d4e5da 100644 --- a/scrapy_selenium/http.py +++ b/scrapy_selenium/http.py @@ -6,14 +6,14 @@ class SeleniumRequest(Request): """Scrapy ``Request`` subclass providing additional arguments""" - def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=None, *args, **kwargs): + def __init__(self, timeout=None, until=None, screenshot=False, script=None, *args, **kwargs): """Initialize a new selenium request Parameters ---------- - wait_time: int + timeout: float The number of seconds to wait. - wait_until: method + until: method One of the "selenium.webdriver.support.expected_conditions". The response will be returned until the given condition is fulfilled. screenshot: bool @@ -24,8 +24,8 @@ def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=Non """ - self.wait_time = wait_time - self.wait_until = wait_until + self.timeout = timeout + self.until = until self.screenshot = screenshot self.script = script diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index 201db2c..e279511 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -14,7 +14,8 @@ class SeleniumMiddleware: """Scrapy middleware handling the requests using selenium""" def __init__(self, driver_name, driver_executable_path, - browser_executable_path, command_executor, driver_arguments): + browser_executable_path, command_executor, driver_arguments, + driver_experimental_options=None): """Initialize the selenium webdriver Parameters @@ -29,6 +30,8 @@ def __init__(self, driver_name, driver_executable_path, The path of the executable binary of the browser command_executor: str Selenium remote server endpoint + driver_experimental_options: dict of dicts + Options to pass to experimental_options of the web driver """ webdriver_base_path = f'selenium.webdriver.{driver_name}' @@ -45,17 +48,15 @@ def __init__(self, driver_name, driver_executable_path, driver_options.binary_location = browser_executable_path for argument in driver_arguments: driver_options.add_argument(argument) - - driver_kwargs = { - 'executable_path': driver_executable_path, - f'{driver_name}_options': driver_options - } + if driver_experimental_options is not None: + for key, options in driver_experimental_options.items(): + driver_options.add_experimental_option(key, options) # locally installed driver if driver_executable_path is not None: driver_kwargs = { 'executable_path': driver_executable_path, - f'{driver_name}_options': driver_options + f'options': driver_options } self.driver = driver_klass(**driver_kwargs) # remote driver @@ -74,6 +75,7 @@ def from_crawler(cls, crawler): browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH') command_executor = crawler.settings.get('SELENIUM_COMMAND_EXECUTOR') driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS') + driver_experimental_options = crawler.settings.get('SELENIUM_DRIVER_EXPERIMENTAL_OPTIONS') if driver_name is None: raise NotConfigured('SELENIUM_DRIVER_NAME must be set') @@ -87,7 +89,8 @@ def from_crawler(cls, crawler): driver_executable_path=driver_executable_path, browser_executable_path=browser_executable_path, command_executor=command_executor, - driver_arguments=driver_arguments + driver_arguments=driver_arguments, + driver_experimental_options=driver_experimental_options ) crawler.signals.connect(middleware.spider_closed, signals.spider_closed) @@ -110,10 +113,8 @@ def process_request(self, request, spider): } ) - if request.wait_until: - WebDriverWait(self.driver, request.wait_time).until( - request.wait_until - ) + if request.until: + WebDriverWait(self.driver, request.timeout).until(request.until) if request.screenshot: request.meta['screenshot'] = self.driver.get_screenshot_as_png() diff --git a/setup.py b/setup.py index 16fd185..df7e2f6 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,6 @@ """This module contains the packaging routine for the pybook package""" from setuptools import setup, find_packages -try: - from pip.download import PipSession - from pip.req import parse_requirements -except ImportError: - # It is quick hack to support pip 10 that has changed its internal - # structure of the modules. - from pip._internal.download import PipSession - from pip._internal.req.req_file import parse_requirements def get_requirements(source): @@ -21,10 +13,8 @@ def get_requirements(source): """ - install_reqs = parse_requirements(filename=source, session=PipSession()) - - return [str(ir.req) for ir in install_reqs] - + with open(source, 'rt') as file: + return file.readlines() setup( packages=find_packages(),