Skip to content

Commit

Permalink
update defaults
Browse files Browse the repository at this point in the history
Signed-off-by: Hiroya Matsubara <[email protected]>
  • Loading branch information
hmtbr committed Oct 28, 2024
1 parent 73a9f1c commit bb7f6a3
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions data-connector-lib/src/dpk_connector/core/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def async_crawl(
randomize_download_delay: bool = True,
download_timeout: float = 180,
autothrottle_enabled: bool = True,
autothrottle_max_delay: float = 300,
autothrottle_max_delay: float = 60,
autothrottle_target_concurrency: float = 8,
robots_max_crawl_delay: float = 60,
) -> Deferred[None]:
Expand All @@ -118,7 +118,7 @@ def async_crawl(
randomize_download_delay (bool): If True, the download delay will be randomized between 0.5 * `download_delay` and 1.5 * `download_delay`. Default is True.
download_timeout (float): The timeout for each request. Default is 180 seconds.
autothrottle_enabled (bool): If True, autothrottling will be enabled. Default is True.
autothrottle_max_delay (float): The maximum delay between consecutive requests when autothrottling is enabled. Default is 300 seconds.
autothrottle_max_delay (float): The maximum delay between consecutive requests when autothrottling is enabled. Default is 60 seconds.
autothrottle_target_concurrency (float): The target concurrency for autothrottling. Default is 8.
robots_max_crawl_delay (float): The maximum crawl delay allowed by the robots.txt file. Default is 60 seconds.
Expand Down Expand Up @@ -230,7 +230,7 @@ def crawl(
randomize_download_delay: bool = True,
download_timeout: float = 180,
autothrottle_enabled: bool = True,
autothrottle_max_delay: float = 300,
autothrottle_max_delay: float = 60,
autothrottle_target_concurrency: float = 8,
robots_max_crawl_delay: float = 60,
) -> None:
Expand All @@ -257,7 +257,7 @@ def crawl(
randomize_download_delay (bool): If True, the download delay will be randomized between 0.5 * `download_delay` and 1.5 * `download_delay`. Default is True.
download_timeout (float): The timeout for each request. Default is 180 seconds.
autothrottle_enabled (bool): If True, autothrottling will be enabled. Default is True.
autothrottle_max_delay (float): The maximum delay between consecutive requests when autothrottling is enabled. Default is 300 seconds.
autothrottle_max_delay (float): The maximum delay between consecutive requests when autothrottling is enabled. Default is 60 seconds.
autothrottle_target_concurrency (float): The target concurrency for autothrottling. Default is 8.
robots_max_crawl_delay (float): The maximum crawl delay allowed by the robots.txt file. Default is 60 seconds.
Expand Down

0 comments on commit bb7f6a3

Please sign in to comment.