Skip to content

Commit

Permalink
improved chrome_scraper.py
Browse files Browse the repository at this point in the history
added cli
  • Loading branch information
dmy.berezovskyi committed Jan 13, 2025
1 parent 7ce6306 commit c1c8305
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ coverage.xml
.pytest_cache/
cover/
*.env
resources/local
resources/chromedriver
resources/firefox
resources/ubuntuchrome

Expand Down
2 changes: 1 addition & 1 deletion core_driver/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_desired_caps(self, browser="chrome"):


class LocalDriver(Driver):
def create_driver(self, environment=None, dr_type="local"):
def create_driver(self, environment=None, dr_type="chromedriver"):
"""Tries to use ChromeDriverManager to install the latest driver,
and if it fails, it falls back to a locally stored driver in resources."""
driver = None
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ requests="^2.31.0"
setuptools="70.0.0"
ruff="0.6.8"
secure-test-automation="^1.3.1"
colorama="==0.4.6"
rich="==13.9.4"


[tool.pytest.ini_options]
Expand Down
File renamed without changes.
55 changes: 40 additions & 15 deletions scraper/chrome_scraper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import json
from typing import Dict
import pathlib
from typing import Dict, Optional

import requests
from pathlib import Path
import zipfile
from io import BytesIO

from bs4 import BeautifulSoup

from scraper.os_checker import OSChecker
Expand All @@ -11,24 +16,23 @@ class ChromePageScraper:
URL_LATEST = (
"https://googlechromelabs.github.io/chrome-for-testing/#stable"
)
URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json" # noqa
URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"

@staticmethod
def __fetch(url: str) -> requests.Response:
response = requests.get(url)
response.raise_for_status() # Raises an exception if status code is not 200 # noqa
response.raise_for_status()
return response

@staticmethod
def parse_latest() -> Dict[str, str]:
# returns a latest stable chrome driver
elements_list = []
drivers = {}
page = ChromePageScraper.__fetch(ChromePageScraper.URL_LATEST)

soup = BeautifulSoup(page.text, "html.parser")
element = soup.select_one(
"section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok" # noqa
"section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok"
)

if not element:
Expand All @@ -55,12 +59,18 @@ def get_latest_driver(self, os_name: str):
print(drivers[os_name])

@staticmethod
def get_chromedriver(platform=None, version=None, milestone=None):
def get_chromedriver(platform=None,
version=None,
milestone=None,
d_dir: Optional[pathlib.Path] = None,
is_extracted: bool = False
):
"""
:param platform: os_name and architecture
:param version: your chrome browser version
:param milestone: first 3 digits of a browser version: 129 or etc
:param d_dir: Directory to save the chromedriver zip file
:param is_extracted: extracts the chromedriver
:return:
"""
if version is None and milestone is None:
Expand All @@ -71,6 +81,8 @@ def get_chromedriver(platform=None, version=None, milestone=None):
if platform is None:
platform = OSChecker.check_os()

download_dir = d_dir or Path(__file__).resolve().parent.parent / "resources"

# Parse the JSON data
parsed_data = json.loads(
ChromePageScraper.__fetch(ChromePageScraper.URL_ALL).text
Expand All @@ -79,18 +91,31 @@ def get_chromedriver(platform=None, version=None, milestone=None):

for milestone_key, milestone_data in milestones_data.items():
if (milestone is None or milestone_key == milestone) and (
version is None or milestone_data["version"] == version
version is None or milestone_data["version"] == version
):
if "chromedriver" in milestone_data["downloads"]:
for chromedriver_info in milestone_data["downloads"][
"chromedriver"
]:
for chromedriver_info in milestone_data["downloads"]["chromedriver"]:
if (
platform is None
or chromedriver_info["platform"] == platform
platform is None
or chromedriver_info["platform"] == platform
):
return chromedriver_info
url = chromedriver_info["url"]
response = requests.get(url)
response.raise_for_status() # Check status

download_dir.mkdir(parents=True, exist_ok=True)
download_path = download_dir / "chromedriver.zip"

with open(download_path, "wb") as file:
file.write(response.content)
print(f"Chromedriver downloaded to {download_dir}")

if is_extracted:
with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
zip_ref.extractall(download_dir)

print(f"Chromedriver extracted to {download_dir}")
return download_path

if __name__ == "__main__":
print(ChromePageScraper.get_chromedriver(milestone="129"))
ChromePageScraper.get_chromedriver(milestone="131")
Empty file added utils/cli/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions utils/cli/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import argparse
from pathlib import Path

from pyfiglet import Figlet
from rich.console import Console
from colorama import Fore, Style

from scraper.chrome_scraper import ChromePageScraper


def _resolve_platform(name):
    """Translate a legacy ``--platform`` alias into a Chrome for Testing platform id."""
    if name == "linux":
        return "linux64"
    if name == "windows":
        return "win64"
    if name == "mac":
        # Imported locally so the module-level import block stays untouched.
        from platform import machine

        # Apple Silicon hosts report arm64; everything else gets the Intel build.
        return "mac-arm64" if machine().lower() in ("arm64", "aarch64") else "mac-x64"
    # None (auto-detect downstream) and canonical ids pass through unchanged.
    return name


def create_cli(argv=None):
    """Build the ``sstf`` argument parser and run the requested command.

    Currently the only command is ``sstf get chromedriver``, which prints a
    banner and delegates to ``ChromePageScraper.get_chromedriver``.

    :param argv: argument list to parse; ``None`` (the default) makes
        argparse read ``sys.argv[1:]``, which is what the script entry
        point relies on.
    :return: None
    :raises SystemExit: on argparse errors (status 2), or with status 1 when
        no chromedriver download matched the requested filters.
    """
    parser = argparse.ArgumentParser(prog="sstf", description="SSTF Command Line Tool")
    subparsers = parser.add_subparsers(dest="command")

    # 'get' groups everything that downloads an artifact.
    get_parser = subparsers.add_parser("get", help="Download and manage Chromedriver")
    get_subparsers = get_parser.add_subparsers(dest="subcommand")

    chromedriver_parser = get_subparsers.add_parser(
        "chromedriver",
        help="Download chromedriver for a specified version and platform",
    )
    chromedriver_parser.add_argument(
        "--milestone",
        type=str,
        help=f"{Fore.CYAN}Chromium milestone version (e.g., 131).{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--version",
        type=str,
        help=f"{Fore.CYAN}Chromium browser version.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--platform",
        type=str,
        # The download index is matched on its own platform ids, so those are
        # accepted verbatim; the original short names stay as aliases.
        choices=[
            "linux64", "mac-arm64", "mac-x64", "win32", "win64",
            "windows", "mac", "linux",
        ],
        help=f"{Fore.CYAN}Operating system platform.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help=f"{Fore.CYAN}Directory to save the downloaded Chromedriver.{Style.RESET_ALL}",
    )
    chromedriver_parser.add_argument(
        "--extract",
        action="store_true",
        help=f"{Fore.CYAN}Extract the Chromedriver after download.{Style.RESET_ALL}",
    )

    args = parser.parse_args(argv)

    # Show usage instead of silently doing nothing when no command was given.
    if args.command is None:
        parser.print_help()
        return
    if args.command == "get" and getattr(args, "subcommand", None) is None:
        get_parser.print_help()
        return

    if args.command == "get" and args.subcommand == "chromedriver":
        console = Console()

        # ASCII-art banner rendered by Figlet and coloured by Rich.
        fig = Figlet(font="slant")
        console.print(fig.renderText("Chromedriver Download"), style="bold green")

        downloaded = ChromePageScraper.get_chromedriver(
            platform=_resolve_platform(args.platform),
            version=args.version,
            milestone=args.milestone,
            d_dir=Path(args.output_dir) if args.output_dir else None,
            is_extracted=args.extract,
        )

        # get_chromedriver only returns a path when an entry matched; report
        # the miss and exit non-zero rather than finishing silently.
        if downloaded is None:
            parser.exit(
                1,
                "No chromedriver download matched the requested "
                "milestone/version/platform.\n",
            )


# Script entry point: run the CLI only when this file is executed directly.
if __name__ == "__main__":
    create_cli()

0 comments on commit c1c8305

Please sign in to comment.