improved chrome_scraper.py

added cli
dmberezovskyii · Jan 13, 2025 · c1c8305 · c1c8305
1 parent 7ce6306
commit c1c8305
Show file tree

Hide file tree

Showing 7 changed files with 102 additions and 17 deletions.
diff --git a/.gitignore b/.gitignore
@@ -51,7 +51,7 @@ coverage.xml
 .pytest_cache/
 cover/
 *.env
-resources/local
+resources/chromedriver
 resources/firefox
 resources/ubuntuchrome
 

diff --git a/core_driver/driver.py b/core_driver/driver.py
@@ -49,7 +49,7 @@ def get_desired_caps(self, browser="chrome"):
 
 
 class LocalDriver(Driver):
-    def create_driver(self, environment=None, dr_type="local"):
+    def create_driver(self, environment=None, dr_type="chromedriver"):
         """Tries to use ChromeDriverManager to install the latest driver,
         and if it fails, it falls back to a locally stored driver in resources."""
         driver = None

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,8 @@ requests="^2.31.0"
 setuptools="70.0.0"
 ruff="0.6.8"
 secure-test-automation="^1.3.1"
+colorama="==0.4.6"
+rich="==13.9.4"
 
 
 [tool.pytest.ini_options]

diff --git a/resources/local → resources/chromedriver b/resources/local → resources/chromedriver
diff --git a/scraper/chrome_scraper.py b/scraper/chrome_scraper.py
@@ -1,7 +1,12 @@
 import json
-from typing import Dict
+import pathlib
+from typing import Dict, Optional
 
 import requests
+from pathlib import Path
+import zipfile
+from io import BytesIO
+
 from bs4 import BeautifulSoup
 
 from scraper.os_checker import OSChecker
@@ -11,24 +16,23 @@ class ChromePageScraper:
     URL_LATEST = (
         "https://googlechromelabs.github.io/chrome-for-testing/#stable"
     )
-    URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json" # noqa
+    URL_ALL = "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"
 
     @staticmethod
     def __fetch(url: str) -> requests.Response:
         response = requests.get(url)
-        response.raise_for_status()  # Raises an exception if status code is not 200 # noqa
+        response.raise_for_status()
         return response
 
     @staticmethod
     def parse_latest() -> Dict[str, str]:
-        # returns a latest stable chrome driver
         elements_list = []
         drivers = {}
         page = ChromePageScraper.__fetch(ChromePageScraper.URL_LATEST)
 
         soup = BeautifulSoup(page.text, "html.parser")
         element = soup.select_one(
-            "section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok" # noqa
+            "section#stable.status-not-ok div.table-wrapper table tbody tr.status-ok"
         )
 
         if not element:
@@ -55,12 +59,18 @@ def get_latest_driver(self, os_name: str):
             print(drivers[os_name])
 
     @staticmethod
-    def get_chromedriver(platform=None, version=None, milestone=None):
+    def get_chromedriver(platform=None,
+                         version=None,
+                         milestone=None,
+                         d_dir: Optional[pathlib.Path] = None,
+                         is_extracted: bool = False
+                         ):
         """
-
         :param platform: os_name and architecture
         :param version: your chrome browser version
         :param milestone: first 3 digits of a browser version: 129 or etc
+        :param d_dir: Directory to save the chromedriver zip file
+        :param is_extracted: extracts the chromedriver
         :return:
         """
         if version is None and milestone is None:
@@ -71,6 +81,8 @@ def get_chromedriver(platform=None, version=None, milestone=None):
         if platform is None:
             platform = OSChecker.check_os()
 
+        download_dir = d_dir or Path(__file__).resolve().parent.parent / "resources"
+
         # Parse the JSON data
         parsed_data = json.loads(
             ChromePageScraper.__fetch(ChromePageScraper.URL_ALL).text
@@ -79,18 +91,31 @@ def get_chromedriver(platform=None, version=None, milestone=None):
 
         for milestone_key, milestone_data in milestones_data.items():
             if (milestone is None or milestone_key == milestone) and (
-                version is None or milestone_data["version"] == version
+                    version is None or milestone_data["version"] == version
             ):
                 if "chromedriver" in milestone_data["downloads"]:
-                    for chromedriver_info in milestone_data["downloads"][
-                        "chromedriver"
-                    ]:
+                    for chromedriver_info in milestone_data["downloads"]["chromedriver"]:
                         if (
-                            platform is None
-                            or chromedriver_info["platform"] == platform
+                                platform is None
+                                or chromedriver_info["platform"] == platform
                         ):
-                            return chromedriver_info
+                            url = chromedriver_info["url"]
+                            response = requests.get(url)
+                            response.raise_for_status()  # Check status
+
+                            download_dir.mkdir(parents=True, exist_ok=True)
+                            download_path = download_dir / "chromedriver.zip"
+
+                            with open(download_path, "wb") as file:
+                                file.write(response.content)
+                                print(f"Chromedriver downloaded to {download_dir}")
+
+                            if is_extracted:
+                                with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
+                                    zip_ref.extractall(download_dir)
 
+                                print(f"Chromedriver extracted to {download_dir}")
+                            return download_path
 
 if __name__ == "__main__":
-    print(ChromePageScraper.get_chromedriver(milestone="129"))
+    ChromePageScraper.get_chromedriver(milestone="131")
diff --git a/utils/cli/__init__.py b/utils/cli/__init__.py
diff --git a/utils/cli/cli.py b/utils/cli/cli.py
@@ -0,0 +1,58 @@
+import argparse
+from pathlib import Path
+
+from pyfiglet import Figlet
+from rich.console import Console
+from colorama import Fore, Style
+
+from scraper.chrome_scraper import ChromePageScraper
+
+
+def create_cli():
+    """Build the ``sstf`` argument parser, parse the command line and run it.
+
+    The only command currently wired up is::
+
+        sstf get chromedriver [--milestone M] [--version V] [--platform P]
+                              [--output-dir DIR] [--extract]
+
+    It prints a banner and delegates the download to
+    ``ChromePageScraper.get_chromedriver``. When the command is missing or
+    incomplete, the matching help text is printed instead of doing nothing.
+
+    :raises SystemExit: raised by argparse for invalid arguments, and with
+        status 1 when ``get_chromedriver`` returns no download path.
+    """
+    parser = argparse.ArgumentParser(prog="sstf", description="SSTF Command Line Tool")
+    subparsers = parser.add_subparsers(dest="command")
+
+    # 'get' groups the download commands; 'chromedriver' is its only subcommand so far.
+    get_parser = subparsers.add_parser("get", help="Download and manage Chromedriver")
+    get_subparsers = get_parser.add_subparsers(dest="subcommand")
+
+    chromedriver_parser = get_subparsers.add_parser("chromedriver",
+                                                    help="Download chromedriver for a specified version and platform")
+    chromedriver_parser.add_argument('--milestone', type=str,
+                                     help=f"{Fore.CYAN}Chromium milestone version (e.g., 131).{Style.RESET_ALL}")
+    chromedriver_parser.add_argument('--version', type=str,
+                                     help=f"{Fore.CYAN}Chromium browser version.{Style.RESET_ALL}")
+    # The value is compared verbatim with the "platform" field of the
+    # Chrome for Testing JSON, so the choices must be the names used there;
+    # generic names such as "windows" would never match any download entry.
+    chromedriver_parser.add_argument('--platform', type=str,
+                                     choices=["linux64", "mac-arm64", "mac-x64", "win32", "win64"],
+                                     help=f"{Fore.CYAN}Operating system platform.{Style.RESET_ALL}")
+    chromedriver_parser.add_argument('--output-dir', type=str, default=None,
+                                     help=f"{Fore.CYAN}Directory to save the downloaded Chromedriver.{Style.RESET_ALL}")
+    chromedriver_parser.add_argument('--extract', action='store_true',
+                                     help=f"{Fore.CYAN}Extract the Chromedriver after download.{Style.RESET_ALL}")
+
+    args = parser.parse_args()
+
+    # 'subcommand' only exists on the namespace once 'get' was parsed, so the
+    # top-level command has to be checked first.
+    if args.command != "get":
+        parser.print_help()
+        return
+    if args.subcommand != "chromedriver":
+        get_parser.print_help()
+        return
+
+    console = Console()
+
+    # Render the banner with pyfiglet and print it through Rich.
+    fig = Figlet(font="slant")
+    console.print(fig.renderText("Chromedriver Download"), style="bold green")
+
+    downloaded = ChromePageScraper.get_chromedriver(
+        platform=args.platform,
+        version=args.version,
+        milestone=args.milestone,
+        d_dir=Path(args.output_dir) if args.output_dir else None,
+        is_extracted=args.extract
+    )
+
+    # No path back means nothing matched the filters; fail loudly instead of
+    # exiting with status 0 as if the download had succeeded.
+    if downloaded is None:
+        console.print(
+            "No chromedriver matched the requested milestone/version/platform.",
+            style="bold red",
+        )
+        raise SystemExit(1)
+
+
+# Script entry point: build the parser and dispatch the requested command.
+if __name__ == "__main__":
+    create_cli()