Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add GitHub attestation discovery #1020

Draft
wants to merge 17 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion src/macaron/artifact/local_artifact.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module declares types and utilities for handling local artifacts."""

import fnmatch
import glob
import hashlib
import logging
import os

from packageurl import PackageURL

from macaron.artifact.maven import construct_maven_repository_path
from macaron.errors import LocalArtifactFinderError
from macaron.slsa_analyzer.package_registry import MavenCentralRegistry

logger: logging.Logger = logging.getLogger(__name__)


def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
@@ -247,3 +252,53 @@ def get_local_artifact_dirs(
)

raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}")


def get_local_artifact_hash(purl: PackageURL, artifact_dirs: list[str], hash_algorithm_name: str) -> str | None:
    """Compute the hash of the local artifact.

    The artifact file name is derived from the PURL, and each candidate directory is checked in order.
    The digest of the first candidate file that can be read and hashed is returned.

    Parameters
    ----------
    purl: PackageURL
        The PURL of the artifact being sought.
    artifact_dirs: list[str]
        The possible locations of the artifact.
    hash_algorithm_name: str
        The hash algorithm to use.

    Returns
    -------
    str | None
        The hex digest of the artifact, or None if the artifact was not found or could not be hashed.
    """
    if not artifact_dirs:
        logger.debug("No artifact directories provided.")
        return None

    if not purl.version:
        logger.debug("PURL is missing version.")
        return None

    # Only Maven artifacts have a known file name at present.
    artifact_target = None
    if purl.type == "maven":
        artifact_target = MavenCentralRegistry.get_artifact_file_name(purl)

    if not artifact_target:
        logger.debug("PURL type not supported: %s", purl.type)
        return None

    for artifact_dir in artifact_dirs:
        full_path = os.path.join(artifact_dir, artifact_target)
        # Only regular files can be hashed; a directory that happens to share the name is skipped.
        if not os.path.isfile(full_path):
            continue

        try:
            with open(full_path, "rb") as file:
                hash_result = hashlib.file_digest(file, hash_algorithm_name)
        except ValueError as error:
            logger.debug("Error while hashing file: %s", error)
            continue
        except OSError as error:
            # The file may have been removed or be unreadable; try the next candidate directory.
            logger.debug("Error while reading file %s: %s", full_path, error)
            continue

        return hash_result.hexdigest()

    return None
4 changes: 2 additions & 2 deletions src/macaron/json_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module provides utility functions for JSON data."""
@@ -53,5 +53,5 @@ def json_extract(entry: dict | list, keys: Sequence[str | int], type_: type[T])
if isinstance(entry, type_):
return entry

logger.debug("Found value of incorrect type: %s instead of %s.", type(entry), type(type_))
logger.debug("Found value of incorrect type: %s instead of %s.", type(entry), type_)
return None
Original file line number Diff line number Diff line change
@@ -95,7 +95,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
The result and related information collected during the analysis.
"""
maintainers_join_date: list[datetime] | None = self._get_maintainers_join_date(
pypi_package_json.pypi_registry, pypi_package_json.component.name
pypi_package_json.pypi_registry, pypi_package_json.component_name
)
latest_release_date: datetime | None = self._get_latest_release_date(pypi_package_json)
detail_info: dict[str, JsonType] = {
Original file line number Diff line number Diff line change
@@ -41,6 +41,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
The result and related information collected during the analysis.
"""
# If a sourcecode repo exists, then this will have already been validated
if not pypi_package_json.component.repository:
if not pypi_package_json.has_repository:
return HeuristicResult.FAIL, {}
return HeuristicResult.PASS, {}
Original file line number Diff line number Diff line change
@@ -61,7 +61,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
logger.debug(error_msg)
raise HeuristicAnalyzerValueError(error_msg)

version = pypi_package_json.component.version
version = pypi_package_json.component_version
if version is None: # check latest release version
version = pypi_package_json.get_latest_version()

47 changes: 45 additions & 2 deletions src/macaron/repo_finder/repo_finder.py
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@
from macaron.config.defaults import defaults
from macaron.config.global_config import global_config
from macaron.errors import CloneError, RepoCheckOutError
from macaron.repo_finder import to_domain_from_known_purl_types
from macaron.repo_finder import repo_finder_pypi, to_domain_from_known_purl_types
from macaron.repo_finder.commit_finder import find_commit, match_tags
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder
@@ -66,11 +66,14 @@
list_remote_references,
resolve_local_path,
)
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo

logger: logging.Logger = logging.getLogger(__name__)


def find_repo(purl: PackageURL, check_latest_version: bool = True) -> tuple[str, RepoFinderInfo]:
def find_repo(
purl: PackageURL, check_latest_version: bool = True, all_package_registries: list[PackageRegistryInfo] | None = None
) -> tuple[str, RepoFinderInfo]:
"""Retrieve the repository URL that matches the given PURL.

Parameters
@@ -79,6 +82,8 @@ def find_repo(purl: PackageURL, check_latest_version: bool = True) -> tuple[str,
The parsed PURL to convert to the repository path.
check_latest_version: bool
A flag that determines whether the latest version of the PURL is also checked.
all_package_registries: list[PackageRegistryInfo] | None
The list of package registries, if any.

Returns
-------
@@ -103,6 +108,9 @@ def find_repo(purl: PackageURL, check_latest_version: bool = True) -> tuple[str,
logger.debug("Analyzing %s with Repo Finder: %s", purl, type(repo_finder))
found_repo, outcome = repo_finder.find_repo(purl)

if not found_repo:
found_repo, outcome = find_repo_alternative(purl, outcome, all_package_registries)

if check_latest_version and not defaults.getboolean("repofinder", "try_latest_purl", fallback=True):
check_latest_version = False

@@ -117,13 +125,48 @@ def find_repo(purl: PackageURL, check_latest_version: bool = True) -> tuple[str,
return "", RepoFinderInfo.NO_NEWER_VERSION

found_repo, outcome = DepsDevRepoFinder().find_repo(latest_version_purl)
if found_repo:
return found_repo, outcome

if not found_repo:
found_repo, outcome = find_repo_alternative(latest_version_purl, outcome)

if not found_repo:
logger.debug("Could not find repo from latest version of PURL: %s", latest_version_purl)
return "", RepoFinderInfo.LATEST_VERSION_INVALID

return found_repo, outcome


def find_repo_alternative(
    purl: PackageURL, outcome: RepoFinderInfo, all_package_registries: list[PackageRegistryInfo] | None = None
) -> tuple[str, RepoFinderInfo]:
    """Try PURL type specific repository lookups after the standard Repo Finder methods have failed.

    Parameters
    ----------
    purl : PackageURL
        The parsed PURL whose repository is being sought.
    outcome: RepoFinderInfo
        The outcome of the earlier attempt, returned unchanged when no type specific method applies.
    all_package_registries: list[PackageRegistryInfo] | None
        The list of package registries, if any.

    Returns
    -------
    tuple[str, RepoFinderInfo] :
        The repository URL for the passed package, if found, and the outcome to report.
    """
    if purl.type == "pypi":
        repo_url, outcome = repo_finder_pypi.find_repo(purl, all_package_registries)
    else:
        # No type specific method exists for this PURL type, so the incoming outcome is kept.
        repo_url = ""

    if repo_url:
        return repo_url, outcome

    logger.debug("Could not find repository using type specific (%s) methods for PURL: %s", purl.type, purl)
    return repo_url, outcome


def to_repo_path(purl: PackageURL, available_domains: list[str]) -> str | None:
"""Return the repository path from the PURL string.

17 changes: 16 additions & 1 deletion src/macaron/repo_finder/repo_finder_enums.py
Original file line number Diff line number Diff line change
@@ -57,6 +57,18 @@ class RepoFinderInfo(Enum):
#: Reported if deps.dev returns data that does not contain the desired SCM URL. E.g. The repository URL.
DDEV_NO_URLS = "deps.dev no URLs"

#: Reported if there was an error with the request sent to the PyPI registry.
PYPI_HTTP_ERROR = "PyPI HTTP error"

#: Reported if there was an error parsing the JSON returned by the PyPI registry.
PYPI_JSON_ERROR = "PyPI JSON error"

#: Reported if there were no matching URLs in the JSON returned by the PyPI registry.
PYPI_NO_URLS = "PyPI no matching URLs"

#: Reported if the PyPI registry is disabled or not present in the list of package registries.
PYPI_NO_REGISTRY = "PyPI registry disabled or absent"

#: Reported if the provided PURL did not produce a result, but a more recent version could not be found.
NO_NEWER_VERSION = "No newer version than provided which failed"

@@ -70,7 +82,10 @@ class RepoFinderInfo(Enum):
FOUND_FROM_PARENT = "Found from parent"

#: Reported when a repository is found from a more recent version than was provided by the user.
FOUND_FROM_LATEST = "Found form latest"
FOUND_FROM_LATEST = "Found from latest"

#: Reported when a repository could only be found by checking the PyPI registry JSON.
FOUND_FROM_PYPI = "Found from PyPI"

#: Default value. Reported if the Repo Finder was not called. E.g. Because the repository URL was already present.
NOT_USED = "Not used"
86 changes: 86 additions & 0 deletions src/macaron/repo_finder/repo_finder_pypi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the logic for finding repositories of PyPI projects."""
import logging
import urllib.parse

from packageurl import PackageURL

from macaron.repo_finder.repo_finder_enums import RepoFinderInfo
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, PyPIRegistry
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo

logger: logging.Logger = logging.getLogger(__name__)


def find_repo(
    purl: PackageURL, all_package_registries: list[PackageRegistryInfo] | None = None
) -> tuple[str, RepoFinderInfo]:
    """Retrieve the repository URL that matches the given PyPI PURL.

    The PyPI package JSON is downloaded and its project links are searched for a GitHub URL.
    The first such URL is reduced to its base form and returned.

    Parameters
    ----------
    purl : PackageURL
        The parsed PURL to convert to the repository path.
    all_package_registries: list[PackageRegistryInfo] | None
        The list of package registries, if any. When the entry that pairs the PyPI registry with the
        "pypi" build tool is present, the downloaded asset is appended to its metadata.

    Returns
    -------
    tuple[str, RepoFinderInfo] :
        The repository URL for the passed package, if found, and the outcome to report.
    """
    pypi_registry = next((registry for registry in PACKAGE_REGISTRIES if isinstance(registry, PyPIRegistry)), None)
    if not pypi_registry:
        return "", RepoFinderInfo.PYPI_NO_REGISTRY

    pypi_registry.load_defaults()
    pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {})
    if not pypi_asset.download(dest=""):
        return "", RepoFinderInfo.PYPI_HTTP_ERROR

    if all_package_registries:
        # Find the package registry info object that contains the PyPI registry and has the pypi build tool.
        registry_info = next(
            (
                info
                for info in all_package_registries
                if info.package_registry == pypi_registry and info.build_tool_name == "pypi"
            ),
            None,
        )
        if registry_info:
            # Save the asset for later use.
            registry_info.metadata.append(pypi_asset)

    url_dict = pypi_asset.get_project_links()
    if not url_dict:
        return "", RepoFinderInfo.PYPI_JSON_ERROR

    # Only the link targets matter here; the link labels are not used.
    for url in url_dict.values():
        fixed_url = _to_github_repo_url(url)
        if not fixed_url:
            continue
        logger.debug("Found repository URL from PyPI: %s", fixed_url)
        pypi_asset.has_repository = True
        return fixed_url, RepoFinderInfo.FOUND_FROM_PYPI

    return "", RepoFinderInfo.PYPI_NO_URLS


def _to_github_repo_url(url: str) -> str | None:
    """Reduce a URL to its base GitHub repository URL, or return None if it does not point into a repository."""
    try:
        parsed_url = urllib.parse.urlparse(url)
    except ValueError as error:
        # urlparse rejects some malformed URLs, e.g. unmatched square brackets in the network location.
        logger.debug("Could not parse URL %s: %s", url, error)
        return None

    if not parsed_url.hostname or parsed_url.hostname.lower() != "github.com":
        return None

    # The path starts with a "/". Both the owner and the repository segment must be present and non-empty.
    split_path = parsed_url.path[1:].split("/")
    if len(split_path) < 2 or not split_path[0] or not split_path[1]:
        return None

    # Keep only {scheme}://github.com/{owner}/{repo}; params, query and fragment refer to a sub-page, not the repo.
    return urllib.parse.ParseResult(
        scheme=parsed_url.scheme,
        netloc=parsed_url.netloc,
        path=f"/{split_path[0]}/{split_path[1]}",
        params="",
        query="",
        fragment="",
    ).geturl()
Loading