Skip to content

Commit 696d21b

Browse files
Add an ignore, add python, add old sbom
1 parent 99fad4f commit 696d21b

File tree

6 files changed

+1852
-63
lines changed

6 files changed

+1852
-63
lines changed

.github/workflows/generate_sbom.yml

Lines changed: 15 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ jobs:
1212
configure-and-scan:
1313
permissions:
1414
id-token: write # Required to request a json web token (JWT) for keyless authentication with Endor Labs
15-
#packages: write
1615
contents: read
1716
runs-on: ubuntu-latest
1817
steps:
@@ -21,49 +20,18 @@ jobs:
2120
with:
2221
submodules: recursive
2322

24-
# - name: Install dev libs
25-
# run: sudo apt install -y libsasl2-dev libsnappy-dev libssl-dev libmongocrypt-dev
26-
27-
- name: Configure CMake and fetch dependency source
23+
- name: Configure CMake and fetch dependency sources
2824
env:
2925
BUILD_TYPE: Release
3026
BUILD: ${{github.workspace}}/build
3127
CXX_STANDARD: 17
3228
working-directory: ${{env.BUILD}}
3329
run: cmake .. -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_STANDARD=${{env.CXX_STANDARD}} -DENABLE_TESTS=ON
3430

35-
# - name: Create and populate .endorctl/scanprofile.yaml file
36-
# run: |
37-
# mkdir .endorctl
38-
# cat <<EOF > .endorctl/scanprofile.yaml
39-
# kind: AutomatedScanParameters
40-
# spec:
41-
# automated_scan_parameters:
42-
# additional_environment_variables:
43-
# - ENDOR_SCAN_EMBEDDINGS=true
44-
# included_paths:
45-
# - build/_deps/**
46-
# #excluded_paths:
47-
# # - benchmark/**
48-
# # - src/**
49-
# languages:
50-
# - c
51-
# scan_dependencies: true
52-
# tags: github_action
53-
# EOF
54-
# git add .endorctl/scanprofile.yaml
55-
# echo "cat .endorctl/scanprofile.yaml"
56-
# cat .endorctl/scanprofile.yaml
57-
58-
# - name: Rename build folder # Endor Labs will automatically try to exclude "build"
59-
# run: |
60-
# mv build third_party
61-
# git add third_party
62-
6331
- name: Install endorctl and Scan with Endor Labs
64-
uses: endorlabs/github-action@519df81de5f68536c84ae05ebb2986d0bb1d19fc # Release v1.1.8
32+
uses: endorlabs/github-action@519df81de5f68536c84ae05ebb2986d0bb1d19fc # v1.1.8
6533
with:
66-
additional_args: "--languages=c"
34+
additional_args: "--languages=c --exclude-path=\"build/CMakeFiles/**\""
6735
log_level: info
6836
log_verbose: false
6937
namespace: mongodb.${{github.repository_owner}}
@@ -73,31 +41,15 @@ jobs:
7341
env:
7442
ENDOR_SCAN_EMBEDDINGS: true
7543

76-
# - name: Setup Endor Labs Endorctl
77-
# uses: endorlabs/github-action/setup@519df81de5f68536c84ae05ebb2986d0bb1d19fc # Release v1.1.8
78-
# with:
79-
# namespace: mongodb.${{github.repository_owner}}
80-
# enable_github_action_token: true
81-
82-
# - name: Run Endorctl
83-
# env:
84-
# ENDOR_SCAN_USE_SCAN_PROFILE: true
85-
# run: endorctl scan
86-
87-
# - uses: actions/setup-python@v6
88-
# with:
89-
# python-version: '3.10'
90-
# - run: python my_script.py
91-
92-
# ${{ github.sha }}
93-
# - name: Run Endorctl
94-
# env:
95-
# ENDOR_GITHUB_ACTION_TOKEN_ENABLE: true
96-
# ENDOR_SCAN_DEPENDENCIES: true
97-
# ENDOR_SCAN_EMBEDDINGS: true
98-
# ENDOR_SCAN_INCLUDE_PATH:
99-
# ENDOR_SCAN_LANGUAGES: c
100-
# ENDOR_SCAN_SUMMARY_OUTPUT_TYPE: json
101-
# ENDOR_SCAN_TAGS: github_action
102-
# run: |
103-
# endorctl scan
44+
- name: Set up Python 3.10
45+
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
46+
with:
47+
python-version: '3.10'
48+
- uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
49+
with:
50+
python-version: "3.10"
51+
activate-environment: true
52+
- name: Install dependencies
53+
run: uv sync --group make_release
54+
- name: generate_sbom.py
55+
run: uv run etc/sbom/generate_sbom.py --target=branch --sbom-metadata=etc/sbom/metadata.cdx.json

etc/sbom/config.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!/usr/bin/env python3
2+
"""generate_sbom.py config. Operational configuration values stored separately from the core code."""
3+
4+
import json
5+
import logging
6+
import re
7+
8+
# Logger registered under the fixed name "generate_sbom" (not this module's own name).
# Its level is left at NOTSET, so the effective level comes from an ancestor logger.
logger = logging.getLogger("generate_sbom")
logger.setLevel(logging.NOTSET)
10+
11+
12+
# ################ Component Filters ################
13+
14+
# List of Endor Labs SBOM components that must be removed before processing.
# It starts with bare "owner/repo" names; the loop below adds every prefixed form.
endor_components_remove = [
    # An incorrect match from parts of pkg:github/madler/zlib
    "zlib-ng/zlib-ng",
]

# bom-ref prefixes (Endor Labs has been changing them, so add all that we have seen)
prefixes = [
    "pkg:c/github.com/",
    "pkg:generic/github.com/",
    "pkg:github/",
]

# Iterate over a snapshot of the bare names: appending to the very list being
# iterated would make the loop visit (and re-prefix) its own additions forever.
for component in list(endor_components_remove):
    for prefix in prefixes:
        endor_components_remove.append(prefix + component)
30+
31+
# ################ Component Renaming ################
32+
# Endor does not have syntactically valid PURLs for C/C++ packages.
33+
# e.g.,
34+
# Invalid: pkg:c/github.com/abseil/abseil-cpp@<version>
35+
# Valid: pkg:github/abseil/abseil-cpp@<version>
36+
# Run string replacements to correct for this:
37+
# Each entry is an [old, new] pair of PURL substrings.
# NOTE(review): the exact-package fixes are listed ahead of the generic prefix
# rewrites; presumably the consumer applies them in list order -- confirm before reordering.
endor_components_rename = [
    # Exact-package corrections
    ["pkg:generic/zlib.net/zlib", "pkg:github/madler/zlib"],
    ["pkg:github/philsquared/clara", "pkg:github/catchorg/clara"],
    # "github.com" spelled as a namespace -> the "github" PURL type
    ["pkg:generic/github.com/", "pkg:github/"],
    ["pkg:c/github.com/", "pkg:github/"],
]
43+
44+
# ################ PURL Validation ################
45+
# Tail shared by every PURL pattern below: optional version, qualifiers and
# subpath, then the end-of-string anchor.
REGEX_STR_PURL_OPTIONAL = (
    # Optional Version (any chars except ? @ #)
    r"(?:@[^?@#]*)?"
    # Optional Qualifiers (any chars except @ #)
    r"(?:\?[^@#]*)?"
    # Optional Subpath (any chars)
    r"(?:#.*)?$"
)

# Compiled validators keyed by PURL type. Every pattern starts with "^" and ends
# with the "$" carried by REGEX_STR_PURL_OPTIONAL.
REGEX_PURL = {
    # deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
    "deb": re.compile(
        r"^pkg:deb/"  # Scheme and type
        # Namespace: exactly one of the accepted distro names. There is no "+"
        # quantifier here, otherwise run-together names such as "debianubuntu"
        # would also be accepted.
        r"(debian|ubuntu)"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name
    ),
    # Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
    "generic": re.compile(
        r"^pkg:generic/"  # Scheme and type
        r"([a-zA-Z0-9._-]+/)?"  # Optional namespace segment
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (required)
    ),
    # GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
    "github": re.compile(
        r"^pkg:github/"  # Scheme and type
        # Namespace (organization/user), letters must be lowercase
        r"[a-z0-9-]+"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (repository)
    ),
    # PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
    "pypi": re.compile(
        r"^pkg:pypi/"  # Scheme and type
        r"[a-z0-9_-]+"  # Name, letters must be lowercase, dashes, underscore
        + REGEX_STR_PURL_OPTIONAL
    ),
}
83+
84+
85+
def is_valid_purl(purl: str) -> bool:
    """Report whether `purl` matches any of the per-type patterns in REGEX_PURL.

    The patterns are tried in the dictionary's order; the first one that matches
    is written to the debug log and ends the search.

    Args:
        purl: The package URL string to check.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    first_hit = next(
        ((kind, pattern) for kind, pattern in REGEX_PURL.items() if pattern.match(purl)),
        None,
    )
    if first_hit is None:
        return False
    purl_type, regex = first_hit
    logger.debug(f"PURL: {purl} matched PURL type '{purl_type}' regex '{regex.pattern}'")
    return True
92+
93+
94+
# ################ Version Transformation ################
95+
96+
# In some cases we need to transform the version string to strip out tag-related text
97+
# It is unknown what patterns may appear in the future, so we have targeted (not broad) regex
98+
# This is a list of 'pattern' and 'repl' inputs to re.sub()
99+
# One numeric version field, captured: "0" or digits without a leading zero.
RE_VER_NUM = r"(0|[1-9]\d*)"
# Optional trailing labels, each captured when present.
RE_VER_LBL = (
    # "-<pre-release>": dot-separated identifiers
    r"(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
    # "+<build>": dot-separated identifiers
    r"(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
)
# Three dot-separated numeric fields followed by the optional labels.
RE_SEMVER = r"\.".join([RE_VER_NUM] * 3) + RE_VER_LBL
regex_semver = re.compile(RE_SEMVER)

# [compiled pattern, replacement] pairs. Order matters to get_semver_from_release_version(),
# which uses the first pattern that matches: the 'debian/' rule must precede the generic
# '<name>-' / '<name>/' rule, which would otherwise keep the trailing '-1' as a label.
VERSION_PATTERN_REPL = [
    [re.compile(pattern), repl]
    for pattern, repl in (
        # 'debian/1.28.1-1' pkg:github/mongodb/mongo-c-driver (temporary workaround)
        (rf"^debian/({RE_SEMVER})-\d$", r"\1"),
        # 'gperftools-2.9.1' pkg:github/gperftools/gperftools
        # 'mongo/v1.5.2' pkg:github/google/benchmark
        # 'mongodb-8.2.0-alpha2' pkg:github/wiredtiger/wiredtiger
        # 'release-1.12.0' pkg:github/apache/avro
        # 'yaml-cpp-0.6.3' pkg:github/jbeder/yaml-cpp
        (rf"^[-a-z]+[-/][vr]?({RE_SEMVER})$", r"\1"),
        # 'asio-1-34-2' pkg:github/chriskohlhoff/asio
        # 'cares-1_27_0' pkg:github/c-ares/c-ares
        (
            rf"^[a-z]+-{RE_VER_NUM}[_-]{RE_VER_NUM}[_-]{RE_VER_NUM}{RE_VER_LBL}$",
            r"\1.\2.\3",
        ),
        # 'pcre2-10.40' pkg:github/pcre2project/pcre2
        (rf"^[a-z0-9]+-({RE_VER_NUM}\.{RE_VER_NUM})$", r"\1"),
        # 'icu-release-57-1' pkg:github/unicode-org/icu
        (rf"^[a-z]+-?[a-z]+-{RE_VER_NUM}-{RE_VER_NUM}$", r"\1.\2"),
        # 'v2.6.0' pkg:github/confluentinc/librdkafka
        # 'r2.5.1'
        (rf"^[rv]({RE_SEMVER})$", r"\1"),
        # 'v2025.04.21.00' pkg:github/facebook/folly
        (r"^v(\d+\.\d+\.\d+\.\d+)$", r"\1"),
    )
]
129+
130+
131+
def get_semver_from_release_version(release_ver: str) -> str:
    """Strip tag-style decoration from a release string, leaving the version number.

    The (pattern, replacement) pairs in VERSION_PATTERN_REPL are tried in order and
    the first pattern that matches at the start of `release_ver` is applied.

    Args:
        release_ver: Release or tag text, e.g. 'v2.6.0'.

    Returns:
        The rewritten version when a pattern matches; otherwise `release_ver`
        unchanged (including when it is empty or None).
    """
    if not release_ver:
        return release_ver
    for pattern, replacement in VERSION_PATTERN_REPL:
        if pattern.match(release_ver):
            return pattern.sub(replacement, release_ver)
    return release_ver
138+
139+
140+
# region special component use-case functions
141+
142+
143+
def get_version_from_wiredtiger_import_data(file_path: str) -> str:
144+
"""Get the info in the 'import.data' file saved in the wiredtiger folder"""
145+
try:
146+
with open(file_path, "r") as input_json:
147+
import_data = input_json.read()
148+
result = json.loads(import_data)
149+
except Exception as e:
150+
logger.error(f"Error loading JSON file from {file_path}")
151+
logger.error(e)
152+
return None
153+
return result.get("commit")
154+
155+
156+
def get_version_sasl_from_workspace(file_path: str) -> str:
    """Find the Windows Cyrus SASL version named in a WORKSPACE.bazel download URL.

    The file is scanned line by line for the first line that, once stripped,
    starts with the quoted download URL, e.g.
        "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
    and the text between 'windows_cyrus_sasl-' and '.zip' is returned.

    Args:
        file_path: Path of the WORKSPACE.bazel file to search.

    Returns:
        The version text (e.g. '2.1.28'), or None when no such line exists or the
        file cannot be read (the failure is logged as a warning).
    """
    url_start = '"https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-'
    try:
        with open(file_path, "r") as workspace:
            for raw_line in workspace:
                stripped = raw_line.strip()
                if not stripped.startswith(url_start):
                    continue
                after_name = stripped.split("windows_cyrus_sasl-")[1]
                return after_name.split(".zip")[0]
    except Exception as e:
        logger.warning(f"Unable to load {file_path}")
        logger.warning(e)
    return None
172+
173+
174+
def _log_special_case_version(component_key: str, version, source_file: str) -> None:
    """Log the outcome of a special-case version lookup: info when found, warning when not."""
    if version:
        logger.info(
            f"VERSION SPECIAL CASE: {component_key}: Found version '{version}' in '{source_file}' file"
        )
    else:
        logger.warning(
            f"VERSION SPECIAL CASE: {component_key}: No version found in '{source_file}' file"
        )


def process_component_special_cases(
    component_key: str, component: dict, versions: dict, repo_root: str
) -> None:
    """Set versions["import_script"] for components whose version is read from the repo itself.

    Only two component keys are handled; any other key leaves `versions` untouched.

    Args:
        component_key: PURL-style key identifying the component.
        component: SBOM component dict; only "evidence" -> "occurrences" is read here.
        versions: Dict updated in place. For a handled component, "import_script" is
            set to the version found, or to None when the lookup yields nothing.
        repo_root: Repository root path, used as a prefix for the files that are read.
    """
    ## Special case for Cyrus SASL ##
    if component_key == "pkg:github/cyrusimap/cyrus-sasl":
        # Cyrus SASL is optionally loaded as a Windows library, when needed. There is no source code for Endor Labs to scan.
        # The version of Cyrus SASL that is used is defined in the WORKSPACE.bazel file:
        # "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
        # Rather than add the complexity of Bazel queries to this script, we just search the text.
        version = get_version_sasl_from_workspace(repo_root + "/WORKSPACE.bazel")
        versions["import_script"] = version
        _log_special_case_version(component_key, version, "WORKSPACE.bazel")

    ## Special case for wiredtiger ##
    elif component_key == "pkg:github/wiredtiger/wiredtiger":
        # MongoDB release branches import wiredtiger commits via a bot. These commits will likely not line up with a release or tag.
        # Endor Labs will try to pull the nearest release/tag, but we want the more precise commit hash, which is stored in:
        # src/third_party/wiredtiger/import.data
        # "or" fallbacks also cover keys that are present but null.
        evidence = component.get("evidence") or {}
        occurrences = evidence.get("occurrences") or []
        if not occurrences:
            # Nothing says where the wiredtiger sources are; leave `versions` as it is.
            logger.warning(
                f"VERSION SPECIAL CASE: {component_key}: No evidence occurrences; cannot locate 'import.data' file"
            )
            return
        location = occurrences[0].get("location")
        version = None
        if location:
            # Only build the path when a location exists, so a missing value never
            # becomes the literal folder name "None".
            version = get_version_from_wiredtiger_import_data(
                f"{repo_root}/{location}/import.data"
            )
        versions["import_script"] = version
        _log_special_case_version(component_key, version, "import.data")
203+
204+
# endregion special component use-case functions

0 commit comments

Comments
 (0)