Skip to content

Commit fc26751

Browse files
Refresh
1 parent 696d21b commit fc26751

File tree

4 files changed

+110
-121
lines changed

4 files changed

+110
-121
lines changed

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ else()
5656
message(WARNING "Unknown compiler... recklessly proceeding without a version check")
5757
endif()
5858

59-
# Also update etc/purls.txt.
6059
set(BSON_REQUIRED_VERSION 2.1.2)
6160
set(MONGOC_REQUIRED_VERSION 2.1.2)
6261
set(MONGOC_DOWNLOAD_VERSION 2.1.2)

etc/sbom/config.py

100644100755
Lines changed: 11 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python3
22
"""generate_sbom.py config. Operational configuration values stored separately from the core code."""
33

4-
import json
54
import logging
65
import re
76

@@ -24,7 +23,16 @@
2423
"pkg:github/",
2524
]
2625

27-
for component in endor_components_remove:
26+
components_remove = [
27+
# Endor Labs includes the main component in 'components'. This is not standard, so we remove it.
28+
"10gen/mongo",
29+
# should be pkg:github/antirez/linenoise - waiting on Endor Labs fix
30+
"amokhuginnsson/replxx",
31+
# a transitive dependency of s2 that is not necessary to include
32+
"sparsehash/sparsehash",
33+
]
34+
35+
for component in components_remove:
2836
for prefix in prefixes:
2937
endor_components_remove.append(prefix + component)
3038

@@ -41,56 +49,6 @@
4149
["pkg:c/github.com/", "pkg:github/"],
4250
]
4351

44-
# ################ PURL Validation ################
45-
REGEX_STR_PURL_OPTIONAL = ( # Optional Version (any chars except ? @ #)
46-
r"(?:@[^?@#]*)?"
47-
# Optional Qualifiers (any chars except @ #)
48-
r"(?:\?[^@#]*)?"
49-
# Optional Subpath (any chars)
50-
r"(?:#.*)?$"
51-
)
52-
53-
REGEX_PURL = {
54-
# deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
55-
"deb": re.compile(
56-
r"^pkg:deb/" # Scheme and type
57-
# Namespace (organization/user), letters must be lowercase
58-
r"(debian|ubuntu)+"
59-
r"/"
60-
r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name
61-
),
62-
# Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
63-
"generic": re.compile(
64-
r"^pkg:generic/" # Scheme and type
65-
r"([a-zA-Z0-9._-]+/)?" # Optional namespace segment
66-
r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name (required)
67-
),
68-
# GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
69-
"github": re.compile(
70-
r"^pkg:github/" # Scheme and type
71-
# Namespace (organization/user), letters must be lowercase
72-
r"[a-z0-9-]+"
73-
r"/"
74-
r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name (repository)
75-
),
76-
# PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
77-
"pypi": re.compile(
78-
r"^pkg:pypi/" # Scheme and type
79-
r"[a-z0-9_-]+" # Name, letters must be lowercase, dashes, underscore
80-
+ REGEX_STR_PURL_OPTIONAL
81-
),
82-
}
83-
84-
85-
def is_valid_purl(purl: str) -> bool:
86-
"""Validate a GitHub or Generic PURL"""
87-
for purl_type, regex in REGEX_PURL.items():
88-
if regex.match(purl):
89-
logger.debug(f"PURL: {purl} matched PURL type '{purl_type}' regex '{regex.pattern}'")
90-
return True
91-
return False
92-
93-
9452
# ################ Version Transformation ################
9553

9654
# In some cases we need to transform the version string to strip out tag-related text
@@ -140,65 +98,8 @@ def get_semver_from_release_version(release_ver: str) -> str:
14098
# region special component use-case functions
14199

142100

143-
def get_version_from_wiredtiger_import_data(file_path: str) -> str:
144-
"""Get the info in the 'import.data' file saved in the wiredtiger folder"""
145-
try:
146-
with open(file_path, "r") as input_json:
147-
import_data = input_json.read()
148-
result = json.loads(import_data)
149-
except Exception as e:
150-
logger.error(f"Error loading JSON file from {file_path}")
151-
logger.error(e)
152-
return None
153-
return result.get("commit")
154-
155-
156-
def get_version_sasl_from_workspace(file_path: str) -> str:
157-
"""Determine the version that is pulled for Windows Cyrus SASL by searching WORKSPACE.bazel"""
158-
# e.g.,
159-
# "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
160-
try:
161-
with open(file_path, "r") as file:
162-
for line in file:
163-
if line.strip().startswith(
164-
'"https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-'
165-
):
166-
return line.strip().split("windows_cyrus_sasl-")[1].split(".zip")[0]
167-
except Exception as e:
168-
logger.warning(f"Unable to load {file_path}")
169-
logger.warning(e)
170-
else:
171-
return None
172-
173-
174101
def process_component_special_cases(
175102
component_key: str, component: dict, versions: dict, repo_root: str
176103
) -> None:
177-
## Special case for Cyrus SASL ##
178-
if component_key == "pkg:github/cyrusimap/cyrus-sasl":
179-
# Cyrus SASL is optionally loaded as a Windows library, when needed. There is no source code for Endor Labs to scan.
180-
# The version of Cyrus SASL that is used is defined in the WORKSPACE.bazel file:
181-
# "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
182-
# Rather than add the complexity of Bazel queries to this script, we just search the text.
183-
184-
versions["import_script"] = get_version_sasl_from_workspace(repo_root + "/WORKSPACE.bazel")
185-
logger.info(
186-
f"VERSION SPECIAL CASE: {component_key}: Found version '{versions['import_script']}' in 'WORKSPACE.bazel' file"
187-
)
188-
189-
## Special case for wiredtiger ##
190-
elif component_key == "pkg:github/wiredtiger/wiredtiger":
191-
# MongoDB release branches import wiredtiger commits via a bot. These commits will likely not line up with a release or tag.
192-
# Endor labs will try to pull the nearest release/tag, but we want the more precise commit hash, which is stored in:
193-
# src/third_party/wiredtiger/import.data
194-
occurrences = component.get("evidence", {}).get("occurrences", [])
195-
if occurrences:
196-
location = occurrences[0].get("location")
197-
versions["import_script"] = get_version_from_wiredtiger_import_data(
198-
f"{repo_root}/{location}/import.data"
199-
)
200-
logger.info(
201-
f"VERSION SPECIAL CASE: {component_key}: Found version '{versions['import_script']}' in 'import.data' file"
202-
)
203-
104+
pass
204105
# endregion special component use-case functions

etc/sbom/endorctl_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def get_sbom_for_branch(self, git_url: str, branch: str) -> dict:
437437

438438
# ScanResult: search for a completed scan
439439
filter_str = endor_filter.scan_result(
440-
None, project_uuid, repository_version_ref, repository_version_sha
440+
EndorContextType.MAIN, project_uuid, repository_version_ref, repository_version_sha
441441
)
442442
scan_result = self.get_scan_result(filter_str, retry=False)
443443
project_uuid = scan_result["meta"]["parent_uuid"]

etc/sbom/generate_sbom.py

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
endor_components_remove,
2424
endor_components_rename,
2525
get_semver_from_release_version,
26-
is_valid_purl,
2726
process_component_special_cases,
2827
)
2928
from endorctl_utils import EndorCtl
@@ -54,7 +53,6 @@ def emit(self, record):
5453
# Add the handler to the logger
5554
logger.addHandler(warning_handler)
5655

57-
5856
# Get the absolute path of the script file and directory
5957
script_path = Path(__file__).resolve()
6058
script_directory = script_path.parent
@@ -66,6 +64,65 @@ def emit(self, record):
6664
REGEX_RELEASE_BRANCH = r"^v\d\.\d$"
6765
REGEX_RELEASE_TAG = r"^r\d\.\d.\d(-\w*)?$"
6866

67+
# ################ PURL Validation ################
# Tail shared by every PURL pattern below. Each part is optional, and the
# trailing "$" anchors the end so nothing may follow the subpath.
REGEX_STR_PURL_OPTIONAL = (
    # Optional Version (any chars except ? @ #)
    r"(?:@[^?@#]*)?"
    # Optional Qualifiers (any chars except @ #)
    r"(?:\?[^@#]*)?"
    # Optional Subpath (any chars)
    r"(?:#.*)?$"
)

# Compiled validation pattern per supported PURL type, keyed by type name.
REGEX_PURL = {
    # deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
    "deb": re.compile(
        r"^pkg:deb/"  # Scheme and type
        # Namespace (vendor), letters must be lowercase. Exactly one vendor is
        # accepted: the group is not repeated, so "debiandebian" or
        # "debianubuntu" do not validate.
        r"(debian|ubuntu)"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name
    ),
    # Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
    "generic": re.compile(
        r"^pkg:generic/"  # Scheme and type
        r"([a-zA-Z0-9._-]+/)?"  # Optional namespace segment
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (required)
    ),
    # GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
    "github": re.compile(
        r"^pkg:github/"  # Scheme and type
        # Namespace (organization/user), letters must be lowercase
        r"[a-z0-9-]+"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (repository)
    ),
    # PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
    "pypi": re.compile(
        r"^pkg:pypi/"  # Scheme and type
        r"[a-z0-9_-]+"  # Name, letters must be lowercase, dashes, underscore
        + REGEX_STR_PURL_OPTIONAL
    ),
}


# Metadata SBOM requirements
# Fields that every component of the metadata SBOM is expected to carry.
METADATA_FIELDS_REQUIRED = [
    "type",
    "bom-ref",
    "group",
    "name",
    "version",
    "description",
    "licenses",
    "copyright",
    "externalReferences",
    "scope",
]
# Groups of alternative fields: at least one field of each group is expected.
METADATA_FIELDS_ONE_OF = [
    ["author", "supplier"],
    ["purl", "cpe"],
]
125+
69126
# endregion init
70127

71128

@@ -80,7 +137,11 @@ def __init__(self):
80137
try:
81138
self.repo_root = Path(
82139
subprocess.run(
83-
"git rev-parse --show-toplevel", shell=True, text=True, capture_output=True
140+
"git rev-parse --show-toplevel",
141+
shell=True,
142+
text=True,
143+
capture_output=True,
144+
check=True,
84145
).stdout.strip()
85146
)
86147
self._repo = Repo(self.repo_root)
@@ -170,6 +231,15 @@ def extract_repo_from_git_url(git_url: str) -> dict:
170231
}
171232

172233

234+
def is_valid_purl(purl: str) -> bool:
    """Check a PURL against the patterns registered in REGEX_PURL.

    Args:
        purl: The package URL string to validate.

    Returns:
        True as soon as one pattern in REGEX_PURL matches, otherwise False.
        A value that is not a string (for example None) is reported as
        invalid instead of raising TypeError from the regex engine.
    """
    if not isinstance(purl, str):
        return False
    for purl_type, regex in REGEX_PURL.items():
        if regex.match(purl):
            logger.debug(f"PURL: {purl} matched PURL type '{purl_type}' regex '{regex.pattern}'")
            return True
    return False
241+
242+
173243
def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
174244
"""Create a dict of SBOM components with a version-less PURL as the key"""
175245
components = sbom["components"]
@@ -185,6 +255,23 @@ def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
185255
return components_dict
186256

187257

258+
def check_metadata_sbom(meta_bom: dict) -> None:
    """Warn about metadata SBOM components that lack expected fields.

    Each component is checked against METADATA_FIELDS_REQUIRED (every field
    must be present) and METADATA_FIELDS_ONE_OF (at least one field of each
    group must be present). Findings are only logged as warnings; nothing is
    modified or raised.

    Args:
        meta_bom: Metadata SBOM dict; its "components" list is inspected.
    """
    for component in meta_bom.get("components") or []:
        # 'bom-ref' and 'name' are themselves checked fields, so either may be
        # absent; look them up defensively so the report cannot raise KeyError.
        label = component.get("bom-ref") or component.get("name") or "<unknown>"
        for field in METADATA_FIELDS_REQUIRED:
            if field not in component:
                logger.warning(f"METADATA: '{label}' is missing required field '{field}'.")
        for fields in METADATA_FIELDS_ONE_OF:
            if not any(field in component for field in fields):
                logger.warning(f"METADATA: '{label}' is missing one of fields '{fields}'.")
273+
274+
188275
def read_sbom_json_file(file_path: str) -> dict:
189276
"""Load a JSON SBOM file (schema is not validated)"""
190277
try:
@@ -204,8 +291,8 @@ def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
204291
try:
205292
file_path = os.path.abspath(file_path)
206293
with open(file_path, "w", encoding="utf-8") as output_json:
207-
json.dump(sbom_dict, output_json, indent=2)
208-
output_json.write("\n")
294+
formatted_sbom = json.dumps(sbom_dict, indent=2) + "\n"
295+
output_json.write(formatted_sbom)
209296
except Exception as e:
210297
logger.error(f"Error writing SBOM file to {file_path}")
211298
logger.error(e)
@@ -449,6 +536,8 @@ def main() -> None:
449536
endor_bom = endorctl.get_sbom_for_branch(git_info.project, git_info.branch)
450537
elif target == "project":
451538
endor_bom = endorctl.get_sbom_for_project(git_info.project)
539+
else:
540+
endor_bom = None
452541

453542
if not endor_bom:
454543
logger.error("Empty result for Endor SBOM!")
@@ -466,9 +555,6 @@ def main() -> None:
466555

467556
## remove unneeded components ##
468557
# [list]endor_components_remove is defined in config.py
469-
# Endor Labs includes the main component in 'components'. This is not standard, so we remove it.
470-
endor_components_remove.append(f"pkg:github/{git_info.org}/{git_info.repo}")
471-
472558
# Reverse iterate the SBOM components list to safely modify in situ
473559
for i in range(len(endor_bom["components"]) - 1, -1, -1):
474560
component = endor_bom["components"][i]
@@ -529,6 +615,9 @@ def main() -> None:
529615
meta_bom["components"].sort(key=lambda c: c["bom-ref"])
530616
prev_bom["components"].sort(key=lambda c: c["bom-ref"])
531617

618+
# Check metadata SBOM for completeness
619+
check_metadata_sbom(meta_bom)
620+
532621
# Create SBOM component lookup dicts
533622
endor_components = sbom_components_to_dict(endor_bom)
534623
prev_components = sbom_components_to_dict(prev_bom)
@@ -537,7 +626,7 @@ def main() -> None:
537626

538627
# Attempt to determine the MongoDB Version being scanned
539628
logger.debug(
540-
f"Available MongoDB version options, tag: {git_info.release_tag}, branch: {git_info.branch}, previous SBOM: {prev_bom['metadata'].get('component',{}).get('version')}"
629+
f"Available MongoDB version options, tag: {git_info.release_tag}, branch: {git_info.branch}, previous SBOM: {prev_bom['metadata']['component']['version']}"
541630
)
542631
meta_bom_ref = meta_bom["metadata"]["component"]["bom-ref"]
543632

0 commit comments

Comments
 (0)