diff --git a/utils/generate-cyclonedx b/utils/generate-cyclonedx index 60983ac1ea..a103b7b707 100755 --- a/utils/generate-cyclonedx +++ b/utils/generate-cyclonedx @@ -17,6 +17,8 @@ from pathlib import Path import urllib.request import subprocess import sys +import re + CYCLONEDX_VERSION = "1.6" SPDX_SCHEMA_URL = f"https://raw.githubusercontent.com/CycloneDX/specification/{CYCLONEDX_VERSION}/schema/spdx.schema.json" @@ -34,6 +36,11 @@ BR2_VERSION_FULL = ( .strip() ) +# Set of vulnerabilities that were addressed by a patch present in buildroot +# tree. This set is used to set the analysis of the ignored CVEs to +# 'resolved_with_pedigree'. +VULN_WITH_PEDIGREE = set() + SPDX_LICENSES = [] if not SPDX_SCHEMA_PATH.exists(): @@ -111,12 +118,91 @@ def cyclonedx_licenses(lic_list): } -def cyclonedx_patches(patch_list): +def extract_cves_from_header(header: str) -> list[str]: + """Extract CVE identifiers from the patch header. + + Args: + header (str): Content of the header of a patch. + + Returns: + list: Array of CVE identifier present in a patch header passed as + argument. + """ + PATCH_CVE_HEADER = "CVE: " + return [ + line.partition(PATCH_CVE_HEADER)[2].strip() + for line in header.splitlines() + if line.startswith(PATCH_CVE_HEADER) + ] + + +def patch_retrieve_header(content: str) -> str: + """Read the content of a patch and split the header from the content. + + Args: + content (str): Patch content. + + Returns: + str: Patch header content. + """ + DIFF_LINE_REGEX = re.compile(r"^diff\s+(?:--git|-[-\w]+)\s+(\S+)\s+(\S+)$") + INDEX_LINE_REGEX = re.compile(r"^Index:\s+(\S+)$") + + lines = content.split('\n') + + header = [] + for i, line in enumerate(lines): + if DIFF_LINE_REGEX.match(line): + # diff --git a/configure.ac b/configure.ac + # index 1234..1234 100644 + # --- a/configure.ac + # +++ b/configure.ac + break + elif INDEX_LINE_REGEX.match(line): + # Index: + # --- + # +++ + if i < len(lines) - 2 and lines[i + 1].startswith("===") and lines[i + 2].startswith("---"): + break + elif line.startswith("---"): + # Some patches don't have a 'diff' tag just the --- +++ tuple. + # Check next line is starting with '+++' + # ex: package/berkeleydb/0001-cwd-db_config.patch + if i < len(lines) - 2 and lines[i + 1].startswith("+++") and lines[i + 2].startswith("@@"): + break + else: + header.append(line) + + return '\n'.join(header) + + +def read_patch_file(patch_path: Path) -> str: + """Read the content of a patch file, handling compression. + + Args: + patch_path (Path): Patch path. + + Returns: + str: Patch content. + """ + if patch_path.suffix == ".gz": + f = gzip.open(patch_path, mode="rt") + elif patch_path.suffix == ".bz": + f = bz2.open(patch_path, mode="rt") + else: + f = open(patch_path) + + content = f.read() + f.close() + return content + + +def cyclonedx_patches(patch_list: list[str]): """Translate a list of patches from the show-info JSON to a list of patches in CycloneDX format. Args: - patch_list (dict): Information about the patches as a Python dictionary. + patch_list (list): Array of patch relative paths for a given component. Returns: dict: Patch information in CycloneDX format. @@ -125,38 +211,51 @@ def cyclonedx_patches(patch_list): for patch in patch_list: patch_path = brpath / patch if patch_path.exists(): - f = None - if patch.endswith('.gz'): - f = gzip.open(patch_path, mode="rt") - elif patch.endswith('.bz'): - f = bz2.open(patch_path, mode="rt") - else: - f = open(patch_path) - try: - patch_contents.append({ - "text": { - "content": f.read() - } - }) + content = read_patch_file(patch_path) except Exception: # If the patch can't be read it won't be added to # the resulting SBOM. print(f"Failed to handle patch: {patch}", file=sys.stderr) + continue - f.close() + header = patch_retrieve_header(content) + + issue = {} + cves = extract_cves_from_header(header) + if cves: + VULN_WITH_PEDIGREE.update(cves) + issue = { + "resolves": [ + { + "type": "security", + "name": cve + } for cve in cves + ] + } + + patch_contents.append({ + "diff": { + "text": { + "content": content + } + }, + **issue + }) else: # If the patch is not a file it's a tarball or diff url passed # through the `_PATCH` variable. patch_contents.append({ - "url": patch + "diff": { + "url": patch + } }) return { "pedigree": { "patches": [{ "type": "unofficial", - "diff": content + **content } for content in patch_contents] }, } @@ -229,7 +328,7 @@ def cyclonedx_vulnerabilities(show_info_dict): return [{ "id": cve, "analysis": { - "state": "in_triage", + "state": "resolved_with_pedigree" if cve in VULN_WITH_PEDIGREE else "in_triage", "detail": f"The CVE '{cve}' has been marked as ignored by Buildroot" }, "affects": [