ossf · khorben · Oct 15, 2024 · Oct 15, 2024
diff --git a/tools/vuxml/README.md b/tools/vuxml/README.md
@@ -0,0 +1,51 @@
+# VuXML advisory converter
+
+These tools convert vulnerability advisories between the VuXML and OSV formats.
+They are relevant to FreeBSD's ports, and possibly to any other project that
+uses VuXML to track vulnerabilities.
+
+## Prerequisites
+
+Clone the following repository:
+- https://git.freebsd.org/ports.git
+
+Install the following packages or modules:
+- vuxml
+- python-lxml
+
+## Running the converter
+
+### Usage
+
+From VuXML to OSV format:
+
+```
+Usage: convert_vuxml.py [-e ecosystem][-o output_directory] path/to/vuln.xml
+```
+
+The converted vulnerabilities are either printed as a single JSON array on the
+standard output, or written to individual files in the output directory.
+
+From OSV format to VuXML:
+
+```
+Usage: convert_osv.py [-o output_file] path/to/osv.json...
+```
+
+Where the OSV files provided are consolidated into a single VuXML file.
+
+#### Options
+`-e`:
+Ecosystem to set in the converted OSV output (`convert_vuxml.py` only;
+default: `FreeBSD:ports`)
+
+`-o`:
+For `convert_vuxml.py`, the output directory in which to place the converted
+OSV `.json` files (the directory must exist and be writable). For
+`convert_osv.py`, the name of the output file to which the converted VuXML
+document is written.
+
+### Example
+
+```
+$ python3.9 convert_vuxml.py /usr/ports/security/vuxml/vuln.xml
+$ python3.9 convert_osv.py 002432c8-ef6a-11ea-ba8f-08002728f74c.json
+```
diff --git a/tools/vuxml/convert_osv.py b/tools/vuxml/convert_osv.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 1994-2024 The FreeBSD Project.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Copyright (c) 2024 The FreeBSD Foundation
+#
+# Portions of this software were developed by Pierre Pronchery
+# <pierre@defora.net> at Defora Networks GmbH under sponsorship
+# from the FreeBSD Foundation.
+
+"""OSV to VuXML converter."""
+import getopt
+import json
+from lxml import etree
+import sys
+
+# Namespace prefixes, in the "{uri}" form that is prepended to element names
+# when the <vuxml> root and the XHTML <body> elements are created.
+namespace_vuxml = "{http://www.vuxml.org/apps/vuxml-1}"
+namespace_xhtml = "{http://www.w3.org/1999/xhtml}"
+
+# URL prefixes recognised by convert(): an OSV reference whose URL starts
+# with one of them is written as the matching dedicated VuXML reference
+# element, with the prefix removed, instead of a generic <url> element.
+url_bid = "https://www.securityfocus.com/bid/"
+url_certsa = "https://www.cert.org/advisories/"
+url_certvu = "https://www.kb.cert.org/vuls/id/"
+url_cve = "https://api.osv.dev/v1/vulns/"
+url_freebsd_bugzilla = "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id="
+url_freebsd_sa = "https://www.freebsd.org/security/advisories/FreeBSD-"
+
+
+# convert
+def convert(filename, vuxml):
+    """Convert one OSV JSON file into a <vuln> element appended to vuxml.
+
+    filename: path of the OSV JSON document to read.
+    vuxml: lxml element that receives the new <vuln> child.
+
+    The "id", "summary" and "modified" fields of the record are required, as
+    are the "type" and "url" fields of every reference. Only "SEMVER" ranges
+    are converted; other range types are ignored.
+
+    Returns 0 on success. Any exception (unreadable file, invalid JSON,
+    missing required field...) is reported through error() and its return
+    value is returned instead.
+    """
+    ret = 0
+
+    # (URL prefix, VuXML element, suffix to strip) for ADVISORY references;
+    # they are tried in order and the first matching prefix wins
+    advisories = (
+        (url_bid, "bid", "/info"),
+        (url_freebsd_sa, "freebsdsa", ".asc"),
+        (url_certsa, "certsa", ".html"),
+        (url_certvu, "certvu", ""),
+        (url_cve, "cvename", ""),
+    )
+
+    try:
+        with open(filename, "r") as f:
+            j = json.load(f)
+
+        vuln = etree.Element("vuln", vid=j["id"])
+        vuxml.append(vuln)
+
+        # NOTE(review): the VuXML DTD appears to expect a single <affects>
+        # placed between <topic> and <description>, and <cancelled> as the
+        # only child of a cancelled entry -- confirm against vuxml-11.dtd.
+
+        # topic
+        etree.SubElement(vuln, "topic").text = j["summary"]
+
+        # description
+        if "details" in j:
+            description = etree.SubElement(vuln, "description")
+            body = etree.Element(namespace_xhtml+"body")
+            body.text = j["details"]
+            description.append(body)
+
+        # affects: one <affects>/<package> per named OSV package
+        for affected in j.get("affected", []):
+            if "package" not in affected \
+                    or "name" not in affected["package"]:
+                continue
+            affects = etree.SubElement(vuln, "affects")
+            package = etree.SubElement(affects, "package")
+            etree.SubElement(package, "name").text = \
+                affected["package"]["name"]
+
+            # every explicit version becomes its own <range><eq>
+            if "versions" in affected:
+                for version in affected["versions"]:
+                    rnge = etree.SubElement(package, "range")
+                    etree.SubElement(rnge, "eq").text = version
+
+            if "ranges" in affected:
+                for r in affected["ranges"]:
+                    if "type" not in r \
+                            or r["type"] != "SEMVER" \
+                            or "events" not in r:
+                        continue
+                    rnge = etree.Element("range")
+                    for event in r["events"]:
+                        for k, v in event.items():
+                            # "introduced": "0" means no lower bound
+                            if k == "introduced" and v != "0":
+                                etree.SubElement(rnge, "ge").text = v
+                            elif k == "fixed":
+                                etree.SubElement(rnge, "lt").text = v
+                            elif k == "last_affected":
+                                etree.SubElement(rnge, "le").text = v
+                    # do not emit ranges without any bound
+                    if len(rnge) >= 1:
+                        package.append(rnge)
+
+        # references
+        references = etree.Element("references")
+        for ref in j.get("references", []):
+            kind = ref["type"]
+            url = ref["url"]
+            # anything not recognised below is kept as a plain <url>
+            tag, text = "url", url
+            if kind == "ADVISORY":
+                for prefix, name, suffix in advisories:
+                    if url.startswith(prefix):
+                        tag, text = name, url[len(prefix):]
+                        if suffix and text.endswith(suffix):
+                            text = text[:-len(suffix)]
+                        break
+            elif kind == "REPORT" \
+                    and url.startswith(url_freebsd_bugzilla):
+                tag, text = "freebsdpr", url[len(url_freebsd_bugzilla):]
+            etree.SubElement(references, tag).text = text
+        if len(references):
+            vuln.append(references)
+
+        # dates: only the YYYY-MM-DD part of each timestamp is kept
+        entry = j["modified"][0:10]
+        discovery = entry
+        modified = None
+        if "published" in j:
+            # with a publication date, "modified" is a distinct event
+            modified = entry
+            entry = j["published"][0:10]
+        if "database_specific" in j \
+                and "discovery" in j["database_specific"]:
+            discovery = j["database_specific"]["discovery"][0:10]
+        dates = etree.SubElement(vuln, "dates")
+        etree.SubElement(dates, "discovery").text = discovery
+        etree.SubElement(dates, "entry").text = entry
+        if modified is not None:
+            etree.SubElement(dates, "modified").text = modified
+
+        # cancelled: the flag lives in the OSV record, not in <dates>
+        if "withdrawn" in j:
+            etree.SubElement(vuln, "cancelled")
+    except Exception as e:
+        ret = error(e)
+    return ret
+
+
+# error
+def error(string):
+    """Print an error message on the standard error stream.
+
+    The message is prefixed with the program name (sys.argv[0]).
+
+    Returns 2, so that callers can use the result as their status.
+    """
+    message = f"{sys.argv[0]}: error: {string}"
+    print(message, file=sys.stderr)
+    return 2
+
+
+# usage
+def usage(e=None):
+    """Print the usage message on the standard error stream.
+
+    e: optional message or exception, printed before the usage line.
+
+    Returns 1, so that callers can use the result as their status.
+    """
+    program = sys.argv[0]
+    if e is not None:
+        print(e, file=sys.stderr)
+    print("Usage: %s [-o output.xml] vuln.json..." % program,
+          file=sys.stderr)
+    return 1
+
+
+# warn
+def warn(string):
+    """Print a warning on the standard error stream.
+
+    The message is prefixed with the program name (sys.argv[0]).
+    """
+    message = f"{sys.argv[0]}: warning: {string}"
+    print(message, file=sys.stderr)
+
+
+# main
+def main():
+    """Command-line entry point: merge OSV JSON files into a VuXML document.
+
+    Accepts an optional "-o output" option followed by at least one OSV JSON
+    file name. Every file is converted with convert() into a shared <vuxml>
+    root element; the serialised document is written to the output file when
+    one was given, and printed on the standard output otherwise.
+
+    Returns 0 on success, the result of usage() for invalid arguments, and 2
+    when a conversion or the final output fails.
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "o:")
+    except getopt.GetoptError as e:
+        return usage(e)
+
+    output = None
+    for name, optarg in opts:
+        if name != "-o":
+            return usage("%s: Unsupported option" % name)
+        output = optarg
+
+    if len(args) < 1:
+        return usage()
+
+    vuxml = etree.Element(namespace_vuxml+"vuxml")
+    # any() stops at the first file that fails to convert
+    if any(convert(arg, vuxml) != 0 for arg in args):
+        return 2
+
+    try:
+        xml = etree.tostring(vuxml, pretty_print=True)
+        document = """<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE vuxml PUBLIC "-//vuxml.org//DTD VuXML 1.1//EN" "http://www.vuxml.org/dtd/vuxml-1/vuxml-11.dtd">"""+xml.decode()
+        if output is None:
+            print(document)
+        else:
+            with open(output, "w") as f:
+                print(document, file=f)
+    except Exception as e:
+        return error(e)
+
+    return 0
+
+
+# Script entry point: exit with the status returned by main().
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/vuxml/convert_vuxml.py b/tools/vuxml/convert_vuxml.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 1994-2024 The FreeBSD Project.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Copyright (c) 2024 The FreeBSD Foundation
+#
+# Portions of this software were developed by Pierre Pronchery
+# <pierre@defora.net> at Defora Networks GmbH under sponsorship
+# from the FreeBSD Foundation.
+
+"""VuXML to OSV converter."""
+import datetime
+import getopt
+import json
+from lxml import etree
+import re
+import sys
+
+# Dates are accepted when they look like YYYY-MM-DD with a year starting
+# with 19 or 20.
+re_date = re.compile(r'^(19|20)[0-9]{2}-[0-9]{2}-[0-9]{2}$')
+# A package name containing any of these characters is rejected.
+re_invalid_package_name = re.compile('[@!#$%^&*()<>?/\\|}{~:]')
+
+# warn if description has more than X characters
+# (main() also truncates the description to this length)
+DESCRIPTION_LENGTH = 5000
+
+# Namespace prefix, in "{uri}" form, prepended to every VuXML element name
+# that is looked up.
+namespace = "{http://www.vuxml.org/apps/vuxml-1}"
+
+# A <url> reference starting with one of these prefixes is an ADVISORY.
+url_advisories = [
+    "https://cve.mitre.org/cgi-bin/cvename.cgi?name=",
+    "https://nvd.nist.gov/vuln/detail/",
+    "https://github.com/advisories/",
+    "https://www.debian.org/security/"
+    ]
+# URL templates for the dedicated VuXML reference elements; %s is replaced
+# with the identifier taken from the element's text.
+url_bid = "https://www.securityfocus.com/bid/%s/info"
+url_certsa = "https://www.cert.org/advisories/%s.html"
+url_certvu = "https://www.kb.cert.org/vuls/id/%s"
+url_cve = "https://api.osv.dev/v1/vulns/%s"
+url_freebsd_bugzilla = "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=%s"
+url_freebsd_sa = "https://www.freebsd.org/security/advisories/FreeBSD-%s.asc"
+# A <url> reference starting with one of these prefixes is a REPORT.
+url_reports = [
+    "https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=",
+    "http://bugzilla.mozilla.org/show_bug.cgi?id=",
+    "https://bugzilla.mozilla.org/show_bug.cgi?id=",
+    "https://bugzilla.redhat.com/show_bug.cgi?id=",
+    "https://bugzilla.suse.com/show_bug.cgi?id="
+    ]
+
+
+# dateof
+def dateof(string):
+    """Convert a YYYY-MM-DD date into an ISO 8601 timestamp ending in "Z".
+
+    The time part is always midnight, e.g. "2024-10-15T00:00:00Z".
+
+    Raises ValueError if string is not a valid YYYY-MM-DD date.
+    """
+    parsed = datetime.datetime.strptime(string, "%Y-%m-%d")
+    return f"{parsed.isoformat()}Z"
+
+
+# error
+def error(string):
+    """Print an error message on the standard error stream.
+
+    The message is prefixed with the program name (sys.argv[0]).
+
+    Returns 2, so that callers can use the result as their status.
+    """
+    message = f"{sys.argv[0]}: error: {string}"
+    print(message, file=sys.stderr)
+    return 2
+
+
+# usage
+def usage(e=None):
+    """Print the usage message on the standard error stream.
+
+    e: optional message or exception, printed before the usage line.
+
+    Returns 1, so that callers can use the result as their status.
+    """
+    program = sys.argv[0]
+    if e is not None:
+        print(e, file=sys.stderr)
+    print("Usage: %s [-e ecosystem][-o output_directory] vuln.xml" % program,
+          file=sys.stderr)
+    return 1
+
+
+# warn
+def warn(string):
+    """Print a warning on the standard error stream.
+
+    The message is prefixed with the program name (sys.argv[0]).
+    """
+    message = f"{sys.argv[0]}: warning: {string}"
+    print(message, file=sys.stderr)
+
+
+# main
+def _vuln_date(vuln, tag):
+    """Read the <dates>/<tag> child of a <vuln> element.
+
+    Returns a (timestamp, status) pair. timestamp is the date converted by
+    dateof(), or None when the element is absent, empty or invalid. status
+    is 0, unless an invalid date was reported through error(), in which
+    case it is the value returned by error().
+    """
+    dates = vuln.find(namespace+"dates")
+    node = dates.find(namespace+tag) if dates is not None else None
+    if node is None or node.text is None:
+        return None, 0
+    if not re_date.match(node.text):
+        return None, error("{0} date not in YYYY-MM-DD format: {1}"
+                           .format(tag, node.text))
+    try:
+        return dateof(node.text), 0
+    except ValueError:
+        # right shape but impossible date, such as 2024-02-30
+        return None, error("{0} date is not a valid date: {1}"
+                           .format(tag, node.text))
+
+
+def _vuln_references(vuln):
+    """Return the OSV references of a <vuln> element, as a list of dicts."""
+    references = []
+    refs = vuln.find(namespace+"references")
+    if refs is None:
+        return references
+    # elements whose text is an identifier to insert in an advisory URL
+    templates = {
+        namespace+"bid": url_bid,
+        namespace+"certsa": url_certsa,
+        namespace+"certvu": url_certvu,
+        namespace+"cvename": url_cve,
+        namespace+"freebsdsa": url_freebsd_sa,
+    }
+    for ref in refs:
+        text = ref.text
+        if not text:
+            continue
+        if ref.tag in templates:
+            reference = {"type": "ADVISORY", "url": templates[ref.tag] % text}
+        elif ref.tag == namespace+"freebsdpr":
+            # only the "category/number" form is converted
+            parts = text.split("/")
+            if len(parts) != 2:
+                continue
+            reference = {"type": "REPORT",
+                         "url": url_freebsd_bugzilla % parts[1]}
+        elif ref.tag == namespace+"mlist":
+            reference = {"type": "DISCUSSION", "url": text}
+        elif ref.tag == namespace+"url":
+            # advisory prefixes take precedence over report prefixes
+            if text.startswith(tuple(url_advisories)):
+                kind = "ADVISORY"
+            elif text.startswith(tuple(url_reports)):
+                kind = "REPORT"
+            else:
+                kind = "WEB"
+            reference = {"type": kind, "url": text}
+        else:
+            continue
+        references.append(reference)
+    return references
+
+
+def _range_bound(rnge, tag):
+    """Return the text of the <tag> child of a <range> element.
+
+    Returns None when the child is absent, empty or the wildcard "*".
+    """
+    node = rnge.find(namespace+tag)
+    if node is None or not node.text or node.text == "*":
+        return None
+    return node.text
+
+
+def _package_ranges(package):
+    """Return the (ranges, versions) lists of a <package> element."""
+    ranges = []
+    versions = []
+    for rnge in package.findall(namespace+"range"):
+        # affected: ranges
+        event = {}
+        ge = _range_bound(rnge, "ge")
+        if ge is not None:
+            event["introduced"] = ge
+        gt = _range_bound(rnge, "gt")
+        if gt is not None:
+            # FIXME not accurate!!1
+            event["introduced"] = gt+",1"
+        le = _range_bound(rnge, "le")
+        if le is not None:
+            event["last_affected"] = le
+        lt = _range_bound(rnge, "lt")
+        if lt is not None:
+            event["fixed"] = lt
+        # OSV requires an "introduced" event in every range, including the
+        # ranges that only have an upper bound
+        if event and "introduced" not in event:
+            event["introduced"] = "0"
+
+        # affected: versions
+        eq = _range_bound(rnge, "eq")
+        if eq is not None:
+            versions.append(eq)
+
+        if event:
+            ranges.append({"type": "SEMVER",
+                           "events": [{k: v} for k, v in event.items()]})
+    return ranges, versions
+
+
+def _vuln_affected(vuln, vid, ecosystem):
+    """Return the OSV "affected" list of a <vuln> element.
+
+    Returns an (affected, status) pair; status is 0 unless a package was
+    skipped because of a missing or invalid name, in which case it is the
+    value returned by error().
+    """
+    status = 0
+    affected = []
+    affects = vuln.find(namespace+"affects")
+    if affects is None:
+        return affected, status
+    for package in affects.findall(namespace+"package"):
+        # the ranges are shared by every <name> of the package; a failure
+        # to parse them only costs the ranges, not the package itself
+        try:
+            ranges, versions = _package_ranges(package)
+        except Exception as e:
+            warn(e)
+            ranges, versions = [], []
+
+        for name in package.findall(namespace+"name"):
+            if name.text is None \
+                    or re_invalid_package_name.search(name.text) is not None:
+                status = error("%s package with invalid name: %s"
+                               % (vid, name.text))
+                continue
+            a = {"package": {"ecosystem": ecosystem, "name": name.text}}
+            if ranges:
+                a["ranges"] = list(ranges)
+            if versions:
+                a["versions"] = list(versions)
+            affected.append(a)
+    return affected, status
+
+
+# main
+def main():
+    """Command-line entry point: convert a VuXML file into OSV records.
+
+    Accepts the optional "-e ecosystem" and "-o output_directory" options
+    followed by exactly one VuXML file name, which is parsed with DTD
+    validation. Cancelled entries are skipped. With -o, every entry is
+    written to "<vid>.json" in the output directory; without it, all the
+    entries are printed as one JSON array on the standard output.
+
+    Returns 0 on success, the result of usage() for invalid arguments, and
+    the result of error() (2) when the file could not be parsed or at least
+    one entry was reported as erroneous.
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "e:o:")
+    except getopt.GetoptError as e:
+        return usage(e)
+    ecosystem = "FreeBSD:ports"
+    output = None
+    for name, optarg in opts:
+        if name == "-e":
+            ecosystem = optarg
+        elif name == "-o":
+            output = optarg
+        else:
+            return usage("%s: Unsupported option" % name)
+
+    if len(args) != 1:
+        return usage()
+
+    parser = etree.XMLParser(dtd_validation=True)
+    try:
+        tree = etree.parse(args[0], parser)
+    except (OSError, etree.XMLSyntaxError) as e:
+        # unreadable, malformed or DTD-invalid input
+        return error(e)
+    root = tree.getroot()
+
+    ret = 0
+
+    entries = []
+    for vuln in root:
+        # skip comments and anything else that is not a <vuln> element
+        if vuln.tag != namespace+"vuln":
+            continue
+        if vuln.find(namespace+"cancelled") is not None:
+            continue
+
+        # id
+        vid = vuln.get("vid")
+        entry = {"schema_version": "1.2.0", "id": vid}
+
+        # modified: falls back to the entry date when never modified
+        dates_entry, status = _vuln_date(vuln, "entry")
+        ret = status or ret
+        dates_modified, status = _vuln_date(vuln, "modified")
+        ret = status or ret
+        if dates_modified is not None:
+            entry["modified"] = dates_modified
+        elif dates_entry is not None:
+            entry["modified"] = dates_entry
+        if dates_entry is not None:
+            entry["published"] = dates_entry
+
+        # summary
+        topic = vuln.find(namespace+"topic")
+        if topic is None:
+            ret = error(f"{vid} has no topic")
+        elif topic.text is not None:
+            entry["summary"] = topic.text
+
+        # details
+        details = None
+        description = vuln.find(namespace+"description")
+        if description is None:
+            ret = error(f"{vid} has no description")
+        else:
+            try:
+                details = etree.tostring(description, encoding='unicode',
+                                         method='text')
+                if len(details) > DESCRIPTION_LENGTH:
+                    warn("%s: description truncated (> %s)"
+                         % (vid, DESCRIPTION_LENGTH))
+                    details = details[0:DESCRIPTION_LENGTH]
+            except Exception as e:
+                ret = error("%s could not parse description: %s: %s"
+                            % (vid, type(e).__name__, e))
+                details = None
+        if details is not None:
+            entry["details"] = details
+
+        # references
+        references = _vuln_references(vuln)
+        if references:
+            entry["references"] = references
+
+        # affected
+        affected, status = _vuln_affected(vuln, vid, ecosystem)
+        ret = status or ret
+        if affected:
+            entry["affected"] = affected
+
+        # database_specific
+        database_specific = {}
+        dates_discovery, status = _vuln_date(vuln, "discovery")
+        ret = status or ret
+        if dates_discovery is not None:
+            database_specific["discovery"] = dates_discovery
+        if database_specific:
+            entry["database_specific"] = database_specific
+
+        if output is not None:
+            try:
+                with open(output+f"/{vid}.json", "w") as f:
+                    print(json.dumps(entry, indent=4), file=f)
+            except Exception as e:
+                ret = error(e)
+        else:
+            entries.append(entry)
+
+    if output is None:
+        print(json.dumps(entries, indent=4))
+
+    return ret
+
+
+# Script entry point: exit with the status returned by main().
+if __name__ == "__main__":
+    sys.exit(main())