diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 943c592..3d3a639 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,8 +23,11 @@ jobs: - name: Validate run: | - isort --check-only src/ tests/ + isort --version + isort --check-only --diff src/ tests/ + black --version black --check --line-length 100 . + mypy --version mypy build-and-test: diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 170038b..cd97875 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -306,7 +306,12 @@ def normalize( subpath: Optional[AnyStr], encode: Optional[bool] = ..., ) -> Tuple[ - str, Optional[str], str, Optional[str], Union[str, Dict[str, str], None], Optional[str] + str, + Optional[str], + str, + Optional[str], + Union[str, Dict[str, str], None], + Optional[str], ]: ... @@ -335,7 +340,14 @@ def normalize( version_norm = normalize_version(version, encode) qualifiers_norm = normalize_qualifiers(qualifiers, encode) subpath_norm = normalize_subpath(subpath, encode) - return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm + return ( + type_norm, + namespace_norm, + name_norm, + version_norm, + qualifiers_norm, + subpath_norm, + ) class PackageURL( diff --git a/src/packageurl/contrib/url2purl.py b/src/packageurl/contrib/url2purl.py index 5ce8aa1..aa3c256 100644 --- a/src/packageurl/contrib/url2purl.py +++ b/src/packageurl/contrib/url2purl.py @@ -124,10 +124,10 @@ def build_generic_purl(uri): @purl_router.route( - "https?://registry.npmjs.*/.*", - "https?://registry.yarnpkg.com/.*", - "https?://(www\\.)?npmjs.*/package.*", - "https?://(www\\.)?yarnpkg.com/package.*", + r"https?://registry\.npmjs\.(com|org)/.*", + r"https?://registry\.yarnpkg\.com/.*", + r"https?://(www\.)?npmjs\.(com|org)/package.*", + r"https?://(www\.)?yarnpkg\.com/package.*", ) def build_npm_purl(uri): # npm URLs are difficult to disambiguate with regex @@ -221,9 +221,9 @@ def build_npm_web_purl(uri): @purl_router.route( - "https?://repo1.maven.org/maven2/.*", - "https?://central.maven.org/maven2/.*", - "maven-index://repo1.maven.org/.*", + r"https?://repo1\.maven\.org/maven2/.*", + r"https?://central\.maven\.org/maven2/.*", + r"maven-index://repo1\.maven\.org/.*", ) def build_maven_purl(uri): path = unquote_plus(urlparse(uri).path) @@ -253,7 +253,18 @@ def build_maven_purl(uri): qualifiers["classifier"] = classifier.strip("-") - valid_types = ("aar", "ear", "mar", "pom", "rar", "rpm", "sar", "tar.gz", "war", "zip") + valid_types = ( + "aar", + "ear", + "mar", + "pom", + "rar", + "rpm", + "sar", + "tar.gz", + "war", + "zip", + ) if extension in valid_types: qualifiers["type"] = extension @@ -261,7 +272,7 @@ def build_maven_purl(uri): # https://rubygems.org/gems/i18n-js-3.0.11.gem -@purl_router.route("https?://rubygems.org/(downloads|gems)/.*") +@purl_router.route(r"https?://rubygems\.org/(downloads|gems)/.*") def build_rubygems_purl(uri): # We use a more general route pattern instead of using `rubygems_pattern` # below by itself because we want to capture all rubygems download URLs, @@ -272,7 +283,7 @@ def build_rubygems_purl(uri): # https://rubygems.org/downloads/jwt-0.1.8.gem # https://rubygems.org/gems/i18n-js-3.0.11.gem rubygems_pattern = ( - r"^https?://rubygems.org/(downloads|gems)/(?P.+)-(?P.+)(\.gem)$" + r"^https?://rubygems\.org/(downloads|gems)/(?P.+)-(?P.+)(\.gem)$" ) return purl_from_pattern("gem", rubygems_pattern, uri) @@ -296,7 +307,7 @@ def build_cran_purl(uri): # https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg # https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm # https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe -pypi_pattern = r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$" +pypi_pattern = r"(?P(\w\.?)+(-\w+)*)-(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|egg|rpm|exe)$" # This pattern can be found in the following locations: # - wheel.wheelfile.WHEEL_INFO_RE @@ -312,8 +323,9 @@ def build_cran_purl(uri): @purl_router.route( - "https?://pypi.org/(packages|project)/.+", - "https?://.+python.+org/(packages|project)/.*", + r"https?://pypi\.org/(packages|project)/.+", + r"https?://pypi\.python\.org/(packages|project)/.*", + r"https?://files\.pythonhosted\.org/(packages|project)/.*", ) def build_pypi_purl(uri): path = unquote_plus(urlparse(uri).path) @@ -357,14 +369,16 @@ def build_composer_purl(uri): # http://nuget.org/packages/EntityFramework/4.2.0.0 # https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1 -nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P.+)/(?P.+)$" +nuget_www_pattern = ( + r"^https?://(www\.)?nuget\.org/(api/v2/)?packages?/(?P.+)/(?P.+)$" +) register_pattern("nuget", nuget_www_pattern) # https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg nuget_api_pattern = ( - r"^https?://api.nuget.org/v3-flatcontainer/" + r"^https?://api\.nuget\.org/v3-flatcontainer/" r"(?P.+)/" r"(?P.+)/" r".*(nupkg)$" # ends with "nupkg" @@ -378,7 +392,7 @@ def build_composer_purl(uri): # https://sourceforge.net/projects/ventoy/files/v1.0.96/Ventoy%201.0.96%20release%20source%20code.tar.gz/download # https://sourceforge.net/projects/geoserver/files/GeoServer/2.23.4/geoserver-2.23.4-war.zip/download sourceforge_download_pattern = ( - r"^https?://.*sourceforge.net/projects/" + r"^https?://((master|iweb)\.dl\.)?sourceforge\.net/projects/" r"(?P.+)/" r"files/" r"(?i:(?P=name)/)?" # optional case-insensitive name segment repeated @@ -392,7 +406,7 @@ def build_composer_purl(uri): # https://sourceforge.net/projects/spacesniffer/files/spacesniffer_1_3_0_2.zip/download sourceforge_download_pattern_bis = ( - r"^https?://.*sourceforge.net/projects/" + r"^https?://((master|iweb)\.dl\.)?sourceforge\.net/projects/" r"(?P.+)/" r"files/" r"(?i:(?P=name))_*(?P[0-9_]+).*" @@ -402,7 +416,7 @@ def build_composer_purl(uri): register_pattern("sourceforge", sourceforge_download_pattern_bis) -@purl_router.route("https?://.*sourceforge.net/project/.*") +@purl_router.route(r"https?://((master|iweb)\.dl\.)?sourceforge\.net/projects?/.*") def build_sourceforge_purl(uri): # We use a more general route pattern instead of using `sourceforge_pattern` # below by itself because we want to capture all sourceforge download URLs, @@ -412,7 +426,7 @@ def build_sourceforge_purl(uri): # http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2 sourceforge_pattern = ( - r"^https?://.*sourceforge.net/projects?/" + r"^https?://((master|iweb)\.dl\.)?sourceforge\.net/projects?/" r"(?P([^/]+))/" # do not allow more "/" segments r"(OldFiles/)?" r"(?P.+)/" @@ -437,27 +451,29 @@ def build_sourceforge_purl(uri): if remaining_uri_path_segments: project_name = remaining_uri_path_segments[0] # aloyscore sourceforge_purl = PackageURL( - type="sourceforge", name=project_name, qualifiers={"download_url": uri} + type="sourceforge", + name=project_name, + qualifiers={"download_url": uri}, ) return sourceforge_purl # https://crates.io/api/v1/crates/rand/0.7.2/download -cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" +cargo_pattern = r"^https?://crates\.io/api/v1/crates/(?P.+)/(?P.+)(\/download)$" register_pattern("cargo", cargo_pattern) # https://raw.githubusercontent.com/volatilityfoundation/dwarf2json/master/LICENSE.txt github_raw_content_pattern = ( - r"https?://raw.githubusercontent.com/(?P[^/]+)/(?P[^/]+)/" + r"https?://raw\.githubusercontent\.com/(?P[^/]+)/(?P[^/]+)/" r"(?P[^/]+)/(?P.*)$" ) register_pattern("github", github_raw_content_pattern) -@purl_router.route("https?://api.github\\.com/repos/.*") +@purl_router.route(r"https?://api\.github\.com/repos/.*") def build_github_api_purl(url): """ Return a PackageURL object from GitHub API `url`. @@ -488,15 +504,15 @@ def build_github_api_purl(url): # https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1 # https://codeload.github.com/berngp/grails-rest/zip/release/0.7 github_codeload_pattern = ( - r"https?://codeload.github.com/(?P.+)/(?P.+)/" - r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*" + r"https?://codeload\.github\.com/(?P.+)/(?P.+)/" + r"(zip|tar\.gz|tar\.bz2|tgz)/([^/]*/)*" r"(?Pv|V?)(?P.+)$" ) register_pattern("github", github_codeload_pattern) -@purl_router.route("https?://github\\.com/.*") +@purl_router.route(r"https?://github\.com/.*") def build_github_purl(url): """ Return a PackageURL object from GitHub `url`. @@ -504,38 +520,38 @@ def build_github_purl(url): # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip archive_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" - r"/archive/(.*/)*" + r"https?://github\.com/(?P.+)/(?P.+)" + r"/archive/([^/]*/)*" r"((?P=name)(-|_|@))?" - r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz)" ) # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip download_pattern = ( - r"https?://github.com/downloads/(?P.+)/(?P.+)/" + r"https?://github\.com/downloads/(?P.+)/(?P.+)/" r"((?P=name)(-|@)?)?" - r"(?Pv|V?)(?P.+).(zip|tar.gz|tar.bz2|.tgz)" + r"(?Pv|V?)(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz)" ) # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz raw_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/raw/(?Pv|V?)(?P[^/]+)/(?P.*)$" ) # https://github.com/fanf2/unifdef/blob/master/unifdef.c blob_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/blob/(?P[^/]+)/(?P.*)$" ) releases_download_pattern = ( - r"https?://github.com/(?P.+)/(?P.+)" + r"https?://github\.com/(?P.+)/(?P.+)" r"/releases/download/(?Pv|V?)(?P[^/]+)/.*$" ) # https://github.com/pombredanne/schematics.git - git_pattern = r"https?://github.com/(?P.+)/(?P.+).(git)" + git_pattern = r"https?://github\.com/(?P.+)/(?P.+)\.(git)" patterns = ( archive_pattern, @@ -584,7 +600,7 @@ def build_github_purl(url): ) -@purl_router.route("https?://bitbucket\\.org/.*") +@purl_router.route(r"https?://bitbucket\.org/.*") def build_bitbucket_purl(url): """ Return a PackageURL object from BitBucket `url`. @@ -602,9 +618,9 @@ def build_bitbucket_purl(url): name = segments[1] bitbucket_download_pattern = ( - r"https?://bitbucket.org/" + r"https?://bitbucket\.org/" r"(?P.+)/(?P.+)/downloads/" - r"(?P.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)" + r"(?P.+)\.(zip|tar\.gz|tar\.bz2|tgz|exe|msi)" ) matches = re.search(bitbucket_download_pattern, url) @@ -635,7 +651,7 @@ def build_bitbucket_purl(url): ) -@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*") +@purl_router.route(r"https?://gitlab\.com/(?!.*/archive/).*") def build_gitlab_purl(url): """ Return a PackageURL object from Gitlab `url`. @@ -675,7 +691,7 @@ def build_gitlab_purl(url): # https://gitlab.com/hoppr/hoppr/-/archive/v1.11.1-dev.2/hoppr-v1.11.1-dev.2.tar.gz gitlab_archive_pattern = ( - r"^https?://gitlab.com/" + r"^https?://gitlab\.com/" r"(?P.+)/(?P.+)/-/archive/(?P.+)/" r"(?P=name)-(?P=version).*" r"[^/]$" @@ -686,7 +702,7 @@ def build_gitlab_purl(url): # https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz hackage_download_pattern = ( - r"^https?://hackage.haskell.org/package/" + r"^https?://hackage\.haskell\.org/package/" r"(?P.+)-(?P.+)/" r"(?P=name)-(?P=version).*" r"[^/]$" @@ -696,13 +712,15 @@ def build_gitlab_purl(url): # https://hackage.haskell.org/package/cli-extras-0.2.0.0/ -hackage_project_pattern = r"^https?://hackage.haskell.org/package/(?P.+)-(?P[^/]+)/" +hackage_project_pattern = ( + r"^https?://hackage\.haskell\.org/package/(?P.+)-(?P[^/]+)/" +) register_pattern("hackage", hackage_project_pattern) @purl_router.route( - "https?://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/.*" + r"https?://storage\.googleapis\.com/google-code-archive-downloads/v2/code\.google\.com/.*" ) def build_generic_google_code_archive_purl(uri): # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com