Skip to content

Commit 4e277fb

Browse files
Merge branch 'main' into minecode-pipeline-npm
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
2 parents e2a042e + 26bf2d9 commit 4e277fb

File tree

18 files changed

+972
-23
lines changed

18 files changed

+972
-23
lines changed

minecode/tests/collectors/test_github.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,5 @@ def test_github_get_all_versions(self):
4141
"minecode-pipelines/v0.0.1b7",
4242
"minecode-pipelines/v0.0.1b8",
4343
]
44-
for item in versions:
45-
self.assertIn(item, expected)
44+
for item in expected:
45+
self.assertIn(item, versions)

minecode_pipelines/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
VERSION = "0.0.1b9"
10+
11+
VERSION = "0.0.1b15"

minecode_pipelines/miners/conan.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from pathlib import Path
11+
import saneyaml
12+
13+
from scanpipe.pipes.federatedcode import commit_changes
14+
from scanpipe.pipes.federatedcode import push_changes
15+
from minecode_pipelines import VERSION
16+
from minecode_pipelines.pipes.conan import store_conan_packages
17+
18+
PACKAGE_BATCH_SIZE = 1000
19+
20+
21+
def mine_and_publish_conan_packageurls(conan_index_repo, cloned_data_repo, logger):
    """
    Mine Conan PackageURLs from a conan index checkout and publish them.

    Find every ``config.yml`` below ``recipes/`` in ``conan_index_repo.working_dir``,
    store the PackageURLs of each package in ``cloned_data_repo`` and commit and
    push the written files in batches of ``PACKAGE_BATCH_SIZE`` packages.
    ``logger`` is a callable receiving progress messages.
    """
    base_path = Path(conan_index_repo.working_dir)
    yml_files = list(base_path.glob("recipes/**/config.yml"))

    purl_files = []
    purls = []

    total_files = len(yml_files)
    logger(f"Processing total files: {total_files}")
    for idx, file_path in enumerate(yml_files, start=1):
        # Example: file_path = Path("repo_path/recipes/7zip/config.yml")
        # - file_path.parts = ("repo_path", "recipes", "7zip", "config.yml")
        # - file_path.parts[-2] = "7zip" (the package name)
        package = file_path.parts[-2]
        with open(file_path, encoding="utf-8") as f:
            versions = saneyaml.load(f)

        # Do not ``continue`` on an empty file: the batch check below must
        # still run, otherwise a trailing empty config.yml would leave the
        # last pending batch uncommitted.
        if versions:
            result_store = store_conan_packages(package, versions, cloned_data_repo)
            if result_store:
                purl_file, base_purl = result_store
                logger(f"writing packageURLs for package: {base_purl} at: {purl_file}")

                purl_files.append(purl_file)
                purls.append(str(base_purl))

        is_last_file = idx == total_files
        batch_is_full = len(purl_files) >= PACKAGE_BATCH_SIZE
        if purl_files and (batch_is_full or is_last_file):
            commit_changes(
                repo=cloned_data_repo,
                files_to_commit=purl_files,
                purls=purls,
                mine_type="packageURL",
                tool_name="pkg:pypi/minecode-pipelines",
                tool_version=VERSION,
            )
            push_changes(repo=cloned_data_repo)
            # Start a fresh batch so the next commit only lists new files.
            purl_files = []
            purls = []
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
import os
24+
from scanpipe.pipelines import Pipeline
25+
from minecode_pipelines import pipes
26+
from minecode_pipelines.miners import conan
27+
from scanpipe.pipes import federatedcode
28+
29+
MINECODE_CONAN_INDEX_REPO = "https://github.com/conan-io/conan-center-index"
30+
31+
MINECODE_DATA_CONAN_REPO = os.environ.get(
32+
"MINECODE_DATA_CONAN_REPO", "https://github.com/aboutcode-data/minecode-data-conan-test"
33+
)
34+
35+
36+
class MineConan(Pipeline):
    """Pipeline to mine Conan packages and publish them to FederatedCode repo."""

    @classmethod
    def steps(cls):
        return (
            cls.check_federatedcode_eligibility,
            cls.clone_conan_repos,
            cls.mine_and_publish_conan_package_urls,
            # Cleanup was defined but never scheduled, leaving the clones on disk.
            cls.delete_cloned_repos,
        )

    def check_federatedcode_eligibility(self):
        """
        Check if the project fulfills the following criteria for
        pushing the project result to FederatedCode.
        """
        federatedcode.check_federatedcode_configured_and_available(logger=self.log)

    def clone_conan_repos(self):
        """
        Clone the Conan index repository and the Conan data repository
        and store their Repo objects in the corresponding instance variables.
        """
        self.conan_index_repo = federatedcode.clone_repository(MINECODE_CONAN_INDEX_REPO)
        self.cloned_data_repo = federatedcode.clone_repository(MINECODE_DATA_CONAN_REPO)

        self.log(
            f"{MINECODE_CONAN_INDEX_REPO} repo cloned at: {self.conan_index_repo.working_dir}"
        )
        self.log(
            f"{MINECODE_DATA_CONAN_REPO} repo cloned at: {self.cloned_data_repo.working_dir}"
        )

    def mine_and_publish_conan_package_urls(self):
        """Mine Conan PackageURLs from the index clone and publish them to the data clone."""
        conan.mine_and_publish_conan_packageurls(
            self.conan_index_repo, self.cloned_data_repo, self.log
        )

    def delete_cloned_repos(self):
        """Remove the cloned Conan index and data repositories."""
        pipes.delete_cloned_repos(
            repos=[self.conan_index_repo, self.cloned_data_repo],
            logger=self.log,
        )
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
24+
from pathlib import Path
25+
26+
from minecode_pipelines.pipes import nuget
27+
28+
from scanpipe.pipelines import Pipeline
29+
from scanpipe.pipes import federatedcode
30+
31+
32+
class MineNuGet(Pipeline):
    """
    Mine and Publish NuGet PackageURLs.

    Mine PackageURLs from AboutCode NuGet catalog mirror and publish
    them to FederatedCode Git repository.
    """

    download_inputs = False
    CATALOG_REPO_URL = "https://github.com/aboutcode-org/aboutcode-mirror-nuget-catalog.git"

    @classmethod
    def steps(cls):
        return (
            cls.check_federatedcode_eligibility,
            cls.fetch_nuget_catalog,
            cls.mine_nuget_package_versions,
            cls.mine_and_publish_nuget_packageurls,
            cls.delete_cloned_repos,
        )

    def check_federatedcode_eligibility(self):
        """
        Check if the project fulfills the following criteria for
        pushing the project result to FederatedCode.
        """
        federatedcode.check_federatedcode_configured_and_available()

    def fetch_nuget_catalog(self):
        """Fetch NuGet package catalog from AboutCode mirror."""
        cloned_catalog = federatedcode.clone_repository(
            repo_url=self.CATALOG_REPO_URL,
            logger=self.log,
        )
        self.catalog_repo = cloned_catalog

    def mine_nuget_package_versions(self):
        """Mine NuGet package and versions from NuGet catalog."""
        catalog_dir = Path(self.catalog_repo.working_dir)
        # The miner returns the mined versions and the skipped packages as a pair.
        self.package_versions, self.skipped_packages = nuget.mine_nuget_package_versions(
            catalog_path=catalog_dir,
            logger=self.log,
        )

    def mine_and_publish_nuget_packageurls(self):
        """Mine and publish PackageURLs from NuGet package versions."""
        nuget.mine_and_publish_nuget_packageurls(
            package_versions=self.package_versions,
            logger=self.log,
        )

    def delete_cloned_repos(self):
        """Remove cloned catalog repository."""
        if not self.catalog_repo:
            return

        self.log("Removing cloned repository")
        federatedcode.delete_local_clone(repo=self.catalog_repo)

minecode_pipelines/pipes/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,12 @@
1212
import os
1313
import shutil
1414
from pathlib import Path
15-
15+
from git import Repo
1616
import requests
1717
import saneyaml
1818

1919
from aboutcode.hashid import PURLS_FILENAME
20-
from git import Repo
2120

22-
from scanpipe.pipes.federatedcode import delete_local_clone
2321
from scanpipe.pipes.federatedcode import commit_and_push_changes
2422

2523
from minecode_pipelines.utils import get_temp_file
@@ -188,6 +186,8 @@ def write_data_to_json_file(path, data):
188186

189187

190188
def delete_cloned_repos(repos, logger=None):
189+
from scanpipe.pipes.federatedcode import delete_local_clone
190+
191191
if not repos:
192192
return
193193

minecode_pipelines/pipes/alpine.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,11 @@ def build_package(extracted_pkginfo, distro, repo):
118118
)
119119

120120
parties = []
121-
maintainers = extracted_pkginfo.get("maintainer")
122-
if maintainers:
123-
name, email = parse_email(maintainers)
124-
if name:
125-
party = Party(name=name, role="maintainer", email=email)
121+
maintainer = extracted_pkginfo.get("maintainer")
122+
if maintainer:
123+
maintainer_name, maintainer_email = parse_email(maintainer)
124+
if maintainer_name:
125+
party = Party(name=maintainer_name, role="maintainer", email=maintainer_email)
126126
parties.append(party)
127127

128128
purl = PackageURL(

minecode_pipelines/pipes/conan.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
from packageurl import PackageURL
24+
from pathlib import Path
25+
from aboutcode import hashid
26+
from minecode_pipelines.pipes import write_data_to_yaml_file
27+
28+
29+
def store_conan_packages(pacakge_name, versions_data, fed_repo):
    """
    Collect Conan package versions into purls and write them to the repo.

    ``versions_data`` is the loaded content of a recipe ``config.yml``; the
    entries under its ``versions`` key are used as the package versions.
    The purls are written to the package purls file under
    ``fed_repo.working_dir``.

    Return a ``(purl_file_path, base_purl)`` tuple, or None when
    ``versions_data`` has no usable ``versions`` entry, in which case
    nothing is written.
    """
    # A config.yml without a "versions" mapping (or with an empty one) has
    # nothing to publish; report that to the caller instead of raising.
    versions = versions_data.get("versions") if isinstance(versions_data, dict) else None
    if not versions:
        return None

    base_purl = PackageURL(type="conan", name=pacakge_name)

    updated_purls = [
        PackageURL(type="conan", name=pacakge_name, version=version).to_string()
        for version in versions
    ]

    ppath = hashid.get_package_purls_yml_file_path(base_purl)
    purl_file_full_path = Path(fed_repo.working_dir) / ppath
    write_data_to_yaml_file(path=purl_file_full_path, data=updated_purls)
    return purl_file_full_path, base_purl

minecode_pipelines/pipes/debian.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,12 @@ def get_packages(self, previous_index_last_modified_date=None, logger=None):
119119
previous_index_last_modified_date, "%Y-%m-%d %H:%M:%S"
120120
)
121121
for entry in ls.parse_directory_listing(content):
122-
entry_date = datetime.strptime(entry.date, "%Y-%m-%d")
122+
entry_date = None
123+
if entry.date:
124+
entry_date = datetime.strptime(entry.date, "%Y-%m-%d")
123125
if (entry.type != ls.FILE) or (
124126
previous_index_last_modified_date
127+
and entry_date
125128
and (entry_date <= previous_index_last_modified_date)
126129
):
127130
continue
@@ -158,8 +161,6 @@ def get_packages(self, previous_index_last_modified_date=None, logger=None):
158161
name=package_url.name,
159162
version=package_url.version,
160163
qualifiers=package_url.qualifiers,
161-
file_name=file_name,
162-
date=entry.date,
163164
size=entry.size,
164165
download_url=url_template.format(path=path),
165166
)
@@ -223,7 +224,7 @@ def collect_packages_from_debian(commits_per_push=PACKAGE_BATCH_SIZE, logger=Non
223224

224225
current_purls = []
225226
prev_purl = current_purl
226-
current_purls.append(package.to_string())
227+
current_purls.append(package.purl)
227228

228229
if current_purls:
229230
# write packageURLs to file

0 commit comments

Comments
 (0)