diff --git a/licenses/check_licenses.yml b/licenses/check_licenses.yml
new file mode 100644
index 0000000000..e6bb655816
--- /dev/null
+++ b/licenses/check_licenses.yml
@@ -0,0 +1,83 @@
+name: Check and update licenses
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  license_update:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      # update_licenses.py imports `requests`, which is not part of the
+      # standard library and so has to be installed explicitly.
+      - name: Install dependencies
+        run: python -m pip install requests
+
+      # NOTE(review): the script is invoked relative to the checkout root, but it
+      # lives in licenses/ in this change; confirm the intended working directory.
+      - name: Run license script and generate patch
+        # Later steps read steps.check_licenses.outputs.*, which only resolves
+        # when this step declares the matching id.
+        id: check_licenses
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python update_licenses.py --source=pypi TensorFlow
+          python update_licenses.py --source=github:easybuilders/easybuild EasyBuild
+          if [ -f license_update.patch ] && [ -s license_update.patch ]; then
+            # The patch is multi-line, so it has to be written with the
+            # delimiter form of GITHUB_OUTPUT; "name=value" only carries
+            # a single line.
+            {
+              echo "patch<<LICENSE_PATCH_EOF"
+              cat license_update.patch
+              echo
+              echo "LICENSE_PATCH_EOF"
+            } >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Create a PR (if changes detected)
+        # The next step reads steps.create_pull_request.outputs.*, so the id
+        # must be declared here.
+        id: create_pull_request
+        uses: peter-evans/create-pull-request@v5
+        if: steps.check_licenses.outputs.patch != ''
+        with:
+          commit-message: "Auto PR: Update licenses"
+          title: "Auto PR: Update licenses"
+          body: ${{ steps.check_licenses.outputs.patch }}
+          # The PR branch has to differ from the base branch.
+          branch: auto/update-licenses
+          base: main # specify the right branch here
+
+      - name: Apply patch (if no PR created)
+        if: steps.create_pull_request.outputs.pull-request-number == '' && steps.check_licenses.outputs.patch != ''
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # update_licenses.py has already rewritten licenses.json in the
+          # working tree, and license_update.patch holds that same JSON
+          # document rather than a unified diff, so `git apply` would only
+          # fail here. Commit the regenerated file directly when it changed.
+          git add licenses.json
+          if git diff --cached --quiet; then
+            echo "No changes to apply"
+          else
+            git config user.name "github-actions[bot]"
+            git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+            git commit -m "Update licenses.json"
+            git push
+          fi
diff --git a/licenses/update_licenses.py b/licenses/update_licenses.py
new file mode 100644
index 0000000000..d4b898e24c
--- /dev/null
+++ b/licenses/update_licenses.py
@@ -0,0 +1,227 @@
+"""Look up software licenses and record them in licenses.json.
+
+Usage examples::
+
+    python update_licenses.py --source=pypi TensorFlow
+    python update_licenses.py --source=github:easybuilders/easybuild EasyBuild
+    python update_licenses.py --source=user --manual=MIT --spdx=MIT MyTool
+"""
+import requests
+import argparse
+import json
+import os
+from datetime import datetime
+
+# Seconds to wait on a remote API. requests applies no timeout by default,
+# so a stalled server would otherwise hang the CI job indefinitely.
+REQUEST_TIMEOUT = 30
+
+parser = argparse.ArgumentParser(description='Script to ingest licenses')
+parser.add_argument('--source', help='Source (GitHub, PyPI, CRAN, Repology) or user')
+parser.add_argument('projects', nargs='+', help='List of project names')
+parser.add_argument('--manual', help='Manually provided license', required=False)
+parser.add_argument('--spdx', help='SPDX identifier for the license', required=False)
+# NOTE: parsed at import time because license_info() and main() read the
+# module-level `args`.
+args = parser.parse_args()
+
+
+def _get_json(url, headers=None):
+    """Return the decoded JSON body of url, or None unless the status is 200."""
+    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+    if r.status_code != 200:
+        return None
+    return r.json()
+
+
+# Retrieve license from various sources. Every lookup returns a
+# (license, source_label, url) tuple; a failed lookup is reported as
+# ("not found", None, None).
+def github(source):
+    """Query the GitHub REST API for the license of a repository.
+
+    source is 'owner/repo', optionally carrying the 'github:' prefix.
+    """
+    repo = source.removeprefix('github:')
+    url = (
+        "https://api.github.com/repos/{repo}/license".format(repo=repo)
+    )
+    headers = {
+        "Accept": "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+    # Authenticate only when a token is set: formatting a missing token
+    # would send the literal header "Bearer None", which is not a valid
+    # credential.
+    token = os.getenv('GITHUB_TOKEN')
+    if token:
+        headers["Authorization"] = "Bearer {}".format(token)
+    data = _get_json(url, headers=headers)
+    # Do not index "license" unconditionally: a missing or null entry
+    # should be reported as "not found" rather than raise.
+    lic = data.get('license') if isinstance(data, dict) else None
+    if not lic:
+        return "not found", None, None
+    return lic.get('spdx_id'), 'GitHub', lic.get('url')
+
+
+def pypi(project):
+    """Query the PyPI JSON API for the license field of project."""
+    url = "https://pypi.org/pypi/{project}/json".format(project=project)
+    data = _get_json(url)
+    if data is None:
+        return "not found", None, None
+    return data['info']['license'], 'PyPI', data['info'].get('project_url')
+
+
+def cran(project):
+    """Query crandb for the License field of a CRAN package."""
+    url = "https://crandb.r-pkg.org/{project}".format(project=project)
+    data = _get_json(url)
+    if data is None:
+        return "not found", None, None
+    return data['License'], 'CRAN', None
+
+
+def repology(project):
+    """Query Repology for the licenses recorded for project."""
+    url = "https://repology.org/api/v1/project/{project}".format(
+        project=project
+    )
+    data = _get_json(url)
+    if data is None:
+        return "not found", None, None
+    if isinstance(data, dict):
+        return data.get('license', 'not found'), 'Repology', None
+    # The project endpoint answers with a list of package records (one per
+    # repository), on which .get() would raise AttributeError. Merge the
+    # per-package "licenses" lists into one sorted, de-duplicated string.
+    found = sorted({
+        lic
+        for pkg in data
+        if isinstance(pkg, dict)
+        for lic in (pkg.get('licenses') or [])
+    })
+    return (', '.join(found) if found else 'not found'), 'Repology', None
+
+
+def ecosysteDotms_pypi(project):
+    """Query packages.ecosyste.ms for the license of a PyPI package."""
+    url = "https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/{project}".format(
+        project=project
+    )
+    data = _get_json(url)
+    if data is None:
+        return "not found", None, None
+    # NOTE(review): the packages API appears to publish this value under
+    # "licenses", so that key is tried as a fallback - confirm against the
+    # API documentation.
+    lic = data.get('license') or data.get('licenses') or 'not found'
+    return lic, 'Ecosyste.ms (PyPI)', None
+
+
+def ecosysteDotms_github(source):
+    """Query repos.ecosyste.ms for the license of a GitHub repository.
+
+    source is 'owner/repo', optionally carrying the 'github:' prefix.
+    """
+    repo = source.removeprefix('github:')
+    url = "https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/{repo}".format(
+        repo=repo
+    )
+    data = _get_json(url)
+    if data is None:
+        return "not found", None, None
+    return data.get('license') or 'not found', 'Ecosyste.ms (GitHub)', None
+
+
+# Main license retrieval function
+def license_info(project):
+    """Build the license record for project from the command-line options."""
+    # --source is optional; normalise a missing value to '' so that the
+    # substring test below cannot raise TypeError when only --manual is given.
+    requested = args.source or ''
+    if requested == 'pypi':
+        lic, source, url = ecosysteDotms_pypi(project)
+    elif "github" in requested:
+        lic, source, url = ecosysteDotms_github(requested)
+    elif args.manual:
+        lic = args.manual
+        source = args.source
+        url = None
+    else:
+        lic, source, url = "not found", None, None
+
+    spdx_id = args.spdx if args.spdx else (lic if lic and lic != "not found" else None)
+
+    info = {
+        "license": lic,
+        "source": source,
+        "spdx_id": spdx_id,
+        "retrieved_at": datetime.now().isoformat(),
+    }
+    return info
+
+
+def update_json(licenses, project, info):
+    """Record info as the current license of project and rewrite licenses.json.
+
+    The record is also appended to the project's history. Returns licenses.
+    """
+    if project in licenses:
+        licenses[project].setdefault('history', []).append(info)
+        licenses[project]['current'] = info
+        print('Updated license for project {project}'.format(project=project))
+    else:
+        licenses[project] = {
+            "current": info,
+            "history": [info],
+        }
+        print('Added new license for project {project}'.format(project=project))
+
+    lic_json = json.dumps(licenses, indent=4)
+    with open('licenses.json', 'w') as lic_file:
+        lic_file.write(lic_json)
+
+    return licenses
+
+
+# Create patch output
+def generate_patch(licenses):
+    """Return licenses as indented JSON (the full document, not a diff)."""
+    patch = json.dumps(licenses, indent=4)
+    return patch
+
+
+# Function to save patch to a file
+def save_patch(patch_content, filename="license_update.patch"):
+    """Write patch_content to filename and report where it was saved."""
+    with open(filename, 'w') as patch_file:
+        patch_file.write(patch_content)
+    print("Patch saved to {filename}".format(filename=filename))
+
+
+def main():
+    """Refresh every requested project, then write licenses.json and the patch."""
+    if os.path.exists('licenses.json'):
+        with open('licenses.json', 'r') as lic_dict:
+            licenses = json.loads(lic_dict.read())
+    else:
+        licenses = {}
+
+    for project in args.projects:
+        info = license_info(project)
+        update_json(licenses, project, info)
+
+    patch = generate_patch(licenses)
+    save_patch(patch)
+
+    with open('licenses.json', 'w') as lic_file:
+        lic_file.write(patch)
+
+    print("Patch output:\n{patch}".format(patch=patch))
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/modules/module_check.sh b/modules/module_check.sh
new file mode 100755
index 0000000000..cf5d48d071
--- /dev/null
+++ b/modules/module_check.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+
+# This script checks the consistency of EB-generated modules and identifies broken or missing modules.
+# Usage: ./module_check.sh EASYSTACK_FILE [PR_DIFF_FILE]
+
+# It uses an adapted approach from check_missing_installations.sh for handling PRs/unmerged PRs
+TOPDIR=$(dirname "$(realpath "$0")")
+
+if [ "$#" -eq 1 ]; then
+    echo "No PR diff provided. Processing all modules in the easystack file."
+    pr_exceptions=""
+elif [ "$#" -eq 2 ]; then
+    echo "Using $2 to create exceptions for PR filtering of easystack"
+    pr_diff="$2"
+    pr_exceptions=$(grep '^+' "$pr_diff" | grep 'from-pr' | uniq | awk '{print $3}' | xargs -I {} echo " || /'{}'/")
+else
+    echo "ERROR: Usage: $0 EASYSTACK_FILE [PR_DIFF_FILE]" >&2
+    exit 1
+fi
+
+easystack="$1"
+
+LOCAL_TMPDIR=$(mktemp -d)
+# Remove the scratch directory however the script ends, including the
+# "exit 1" taken when broken modules are reported.
+trap 'rm -rf "$LOCAL_TMPDIR"' EXIT
+
+# Clone the develop branch of EasyBuild and use that to search for easyconfigs
+git clone -b develop https://github.com/easybuilders/easybuild-easyconfigs.git "$LOCAL_TMPDIR/easyconfigs"
+export EASYBUILD_ROBOT_PATHS="$LOCAL_TMPDIR/easyconfigs/easybuild/easyconfigs"
+
+# All PRs used in EESSI are supposed to be merged, so we can strip out all cases of from-pr
+tmp_easystack="${LOCAL_TMPDIR}/$(basename "${easystack}")"
+grep -v 'from-pr' "${easystack}" > "${tmp_easystack}"
+
+# If PR exceptions exist, modify the easystack file to include exceptions
+if [ -n "$pr_exceptions" ]; then
+    # Use awk to exclude lines containing PR numbers specified in pr_exceptions
+    awk_command="awk '!/from-pr/ EXCEPTIONS' ${easystack}"
+    awk_command=${awk_command/\\/}
+    eval "${awk_command/EXCEPTIONS/$pr_exceptions}" > "${tmp_easystack}"
+fi
+
+# Set up temporary directories for module installation and lock files
+TMPDIR=${TMPDIR:-/tmp}/$USER
+module_install_dir="$TMPDIR/EESSI/module-only"
+# Generated module files are looked up under module_install_dir/all below,
+# so that is the directory that has to be on MODULEPATH for name/version loads.
+module_only_modulepath="$module_install_dir/all"
+locks_dir="$TMPDIR/EESSI/locks"
+mkdir -p "$module_install_dir" "$locks_dir"
+
+# Log file to record broken modules
+broken_modules_log="broken_modules.log"
+: > "$broken_modules_log"
+
+# To keep track of already-checked modules and avoid re-checking
+declare -A checked_modules
+
+# Print the environment, sorted, without Lmod bookkeeping and shell-state
+# variables that legitimately differ between two module loads.
+# The variable name is matched up to its "=", so prefixes such as LMOD_ need
+# an explicit [^=]* to cover the rest of the name.
+get_filtered_env() {
+    env | grep -v -E '^((__)?LMOD_[^=]*|MODULEPATH|MODULESHOME|LOADEDMODULES|BASH_FUNC_module[^=]*|_ModuleTable[^=]*|PWD|SHLVL|OLDPWD|PS1|PS2|_LMFILES_)=' | sort
+}
+
+# Identify missing easyconfigs based on the temporary easystack file
+echo "Identifying missing easyconfigs using the temporary easystack file..."
+missing_output=$(eb --easystack "${tmp_easystack}" --missing --robot 2>&1)
+# The captured text mixes eb's messages with the report itself; keep only the
+# easyconfig file names (*.eb), in order of first appearance, since those are
+# the only tokens the eb calls below can act on.
+missing_easyconfigs=$(echo "$missing_output" | tr -s '[:space:]()' '\n' | grep -E '\.eb$' | awk '!seen[$0]++')
+
+if [ -z "$missing_easyconfigs" ]; then
+    echo "No missing easyconfigs to install."
+    exit 0
+fi
+
+# Process each missing easyconfig file
+for easyconfig_file in $missing_easyconfigs; do
+    package_name=$(basename "$easyconfig_file" .eb)
+
+    # Building of the easyconfig
+    echo "Building $package_name using EasyBuild..."
+    if ! eb "$easyconfig_file" --robot; then
+        echo "EasyBuild build failed for $package_name. Skipping..."
+        echo "$package_name: EasyBuild build failed" >> "$broken_modules_log"
+        continue
+    fi
+
+    # Generate the module using --module-only
+    echo "Generating module for $package_name using --module-only..."
+    if ! eb "$easyconfig_file" --module-only --installpath-modules "$module_install_dir" --locks-dir "$locks_dir" --force --robot; then
+        echo "EasyBuild --module-only command failed for $package_name. Skipping..."
+        echo "$package_name: EasyBuild --module-only command failed" >> "$broken_modules_log"
+        continue
+    fi
+
+    # Find the module file generated from the build
+    # NOTE(review): confirm that the EasyBuild version in use provides --show-module.
+    module_relpath=$(eb "$easyconfig_file" --show-module --robot 2>/dev/null)
+    if [ -z "$module_relpath" ]; then
+        echo "Failed to get module relative path for $package_name"
+        echo "$package_name: Failed to get module relative path" >> "$broken_modules_log"
+        continue
+    fi
+
+    # Modules names and version
+    module_software=$(echo "$module_relpath" | sed 's/\.lua$//')
+
+    # Check if the module has already been validated to avoid redundant checks
+    if [ -n "${checked_modules[$module_software]}" ]; then
+        echo "Module $module_software already checked. Skipping."
+        continue
+    fi
+
+    # Paths to the module files generated from build and the --module-only
+    module_file_build="${EASYBUILD_INSTALLPATH}/modules/all/${module_relpath}"
+    module_file_module_only="${module_only_modulepath}/${module_relpath}"
+
+    # Check if both module files exist
+    if [ ! -f "$module_file_build" ]; then
+        echo "Module file from full build not found: $module_file_build"
+        echo "$package_name: Module file from full build not found" >> "$broken_modules_log"
+        continue
+    fi
+
+    if [ ! -f "$module_file_module_only" ]; then
+        echo "Module file from --module-only build not found: $module_file_module_only"
+        echo "$package_name: Module file from --module-only build not found" >> "$broken_modules_log"
+        continue
+    fi
+
+    # Compare the module files
+    if diff -q "$module_file_build" "$module_file_module_only" >/dev/null; then
+        echo "Module files for $package_name match"
+    else
+        echo "Module files for $package_name differ"
+        echo "$package_name: Module files differ" >> "$broken_modules_log"
+        # Save differences
+        diff_file="${module_software//\//_}_module_diff.txt"
+        diff "$module_file_build" "$module_file_module_only" > "$diff_file"
+        echo "Module file differences saved to $diff_file"
+    fi
+
+    # Proceed to compare the environments
+    echo "Testing module: $module_software"
+
+    # Compare the environments of the modules
+    module purge
+    module unuse "$module_only_modulepath"
+    module load EasyBuild
+
+    # Load the module from the full build
+    if module --ignore_cache load "$module_software" 2>/dev/null; then
+        original_env=$(get_filtered_env)
+        module unload "$module_software"
+    else
+        echo "Failed to load module from full build: $module_software."
+        original_env=""
+    fi
+
+    # Load the module from the --module-only
+    module purge
+    module use "$module_only_modulepath"
+    # Load EasyBuild here too: the reference environment above was captured
+    # with it loaded, and eb is called again at the top of the next iteration.
+    module load EasyBuild
+
+    if module --ignore_cache load "$module_software" 2>/dev/null; then
+        new_env=$(get_filtered_env)
+        module unload "$module_software"
+    else
+        echo "Failed to load module from --module-only build: $module_software."
+        echo "$package_name: Failed to load module from --module-only build" >> "$broken_modules_log"
+        module unuse "$module_only_modulepath"
+        continue
+    fi
+
+    # Compare the environments
+    if [ -n "$original_env" ]; then
+        if diff <(echo "$original_env") <(echo "$new_env") >/dev/null; then
+            echo "$module_software loaded with identical environment."
+        else
+            echo "$module_software environment mismatch."
+            echo "$package_name: $module_software (environment mismatch)" >> "$broken_modules_log"
+            diff_file="${module_software//\//_}_env_diff.txt"
+            diff <(echo "$original_env") <(echo "$new_env") > "$diff_file"
+            echo "Environment differences saved to $diff_file"
+        fi
+    else
+        echo "Original environment not available for comparison for $module_software."
+        echo "$package_name: $module_software (failed to load module from full build)" >> "$broken_modules_log"
+    fi
+
+    module unuse "$module_only_modulepath"
+
+    # Mark module as checked
+    checked_modules[$module_software]=1
+
+done
+
+# Report (the scratch directory is removed by the EXIT trap set above)
+if [ -s "$broken_modules_log" ]; then
+    echo "Some modules did not match. See $broken_modules_log for details."
+    exit 1
+else
+    echo "All modules match between build and --module-only build."
+fi
+