Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality for verifying tarball signatures to automated ingestion scripts #208

Merged
merged 10 commits into from
Mar 21, 2025
12 changes: 6 additions & 6 deletions .github/workflows/build-test-release-client-packages.yml
Original file line number Diff line number Diff line change
@@ -64,7 +64,7 @@ jobs:
fpm_opts: "--debug -n cvmfs-config-eessi-${{ steps.get_version.outputs.version }} -t tar -a all -s dir -C ./package --description 'CVMFS configuration package for EESSI.'"

- name: Upload packages as build artifacts
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # v3.1.0
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: linux_packages
path: cvmfs-config-eessi*
@@ -135,7 +135,7 @@ jobs:
run: sudo apt-get update && sudo apt-get install cvmfs

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

@@ -174,7 +174,7 @@ jobs:
dnf install -y cvmfs cvmfs-config-none

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

@@ -215,7 +215,7 @@ jobs:
run: sudo apt-get update && sudo apt-get install cvmfs

- name: Download cvmfs-config-eessi package
uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: linux_packages

@@ -288,7 +288,7 @@ jobs:
run: |
echo ::set-output name=version::${GITHUB_REF#refs/tags/}

- uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
- uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
path: ./build_artifacts

@@ -319,7 +319,7 @@ jobs:
- name: Checkout
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- uses: actions/download-artifact@9782bd6a9848b53b110e712e20e42d89988822b7 # v3.0.1
- uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
path: ./build_artifacts

6 changes: 6 additions & 0 deletions scripts/automated_ingestion/automated_ingestion.cfg.example
Original file line number Diff line number Diff line change
@@ -9,6 +9,12 @@ download_dir = /where/to/store/download/tarballs
ingestion_script = /absolute/path/to/ingest-tarball.sh
metadata_file_extension = .meta.txt

[signatures]
signatures_required = no
signature_file_extension = .sig
signature_verification_script = /absolute/path/to/sign_verify_file_ssh.sh
allowed_signers_file = /path/to/allowed_signers

[aws]
staging_buckets = {
"software.eessi.io-2023.06": "software.eessi.io",
118 changes: 100 additions & 18 deletions scripts/automated_ingestion/eessitarball.py
Original file line number Diff line number Diff line change
@@ -24,12 +24,16 @@ def __init__(self, object_name, config, git_staging_repo, s3, bucket, cvmfs_repo
self.config = config
self.git_repo = git_staging_repo
self.metadata_file = object_name + config['paths']['metadata_file_extension']
self.metadata_sig_file = self.metadata_file + config['signatures']['signature_file_extension']
self.object = object_name
self.object_sig = object_name + config['signatures']['signature_file_extension']
self.s3 = s3
self.bucket = bucket
self.cvmfs_repo = cvmfs_repo
self.local_path = os.path.join(config['paths']['download_dir'], os.path.basename(object_name))
self.local_sig_path = self.local_path + config['signatures']['signature_file_extension']
self.local_metadata_path = self.local_path + config['paths']['metadata_file_extension']
self.local_metadata_sig_path = self.local_metadata_path + config['signatures']['signature_file_extension']
self.url = f'https://{bucket}.s3.amazonaws.com/{object_name}'

self.states = {
@@ -48,22 +52,42 @@ def download(self, force=False):
"""
Download this tarball and its corresponding metadata file, if this hasn't been already done.
"""
if force or not os.path.exists(self.local_path):
try:
self.s3.download_file(self.bucket, self.object, self.local_path)
except:
logging.error(
f'Failed to download tarball {self.object} from {self.bucket} to {self.local_path}.'
)
self.local_path = None
if force or not os.path.exists(self.local_metadata_path):
try:
self.s3.download_file(self.bucket, self.metadata_file, self.local_metadata_path)
except:
logging.error(
f'Failed to download metadata file {self.metadata_file} from {self.bucket} to {self.local_metadata_path}.'
)
self.local_metadata_path = None
files = [
(self.object, self.local_path, self.object_sig, self.local_sig_path),
(self.metadata_file, self.local_metadata_path, self.metadata_sig_file, self.local_metadata_sig_path),
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth to rename elements to clearly notify what they refer to and have the naming consistent? For example,

(self.object_remote_path, self.object_local_path, self.object_sig_remote_path, self.object_sig_local_path),
(self.metadata_remote_path, self.metadata_local_path, self.metadata_sig_remote_path, self.metadata_sig_local_path),

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This originated from the object terminology used by the boto client, but I agree that it would be more clear to use the names like you proposed. Is it okay to do that in a follow-up PR, as it would require changes across the entire script?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, perfectly fine to do it in a follow-up PR.

]
skip = False
for (object, local_file, sig_object, local_sig_file) in files:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly synchronise naming with potential changes in files array, e.g.,

for (remote_path, local_path, sig_remote_path, sig_local_path) in files:

if force or not os.path.exists(local_file):
# First we try to download signature file, which may or may not be available
# and may be optional or required.
try:
self.s3.download_file(self.bucket, sig_object, local_sig_file)
except:
if self.config['signatures'].getboolean('signatures_required', True):
logging.error(
f'Failed to download signature file {sig_object} for {object} from {self.bucket} to {local_sig_file}.'
)
skip = True
break
else:
logging.warning(
f'Failed to download signature file {sig_object} for {object} from {self.bucket} to {local_sig_file}. ' +
'Ignoring this, because signatures are not required with the current configuration.'
)
# Npw we download the file itself.
try:
self.s3.download_file(self.bucket, object, local_file)
except:
logging.error(
f'Failed to download {object} from {self.bucket} to {local_file}.'
)
skip = True
break
# If any required download failed, make sure to skip this tarball completely.
if skip:
self.local_path = None
self.local_metadata_path = None

def find_state(self):
"""Find the state of this tarball by searching through the state directories in the git repository."""
@@ -156,6 +180,47 @@ def run_handler(self):
handler = self.states[self.state]['handler']
handler()

def verify_signatures(self):
"""Verify the signatures of the downloaded tarball and metadata file using the corresponding signature files."""

sig_missing_msg = 'Signature file %s is missing.'
sig_missing = False
for sig_file in [self.local_sig_path, self.local_metadata_sig_path]:
if not os.path.exists(sig_file):
logging.warning(sig_missing_msg % sig_file)
sig_missing = True

if sig_missing:
# If signature files are missing, we return a failure,
# unless the configuration specifies that signatures are not required.
if self.config['signatures'].getboolean('signatures_required', True):
return False
else:
return True

verify_script = self.config['signatures']['signature_verification_script']
allowed_signers_file = self.config['signatures']['allowed_signers_file']
if not os.path.exists(verify_script):
logging.error(f'Unable to verify signatures, the specified signature verification script does not exist!')
return False

if not os.path.exists(allowed_signers_file):
logging.error(f'Unable to verify signatures, the specified allowed signers file does not exist!')
return False

for (file, sig_file) in [(self.local_path, self.local_sig_path), (self.local_metadata_path, self.local_metadata_sig_path)]:
verify_cmd = subprocess.run(
[verify_script, '--verify', '--allowed-signers-file', allowed_signers_file, '--file', file, '--signature-file', sig_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if verify_cmd.returncode == 0:
logging.debug(f'Signature for {file} successfully verified.')
else:
logging.error(f'Failed to verify signature for {file}.')
return False

return True

def verify_checksum(self):
"""Verify the checksum of the downloaded tarball with the one in its metadata file."""
local_sha256 = sha256sum(self.local_path)
@@ -171,13 +236,26 @@ def ingest(self):
#TODO: check if there is an open issue for this tarball, and if there is, skip it.
logging.info(f'Tarball {self.object} is ready to be ingested.')
self.download()
logging.info('Verifying its signature...')
if not self.verify_signatures():
issue_msg = f'Failed to verify signatures for `{self.object}`'
logging.error(issue_msg)
if not self.issue_exists(issue_msg, state='open'):
self.git_repo.create_issue(title=issue_msg, body=issue_msg)
return
else:
logging.debug(f'Signatures of {self.object} and its metadata file successfully verified.')

logging.info('Verifying its checksum...')
if not self.verify_checksum():
logging.error('Checksum of downloaded tarball does not match the one in its metadata file!')
# Open issue?
issue_msg = f'Failed to verify checksum for `{self.object}`'
logging.error(issue_msg)
if not self.issue_exists(issue_msg, state='open'):
self.git_repo.create_issue(title=issue_msg, body=issue_msg)
return
else:
logging.debug(f'Checksum of {self.object} matches the one in its metadata file.')

script = self.config['paths']['ingestion_script']
sudo = ['sudo'] if self.config['cvmfs'].getboolean('ingest_as_root', True) else []
logging.info(f'Running the ingestion script for {self.object}...')
@@ -222,6 +300,10 @@ def mark_new_tarball_as_staged(self):
logging.warn('Skipping this tarball...')
return

# Verify the signatures of the tarball and metadata file.
if not self.verify_signatures():
logging.warn('Signature verification of the tarball or its metadata failed, skipping this tarball...')

contents = ''
with open(self.local_metadata_path, 'r') as meta:
contents = meta.read()