Skip to content

Commit

Permalink
Merge pull request #69 from fls-bioinformatics-core/unpack-archive-ha…
Browse files Browse the repository at this point in the history
…ndle-missing-owner-rw

Fix unpacking compressed archive when owner doesn't have 'rw' permission on some files/directories
  • Loading branch information
pjbriggs authored Feb 7, 2025
2 parents 2dbe8b3 + 3742fd5 commit 534fae8
Show file tree
Hide file tree
Showing 4 changed files with 397 additions and 39 deletions.
23 changes: 14 additions & 9 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -412,15 +412,20 @@ directory with the same name will not be overwritten.
The restored archive contents are also verified using
their original checksums as part of the unpacking.

The timestamps and permissions of the contents are
also restored (with the caveat that all restored
content will have read-write permission added for the
user unpacking the archive, regardless of the
permissions of the original files).

Ownership information is not restored (unless the
archiving and unpacking operations are both performed
by superuser).
In addition the timestamps of the unpacked files and
directories are also restored to those from the source
directory; by default permissions are not restored,
instead they be the default permissions for the user
unpacking the archive (read-write permission will also
be added). This mimicks the default behaviour of the
``tar`` utility when unpacking ``.tar.gz`` archives.

The ``--copy-permissions`` option can be specified to
force the permissions to also be restored. Note that
this can produce undesirable results depending on the
permissions on the source directory.

Ownership information is not restored.

If only a subset of files need to be restored from
the archive then the ``extract`` command is recommended
Expand Down
166 changes: 137 additions & 29 deletions ngsarchiver/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,8 @@ def extract_files(self,name,extract_dir=None,include_path=False):
"when extracting '%s'" %
(self.path,m.path))

def unpack(self,extract_dir=None,verify=True,set_read_write=True):
def unpack(self, extract_dir=None, verify=True, set_permissions=False,
set_read_write=True):
"""
Unpacks the archive
Expand All @@ -1243,9 +1244,14 @@ def unpack(self,extract_dir=None,verify=True,set_read_write=True):
archive into (default: cwd)
verify (bool): if True then verify checksums
for extracted archives (default: True)
set_permissions (bool): if True then transfer
original permissions from archive to the
extracted files and directories (default:
False)
set_read_write (bool): if True then ensure
extracted files have read/write permissions
for the user (default: True)
for the user; ignored if 'set_permissions'
is True (default: True)
Returns:
Directory: appropriate subclass instance of
Expand Down Expand Up @@ -1273,12 +1279,17 @@ def unpack(self,extract_dir=None,verify=True,set_read_write=True):
for f in self._archive_metadata['files']:
print("-- copying %s" % f)
f = os.path.join(self._path,f)
shutil.copy2(f,d)
dst = os.path.join(d, os.path.basename(f))
# Use 'copyfile' rather than 'copy2' to avoid setting
# permissions here
shutil.copyfile(f, dst)
# Set timestamp from source file
st = os.lstat(f)
os.utime(dst, times=(st.st_atime, st.st_mtime))
# Unpack individual archive files
unpack_archive_multitgz(
[os.path.join(self._path,a)
for a in self._archive_metadata['subarchives']],
extract_dir)
archive_list = [os.path.join(self._path,a)
for a in self._archive_metadata['subarchives']]
unpack_archive_multitgz(archive_list, extract_dir)
# Do checksum verification on unpacked archive
if verify:
print("-- verifying checksums of unpacked files")
Expand All @@ -1302,16 +1313,38 @@ def unpack(self,extract_dir=None,verify=True,set_read_write=True):
if not os.path.islink(f):
raise NgsArchiverException("%s: missing symlink"
% f)
# Ensure all files etc have read/write permission
# Set attributes on extracted files
print("-- copying attributes from archive")
set_attributes_from_archive_multitgz(archive_list,
extract_dir=extract_dir,
set_permissions=set_permissions,
set_times=True)
# Transfer permissions on copied files if required
if set_permissions:
for f in self._archive_metadata['files']:
f = os.path.join(self._path, f)
dst = os.path.join(d, os.path.basename(f))
chmod(dst, os.stat(f).st_mode)
if set_read_write:
print("-- updating permissions to read-write")
for o in Directory(d).walk():
if not os.path.islink(o):
# Ignore symbolic links
s = os.stat(o)
chmod(o,s.st_mode | stat.S_IRUSR | stat.S_IWUSR)
# Check permissions weren't already explicitly copied
# from the archive
if set_permissions:
print("-- permissions copied from archive: read/write "
"update ignored")
else:
# Ensure all files etc have read/write permission
print("-- updating permissions to read-write")
for o in Directory(d).walk():
if not os.path.islink(o):
# Ignore symbolic links
try:
s = os.stat(o)
chmod(o,s.st_mode | stat.S_IRUSR | stat.S_IWUSR)
except PermissionError:
logger.warning(f"{o}: unable to reset permissions")
# Update the timestamp on the unpacked directory
shutil.copystat(self.path,d)
st = os.lstat(self.path)
os.utime(d, times=(st.st_atime, st.st_mtime))
# Return the appropriate wrapper instance
return get_rundir_instance(d)

Expand Down Expand Up @@ -2214,7 +2247,8 @@ def make_empty_archive(archive_name, root_dir, base_dir=None,
f"to archive: {ex}")
return archive_name

def unpack_archive_multitgz(archive_list,extract_dir=None):
def unpack_archive_multitgz(archive_list, extract_dir=None,
set_permissions=False, set_times=False):
"""
Unpack a multi-volume 'gztar' archive
Expand All @@ -2223,6 +2257,12 @@ def unpack_archive_multitgz(archive_list,extract_dir=None):
unpack
extract_dir (str): specifies directory to unpack
volumes into (default: current directory)
set_permissions (bool): if True then set permissions
on extracted files to those from the archive
(default: don't set permissions)
set_times (bool): if True then set times on extracted
files to those from the archive (default: don't set
times)
"""
if extract_dir is None:
extract_dir = os.getcwd()
Expand All @@ -2234,20 +2274,88 @@ def unpack_archive_multitgz(archive_list,extract_dir=None):
# volumes)
with tarfile.open(a,'r:gz',errorlevel=1) as tgz:
for o in tgz:
try:
tgz.extract(o,path=extract_dir,set_attrs=False)
except Exception as ex:
print("Exception extracting '%s' from '%s': %s"
% (o.name,a,ex))
raise ex
atime = time.time()
if not o.isdir():
# Extract file without attributes
try:
tgz.extract(o, path=extract_dir, set_attrs=False)
except Exception as ex:
print(f"Exception extracting '{o.name}' from '{a}': "
f"{ex}")
raise ex
else:
# Explicitly create directories rather than
# extracting them (workaround for setting
# default permissions)
try:
os.makedirs(os.path.join(extract_dir, o.name),
exist_ok=True)
except Exception as ex:
print(f"Exception creating directory '{o.name}' "
f"from '{a}': {ex}")
raise ex
# Set attributes (time and mode) on extracted files
set_attributes_from_archive_multitgz(archive_list,
extract_dir=extract_dir,
set_permissions=set_permissions,
set_times=set_times)

def set_attributes_from_archive_multitgz(archive_list, extract_dir=None,
set_permissions=False,
set_times=False):
"""
Update permissions and/or times on extracted files
Arguments:
archive_list (list): list of archive volumes to
copy attributes from
extract_dir (str): specifies directory where unpacked
files and directories are (default: current directory)
set_permissions (bool): if True then set permissions
on extracted files to those from the archive
(default: don't set permissions)
set_times (bool): if True then set times on extracted
files to those from the archive (default: don't set
times)
"""
if set_permissions and set_times:
attr_types = "permissions and times"
elif set_permissions and not set_times:
attr_types = "permissions"
elif set_times and not set_permissions:
attr_types = "times"
else:
# Nothing to do
return
if extract_dir is None:
extract_dir = os.getcwd()
attributes = {}
for a in archive_list:
print("Updating attributes from %s..." % a)
with tarfile.open(a,'r:gz',errorlevel=1) as tgz:
for o in tgz:
o_ = os.path.join(extract_dir,o.name)
chmod(o_,o.mode)
utime(o_,(atime,o.mtime))
print(f"Collecting attributes from {a}...")
with tarfile.open(a,'r:gz', errorlevel=1) as tgz:
for src in tgz:
tgt = os.path.join(extract_dir, src.name)
if os.path.islink(tgt):
continue
attributes[src.name] = (src.mtime, src.mode)
atime = time.time()
print(f"Updating {attr_types} on files...")
for src in attributes:
tgt = os.path.join(extract_dir, src)
if not os.path.isdir(tgt):
attrs = attributes[src]
if set_times:
utime(tgt, (atime, attrs[0]))
if set_permissions:
chmod(tgt, attrs[1])
print(f"Updating {attr_types} on directories...")
for src in attributes:
tgt = os.path.join(extract_dir, src)
if os.path.isdir(tgt):
attrs = attributes[src]
if set_times:
utime(tgt, (atime, attrs[0]))
if set_permissions:
chmod(tgt, attrs[1])

def make_copy(d, dest, replace_symlinks=False,
transform_broken_symlinks=False,
Expand Down
8 changes: 7 additions & 1 deletion ngsarchiver/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ def main(argv=None):
action='store',dest='out_dir',
help="unpack archive under OUT_DIR "
"(default: current directory)")
parser_unpack.add_argument('--copy-permissions',
action='store_true', dest='copy_permissions',
help="copy the permissions stored in the "
"archive to the extracted files (default: "
"set permissions to read-write)")

# 'search' command
parser_search = s.add_parser('search',
Expand Down Expand Up @@ -558,7 +563,8 @@ def main(argv=None):
"names but destination cannot handle "
"names which only differ by case")
return CLIStatus.ERROR
d = a.unpack(extract_dir=dest_dir)
d = a.unpack(extract_dir=dest_dir,
set_permissions=args.copy_permissions)
print("Unpacked directory: %s" % d)
return CLIStatus.OK

Expand Down
Loading

0 comments on commit 534fae8

Please sign in to comment.