diff --git a/vision/tools/download_pascal_voc.py b/vision/tools/download_pascal_voc.py index e4a4754..93d117b 100755 --- a/vision/tools/download_pascal_voc.py +++ b/vision/tools/download_pascal_voc.py @@ -35,7 +35,26 @@ def download_voc(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) ##################################################################################### @@ -49,7 +68,26 @@ def download_aug(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) shutil.move(os.path.join(path, 'benchmark_RELEASE'), os.path.join(path, 'VOCaug')) filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']