From d95484af21ce38842c0568f5f357cd52fd5110f0 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 16:09:17 +0200 Subject: [PATCH 1/8] Remove explicit `TarInfo` type check in `ReadTarFS.openbin` --- fs/tarfs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/tarfs.py b/fs/tarfs.py index 4f48d821..01c31e0b 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -431,11 +431,13 @@ def openbin(self, path, mode="r", buffering=-1, **options): except KeyError: six.raise_from(errors.ResourceNotFound(path), None) - if not member.isfile(): + # TarFile.extractfile returns None if the entry is + # neither a file nor a symlink + reader = self._tar.extractfile(member) + if reader is None: raise errors.FileExpected(path) - rw = RawWrapper(cast(IO, self._tar.extractfile(member))) - + rw = RawWrapper(reader) if six.PY2: # Patch nonexistent file.flush in Python2 def _flush(): From 2cdc9af1d85a36c4af432299ce3963f74db1e94e Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 16:10:04 +0200 Subject: [PATCH 2/8] Reformat `fs.tarfs` with `black` --- fs/tarfs.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/tarfs.py b/fs/tarfs.py index 01c31e0b..af541491 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -157,8 +157,7 @@ def __init__( @six.python_2_unicode_compatible class WriteTarFS(WrapFS): - """A writable tar file. - """ + """A writable tar file.""" def __init__( self, @@ -234,8 +233,7 @@ def write_tar( @six.python_2_unicode_compatible class ReadTarFS(FS): - """A readable tar file. 
- """ + """A readable tar file.""" _meta = { "case_insensitive": True, @@ -433,7 +431,7 @@ def openbin(self, path, mode="r", buffering=-1, **options): # TarFile.extractfile returns None if the entry is # neither a file nor a symlink - reader = self._tar.extractfile(member) + reader = self._tar.extractfile(member) if reader is None: raise errors.FileExpected(path) From 1dc42c643957a15a08d7ded972230a5fa4291645 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 16:13:21 +0200 Subject: [PATCH 3/8] Add `ReadTarFS.islink` method using `TarInfo.issym` method directly --- fs/tarfs.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/tarfs.py b/fs/tarfs.py index af541491..e6e8ebb9 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -389,6 +389,13 @@ def isfile(self, path): except KeyError: return False + def islink(self, path): + _path = relpath(self.validatepath(path)) + try: + return self._directory_entries[_path].issym() + except KeyError: + return False + def setinfo(self, path, info): # type: (Text, RawInfo) -> None self.check() From a61752349627e431fbe6fbe7c7f6bface16078c1 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 16:21:47 +0200 Subject: [PATCH 4/8] Add support for the `link` info namespace to `ReadTarFS` --- fs/tarfs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/tarfs.py b/fs/tarfs.py index e6e8ebb9..d6d6125d 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -348,6 +348,10 @@ def getinfo(self, path, namespaces=None): "is_dir": member.isdir(), } + if "link" in namespaces: + raw_info["link"] = { + "target": self._decode(member.linkname) if member.issym() else None + } if "details" in namespaces: raw_info["details"] = { "size": member.size, From 7ac51fb928c0177b06a571caab754d9228052e9a Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 20:01:15 +0200 Subject: [PATCH 5/8] Refactor `ReadTarFS` to support internal symlinks --- fs/tarfs.py | 173 ++++++++++++++++++++++++++++++++------------ 
tests/test_tarfs.py | 132 ++++++++++++++++++++++++++++++--- 2 files changed, 251 insertions(+), 54 deletions(-) diff --git a/fs/tarfs.py b/fs/tarfs.py index d6d6125d..2d12016f 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -4,6 +4,7 @@ from __future__ import print_function from __future__ import unicode_literals +import operator import os import tarfile import typing @@ -11,6 +12,7 @@ from typing import cast, IO import six +from six.moves import map from . import errors from .base import FS @@ -22,7 +24,18 @@ from .opener import open_fs from .permissions import Permissions from ._url_tools import url_quote -from .path import relpath, basename, isbase, normpath, parts, frombase +from .path import ( + dirname, + join, + relpath, + basename, + isbase, + normpath, + parts, + frombase, + recursepath, + relativefrom, +) from .wrapfs import WrapFS if typing.TYPE_CHECKING: @@ -255,6 +268,8 @@ class ReadTarFS(FS): tarfile.SYMTYPE: ResourceType.symlink, tarfile.CONTTYPE: ResourceType.file, tarfile.LNKTYPE: ResourceType.symlink, + # this is how we mark implicit directories + tarfile.DIRTYPE + b"i": ResourceType.directory, } @errors.CreateFailed.catch_all @@ -275,24 +290,66 @@ def _directory_entries(self): """Lazy directory cache.""" if self._directory_cache is None: _decode = self._decode + _encode = self._encode + + # collect all directory entries and remove slashes _directory_entries = ( (_decode(info.name).strip("/"), info) for info in self._tar ) - def _list_tar(): - for name, info in _directory_entries: - try: - _name = normpath(name) - except IllegalBackReference: - # Back references outside root, must be up to no good. 
- pass - else: - if _name: - yield _name, info - - self._directory_cache = OrderedDict(_list_tar()) + # build the cache first before updating it to reduce chances + # of data races + _cache = OrderedDict() + for name, info in _directory_entries: + # check for any invalid back references + try: + _name = normpath(name) + except IllegalBackReference: + continue + + # add all implicit dirnames if not in the cache already + for partial_name in map(relpath, recursepath(_name)): + dirinfo = tarfile.TarInfo(self._encode(partial_name)) + dirinfo.type = tarfile.DIRTYPE + _cache.setdefault(partial_name, dirinfo) + + # add the entry itself, potentially overwriting implicit entries + _cache[_name] = info + + self._directory_cache = _cache return self._directory_cache + def _follow_symlink(self, entry): + """Follow an symlink `TarInfo` to find a concrete entry.""" + _entry = entry + while _entry.issym(): + linkname = normpath( + join(dirname(self._decode(_entry.name)), self._decode(_entry.linkname)) + ) + resolved = self._resolve(linkname) + if resolved is None: + raise errors.ResourceNotFound(linkname) + _entry = self._directory_entries[resolved] + + return _entry + + def _resolve(self, path): + """Replace path components that are symlinks with concrete components. 
+ + Returns: + + + """ + if path in self._directory_entries or not path: + return path + for prefix in map(relpath, reversed(recursepath(path))): + suffix = relativefrom(prefix, path) + entry = self._directory_entries.get(prefix) + if entry is not None and entry.issym(): + entry = self._follow_symlink(entry) + return self._resolve(join(self._decode(entry.name), suffix)) + return None + def __repr__(self): # type: () -> Text return "ReadTarFS({!r})".format(self._file) @@ -327,31 +384,35 @@ def getinfo(self, path, namespaces=None): namespaces = namespaces or () raw_info = {} # type: Dict[Text, Dict[Text, object]] + # special case for root if not _path: raw_info["basic"] = {"name": "", "is_dir": True} if "details" in namespaces: raw_info["details"] = {"type": int(ResourceType.directory)} else: - try: - implicit = False - member = self._directory_entries[_path] - except KeyError: - if not self.isdir(_path): - raise errors.ResourceNotFound(path) - implicit = True - member = tarfile.TarInfo(_path) - member.type = tarfile.DIRTYPE + + _realpath = self._resolve(_path) + if _realpath is None: + raise errors.ResourceNotFound(path) + + implicit = False + member = self._directory_entries[_realpath] raw_info["basic"] = { "name": basename(self._decode(member.name)), - "is_dir": member.isdir(), + "is_dir": self.isdir(_path), # is_dir should follow symlinks } if "link" in namespaces: - raw_info["link"] = { - "target": self._decode(member.linkname) if member.issym() else None - } + if member.issym(): + target = join( + dirname(self._decode(member.name)), + self._decode(member.linkname), + ) + else: + target = None + raw_info["link"] = {"target": target} if "details" in namespaces: raw_info["details"] = { "size": member.size, @@ -381,23 +442,29 @@ def getinfo(self, path, namespaces=None): def isdir(self, path): _path = relpath(self.validatepath(path)) - try: - return self._directory_entries[_path].isdir() - except KeyError: - return any(isbase(_path, name) for name in 
self._directory_entries) + realpath = self._resolve(_path) + if realpath is not None: + entry = self._directory_entries[realpath] + return self._follow_symlink(entry).isdir() + else: + return False def isfile(self, path): _path = relpath(self.validatepath(path)) - try: - return self._directory_entries[_path].isfile() - except KeyError: + realpath = self._resolve(_path) + if realpath is not None: + entry = self._directory_entries[realpath] + return self._follow_symlink(entry).isfile() + else: return False def islink(self, path): _path = relpath(self.validatepath(path)) - try: - return self._directory_entries[_path].issym() - except KeyError: + realpath = self._resolve(_path) + if realpath is not None: + entry = self._directory_entries[realpath] + return entry.issym() + else: return False def setinfo(self, path, info): @@ -409,13 +476,28 @@ def listdir(self, path): # type: (Text) -> List[Text] _path = relpath(self.validatepath(path)) - if not self.gettype(path) is ResourceType.directory: - raise errors.DirectoryExpected(path) + # check the given path exists + realpath = self._resolve(_path) + if realpath is None: + raise errors.ResourceNotFound(path) + elif realpath: + target = self._follow_symlink(self._directory_entries[realpath]) + # check the path is either a symlink mapping to a directory or a directory + if target.isdir(): + base = target.name + elif target.issym(): + base = target.linkname + else: + raise errors.DirectoryExpected(path) + else: + base = "" + # find all entries in the actual directory children = ( - frombase(_path, n) for n in self._directory_entries if isbase(_path, n) + frombase(base, n) for n in self._directory_entries if isbase(base, n) ) content = (parts(child)[1] for child in children if relpath(child)) + return list(OrderedDict.fromkeys(content)) def makedir( @@ -432,17 +514,18 @@ def openbin(self, path, mode="r", buffering=-1, **options): # type: (Text, Text, int, **Any) -> BinaryIO _path = relpath(self.validatepath(path)) + # check the 
requested mode is only a reading mode if "w" in mode or "+" in mode or "a" in mode: raise errors.ResourceReadOnly(path) - try: - member = self._directory_entries[_path] - except KeyError: - six.raise_from(errors.ResourceNotFound(path), None) + # check the path actually resolves after following symlinks + _realpath = self._resolve(_path) + if _realpath is None: + raise errors.ResourceNotFound(path) - # TarFile.extractfile returns None if the entry is + # TarFile.extractfile returns None if the entry is not a file # neither a file nor a symlink - reader = self._tar.extractfile(member) + reader = self._tar.extractfile(self._directory_entries[_realpath]) if reader is None: raise errors.FileExpected(path) diff --git a/tests/test_tarfs.py b/tests/test_tarfs.py index a90dc0ea..3e6bab72 100644 --- a/tests/test_tarfs.py +++ b/tests/test_tarfs.py @@ -1,13 +1,13 @@ # -*- encoding: UTF-8 from __future__ import unicode_literals -import io import os import six import tarfile import tempfile import unittest import pytest +from six import BytesIO from fs import tarfs from fs.enums import ResourceType @@ -223,8 +223,8 @@ def tearDownClass(cls): def setUp(self): self.tempfile = self.tmpfs.open("test.tar", "wb+") with tarfile.open(mode="w", fileobj=self.tempfile) as tf: - tf.addfile(tarfile.TarInfo("."), io.StringIO()) - tf.addfile(tarfile.TarInfo("../foo.txt"), io.StringIO()) + tf.addfile(tarfile.TarInfo(".")) + tf.addfile(tarfile.TarInfo("../foo.txt")) self.tempfile.seek(0) self.fs = tarfs.TarFS(self.tempfile) @@ -237,8 +237,7 @@ def test_listdir(self): class TestImplicitDirectories(unittest.TestCase): - """Regression tests for #160. 
- """ + """Regression tests for #160.""" @classmethod def setUpClass(cls): @@ -251,12 +250,12 @@ def tearDownClass(cls): def setUp(self): self.tempfile = self.tmpfs.open("test.tar", "wb+") with tarfile.open(mode="w", fileobj=self.tempfile) as tf: - tf.addfile(tarfile.TarInfo("foo/bar/baz/spam.txt"), io.StringIO()) - tf.addfile(tarfile.TarInfo("./foo/eggs.bin"), io.StringIO()) - tf.addfile(tarfile.TarInfo("./foo/yolk/beans.txt"), io.StringIO()) + tf.addfile(tarfile.TarInfo("foo/bar/baz/spam.txt")) + tf.addfile(tarfile.TarInfo("./foo/eggs.bin")) + tf.addfile(tarfile.TarInfo("./foo/yolk/beans.txt")) info = tarfile.TarInfo("foo/yolk") info.type = tarfile.DIRTYPE - tf.addfile(info, io.BytesIO()) + tf.addfile(info) self.tempfile.seek(0) self.fs = tarfs.TarFS(self.tempfile) @@ -301,6 +300,121 @@ def test_getinfo(self): self.assertIs(info.type, ResourceType.directory) +class TestSymlinks(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.tmpfs = open_fs("temp://") + + @classmethod + def tearDownClass(cls): + cls.tmpfs.close() + + def setUp(self): + def _info(name, **kwargs): + info = tarfile.TarInfo(name) + for k, v in kwargs.items(): + setattr(info, k, v) + return info + + # /foo + # /foo/bar.txt + # /foo/baz.txt -> /foo/bar.txt + # /spam -> /foo + # /eggs + # /eggs/yolk -> /spam + + self.tempfile = self.tmpfs.open("test.tar", "wb+") + with tarfile.open(mode="w", fileobj=self.tempfile) as tf: + tf.addfile(_info("foo", type=tarfile.DIRTYPE)) + buff = BytesIO(b"hello") + tf.addfile(_info("foo/bar.txt", size=len(buff.getvalue())), buff) + tf.addfile(_info("foo/baz.txt", type=tarfile.SYMTYPE, linkname="bar.txt")) + tf.addfile(_info("spam", type=tarfile.SYMTYPE, linkname="foo")) + tf.addfile(_info("eggs", type=tarfile.DIRTYPE)) + tf.addfile(_info("eggs/yolk", type=tarfile.SYMTYPE, linkname="../spam")) + self.tempfile.seek(0) + self.fs = tarfs.TarFS(self.tempfile) + + def tearDown(self): + self.fs.close() + self.tempfile.close() + + def test_openbin(self): + # read 
an actual file + with self.fs.openbin("foo/bar.txt") as bar: + self.assertEqual(bar.read(), b"hello") + # read a link to an actual file + with self.fs.openbin("foo/baz.txt") as baz: + self.assertEqual(baz.read(), b"hello") + # read an actual file via a linked directory + with self.fs.openbin("spam/bar.txt") as bar: + self.assertEqual(bar.read(), b"hello") + # read a link via a linked directory + with self.fs.openbin("spam/baz.txt") as baz: + self.assertEqual(baz.read(), b"hello") + + def test_isfile(self): + self.assertFalse(self.fs.isfile("foo")) + self.assertFalse(self.fs.isfile("spam")) + self.assertFalse(self.fs.isfile("eggs")) + self.assertFalse(self.fs.isfile("eggs/yolk")) + self.assertTrue(self.fs.isfile("foo/bar.txt")) + self.assertTrue(self.fs.isfile("foo/baz.txt")) + self.assertTrue(self.fs.isfile("eggs/yolk/bar.txt")) + self.assertTrue(self.fs.isfile("eggs/yolk/baz.txt")) + + def test_isdir(self): + self.assertTrue(self.fs.isdir("foo")) + self.assertTrue(self.fs.isdir("spam")) + self.assertTrue(self.fs.isdir("eggs/yolk")) + self.assertFalse(self.fs.isdir("foo/bar.txt")) + self.assertFalse(self.fs.isdir("foo/baz.txt")) + self.assertFalse(self.fs.isdir("eggs/yolk/bar.txt")) + self.assertFalse(self.fs.isdir("eggs/yolk/baz.txt")) + + def test_islink(self): + self.assertFalse(self.fs.islink("foo")) + self.assertTrue(self.fs.islink("spam")) + self.assertTrue(self.fs.islink("eggs/yolk")) + self.assertFalse(self.fs.islink("foo/bar.txt")) + self.assertTrue(self.fs.islink("foo/baz.txt")) + self.assertFalse(self.fs.islink("eggs/yolk/bar.txt")) + self.assertTrue(self.fs.islink("eggs/yolk/baz.txt")) + + def test_getinfo(self): + file_info = self.fs.getinfo("foo/bar.txt", namespaces=("details", "link")) + self.assertIn("details", file_info.namespaces) + self.assertIn("link", file_info.namespaces) + self.assertFalse(file_info.is_dir) + self.assertIs(file_info.target, None) + self.assertEqual(file_info.type, ResourceType.file) + + link_info = 
self.fs.getinfo("foo/baz.txt", namespaces=("details", "link")) + self.assertIn("details", link_info.namespaces) + self.assertIn("link", link_info.namespaces) + self.assertFalse(link_info.is_dir) + self.assertEqual(link_info.target, "foo/bar.txt") + self.assertEqual(link_info.type, ResourceType.symlink) + + dir_info = self.fs.getinfo("foo", namespaces=("details", "link")) + self.assertIn("details", dir_info.namespaces) + self.assertIn("link", dir_info.namespaces) + self.assertTrue(dir_info.is_dir) + self.assertEqual(dir_info.target, None) + self.assertEqual(dir_info.type, ResourceType.directory) + + dirlink_info = self.fs.getinfo("spam", namespaces=("details", "link")) + self.assertIn("details", dirlink_info.namespaces) + self.assertIn("link", dirlink_info.namespaces) + self.assertTrue(dirlink_info.is_dir) + self.assertEqual(dirlink_info.target, "foo") + self.assertEqual(dirlink_info.type, ResourceType.symlink) + + def test_listdir(self): + self.assertEqual(sorted(self.fs.listdir("foo")), ["bar.txt", "baz.txt"]) + self.assertEqual(sorted(self.fs.listdir("spam")), ["bar.txt", "baz.txt"]) + + class TestReadTarFSMem(TestReadTarFS): def make_source_fs(self): return open_fs("mem://") From a49deed9c34d471fdaefd3f9e7ca4064e1d1b92c Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 20:57:04 +0200 Subject: [PATCH 6/8] Fix type annotations and unused imports in `fs.tarfs` --- fs/tarfs.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/tarfs.py b/fs/tarfs.py index 2d12016f..8729027d 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -4,12 +4,10 @@ from __future__ import print_function from __future__ import unicode_literals -import operator import os import tarfile import typing from collections import OrderedDict -from typing import cast, IO import six from six.moves import map @@ -309,7 +307,7 @@ def _directory_entries(self): # add all implicit dirnames if not in the cache already for partial_name in map(relpath, 
recursepath(_name)): - dirinfo = tarfile.TarInfo(self._encode(partial_name)) + dirinfo = tarfile.TarInfo(_encode(partial_name)) dirinfo.type = tarfile.DIRTYPE _cache.setdefault(partial_name, dirinfo) @@ -391,7 +389,6 @@ def getinfo(self, path, namespaces=None): raw_info["details"] = {"type": int(ResourceType.directory)} else: - _realpath = self._resolve(_path) if _realpath is None: raise errors.ResourceNotFound(path) @@ -406,10 +403,10 @@ def getinfo(self, path, namespaces=None): if "link" in namespaces: if member.issym(): - target = join( + target = normpath(join( dirname(self._decode(member.name)), self._decode(member.linkname), - ) + )) # type: Option[Text] else: target = None raw_info["link"] = {"target": target} From 8a07ecdf37e530cbd3d1b3004d1a2d1f02adf487 Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 21:21:45 +0200 Subject: [PATCH 7/8] Update `ReadTarFS` to work on cyclic and dangling symlinks --- fs/tarfs.py | 45 +++++++++++++++++++++++++--------------- tests/test_tarfs.py | 50 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 17 deletions(-) diff --git a/fs/tarfs.py b/fs/tarfs.py index 8729027d..7577c2b3 100644 --- a/fs/tarfs.py +++ b/fs/tarfs.py @@ -318,7 +318,11 @@ def _directory_entries(self): return self._directory_cache def _follow_symlink(self, entry): - """Follow an symlink `TarInfo` to find a concrete entry.""" + """Follow a symlink `TarInfo` to find a concrete entry. + + Returns ``None`` if the symlink is dangling or cyclic. 
+ """ + done = set() _entry = entry while _entry.issym(): linkname = normpath( @@ -326,17 +330,21 @@ def _follow_symlink(self, entry): ) resolved = self._resolve(linkname) if resolved is None: - raise errors.ResourceNotFound(linkname) + return None + done.add(_entry) _entry = self._directory_entries[resolved] + # if we already saw this symlink, then we are following cyclic + # symlinks and we should break the loop + if _entry in done: + return None return _entry def _resolve(self, path): """Replace path components that are symlinks with concrete components. - Returns: - - + Returns ``None`` when the path could not be resolved to an existing + entry in the archive. """ if path in self._directory_entries or not path: return path @@ -406,7 +414,7 @@ def getinfo(self, path, namespaces=None): target = normpath(join( dirname(self._decode(member.name)), self._decode(member.linkname), - )) # type: Option[Text] + )) # type: Optional[Text] else: target = None raw_info["link"] = {"target": target} @@ -441,8 +449,8 @@ def isdir(self, path): _path = relpath(self.validatepath(path)) realpath = self._resolve(_path) if realpath is not None: - entry = self._directory_entries[realpath] - return self._follow_symlink(entry).isdir() + entry = self._follow_symlink(self._directory_entries[realpath]) + return False if entry is None else entry.isdir() else: return False @@ -450,8 +458,8 @@ def isfile(self, path): _path = relpath(self.validatepath(path)) realpath = self._resolve(_path) if realpath is not None: - entry = self._directory_entries[realpath] - return self._follow_symlink(entry).isfile() + entry = self._follow_symlink(self._directory_entries[realpath]) + return False if entry is None else entry.isfile() else: return False @@ -480,12 +488,12 @@ def listdir(self, path): elif realpath: target = self._follow_symlink(self._directory_entries[realpath]) # check the path is either a symlink mapping to a directory or a directory - if target.isdir(): - base = target.name - elif 
target.issym(): - base = target.linkname - else: + if target is None: + raise errors.ResourceNotFound(path) + elif not target.isdir(): raise errors.DirectoryExpected(path) + else: + base = target.name else: base = "" @@ -515,11 +523,16 @@ def openbin(self, path, mode="r", buffering=-1, **options): if "w" in mode or "+" in mode or "a" in mode: raise errors.ResourceReadOnly(path) - # check the path actually resolves after following symlinks + # check the path actually resolves after following symlink components _realpath = self._resolve(_path) if _realpath is None: raise errors.ResourceNotFound(path) + # get the entry at the resolved path and follow all symlinks + entry = self._follow_symlink(self._directory_entries[_realpath]) + if entry is None: + raise errors.ResourceNotFound(path) + # TarFile.extractfile returns None if the entry is not a file # neither a file nor a symlink reader = self._tar.extractfile(self._directory_entries[_realpath]) diff --git a/tests/test_tarfs.py b/tests/test_tarfs.py index 3e6bab72..2ebced5a 100644 --- a/tests/test_tarfs.py +++ b/tests/test_tarfs.py @@ -14,7 +14,7 @@ from fs.compress import write_tar from fs.opener import open_fs from fs.opener.errors import NotWriteable -from fs.errors import NoURL +from fs.errors import NoURL, ResourceNotFound from fs.test import FSTestCases from .test_archives import ArchiveTestCases @@ -415,6 +415,54 @@ def test_listdir(self): self.assertEqual(sorted(self.fs.listdir("spam")), ["bar.txt", "baz.txt"]) +class TestBrokenSymlinks(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.tmpfs = open_fs("temp://") + + @classmethod + def tearDownClass(cls): + cls.tmpfs.close() + + def setUp(self): + def _info(name, **kwargs): + info = tarfile.TarInfo(name) + for k, v in kwargs.items(): + setattr(info, k, v) + return info + + # /foo + # /foo/baz.txt -> /foo/bar.txt + # /spam -> /eggs + # /eggs -> /spam + + self.tempfile = self.tmpfs.open("test.tar", "wb+") + with tarfile.open(mode="w", 
fileobj=self.tempfile) as tf: + tf.addfile(_info("foo", type=tarfile.DIRTYPE)) + tf.addfile(_info("foo/baz.txt", type=tarfile.SYMTYPE, linkname="bar.txt")) + tf.addfile(_info("spam", type=tarfile.SYMTYPE, linkname="eggs")) + tf.addfile(_info("eggs", type=tarfile.SYMTYPE, linkname="spam")) + self.tempfile.seek(0) + self.fs = tarfs.TarFS(self.tempfile) + + def tearDown(self): + self.fs.close() + self.tempfile.close() + + def test_dangling(self): + self.assertFalse(self.fs.isfile("foo/baz.txt")) + self.assertFalse(self.fs.isdir("foo/baz.txt")) + self.assertRaises(ResourceNotFound, self.fs.openbin, "foo/baz.txt") + self.assertRaises(ResourceNotFound, self.fs.listdir, "foo/baz.txt") + + def test_cyclic(self): + self.assertFalse(self.fs.isfile("spam")) + self.assertFalse(self.fs.isdir("spam")) + self.assertRaises(ResourceNotFound, self.fs.openbin, "spam") + self.assertRaises(ResourceNotFound, self.fs.listdir, "spam") + + class TestReadTarFSMem(TestReadTarFS): def make_source_fs(self): return open_fs("mem://") From ac26ebd07df0307242dada74e4d2036827a8963b Mon Sep 17 00:00:00 2001 From: Martin Larralde Date: Sat, 19 Sep 2020 21:46:43 +0200 Subject: [PATCH 8/8] Update `CHANGELOG.md` with `ReadTarFS` fixes [ci skip] --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7597621a..4b8f126a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [2.4.12] - (Unreleased) +### Added + +- Symlink support to `ReadTarFS` [#426](https://github.com/PyFilesystem/pyfilesystem2/pull/426). Closes [#409](https://github.com/PyFilesystem/pyfilesystem2/issues/409). + ### Changed - Start testing on PyPy. Due to [#342](https://github.com/PyFilesystem/pyfilesystem2/issues/342)