From acbd3f9c5c5f23e95267714e41236140d84fe962 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 21 Aug 2023 12:56:46 +0200 Subject: gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Victor Stinner Co-authored-by: Lumír 'Frenzy' Balhar --- Doc/library/tarfile.rst | 5 + Lib/tarfile.py | 11 +- Lib/test/test_tarfile.py | 146 ++++++++++++++++++++- .../2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst | 3 + 4 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 00f3070..62d67bc 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -740,6 +740,11 @@ A ``TarInfo`` object has the following public data attributes: Name of the target file name, which is only present in :class:`TarInfo` objects of type :const:`LNKTYPE` and :const:`SYMTYPE`. + For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory + that contains the link. + For hard links (``LNKTYPE``), the *linkname* is relative to the root of + the archive. + .. attribute:: TarInfo.uid :type: int diff --git a/Lib/tarfile.py b/Lib/tarfile.py index df4e41f..1147d59 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -742,7 +742,7 @@ class SpecialFileError(FilterError): class AbsoluteLinkError(FilterError): def __init__(self, tarinfo): self.tarinfo = tarinfo - super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') + super().__init__(f'{tarinfo.name!r} is a link to an absolute path') class LinkOutsideDestinationError(FilterError): def __init__(self, tarinfo, path): @@ -802,7 +802,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True): if member.islnk() or member.issym(): if os.path.isabs(member.linkname): raise AbsoluteLinkError(member) - target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) + if member.issym(): + target_path = os.path.join(dest_path, + os.path.dirname(name), + member.linkname) + else: + target_path = os.path.join(dest_path, + member.linkname) + target_path = os.path.realpath(target_path) if os.path.commonpath([target_path, dest_path]) != dest_path: raise LinkOutsideDestinationError(member, target_path) return new_attrs diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2eda7fc..2cdf2c1 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3337,10 +3337,12 @@ class ArchiveMaker: self.bio = None def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, - mode=None, **kwargs): + mode=None, size=None, **kwargs): """Add a member to the test archive. Call within `with`.""" name = str(name) tarinfo = tarfile.TarInfo(name).replace(**kwargs) + if size is not None: + tarinfo.size = size if mode: tarinfo.mode = _filemode_to_int(mode) if symlink_to is not None: @@ -3416,7 +3418,8 @@ class TestExtractionFilters(unittest.TestCase): raise self.raised_exception self.assertEqual(self.expected_paths, set()) - def expect_file(self, name, type=None, symlink_to=None, mode=None): + def expect_file(self, name, type=None, symlink_to=None, mode=None, + size=None): """Check a single file. See check_context.""" if self.raised_exception: raise self.raised_exception @@ -3445,6 +3448,8 @@ class TestExtractionFilters(unittest.TestCase): self.assertTrue(path.is_fifo()) else: raise NotImplementedError(type) + if size is not None: + self.assertEqual(path.stat().st_size, size) for parent in path.parents: self.expected_paths.discard(parent) @@ -3491,8 +3496,15 @@ class TestExtractionFilters(unittest.TestCase): # Test interplaying symlinks # Inspired by 'dirsymlink2a' in jwilk/traversal-archives with ArchiveMaker() as arc: + + # `current` links to `.` which is both: + # - the destination directory + # - `current` itself arc.add('current', symlink_to='.') + + # effectively points to ./../ arc.add('parent', symlink_to='current/..') + arc.add('parent/evil') if os_helper.can_symlink(): @@ -3534,9 +3546,46 @@ class TestExtractionFilters(unittest.TestCase): def test_parent_symlink2(self): # Test interplaying symlinks # Inspired by 'dirsymlink2b' in jwilk/traversal-archives + + # Posix and Windows have different pathname resolution: + # either symlink or a '..' component resolve first. + # Let's see which we are on. + if os_helper.can_symlink(): + testpath = os.path.join(TEMPDIR, 'resolution_test') + os.mkdir(testpath) + + # testpath/current links to `.` which is all of: + # - `testpath` + # - `testpath/current` + # - `testpath/current/current` + # - etc. + os.symlink('.', os.path.join(testpath, 'current')) + + # we'll test where `testpath/current/../file` ends up + with open(os.path.join(testpath, 'current', '..', 'file'), 'w'): + pass + + if os.path.exists(os.path.join(testpath, 'file')): + # Windows collapses 'current\..' to '.' first, leaving + # 'testpath\file' + dotdot_resolves_early = True + elif os.path.exists(os.path.join(testpath, '..', 'file')): + # Posix resolves 'current' to '.' first, leaving + # 'testpath/../file' + dotdot_resolves_early = False + else: + raise AssertionError('Could not determine link resolution') + with ArchiveMaker() as arc: + + # `current` links to `.` which is both the destination directory + # and `current` itself arc.add('current', symlink_to='.') + + # `current/parent` is also available as `./parent`, + # and effectively points to `./../` arc.add('current/parent', symlink_to='..') + arc.add('parent/evil') with self.check_context(arc.open(), 'fully_trusted'): @@ -3550,6 +3599,7 @@ class TestExtractionFilters(unittest.TestCase): with self.check_context(arc.open(), 'tar'): if os_helper.can_symlink(): + # Fail when extracting a file outside destination self.expect_exception( tarfile.OutsideDestinationError, "'parent/evil' would be extracted to " @@ -3560,10 +3610,24 @@ class TestExtractionFilters(unittest.TestCase): self.expect_file('parent/evil') with self.check_context(arc.open(), 'data'): - self.expect_exception( - tarfile.LinkOutsideDestinationError, - """'current/parent' would link to ['"].*['"], """ - + "which is outside the destination") + if os_helper.can_symlink(): + if dotdot_resolves_early: + # Fail when extracting a file outside destination + self.expect_exception( + tarfile.OutsideDestinationError, + "'parent/evil' would be extracted to " + + """['"].*evil['"], which is outside """ + + "the destination") + else: + # Fail as soon as we have a symlink outside the destination + self.expect_exception( + tarfile.LinkOutsideDestinationError, + "'current/parent' would link to " + + """['"].*outerdir['"], which is outside """ + + "the destination") + else: + self.expect_file('current/') + self.expect_file('parent/evil') @symlink_test def test_absolute_symlink(self): @@ -3593,12 +3657,30 @@ class TestExtractionFilters(unittest.TestCase): with self.check_context(arc.open(), 'data'): self.expect_exception( tarfile.AbsoluteLinkError, - "'parent' is a symlink to an absolute path") + "'parent' is a link to an absolute path") + + def test_absolute_hardlink(self): + # Test hardlink to an absolute path + # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('parent', hardlink_to=self.outerdir / 'foo') + + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_exception(KeyError, ".*foo. not found") + + with self.check_context(arc.open(), 'tar'): + self.expect_exception(KeyError, ".*foo. not found") + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.AbsoluteLinkError, + "'parent' is a link to an absolute path") @symlink_test def test_sly_relative0(self): # Inspired by 'relative0' in jwilk/traversal-archives with ArchiveMaker() as arc: + # points to `../../tmp/moo` arc.add('../moo', symlink_to='..//tmp/moo') try: @@ -3649,6 +3731,56 @@ class TestExtractionFilters(unittest.TestCase): + """['"].*moo['"], which is outside the """ + "destination") + @symlink_test + def test_deep_symlink(self): + # Test that symlinks and hardlinks inside a directory + # point to the correct file (`target` of size 3). + # If links aren't supported we get a copy of the file. + with ArchiveMaker() as arc: + arc.add('targetdir/target', size=3) + # a hardlink's linkname is relative to the archive + arc.add('linkdir/hardlink', hardlink_to=os.path.join( + 'targetdir', 'target')) + # a symlink's linkname is relative to the link's directory + arc.add('linkdir/symlink', symlink_to=os.path.join( + '..', 'targetdir', 'target')) + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + self.expect_file('targetdir/target', size=3) + self.expect_file('linkdir/hardlink', size=3) + if os_helper.can_symlink(): + self.expect_file('linkdir/symlink', size=3, + symlink_to='../targetdir/target') + else: + self.expect_file('linkdir/symlink', size=3) + + @symlink_test + def test_chains(self): + # Test chaining of symlinks/hardlinks. + # Symlinks are created before the files they point to. + with ArchiveMaker() as arc: + arc.add('linkdir/symlink', symlink_to='hardlink') + arc.add('symlink2', symlink_to=os.path.join( + 'linkdir', 'hardlink2')) + arc.add('targetdir/target', size=3) + arc.add('linkdir/hardlink', hardlink_to='targetdir/target') + arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + self.expect_file('targetdir/target', size=3) + self.expect_file('linkdir/hardlink', size=3) + self.expect_file('linkdir/hardlink2', size=3) + if os_helper.can_symlink(): + self.expect_file('linkdir/symlink', size=3, + symlink_to='hardlink') + self.expect_file('symlink2', size=3, + symlink_to='linkdir/hardlink2') + else: + self.expect_file('linkdir/symlink', size=3) + self.expect_file('symlink2', size=3) + def test_modes(self): # Test how file modes are extracted # (Note that the modes are ignored on platforms without working chmod) diff --git a/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst new file mode 100644 index 0000000..32c1fb9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst @@ -0,0 +1,3 @@ +:func:`tarfile.data_filter` now takes the location of symlinks into account +when determining their target, so it will no longer reject some valid +tarballs with ``LinkOutsideDestinationError``. -- cgit v0.12