summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/tarfile.rst5
-rwxr-xr-xLib/tarfile.py11
-rw-r--r--Lib/test/test_tarfile.py144
-rw-r--r--Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst3
4 files changed, 154 insertions, 9 deletions
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index f1d9bd3..1650885 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes:
Name of the target file name, which is only present in :class:`TarInfo` objects
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
+ For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
+ that contains the link.
+ For hard links (``LNKTYPE``), the *linkname* is relative to the root of
+ the archive.
+
.. attribute:: TarInfo.uid
:type: int
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index b6ad7db..7a6158c 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -740,7 +740,7 @@ class SpecialFileError(FilterError):
class AbsoluteLinkError(FilterError):
def __init__(self, tarinfo):
self.tarinfo = tarinfo
- super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
+ super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
class LinkOutsideDestinationError(FilterError):
def __init__(self, tarinfo, path):
@@ -800,7 +800,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
if member.islnk() or member.issym():
if os.path.isabs(member.linkname):
raise AbsoluteLinkError(member)
- target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
+ if member.issym():
+ target_path = os.path.join(dest_path,
+ os.path.dirname(name),
+ member.linkname)
+ else:
+ target_path = os.path.join(dest_path,
+ member.linkname)
+ target_path = os.path.realpath(target_path)
if os.path.commonpath([target_path, dest_path]) != dest_path:
raise LinkOutsideDestinationError(member, target_path)
return new_attrs
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index a66f7ef..3df64c7 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -3169,10 +3169,12 @@ class ArchiveMaker:
self.bio = None
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
- mode=None, **kwargs):
+ mode=None, size=None, **kwargs):
"""Add a member to the test archive. Call within `with`."""
name = str(name)
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
+ if size is not None:
+ tarinfo.size = size
if mode:
tarinfo.mode = _filemode_to_int(mode)
if symlink_to is not None:
@@ -3236,7 +3238,8 @@ class TestExtractionFilters(unittest.TestCase):
raise self.raised_exception
self.assertEqual(self.expected_paths, set())
- def expect_file(self, name, type=None, symlink_to=None, mode=None):
+ def expect_file(self, name, type=None, symlink_to=None, mode=None,
+ size=None):
"""Check a single file. See check_context."""
if self.raised_exception:
raise self.raised_exception
@@ -3270,6 +3273,8 @@ class TestExtractionFilters(unittest.TestCase):
self.assertTrue(path.is_fifo())
else:
raise NotImplementedError(type)
+ if size is not None:
+ self.assertEqual(path.stat().st_size, size)
for parent in path.parents:
self.expected_paths.discard(parent)
@@ -3315,8 +3320,15 @@ class TestExtractionFilters(unittest.TestCase):
# Test interplaying symlinks
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
with ArchiveMaker() as arc:
+
+ # `current` links to `.` which is both:
+ # - the destination directory
+ # - `current` itself
arc.add('current', symlink_to='.')
+
+ # effectively points to ./../
arc.add('parent', symlink_to='current/..')
+
arc.add('parent/evil')
if support.can_symlink():
@@ -3357,9 +3369,46 @@ class TestExtractionFilters(unittest.TestCase):
def test_parent_symlink2(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
+
+ # Posix and Windows have different pathname resolution:
+ # either symlink or a '..' component resolve first.
+ # Let's see which we are on.
+ if support.can_symlink():
+ testpath = os.path.join(TEMPDIR, 'resolution_test')
+ os.mkdir(testpath)
+
+ # testpath/current links to `.` which is all of:
+ # - `testpath`
+ # - `testpath/current`
+ # - `testpath/current/current`
+ # - etc.
+ os.symlink('.', os.path.join(testpath, 'current'))
+
+ # we'll test where `testpath/current/../file` ends up
+ with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
+ pass
+
+ if os.path.exists(os.path.join(testpath, 'file')):
+ # Windows collapses 'current\..' to '.' first, leaving
+ # 'testpath\file'
+ dotdot_resolves_early = True
+ elif os.path.exists(os.path.join(testpath, '..', 'file')):
+ # Posix resolves 'current' to '.' first, leaving
+ # 'testpath/../file'
+ dotdot_resolves_early = False
+ else:
+ raise AssertionError('Could not determine link resolution')
+
with ArchiveMaker() as arc:
+
+ # `current` links to `.` which is both the destination directory
+ # and `current` itself
arc.add('current', symlink_to='.')
+
+ # `current/parent` is also available as `./parent`,
+ # and effectively points to `./../`
arc.add('current/parent', symlink_to='..')
+
arc.add('parent/evil')
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3373,6 +3422,7 @@ class TestExtractionFilters(unittest.TestCase):
with self.check_context(arc.open(), 'tar'):
if support.can_symlink():
+ # Fail when extracting a file outside destination
self.expect_exception(
tarfile.OutsideDestinationError,
"'parent/evil' would be extracted to "
@@ -3383,10 +3433,24 @@ class TestExtractionFilters(unittest.TestCase):
self.expect_file('parent/evil')
with self.check_context(arc.open(), 'data'):
- self.expect_exception(
- tarfile.LinkOutsideDestinationError,
- """'current/parent' would link to ['"].*['"], """
- + "which is outside the destination")
+ if support.can_symlink():
+ if dotdot_resolves_early:
+ # Fail when extracting a file outside destination
+ self.expect_exception(
+ tarfile.OutsideDestinationError,
+ "'parent/evil' would be extracted to "
+ + """['"].*evil['"], which is outside """
+ + "the destination")
+ else:
+ # Fail as soon as we have a symlink outside the destination
+ self.expect_exception(
+ tarfile.LinkOutsideDestinationError,
+ "'current/parent' would link to "
+ + """['"].*outerdir['"], which is outside """
+ + "the destination")
+ else:
+ self.expect_file('current/')
+ self.expect_file('parent/evil')
def test_absolute_symlink(self):
# Test symlink to an absolute path
@@ -3415,11 +3479,29 @@ class TestExtractionFilters(unittest.TestCase):
with self.check_context(arc.open(), 'data'):
self.expect_exception(
tarfile.AbsoluteLinkError,
- "'parent' is a symlink to an absolute path")
+ "'parent' is a link to an absolute path")
+
+ def test_absolute_hardlink(self):
+ # Test hardlink to an absolute path
+ # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
+ with ArchiveMaker() as arc:
+ arc.add('parent', hardlink_to=self.outerdir / 'foo')
+
+ with self.check_context(arc.open(), 'fully_trusted'):
+ self.expect_exception(KeyError, ".*foo. not found")
+
+ with self.check_context(arc.open(), 'tar'):
+ self.expect_exception(KeyError, ".*foo. not found")
+
+ with self.check_context(arc.open(), 'data'):
+ self.expect_exception(
+ tarfile.AbsoluteLinkError,
+ "'parent' is a link to an absolute path")
def test_sly_relative0(self):
# Inspired by 'relative0' in jwilk/traversal-archives
with ArchiveMaker() as arc:
+ # points to `../../tmp/moo`
arc.add('../moo', symlink_to='..//tmp/moo')
try:
@@ -3469,6 +3551,54 @@ class TestExtractionFilters(unittest.TestCase):
+ """['"].*moo['"], which is outside the """
+ "destination")
+ def test_deep_symlink(self):
+ # Test that symlinks and hardlinks inside a directory
+ # point to the correct file (`target` of size 3).
+ # If links aren't supported we get a copy of the file.
+ with ArchiveMaker() as arc:
+ arc.add('targetdir/target', size=3)
+ # a hardlink's linkname is relative to the archive
+ arc.add('linkdir/hardlink', hardlink_to=os.path.join(
+ 'targetdir', 'target'))
+ # a symlink's linkname is relative to the link's directory
+ arc.add('linkdir/symlink', symlink_to=os.path.join(
+ '..', 'targetdir', 'target'))
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter):
+ self.expect_file('targetdir/target', size=3)
+ self.expect_file('linkdir/hardlink', size=3)
+ if support.can_symlink():
+ self.expect_file('linkdir/symlink', size=3,
+ symlink_to='../targetdir/target')
+ else:
+ self.expect_file('linkdir/symlink', size=3)
+
+ def test_chains(self):
+ # Test chaining of symlinks/hardlinks.
+ # Symlinks are created before the files they point to.
+ with ArchiveMaker() as arc:
+ arc.add('linkdir/symlink', symlink_to='hardlink')
+ arc.add('symlink2', symlink_to=os.path.join(
+ 'linkdir', 'hardlink2'))
+ arc.add('targetdir/target', size=3)
+ arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
+ arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter):
+ self.expect_file('targetdir/target', size=3)
+ self.expect_file('linkdir/hardlink', size=3)
+ self.expect_file('linkdir/hardlink2', size=3)
+ if support.can_symlink():
+ self.expect_file('linkdir/symlink', size=3,
+ symlink_to='hardlink')
+ self.expect_file('symlink2', size=3,
+ symlink_to='linkdir/hardlink2')
+ else:
+ self.expect_file('linkdir/symlink', size=3)
+ self.expect_file('symlink2', size=3)
+
def test_modes(self):
# Test how file modes are extracted
# (Note that the modes are ignored on platforms without working chmod)
diff --git a/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst
new file mode 100644
index 0000000..32c1fb9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst
@@ -0,0 +1,3 @@
+:func:`tarfile.data_filter` now takes the location of symlinks into account
+when determining their target, so it will no longer reject some valid
+tarballs with ``LinkOutsideDestinationError``.