diff options
-rw-r--r-- | Lib/tarfile.py | 71 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 45 | ||||
-rw-r--r-- | Lib/test/testtar.tar | bin | 289792 -> 298496 bytes |
3 files changed, 90 insertions, 26 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 8559e96..31967dd 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2163,8 +2163,7 @@ class TarFile(object): raise StreamError("cannot extract (sym)link as file object") else: # A (sym)link's file object is its target's file object. - return self.extractfile(self._getmember(tarinfo.linkname, - tarinfo)) + return self.extractfile(self._find_link_target(tarinfo)) else: # If there's no data associated with the member (directory, chrdev, # blkdev, etc.), return None instead of a file object. @@ -2273,27 +2272,21 @@ class TarFile(object): (platform limitation), we try to make a copy of the referenced file instead of a link. """ - try: + if hasattr(os, "symlink") and hasattr(os, "link"): + # For systems that support symbolic and hard links. if tarinfo.issym(): os.symlink(tarinfo.linkname, targetpath) else: # See extract(). - os.link(tarinfo._link_target, targetpath) - except AttributeError: - if tarinfo.issym(): - linkpath = os.path.dirname(tarinfo.name) + "/" + \ - tarinfo.linkname - else: - linkpath = tarinfo.linkname - + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) + else: + self._extract_member(self._find_link_target(tarinfo), targetpath) + else: try: - self._extract_member(self.getmember(linkpath), targetpath) - except (EnvironmentError, KeyError) as e: - linkpath = linkpath.replace("/", os.sep) - try: - shutil.copy2(linkpath, targetpath) - except EnvironmentError as e: - raise IOError("link could not be created") + self._extract_member(self._find_link_target(tarinfo), targetpath) + except KeyError: + raise ExtractError("unable to resolve link inside archive") def chown(self, tarinfo, targetpath): """Set owner of targetpath according to tarinfo. 
@@ -2392,21 +2385,28 @@ class TarFile(object): #-------------------------------------------------------------------------- # Little helper methods: - def _getmember(self, name, tarinfo=None): + def _getmember(self, name, tarinfo=None, normalize=False): """Find an archive member by name from bottom to top. If tarinfo is given, it is used as the starting point. """ # Ensure that all members have been loaded. members = self.getmembers() - if tarinfo is None: - end = len(members) - else: - end = members.index(tarinfo) + # Limit the member search list up to tarinfo. + if tarinfo is not None: + members = members[:members.index(tarinfo)] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): + if normalize: + member_name = os.path.normpath(member.name) + else: + member_name = member.name - for i in range(end - 1, -1, -1): - if name == members[i].name: - return members[i] + if name == member_name: + return member def _load(self): """Read through the entire archive file and look for readable @@ -2427,6 +2427,25 @@ class TarFile(object): if mode is not None and self.mode not in mode: raise IOError("bad operation for mode %r" % self.mode) + def _find_link_target(self, tarinfo): + """Find the target member of a symlink or hardlink member in the + archive. + """ + if tarinfo.issym(): + # Always search the entire archive. + linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname + limit = None + else: + # Search the archive before the link, because a hard link is + # just a reference to an already archived file. + linkname = tarinfo.linkname + limit = tarinfo + + member = self._getmember(linkname, tarinfo=limit, normalize=True) + if member is None: + raise KeyError("linkname %r not found" % linkname) + return member + def __iter__(self): """Provide an iterator object. 
""" diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index ec6f82f..858f45b 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -133,6 +133,26 @@ class UstarReadTest(ReadTest): "read() after readline() failed") fobj.close() + # Test if symbolic and hard links are resolved by extractfile(). The + # test link members each point to a regular member whose data is + # supposed to be exported. + def _test_fileobj_link(self, lnktype, regtype): + a = self.tar.extractfile(lnktype) + b = self.tar.extractfile(regtype) + self.assertEqual(a.name, b.name) + + def test_fileobj_link1(self): + self._test_fileobj_link("ustar/lnktype", "ustar/regtype") + + def test_fileobj_link2(self): + self._test_fileobj_link("./ustar/linktest2/lnktype", "ustar/linktest1/regtype") + + def test_fileobj_symlink1(self): + self._test_fileobj_link("ustar/symtype", "ustar/regtype") + + def test_fileobj_symlink2(self): + self._test_fileobj_link("./ustar/linktest2/symtype", "ustar/linktest1/regtype") + class CommonReadTest(ReadTest): @@ -1378,6 +1398,29 @@ class ContextManagerTest(unittest.TestCase): fobj.close() +class LinkEmulationTest(ReadTest): + + # Test for issue #8741 regression. On platforms that do not support + # symbolic or hard links tarfile tries to extract these types of members as + # the regular files they point to. 
+ def _test_link_extraction(self, name): + self.tar.extract(name, TEMPDIR) + data = open(os.path.join(TEMPDIR, name), "rb").read() + self.assertEqual(md5sum(data), md5_regtype) + + def test_hardlink_extraction1(self): + self._test_link_extraction("ustar/lnktype") + + def test_hardlink_extraction2(self): + self._test_link_extraction("./ustar/linktest2/lnktype") + + def test_symlink_extraction1(self): + self._test_link_extraction("ustar/symtype") + + def test_symlink_extraction2(self): + self._test_link_extraction("./ustar/linktest2/symtype") + + class GzipMiscReadTest(MiscReadTest): tarname = gzipname mode = "r:gz" @@ -1463,6 +1506,8 @@ def test_main(): if hasattr(os, "link"): tests.append(HardlinkTest) + else: + tests.append(LinkEmulationTest) fobj = open(tarname, "rb") data = fobj.read() diff --git a/Lib/test/testtar.tar b/Lib/test/testtar.tar Binary files differ index c3022ed..dc1942c 100644 --- a/Lib/test/testtar.tar +++ b/Lib/test/testtar.tar |