From 4360e73e1aa98c32304e5bcf302d6c77dff15642 Mon Sep 17 00:00:00 2001 From: Neal Norwitz Date: Fri, 28 Oct 2005 06:00:51 +0000 Subject: Backport: - Patch #1338314, Bug #1336623: fix tarfile so it can extract REGTYPE directories from tarfiles written by old programs. --- Lib/tarfile.py | 11 ++++++----- Lib/test/test_tarfile.py | 24 ++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index ed7b509..9a4e515 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -688,6 +688,11 @@ class TarInfo(object): tarinfo.devmajor = tarinfo.devmajor = 0 tarinfo.prefix = buf[345:500] + # Some old tar programs represent a directory as a regular + # file with a trailing slash. + if tarinfo.isreg() and tarinfo.name.endswith("/"): + tarinfo.type = DIRTYPE + # The prefix field is used for filenames > 100 in # the POSIX standard. # name = prefix + '/' + name @@ -695,7 +700,7 @@ class TarInfo(object): tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) # Directory names should have a '/' at the end. - if tarinfo.isdir() and tarinfo.name[-1:] != "/": + if tarinfo.isdir(): tarinfo.name += "/" return tarinfo @@ -1628,10 +1633,6 @@ class TarFile(object): # Skip the following data blocks. self.offset += self._block(tarinfo.size) - if tarinfo.isreg() and tarinfo.name[:-1] == "/": - # some old tar programs don't know DIRTYPE - tarinfo.type = DIRTYPE - self.members.append(tarinfo) return tarinfo diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7362af3..53dd238 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -134,6 +134,30 @@ class ReadTest(BaseTest): "readlines() after seek failed") fobj.close() + def test_old_dirtype(self): + """Test old style dirtype member (bug #1336623). + """ + # Old tars create directory members using a REGTYPE + # header with a "/" appended to the filename field. + + # Create an old tar style directory entry. + filename = tmpname() + tarinfo = tarfile.TarInfo("directory/") + tarinfo.type = tarfile.REGTYPE + + fobj = file(filename, "w") + fobj.write(tarinfo.tobuf()) + fobj.close() + + # Test if it is still a directory entry when + # read back. + tar = tarfile.open(filename) + tarinfo = tar.getmembers()[0] + tar.close() + + self.assert_(tarinfo.type == tarfile.DIRTYPE) + self.assert_(tarinfo.name.endswith("/")) + class ReadStreamTest(ReadTest): sep = "|" diff --git a/Misc/NEWS b/Misc/NEWS index ab43a0c..c22d598 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -25,6 +25,9 @@ Core and builtins Extension Modules ----------------- +- Patch #1338314, Bug #1336623: fix tarfile so it can extract + REGTYPE directories from tarfiles written by old programs. + - Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format -- cgit v0.12