diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-05 18:54:43 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-02-05 18:54:43 (GMT) |
commit | 255493c81321f56afd32fb2e18d5eb3a13bed42f (patch) | |
tree | a928f22b921ffcf250451e3b271407ff1cca6adb | |
parent | 4d5d69d452d37bfc29d8f182cb4b99a344f0dadb (diff) | |
parent | 3b4f1594ffb360e9a93841a4c8dfe00d6f519bf6 (diff) | |
download | cpython-255493c81321f56afd32fb2e18d5eb3a13bed42f.zip cpython-255493c81321f56afd32fb2e18d5eb3a13bed42f.tar.gz cpython-255493c81321f56afd32fb2e18d5eb3a13bed42f.tar.bz2 |
Issue #19920: TarFile.list() no longer fails when it outputs a listing
containing non-encodable characters. Added tests for TarFile.list().
Based on patch by Vajrasky Kok.
-rwxr-xr-x | Lib/tarfile.py | 28 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 117 | ||||
-rw-r--r-- | Misc/NEWS | 5 |
3 files changed, 121 insertions, 29 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 79566e7..3409efe 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -257,6 +257,12 @@ def filemode(mode): DeprecationWarning, 2) return stat.filemode(mode) +def _safe_print(s): + encoding = getattr(sys.stdout, 'encoding', None) + if encoding is not None: + s = s.encode(encoding, 'backslashreplace').decode(encoding) + print(s, end=' ') + class TarError(Exception): """Base exception.""" @@ -1846,24 +1852,24 @@ class TarFile(object): for tarinfo in self: if verbose: - print(stat.filemode(tarinfo.mode), end=' ') - print("%s/%s" % (tarinfo.uname or tarinfo.uid, - tarinfo.gname or tarinfo.gid), end=' ') + _safe_print(stat.filemode(tarinfo.mode)) + _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid)) if tarinfo.ischr() or tarinfo.isblk(): - print("%10s" % ("%d,%d" \ - % (tarinfo.devmajor, tarinfo.devminor)), end=' ') + _safe_print("%10s" % + ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) else: - print("%10d" % tarinfo.size, end=' ') - print("%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6], end=' ') + _safe_print("%10d" % tarinfo.size) + _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6]) - print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') + _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) if verbose: if tarinfo.issym(): - print("->", tarinfo.linkname, end=' ') + _safe_print("-> " + tarinfo.linkname) if tarinfo.islnk(): - print("link to", tarinfo.linkname, end=' ') + _safe_print("link to " + tarinfo.linkname) print() def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None): diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index b53f3ac..ab88be4 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -219,6 +219,84 @@ class LzmaUstarReadTest(LzmaTest, UstarReadTest): pass +class ListTest(ReadTest, unittest.TestCase): + + # Override setUp to use default 
encoding (UTF-8) + def setUp(self): + self.tar = tarfile.open(self.tarname, mode=self.mode) + + def test_list(self): + tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + with support.swap_attr(sys, 'stdout', tio): + self.tar.list(verbose=False) + out = tio.detach().getvalue() + self.assertIn(b'ustar/conttype', out) + self.assertIn(b'ustar/regtype', out) + self.assertIn(b'ustar/lnktype', out) + self.assertIn(b'ustar' + (b'/12345' * 40) + b'67/longname', out) + self.assertIn(b'./ustar/linktest2/symtype', out) + self.assertIn(b'./ustar/linktest2/lnktype', out) + # Make sure it puts trailing slash for directory + self.assertIn(b'ustar/dirtype/', out) + self.assertIn(b'ustar/dirtype-with-size/', out) + # Make sure it is able to print unencodable characters + self.assertIn(br'ustar/umlauts-' + br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out) + self.assertIn(br'misc/regtype-hpux-signed-chksum-' + br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out) + self.assertIn(br'misc/regtype-old-v7-signed-chksum-' + br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out) + self.assertIn(br'pax/bad-pax-\udce4\udcf6\udcfc', out) + self.assertIn(br'pax/hdrcharset-\udce4\udcf6\udcfc', out) + # Make sure it prints files separated by one newline without any + # 'ls -l'-like accessories if verbose flag is not being used + # ... + # ustar/conttype + # ustar/regtype + # ... + self.assertRegex(out, br'ustar/conttype ?\r?\n' + br'ustar/regtype ?\r?\n') + # Make sure it does not print the source of link without verbose flag + self.assertNotIn(b'link to', out) + self.assertNotIn(b'->', out) + + def test_list_verbose(self): + tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + with support.swap_attr(sys, 'stdout', tio): + self.tar.list(verbose=True) + out = tio.detach().getvalue() + # Make sure it prints files separated by one newline with 'ls -l'-like + # accessories if verbose flag is being used + # ... 
+ # ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/conttype + # ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/regtype + # ... + self.assertRegex(out, (br'\?rw-r--r-- tarfile/tarfile\s+7011 ' + br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d ' + br'ustar/\w+type ?\r?\n') * 2) + # Make sure it prints the source of link with verbose flag + self.assertIn(b'ustar/symtype -> regtype', out) + self.assertIn(b'./ustar/linktest2/symtype -> ../linktest1/regtype', out) + self.assertIn(b'./ustar/linktest2/lnktype link to ' + b'./ustar/linktest1/regtype', out) + self.assertIn(b'gnu' + (b'/123' * 125) + b'/longlink link to gnu' + + (b'/123' * 125) + b'/longname', out) + self.assertIn(b'pax' + (b'/123' * 125) + b'/longlink link to pax' + + (b'/123' * 125) + b'/longname', out) + + +class GzipListTest(GzipTest, ListTest): + pass + + +class Bz2ListTest(Bz2Test, ListTest): + pass + + +class LzmaListTest(LzmaTest, ListTest): + pass + + class CommonReadTest(ReadTest): def test_empty_tarfile(self): @@ -1766,8 +1844,9 @@ class MiscTest(unittest.TestCase): class CommandLineTest(unittest.TestCase): - def tarfilecmd(self, *args): - rc, out, err = script_helper.assert_python_ok('-m', 'tarfile', *args) + def tarfilecmd(self, *args, **kwargs): + rc, out, err = script_helper.assert_python_ok('-m', 'tarfile', *args, + **kwargs) return out.replace(os.linesep.encode(), b'\n') def tarfilecmd_failure(self, *args): @@ -1815,24 +1894,26 @@ class CommandLineTest(unittest.TestCase): support.unlink(tmpname) def test_list_command(self): - self.make_simple_tarfile(tmpname) - with support.captured_stdout() as t: - with tarfile.open(tmpname, 'r') as tf: - tf.list(verbose=False) - expected = t.getvalue().encode(sys.getfilesystemencoding()) - for opt in '-l', '--list': - out = self.tarfilecmd(opt, tmpname) - self.assertEqual(out, expected) + for tar_name in testtarnames: + with support.captured_stdout() as t: + with tarfile.open(tar_name, 'r') as tf: + tf.list(verbose=False) + expected = 
t.getvalue().encode('ascii', 'backslashreplace') + for opt in '-l', '--list': + out = self.tarfilecmd(opt, tar_name, + PYTHONIOENCODING='ascii') + self.assertEqual(out, expected) def test_list_command_verbose(self): - self.make_simple_tarfile(tmpname) - with support.captured_stdout() as t: - with tarfile.open(tmpname, 'r') as tf: - tf.list(verbose=True) - expected = t.getvalue().encode(sys.getfilesystemencoding()) - for opt in '-v', '--verbose': - out = self.tarfilecmd(opt, '-l', tmpname) - self.assertEqual(out, expected) + for tar_name in testtarnames: + with support.captured_stdout() as t: + with tarfile.open(tar_name, 'r') as tf: + tf.list(verbose=True) + expected = t.getvalue().encode('ascii', 'backslashreplace') + for opt in '-v', '--verbose': + out = self.tarfilecmd(opt, '-l', tar_name, + PYTHONIOENCODING='ascii') + self.assertEqual(out, expected) def test_list_command_invalid_file(self): zipname = support.findfile('zipdir.zip') @@ -24,6 +24,9 @@ Core and Builtins Library ------- +- Issue #19920: TarFile.list() no longer fails when outputs a listing + containing non-encodable characters. Based on patch by Vajrasky Kok. + - Issue #20515: Fix NULL pointer dereference introduced by issue #20368. - Issue #19186: Restore namespacing of expat symbols inside the pyexpat module. @@ -118,6 +121,8 @@ IDLE Tests ----- +- Issue #19920: Added tests for TarFile.list(). Based on patch by Vajrasky Kok. + - Issue #19990: Added tests for the imghdr module. Based on patch by Claudiu Popa. |