diff options
Diffstat (limited to 'Lib/test/test_tarfile.py')
-rw-r--r-- | Lib/test/test_tarfile.py | 245 |
1 files changed, 180 insertions, 65 deletions
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 312050b..636a45e 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,4 +1,4 @@ -# encoding: iso8859-1 +# -*- coding: iso-8859-15 -*- import sys import os @@ -372,9 +372,9 @@ class LongnameTest(ReadTest): def test_read_longname(self): # Test reading of longname (bug #1471427). - name = self.subdir + "/" + "123/" * 125 + "longname" + longname = self.subdir + "/" + "123/" * 125 + "longname" try: - tarinfo = self.tar.getmember(name) + tarinfo = self.tar.getmember(longname) except KeyError: self.fail("longname not found") self.assert_(tarinfo.type != tarfile.DIRTYPE, "read longname as dirtype") @@ -393,13 +393,24 @@ class LongnameTest(ReadTest): tarinfo = self.tar.getmember(longname) offset = tarinfo.offset self.tar.fileobj.seek(offset) - fobj = StringIO.StringIO(self.tar.fileobj.read(1536)) + fobj = StringIO.StringIO(self.tar.fileobj.read(3 * 512)) self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj) + def test_header_offset(self): + # Test if the start offset of the TarInfo object includes + # the preceding extended header. + longname = self.subdir + "/" + "123/" * 125 + "longname" + offset = self.tar.getmember(longname).offset + fobj = open(tarname) + fobj.seek(offset) + tarinfo = tarfile.TarInfo.frombuf(fobj.read(512)) + self.assertEqual(tarinfo.type, self.longnametype) + class GNUReadTest(LongnameTest): subdir = "gnu" + longnametype = tarfile.GNUTYPE_LONGNAME def test_sparse_file(self): tarinfo1 = self.tar.getmember("ustar/sparse") @@ -410,26 +421,40 @@ class GNUReadTest(LongnameTest): "sparse file extraction failed") -class PaxReadTest(ReadTest): +class PaxReadTest(LongnameTest): subdir = "pax" + longnametype = tarfile.XHDTYPE - def test_pax_globheaders(self): + def test_pax_global_headers(self): tar = tarfile.open(tarname, encoding="iso8859-1") + tarinfo = tar.getmember("pax/regtype1") self.assertEqual(tarinfo.uname, "foo") self.assertEqual(tarinfo.gname, "bar") - self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß") + self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß") tarinfo = tar.getmember("pax/regtype2") self.assertEqual(tarinfo.uname, "") self.assertEqual(tarinfo.gname, "bar") - self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß") + self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß") tarinfo = tar.getmember("pax/regtype3") self.assertEqual(tarinfo.uname, "tarfile") self.assertEqual(tarinfo.gname, "tarfile") - self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß") + self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß") + + def test_pax_number_fields(self): + # All following number fields are read from the pax header. + tar = tarfile.open(tarname, encoding="iso8859-1") + tarinfo = tar.getmember("pax/regtype4") + self.assertEqual(tarinfo.size, 7011) + self.assertEqual(tarinfo.uid, 123) + self.assertEqual(tarinfo.gid, 123) + self.assertEqual(tarinfo.mtime, 1041808783.0) + self.assertEqual(type(tarinfo.mtime), float) + self.assertEqual(float(tarinfo.pax_headers["atime"]), 1041808783.0) + self.assertEqual(float(tarinfo.pax_headers["ctime"]), 1041808783.0) class WriteTest(unittest.TestCase): @@ -700,68 +725,160 @@ class PaxWriteTest(GNUWriteTest): n = tar.getmembers()[0].name self.assert_(name == n, "PAX longname creation failed") - def test_iso8859_15_filename(self): - self._test_unicode_filename("iso8859-15") + def test_pax_global_header(self): + pax_headers = { + u"foo": u"bar", + u"uid": u"0", + u"mtime": u"1.23", + u"test": u"äöü", + u"äöü": u"test"} + + tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \ + pax_headers=pax_headers) + tar.addfile(tarfile.TarInfo("test")) + tar.close() + + # Test if the global header was written correctly. + tar = tarfile.open(tmpname, encoding="iso8859-1") + self.assertEqual(tar.pax_headers, pax_headers) + self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers) + + # Test if all the fields are unicode. + for key, val in tar.pax_headers.items(): + self.assert_(type(key) is unicode) + self.assert_(type(val) is unicode) + if key in tarfile.PAX_NUMBER_FIELDS: + try: + tarfile.PAX_NUMBER_FIELDS[key](val) + except (TypeError, ValueError): + self.fail("unable to convert pax header field") + + def test_pax_extended_header(self): + # The fields from the pax header have priority over the + # TarInfo. + pax_headers = {u"path": u"foo", u"uid": u"123"} + + tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1") + t = tarfile.TarInfo() + t.name = u"äöü" # non-ASCII + t.uid = 8**8 # too large + t.pax_headers = pax_headers + tar.addfile(t) + tar.close() + + tar = tarfile.open(tmpname, encoding="iso8859-1") + t = tar.getmembers()[0] + self.assertEqual(t.pax_headers, pax_headers) + self.assertEqual(t.name, "foo") + self.assertEqual(t.uid, 123) + + +class UstarUnicodeTest(unittest.TestCase): + # All *UnicodeTests FIXME + + format = tarfile.USTAR_FORMAT + + def test_iso8859_1_filename(self): + self._test_unicode_filename("iso8859-1") + + def test_utf7_filename(self): + self._test_unicode_filename("utf7") def test_utf8_filename(self): self._test_unicode_filename("utf8") - def test_utf16_filename(self): - self._test_unicode_filename("utf16") - def _test_unicode_filename(self, encoding): - tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT) - name = "\u20ac".encode(encoding) # Euro sign - tar.encoding = encoding + tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict") + name = "äöü" tar.addfile(tarfile.TarInfo(name)) tar.close() tar = tarfile.open(tmpname, encoding=encoding) - self.assertEqual(tar.getmembers()[0].name, name) + self.assert_(type(tar.getnames()[0]) is not unicode) + self.assertEqual(tar.getmembers()[0].name, name.encode(encoding)) tar.close() def test_unicode_filename_error(self): - # The euro sign filename cannot be translated to iso8859-1 encoding. - tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="utf8") - name = "\u20ac".encode("utf8") # Euro sign - tar.addfile(tarfile.TarInfo(name)) + tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict") + tarinfo = tarfile.TarInfo() + + tarinfo.name = "äöü" + if self.format == tarfile.PAX_FORMAT: + self.assertRaises(UnicodeError, tar.addfile, tarinfo) + else: + tar.addfile(tarinfo) + + tarinfo.name = u"äöü" + self.assertRaises(UnicodeError, tar.addfile, tarinfo) + + tarinfo.name = "foo" + tarinfo.uname = u"äöü" + self.assertRaises(UnicodeError, tar.addfile, tarinfo) + + def test_unicode_argument(self): + tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict") + for t in tar: + self.assert_(type(t.name) is str) + self.assert_(type(t.linkname) is str) + self.assert_(type(t.uname) is str) + self.assert_(type(t.gname) is str) tar.close() - self.assertRaises(UnicodeError, tarfile.open, tmpname, encoding="iso8859-1") + def test_uname_unicode(self): + for name in (u"äöü", "äöü"): + t = tarfile.TarInfo("foo") + t.uname = name + t.gname = name - def test_pax_headers(self): - self._test_pax_headers({"foo": "bar", "uid": 0, "mtime": 1.23}) + fobj = StringIO.StringIO() + tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1") + tar.addfile(t) + tar.close() + fobj.seek(0) - self._test_pax_headers({"euro": "\u20ac".encode("utf8")}) + tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1") + t = tar.getmember("foo") + self.assertEqual(t.uname, "äöü") + self.assertEqual(t.gname, "äöü") - self._test_pax_headers({"euro": "\u20ac"}, - {"euro": "\u20ac".encode("utf8")}) +class GNUUnicodeTest(UstarUnicodeTest): - self._test_pax_headers({"\u20ac": "euro"}, - {"\u20ac".encode("utf8"): "euro"}) + format = tarfile.GNU_FORMAT - def _test_pax_headers(self, pax_headers, cmp_headers=None): - if cmp_headers is None: - cmp_headers = pax_headers - tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \ - pax_headers=pax_headers, encoding="utf8") - tar.addfile(tarfile.TarInfo("test")) - tar.close() +class PaxUnicodeTest(UstarUnicodeTest): - tar = tarfile.open(tmpname, encoding="utf8") - self.assertEqual(tar.pax_headers, cmp_headers) + format = tarfile.PAX_FORMAT - def test_truncated_header(self): - tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT) - tarinfo = tarfile.TarInfo("123/" * 126 + "longname") - tar.addfile(tarinfo) + def _create_unicode_name(self, name): + tar = tarfile.open(tmpname, "w", format=self.format) + t = tarfile.TarInfo() + t.pax_headers["path"] = name + tar.addfile(t) tar.close() - # Simulate a premature EOF. - open(tmpname, "rb+").truncate(1536) - tar = tarfile.open(tmpname) - self.assertEqual(tar.getmembers(), []) + def test_error_handlers(self): + # Test if the unicode error handlers work correctly for characters + # that cannot be expressed in a given encoding. + self._create_unicode_name(u"äöü") + + for handler, name in (("utf-8", u"äöü".encode("utf8")), + ("replace", "???"), ("ignore", "")): + tar = tarfile.open(tmpname, format=self.format, encoding="ascii", + errors=handler) + self.assertEqual(tar.getnames()[0], name) + + self.assertRaises(UnicodeError, tarfile.open, tmpname, + encoding="ascii", errors="strict") + + def test_error_handler_utf8(self): + # Create a pathname that has one component representable using + # iso8859-1 and the other only in iso8859-15. + self._create_unicode_name(u"äöü/¤") + + tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1", + errors="utf-8") + self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8")) class AppendTest(unittest.TestCase): @@ -836,63 +953,58 @@ class LimitsTest(unittest.TestCase): def test_ustar_limits(self): # 100 char name tarinfo = tarfile.TarInfo("0123456789" * 10) - tarinfo.create_ustar_header() + tarinfo.tobuf(tarfile.USTAR_FORMAT) # 101 char name that cannot be stored tarinfo = tarfile.TarInfo("0123456789" * 10 + "0") - self.assertRaises(ValueError, tarinfo.create_ustar_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT) # 256 char name with a slash at pos 156 tarinfo = tarfile.TarInfo("123/" * 62 + "longname") - tarinfo.create_ustar_header() + tarinfo.tobuf(tarfile.USTAR_FORMAT) # 256 char name that cannot be stored tarinfo = tarfile.TarInfo("1234567/" * 31 + "longname") - self.assertRaises(ValueError, tarinfo.create_ustar_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT) # 512 char name tarinfo = tarfile.TarInfo("123/" * 126 + "longname") - self.assertRaises(ValueError, tarinfo.create_ustar_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT) # 512 char linkname tarinfo = tarfile.TarInfo("longlink") tarinfo.linkname = "123/" * 126 + "longname" - self.assertRaises(ValueError, tarinfo.create_ustar_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT) # uid > 8 digits tarinfo = tarfile.TarInfo("name") tarinfo.uid = 010000000 - self.assertRaises(ValueError, tarinfo.create_ustar_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT) def test_gnu_limits(self): tarinfo = tarfile.TarInfo("123/" * 126 + "longname") - tarinfo.create_gnu_header() + tarinfo.tobuf(tarfile.GNU_FORMAT) tarinfo = tarfile.TarInfo("longlink") tarinfo.linkname = "123/" * 126 + "longname" - tarinfo.create_gnu_header() + tarinfo.tobuf(tarfile.GNU_FORMAT) # uid >= 256 ** 7 tarinfo = tarfile.TarInfo("name") tarinfo.uid = 04000000000000000000 - self.assertRaises(ValueError, tarinfo.create_gnu_header) + self.assertRaises(ValueError, tarinfo.tobuf, tarfile.GNU_FORMAT) def test_pax_limits(self): - # A 256 char name that can be stored without an extended header. - tarinfo = tarfile.TarInfo("123/" * 62 + "longname") - self.assert_(len(tarinfo.create_pax_header("utf8")) == 512, - "create_pax_header attached superfluous extended header") - tarinfo = tarfile.TarInfo("123/" * 126 + "longname") - tarinfo.create_pax_header("utf8") + tarinfo.tobuf(tarfile.PAX_FORMAT) tarinfo = tarfile.TarInfo("longlink") tarinfo.linkname = "123/" * 126 + "longname" - tarinfo.create_pax_header("utf8") + tarinfo.tobuf(tarfile.PAX_FORMAT) tarinfo = tarfile.TarInfo("name") tarinfo.uid = 04000000000000000000 - tarinfo.create_pax_header("utf8") + tarinfo.tobuf(tarfile.PAX_FORMAT) class GzipMiscReadTest(MiscReadTest): @@ -940,6 +1052,9 @@ def test_main(): StreamWriteTest, GNUWriteTest, PaxWriteTest, + UstarUnicodeTest, + GNUUnicodeTest, + PaxUnicodeTest, AppendTest, LimitsTest, ] |