diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2007-08-21 12:17:05 (GMT) |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2007-08-21 12:17:05 (GMT) |
commit | 3741effcf8de2af3224e20af2865b7e378e59855 (patch) | |
tree | 8787415d72087c64291b10cb75949662f0b3deab /Lib | |
parent | 4566c71e0ea13acd519b732eda3cf9d70d3a62ca (diff) | |
download | cpython-3741effcf8de2af3224e20af2865b7e378e59855.zip cpython-3741effcf8de2af3224e20af2865b7e378e59855.tar.gz cpython-3741effcf8de2af3224e20af2865b7e378e59855.tar.bz2 |
Fall back to 'ascii' encoding if sys.getfilesystemencoding() returns
None. Remove the encoding and errors arguments from the pax create methods
in TarInfo; pax always uses UTF-8.
Adapt the documentation and tests to the new string/unicode concept.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/tarfile.py | 21 | ||||
-rw-r--r-- | Lib/test/test_tarfile.py | 45 |
2 files changed, 38 insertions, 28 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 2f05618..bf67eab 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -167,7 +167,7 @@ TOEXEC = 0o001 # execute/search by other #--------------------------------------------------------- ENCODING = sys.getfilesystemencoding() if ENCODING is None: - ENCODING = sys.getdefaultencoding() + ENCODING = "ascii" #--------------------------------------------------------- # Some useful functions @@ -982,7 +982,7 @@ class TarInfo(object): elif format == GNU_FORMAT: return self.create_gnu_header(info, encoding, errors) elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding, errors) + return self.create_pax_header(info) else: raise ValueError("invalid format") @@ -1013,7 +1013,7 @@ class TarInfo(object): return buf + self._create_header(info, GNU_FORMAT, encoding, errors) - def create_pax_header(self, info, encoding, errors): + def create_pax_header(self, info): """Return the object as a ustar header block. If it cannot be represented this way, prepend a pax extended header sequence with supplement information. @@ -1056,17 +1056,17 @@ class TarInfo(object): # Create a pax extended header if necessary. if pax_headers: - buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding, errors) + buf = self._create_pax_generic_header(pax_headers, XHDTYPE) else: buf = b"" - return buf + self._create_header(info, USTAR_FORMAT, encoding, errors) + return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") @classmethod - def create_pax_global_header(cls, pax_headers, encoding, errors): + def create_pax_global_header(cls, pax_headers): """Return the object as a pax global header block sequence. 
""" - return cls._create_pax_generic_header(pax_headers, XGLTYPE, encoding, errors) + return cls._create_pax_generic_header(pax_headers, XGLTYPE) def _posix_split_name(self, name): """Split a name longer than 100 chars into a prefix @@ -1139,7 +1139,7 @@ class TarInfo(object): cls._create_payload(name) @classmethod - def _create_pax_generic_header(cls, pax_headers, type, encoding, errors): + def _create_pax_generic_header(cls, pax_headers, type): """Return a POSIX.1-2001 extended or global header sequence that contains a list of keyword, value pairs. The values must be strings. @@ -1166,7 +1166,7 @@ class TarInfo(object): info["magic"] = POSIX_MAGIC # Create pax header + record blocks. - return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ + return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ cls._create_payload(records) @classmethod @@ -1566,8 +1566,7 @@ class TarFile(object): self._loaded = True if self.pax_headers: - buf = self.tarinfo.create_pax_global_header( - self.pax_headers.copy(), self.encoding, self.errors) + buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) self.fileobj.write(buf) self.offset += len(buf) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 0585131..913ab60 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -780,8 +780,8 @@ class PaxWriteTest(GNUWriteTest): tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1") t = tarfile.TarInfo() - t.name = "\xe4\xf6\xfc" # non-ASCII - t.uid = 8**8 # too large + t.name = "\xe4\xf6\xfc" # non-ASCII + t.uid = 8**8 # too large t.pax_headers = pax_headers tar.addfile(t) tar.close() @@ -794,7 +794,6 @@ class PaxWriteTest(GNUWriteTest): class UstarUnicodeTest(unittest.TestCase): - # All *UnicodeTests FIXME format = tarfile.USTAR_FORMAT @@ -814,11 +813,14 @@ class UstarUnicodeTest(unittest.TestCase): tar.close() tar = tarfile.open(tmpname, encoding=encoding) - 
self.assert_(type(tar.getnames()[0]) is not bytes) self.assertEqual(tar.getmembers()[0].name, name) tar.close() def test_unicode_filename_error(self): + if self.format == tarfile.PAX_FORMAT: + # PAX_FORMAT ignores encoding in write mode. + return + tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict") tarinfo = tarfile.TarInfo() @@ -839,21 +841,24 @@ class UstarUnicodeTest(unittest.TestCase): tar.close() def test_uname_unicode(self): - for name in ("\xe4\xf6\xfc", "\xe4\xf6\xfc"): - t = tarfile.TarInfo("foo") - t.uname = name - t.gname = name - - fobj = io.BytesIO() - tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1") - tar.addfile(t) - tar.close() - fobj.seek(0) + t = tarfile.TarInfo("foo") + t.uname = "\xe4\xf6\xfc" + t.gname = "\xe4\xf6\xfc" + + tar = tarfile.open(tmpname, mode="w", format=self.format, encoding="iso8859-1") + tar.addfile(t) + tar.close() - tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1") + tar = tarfile.open(tmpname, encoding="iso8859-1") + t = tar.getmember("foo") + self.assertEqual(t.uname, "\xe4\xf6\xfc") + self.assertEqual(t.gname, "\xe4\xf6\xfc") + + if self.format != tarfile.PAX_FORMAT: + tar = tarfile.open(tmpname, encoding="ascii") t = tar.getmember("foo") - self.assertEqual(t.uname, "\xe4\xf6\xfc") - self.assertEqual(t.gname, "\xe4\xf6\xfc") + self.assertEqual(t.uname, "\ufffd\ufffd\ufffd") + self.assertEqual(t.gname, "\ufffd\ufffd\ufffd") class GNUUnicodeTest(UstarUnicodeTest): @@ -861,6 +866,11 @@ class GNUUnicodeTest(UstarUnicodeTest): format = tarfile.GNU_FORMAT +class PAXUnicodeTest(UstarUnicodeTest): + + format = tarfile.PAX_FORMAT + + class AppendTest(unittest.TestCase): # Test append mode (cp. patch #1652681). @@ -1047,6 +1057,7 @@ def test_main(): PaxWriteTest, UstarUnicodeTest, GNUUnicodeTest, + PAXUnicodeTest, AppendTest, LimitsTest, MiscTest, |