summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Lars Gustäbel <lars@gustaebel.de> 2007-08-21 12:17:05 (GMT)
committer: Lars Gustäbel <lars@gustaebel.de> 2007-08-21 12:17:05 (GMT)
commit: 3741effcf8de2af3224e20af2865b7e378e59855 (patch)
tree: 8787415d72087c64291b10cb75949662f0b3deab /Lib
parent: 4566c71e0ea13acd519b732eda3cf9d70d3a62ca (diff)
download: cpython-3741effcf8de2af3224e20af2865b7e378e59855.zip
cpython-3741effcf8de2af3224e20af2865b7e378e59855.tar.gz
cpython-3741effcf8de2af3224e20af2865b7e378e59855.tar.bz2
Fall back to 'ascii' encoding if sys.getfilesystemencoding() returns
None. Remove the encoding and errors arguments from the pax create methods in TarInfo, because pax always uses UTF-8. Adapt the documentation and tests to the new string/unicode concept.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/tarfile.py 21
-rw-r--r-- Lib/test/test_tarfile.py 45
2 files changed, 38 insertions, 28 deletions
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 2f05618..bf67eab 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -167,7 +167,7 @@ TOEXEC = 0o001 # execute/search by other
#---------------------------------------------------------
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
- ENCODING = sys.getdefaultencoding()
+ ENCODING = "ascii"
#---------------------------------------------------------
# Some useful functions
@@ -982,7 +982,7 @@ class TarInfo(object):
elif format == GNU_FORMAT:
return self.create_gnu_header(info, encoding, errors)
elif format == PAX_FORMAT:
- return self.create_pax_header(info, encoding, errors)
+ return self.create_pax_header(info)
else:
raise ValueError("invalid format")
@@ -1013,7 +1013,7 @@ class TarInfo(object):
return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
- def create_pax_header(self, info, encoding, errors):
+ def create_pax_header(self, info):
"""Return the object as a ustar header block. If it cannot be
represented this way, prepend a pax extended header sequence
with supplement information.
@@ -1056,17 +1056,17 @@ class TarInfo(object):
# Create a pax extended header if necessary.
if pax_headers:
- buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding, errors)
+ buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
else:
buf = b""
- return buf + self._create_header(info, USTAR_FORMAT, encoding, errors)
+ return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
@classmethod
- def create_pax_global_header(cls, pax_headers, encoding, errors):
+ def create_pax_global_header(cls, pax_headers):
"""Return the object as a pax global header block sequence.
"""
- return cls._create_pax_generic_header(pax_headers, XGLTYPE, encoding, errors)
+ return cls._create_pax_generic_header(pax_headers, XGLTYPE)
def _posix_split_name(self, name):
"""Split a name longer than 100 chars into a prefix
@@ -1139,7 +1139,7 @@ class TarInfo(object):
cls._create_payload(name)
@classmethod
- def _create_pax_generic_header(cls, pax_headers, type, encoding, errors):
+ def _create_pax_generic_header(cls, pax_headers, type):
"""Return a POSIX.1-2001 extended or global header sequence
that contains a list of keyword, value pairs. The values
must be strings.
@@ -1166,7 +1166,7 @@ class TarInfo(object):
info["magic"] = POSIX_MAGIC
# Create pax header + record blocks.
- return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
+ return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
cls._create_payload(records)
@classmethod
@@ -1566,8 +1566,7 @@ class TarFile(object):
self._loaded = True
if self.pax_headers:
- buf = self.tarinfo.create_pax_global_header(
- self.pax_headers.copy(), self.encoding, self.errors)
+ buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
self.fileobj.write(buf)
self.offset += len(buf)
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 0585131..913ab60 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -780,8 +780,8 @@ class PaxWriteTest(GNUWriteTest):
tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
t = tarfile.TarInfo()
- t.name = "\xe4\xf6\xfc" # non-ASCII
- t.uid = 8**8 # too large
+ t.name = "\xe4\xf6\xfc" # non-ASCII
+ t.uid = 8**8 # too large
t.pax_headers = pax_headers
tar.addfile(t)
tar.close()
@@ -794,7 +794,6 @@ class PaxWriteTest(GNUWriteTest):
class UstarUnicodeTest(unittest.TestCase):
- # All *UnicodeTests FIXME
format = tarfile.USTAR_FORMAT
@@ -814,11 +813,14 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close()
tar = tarfile.open(tmpname, encoding=encoding)
- self.assert_(type(tar.getnames()[0]) is not bytes)
self.assertEqual(tar.getmembers()[0].name, name)
tar.close()
def test_unicode_filename_error(self):
+ if self.format == tarfile.PAX_FORMAT:
+ # PAX_FORMAT ignores encoding in write mode.
+ return
+
tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
tarinfo = tarfile.TarInfo()
@@ -839,21 +841,24 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close()
def test_uname_unicode(self):
- for name in ("\xe4\xf6\xfc", "\xe4\xf6\xfc"):
- t = tarfile.TarInfo("foo")
- t.uname = name
- t.gname = name
-
- fobj = io.BytesIO()
- tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1")
- tar.addfile(t)
- tar.close()
- fobj.seek(0)
+ t = tarfile.TarInfo("foo")
+ t.uname = "\xe4\xf6\xfc"
+ t.gname = "\xe4\xf6\xfc"
+
+ tar = tarfile.open(tmpname, mode="w", format=self.format, encoding="iso8859-1")
+ tar.addfile(t)
+ tar.close()
- tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1")
+ tar = tarfile.open(tmpname, encoding="iso8859-1")
+ t = tar.getmember("foo")
+ self.assertEqual(t.uname, "\xe4\xf6\xfc")
+ self.assertEqual(t.gname, "\xe4\xf6\xfc")
+
+ if self.format != tarfile.PAX_FORMAT:
+ tar = tarfile.open(tmpname, encoding="ascii")
t = tar.getmember("foo")
- self.assertEqual(t.uname, "\xe4\xf6\xfc")
- self.assertEqual(t.gname, "\xe4\xf6\xfc")
+ self.assertEqual(t.uname, "\ufffd\ufffd\ufffd")
+ self.assertEqual(t.gname, "\ufffd\ufffd\ufffd")
class GNUUnicodeTest(UstarUnicodeTest):
@@ -861,6 +866,11 @@ class GNUUnicodeTest(UstarUnicodeTest):
format = tarfile.GNU_FORMAT
+class PAXUnicodeTest(UstarUnicodeTest):
+
+ format = tarfile.PAX_FORMAT
+
+
class AppendTest(unittest.TestCase):
# Test append mode (cp. patch #1652681).
@@ -1047,6 +1057,7 @@ def test_main():
PaxWriteTest,
UstarUnicodeTest,
GNUUnicodeTest,
+ PAXUnicodeTest,
AppendTest,
LimitsTest,
MiscTest,