summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_tarfile.py
diff options
context:
space:
mode:
author: Lars Gustäbel <lars@gustaebel.de> 2010-05-17 18:02:50 (GMT)
committer: Lars Gustäbel <lars@gustaebel.de> 2010-05-17 18:02:50 (GMT)
commit: 1465cc2887be2054cca50c72ef804adcc15fdf65 (patch)
tree: 3f20bc90a15488fcbca7868415cf35d2bc1e114a /Lib/test/test_tarfile.py
parent: 0f78a94f445c48f5a96a77a1bb77ca88d7c50694 (diff)
download: cpython-1465cc2887be2054cca50c72ef804adcc15fdf65.zip
cpython-1465cc2887be2054cca50c72ef804adcc15fdf65.tar.gz
cpython-1465cc2887be2054cca50c72ef804adcc15fdf65.tar.bz2
Issue #8633: Support for POSIX.1-2008 binary pax headers.
tarfile is now able to read and write pax headers with a "hdrcharset=BINARY" record. This record was introduced in POSIX.1-2008 as a method to store unencoded binary strings that cannot be translated to UTF-8. In practice, this is just a workaround that allows a tar implementation to store filenames that do not comply with the current filesystem encoding and thus cannot be decoded correctly. Additionally, tarfile works around a bug in current versions of GNU tar: undecodable filenames are stored as-is in a pax header without a "hdrcharset" record being added. Technically, these headers are invalid, but tarfile manages to read them correctly anyway.
Diffstat (limited to 'Lib/test/test_tarfile.py')
-rw-r--r-- Lib/test/test_tarfile.py 21
1 files changed, 21 insertions, 0 deletions
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 2db18fe..c0741ee 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1126,11 +1126,32 @@ class GNUUnicodeTest(UstarUnicodeTest):
format = tarfile.GNU_FORMAT
+ def test_bad_pax_header(self):
+ # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
+ # without a hdrcharset=BINARY header.
+ for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
+ ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
+ with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
+ try:
+ t = tar.getmember(name)
+ except KeyError:
+ self.fail("unable to read bad GNU tar pax header")
+
class PAXUnicodeTest(UstarUnicodeTest):
format = tarfile.PAX_FORMAT
+ def test_binary_header(self):
+ # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
+ for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
+ ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
+ with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
+ try:
+ t = tar.getmember(name)
+ except KeyError:
+ self.fail("unable to read POSIX.1-2008 binary header")
+
class AppendTest(unittest.TestCase):
# Test append mode (cp. patch #1652681).