author | Marc-André Lemburg <mal@egenix.com> | 2011-02-25 15:42:01 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2011-02-25 15:42:01 (GMT) |
commit | 8f36af7a4c9409a673412e4bdfbad76d700abc3a (patch) | |
tree | 1b61599a07604a96539e98098b055c577cd7e6a8 /Lib/tarfile.py | |
parent | a391b11320f729f6eec6c772c00b3e62c2746eaf (diff) | |
Normalize the encoding names for Latin-1 and UTF-8 to
'latin-1' and 'utf-8'.
These are optimized in the Python Unicode implementation
to result in more direct processing, bypassing the codec
registry.
Also see issue11303.
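The effect described in the commit message can be checked with a micro-benchmark along these lines. This is a rough sketch, not part of the commit: the sample string and repetition count are arbitrary, and newer CPython releases have since widened the fast path, so the measured gap may be small or absent.

```python
# Rough micro-benchmark: the hyphenated spellings hit CPython's C-level
# shortcut for common codecs, while the unhyphenated aliases go through
# the codec registry lookup. Numbers vary by interpreter version and
# build; this only illustrates the commit's rationale.
import timeit

sample = "pax header value " * 20

for name in ("utf-8", "utf8", "latin-1", "latin1"):
    seconds = timeit.timeit(lambda: sample.encode(name), number=200_000)
    print(f"{name:<8} {seconds:.3f}s")
```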
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r-- | Lib/tarfile.py | 18 |
1 file changed, 9 insertions, 9 deletions
```diff
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 0f9d1da..6b663f4 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1084,7 +1084,7 @@ class TarInfo(object):
     def create_pax_global_header(cls, pax_headers):
         """Return the object as a pax global header block sequence.
         """
-        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
+        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

     def _posix_split_name(self, name):
         """Split a name longer than 100 chars into a prefix
@@ -1167,7 +1167,7 @@ class TarInfo(object):
         binary = False
         for keyword, value in pax_headers.items():
             try:
-                value.encode("utf8", "strict")
+                value.encode("utf-8", "strict")
             except UnicodeEncodeError:
                 binary = True
                 break
@@ -1178,13 +1178,13 @@ class TarInfo(object):
             records += b"21 hdrcharset=BINARY\n"

         for keyword, value in pax_headers.items():
-            keyword = keyword.encode("utf8")
+            keyword = keyword.encode("utf-8")
             if binary:
                 # Try to restore the original byte representation of `value'.
                 # Needless to say, that the encoding must match the string.
                 value = value.encode(encoding, "surrogateescape")
             else:
-                value = value.encode("utf8")
+                value = value.encode("utf-8")

             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
             n = p = 0
@@ -1393,7 +1393,7 @@ class TarInfo(object):
         # the translation to UTF-8 fails.
         match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
         if match is not None:
-            pax_headers["hdrcharset"] = match.group(1).decode("utf8")
+            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

         # For the time being, we don't care about anything other than "BINARY".
         # The only other value that is currently allowed by the standard is
@@ -1402,7 +1402,7 @@ class TarInfo(object):
         if hdrcharset == "BINARY":
             encoding = tarfile.encoding
         else:
-            encoding = "utf8"
+            encoding = "utf-8"

         # Parse pax header information. A record looks like that:
         # "%d %s=%s\n" % (length, keyword, value). length is the size
@@ -1419,20 +1419,20 @@ class TarInfo(object):
             length = int(length)
             value = buf[match.end(2) + 1:match.start(1) + length - 1]

-            # Normally, we could just use "utf8" as the encoding and "strict"
+            # Normally, we could just use "utf-8" as the encoding and "strict"
             # as the error handler, but we better not take the risk. For
             # example, GNU tar <= 1.23 is known to store filenames it cannot
             # translate to UTF-8 as raw strings (unfortunately without a
             # hdrcharset=BINARY header).
             # We first try the strict standard encoding, and if that fails we
             # fall back on the user's encoding and error handler.
-            keyword = self._decode_pax_field(keyword, "utf8", "utf8",
+            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                                              tarfile.errors)
             if keyword in PAX_NAME_FIELDS:
                 value = self._decode_pax_field(value, encoding, tarfile.encoding,
                                                tarfile.errors)
             else:
-                value = self._decode_pax_field(value, "utf8", "utf8",
+                value = self._decode_pax_field(value, "utf-8", "utf-8",
                                                tarfile.errors)

             pax_headers[keyword] = value
```
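The last hunk leans on the strict-then-fallback decoding described in its comments. Below is a minimal sketch of that pattern, modelled on the `_decode_pax_field` call sites visible in the diff; the free-standing function and example arguments are illustrative, not tarfile's actual method.

```python
# Sketch of the decode strategy the pax-parsing hunk relies on: try the
# standard encoding strictly first, then fall back on the user's encoding
# and error handler (e.g. "surrogateescape"). Illustrative only; in
# tarfile this logic lives on TarInfo.
def decode_pax_field(value: bytes, encoding: str,
                     fallback_encoding: str, fallback_errors: str) -> str:
    try:
        return value.decode(encoding, "strict")
    except UnicodeDecodeError:
        return value.decode(fallback_encoding, fallback_errors)

# Example: a pax keyword is always decoded as strict UTF-8 first, with
# the user's settings only as a last resort.
keyword = decode_pax_field(b"path", "utf-8", "utf-8", "replace")
```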