From 8f36af7a4c9409a673412e4bdfbad76d700abc3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Fri, 25 Feb 2011 15:42:01 +0000 Subject: Normalize the encoding names for Latin-1 and UTF-8 to 'latin-1' and 'utf-8'. These are optimized in the Python Unicode implementation to result in more direct processing, bypassing the codec registry. Also see issue11303. --- Lib/asynchat.py | 2 +- Lib/distutils/command/bdist_wininst.py | 6 +++--- Lib/ftplib.py | 2 +- Lib/http/client.py | 4 ++-- Lib/http/server.py | 4 ++-- Lib/multiprocessing/connection.py | 4 ++-- Lib/random.py | 2 +- Lib/smtpd.py | 2 +- Lib/sqlite3/test/types.py | 2 +- Lib/sre_parse.py | 2 +- Lib/tarfile.py | 18 +++++++++--------- Lib/test/test_argparse.py | 2 +- Lib/test/test_bigmem.py | 4 ++-- Lib/test/test_bytes.py | 18 ++++++++++-------- Lib/test/test_cmd_line.py | 4 ++-- Lib/test/test_codecs.py | 4 ++-- Lib/test/test_io.py | 18 +++++++++--------- Lib/test/test_mailbox.py | 6 +++--- Lib/test/test_nntplib.py | 2 +- Lib/test/test_pep3120.py | 4 ++-- Lib/test/test_sax.py | 4 ++-- Lib/test/test_shelve.py | 4 ++-- Lib/test/test_socket.py | 14 +++++++------- Lib/test/test_strlit.py | 2 +- Lib/test/test_subprocess.py | 2 +- Lib/test/test_tarfile.py | 6 +++--- Lib/test/test_unicode.py | 5 ++++- Lib/test/test_urllib.py | 2 +- Lib/test/test_urllib2.py | 2 +- Lib/test/test_uuid.py | 4 ++-- Lib/test/test_xml_etree.py | 6 +++--- Lib/urllib/request.py | 2 +- 32 files changed, 84 insertions(+), 79 deletions(-) diff --git a/Lib/asynchat.py b/Lib/asynchat.py index 6558512..2199d1b 100644 --- a/Lib/asynchat.py +++ b/Lib/asynchat.py @@ -75,7 +75,7 @@ class async_chat (asyncore.dispatcher): # sign of an application bug that we don't want to pass silently use_encoding = 0 - encoding = 'latin1' + encoding = 'latin-1' def __init__ (self, sock=None, map=None): # for string terminator matching diff --git a/Lib/distutils/command/bdist_wininst.py b/Lib/distutils/command/bdist_wininst.py index b2e2fc6..b886055 100644 --- a/Lib/distutils/command/bdist_wininst.py +++ b/Lib/distutils/command/bdist_wininst.py @@ -263,11 +263,11 @@ class bdist_wininst(Command): cfgdata = cfgdata + b"\0" if self.pre_install_script: # We need to normalize newlines, so we open in text mode and - # convert back to bytes. "latin1" simply avoids any possible + # convert back to bytes. "latin-1" simply avoids any possible # failures. with open(self.pre_install_script, "r", - encoding="latin1") as script: - script_data = script.read().encode("latin1") + encoding="latin-1") as script: + script_data = script.read().encode("latin-1") cfgdata = cfgdata + script_data + b"\n\0" else: # empty pre-install script diff --git a/Lib/ftplib.py b/Lib/ftplib.py index fd5a863..8dce9ac 100644 --- a/Lib/ftplib.py +++ b/Lib/ftplib.py @@ -100,7 +100,7 @@ class FTP: file = None welcome = None passiveserver = 1 - encoding = "latin1" + encoding = "latin-1" # Initialization method (called by class instantiation). # Initialize host to localhost, port to standard ftp port diff --git a/Lib/http/client.py b/Lib/http/client.py index 36b7349..8ad7cb6 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -697,7 +697,7 @@ class HTTPConnection: self.send(connect_bytes) for header, value in self._tunnel_headers.items(): header_str = "%s: %s\r\n" % (header, value) - header_bytes = header_str.encode("latin1") + header_bytes = header_str.encode("latin-1") self.send(header_bytes) self.send(b'\r\n') @@ -937,7 +937,7 @@ class HTTPConnection: values = list(values) for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): - values[i] = one_value.encode('latin1') + values[i] = one_value.encode('latin-1') elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') value = b'\r\n\t'.join(values) diff --git a/Lib/http/server.py b/Lib/http/server.py index 543abe0..a35fd9d 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -448,7 +448,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): message = '' if self.request_version != 'HTTP/0.9': self.wfile.write(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode('latin1', 'strict')) + (self.protocol_version, code, message)).encode('latin-1', 'strict')) def send_header(self, keyword, value): """Send a MIME header.""" @@ -456,7 +456,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): if not hasattr(self, '_headers_buffer'): self._headers_buffer = [] self._headers_buffer.append( - ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict')) + ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) if keyword.lower() == 'connection': if value.lower() == 'close': diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index d6c23fb..d6627e5 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -434,10 +434,10 @@ class ConnectionWrapper(object): return self._loads(s) def _xml_dumps(obj): - return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf8') + return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf-8') def _xml_loads(s): - (obj,), method = xmlrpclib.loads(s.decode('utf8')) + (obj,), method = xmlrpclib.loads(s.decode('utf-8')) return obj class XmlListener(Listener): diff --git a/Lib/random.py b/Lib/random.py index cb49d56..f29803e 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -114,7 +114,7 @@ class Random(_random.Random): if version == 2: if isinstance(a, (str, bytes, bytearray)): if isinstance(a, str): - a = a.encode("utf8") + a = a.encode("utf-8") a += _sha512(a).digest() a = int.from_bytes(a, 'big') diff --git a/Lib/smtpd.py b/Lib/smtpd.py index 599e79b..32f45ae 100755 --- a/Lib/smtpd.py +++ b/Lib/smtpd.py @@ -275,7 +275,7 @@ class SMTPChannel(asynchat.async_chat): return elif limit: self.num_bytes += len(data) - self.received_lines.append(str(data, "utf8")) + self.received_lines.append(str(data, "utf-8")) # Implementation of base class abstract method def found_terminator(self): diff --git a/Lib/sqlite3/test/types.py b/Lib/sqlite3/test/types.py index 29413e1..d214f3d 100644 --- a/Lib/sqlite3/test/types.py +++ b/Lib/sqlite3/test/types.py @@ -85,7 +85,7 @@ class DeclTypesTests(unittest.TestCase): if isinstance(_val, bytes): # sqlite3 always calls __init__ with a bytes created from a # UTF-8 string when __conform__ was used to store the object. - _val = _val.decode('utf8') + _val = _val.decode('utf-8') self.val = _val def __cmp__(self, other): diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 13737ca..ae63c31 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -791,7 +791,7 @@ def parse_template(source, pattern): else: # The tokenizer implicitly decodes bytes objects as latin-1, we must # therefore re-encode the final representation. - encode = lambda x: x.encode('latin1') + encode = lambda x: x.encode('latin-1') for c, s in p: if c is MARK: groupsappend((i, s)) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 0f9d1da..6b663f4 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1084,7 +1084,7 @@ class TarInfo(object): def create_pax_global_header(cls, pax_headers): """Return the object as a pax global header block sequence. """ - return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") + return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") def _posix_split_name(self, name): """Split a name longer than 100 chars into a prefix @@ -1167,7 +1167,7 @@ class TarInfo(object): binary = False for keyword, value in pax_headers.items(): try: - value.encode("utf8", "strict") + value.encode("utf-8", "strict") except UnicodeEncodeError: binary = True break @@ -1178,13 +1178,13 @@ class TarInfo(object): records += b"21 hdrcharset=BINARY\n" for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") + keyword = keyword.encode("utf-8") if binary: # Try to restore the original byte representation of `value'. # Needless to say, that the encoding must match the string. value = value.encode(encoding, "surrogateescape") else: - value = value.encode("utf8") + value = value.encode("utf-8") l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' n = p = 0 @@ -1393,7 +1393,7 @@ class TarInfo(object): # the translation to UTF-8 fails. match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) if match is not None: - pax_headers["hdrcharset"] = match.group(1).decode("utf8") + pax_headers["hdrcharset"] = match.group(1).decode("utf-8") # For the time being, we don't care about anything other than "BINARY". # The only other value that is currently allowed by the standard is @@ -1402,7 +1402,7 @@ class TarInfo(object): if hdrcharset == "BINARY": encoding = tarfile.encoding else: - encoding = "utf8" + encoding = "utf-8" # Parse pax header information. A record looks like that: # "%d %s=%s\n" % (length, keyword, value). length is the size @@ -1419,20 +1419,20 @@ class TarInfo(object): length = int(length) value = buf[match.end(2) + 1:match.start(1) + length - 1] - # Normally, we could just use "utf8" as the encoding and "strict" + # Normally, we could just use "utf-8" as the encoding and "strict" # as the error handler, but we better not take the risk. For # example, GNU tar <= 1.23 is known to store filenames it cannot # translate to UTF-8 as raw strings (unfortunately without a # hdrcharset=BINARY header). # We first try the strict standard encoding, and if that fails we # fall back on the user's encoding and error handler. - keyword = self._decode_pax_field(keyword, "utf8", "utf8", + keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", tarfile.errors) if keyword in PAX_NAME_FIELDS: value = self._decode_pax_field(value, encoding, tarfile.encoding, tarfile.errors) else: - value = self._decode_pax_field(value, "utf8", "utf8", + value = self._decode_pax_field(value, "utf-8", "utf-8", tarfile.errors) pax_headers[keyword] = value diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 03c95fa..8d80336 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -4328,7 +4328,7 @@ class TestEncoding(TestCase): def _test_module_encoding(self, path): path, _ = os.path.splitext(path) path += ".py" - with codecs.open(path, 'r', 'utf8') as f: + with codecs.open(path, 'r', 'utf-8') as f: f.read() def test_argparse_module_encoding(self): diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py index ac6b109..f9a0a3d 100644 --- a/Lib/test/test_bigmem.py +++ b/Lib/test/test_bigmem.py @@ -707,7 +707,7 @@ class StrTest(unittest.TestCase, BaseStrTest): class BytesTest(unittest.TestCase, BaseStrTest): def from_latin1(self, s): - return s.encode("latin1") + return s.encode("latin-1") @bigmemtest(minsize=_2G + 2, memuse=1 + character_size) def test_decode(self, size): @@ -718,7 +718,7 @@ class BytesTest(unittest.TestCase, BaseStrTest): class BytearrayTest(unittest.TestCase, BaseStrTest): def from_latin1(self, s): - return bytearray(s.encode("latin1")) + return bytearray(s.encode("latin-1")) @bigmemtest(minsize=_2G + 2, memuse=1 + character_size) def test_decode(self, size): diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index e5c7ccd..84867bb 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase): def test_encoding(self): sample = "Hello world\n\u1234\u5678\u9abc" - for enc in ("utf8", "utf16"): + for enc in ("utf-8", "utf-16"): b = self.type2test(sample, enc) self.assertEqual(b, self.type2test(sample.encode(enc))) - self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1") - b = self.type2test(sample, "latin1", "ignore") + self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1") + b = self.type2test(sample, "latin-1", "ignore") self.assertEqual(b, self.type2test(sample[:-3], "utf-8")) def test_decode(self): sample = "Hello world\n\u1234\u5678\u9abc\def0\def0" - for enc in ("utf8", "utf16"): + for enc in ("utf-8", "utf-16"): b = self.type2test(sample, enc) self.assertEqual(b.decode(enc), sample) sample = "Hello world\n\x80\x81\xfe\xff" - b = self.type2test(sample, "latin1") - self.assertRaises(UnicodeDecodeError, b.decode, "utf8") - self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n") - self.assertEqual(b.decode(errors="ignore", encoding="utf8"), + b = self.type2test(sample, "latin-1") + self.assertRaises(UnicodeDecodeError, b.decode, "utf-8") + self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n") + self.assertEqual(b.decode(errors="ignore", encoding="utf-8"), "Hello world\n") + # Default encoding is utf-8 + self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603') def test_from_int(self): b = self.type2test(0) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index c4e3adf..a0a85ae 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase): @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X') def test_osx_utf8(self): def check_output(text): - decoded = text.decode('utf8', 'surrogateescape') + decoded = text.decode('utf-8', 'surrogateescape') expected = ascii(decoded).encode('ascii') + b'\n' env = os.environ.copy() @@ -223,7 +223,7 @@ class CmdLineTest(unittest.TestCase): self.assertIn(path2.encode('ascii'), out) def test_displayhook_unencodable(self): - for encoding in ('ascii', 'latin1', 'utf8'): + for encoding in ('ascii', 'latin-1', 'utf-8'): env = os.environ.copy() env['PYTHONIOENCODING'] = encoding p = subprocess.Popen( diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d560d7a..67a5aed 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1250,7 +1250,7 @@ class EncodedFileTest(unittest.TestCase): self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae') f = io.BytesIO() - ef = codecs.EncodedFile(f, 'utf-8', 'latin1') + ef = codecs.EncodedFile(f, 'utf-8', 'latin-1') ef.write(b'\xc3\xbc') self.assertEqual(f.getvalue(), b'\xfc') @@ -1611,7 +1611,7 @@ class SurrogateEscapeTest(unittest.TestCase): def test_latin1(self): # Issue6373 - self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"), + self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"), b"\xe4\xeb\xef\xf6\xfc") diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 3a2589e..624a68d 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -46,7 +46,7 @@ except ImportError: def _default_chunk_size(): """Get the default TextIOWrapper chunk size""" - with open(__file__, "r", encoding="latin1") as f: + with open(__file__, "r", encoding="latin-1") as f: return f._CHUNK_SIZE @@ -1684,11 +1684,11 @@ class TextIOWrapperTest(unittest.TestCase): r = self.BytesIO(b"\xc3\xa9\n\n") b = self.BufferedReader(r, 1000) t = self.TextIOWrapper(b) - t.__init__(b, encoding="latin1", newline="\r\n") - self.assertEqual(t.encoding, "latin1") + t.__init__(b, encoding="latin-1", newline="\r\n") + self.assertEqual(t.encoding, "latin-1") self.assertEqual(t.line_buffering, False) - t.__init__(b, encoding="utf8", line_buffering=True) - self.assertEqual(t.encoding, "utf8") + t.__init__(b, encoding="utf-8", line_buffering=True) + self.assertEqual(t.encoding, "utf-8") self.assertEqual(t.line_buffering, True) self.assertEqual("\xe9\n", t.readline()) self.assertRaises(TypeError, t.__init__, b, newline=42) @@ -1738,8 +1738,8 @@ class TextIOWrapperTest(unittest.TestCase): def test_encoding(self): # Check the encoding attribute is always set, and valid b = self.BytesIO() - t = self.TextIOWrapper(b, encoding="utf8") - self.assertEqual(t.encoding, "utf8") + t = self.TextIOWrapper(b, encoding="utf-8") + self.assertEqual(t.encoding, "utf-8") t = self.TextIOWrapper(b) self.assertTrue(t.encoding is not None) codecs.lookup(t.encoding) @@ -1918,7 +1918,7 @@ class TextIOWrapperTest(unittest.TestCase): def test_basic_io(self): for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65): - for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le": + for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le": f = self.open(support.TESTFN, "w+", encoding=enc) f._CHUNK_SIZE = chunksize self.assertEqual(f.write("abc"), 3) @@ -1968,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCase): self.assertEqual(rlines, wlines) def test_telling(self): - f = self.open(support.TESTFN, "w+", encoding="utf8") + f = self.open(support.TESTFN, "w+", encoding="utf-8") p0 = f.tell() f.write("\xff\n") p1 = f.tell() diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index 1e4f887..03f814a 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -95,14 +95,14 @@ class TestMailbox(TestBase): """) def test_add_invalid_8bit_bytes_header(self): - key = self._box.add(self._nonascii_msg.encode('latin1')) + key = self._box.add(self._nonascii_msg.encode('latin-1')) self.assertEqual(len(self._box), 1) self.assertEqual(self._box.get_bytes(key), - self._nonascii_msg.encode('latin1')) + self._nonascii_msg.encode('latin-1')) def test_invalid_nonascii_header_as_string(self): subj = self._nonascii_msg.splitlines()[1] - key = self._box.add(subj.encode('latin1')) + key = self._box.add(subj.encode('latin-1')) self.assertEqual(self._box.get_string(key), 'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz' 'YWwuIE3hciByZW5kZWx06Ww/?=\n\n') diff --git a/Lib/test/test_nntplib.py b/Lib/test/test_nntplib.py index 4577ddb..e463e52 100644 --- a/Lib/test/test_nntplib.py +++ b/Lib/test/test_nntplib.py @@ -813,7 +813,7 @@ class NNTPv1v2TestsMixin: def _check_article_body(self, lines): self.assertEqual(len(lines), 4) - self.assertEqual(lines[-1].decode('utf8'), "-- Signed by André.") + self.assertEqual(lines[-1].decode('utf-8'), "-- Signed by André.") self.assertEqual(lines[-2], b"") self.assertEqual(lines[-3], b".Here is a dot-starting line.") self.assertEqual(lines[-4], b"This is just a test article.") diff --git a/Lib/test/test_pep3120.py b/Lib/test/test_pep3120.py index 09fedf0..496f8da 100644 --- a/Lib/test/test_pep3120.py +++ b/Lib/test/test_pep3120.py @@ -19,8 +19,8 @@ class PEP3120Test(unittest.TestCase): try: import test.badsyntax_pep3120 except SyntaxError as msg: - msg = str(msg) - self.assertTrue('UTF-8' in msg or 'utf8' in msg) + msg = str(msg).lower() + self.assertTrue('utf-8' in msg or 'utf8' in msg) else: self.fail("expected exception didn't occur") diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 0f6a1ca..bddb375 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -20,8 +20,8 @@ import unittest TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") try: - TEST_XMLFILE.encode("utf8") - TEST_XMLFILE_OUT.encode("utf8") + TEST_XMLFILE.encode("utf-8") + TEST_XMLFILE_OUT.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filename is not encodable to utf8") diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index c9c0067..13c1265 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -129,8 +129,8 @@ class TestCase(unittest.TestCase): shelve.Shelf(d)[key] = [1] self.assertIn(key.encode('utf-8'), d) # but a different one can be given - shelve.Shelf(d, keyencoding='latin1')[key] = [1] - self.assertIn(key.encode('latin1'), d) + shelve.Shelf(d, keyencoding='latin-1')[key] = [1] + self.assertIn(key.encode('latin-1'), d) # with all consequences s = shelve.Shelf(d, keyencoding='ascii') self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1]) diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 23d22a8..9d5d8ca 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -44,7 +44,7 @@ def linux_version(): return 0, 0, 0 HOST = support.HOST -MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf8') ## test unicode string and carriage return +MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6) try: @@ -1065,7 +1065,7 @@ class FileObjectClassTestCase(SocketConnectedTest): """ bufsize = -1 # Use default buffer size - encoding = 'utf8' + encoding = 'utf-8' errors = 'strict' newline = None @@ -1269,7 +1269,7 @@ class FileObjectInterruptedTestCase(unittest.TestCase): data = b'' else: data = '' - expecting = expecting.decode('utf8') + expecting = expecting.decode('utf-8') while len(data) != len(expecting): part = fo.read(size) if not part: @@ -1427,7 +1427,7 @@ class UnicodeReadFileObjectClassTestCase(FileObjectClassTestCase): """Tests for socket.makefile() in text mode (rather than binary)""" read_mode = 'r' - read_msg = MSG.decode('utf8') + read_msg = MSG.decode('utf-8') write_mode = 'wb' write_msg = MSG newline = '' @@ -1439,7 +1439,7 @@ class UnicodeWriteFileObjectClassTestCase(FileObjectClassTestCase): read_mode = 'rb' read_msg = MSG write_mode = 'w' - write_msg = MSG.decode('utf8') + write_msg = MSG.decode('utf-8') newline = '' @@ -1447,9 +1447,9 @@ class UnicodeReadWriteFileObjectClassTestCase(FileObjectClassTestCase): """Tests for socket.makefile() in text mode (rather than binary)""" read_mode = 'r' - read_msg = MSG.decode('utf8') + read_msg = MSG.decode('utf-8') write_mode = 'w' - write_msg = MSG.decode('utf8') + write_msg = MSG.decode('utf-8') newline = '' diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py index 23d96f8..2bcf4d1 100644 --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -130,7 +130,7 @@ class TestLiterals(unittest.TestCase): self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra) def test_file_utf8(self): - self.check_encoding("utf8") + self.check_encoding("utf-8") def test_file_iso_8859_1(self): self.check_encoding("iso-8859-1") diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 4b58308..9e267eb 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1191,7 +1191,7 @@ class POSIXProcessTestCase(BaseTestCase): stdout, stderr = p.communicate() self.assertEqual(0, p.returncode, "sigchild_ignore.py exited" " non-zero with this error:\n%s" % - stderr.decode('utf8')) + stderr.decode('utf-8')) @unittest.skipUnless(mswindows, "Windows specific tests") diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 68e094d..a645bf2 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase): self._test_unicode_filename("utf7") def test_utf8_filename(self): - self._test_unicode_filename("utf8") + self._test_unicode_filename("utf-8") def _test_unicode_filename(self, encoding): tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict") @@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest): def test_bad_pax_header(self): # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields # without a hdrcharset=BINARY header. - for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"), + for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"), ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),): with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar: try: @@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest): def test_binary_header(self): # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field. - for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"), + for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"), ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),): with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar: try: diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9ad9eed..d97894c 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1182,11 +1182,14 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual('hello'.encode('ascii'), b'hello') self.assertEqual('hello'.encode('utf-7'), b'hello') self.assertEqual('hello'.encode('utf-8'), b'hello') - self.assertEqual('hello'.encode('utf8'), b'hello') + self.assertEqual('hello'.encode('utf-8'), b'hello') self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000') self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o') self.assertEqual('hello'.encode('latin-1'), b'hello') + # Default encoding is utf-8 + self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83') + # Roundtrip safety for BMP (just the first 1024 chars) for c in range(1024): u = chr(c) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 3003331..e148c62 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -251,7 +251,7 @@ class urlretrieve_FileTests(unittest.TestCase): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) try: - filePath.encode("utf8") + filePath.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filePath is not encodable to utf8") return "file://%s" % urllib.request.pathname2url(filePath) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 1433670..4ddbe3f 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -599,7 +599,7 @@ class OpenerDirectorTests(unittest.TestCase): def sanepathname2url(path): try: - path.encode("utf8") + path.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("path is not encodable to utf8") urlpath = urllib.request.pathname2url(path) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 43fa656..7bc59ed 100644 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -471,14 +471,14 @@ class TestUUID(TestCase): if pid == 0: os.close(fds[0]) value = uuid.uuid4() - os.write(fds[1], value.hex.encode('latin1')) + os.write(fds[1], value.hex.encode('latin-1')) os._exit(0) else: os.close(fds[1]) parent_value = uuid.uuid4().hex os.waitpid(pid, 0) - child_value = os.read(fds[0], 100).decode('latin1') + child_value = os.read(fds[0], 100).decode('latin-1') self.assertNotEqual(parent_value, child_value) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 22fafa9..40c2291 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -22,7 +22,7 @@ from xml.etree import ElementTree as ET SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") try: - SIMPLE_XMLFILE.encode("utf8") + SIMPLE_XMLFILE.encode("utf-8") except UnicodeEncodeError: raise unittest.SkipTest("filename is not encodable to utf8") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") @@ -1255,8 +1255,8 @@ def processinginstruction(): >>> ET.tostring(ET.PI('test', '')) b'?>' - >>> ET.tostring(ET.PI('test', '\xe3'), 'latin1') - b"\\n\\xe3?>" + >>> ET.tostring(ET.PI('test', '\xe3'), 'latin-1') + b"\\n\\xe3?>" """ # diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index dfdbdec..d583a82 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1846,7 +1846,7 @@ class URLopener: if encoding == 'base64': import base64 # XXX is this encoding/decoding ok? - data = base64.decodebytes(data.encode('ascii')).decode('latin1') + data = base64.decodebytes(data.encode('ascii')).decode('latin-1') else: data = unquote(data) msg.append('Content-Length: %d' % len(data)) -- cgit v0.12