author | Marc-André Lemburg <mal@egenix.com> | 2011-02-25 15:42:01 (GMT)
committer | Marc-André Lemburg <mal@egenix.com> | 2011-02-25 15:42:01 (GMT)
commit | 8f36af7a4c9409a673412e4bdfbad76d700abc3a (patch)
tree | 1b61599a07604a96539e98098b055c577cd7e6a8 /Lib
parent | a391b11320f729f6eec6c772c00b3e62c2746eaf (diff)
Normalize the encoding names for Latin-1 and UTF-8 to
'latin-1' and 'utf-8'.
These spellings are optimized in the Python Unicode
implementation and are processed directly, bypassing the
codec registry lookup.
See also issue #11303.
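For context, the effect described above can be observed with a small timing sketch. This example is not part of the commit; the payload, loop, and iteration count are arbitrary, and it assumes an interpreter from this era in which only the canonical spellings 'utf-8' and 'latin-1' take the shortcut while 'utf8' and 'latin1' are resolved through the codec registry on every call (later CPython versions also special-case the variant spellings, so the gap may be small or absent there):

    import timeit

    # Each call decodes the same small payload; only the spelling of the
    # encoding name differs, so any timing gap comes from the name lookup.
    setup = "data = b'x' * 100"
    for name in ("latin-1", "latin1", "utf-8", "utf8"):
        t = timeit.timeit("data.decode(%r)" % name, setup=setup, number=1000000)
        print("%-8s %.3f s" % (name, t))

Either spelling produces identical results; the commit simply moves the stdlib to the spellings that avoid the registry lookup.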
Diffstat (limited to 'Lib')
32 files changed, 84 insertions, 79 deletions
diff --git a/Lib/asynchat.py b/Lib/asynchat.py
index 6558512..2199d1b 100644
--- a/Lib/asynchat.py
+++ b/Lib/asynchat.py
@@ -75,7 +75,7 @@ class async_chat (asyncore.dispatcher):
     # sign of an application bug that we don't want to pass silently

     use_encoding = 0
-    encoding = 'latin1'
+    encoding = 'latin-1'

     def __init__ (self, sock=None, map=None):
         # for string terminator matching
diff --git a/Lib/distutils/command/bdist_wininst.py b/Lib/distutils/command/bdist_wininst.py
index b2e2fc6..b886055 100644
--- a/Lib/distutils/command/bdist_wininst.py
+++ b/Lib/distutils/command/bdist_wininst.py
@@ -263,11 +263,11 @@ class bdist_wininst(Command):
             cfgdata = cfgdata + b"\0"
         if self.pre_install_script:
             # We need to normalize newlines, so we open in text mode and
-            # convert back to bytes. "latin1" simply avoids any possible
+            # convert back to bytes. "latin-1" simply avoids any possible
             # failures.
             with open(self.pre_install_script, "r",
-                      encoding="latin1") as script:
-                script_data = script.read().encode("latin1")
+                      encoding="latin-1") as script:
+                script_data = script.read().encode("latin-1")
             cfgdata = cfgdata + script_data + b"\n\0"
         else:
             # empty pre-install script
diff --git a/Lib/ftplib.py b/Lib/ftplib.py
index fd5a863..8dce9ac 100644
--- a/Lib/ftplib.py
+++ b/Lib/ftplib.py
@@ -100,7 +100,7 @@ class FTP:
     file = None
     welcome = None
     passiveserver = 1
-    encoding = "latin1"
+    encoding = "latin-1"

     # Initialization method (called by class instantiation).
     # Initialize host to localhost, port to standard ftp port
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 36b7349..8ad7cb6 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -697,7 +697,7 @@ class HTTPConnection:
         self.send(connect_bytes)
         for header, value in self._tunnel_headers.items():
             header_str = "%s: %s\r\n" % (header, value)
-            header_bytes = header_str.encode("latin1")
+            header_bytes = header_str.encode("latin-1")
             self.send(header_bytes)
         self.send(b'\r\n')

@@ -937,7 +937,7 @@ class HTTPConnection:
             values = list(values)
             for i, one_value in enumerate(values):
                 if hasattr(one_value, 'encode'):
-                    values[i] = one_value.encode('latin1')
+                    values[i] = one_value.encode('latin-1')
                 elif isinstance(one_value, int):
                     values[i] = str(one_value).encode('ascii')
             value = b'\r\n\t'.join(values)
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 543abe0..a35fd9d 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -448,7 +448,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
             message = ''
         if self.request_version != 'HTTP/0.9':
             self.wfile.write(("%s %d %s\r\n" %
-                (self.protocol_version, code, message)).encode('latin1', 'strict'))
+                (self.protocol_version, code, message)).encode('latin-1', 'strict'))

     def send_header(self, keyword, value):
         """Send a MIME header."""
@@ -456,7 +456,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
         if not hasattr(self, '_headers_buffer'):
             self._headers_buffer = []
         self._headers_buffer.append(
-            ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))
+            ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

         if keyword.lower() == 'connection':
             if value.lower() == 'close':
diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py
index d6c23fb..d6627e5 100644
--- a/Lib/multiprocessing/connection.py
+++ b/Lib/multiprocessing/connection.py
@@ -434,10 +434,10 @@ class ConnectionWrapper(object):
         return self._loads(s)

 def _xml_dumps(obj):
-    return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf8')
+    return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf-8')

 def _xml_loads(s):
-    (obj,), method = xmlrpclib.loads(s.decode('utf8'))
+    (obj,), method = xmlrpclib.loads(s.decode('utf-8'))
     return obj

 class XmlListener(Listener):
diff --git a/Lib/random.py b/Lib/random.py
index cb49d56..f29803e 100644
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -114,7 +114,7 @@ class Random(_random.Random):
         if version == 2:
             if isinstance(a, (str, bytes, bytearray)):
                 if isinstance(a, str):
-                    a = a.encode("utf8")
+                    a = a.encode("utf-8")
                 a += _sha512(a).digest()
                 a = int.from_bytes(a, 'big')
diff --git a/Lib/smtpd.py b/Lib/smtpd.py
index 599e79b..32f45ae 100755
--- a/Lib/smtpd.py
+++ b/Lib/smtpd.py
@@ -275,7 +275,7 @@ class SMTPChannel(asynchat.async_chat):
                 return
         elif limit:
             self.num_bytes += len(data)
-        self.received_lines.append(str(data, "utf8"))
+        self.received_lines.append(str(data, "utf-8"))

     # Implementation of base class abstract method
     def found_terminator(self):
diff --git a/Lib/sqlite3/test/types.py b/Lib/sqlite3/test/types.py
index 29413e1..d214f3d 100644
--- a/Lib/sqlite3/test/types.py
+++ b/Lib/sqlite3/test/types.py
@@ -85,7 +85,7 @@ class DeclTypesTests(unittest.TestCase):
             if isinstance(_val, bytes):
                 # sqlite3 always calls __init__ with a bytes created from a
                 # UTF-8 string when __conform__ was used to store the object.
-                _val = _val.decode('utf8')
+                _val = _val.decode('utf-8')
             self.val = _val

         def __cmp__(self, other):
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 13737ca..ae63c31 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -791,7 +791,7 @@ def parse_template(source, pattern):
     else:
         # The tokenizer implicitly decodes bytes objects as latin-1, we must
         # therefore re-encode the final representation.
-        encode = lambda x: x.encode('latin1')
+        encode = lambda x: x.encode('latin-1')
     for c, s in p:
         if c is MARK:
             groupsappend((i, s))
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 0f9d1da..6b663f4 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1084,7 +1084,7 @@ class TarInfo(object):
     def create_pax_global_header(cls, pax_headers):
         """Return the object as a pax global header block sequence.
         """
-        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
+        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

     def _posix_split_name(self, name):
         """Split a name longer than 100 chars into a prefix
@@ -1167,7 +1167,7 @@ class TarInfo(object):
         binary = False
         for keyword, value in pax_headers.items():
             try:
-                value.encode("utf8", "strict")
+                value.encode("utf-8", "strict")
             except UnicodeEncodeError:
                 binary = True
                 break
@@ -1178,13 +1178,13 @@ class TarInfo(object):
             records += b"21 hdrcharset=BINARY\n"

         for keyword, value in pax_headers.items():
-            keyword = keyword.encode("utf8")
+            keyword = keyword.encode("utf-8")
             if binary:
                 # Try to restore the original byte representation of `value'.
                 # Needless to say, that the encoding must match the string.
                 value = value.encode(encoding, "surrogateescape")
             else:
-                value = value.encode("utf8")
+                value = value.encode("utf-8")

             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
             n = p = 0
@@ -1393,7 +1393,7 @@ class TarInfo(object):
         # the translation to UTF-8 fails.
         match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
         if match is not None:
-            pax_headers["hdrcharset"] = match.group(1).decode("utf8")
+            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

         # For the time being, we don't care about anything other than "BINARY".
         # The only other value that is currently allowed by the standard is
@@ -1402,7 +1402,7 @@ class TarInfo(object):
         if hdrcharset == "BINARY":
             encoding = tarfile.encoding
         else:
-            encoding = "utf8"
+            encoding = "utf-8"

         # Parse pax header information. A record looks like that:
         # "%d %s=%s\n" % (length, keyword, value). length is the size
@@ -1419,20 +1419,20 @@ class TarInfo(object):
             length = int(length)
             value = buf[match.end(2) + 1:match.start(1) + length - 1]

-            # Normally, we could just use "utf8" as the encoding and "strict"
+            # Normally, we could just use "utf-8" as the encoding and "strict"
             # as the error handler, but we better not take the risk. For
             # example, GNU tar <= 1.23 is known to store filenames it cannot
             # translate to UTF-8 as raw strings (unfortunately without a
             # hdrcharset=BINARY header).
             # We first try the strict standard encoding, and if that fails we
             # fall back on the user's encoding and error handler.
-            keyword = self._decode_pax_field(keyword, "utf8", "utf8",
+            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                                              tarfile.errors)
             if keyword in PAX_NAME_FIELDS:
                 value = self._decode_pax_field(value, encoding, tarfile.encoding,
                                                tarfile.errors)
             else:
-                value = self._decode_pax_field(value, "utf8", "utf8",
+                value = self._decode_pax_field(value, "utf-8", "utf-8",
                                                tarfile.errors)

             pax_headers[keyword] = value
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 03c95fa..8d80336 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -4328,7 +4328,7 @@ class TestEncoding(TestCase):
     def _test_module_encoding(self, path):
         path, _ = os.path.splitext(path)
         path += ".py"
-        with codecs.open(path, 'r', 'utf8') as f:
+        with codecs.open(path, 'r', 'utf-8') as f:
             f.read()

     def test_argparse_module_encoding(self):
diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py
index ac6b109..f9a0a3d 100644
--- a/Lib/test/test_bigmem.py
+++ b/Lib/test/test_bigmem.py
@@ -707,7 +707,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
 class BytesTest(unittest.TestCase, BaseStrTest):

     def from_latin1(self, s):
-        return s.encode("latin1")
+        return s.encode("latin-1")

     @bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
     def test_decode(self, size):
@@ -718,7 +718,7 @@ class BytesTest(unittest.TestCase, BaseStrTest):
 class BytearrayTest(unittest.TestCase, BaseStrTest):

     def from_latin1(self, s):
-        return bytearray(s.encode("latin1"))
+        return bytearray(s.encode("latin-1"))

     @bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
     def test_decode(self, size):
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index e5c7ccd..84867bb 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
     def test_encoding(self):
         sample = "Hello world\n\u1234\u5678\u9abc"
-        for enc in ("utf8", "utf16"):
+        for enc in ("utf-8", "utf-16"):
             b = self.type2test(sample, enc)
             self.assertEqual(b, self.type2test(sample.encode(enc)))
-        self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
-        b = self.type2test(sample, "latin1", "ignore")
+        self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
+        b = self.type2test(sample, "latin-1", "ignore")
         self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))

     def test_decode(self):
         sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
-        for enc in ("utf8", "utf16"):
+        for enc in ("utf-8", "utf-16"):
             b = self.type2test(sample, enc)
             self.assertEqual(b.decode(enc), sample)
         sample = "Hello world\n\x80\x81\xfe\xff"
-        b = self.type2test(sample, "latin1")
-        self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
-        self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
-        self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
+        b = self.type2test(sample, "latin-1")
+        self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
+        self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
+        self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
                          "Hello world\n")
+        # Default encoding is utf-8
+        self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')

     def test_from_int(self):
         b = self.type2test(0)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index c4e3adf..a0a85ae 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase):
     @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
     def test_osx_utf8(self):
         def check_output(text):
-            decoded = text.decode('utf8', 'surrogateescape')
+            decoded = text.decode('utf-8', 'surrogateescape')
             expected = ascii(decoded).encode('ascii') + b'\n'

             env = os.environ.copy()
@@ -223,7 +223,7 @@ class CmdLineTest(unittest.TestCase):
         self.assertIn(path2.encode('ascii'), out)

     def test_displayhook_unencodable(self):
-        for encoding in ('ascii', 'latin1', 'utf8'):
+        for encoding in ('ascii', 'latin-1', 'utf-8'):
             env = os.environ.copy()
             env['PYTHONIOENCODING'] = encoding
             p = subprocess.Popen(
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d560d7a..67a5aed 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1250,7 +1250,7 @@ class EncodedFileTest(unittest.TestCase):
         self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')

         f = io.BytesIO()
-        ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
+        ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
         ef.write(b'\xc3\xbc')
         self.assertEqual(f.getvalue(), b'\xfc')

@@ -1611,7 +1611,7 @@ class SurrogateEscapeTest(unittest.TestCase):
     def test_latin1(self):
         # Issue6373
-        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"),
+        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
                          b"\xe4\xeb\xef\xf6\xfc")
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 3a2589e..624a68d 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -46,7 +46,7 @@ except ImportError:
 def _default_chunk_size():
     """Get the default TextIOWrapper chunk size"""
-    with open(__file__, "r", encoding="latin1") as f:
+    with open(__file__, "r", encoding="latin-1") as f:
         return f._CHUNK_SIZE

@@ -1684,11 +1684,11 @@ class TextIOWrapperTest(unittest.TestCase):
         r = self.BytesIO(b"\xc3\xa9\n\n")
         b = self.BufferedReader(r, 1000)
         t = self.TextIOWrapper(b)
-        t.__init__(b, encoding="latin1", newline="\r\n")
-        self.assertEqual(t.encoding, "latin1")
+        t.__init__(b, encoding="latin-1", newline="\r\n")
+        self.assertEqual(t.encoding, "latin-1")
         self.assertEqual(t.line_buffering, False)
-        t.__init__(b, encoding="utf8", line_buffering=True)
-        self.assertEqual(t.encoding, "utf8")
+        t.__init__(b, encoding="utf-8", line_buffering=True)
+        self.assertEqual(t.encoding, "utf-8")
         self.assertEqual(t.line_buffering, True)
         self.assertEqual("\xe9\n", t.readline())
         self.assertRaises(TypeError, t.__init__, b, newline=42)
@@ -1738,8 +1738,8 @@ class TextIOWrapperTest(unittest.TestCase):
     def test_encoding(self):
         # Check the encoding attribute is always set, and valid
         b = self.BytesIO()
-        t = self.TextIOWrapper(b, encoding="utf8")
-        self.assertEqual(t.encoding, "utf8")
+        t = self.TextIOWrapper(b, encoding="utf-8")
+        self.assertEqual(t.encoding, "utf-8")
         t = self.TextIOWrapper(b)
         self.assertTrue(t.encoding is not None)
         codecs.lookup(t.encoding)
@@ -1918,7 +1918,7 @@ class TextIOWrapperTest(unittest.TestCase):
     def test_basic_io(self):
         for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
-            for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le":
+            for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
                 f = self.open(support.TESTFN, "w+", encoding=enc)
                 f._CHUNK_SIZE = chunksize
                 self.assertEqual(f.write("abc"), 3)
@@ -1968,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCase):
         self.assertEqual(rlines, wlines)

     def test_telling(self):
-        f = self.open(support.TESTFN, "w+", encoding="utf8")
+        f = self.open(support.TESTFN, "w+", encoding="utf-8")
         p0 = f.tell()
         f.write("\xff\n")
         p1 = f.tell()
diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py
index 1e4f887..03f814a 100644
--- a/Lib/test/test_mailbox.py
+++ b/Lib/test/test_mailbox.py
@@ -95,14 +95,14 @@ class TestMailbox(TestBase):
         """)

     def test_add_invalid_8bit_bytes_header(self):
-        key = self._box.add(self._nonascii_msg.encode('latin1'))
+        key = self._box.add(self._nonascii_msg.encode('latin-1'))
         self.assertEqual(len(self._box), 1)
         self.assertEqual(self._box.get_bytes(key),
-                         self._nonascii_msg.encode('latin1'))
+                         self._nonascii_msg.encode('latin-1'))

     def test_invalid_nonascii_header_as_string(self):
         subj = self._nonascii_msg.splitlines()[1]
-        key = self._box.add(subj.encode('latin1'))
+        key = self._box.add(subj.encode('latin-1'))
         self.assertEqual(self._box.get_string(key),
             'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz'
             'YWwuIE3hciByZW5kZWx06Ww/?=\n\n')
diff --git a/Lib/test/test_nntplib.py b/Lib/test/test_nntplib.py
index 4577ddb..e463e52 100644
--- a/Lib/test/test_nntplib.py
+++ b/Lib/test/test_nntplib.py
@@ -813,7 +813,7 @@ class NNTPv1v2TestsMixin:
     def _check_article_body(self, lines):
         self.assertEqual(len(lines), 4)
-        self.assertEqual(lines[-1].decode('utf8'), "-- Signed by André.")
+        self.assertEqual(lines[-1].decode('utf-8'), "-- Signed by André.")
         self.assertEqual(lines[-2], b"")
         self.assertEqual(lines[-3], b".Here is a dot-starting line.")
         self.assertEqual(lines[-4], b"This is just a test article.")
diff --git a/Lib/test/test_pep3120.py b/Lib/test/test_pep3120.py
index 09fedf0..496f8da 100644
--- a/Lib/test/test_pep3120.py
+++ b/Lib/test/test_pep3120.py
@@ -19,8 +19,8 @@ class PEP3120Test(unittest.TestCase):
         try:
             import test.badsyntax_pep3120
         except SyntaxError as msg:
-            msg = str(msg)
-            self.assertTrue('UTF-8' in msg or 'utf8' in msg)
+            msg = str(msg).lower()
+            self.assertTrue('utf-8' in msg or 'utf8' in msg)
         else:
             self.fail("expected exception didn't occur")
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 0f6a1ca..bddb375 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -20,8 +20,8 @@ import unittest
 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
 TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
 try:
-    TEST_XMLFILE.encode("utf8")
-    TEST_XMLFILE_OUT.encode("utf8")
+    TEST_XMLFILE.encode("utf-8")
+    TEST_XMLFILE_OUT.encode("utf-8")
 except UnicodeEncodeError:
     raise unittest.SkipTest("filename is not encodable to utf8")
diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py
index c9c0067..13c1265 100644
--- a/Lib/test/test_shelve.py
+++ b/Lib/test/test_shelve.py
@@ -129,8 +129,8 @@ class TestCase(unittest.TestCase):
         shelve.Shelf(d)[key] = [1]
         self.assertIn(key.encode('utf-8'), d)
         # but a different one can be given
-        shelve.Shelf(d, keyencoding='latin1')[key] = [1]
-        self.assertIn(key.encode('latin1'), d)
+        shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
+        self.assertIn(key.encode('latin-1'), d)
         # with all consequences
         s = shelve.Shelf(d, keyencoding='ascii')
         self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 23d22a8..9d5d8ca 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -44,7 +44,7 @@ def linux_version():
         return 0, 0, 0

 HOST = support.HOST
-MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf8') ## test unicode string and carriage return
+MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return
 SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6)

 try:
@@ -1065,7 +1065,7 @@ class FileObjectClassTestCase(SocketConnectedTest):
     """

     bufsize = -1 # Use default buffer size
-    encoding = 'utf8'
+    encoding = 'utf-8'
     errors = 'strict'
     newline = None

@@ -1269,7 +1269,7 @@ class FileObjectInterruptedTestCase(unittest.TestCase):
             data = b''
         else:
             data = ''
-            expecting = expecting.decode('utf8')
+            expecting = expecting.decode('utf-8')
         while len(data) != len(expecting):
             part = fo.read(size)
             if not part:
@@ -1427,7 +1427,7 @@ class UnicodeReadFileObjectClassTestCase(FileObjectClassTestCase):
     """Tests for socket.makefile() in text mode (rather than binary)"""

     read_mode = 'r'
-    read_msg = MSG.decode('utf8')
+    read_msg = MSG.decode('utf-8')
     write_mode = 'wb'
     write_msg = MSG
     newline = ''
@@ -1439,7 +1439,7 @@ class UnicodeWriteFileObjectClassTestCase(FileObjectClassTestCase):
     read_mode = 'rb'
     read_msg = MSG
     write_mode = 'w'
-    write_msg = MSG.decode('utf8')
+    write_msg = MSG.decode('utf-8')
     newline = ''

@@ -1447,9 +1447,9 @@ class UnicodeReadWriteFileObjectClassTestCase(FileObjectClassTestCase):
     """Tests for socket.makefile() in text mode (rather than binary)"""

     read_mode = 'r'
-    read_msg = MSG.decode('utf8')
+    read_msg = MSG.decode('utf-8')
     write_mode = 'w'
-    write_msg = MSG.decode('utf8')
+    write_msg = MSG.decode('utf-8')
     newline = ''
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
index 23d96f8..2bcf4d1 100644
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -130,7 +130,7 @@ class TestLiterals(unittest.TestCase):
         self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

     def test_file_utf8(self):
-        self.check_encoding("utf8")
+        self.check_encoding("utf-8")

     def test_file_iso_8859_1(self):
         self.check_encoding("iso-8859-1")
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index 4b58308..9e267eb 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -1191,7 +1191,7 @@ class POSIXProcessTestCase(BaseTestCase):
         stdout, stderr = p.communicate()
         self.assertEqual(0, p.returncode, "sigchild_ignore.py exited"
                          " non-zero with this error:\n%s" %
-                         stderr.decode('utf8'))
+                         stderr.decode('utf-8'))

 @unittest.skipUnless(mswindows, "Windows specific tests")
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 68e094d..a645bf2 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase):
         self._test_unicode_filename("utf7")

     def test_utf8_filename(self):
-        self._test_unicode_filename("utf8")
+        self._test_unicode_filename("utf-8")

     def _test_unicode_filename(self, encoding):
         tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding,
                            errors="strict")
@@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
     def test_bad_pax_header(self):
         # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
         # without a hdrcharset=BINARY header.
-        for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
                 ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
             with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                 try:
@@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest):
     def test_binary_header(self):
         # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
-        for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
                 ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
             with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                 try:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9ad9eed..d97894c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1182,11 +1182,14 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('hello'.encode('ascii'), b'hello')
         self.assertEqual('hello'.encode('utf-7'), b'hello')
         self.assertEqual('hello'.encode('utf-8'), b'hello')
-        self.assertEqual('hello'.encode('utf8'), b'hello')
+        self.assertEqual('hello'.encode('utf-8'), b'hello')
         self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
         self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
         self.assertEqual('hello'.encode('latin-1'), b'hello')
+        # Default encoding is utf-8
+        self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
         # Roundtrip safety for BMP (just the first 1024 chars)
         for c in range(1024):
             u = chr(c)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 3003331..e148c62 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -251,7 +251,7 @@ class urlretrieve_FileTests(unittest.TestCase):
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
         try:
-            filePath.encode("utf8")
+            filePath.encode("utf-8")
         except UnicodeEncodeError:
             raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 1433670..4ddbe3f 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -599,7 +599,7 @@ class OpenerDirectorTests(unittest.TestCase):
 def sanepathname2url(path):
     try:
-        path.encode("utf8")
+        path.encode("utf-8")
     except UnicodeEncodeError:
         raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 43fa656..7bc59ed 100644
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -471,14 +471,14 @@ class TestUUID(TestCase):
         if pid == 0:
             os.close(fds[0])
             value = uuid.uuid4()
-            os.write(fds[1], value.hex.encode('latin1'))
+            os.write(fds[1], value.hex.encode('latin-1'))
             os._exit(0)
         else:
             os.close(fds[1])
             parent_value = uuid.uuid4().hex
             os.waitpid(pid, 0)
-            child_value = os.read(fds[0], 100).decode('latin1')
+            child_value = os.read(fds[0], 100).decode('latin-1')

             self.assertNotEqual(parent_value, child_value)
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 22fafa9..40c2291 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -22,7 +22,7 @@ from xml.etree import ElementTree as ET
 SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
 try:
-    SIMPLE_XMLFILE.encode("utf8")
+    SIMPLE_XMLFILE.encode("utf-8")
 except UnicodeEncodeError:
     raise unittest.SkipTest("filename is not encodable to utf8")
 SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
@@ -1255,8 +1255,8 @@ def processinginstruction():
     >>> ET.tostring(ET.PI('test', '<testing&>'))
     b'<?test <testing&>?>'

-    >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
-    b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
+    >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1')
+    b"<?xml version='1.0' encoding='latin-1'?>\\n<?test <testing&>\\xe3?>"
     """

 #
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index dfdbdec..d583a82 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1846,7 +1846,7 @@ class URLopener:
         if encoding == 'base64':
             import base64
             # XXX is this encoding/decoding ok?
-            data = base64.decodebytes(data.encode('ascii')).decode('latin1')
+            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
         else:
             data = unquote(data)
         msg.append('Content-Length: %d' % len(data))