summaryrefslogtreecommitdiffstats
path: root/Lib/test
diff options
context:
space:
mode:
authorMarc-André Lemburg <mal@egenix.com>2011-02-25 15:42:01 (GMT)
committerMarc-André Lemburg <mal@egenix.com>2011-02-25 15:42:01 (GMT)
commit8f36af7a4c9409a673412e4bdfbad76d700abc3a (patch)
tree1b61599a07604a96539e98098b055c577cd7e6a8 /Lib/test
parenta391b11320f729f6eec6c772c00b3e62c2746eaf (diff)
downloadcpython-8f36af7a4c9409a673412e4bdfbad76d700abc3a.zip
cpython-8f36af7a4c9409a673412e4bdfbad76d700abc3a.tar.gz
cpython-8f36af7a4c9409a673412e4bdfbad76d700abc3a.tar.bz2
Normalize the encoding names for Latin-1 and UTF-8 to
'latin-1' and 'utf-8'. These are optimized in the Python Unicode implementation to result in more direct processing, bypassing the codec registry. Also see issue11303.
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_argparse.py2
-rw-r--r--Lib/test/test_bigmem.py4
-rw-r--r--Lib/test/test_bytes.py18
-rw-r--r--Lib/test/test_cmd_line.py4
-rw-r--r--Lib/test/test_codecs.py4
-rw-r--r--Lib/test/test_io.py18
-rw-r--r--Lib/test/test_mailbox.py6
-rw-r--r--Lib/test/test_nntplib.py2
-rw-r--r--Lib/test/test_pep3120.py4
-rw-r--r--Lib/test/test_sax.py4
-rw-r--r--Lib/test/test_shelve.py4
-rw-r--r--Lib/test/test_socket.py14
-rw-r--r--Lib/test/test_strlit.py2
-rw-r--r--Lib/test/test_subprocess.py2
-rw-r--r--Lib/test/test_tarfile.py6
-rw-r--r--Lib/test/test_unicode.py5
-rw-r--r--Lib/test/test_urllib.py2
-rw-r--r--Lib/test/test_urllib2.py2
-rw-r--r--Lib/test/test_uuid.py4
-rw-r--r--Lib/test/test_xml_etree.py6
20 files changed, 59 insertions, 54 deletions
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 03c95fa..8d80336 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -4328,7 +4328,7 @@ class TestEncoding(TestCase):
def _test_module_encoding(self, path):
path, _ = os.path.splitext(path)
path += ".py"
- with codecs.open(path, 'r', 'utf8') as f:
+ with codecs.open(path, 'r', 'utf-8') as f:
f.read()
def test_argparse_module_encoding(self):
diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py
index ac6b109..f9a0a3d 100644
--- a/Lib/test/test_bigmem.py
+++ b/Lib/test/test_bigmem.py
@@ -707,7 +707,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
class BytesTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return s.encode("latin1")
+ return s.encode("latin-1")
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
def test_decode(self, size):
@@ -718,7 +718,7 @@ class BytesTest(unittest.TestCase, BaseStrTest):
class BytearrayTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return bytearray(s.encode("latin1"))
+ return bytearray(s.encode("latin-1"))
@bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
def test_decode(self, size):
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index e5c7ccd..84867bb 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
def test_encoding(self):
sample = "Hello world\n\u1234\u5678\u9abc"
- for enc in ("utf8", "utf16"):
+ for enc in ("utf-8", "utf-16"):
b = self.type2test(sample, enc)
self.assertEqual(b, self.type2test(sample.encode(enc)))
- self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
- b = self.type2test(sample, "latin1", "ignore")
+ self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
+ b = self.type2test(sample, "latin-1", "ignore")
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
def test_decode(self):
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
- for enc in ("utf8", "utf16"):
+ for enc in ("utf-8", "utf-16"):
b = self.type2test(sample, enc)
self.assertEqual(b.decode(enc), sample)
sample = "Hello world\n\x80\x81\xfe\xff"
- b = self.type2test(sample, "latin1")
- self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
- self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
- self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
+ b = self.type2test(sample, "latin-1")
+ self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
+ self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
+ self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
"Hello world\n")
+ # Default encoding is utf-8
+ self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
def test_from_int(self):
b = self.type2test(0)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index c4e3adf..a0a85ae 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase):
@unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
def test_osx_utf8(self):
def check_output(text):
- decoded = text.decode('utf8', 'surrogateescape')
+ decoded = text.decode('utf-8', 'surrogateescape')
expected = ascii(decoded).encode('ascii') + b'\n'
env = os.environ.copy()
@@ -223,7 +223,7 @@ class CmdLineTest(unittest.TestCase):
self.assertIn(path2.encode('ascii'), out)
def test_displayhook_unencodable(self):
- for encoding in ('ascii', 'latin1', 'utf8'):
+ for encoding in ('ascii', 'latin-1', 'utf-8'):
env = os.environ.copy()
env['PYTHONIOENCODING'] = encoding
p = subprocess.Popen(
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d560d7a..67a5aed 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1250,7 +1250,7 @@ class EncodedFileTest(unittest.TestCase):
self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
f = io.BytesIO()
- ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
+ ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
ef.write(b'\xc3\xbc')
self.assertEqual(f.getvalue(), b'\xfc')
@@ -1611,7 +1611,7 @@ class SurrogateEscapeTest(unittest.TestCase):
def test_latin1(self):
# Issue6373
- self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"),
+ self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
b"\xe4\xeb\xef\xf6\xfc")
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 3a2589e..624a68d 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -46,7 +46,7 @@ except ImportError:
def _default_chunk_size():
"""Get the default TextIOWrapper chunk size"""
- with open(__file__, "r", encoding="latin1") as f:
+ with open(__file__, "r", encoding="latin-1") as f:
return f._CHUNK_SIZE
@@ -1684,11 +1684,11 @@ class TextIOWrapperTest(unittest.TestCase):
r = self.BytesIO(b"\xc3\xa9\n\n")
b = self.BufferedReader(r, 1000)
t = self.TextIOWrapper(b)
- t.__init__(b, encoding="latin1", newline="\r\n")
- self.assertEqual(t.encoding, "latin1")
+ t.__init__(b, encoding="latin-1", newline="\r\n")
+ self.assertEqual(t.encoding, "latin-1")
self.assertEqual(t.line_buffering, False)
- t.__init__(b, encoding="utf8", line_buffering=True)
- self.assertEqual(t.encoding, "utf8")
+ t.__init__(b, encoding="utf-8", line_buffering=True)
+ self.assertEqual(t.encoding, "utf-8")
self.assertEqual(t.line_buffering, True)
self.assertEqual("\xe9\n", t.readline())
self.assertRaises(TypeError, t.__init__, b, newline=42)
@@ -1738,8 +1738,8 @@ class TextIOWrapperTest(unittest.TestCase):
def test_encoding(self):
# Check the encoding attribute is always set, and valid
b = self.BytesIO()
- t = self.TextIOWrapper(b, encoding="utf8")
- self.assertEqual(t.encoding, "utf8")
+ t = self.TextIOWrapper(b, encoding="utf-8")
+ self.assertEqual(t.encoding, "utf-8")
t = self.TextIOWrapper(b)
self.assertTrue(t.encoding is not None)
codecs.lookup(t.encoding)
@@ -1918,7 +1918,7 @@ class TextIOWrapperTest(unittest.TestCase):
def test_basic_io(self):
for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
- for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le":
+ for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
f = self.open(support.TESTFN, "w+", encoding=enc)
f._CHUNK_SIZE = chunksize
self.assertEqual(f.write("abc"), 3)
@@ -1968,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCase):
self.assertEqual(rlines, wlines)
def test_telling(self):
- f = self.open(support.TESTFN, "w+", encoding="utf8")
+ f = self.open(support.TESTFN, "w+", encoding="utf-8")
p0 = f.tell()
f.write("\xff\n")
p1 = f.tell()
diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py
index 1e4f887..03f814a 100644
--- a/Lib/test/test_mailbox.py
+++ b/Lib/test/test_mailbox.py
@@ -95,14 +95,14 @@ class TestMailbox(TestBase):
""")
def test_add_invalid_8bit_bytes_header(self):
- key = self._box.add(self._nonascii_msg.encode('latin1'))
+ key = self._box.add(self._nonascii_msg.encode('latin-1'))
self.assertEqual(len(self._box), 1)
self.assertEqual(self._box.get_bytes(key),
- self._nonascii_msg.encode('latin1'))
+ self._nonascii_msg.encode('latin-1'))
def test_invalid_nonascii_header_as_string(self):
subj = self._nonascii_msg.splitlines()[1]
- key = self._box.add(subj.encode('latin1'))
+ key = self._box.add(subj.encode('latin-1'))
self.assertEqual(self._box.get_string(key),
'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz'
'YWwuIE3hciByZW5kZWx06Ww/?=\n\n')
diff --git a/Lib/test/test_nntplib.py b/Lib/test/test_nntplib.py
index 4577ddb..e463e52 100644
--- a/Lib/test/test_nntplib.py
+++ b/Lib/test/test_nntplib.py
@@ -813,7 +813,7 @@ class NNTPv1v2TestsMixin:
def _check_article_body(self, lines):
self.assertEqual(len(lines), 4)
- self.assertEqual(lines[-1].decode('utf8'), "-- Signed by André.")
+ self.assertEqual(lines[-1].decode('utf-8'), "-- Signed by André.")
self.assertEqual(lines[-2], b"")
self.assertEqual(lines[-3], b".Here is a dot-starting line.")
self.assertEqual(lines[-4], b"This is just a test article.")
diff --git a/Lib/test/test_pep3120.py b/Lib/test/test_pep3120.py
index 09fedf0..496f8da 100644
--- a/Lib/test/test_pep3120.py
+++ b/Lib/test/test_pep3120.py
@@ -19,8 +19,8 @@ class PEP3120Test(unittest.TestCase):
try:
import test.badsyntax_pep3120
except SyntaxError as msg:
- msg = str(msg)
- self.assertTrue('UTF-8' in msg or 'utf8' in msg)
+ msg = str(msg).lower()
+ self.assertTrue('utf-8' in msg or 'utf8' in msg)
else:
self.fail("expected exception didn't occur")
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 0f6a1ca..bddb375 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -20,8 +20,8 @@ import unittest
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
- TEST_XMLFILE.encode("utf8")
- TEST_XMLFILE_OUT.encode("utf8")
+ TEST_XMLFILE.encode("utf-8")
+ TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py
index c9c0067..13c1265 100644
--- a/Lib/test/test_shelve.py
+++ b/Lib/test/test_shelve.py
@@ -129,8 +129,8 @@ class TestCase(unittest.TestCase):
shelve.Shelf(d)[key] = [1]
self.assertIn(key.encode('utf-8'), d)
# but a different one can be given
- shelve.Shelf(d, keyencoding='latin1')[key] = [1]
- self.assertIn(key.encode('latin1'), d)
+ shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
+ self.assertIn(key.encode('latin-1'), d)
# with all consequences
s = shelve.Shelf(d, keyencoding='ascii')
self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 23d22a8..9d5d8ca 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -44,7 +44,7 @@ def linux_version():
return 0, 0, 0
HOST = support.HOST
-MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf8') ## test unicode string and carriage return
+MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return
SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6)
try:
@@ -1065,7 +1065,7 @@ class FileObjectClassTestCase(SocketConnectedTest):
"""
bufsize = -1 # Use default buffer size
- encoding = 'utf8'
+ encoding = 'utf-8'
errors = 'strict'
newline = None
@@ -1269,7 +1269,7 @@ class FileObjectInterruptedTestCase(unittest.TestCase):
data = b''
else:
data = ''
- expecting = expecting.decode('utf8')
+ expecting = expecting.decode('utf-8')
while len(data) != len(expecting):
part = fo.read(size)
if not part:
@@ -1427,7 +1427,7 @@ class UnicodeReadFileObjectClassTestCase(FileObjectClassTestCase):
"""Tests for socket.makefile() in text mode (rather than binary)"""
read_mode = 'r'
- read_msg = MSG.decode('utf8')
+ read_msg = MSG.decode('utf-8')
write_mode = 'wb'
write_msg = MSG
newline = ''
@@ -1439,7 +1439,7 @@ class UnicodeWriteFileObjectClassTestCase(FileObjectClassTestCase):
read_mode = 'rb'
read_msg = MSG
write_mode = 'w'
- write_msg = MSG.decode('utf8')
+ write_msg = MSG.decode('utf-8')
newline = ''
@@ -1447,9 +1447,9 @@ class UnicodeReadWriteFileObjectClassTestCase(FileObjectClassTestCase):
"""Tests for socket.makefile() in text mode (rather than binary)"""
read_mode = 'r'
- read_msg = MSG.decode('utf8')
+ read_msg = MSG.decode('utf-8')
write_mode = 'w'
- write_msg = MSG.decode('utf8')
+ write_msg = MSG.decode('utf-8')
newline = ''
diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py
index 23d96f8..2bcf4d1 100644
--- a/Lib/test/test_strlit.py
+++ b/Lib/test/test_strlit.py
@@ -130,7 +130,7 @@ class TestLiterals(unittest.TestCase):
self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
def test_file_utf8(self):
- self.check_encoding("utf8")
+ self.check_encoding("utf-8")
def test_file_iso_8859_1(self):
self.check_encoding("iso-8859-1")
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index 4b58308..9e267eb 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -1191,7 +1191,7 @@ class POSIXProcessTestCase(BaseTestCase):
stdout, stderr = p.communicate()
self.assertEqual(0, p.returncode, "sigchild_ignore.py exited"
" non-zero with this error:\n%s" %
- stderr.decode('utf8'))
+ stderr.decode('utf-8'))
@unittest.skipUnless(mswindows, "Windows specific tests")
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 68e094d..a645bf2 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase):
self._test_unicode_filename("utf7")
def test_utf8_filename(self):
- self._test_unicode_filename("utf8")
+ self._test_unicode_filename("utf-8")
def _test_unicode_filename(self, encoding):
tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
@@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
def test_bad_pax_header(self):
# Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
# without a hdrcharset=BINARY header.
- for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
+ for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
try:
@@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest):
def test_binary_header(self):
# Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
- for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
+ for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
try:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9ad9eed..d97894c 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1182,11 +1182,14 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual('hello'.encode('ascii'), b'hello')
self.assertEqual('hello'.encode('utf-7'), b'hello')
self.assertEqual('hello'.encode('utf-8'), b'hello')
- self.assertEqual('hello'.encode('utf8'), b'hello')
+ self.assertEqual('hello'.encode('utf-8'), b'hello')
self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
self.assertEqual('hello'.encode('latin-1'), b'hello')
+ # Default encoding is utf-8
+ self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
# Roundtrip safety for BMP (just the first 1024 chars)
for c in range(1024):
u = chr(c)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 3003331..e148c62 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -251,7 +251,7 @@ class urlretrieve_FileTests(unittest.TestCase):
def constructLocalFileUrl(self, filePath):
filePath = os.path.abspath(filePath)
try:
- filePath.encode("utf8")
+ filePath.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filePath is not encodable to utf8")
return "file://%s" % urllib.request.pathname2url(filePath)
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 1433670..4ddbe3f 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -599,7 +599,7 @@ class OpenerDirectorTests(unittest.TestCase):
def sanepathname2url(path):
try:
- path.encode("utf8")
+ path.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("path is not encodable to utf8")
urlpath = urllib.request.pathname2url(path)
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 43fa656..7bc59ed 100644
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -471,14 +471,14 @@ class TestUUID(TestCase):
if pid == 0:
os.close(fds[0])
value = uuid.uuid4()
- os.write(fds[1], value.hex.encode('latin1'))
+ os.write(fds[1], value.hex.encode('latin-1'))
os._exit(0)
else:
os.close(fds[1])
parent_value = uuid.uuid4().hex
os.waitpid(pid, 0)
- child_value = os.read(fds[0], 100).decode('latin1')
+ child_value = os.read(fds[0], 100).decode('latin-1')
self.assertNotEqual(parent_value, child_value)
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 22fafa9..40c2291 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -22,7 +22,7 @@ from xml.etree import ElementTree as ET
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
- SIMPLE_XMLFILE.encode("utf8")
+ SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
@@ -1255,8 +1255,8 @@ def processinginstruction():
>>> ET.tostring(ET.PI('test', '<testing&>'))
b'<?test <testing&>?>'
- >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
- b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
+ >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1')
+ b"<?xml version='1.0' encoding='latin-1'?>\\n<?test <testing&>\\xe3?>"
"""
#