summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_bytes.py | 4
-rw-r--r-- Lib/test/test_codecs.py | 15
-rw-r--r-- Lib/test/test_unicode.py | 6
-rw-r--r-- Lib/test/test_unicodedata.py | 3
4 files changed, 20 insertions, 8 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index a3ea40a..992f3d2 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -169,13 +169,13 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step]))
def test_encoding(self):
- sample = "Hello world\n\u1234\u5678\u9abc\udef0"
+ sample = "Hello world\n\u1234\u5678\u9abc"
for enc in ("utf8", "utf16"):
b = self.type2test(sample, enc)
self.assertEqual(b, self.type2test(sample.encode(enc)))
self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
b = self.type2test(sample, "latin1", "ignore")
- self.assertEqual(b, self.type2test(sample[:-4], "utf-8"))
+ self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
def test_decode(self):
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1730dbe..6706507 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -541,6 +541,17 @@ class UTF8Test(ReadTest):
self.check_state_handling_decode(self.encoding,
u, u.encode(self.encoding))
+ def test_lone_surrogates(self):
+ self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
+ self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
+
+ def test_surrogates_handler(self):
+ self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
+ b"abc\xed\xa0\x80def")
+ self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
+ "abc\ud800def")
+ self.assertTrue(codecs.lookup_error("surrogates"))
+
class UTF7Test(ReadTest):
encoding = "utf-7"
@@ -1023,12 +1034,12 @@ class NameprepTest(unittest.TestCase):
# Skipped
continue
# The Unicode strings are given in UTF-8
- orig = str(orig, "utf-8")
+ orig = str(orig, "utf-8", "surrogates")
if prepped is None:
# Input contains prohibited characters
self.assertRaises(UnicodeError, nameprep, orig)
else:
- prepped = str(prepped, "utf-8")
+ prepped = str(prepped, "utf-8", "surrogates")
try:
self.assertEquals(nameprep(orig), prepped)
except Exception as e:
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 1fddc06..220a8eb 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -886,10 +886,10 @@ class UnicodeTest(
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
- self.assertEqual('\ud800'.encode('utf-8'), b'\xed\xa0\x80')
- self.assertEqual('\udc00'.encode('utf-8'), b'\xed\xb0\x80')
+ self.assertEqual('\ud800'.encode('utf-8', 'surrogates'), b'\xed\xa0\x80')
+ self.assertEqual('\udc00'.encode('utf-8', 'surrogates'), b'\xed\xb0\x80')
self.assertEqual(
- ('\ud800\udc02'*1000).encode('utf-8'),
+ ('\ud800\udc02'*1000).encode('utf-8', 'surrogates'),
b'\xf0\x90\x80\x82'*1000
)
self.assertEqual(
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index aed8eaa..b84aaaf 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -13,6 +13,7 @@ import subprocess
import test.support
encoding = 'utf-8'
+errors = 'surrogates'
### Run tests
@@ -61,7 +62,7 @@ class UnicodeMethodsTest(unittest.TestCase):
(char + 'ABC').title(),
]
- h.update(''.join(data).encode(encoding))
+ h.update(''.join(data).encode(encoding, errors))
result = h.hexdigest()
self.assertEqual(result, self.expectedchecksum)