summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r--Lib/test/test_codeccallbacks.py122
1 files changed, 104 insertions, 18 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 1327f11..ee1e28a 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -150,6 +150,22 @@ class CodecCallbackTest(unittest.TestCase):
sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
+ def test_nameescape(self):
+ # Does the same as backslashescape, but prefers ``\N{...}`` escape
+ # sequences.
+ sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+ sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("ascii", "namereplace"), sout)
+
+ sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
+
+ sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
+ b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+ self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
+
def test_decoding_callbacks(self):
# This is a test for a decoding callback handler
# that allows the decoding of the invalid sequence
@@ -220,6 +236,11 @@ class CodecCallbackTest(unittest.TestCase):
"\u0000\ufffd"
)
+ self.assertEqual(
+ b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
+ "\u0000\\x00"
+ )
+
codecs.register_error("test.hui", handler_unicodeinternal)
self.assertEqual(
@@ -287,7 +308,7 @@ class CodecCallbackTest(unittest.TestCase):
def test_longstrings(self):
# test long strings to check for memory overflow problems
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
- "backslashreplace"]
+ "backslashreplace", "namereplace"]
# register the handlers under different names,
# to prevent the codec from recognizing the name
for err in errors:
@@ -550,17 +571,6 @@ class CodecCallbackTest(unittest.TestCase):
codecs.backslashreplace_errors,
UnicodeError("ouch")
)
- # "backslashreplace" can only be used for encoding
- self.assertRaises(
- TypeError,
- codecs.backslashreplace_errors,
- UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
- )
- self.assertRaises(
- TypeError,
- codecs.backslashreplace_errors,
- UnicodeTranslateError("\u3042", 0, 1, "ouch")
- )
# Use the correct exception
tests = [
("\u3042", "\\u3042"),
@@ -585,6 +595,72 @@ class CodecCallbackTest(unittest.TestCase):
1, 1 + len(s), "ouch")),
(r, 1 + len(s))
)
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeTranslateError("a" + s + "b",
+ 1, 1 + len(s), "ouch")),
+ (r, 1 + len(s))
+ )
+ tests = [
+ (b"a", "\\x61"),
+ (b"\n", "\\x0a"),
+ (b"\x00", "\\x00"),
+ (b"\xff", "\\xff"),
+ ]
+ for b, r in tests:
+ with self.subTest(bytes=b):
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"),
+ 1, 2, "ouch")),
+ (r, 2)
+ )
+
+ def test_badandgoodnamereplaceexceptions(self):
+ # "namereplace" complains about a non-exception passed in
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ 42
+ )
+ # "namereplace" complains about the wrong exception types
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeError("ouch")
+ )
+ # "namereplace" can only be used for encoding
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
+ )
+ self.assertRaises(
+ TypeError,
+ codecs.namereplace_errors,
+ UnicodeTranslateError("\u3042", 0, 1, "ouch")
+ )
+ # Use the correct exception
+ tests = [
+ ("\u3042", "\\N{HIRAGANA LETTER A}"),
+ ("\x00", "\\x00"),
+ ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH "
+ "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"),
+ ("\U000e007f", "\\N{CANCEL TAG}"),
+ ("\U0010ffff", "\\U0010ffff"),
+ # Lone surrogates
+ ("\ud800", "\\ud800"),
+ ("\udfff", "\\udfff"),
+ ("\ud800\udfff", "\\ud800\\udfff"),
+ ]
+ for s, r in tests:
+ with self.subTest(str=s):
+ self.assertEqual(
+ codecs.namereplace_errors(
+ UnicodeEncodeError("ascii", "a" + s + "b",
+ 1, 1 + len(s), "ouch")),
+ (r, 1 + len(s))
+ )
def test_badandgoodsurrogateescapeexceptions(self):
surrogateescape_errors = codecs.lookup_error('surrogateescape')
@@ -663,20 +739,24 @@ class CodecCallbackTest(unittest.TestCase):
surrogatepass_errors,
UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch")
)
+ for s in ("\ud800", "\udfff", "\ud800\udfff"):
+ with self.subTest(str=s):
+ self.assertRaises(
+ UnicodeEncodeError,
+ surrogatepass_errors,
+ UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
+ )
tests = [
- ("ascii", "\ud800", b'\xed\xa0\x80', 3),
("utf-8", "\ud800", b'\xed\xa0\x80', 3),
("utf-16le", "\ud800", b'\x00\xd8', 2),
("utf-16be", "\ud800", b'\xd8\x00', 2),
("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4),
("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4),
- ("ascii", "\udfff", b'\xed\xbf\xbf', 3),
("utf-8", "\udfff", b'\xed\xbf\xbf', 3),
("utf-16le", "\udfff", b'\xff\xdf', 2),
("utf-16be", "\udfff", b'\xdf\xff', 2),
("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4),
("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4),
- ("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3),
("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2),
("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2),
@@ -694,7 +774,7 @@ class CodecCallbackTest(unittest.TestCase):
self.assertEqual(
surrogatepass_errors(
UnicodeDecodeError(enc, bytearray(b"a" + b[:n] + b"b"),
- 1, n, "ouch")),
+ 1, 1 + n, "ouch")),
(s[:1], 1 + n)
)
@@ -738,6 +818,10 @@ class CodecCallbackTest(unittest.TestCase):
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
+ self.assertEqual(
+ codecs.namereplace_errors,
+ codecs.lookup_error("namereplace")
+ )
def test_unencodablereplacement(self):
def unencrepl(exc):
@@ -890,7 +974,8 @@ class CodecCallbackTest(unittest.TestCase):
class D(dict):
def __getitem__(self, key):
raise ValueError
- for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
+ for err in ("strict", "replace", "xmlcharrefreplace",
+ "backslashreplace", "namereplace", "test.posreturn"):
self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
@@ -905,7 +990,7 @@ class CodecCallbackTest(unittest.TestCase):
def __getitem__(self, key):
raise ValueError
#self.assertRaises(ValueError, "\xff".translate, D())
- self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
+ self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
def test_bug828737(self):
@@ -967,6 +1052,7 @@ class CodecCallbackTest(unittest.TestCase):
codecs.ignore_errors,
codecs.replace_errors,
codecs.backslashreplace_errors,
+ codecs.namereplace_errors,
codecs.xmlcharrefreplace_errors,
codecs.lookup_error('surrogateescape'),
codecs.lookup_error('surrogatepass'),