diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2014-11-25 11:57:17 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2014-11-25 11:57:17 (GMT) |
commit | 166ebc4e5dd09f005c6144b7568da83728b8b893 (patch) | |
tree | f6b9deb3cb72095ef55bcef31637f4aaafe95248 /Lib/test/test_codeccallbacks.py | |
parent | 6cecf68c7b51390429a2488846b1d0c29581987a (diff) | |
download | cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.zip cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.gz cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.bz2 |
Issue #19676: Added the "namereplace" error handler.
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 100 |
1 files changed, 98 insertions, 2 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index a1ce9cf..9743791 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -158,6 +158,22 @@ class CodecCallbackTest(unittest.TestCase): sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) + def test_nameescape(self): + # Does the same as backslashescape, but prefers ``\N{...}`` escape + # sequences. + sin = "a\xac\u1234\u20ac\u8000\U0010ffff" + sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("ascii", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("latin-1", "namereplace"), sout) + + sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' + b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') + self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) + def test_decoding_callbacks(self): # This is a test for a decoding callback handler # that allows the decoding of the invalid sequence @@ -297,7 +313,7 @@ class CodecCallbackTest(unittest.TestCase): def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", - "backslashreplace"] + "backslashreplace", "namereplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: @@ -611,6 +627,81 @@ class CodecCallbackTest(unittest.TestCase): ("\\udfff", 1) ) + def test_badandgoodnamereplaceexceptions(self): + # "namereplace" complains about a non-exception passed in + self.assertRaises( + TypeError, + codecs.namereplace_errors, + 42 + ) + # "namereplace" complains about the wrong exception types + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeError("ouch") + ) + # "namereplace" can only be used for encoding + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") + ) + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeTranslateError("\u3042", 0, 1, "ouch") + ) + # Use the correct exception + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), + ("\\N{HIRAGANA LETTER A}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), + ("\\x00", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), + ("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), + ("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), + ("\\uffff", 1) + ) + if SIZEOF_WCHAR_T > 0: + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\U00010000", + 0, 1, "ouch")), + ("\\N{LINEAR B SYLLABLE B008 A}", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\U0010ffff", + 0, 1, "ouch")), + ("\\U0010ffff", 1) + ) + # Lone surrogates (regardless of unicode width) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), + ("\\ud800", 1) + ) + self.assertEqual( + codecs.namereplace_errors( + UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), + ("\\udfff", 1) + ) + def test_badhandlerresults(self): results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") @@ -651,6 +742,10 @@ class CodecCallbackTest(unittest.TestCase): codecs.backslashreplace_errors, codecs.lookup_error("backslashreplace") ) + self.assertEqual( + codecs.namereplace_errors, + codecs.lookup_error("namereplace") + ) def test_unencodablereplacement(self): def unencrepl(exc): @@ -804,7 +899,8 @@ class CodecCallbackTest(unittest.TestCase): class D(dict): def __getitem__(self, key): raise ValueError - for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): + for err in ("strict", "replace", "xmlcharrefreplace", + "backslashreplace", "namereplace", "test.posreturn"): self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) |