Issue #19676: Added the "namereplace" error handler.

author: Serhiy Storchaka <storchaka@gmail.com> 2014-11-25 11:57:17 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2014-11-25 11:57:17 (GMT)
commit: 166ebc4e5dd09f005c6144b7568da83728b8b893 (patch)
tree: f6b9deb3cb72095ef55bcef31637f4aaafe95248 /Lib/test/test_codeccallbacks.py
parent: 6cecf68c7b51390429a2488846b1d0c29581987a (diff)
download: cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.zip
cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.gz
cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.bz2
1 file changed, 98 insertions, 2 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index a1ce9cf..9743791 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -158,6 +158,22 @@ class CodecCallbackTest(unittest.TestCase):
         sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 
+    def test_nameescape(self):
+        # Does the same as the "backslashreplace" handler, but prefers
+        # ``\N{...}`` escape sequences when the character has a name.
+        sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+        sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("ascii", "namereplace"), sout)
+
+        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
+
+        sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
+                b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
+        self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
+
     def test_decoding_callbacks(self):
         # This is a test for a decoding callback handler
         # that allows the decoding of the invalid sequence
@@ -297,7 +313,7 @@ class CodecCallbackTest(unittest.TestCase):
     def test_longstrings(self):
         # test long strings to check for memory overflow problems
         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
-                   "backslashreplace"]
+                   "backslashreplace", "namereplace"]
         # register the handlers under different names,
         # to prevent the codec from recognizing the name
         for err in errors:
@@ -611,6 +627,81 @@ class CodecCallbackTest(unittest.TestCase):
                 ("\\udfff", 1)
             )
 
+    def test_badandgoodnamereplaceexceptions(self):
+        # "namereplace" complains about a non-exception passed in
+        self.assertRaises(
+           TypeError,
+           codecs.namereplace_errors,
+           42
+        )
+        # "namereplace" complains about the wrong exception types
+        self.assertRaises(
+           TypeError,
+           codecs.namereplace_errors,
+           UnicodeError("ouch")
+        )
+        # "namereplace" can only be used for encoding
+        self.assertRaises(
+            TypeError,
+            codecs.namereplace_errors,
+            UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.namereplace_errors,
+            UnicodeTranslateError("\u3042", 0, 1, "ouch")
+        )
+        # Use the correct exception
+        self.assertEqual(
+            codecs.namereplace_errors(
+                UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
+            ("\\N{HIRAGANA LETTER A}", 1)
+        )
+        self.assertEqual(
+            codecs.namereplace_errors(
+                UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
+            ("\\x00", 1)
+        )
+        self.assertEqual(
+            codecs.namereplace_errors(
+                UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
+            ("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1)
+        )
+        self.assertEqual(
+            codecs.namereplace_errors(
+                UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
+            ("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1)
+        )
+        self.assertEqual(
+            codecs.namereplace_errors(
+                UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
+            ("\\uffff", 1)
+        )
+        if SIZEOF_WCHAR_T > 0:
+            self.assertEqual(
+                codecs.namereplace_errors(
+                    UnicodeEncodeError("ascii", "\U00010000",
+                                       0, 1, "ouch")),
+                ("\\N{LINEAR B SYLLABLE B008 A}", 1)
+            )
+            self.assertEqual(
+                codecs.namereplace_errors(
+                    UnicodeEncodeError("ascii", "\U0010ffff",
+                                       0, 1, "ouch")),
+                ("\\U0010ffff", 1)
+            )
+            # Lone surrogates (regardless of unicode width)
+            self.assertEqual(
+                codecs.namereplace_errors(
+                    UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
+                ("\\ud800", 1)
+            )
+            self.assertEqual(
+                codecs.namereplace_errors(
+                    UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
+                ("\\udfff", 1)
+            )
+
     def test_badhandlerresults(self):
         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
@@ -651,6 +742,10 @@ class CodecCallbackTest(unittest.TestCase):
             codecs.backslashreplace_errors,
             codecs.lookup_error("backslashreplace")
         )
+        self.assertEqual(
+            codecs.namereplace_errors,
+            codecs.lookup_error("namereplace")
+        )
 
     def test_unencodablereplacement(self):
         def unencrepl(exc):
@@ -804,7 +899,8 @@ class CodecCallbackTest(unittest.TestCase):
         class D(dict):
             def __getitem__(self, key):
                 raise ValueError
-        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
+        for err in ("strict", "replace", "xmlcharrefreplace",
+                    "backslashreplace", "namereplace", "test.posreturn"):
             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
author	Serhiy Storchaka <storchaka@gmail.com>	2014-11-25 11:57:17 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2014-11-25 11:57:17 (GMT)
commit	166ebc4e5dd09f005c6144b7568da83728b8b893 (patch)
tree	f6b9deb3cb72095ef55bcef31637f4aaafe95248 /Lib/test/test_codeccallbacks.py
parent	6cecf68c7b51390429a2488846b1d0c29581987a (diff)
download	cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.zip cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.gz cpython-166ebc4e5dd09f005c6144b7568da83728b8b893.tar.bz2