diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-09 20:30:23 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-09 20:30:23 (GMT) |
commit | e4a189274f3d88d64d5238bf340cec96eff4e5e0 (patch) | |
tree | 5ead5f4f2fe3799a34155f2e41a04518adb995b1 /Lib/test/test_codeccallbacks.py | |
parent | ea99c5c94985c21d8a64c9a3d753bde7f801c14a (diff) | |
download | cpython-e4a189274f3d88d64d5238bf340cec96eff4e5e0.zip cpython-e4a189274f3d88d64d5238bf340cec96eff4e5e0.tar.gz cpython-e4a189274f3d88d64d5238bf340cec96eff4e5e0.tar.bz2 |
Issue #9804: ascii() now always represents unicode surrogate pairs as
a single `\UXXXXXXXX`, regardless of whether the character is printable
or not. Also, the "backslashreplace" error handler now joins surrogate
pairs into a single character on UCS-2 builds.
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- | Lib/test/test_codeccallbacks.py | 36 |
1 files changed, 25 insertions, 11 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 82782b5..6105fc0 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -577,17 +577,31 @@ class CodecCallbackTest(unittest.TestCase):
                 UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
             ("\\uffff", 1)
         )
-        if sys.maxunicode>0xffff:
-            self.assertEquals(
-                codecs.backslashreplace_errors(
-                    UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
-                ("\\U00010000", 1)
-            )
-            self.assertEquals(
-                codecs.backslashreplace_errors(
-                    UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
-                ("\\U0010ffff", 1)
-            )
+        # 1 on UCS-4 builds, 2 on UCS-2
+        len_wide = len("\U00010000")
+        self.assertEquals(
+            codecs.backslashreplace_errors(
+                UnicodeEncodeError("ascii", "\U00010000",
+                                   0, len_wide, "ouch")),
+            ("\\U00010000", len_wide)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(
+                UnicodeEncodeError("ascii", "\U0010ffff",
+                                   0, len_wide, "ouch")),
+            ("\\U0010ffff", len_wide)
+        )
+        # Lone surrogates (regardless of unicode width)
+        self.assertEquals(
+            codecs.backslashreplace_errors(
+                UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
+            ("\\ud800", 1)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(
+                UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
+            ("\\udfff", 1)
+        )
 
     def test_badhandlerresults(self):
         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )