summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_codeccallbacks.py
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2010-09-09 20:33:43 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2010-09-09 20:33:43 (GMT)
commit: c9a8df24cc8c95efb63b9820d9381ad2f54e45c5 (patch)
tree: 590f0f94fd1907e7849a30f071ee6d27af1a3fbb /Lib/test/test_codeccallbacks.py
parent: 8e0bb6a1e2907797cd6e4b7cc90539904e54db7e (diff)
download: cpython-c9a8df24cc8c95efb63b9820d9381ad2f54e45c5.zip
cpython-c9a8df24cc8c95efb63b9820d9381ad2f54e45c5.tar.gz
cpython-c9a8df24cc8c95efb63b9820d9381ad2f54e45c5.tar.bz2
Merged revisions 84655 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84655 | antoine.pitrou | 2010-09-09 22:30:23 +0200 (jeu., 09 sept. 2010) | 6 lines

  Issue #9804: ascii() now always represents unicode surrogate pairs as
  a single `\UXXXXXXXX`, regardless of whether the character is printable
  or not.  Also, the "backslashreplace" error handler now joins surrogate
  pairs into a single character on UCS-2 builds.
........
Diffstat (limited to 'Lib/test/test_codeccallbacks.py')
-rw-r--r-- Lib/test/test_codeccallbacks.py 36
1 files changed, 25 insertions, 11 deletions
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 82782b5..6105fc0 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -577,17 +577,31 @@ class CodecCallbackTest(unittest.TestCase):
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
- if sys.maxunicode>0xffff:
- self.assertEquals(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
- ("\\U00010000", 1)
- )
- self.assertEquals(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
- ("\\U0010ffff", 1)
- )
+ # 1 on UCS-4 builds, 2 on UCS-2
+ len_wide = len("\U00010000")
+ self.assertEquals(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\U00010000",
+ 0, len_wide, "ouch")),
+ ("\\U00010000", len_wide)
+ )
+ self.assertEquals(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\U0010ffff",
+ 0, len_wide, "ouch")),
+ ("\\U0010ffff", len_wide)
+ )
+ # Lone surrogates (regardless of unicode width)
+ self.assertEquals(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
+ ("\\ud800", 1)
+ )
+ self.assertEquals(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
+ ("\\udfff", 1)
+ )
def test_badhandlerresults(self):
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )