summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin v. Löwis <martin@v.loewis.de> 2002-09-14 09:19:53 (GMT)
committer Martin v. Löwis <martin@v.loewis.de> 2002-09-14 09:19:53 (GMT)
commit 1ce4ae3268e616414ad63a0cacdffa0b5830d0b5 (patch)
tree 7ffa231ce151298b0502da67b406d17fd7f286d0
parent 766e300eaaf8b8ca06a1e98e94fa1c5d1a33eba6 (diff)
download cpython-1ce4ae3268e616414ad63a0cacdffa0b5830d0b5.zip
cpython-1ce4ae3268e616414ad63a0cacdffa0b5830d0b5.tar.gz
cpython-1ce4ae3268e616414ad63a0cacdffa0b5830d0b5.tar.bz2
Don't test whether surrogate sequences round-trip in UTF-8. 2.2.2 candidate.
-rw-r--r-- Lib/test/test_unicode.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index a57d6f4..89e28b5 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -695,7 +695,10 @@ for encoding in ('utf-8',
verify(unicode(u.encode(encoding),encoding) == u)
# UTF-8 must be roundtrip safe for all UCS-2 code points
-u = u''.join(map(unichr, range(0x10000)))
+# This excludes surrogates: in the full range, there would be
+# a surrogate pair (\udbff\udc00), which gets converted back
+# to a non-BMP character (\U0010fc00)
+u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
for encoding in ('utf-8',):
verify(unicode(u.encode(encoding),encoding) == u)