summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_unicode.py
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com> 2001-07-20 17:39:11 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-07-20 17:39:11 (GMT)
commit: 6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe (patch)
tree: 78df5b177cd555fcaef467117c6d04d1de021215 /Lib/test/test_unicode.py
parent: 0d42e0c54a3b95aec4d4d12d1cd758438d645089 (diff)
download: cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.zip
cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.tar.gz
cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.tar.bz2
Make the unicode-escape and the UTF-16 codecs handle surrogates
correctly and thus roundtrip-safe. Some minor cleanups of the code. Added tests for the roundtrip-safety.
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r-- Lib/test/test_unicode.py 8
1 file changed, 8 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index c9732d6..eb74854 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -445,11 +445,19 @@ verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o')
verify(u'hello'.encode('latin-1') == 'hello')
+# Roundtrip safety for BMP (just the first 1024 chars)
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
verify(unicode(u.encode(encoding),encoding) == u)
+# Roundtrip safety for non-BMP (just a few chars)
+u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
+for encoding in ('utf-8',
+ 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+ verify(unicode(u.encode(encoding),encoding) == u)
+
u = u''.join(map(unichr, range(256)))
for encoding in (
'latin-1',