diff options
author | Marc-André Lemburg <mal@egenix.com> | 2001-07-20 17:39:11 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2001-07-20 17:39:11 (GMT) |
commit | 6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe (patch) | |
tree | 78df5b177cd555fcaef467117c6d04d1de021215 /Lib/test/test_unicode.py | |
parent | 0d42e0c54a3b95aec4d4d12d1cd758438d645089 (diff) | |
download | cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.zip cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.tar.gz cpython-6c6bfb7c70d77b52354a6fd8c76de2cc641aa8fe.tar.bz2 |
Make the unicode-escape and the UTF-16 codecs handle surrogates
correctly and thus be roundtrip-safe.
Some minor cleanups of the code.
Added tests for the roundtrip-safety.
Diffstat (limited to 'Lib/test/test_unicode.py')
-rw-r--r-- | Lib/test/test_unicode.py | 8 |
1 file changed, 8 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c9732d6..eb74854 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -445,11 +445,19 @@ verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000') verify(u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o') verify(u'hello'.encode('latin-1') == 'hello') +# Roundtrip safety for BMP (just the first 1024 chars) u = u''.join(map(unichr, range(1024))) for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): verify(unicode(u.encode(encoding),encoding) == u) +# Roundtrip safety for non-BMP (just a few chars) +u = u'\U00010001\U00020002\U00030003\U00040004\U00050005' +for encoding in ('utf-8', + 'utf-16', 'utf-16-le', 'utf-16-be', + 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + verify(unicode(u.encode(encoding),encoding) == u) + u = u''.join(map(unichr, range(256))) for encoding in ( 'latin-1', |