diff options
author | Marc-André Lemburg <mal@egenix.com> | 2002-04-10 17:18:02 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2002-04-10 17:18:02 (GMT) |
commit | ce0b664af2b40c71a094b16d33b8310ff749f9df (patch) | |
tree | 7d8068117b88f805b76d17a6d2523512d0455fc4 /Lib | |
parent | a9745611def1d7811210d45626f4b5d91d0b927c (diff) | |
download | cpython-ce0b664af2b40c71a094b16d33b8310ff749f9df.zip cpython-ce0b664af2b40c71a094b16d33b8310ff749f9df.tar.gz cpython-ce0b664af2b40c71a094b16d33b8310ff749f9df.tar.bz2 |
Added test case for UTF-8 encoding bug #541828.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_unicode.py | 16 |
1 files changed, 16 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 9ee7a39..4b77e75 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -508,6 +508,22 @@
 verify(u'\ud800'.encode('utf-8') == '\xed\xa0\x80')
 verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
 verify((u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000)
+verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+       u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+       u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+       u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+       u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
+       u' Nunstuck git und'.encode('utf-8') ==
+       '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
+       '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
+       '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
+       '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
+       '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
+       '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
+       '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
+       '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
+       '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
+       '\xe3\x80\x8cWenn ist das Nunstuck git und')
 
 # UTF-8 specific decoding tests
 verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )
```