diff options
author | Guido van Rossum <guido@python.org> | 2000-03-24 22:14:19 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2000-03-24 22:14:19 (GMT) |
commit | d8855fde885ffcd9956352edb75674f38c64acaa (patch) | |
tree | e956abb92678c85ffb8674c9a49d1fb7e8459140 /Lib | |
parent | 27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff) | |
download | cpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2 |
Marc-Andre Lemburg:
Attached is the latest update of the Unicode implementation.
The patch is against the current CVS version.
It includes the fix I posted yesterday for the core dump problem
in codecs.c (which was introduced by my previous patch set -- sorry),
adds more tests for the codecs, and adds two new parser markers,
"es" and "es#".
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/codecs.py | 2 | ||||
-rw-r--r-- | Lib/test/output/test_unicode | 1 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 30 |
3 files changed, 31 insertions, 2 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py index 7f478d7..c09f804 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -46,7 +46,7 @@ class Codec: handling schemes by providing the errors argument. These string values are defined: - 'strict' - raise an error (or a subclass) + 'strict' - raise a ValueError error (or a subclass) 'ignore' - ignore the character and continue with the next 'replace' - replace with a suitable replacement character; Python will use the official U+FFFD REPLACEMENT diff --git a/Lib/test/output/test_unicode b/Lib/test/output/test_unicode index 382a631..1ec9031 100644 --- a/Lib/test/output/test_unicode +++ b/Lib/test/output/test_unicode @@ -1,5 +1,4 @@ test_unicode Testing Unicode comparisons... done. -Testing Unicode contains method... done. Testing Unicode formatting strings... done. Testing unicodedata module... done. diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 69d4273..3d15f22 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -293,3 +293,33 @@ else: assert unicodedata.combining(u'\u20e1') == 230 print 'done.' 
+ +# Test builtin codecs +print 'Testing builtin codecs...', + +assert unicode('hello','ascii') == u'hello' +assert unicode('hello','utf-8') == u'hello' +assert unicode('hello','utf8') == u'hello' +assert unicode('hello','latin-1') == u'hello' + +assert u'hello'.encode('ascii') == 'hello' +assert u'hello'.encode('utf-8') == 'hello' +assert u'hello'.encode('utf8') == 'hello' +assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000' +assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o' +assert u'hello'.encode('latin-1') == 'hello' + +u = u''.join(map(unichr, range(1024))) +for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', + 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + assert unicode(u.encode(encoding),encoding) == u + +u = u''.join(map(unichr, range(256))) +for encoding in ('latin-1',): + assert unicode(u.encode(encoding),encoding) == u + +u = u''.join(map(unichr, range(128))) +for encoding in ('ascii',): + assert unicode(u.encode(encoding),encoding) == u + +print 'done.' |