Marc-Andre Lemburg:

Attached you will find the latest update of the Unicode implementation. The patch is against the current CVS version. It includes the fix I posted yesterday for the core dump problem in codecs.c (which was introduced by my previous patch set -- sorry), adds more tests for the codecs, and adds two new parser markers, "es" and "es#".
author: Guido van Rossum <guido@python.org> 2000-03-24 22:14:19 (GMT)
committer: Guido van Rossum <guido@python.org> 2000-03-24 22:14:19 (GMT)
commit: d8855fde885ffcd9956352edb75674f38c64acaa (patch)
tree: e956abb92678c85ffb8674c9a49d1fb7e8459140 /Lib
parent: 27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
download: cpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2
3 files changed, 31 insertions, 2 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 7f478d7..c09f804 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -46,7 +46,7 @@ class Codec:
         handling schemes by providing the errors argument. These
         string values are defined:
 
-         'strict' - raise an error (or a subclass)
+         'strict' - raise a ValueError error (or a subclass)
          'ignore' - ignore the character and continue with the next
          'replace' - replace with a suitable replacement character;
                     Python will use the official U+FFFD REPLACEMENT
diff --git a/Lib/test/output/test_unicode b/Lib/test/output/test_unicode
index 382a631..1ec9031 100644
--- a/Lib/test/output/test_unicode
+++ b/Lib/test/output/test_unicode
@@ -1,5 +1,4 @@
 test_unicode
 Testing Unicode comparisons... done.
-Testing Unicode contains method... done.
 Testing Unicode formatting strings... done.
 Testing unicodedata module... done.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 69d4273..3d15f22 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -293,3 +293,33 @@ else:
     assert unicodedata.combining(u'\u20e1') == 230
     
     print 'done.'
+
+# Test builtin codecs
+print 'Testing builtin codecs...',
+
+assert unicode('hello','ascii') == u'hello'
+assert unicode('hello','utf-8') == u'hello'
+assert unicode('hello','utf8') == u'hello'
+assert unicode('hello','latin-1') == u'hello'
+
+assert u'hello'.encode('ascii') == 'hello'
+assert u'hello'.encode('utf-8') == 'hello'
+assert u'hello'.encode('utf8') == 'hello'
+assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
+assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
+assert u'hello'.encode('latin-1') == 'hello'
+
+u = u''.join(map(unichr, range(1024)))
+for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+    assert unicode(u.encode(encoding),encoding) == u
+
+u = u''.join(map(unichr, range(256)))
+for encoding in ('latin-1',):
+    assert unicode(u.encode(encoding),encoding) == u
+
+u = u''.join(map(unichr, range(128)))
+for encoding in ('ascii',):
+    assert unicode(u.encode(encoding),encoding) == u
+
+print 'done.'
author	Guido van Rossum <guido@python.org>	2000-03-24 22:14:19 (GMT)
committer	Guido van Rossum <guido@python.org>	2000-03-24 22:14:19 (GMT)
commit	d8855fde885ffcd9956352edb75674f38c64acaa (patch)
tree	e956abb92678c85ffb8674c9a49d1fb7e8459140 /Lib
parent	27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
download	cpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2