summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2000-03-24 22:14:19 (GMT)
committerGuido van Rossum <guido@python.org>2000-03-24 22:14:19 (GMT)
commitd8855fde885ffcd9956352edb75674f38c64acaa (patch)
treee956abb92678c85ffb8674c9a49d1fb7e8459140 /Lib
parent27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
downloadcpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2
Marc-Andre Lemburg:
Attached you find the latest update of the Unicode implementation. The patch is against the current CVS version. It includes the fix I posted yesterday for the core dump problem in codecs.c (was introduced by my previous patch set -- sorry), adds more tests for the codecs and two new parser markers "es" and "es#".
Diffstat (limited to 'Lib')
-rw-r--r--Lib/codecs.py2
-rw-r--r--Lib/test/output/test_unicode1
-rw-r--r--Lib/test/test_unicode.py30
3 files changed, 31 insertions, 2 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 7f478d7..c09f804 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -46,7 +46,7 @@ class Codec:
handling schemes by providing the errors argument. These
string values are defined:
- 'strict' - raise an error (or a subclass)
+ 'strict' - raise a ValueError error (or a subclass)
'ignore' - ignore the character and continue with the next
'replace' - replace with a suitable replacement character;
Python will use the official U+FFFD REPLACEMENT
diff --git a/Lib/test/output/test_unicode b/Lib/test/output/test_unicode
index 382a631..1ec9031 100644
--- a/Lib/test/output/test_unicode
+++ b/Lib/test/output/test_unicode
@@ -1,5 +1,4 @@
test_unicode
Testing Unicode comparisons... done.
-Testing Unicode contains method... done.
Testing Unicode formatting strings... done.
Testing unicodedata module... done.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 69d4273..3d15f22 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -293,3 +293,33 @@ else:
assert unicodedata.combining(u'\u20e1') == 230
print 'done.'
+
+# Test builtin codecs
+print 'Testing builtin codecs...',
+
+assert unicode('hello','ascii') == u'hello'
+assert unicode('hello','utf-8') == u'hello'
+assert unicode('hello','utf8') == u'hello'
+assert unicode('hello','latin-1') == u'hello'
+
+assert u'hello'.encode('ascii') == 'hello'
+assert u'hello'.encode('utf-8') == 'hello'
+assert u'hello'.encode('utf8') == 'hello'
+assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
+assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
+assert u'hello'.encode('latin-1') == 'hello'
+
+u = u''.join(map(unichr, range(1024)))
+for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+ 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
+ assert unicode(u.encode(encoding),encoding) == u
+
+u = u''.join(map(unichr, range(256)))
+for encoding in ('latin-1',):
+ assert unicode(u.encode(encoding),encoding) == u
+
+u = u''.join(map(unichr, range(128)))
+for encoding in ('ascii',):
+ assert unicode(u.encode(encoding),encoding) == u
+
+print 'done.'