summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Marc-André Lemburg <mal@egenix.com> 2001-09-20 10:35:46 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-09-20 10:35:46 (GMT)
commit: c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (patch)
tree: c7c600ed692c243edbd520872a2648cb9c01a8c1 /Lib
parent: 26e3b681b26c9978c819396e278f43d356d86f9e (diff)
download: cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.zip
cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.gz
cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.bz2
Patch #435971: UTF-7 codec by Brian Quinlan.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/encodings/aliases.py 4
-rw-r--r-- Lib/test/test_unicode.py 29
2 files changed, 32 insertions, 1 deletion
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index c3c49b4..65d8fef 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -14,6 +14,10 @@ aliases = {
'latin': 'latin_1',
'latin1': 'latin_1',
+ # UTF-7
+ 'utf7': 'utf_7',
+ 'u7': 'utf_7',
+
# UTF-8
'utf': 'utf_8',
'utf8': 'utf_8',
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index dde16ef..d57328d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -377,6 +377,32 @@ print 'done.'
# Test builtin codecs
print 'Testing builtin codecs...',
+# UTF-7 specific encoding tests:
+utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
+ (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
+ (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
+ (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
+ (u'+', '+-'),
+ (u'+-', '+--'),
+ (u'+?', '+-?'),
+ (u'\?', '+AFw?'),
+ (u'+?', '+-?'),
+ (ur'\\?', '+AFwAXA?'),
+ (ur'\\\?', '+AFwAXABc?'),
+ (ur'++--', '+-+---')]
+
+for x,y in utfTests:
+ verify( x.encode('utf-7') == y )
+
+try:
+ unicode('+3ADYAA-', 'utf-7') # surrogates not supported
+except UnicodeError:
+ pass
+else:
+ raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
+
+verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
+
# UTF-8 specific encoding tests:
verify(u'\u20ac'.encode('utf-8') == \
''.join((chr(0xe2), chr(0x82), chr(0xac))) )
@@ -439,6 +465,7 @@ verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
verify(u'hello'.encode('ascii') == 'hello')
+verify(u'hello'.encode('utf-7') == 'hello')
verify(u'hello'.encode('utf-8') == 'hello')
verify(u'hello'.encode('utf8') == 'hello')
verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
@@ -447,7 +474,7 @@ verify(u'hello'.encode('latin-1') == 'hello')
# Roundtrip safety for BMP (just the first 1024 chars)
u = u''.join(map(unichr, range(1024)))
-for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
verify(unicode(u.encode(encoding),encoding) == u)