Patch #435971: UTF-7 codec by Brian Quinlan.

author: Marc-André Lemburg <mal@egenix.com> 2001-09-20 10:35:46 (GMT)
committer: Marc-André Lemburg <mal@egenix.com> 2001-09-20 10:35:46 (GMT)
commit: c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (patch)
tree: c7c600ed692c243edbd520872a2648cb9c01a8c1 /Lib/test
parent: 26e3b681b26c9978c819396e278f43d356d86f9e (diff)
download: cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.zip cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.gz cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.bz2
1 files changed, 28 insertions, 1 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index dde16ef..d57328d 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -377,6 +377,32 @@ print 'done.'
 # Test builtin codecs
 print 'Testing builtin codecs...',
 
+# UTF-7 specific encoding tests:
+utfTests = [(u'A\u2262\u0391.', 'A+ImIDkQ.'),  # RFC2152 example
+ (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
+ (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
+ (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
+ (u'+', '+-'),
+ (u'+-', '+--'),
+ (u'+?', '+-?'),
+ (u'\?', '+AFw?'),
+ (u'+?', '+-?'),
+ (ur'\\?', '+AFwAXA?'),
+ (ur'\\\?', '+AFwAXABc?'),
+ (ur'++--', '+-+---')]
+
+for x,y in utfTests:
+    verify( x.encode('utf-7') == y )
+
+try:        
+    unicode('+3ADYAA-', 'utf-7') # surrogates not supported
+except UnicodeError:
+    pass
+else:
+    raise TestFailed, "unicode('+3ADYAA-', 'utf-7') failed to raise an exception"
+
+verify(unicode('+3ADYAA-', 'utf-7', 'replace') == u'\ufffd')
+
 # UTF-8 specific encoding tests:
 verify(u'\u20ac'.encode('utf-8') == \
        ''.join((chr(0xe2), chr(0x82), chr(0xac))) )
@@ -439,6 +465,7 @@ verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
 verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
 
 verify(u'hello'.encode('ascii') == 'hello')
+verify(u'hello'.encode('utf-7') == 'hello')
 verify(u'hello'.encode('utf-8') == 'hello')
 verify(u'hello'.encode('utf8') == 'hello')
 verify(u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000')
@@ -447,7 +474,7 @@ verify(u'hello'.encode('latin-1') == 'hello')
 
 # Roundtrip safety for BMP (just the first 1024 chars)
 u = u''.join(map(unichr, range(1024)))
-for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                  'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
     verify(unicode(u.encode(encoding),encoding) == u)
author	Marc-André Lemburg <mal@egenix.com>	2001-09-20 10:35:46 (GMT)
committer	Marc-André Lemburg <mal@egenix.com>	2001-09-20 10:35:46 (GMT)
commit	c60e6f777114f43c64f1b83f9ad2b6e4efd220e7 (patch)
tree	c7c600ed692c243edbd520872a2648cb9c01a8c1 /Lib/test
parent	26e3b681b26c9978c819396e278f43d356d86f9e (diff)
download	cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.zip cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.gz cpython-c60e6f777114f43c64f1b83f9ad2b6e4efd220e7.tar.bz2