8 files changed, 434 insertions, 0 deletions
diff --git a/Lib/UserString.py b/Lib/UserString.py
index 163faa5..45cdeb5 100755
--- a/Lib/UserString.py
+++ b/Lib/UserString.py
@@ -72,6 +72,14 @@ class UserString:
     def center(self, width): return self.__class__(self.data.center(width))
     def count(self, sub, start=0, end=sys.maxint):
         return self.data.count(sub, start, end)
+    def decode(self, encoding=None, errors=None): # XXX improve this?
+        if encoding:
+            if errors:
+                return self.__class__(self.data.decode(encoding, errors))
+            else:
+                return self.__class__(self.data.decode(encoding))
+        else:
+            return self.__class__(self.data.decode())
     def encode(self, encoding=None, errors=None): # XXX improve this?
         if encoding:
             if errors:
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index b9bfd97..5573f6d 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -79,4 +79,13 @@ aliases = {
     'tis260': 'tactis',
     'sjis': 'shift_jis',
 
+    # Content transfer/compression encodings
+    'rot13': 'rot_13',
+    'base64': 'base64_codec',
+    'base_64': 'base64_codec',
+    'zlib': 'zlib_codec',
+    'zip': 'zlib_codec',
+    'hex': 'hex_codec',
+    'uu': 'uu_codec',
+
 }
diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py
new file mode 100644
index 0000000..ce21b1a4
--- /dev/null
+++ b/Lib/encodings/base64_codec.py
@@ -0,0 +1,60 @@
+""" Python 'base64_codec' Codec - base64 content transfer encoding
+
+    Unlike most of the other codecs which target Unicode, this codec
+    will return Python string objects for both encode and decode.
+
+    Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, base64
+
+### Codec APIs
+
+def base64_encode(input,errors='strict'):
+
+    """ Encodes the object input and returns a tuple (output
+        object, length consumed).
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = base64.encodestring(input)
+    return (output, len(input))
+
+def base64_decode(input,errors='strict'):
+
+    """ Decodes the object input and returns a tuple (output
+        object, length consumed).
+
+        input must be an object which provides the bf_getreadbuf
+        buffer slot. Python strings, buffer objects and memory
+        mapped files are examples of objects providing this slot.
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = base64.decodestring(input)
+    return (output, len(input))
+
+class Codec(codecs.Codec):
+
+    encode = base64_encode
+    decode = base64_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+
+    return (base64_encode,base64_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py
new file mode 100644
index 0000000..ab7d86f
--- /dev/null
+++ b/Lib/encodings/hex_codec.py
@@ -0,0 +1,60 @@
+""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
+
+    Unlike most of the other codecs which target Unicode, this codec
+    will return Python string objects for both encode and decode.
+
+    Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, binascii
+
+### Codec APIs
+
+def hex_encode(input,errors='strict'):
+
+    """ Encodes the object input and returns a tuple (output
+        object, length consumed).
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = binascii.b2a_hex(input)
+    return (output, len(input))
+
+def hex_decode(input,errors='strict'):
+
+    """ Decodes the object input and returns a tuple (output
+        object, length consumed).
+
+        input must be an object which provides the bf_getreadbuf
+        buffer slot. Python strings, buffer objects and memory
+        mapped files are examples of objects providing this slot.
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = binascii.a2b_hex(input)
+    return (output, len(input))
+
+class Codec(codecs.Codec):
+
+    encode = hex_encode
+    decode = hex_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+
+    return (hex_encode,hex_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py
new file mode 100644
index 0000000..8c54811
--- /dev/null
+++ b/Lib/encodings/rot_13.py
@@ -0,0 +1,107 @@
+#!/usr/local/bin/python2.1
+""" Python Character Mapping Codec for ROT13.
+
+    See http://ucsub.colorado.edu/~kominek/rot13/ for details.
+
+    Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+    def encode(self,input,errors='strict'):
+
+        return codecs.charmap_encode(input,errors,encoding_map)
+        
+    def decode(self,input,errors='strict'):
+
+        return codecs.charmap_decode(input,errors,decoding_map)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+
+    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+   0x0041: 0x004e,
+   0x0042: 0x004f,
+   0x0043: 0x0050,
+   0x0044: 0x0051,
+   0x0045: 0x0052,
+   0x0046: 0x0053,
+   0x0047: 0x0054,
+   0x0048: 0x0055,
+   0x0049: 0x0056,
+   0x004a: 0x0057,
+   0x004b: 0x0058,
+   0x004c: 0x0059,
+   0x004d: 0x005a,
+   0x004e: 0x0041,
+   0x004f: 0x0042,
+   0x0050: 0x0043,
+   0x0051: 0x0044,
+   0x0052: 0x0045,
+   0x0053: 0x0046,
+   0x0054: 0x0047,
+   0x0055: 0x0048,
+   0x0056: 0x0049,
+   0x0057: 0x004a,
+   0x0058: 0x004b,
+   0x0059: 0x004c,
+   0x005a: 0x004d,
+   0x0061: 0x006e,
+   0x0062: 0x006f,
+   0x0063: 0x0070,
+   0x0064: 0x0071,
+   0x0065: 0x0072,
+   0x0066: 0x0073,
+   0x0067: 0x0074,
+   0x0068: 0x0075,
+   0x0069: 0x0076,
+   0x006a: 0x0077,
+   0x006b: 0x0078,
+   0x006c: 0x0079,
+   0x006d: 0x007a,
+   0x006e: 0x0061,
+   0x006f: 0x0062,
+   0x0070: 0x0063,
+   0x0071: 0x0064,
+   0x0072: 0x0065,
+   0x0073: 0x0066,
+   0x0074: 0x0067,
+   0x0075: 0x0068,
+   0x0076: 0x0069,
+   0x0077: 0x006a,
+   0x0078: 0x006b,
+   0x0079: 0x006c,
+   0x007a: 0x006d,
+})
+
+### Encoding Map
+
+encoding_map = {}
+for k,v in decoding_map.items():
+    encoding_map[v] = k
+
+### Filter API
+
+def rot13(infile, outfile):
+    outfile.write(infile.read().encode('rot-13'))
+
+if __name__ == '__main__':
+    import sys
+    rot13(sys.stdin, sys.stdout)
diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py
new file mode 100644
index 0000000..82e799c
--- /dev/null
+++ b/Lib/encodings/uu_codec.py
@@ -0,0 +1,110 @@
+""" Python 'uu_codec' Codec - UU content transfer encoding
+
+    Unlike most of the other codecs which target Unicode, this codec
+    will return Python string objects for both encode and decode.
+
+    Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
+    adapted from uu.py which was written by Lance Ellinghouse and
+    modified by Jack Jansen and Fredrik Lundh.
+
+"""
+import codecs, binascii
+
+### Codec APIs
+
+def uu_encode(input,errors='strict',filename='<data>',mode=0666):
+
+    """ Encodes the object input and returns a tuple (output
+        object, length consumed).
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    from cStringIO import StringIO
+    from binascii import b2a_uu
+    infile = StringIO(input)
+    outfile = StringIO()
+    read = infile.read
+    write = outfile.write
+
+    # Encode
+    write('begin %o %s\n' % (mode & 0777, filename))
+    chunk = read(45)
+    while chunk:
+        write(b2a_uu(chunk))
+        chunk = read(45)
+    write(' \nend\n')
+    
+    return (outfile.getvalue(), len(input))
+
+def uu_decode(input,errors='strict'):
+
+    """ Decodes the object input and returns a tuple (output
+        object, length consumed).
+
+        input must be an object which provides the bf_getreadbuf
+        buffer slot. Python strings, buffer objects and memory
+        mapped files are examples of objects providing this slot.
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+        Note: filename and file mode information in the input data is
+        ignored.
+
+    """
+    assert errors == 'strict'
+    from cStringIO import StringIO
+    from binascii import a2b_uu
+    infile = StringIO(input)
+    outfile = StringIO()
+    readline = infile.readline
+    write = outfile.write
+
+    # Find start of encoded data
+    while 1:
+        s = readline()
+        if not s:
+            raise ValueError, 'Missing "begin" line in input data'
+        if s[:5] == 'begin':
+            break
+
+    # Decode
+    while 1:
+        s = readline()
+        if not s or \
+           s == 'end\n':
+            break
+        try:
+            data = a2b_uu(s)
+        except binascii.Error, v:
+            # Workaround for broken uuencoders by /Fredrik Lundh
+            nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
+            data = a2b_uu(s[:nbytes])
+            #sys.stderr.write("Warning: %s\n" % str(v))
+        write(data)
+    if not s:
+        raise ValueError, 'Truncated input data'
+
+    return (outfile.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+
+    encode = uu_encode
+    decode = uu_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+
+    return (uu_encode,uu_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py
new file mode 100644
index 0000000..035bb04
--- /dev/null
+++ b/Lib/encodings/zlib_codec.py
@@ -0,0 +1,61 @@
+""" Python 'zlib_codec' Codec - zlib compression encoding
+
+    Unlike most of the other codecs which target Unicode, this codec
+    will return Python string objects for both encode and decode.
+
+    Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs
+import zlib # this codec needs the optional zlib module !
+
+### Codec APIs
+
+def zlib_encode(input,errors='strict'):
+
+    """ Encodes the object input and returns a tuple (output
+        object, length consumed).
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = zlib.compress(input)
+    return (output, len(input))
+
+def zlib_decode(input,errors='strict'):
+
+    """ Decodes the object input and returns a tuple (output
+        object, length consumed).
+
+        input must be an object which provides the bf_getreadbuf
+        buffer slot. Python strings, buffer objects and memory
+        mapped files are examples of objects providing this slot.
+
+        errors defines the error handling to apply. It defaults to
+        'strict' handling which is the only currently supported
+        error handling for this codec.
+
+    """
+    assert errors == 'strict'
+    output = zlib.decompress(input)
+    return (output, len(input))
+
+class Codec(codecs.Codec):
+
+    encode = zlib_encode
+    decode = zlib_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+
+    return (zlib_encode,zlib_decode,StreamReader,StreamWriter)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index fcce50f..9b95a8e 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1,6 +1,7 @@
 """Common tests shared by test_string and test_userstring"""
 
 import string
+from test_support import verify, verbose, TestFailed
 
 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
 
@@ -212,3 +213,21 @@ def run_method_tests(test):
     test('endswith', 'helloworld', 0, 'lowo', 3, 8)
     test('endswith', 'ab', 0, 'ab', 0, 1)
     test('endswith', 'ab', 0, 'ab', 0, 0)
+
+    # Encoding/decoding
+    codecs = [('rot13', 'uryyb jbeyq'),
+              ('base64', 'aGVsbG8gd29ybGQ=\n'),
+              ('hex', '68656c6c6f20776f726c64'),
+              ('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n')]
+    for encoding, data in codecs:
+        test('encode', 'hello world', data, encoding)
+        test('decode', data, 'hello world', encoding)
+    # zlib is optional, so we make the test optional too...
+    try:
+        import zlib
+    except ImportError:
+        pass
+    else:
+        data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
+        verify('hello world'.encode('zlib') == data)
+        verify(data.decode('zlib') == 'hello world')