diff options
Diffstat (limited to 'Lib')
-rwxr-xr-x | Lib/UserString.py | 8 | ||||
-rw-r--r-- | Lib/encodings/aliases.py | 9 | ||||
-rw-r--r-- | Lib/encodings/base64_codec.py | 60 | ||||
-rw-r--r-- | Lib/encodings/hex_codec.py | 60 | ||||
-rw-r--r-- | Lib/encodings/rot_13.py | 107 | ||||
-rw-r--r-- | Lib/encodings/uu_codec.py | 110 | ||||
-rw-r--r-- | Lib/encodings/zlib_codec.py | 61 | ||||
-rw-r--r-- | Lib/test/string_tests.py | 19 |
8 files changed, 434 insertions, 0 deletions
diff --git a/Lib/UserString.py b/Lib/UserString.py index 163faa5..45cdeb5 100755 --- a/Lib/UserString.py +++ b/Lib/UserString.py @@ -72,6 +72,14 @@ class UserString: def center(self, width): return self.__class__(self.data.center(width)) def count(self, sub, start=0, end=sys.maxint): return self.data.count(sub, start, end) + def decode(self, encoding=None, errors=None): # XXX improve this? + if encoding: + if errors: + return self.__class__(self.data.decode(encoding, errors)) + else: + return self.__class__(self.data.decode(encoding)) + else: + return self.__class__(self.data.decode()) def encode(self, encoding=None, errors=None): # XXX improve this? if encoding: if errors: diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index b9bfd97..5573f6d 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -79,4 +79,13 @@ aliases = { 'tis260': 'tactis', 'sjis': 'shift_jis', + # Content transfer/compression encodings + 'rot13': 'rot_13', + 'base64': 'base64_codec', + 'base_64': 'base64_codec', + 'zlib': 'zlib_codec', + 'zip': 'zlib_codec', + 'hex': 'hex_codec', + 'uu': 'uu_codec', + } diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py new file mode 100644 index 0000000..ce21b1a4 --- /dev/null +++ b/Lib/encodings/base64_codec.py @@ -0,0 +1,60 @@ +""" Python 'base64_codec' Codec - base64 content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs, base64 + +### Codec APIs + +def base64_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = base64.encodestring(input) + return (output, len(input)) + +def base64_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = base64.decodestring(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + encode = base64_encode + decode = base64_decode + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + + return (base64_encode,base64_decode,StreamReader,StreamWriter) diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py new file mode 100644 index 0000000..ab7d86f --- /dev/null +++ b/Lib/encodings/hex_codec.py @@ -0,0 +1,60 @@ +""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs, binascii + +### Codec APIs + +def hex_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = binascii.b2a_hex(input) + return (output, len(input)) + +def hex_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = binascii.a2b_hex(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + encode = hex_encode + decode = hex_decode + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + + return (hex_encode,hex_decode,StreamReader,StreamWriter) diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py new file mode 100644 index 0000000..8c54811 --- /dev/null +++ b/Lib/encodings/rot_13.py @@ -0,0 +1,107 @@ +#!/usr/local/bin/python2.1 +""" Python Character Mapping Codec for ROT13. + + See http://ucsub.colorado.edu/~kominek/rot13/ for details. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + + return codecs.charmap_encode(input,errors,encoding_map) + + def decode(self,input,errors='strict'): + + return codecs.charmap_decode(input,errors,decoding_map) + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + + return (Codec().encode,Codec().decode,StreamReader,StreamWriter) + +### Decoding Map + +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x0041: 0x004e, + 0x0042: 0x004f, + 0x0043: 0x0050, + 0x0044: 0x0051, + 0x0045: 0x0052, + 0x0046: 0x0053, + 0x0047: 0x0054, + 0x0048: 0x0055, + 0x0049: 0x0056, + 0x004a: 0x0057, + 0x004b: 0x0058, + 0x004c: 0x0059, + 0x004d: 0x005a, + 0x004e: 0x0041, + 0x004f: 0x0042, + 0x0050: 0x0043, + 0x0051: 0x0044, + 0x0052: 0x0045, + 0x0053: 0x0046, + 0x0054: 0x0047, + 0x0055: 0x0048, + 0x0056: 0x0049, + 0x0057: 0x004a, + 0x0058: 0x004b, + 0x0059: 0x004c, + 0x005a: 0x004d, + 0x0061: 0x006e, + 0x0062: 0x006f, + 0x0063: 0x0070, + 0x0064: 0x0071, + 0x0065: 0x0072, + 0x0066: 0x0073, + 0x0067: 0x0074, + 0x0068: 0x0075, + 0x0069: 0x0076, + 0x006a: 0x0077, + 0x006b: 0x0078, + 0x006c: 0x0079, + 0x006d: 0x007a, + 0x006e: 0x0061, + 0x006f: 0x0062, + 0x0070: 0x0063, + 0x0071: 0x0064, + 0x0072: 0x0065, + 0x0073: 0x0066, + 0x0074: 0x0067, + 0x0075: 0x0068, + 0x0076: 0x0069, + 0x0077: 0x006a, + 0x0078: 0x006b, + 0x0079: 0x006c, + 0x007a: 0x006d, +}) + +### Encoding Map + +encoding_map = {} +for k,v in decoding_map.items(): + encoding_map[v] = k + +### Filter API + +def rot13(infile, outfile): + outfile.write(infile.read().encode('rot-13')) + +if __name__ == '__main__': + import sys + rot13(sys.stdin, sys.stdout) diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py new file mode 100644 index 0000000..82e799c --- /dev/null +++ b/Lib/encodings/uu_codec.py @@ -0,0 +1,110 @@ +""" Python 'uu_codec' Codec - UU content transfer encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were + adapted from uu.py which was written by Lance Ellinghouse and + modified by Jack Jansen and Fredrik Lundh. + +""" +import codecs, binascii + +### Codec APIs + +def uu_encode(input,errors='strict',filename='<data>',mode=0666): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + from cStringIO import StringIO + from binascii import b2a_uu + infile = StringIO(input) + outfile = StringIO() + read = infile.read + write = outfile.write + + # Encode + write('begin %o %s\n' % (mode & 0777, filename)) + chunk = read(45) + while chunk: + write(b2a_uu(chunk)) + chunk = read(45) + write(' \nend\n') + + return (outfile.getvalue(), len(input)) + +def uu_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + Note: filename and file mode information in the input data is + ignored. + + """ + assert errors == 'strict' + from cStringIO import StringIO + from binascii import a2b_uu + infile = StringIO(input) + outfile = StringIO() + readline = infile.readline + write = outfile.write + + # Find start of encoded data + while 1: + s = readline() + if not s: + raise ValueError, 'Missing "begin" line in input data' + if s[:5] == 'begin': + break + + # Decode + while 1: + s = readline() + if not s or \ + s == 'end\n': + break + try: + data = a2b_uu(s) + except binascii.Error, v: + # Workaround for broken uuencoders by /Fredrik Lundh + nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3 + data = a2b_uu(s[:nbytes]) + #sys.stderr.write("Warning: %s\n" % str(v)) + write(data) + if not s: + raise ValueError, 'Truncated input data' + + return (outfile.getvalue(), len(input)) + +class Codec(codecs.Codec): + + encode = uu_encode + decode = uu_decode + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + + return (uu_encode,uu_decode,StreamReader,StreamWriter) diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py new file mode 100644 index 0000000..035bb04 --- /dev/null +++ b/Lib/encodings/zlib_codec.py @@ -0,0 +1,61 @@ +""" Python 'zlib_codec' Codec - zlib compression encoding + + Unlike most of the other codecs which target Unicode, this codec + will return Python string objects for both encode and decode. + + Written by Marc-Andre Lemburg (mal@lemburg.com). + +""" +import codecs +import zlib # this codec needs the optional zlib module ! + +### Codec APIs + +def zlib_encode(input,errors='strict'): + + """ Encodes the object input and returns a tuple (output + object, length consumed). + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = zlib.compress(input) + return (output, len(input)) + +def zlib_decode(input,errors='strict'): + + """ Decodes the object input and returns a tuple (output + object, length consumed). + + input must be an object which provides the bf_getreadbuf + buffer slot. Python strings, buffer objects and memory + mapped files are examples of objects providing this slot. + + errors defines the error handling to apply. It defaults to + 'strict' handling which is the only currently supported + error handling for this codec. + + """ + assert errors == 'strict' + output = zlib.decompress(input) + return (output, len(input)) + +class Codec(codecs.Codec): + + encode = zlib_encode + decode = zlib_decode + +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + + return (zlib_encode,zlib_decode,StreamReader,StreamWriter) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index fcce50f..9b95a8e 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1,6 +1,7 @@ """Common tests shared by test_string and test_userstring""" import string +from test_support import verify, verbose, TestFailed transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' @@ -212,3 +213,21 @@ def run_method_tests(test): test('endswith', 'helloworld', 0, 'lowo', 3, 8) test('endswith', 'ab', 0, 'ab', 0, 1) test('endswith', 'ab', 0, 'ab', 0, 0) + + # Encoding/decoding + codecs = [('rot13', 'uryyb jbeyq'), + ('base64', 'aGVsbG8gd29ybGQ=\n'), + ('hex', '68656c6c6f20776f726c64'), + ('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n')] + for encoding, data in codecs: + test('encode', 'hello world', data, encoding) + test('decode', data, 'hello world', encoding) + # zlib is optional, so we make the test optional too... + try: + import zlib + except ImportError: + pass + else: + data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]' + verify('hello world'.encode('zlib') == data) + verify(data.decode('zlib') == 'hello world') |