summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rwxr-xr-x Lib/UserString.py 8
-rw-r--r-- Lib/encodings/aliases.py 9
-rw-r--r-- Lib/encodings/base64_codec.py 60
-rw-r--r-- Lib/encodings/hex_codec.py 60
-rw-r--r-- Lib/encodings/rot_13.py 107
-rw-r--r-- Lib/encodings/uu_codec.py 110
-rw-r--r-- Lib/encodings/zlib_codec.py 61
-rw-r--r-- Lib/test/string_tests.py 19
8 files changed, 434 insertions, 0 deletions
diff --git a/Lib/UserString.py b/Lib/UserString.py
index 163faa5..45cdeb5 100755
--- a/Lib/UserString.py
+++ b/Lib/UserString.py
@@ -72,6 +72,14 @@ class UserString:
def center(self, width): return self.__class__(self.data.center(width))
def count(self, sub, start=0, end=sys.maxint):
return self.data.count(sub, start, end)
+ def decode(self, encoding=None, errors=None): # XXX improve this?
+ if encoding:
+ if errors:
+ return self.__class__(self.data.decode(encoding, errors))
+ else:
+ return self.__class__(self.data.decode(encoding))
+ else:
+ return self.__class__(self.data.decode())
def encode(self, encoding=None, errors=None): # XXX improve this?
if encoding:
if errors:
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index b9bfd97..5573f6d 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -79,4 +79,13 @@ aliases = {
'tis260': 'tactis',
'sjis': 'shift_jis',
+ # Content transfer/compression encodings
+ 'rot13': 'rot_13',
+ 'base64': 'base64_codec',
+ 'base_64': 'base64_codec',
+ 'zlib': 'zlib_codec',
+ 'zip': 'zlib_codec',
+ 'hex': 'hex_codec',
+ 'uu': 'uu_codec',
+
}
diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py
new file mode 100644
index 0000000..ce21b1a4
--- /dev/null
+++ b/Lib/encodings/base64_codec.py
@@ -0,0 +1,60 @@
+""" Python 'base64_codec' Codec - base64 content transfer encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, base64
+
+### Codec APIs
+
+def base64_encode(input,errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = base64.encodestring(input)
+ return (output, len(input))
+
+def base64_decode(input,errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = base64.decodestring(input)
+ return (output, len(input))
+
+class Codec(codecs.Codec):
+
+ encode = base64_encode
+ decode = base64_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+
+ return (base64_encode,base64_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py
new file mode 100644
index 0000000..ab7d86f
--- /dev/null
+++ b/Lib/encodings/hex_codec.py
@@ -0,0 +1,60 @@
+""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs, binascii
+
+### Codec APIs
+
+def hex_encode(input,errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = binascii.b2a_hex(input)
+ return (output, len(input))
+
+def hex_decode(input,errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = binascii.a2b_hex(input)
+ return (output, len(input))
+
+class Codec(codecs.Codec):
+
+ encode = hex_encode
+ decode = hex_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+
+ return (hex_encode,hex_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py
new file mode 100644
index 0000000..8c54811
--- /dev/null
+++ b/Lib/encodings/rot_13.py
@@ -0,0 +1,107 @@
+#!/usr/local/bin/python2.1
+""" Python Character Mapping Codec for ROT13.
+
+ See http://ucsub.colorado.edu/~kominek/rot13/ for details.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+
+ return codecs.charmap_decode(input,errors,decoding_map)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+
+ return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0041: 0x004e,
+ 0x0042: 0x004f,
+ 0x0043: 0x0050,
+ 0x0044: 0x0051,
+ 0x0045: 0x0052,
+ 0x0046: 0x0053,
+ 0x0047: 0x0054,
+ 0x0048: 0x0055,
+ 0x0049: 0x0056,
+ 0x004a: 0x0057,
+ 0x004b: 0x0058,
+ 0x004c: 0x0059,
+ 0x004d: 0x005a,
+ 0x004e: 0x0041,
+ 0x004f: 0x0042,
+ 0x0050: 0x0043,
+ 0x0051: 0x0044,
+ 0x0052: 0x0045,
+ 0x0053: 0x0046,
+ 0x0054: 0x0047,
+ 0x0055: 0x0048,
+ 0x0056: 0x0049,
+ 0x0057: 0x004a,
+ 0x0058: 0x004b,
+ 0x0059: 0x004c,
+ 0x005a: 0x004d,
+ 0x0061: 0x006e,
+ 0x0062: 0x006f,
+ 0x0063: 0x0070,
+ 0x0064: 0x0071,
+ 0x0065: 0x0072,
+ 0x0066: 0x0073,
+ 0x0067: 0x0074,
+ 0x0068: 0x0075,
+ 0x0069: 0x0076,
+ 0x006a: 0x0077,
+ 0x006b: 0x0078,
+ 0x006c: 0x0079,
+ 0x006d: 0x007a,
+ 0x006e: 0x0061,
+ 0x006f: 0x0062,
+ 0x0070: 0x0063,
+ 0x0071: 0x0064,
+ 0x0072: 0x0065,
+ 0x0073: 0x0066,
+ 0x0074: 0x0067,
+ 0x0075: 0x0068,
+ 0x0076: 0x0069,
+ 0x0077: 0x006a,
+ 0x0078: 0x006b,
+ 0x0079: 0x006c,
+ 0x007a: 0x006d,
+})
+
+### Encoding Map
+
+encoding_map = {}
+for k,v in decoding_map.items():
+ encoding_map[v] = k
+
+### Filter API
+
+def rot13(infile, outfile):
+ outfile.write(infile.read().encode('rot-13'))
+
+if __name__ == '__main__':
+ import sys
+ rot13(sys.stdin, sys.stdout)
diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py
new file mode 100644
index 0000000..82e799c
--- /dev/null
+++ b/Lib/encodings/uu_codec.py
@@ -0,0 +1,110 @@
+""" Python 'uu_codec' Codec - UU content transfer encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
+ adapted from uu.py which was written by Lance Ellinghouse and
+ modified by Jack Jansen and Fredrik Lundh.
+
+"""
+import codecs, binascii
+
+### Codec APIs
+
+def uu_encode(input,errors='strict',filename='<data>',mode=0666):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ from cStringIO import StringIO
+ from binascii import b2a_uu
+ infile = StringIO(input)
+ outfile = StringIO()
+ read = infile.read
+ write = outfile.write
+
+ # Encode
+ write('begin %o %s\n' % (mode & 0777, filename))
+ chunk = read(45)
+ while chunk:
+ write(b2a_uu(chunk))
+ chunk = read(45)
+ write(' \nend\n')
+
+ return (outfile.getvalue(), len(input))
+
+def uu_decode(input,errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ Note: filename and file mode information in the input data is
+ ignored.
+
+ """
+ assert errors == 'strict'
+ from cStringIO import StringIO
+ from binascii import a2b_uu
+ infile = StringIO(input)
+ outfile = StringIO()
+ readline = infile.readline
+ write = outfile.write
+
+ # Find start of encoded data
+ while 1:
+ s = readline()
+ if not s:
+ raise ValueError, 'Missing "begin" line in input data'
+ if s[:5] == 'begin':
+ break
+
+ # Decode
+ while 1:
+ s = readline()
+ if not s or \
+ s == 'end\n':
+ break
+ try:
+ data = a2b_uu(s)
+ except binascii.Error, v:
+ # Workaround for broken uuencoders by /Fredrik Lundh
+ nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
+ data = a2b_uu(s[:nbytes])
+ #sys.stderr.write("Warning: %s\n" % str(v))
+ write(data)
+ if not s:
+ raise ValueError, 'Truncated input data'
+
+ return (outfile.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+
+ encode = uu_encode
+ decode = uu_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+
+ return (uu_encode,uu_decode,StreamReader,StreamWriter)
diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py
new file mode 100644
index 0000000..035bb04
--- /dev/null
+++ b/Lib/encodings/zlib_codec.py
@@ -0,0 +1,61 @@
+""" Python 'zlib_codec' Codec - zlib compression encoding
+
+ Unlike most of the other codecs which target Unicode, this codec
+ will return Python string objects for both encode and decode.
+
+ Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+"""
+import codecs
+import zlib # this codec needs the optional zlib module !
+
+### Codec APIs
+
+def zlib_encode(input,errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = zlib.compress(input)
+ return (output, len(input))
+
+def zlib_decode(input,errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling which is the only currently supported
+ error handling for this codec.
+
+ """
+ assert errors == 'strict'
+ output = zlib.decompress(input)
+ return (output, len(input))
+
+class Codec(codecs.Codec):
+
+ encode = zlib_encode
+ decode = zlib_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+
+ return (zlib_encode,zlib_decode,StreamReader,StreamWriter)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index fcce50f..9b95a8e 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1,6 +1,7 @@
"""Common tests shared by test_string and test_userstring"""
import string
+from test_support import verify, verbose, TestFailed
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
@@ -212,3 +213,21 @@ def run_method_tests(test):
test('endswith', 'helloworld', 0, 'lowo', 3, 8)
test('endswith', 'ab', 0, 'ab', 0, 1)
test('endswith', 'ab', 0, 'ab', 0, 0)
+
+ # Encoding/decoding
+ codecs = [('rot13', 'uryyb jbeyq'),
+ ('base64', 'aGVsbG8gd29ybGQ=\n'),
+ ('hex', '68656c6c6f20776f726c64'),
+ ('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n')]
+ for encoding, data in codecs:
+ test('encode', 'hello world', data, encoding)
+ test('decode', data, 'hello world', encoding)
+ # zlib is optional, so we make the test optional too...
+ try:
+ import zlib
+ except ImportError:
+ pass
+ else:
+ data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
+ verify('hello world'.encode('zlib') == data)
+ verify(data.decode('zlib') == 'hello world')