diff options
-rw-r--r-- | Doc/lib/libuserdict.tex | 49 | ||||
-rwxr-xr-x | Lib/UserString.py | 158 | ||||
-rw-r--r-- | Lib/test/output/test_userstring | 1 | ||||
-rwxr-xr-x | Lib/test/test_userstring.py | 227 |
4 files changed, 435 insertions, 0 deletions
diff --git a/Doc/lib/libuserdict.tex b/Doc/lib/libuserdict.tex index 8e47360..1768f65 100644 --- a/Doc/lib/libuserdict.tex +++ b/Doc/lib/libuserdict.tex @@ -63,3 +63,52 @@ provide the following attribute: A real Python list object used to store the contents of the \class{UserList} class. \end{memberdesc} + + +\section{\module{UserString} --- + Class wrapper for string objects} + +\declaremodule{standard}{UserString} +\modulesynopsis{Class wrapper for string objects.} +\moduleauthor{Peter Funk}{pf@artcom-gmbh.de} +\sectionauthor{Peter Funk}{pf@artcom-gmbh.de} + +This module defines a class that acts as a wrapper around +string objects. It is a useful base class for +your own string-like classes, which can inherit from +it and override existing methods or add new ones. In this way one +can add new behaviours to strings. + +The \module{UserString} module defines the \class{UserString} class: + +\begin{classdesc}{UserString}{\optional{sequence}} +Return a class instance that simulates a string or a Unicode string object. +The instance's content is kept in a regular string or Unicode string +object, which is accessible via the +\member{data} attribute of \class{UserString} instances. The instance's +contents are initially set to a copy of \var{sequence}. +\var{sequence} can be either a regular Python string or Unicode string, +an instance of \class{UserString} (or a subclass), or an arbitrary sequence +which can be converted into a string +using the built-in \function{str()} function. +\end{classdesc} + +In addition to supporting the methods and operations of string or +Unicode objects (see section \ref{typesseq}), \class{UserString} instances +provide the following attribute: + +\begin{memberdesc}{data} +A real Python string or Unicode object used to store the content of the +\class{UserString} class. +\end{memberdesc} + +\begin{classdesc}{MutableString}{\optional{sequence}} +This class is derived from the \class{UserString} above and redefines +strings to be \emph{mutable}. 
Mutable strings can't be used as +dictionary keys, because dictionaries require \emph{immutable} objects as +keys. The main intention of this class is to serve as an educational +example of inheritance and of the necessity to remove (override) the +\function{__hash__} method in order to trap attempts to use a +mutable object as a dictionary key, which would otherwise be very +error-prone and hard to track down. +\end{classdesc} diff --git a/Lib/UserString.py b/Lib/UserString.py new file mode 100755 index 0000000..c2c2bed --- /dev/null +++ b/Lib/UserString.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +## vim:ts=4:et:nowrap +"""A user-defined wrapper around string objects + +Note: string objects have grown methods in Python 1.6 +This module requires Python 1.6 or later. +""" +from types import StringType, UnicodeType +import sys + +class UserString: + def __init__(self, seq): + if isinstance(seq, StringType) or isinstance(seq, UnicodeType): + self.data = seq + elif isinstance(seq, UserString): + self.data = seq.data[:] + else: + self.data = str(seq) + def __str__(self): return str(self.data) + def __repr__(self): return repr(self.data) + def __int__(self): return int(self.data) + def __long__(self): return long(self.data) + def __float__(self): return float(self.data) + def __complex__(self): return complex(self.data) + def __hash__(self): return hash(self.data) + + def __cmp__(self, string): + if isinstance(string, UserString): + return cmp(self.data, string.data) + else: + return cmp(self.data, string) + def __contains__(self, char): + return char in self.data + + def __len__(self): return len(self.data) + def __getitem__(self, index): return self.__class__(self.data[index]) + def __getslice__(self, start, end): + start = max(start, 0); end = max(end, 0) + return self.__class__(self.data[start:end]) + + def __add__(self, other): + if isinstance(other, UserString): + return self.__class__(self.data + other.data) + elif isinstance(other, StringType) or isinstance(other, 
UnicodeType): + return self.__class__(self.data + other) + else: + return self.__class__(self.data + str(other)) + def __radd__(self, other): + if isinstance(other, StringType) or isinstance(other, UnicodeType): + return self.__class__(other + self.data) + else: + return self.__class__(str(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + + # the following methods are defined in alphabetical order: + def capitalize(self): return self.__class__(self.data.capitalize()) + def center(self, width): return self.__class__(self.data.center(width)) + def count(self, sub, start=0, end=sys.maxint): + return self.data.count(sub, start, end) + def encode(self, encoding=None, errors=None): # XXX improve this? + if encoding: + if errors: + return self.__class__(self.data.encode(encoding, errors)) + else: + return self.__class__(self.data.encode(encoding)) + else: + return self.__class__(self.data.encode()) + def endswith(self, suffix, start=0, end=sys.maxint): + return self.data.endswith(suffix, start, end) + def expandtabs(self, tabsize=8): + return self.__class__(self.data.expandtabs(tabsize)) + def find(self, sub, start=0, end=sys.maxint): + return self.data.find(sub, start, end) + def index(self, sub, start=0, end=sys.maxint): + return self.data.index(sub, start, end) + def isdecimal(self): return self.data.isdecimal() + def isdigit(self): return self.data.isdigit() + def islower(self): return self.data.islower() + def isnumeric(self): return self.data.isnumeric() + def isspace(self): return self.data.isspace() + def istitle(self): return self.data.istitle() + def isupper(self): return self.data.isupper() + def join(self, seq): return self.data.join(seq) + def ljust(self, width): return self.__class__(self.data.ljust(width)) + def lower(self): return self.__class__(self.data.lower()) + def lstrip(self): return self.__class__(self.data.lstrip()) + def replace(self, old, new, maxsplit=-1): + return 
self.__class__(self.data.replace(old, new, maxsplit)) + def rfind(self, sub, start=0, end=sys.maxint): + return self.data.rfind(sub, start, end) + def rindex(self, sub, start=0, end=sys.maxint): + return self.data.rindex(sub, start, end) + def rjust(self, width): return self.__class__(self.data.rjust(width)) + def rstrip(self): return self.__class__(self.data.rstrip()) + def split(self, sep=None, maxsplit=-1): + return self.data.split(sep, maxsplit) + def splitlines(self, maxsplit=-1): return self.data.splitlines(maxsplit) + def startswith(self, prefix, start=0, end=sys.maxint): + return self.data.startswith(prefix, start, end) + def strip(self): return self.__class__(self.data.strip()) + def swapcase(self): return self.__class__(self.data.swapcase()) + def title(self): return self.__class__(self.data.title()) + def translate(self, table, deletechars=""): + return self.__class__(self.data.translate(table, deletechars)) + def upper(self): return self.__class__(self.data.upper()) + +class MutableString(UserString): + """mutable string objects + + Python strings are immutable objects. This has the advantage that + strings may be used as dictionary keys. If this property isn't needed + and you insist on changing string values in place instead, you may cheat + and use MutableString. + + But the purpose of this class is an educational one: to prevent + people from inventing their own mutable string class derived + from UserString and then forgetting to remove (override) the + __hash__ method inherited from UserString. This would lead to + errors that would be very hard to track down. 
+ + A faster and better solution is to rewrite your program using lists.""" + def __init__(self, string=""): + self.data = string + def __hash__(self): + raise TypeError, "unhashable type (it is mutable)" + def __setitem__(self, index, sub): + if index < 0 or index >= len(self.data): raise IndexError + self.data = self.data[:index] + sub + self.data[index+1:] + def __delitem__(self, index): + if index < 0 or index >= len(self.data): raise IndexError + self.data = self.data[:index] + self.data[index+1:] + def __setslice__(self, start, end, sub): + start = max(start, 0); end = max(end, 0) + if isinstance(sub, UserString): + self.data = self.data[:start]+sub.data+self.data[end:] + elif isinstance(sub, StringType) or isinstance(sub, UnicodeType): + self.data = self.data[:start]+sub+self.data[end:] + else: + self.data = self.data[:start]+str(sub)+self.data[end:] + def __delslice__(self, start, end): + start = max(start, 0); end = max(end, 0) + self.data = self.data[:start] + self.data[end:] + def immutable(self): + return UserString(self.data) + +if __name__ == "__main__": + # execute the regression test to stdout, if called as a script: + import os + called_in_dir, called_as = os.path.split(sys.argv[0]) + called_in_dir = os.path.abspath(called_in_dir) + called_as, py = os.path.splitext(called_as) + sys.path.append(os.path.join(called_in_dir, 'test')) + if '-q' in sys.argv: + import test_support + test_support.verbose = 0 + __import__('test_' + called_as.lower()) diff --git a/Lib/test/output/test_userstring b/Lib/test/output/test_userstring new file mode 100644 index 0000000..0290d6c --- /dev/null +++ b/Lib/test/output/test_userstring @@ -0,0 +1 @@ +test_userstring diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py new file mode 100755 index 0000000..c12f1e2 --- /dev/null +++ b/Lib/test/test_userstring.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python +import sys, string +from test_support import verbose +# UserString is a wrapper around the native builtin 
string type. +# UserString instances should behave similar to builtin string objects. +# The test cases were in part derived from 'test_string.py'. +from UserString import UserString + +if __name__ == "__main__": + verbose = 0 + +tested_methods = {} + +def test(methodname, input, *args): + global tested_methods + tested_methods[methodname] = 1 + if verbose: + print '%s.%s(%s) ' % (input, methodname, args), + u = UserString(input) + objects = [input, u, UserString(u)] + res = [""] * 3 + for i in range(3): + object = objects[i] + try: + f = getattr(object, methodname) + res[i] = apply(f, args) + except: + res[i] = sys.exc_type + if res[0] != res[1]: + if verbose: + print 'no' + print `input`, f, `res[0]`, "<>", `res[1]` + else: + if verbose: + print 'yes' + if res[1] != res[2]: + if verbose: + print 'no' + print `input`, f, `res[1]`, "<>", `res[2]` + else: + if verbose: + print 'yes' + +test('capitalize', ' hello ') +test('capitalize', 'hello ') + +test('center', 'foo', 0) +test('center', 'foo', 3) +test('center', 'foo', 16) + +test('ljust', 'foo', 0) +test('ljust', 'foo', 3) +test('ljust', 'foo', 16) + +test('rjust', 'foo', 0) +test('rjust', 'foo', 3) +test('rjust', 'foo', 16) + +test('count', 'abcabcabc', 'abc') +test('count', 'abcabcabc', 'abc', 1) +test('count', 'abcabcabc', 'abc', -1) +test('count', 'abcabcabc', 'abc', 7) +test('count', 'abcabcabc', 'abc', 0, 3) +test('count', 'abcabcabc', 'abc', 0, 333) + +test('find', 'abcdefghiabc', 'abc') +test('find', 'abcdefghiabc', 'abc', 1) +test('find', 'abcdefghiabc', 'def', 4) +test('rfind', 'abcdefghiabc', 'abc') + +test('index', 'abcabcabc', 'abc') +test('index', 'abcabcabc', 'abc', 1) +test('index', 'abcabcabc', 'abc', -1) +test('index', 'abcabcabc', 'abc', 7) +test('index', 'abcabcabc', 'abc', 0, 3) +test('index', 'abcabcabc', 'abc', 0, 333) + +test('rindex', 'abcabcabc', 'abc') +test('rindex', 'abcabcabc', 'abc', 1) +test('rindex', 'abcabcabc', 'abc', -1) +test('rindex', 'abcabcabc', 'abc', 7) +test('rindex', 
'abcabcabc', 'abc', 0, 3) +test('rindex', 'abcabcabc', 'abc', 0, 333) + + +test('lower', 'HeLLo') +test('lower', 'hello') +test('upper', 'HeLLo') +test('upper', 'HELLO') + +test('title', ' hello ') +test('title', 'hello ') +test('title', "fOrMaT thIs aS titLe String") +test('title', "fOrMaT,thIs-aS*titLe;String") +test('title', "getInt") + +test('expandtabs', 'abc\rab\tdef\ng\thi') +test('expandtabs', 'abc\rab\tdef\ng\thi', 8) +test('expandtabs', 'abc\rab\tdef\ng\thi', 4) +test('expandtabs', 'abc\r\nab\tdef\ng\thi', 4) + +test('islower', 'a') +test('islower', 'A') +test('islower', '\n') +test('islower', 'abc') +test('islower', 'aBc') +test('islower', 'abc\n') + +test('isupper', 'a') +test('isupper', 'A') +test('isupper', '\n') +test('isupper', 'ABC') +test('isupper', 'AbC') +test('isupper', 'ABC\n') + +test('isdigit', ' 0123456789') +test('isdigit', '56789') +test('isdigit', '567.89') +test('isdigit', '0123456789abc') + +test('isspace', '') +test('isspace', ' ') +test('isspace', ' \t') +test('isspace', ' \t\f\n') + +test('istitle', 'a') +test('istitle', 'A') +test('istitle', '\n') +test('istitle', 'A Titlecased Line') +test('istitle', 'A\nTitlecased Line') +test('istitle', 'A Titlecased, Line') +test('istitle', 'Not a capitalized String') +test('istitle', 'Not\ta Titlecase String') +test('istitle', 'Not--a Titlecase String') + +test('splitlines', "abc\ndef\n\rghi") +test('splitlines', "abc\ndef\n\r\nghi") +test('splitlines', "abc\ndef\r\nghi") +test('splitlines', "abc\ndef\r\nghi\n") +test('splitlines', "abc\ndef\r\nghi\n\r") +test('splitlines', "\nabc\ndef\r\nghi\n\r") +test('splitlines', "\nabc\ndef\r\nghi\n\r") +test('splitlines', "\nabc\ndef\r\nghi\n\r") + +test('split', 'this is the split function') +test('split', 'a|b|c|d', '|') +test('split', 'a|b|c|d', '|', 2) +test('split', 'a b c d', None, 1) +test('split', 'a b c d', None, 2) +test('split', 'a b c d', None, 3) +test('split', 'a b c d', None, 4) +test('split', 'a b c d', None, 0) +test('split', 'a b c d', 
None, 2) +test('split', 'a b c d ') + +# join now works with any sequence type +class Sequence: + def __init__(self): self.seq = 'wxyz' + def __len__(self): return len(self.seq) + def __getitem__(self, i): return self.seq[i] + +test('join', '', ('a', 'b', 'c', 'd')) +test('join', '', Sequence()) +test('join', '', 7) + +class BadSeq(Sequence): + def __init__(self): self.seq = [7, 'hello', 123L] + +test('join', '', BadSeq()) + +test('strip', ' hello ') +test('lstrip', ' hello ') +test('rstrip', ' hello ') +test('strip', 'hello') + +test('swapcase', 'HeLLo cOmpUteRs') +transtable = string.maketrans("abc", "xyz") +test('translate', 'xyzabcdef', transtable, 'def') + +transtable = string.maketrans('a', 'A') +test('translate', 'abc', transtable) +test('translate', 'xyz', transtable) + +test('replace', 'one!two!three!', '!', '@', 1) +test('replace', 'one!two!three!', '!', '') +test('replace', 'one!two!three!', '!', '@', 2) +test('replace', 'one!two!three!', '!', '@', 3) +test('replace', 'one!two!three!', '!', '@', 4) +test('replace', 'one!two!three!', '!', '@', 0) +test('replace', 'one!two!three!', '!', '@') +test('replace', 'one!two!three!', 'x', '@') +test('replace', 'one!two!three!', 'x', '@', 2) + +test('startswith', 'hello', 'he') +test('startswith', 'hello', 'hello') +test('startswith', 'hello', 'hello world') +test('startswith', 'hello', '') +test('startswith', 'hello', 'ello') +test('startswith', 'hello', 'ello', 1) +test('startswith', 'hello', 'o', 4) +test('startswith', 'hello', 'o', 5) +test('startswith', 'hello', '', 5) +test('startswith', 'hello', 'lo', 6) +test('startswith', 'helloworld', 'lowo', 3) +test('startswith', 'helloworld', 'lowo', 3, 7) +test('startswith', 'helloworld', 'lowo', 3, 6) + +test('endswith', 'hello', 'lo') +test('endswith', 'hello', 'he') +test('endswith', 'hello', '') +test('endswith', 'hello', 'hello world') +test('endswith', 'helloworld', 'worl') +test('endswith', 'helloworld', 'worl', 3, 9) +test('endswith', 'helloworld', 'world', 3, 
12) +test('endswith', 'helloworld', 'lowo', 1, 7) +test('endswith', 'helloworld', 'lowo', 2, 7) +test('endswith', 'helloworld', 'lowo', 3, 7) +test('endswith', 'helloworld', 'lowo', 4, 7) +test('endswith', 'helloworld', 'lowo', 3, 8) +test('endswith', 'ab', 'ab', 0, 1) +test('endswith', 'ab', 'ab', 0, 0) + +# TODO: test cases for: int, long, float, complex, +, * and cmp +s = "" +for builtin_method in dir(s): + if not tested_methods.has_key(builtin_method): + print "no regression test case for method '"+builtin_method+"'" |