From b3a65f8d19f55437406a446927238a9e6f7d3902 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 21 Feb 2008 22:11:37 +0000 Subject: Move UserString to collections. Removed decode() method. Added isidentifier() and format() methods. Drop MutableUserString class. --- Doc/library/collections.rst | 19 ++++ Doc/library/datatypes.rst | 1 - Doc/library/userdict.rst | 61 ----------- Lib/UserString.py | 244 -------------------------------------------- Lib/collections.py | 144 ++++++++++++++++++++++++++ Lib/test/test___all__.py | 1 - Lib/test/test_userstring.py | 94 +---------------- Misc/NEWS | 3 +- 8 files changed, 167 insertions(+), 400 deletions(-) delete mode 100644 Doc/library/userdict.rst delete mode 100755 Lib/UserString.py diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index bb2140c..1d6687d 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -706,3 +706,22 @@ If a derived class does not wish to comply with this requirement, all of the special methods supported by this class will need to be overridden; please consult the sources for information about the methods which need to be provided in that case. + +:class:`UserString` objects +--------------------------- + +The :class:`UserString` class acts as a wrapper around string objects. +The need for this class has been partially supplanted by the ability to +subclass directly from :class:`str`; however, this class can be easier +to work with because the underlying string is accessible as an +attribute. + +.. class:: UserString(sequence) + + Class that simulates a string or a Unicode string object. The instance's + content is kept in a regular string object, which is accessible via the + :attr:`data` attribute of :class:`UserString` instances. The instance's + contents are initially set to a copy of *sequence*. 
The *sequence* can + be an instance of :class:`bytes`, :class:`str`, :class:`UserString` (or a + subclass) or an arbitrary sequence which can be converted into a string using + the built-in :func:`str` function. diff --git a/Doc/library/datatypes.rst b/Doc/library/datatypes.rst index 496fd41..ae76e72 100644 --- a/Doc/library/datatypes.rst +++ b/Doc/library/datatypes.rst @@ -29,7 +29,6 @@ The following modules are documented in this chapter: mutex.rst queue.rst weakref.rst - userdict.rst types.rst copy.rst pprint.rst diff --git a/Doc/library/userdict.rst b/Doc/library/userdict.rst deleted file mode 100644 index ebdec7c..0000000 --- a/Doc/library/userdict.rst +++ /dev/null @@ -1,61 +0,0 @@ - -:mod:`UserString` --- Class wrapper for string objects -====================================================== - -.. module:: UserString - :synopsis: Class wrapper for string objects. -.. moduleauthor:: Peter Funk -.. sectionauthor:: Peter Funk - - -.. note:: - - This :class:`UserString` class from this module is available for backward - compatibility only. If you are writing code that does not need to work with - versions of Python earlier than Python 2.2, please consider subclassing directly - from the built-in :class:`str` type instead of using :class:`UserString` (there - is no built-in equivalent to :class:`MutableString`). - -This module defines a class that acts as a wrapper around string objects. It is -a useful base class for your own string-like classes, which can inherit from -them and override existing methods or add new ones. In this way one can add new -behaviors to strings. - -It should be noted that these classes are highly inefficient compared to real -string or bytes objects; this is especially the case for -:class:`MutableString`. - -The :mod:`UserString` module defines the following classes: - - -.. class:: UserString([sequence]) - - Class that simulates a string or a Unicode string object. 
The instance's - content is kept in a regular string or Unicode string object, which is - accessible via the :attr:`data` attribute of :class:`UserString` instances. The - instance's contents are initially set to a copy of *sequence*. *sequence* can - be an instance of :class:`bytes`, :class:`str`, :class:`UserString` (or a - subclass) or an arbitrary sequence which can be converted into a string using - the built-in :func:`str` function. - - -.. class:: MutableString([sequence]) - - This class is derived from the :class:`UserString` above and redefines strings - to be *mutable*. Mutable strings can't be used as dictionary keys, because - dictionaries require *immutable* objects as keys. The main intention of this - class is to serve as an educational example for inheritance and necessity to - remove (override) the :meth:`__hash__` method in order to trap attempts to use a - mutable object as dictionary key, which would be otherwise very error prone and - hard to track down. - -In addition to supporting the methods and operations of bytes and string -objects (see section :ref:`string-methods`), :class:`UserString` instances -provide the following attribute: - - -.. attribute:: MutableString.data - - A real Python string or bytes object used to store the content of the - :class:`UserString` class. - diff --git a/Lib/UserString.py b/Lib/UserString.py deleted file mode 100755 index 704ea59..0000000 --- a/Lib/UserString.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python -## vim:ts=4:et:nowrap -"""A user-defined wrapper around string objects - -Note: string objects have grown methods in Python 1.6 -This module requires Python 1.6 or later. 
-""" -import sys -import collections - -__all__ = ["UserString","MutableString"] - -class UserString(collections.Sequence): - def __init__(self, seq): - if isinstance(seq, str): - self.data = seq - elif isinstance(seq, UserString): - self.data = seq.data[:] - else: - self.data = str(seq) - def __str__(self): return str(self.data) - def __repr__(self): return repr(self.data) - def __int__(self): return int(self.data) - def __long__(self): return int(self.data) - def __float__(self): return float(self.data) - def __complex__(self): return complex(self.data) - def __hash__(self): return hash(self.data) - - def __eq__(self, string): - if isinstance(string, UserString): - return self.data == string.data - else: - return self.data == string - def __ne__(self, string): - if isinstance(string, UserString): - return self.data != string.data - else: - return self.data != string - def __lt__(self, string): - if isinstance(string, UserString): - return self.data < string.data - else: - return self.data < string - def __le__(self, string): - if isinstance(string, UserString): - return self.data <= string.data - else: - return self.data <= string - def __gt__(self, string): - if isinstance(string, UserString): - return self.data > string.data - else: - return self.data > string - def __ge__(self, string): - if isinstance(string, UserString): - return self.data >= string.data - else: - return self.data >= string - - def __contains__(self, char): - if isinstance(char, UserString): - char = char.data - return char in self.data - - def __len__(self): return len(self.data) - def __getitem__(self, index): return self.__class__(self.data[index]) - def __add__(self, other): - if isinstance(other, UserString): - return self.__class__(self.data + other.data) - elif isinstance(other, str): - return self.__class__(self.data + other) - else: - return self.__class__(self.data + str(other)) - def __radd__(self, other): - if isinstance(other, str): - return self.__class__(other + self.data) - 
else: - return self.__class__(str(other) + self.data) - def __mul__(self, n): - return self.__class__(self.data*n) - __rmul__ = __mul__ - def __mod__(self, args): - return self.__class__(self.data % args) - - # the following methods are defined in alphabetical order: - def capitalize(self): return self.__class__(self.data.capitalize()) - def center(self, width, *args): - return self.__class__(self.data.center(width, *args)) - def count(self, sub, start=0, end=sys.maxsize): - if isinstance(sub, UserString): - sub = sub.data - return self.data.count(sub, start, end) - def decode(self, encoding=None, errors=None): # XXX improve this? - if encoding: - if errors: - return self.__class__(self.data.decode(encoding, errors)) - else: - return self.__class__(self.data.decode(encoding)) - else: - return self.__class__(self.data.decode()) - def encode(self, encoding=None, errors=None): # XXX improve this? - if encoding: - if errors: - return self.__class__(self.data.encode(encoding, errors)) - else: - return self.__class__(self.data.encode(encoding)) - else: - return self.__class__(self.data.encode()) - def endswith(self, suffix, start=0, end=sys.maxsize): - return self.data.endswith(suffix, start, end) - def expandtabs(self, tabsize=8): - return self.__class__(self.data.expandtabs(tabsize)) - def find(self, sub, start=0, end=sys.maxsize): - if isinstance(sub, UserString): - sub = sub.data - return self.data.find(sub, start, end) - def index(self, sub, start=0, end=sys.maxsize): - return self.data.index(sub, start, end) - def isalpha(self): return self.data.isalpha() - def isalnum(self): return self.data.isalnum() - def isdecimal(self): return self.data.isdecimal() - def isdigit(self): return self.data.isdigit() - def islower(self): return self.data.islower() - def isnumeric(self): return self.data.isnumeric() - def isspace(self): return self.data.isspace() - def istitle(self): return self.data.istitle() - def isupper(self): return self.data.isupper() - def join(self, seq): 
return self.data.join(seq) - def ljust(self, width, *args): - return self.__class__(self.data.ljust(width, *args)) - def lower(self): return self.__class__(self.data.lower()) - def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars)) - def partition(self, sep): - return self.data.partition(sep) - def replace(self, old, new, maxsplit=-1): - if isinstance(old, UserString): - old = old.data - if isinstance(new, UserString): - new = new.data - return self.__class__(self.data.replace(old, new, maxsplit)) - def rfind(self, sub, start=0, end=sys.maxsize): - return self.data.rfind(sub, start, end) - def rindex(self, sub, start=0, end=sys.maxsize): - return self.data.rindex(sub, start, end) - def rjust(self, width, *args): - return self.__class__(self.data.rjust(width, *args)) - def rpartition(self, sep): - return self.data.rpartition(sep) - def rstrip(self, chars=None): return self.__class__(self.data.rstrip(chars)) - def split(self, sep=None, maxsplit=-1): - return self.data.split(sep, maxsplit) - def rsplit(self, sep=None, maxsplit=-1): - return self.data.rsplit(sep, maxsplit) - def splitlines(self, keepends=0): return self.data.splitlines(keepends) - def startswith(self, prefix, start=0, end=sys.maxsize): - return self.data.startswith(prefix, start, end) - def strip(self, chars=None): return self.__class__(self.data.strip(chars)) - def swapcase(self): return self.__class__(self.data.swapcase()) - def title(self): return self.__class__(self.data.title()) - def translate(self, *args): - return self.__class__(self.data.translate(*args)) - def upper(self): return self.__class__(self.data.upper()) - def zfill(self, width): return self.__class__(self.data.zfill(width)) - -class MutableString(UserString, collections.MutableSequence): - """mutable string objects - - Python strings are immutable objects. This has the advantage, that - strings may be used as dictionary keys. 
If this property isn't needed - and you insist on changing string values in place instead, you may cheat - and use MutableString. - - But the purpose of this class is an educational one: to prevent - people from inventing their own mutable string class derived - from UserString and than forget thereby to remove (override) the - __hash__ method inherited from UserString. This would lead to - errors that would be very hard to track down. - - A faster and better solution is to rewrite your program using lists.""" - def __init__(self, string=""): - self.data = string - def __hash__(self): - raise TypeError("unhashable type (it is mutable)") - def __setitem__(self, index, sub): - if isinstance(index, slice): - if isinstance(sub, UserString): - sub = sub.data - elif not isinstance(sub, str): - sub = str(sub) - start, stop, step = index.indices(len(self.data)) - if step == -1: - start, stop = stop+1, start+1 - sub = sub[::-1] - elif step != 1: - # XXX(twouters): I guess we should be reimplementing - # the extended slice assignment/deletion algorithm here... 
- raise TypeError("invalid step in slicing assignment") - start = min(start, stop) - self.data = self.data[:start] + sub + self.data[stop:] - else: - if index < 0: - index += len(self.data) - if index < 0 or index >= len(self.data): raise IndexError - self.data = self.data[:index] + sub + self.data[index+1:] - def __delitem__(self, index): - if isinstance(index, slice): - start, stop, step = index.indices(len(self.data)) - if step == -1: - start, stop = stop+1, start+1 - elif step != 1: - # XXX(twouters): see same block in __setitem__ - raise TypeError("invalid step in slicing deletion") - start = min(start, stop) - self.data = self.data[:start] + self.data[stop:] - else: - if index < 0: - index += len(self.data) - if index < 0 or index >= len(self.data): raise IndexError - self.data = self.data[:index] + self.data[index+1:] - def immutable(self): - return UserString(self.data) - def __iadd__(self, other): - if isinstance(other, UserString): - self.data += other.data - elif isinstance(other, str): - self.data += other - else: - self.data += str(other) - return self - def __imul__(self, n): - self.data *= n - return self - def insert(self, index, value): - self[index:index] = value - -if __name__ == "__main__": - # execute the regression test to stdout, if called as a script: - import os - called_in_dir, called_as = os.path.split(sys.argv[0]) - called_as, py = os.path.splitext(called_as) - if '-q' in sys.argv: - from test import test_support - test_support.verbose = 0 - __import__('test.test_' + called_as.lower()) diff --git a/Lib/collections.py b/Lib/collections.py index 93194e0..dafc5e5 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -237,6 +237,150 @@ class UserList(MutableSequence): ################################################################################ +### UserString +################################################################################ + +class UserString(Sequence): + def __init__(self, seq): + if isinstance(seq, str): + 
self.data = seq + elif isinstance(seq, UserString): + self.data = seq.data[:] + else: + self.data = str(seq) + def __str__(self): return str(self.data) + def __repr__(self): return repr(self.data) + def __int__(self): return int(self.data) + def __long__(self): return int(self.data) + def __float__(self): return float(self.data) + def __complex__(self): return complex(self.data) + def __hash__(self): return hash(self.data) + + def __eq__(self, string): + if isinstance(string, UserString): + return self.data == string.data + return self.data == string + def __ne__(self, string): + if isinstance(string, UserString): + return self.data != string.data + return self.data != string + def __lt__(self, string): + if isinstance(string, UserString): + return self.data < string.data + return self.data < string + def __le__(self, string): + if isinstance(string, UserString): + return self.data <= string.data + return self.data <= string + def __gt__(self, string): + if isinstance(string, UserString): + return self.data > string.data + return self.data > string + def __ge__(self, string): + if isinstance(string, UserString): + return self.data >= string.data + return self.data >= string + + def __contains__(self, char): + if isinstance(char, UserString): + char = char.data + return char in self.data + + def __len__(self): return len(self.data) + def __getitem__(self, index): return self.__class__(self.data[index]) + def __add__(self, other): + if isinstance(other, UserString): + return self.__class__(self.data + other.data) + elif isinstance(other, str): + return self.__class__(self.data + other) + return self.__class__(self.data + str(other)) + def __radd__(self, other): + if isinstance(other, str): + return self.__class__(other + self.data) + return self.__class__(str(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + def __mod__(self, args): + return self.__class__(self.data % args) + + # the following methods are defined 
in alphabetical order: + def capitalize(self): return self.__class__(self.data.capitalize()) + def center(self, width, *args): + return self.__class__(self.data.center(width, *args)) + def count(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data + return self.data.count(sub, start, end) + def encode(self, encoding=None, errors=None): # XXX improve this? + if encoding: + if errors: + return self.__class__(self.data.encode(encoding, errors)) + return self.__class__(self.data.encode(encoding)) + return self.__class__(self.data.encode()) + def endswith(self, suffix, start=0, end=_sys.maxsize): + return self.data.endswith(suffix, start, end) + def expandtabs(self, tabsize=8): + return self.__class__(self.data.expandtabs(tabsize)) + def find(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data + return self.data.find(sub, start, end) + def format(self, *args, **kwds): + return self.data.format(*args, **kwds) + def index(self, sub, start=0, end=_sys.maxsize): + return self.data.index(sub, start, end) + def isalpha(self): return self.data.isalpha() + def isalnum(self): return self.data.isalnum() + def isdecimal(self): return self.data.isdecimal() + def isdigit(self): return self.data.isdigit() + def isidentifier(self): return self.data.isidentifier() + def islower(self): return self.data.islower() + def isnumeric(self): return self.data.isnumeric() + def isspace(self): return self.data.isspace() + def istitle(self): return self.data.istitle() + def isupper(self): return self.data.isupper() + def join(self, seq): return self.data.join(seq) + def ljust(self, width, *args): + return self.__class__(self.data.ljust(width, *args)) + def lower(self): return self.__class__(self.data.lower()) + def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars)) + def partition(self, sep): + return self.data.partition(sep) + def replace(self, old, new, maxsplit=-1): + if isinstance(old, UserString): 
+ old = old.data + if isinstance(new, UserString): + new = new.data + return self.__class__(self.data.replace(old, new, maxsplit)) + def rfind(self, sub, start=0, end=_sys.maxsize): + return self.data.rfind(sub, start, end) + def rindex(self, sub, start=0, end=_sys.maxsize): + return self.data.rindex(sub, start, end) + def rjust(self, width, *args): + return self.__class__(self.data.rjust(width, *args)) + def rpartition(self, sep): + return self.data.rpartition(sep) + def rstrip(self, chars=None): + return self.__class__(self.data.rstrip(chars)) + def split(self, sep=None, maxsplit=-1): + return self.data.split(sep, maxsplit) + def rsplit(self, sep=None, maxsplit=-1): + return self.data.rsplit(sep, maxsplit) + def splitlines(self, keepends=0): return self.data.splitlines(keepends) + def startswith(self, prefix, start=0, end=_sys.maxsize): + return self.data.startswith(prefix, start, end) + def strip(self, chars=None): return self.__class__(self.data.strip(chars)) + def swapcase(self): return self.__class__(self.data.swapcase()) + def title(self): return self.__class__(self.data.title()) + def translate(self, *args): + return self.__class__(self.data.translate(*args)) + def upper(self): return self.__class__(self.data.upper()) + def zfill(self, width): return self.__class__(self.data.zfill(width)) + + + +################################################################################ ### Simple tests ################################################################################ diff --git a/Lib/test/test___all__.py b/Lib/test/test___all__.py index 99bc330..6e844e7 100644 --- a/Lib/test/test___all__.py +++ b/Lib/test/test___all__.py @@ -36,7 +36,6 @@ class AllTest(unittest.TestCase): self.check_all("Queue") self.check_all("SimpleHTTPServer") self.check_all("SocketServer") - self.check_all("UserString") self.check_all("aifc") self.check_all("base64") self.check_all("bdb") diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 8bd8d10..a0fefc3 
100755 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -6,7 +6,7 @@ import unittest import string from test import test_support, string_tests -from UserString import UserString, MutableString +from collections import UserString class UserStringTest( string_tests.CommonTest, @@ -42,99 +42,9 @@ class UserStringTest( # we don't fix the arguments, because UserString can't cope with it getattr(object, methodname)(*args) -class MutableStringTest(UserStringTest): - type2test = MutableString - - # MutableStrings can be hashed => deactivate test - def test_hash(self): - pass - - def test_setitem(self): - s = self.type2test("foo") - self.assertRaises(IndexError, s.__setitem__, -4, "bar") - self.assertRaises(IndexError, s.__setitem__, 3, "bar") - s[-1] = "bar" - self.assertEqual(s, "fobar") - s[0] = "bar" - self.assertEqual(s, "barobar") - - def test_delitem(self): - s = self.type2test("foo") - self.assertRaises(IndexError, s.__delitem__, -4) - self.assertRaises(IndexError, s.__delitem__, 3) - del s[-1] - self.assertEqual(s, "fo") - del s[0] - self.assertEqual(s, "o") - del s[0] - self.assertEqual(s, "") - - def test_setslice(self): - s = self.type2test("foo") - s[:] = "bar" - self.assertEqual(s, "bar") - s[1:2] = "foo" - self.assertEqual(s, "bfoor") - s[1:-1] = UserString("a") - self.assertEqual(s, "bar") - s[0:10] = 42 - self.assertEqual(s, "42") - - def test_delslice(self): - s = self.type2test("foobar") - del s[3:10] - self.assertEqual(s, "foo") - del s[-1:10] - self.assertEqual(s, "fo") - - def test_extended_set_del_slice(self): - indices = (0, None, 1, 3, 19, 100, -1, -2, -31, -100) - orig = string.ascii_letters + string.digits - for start in indices: - for stop in indices: - # Use indices[1:] when MutableString can handle real - # extended slices - for step in (None, 1, -1): - s = self.type2test(orig) - L = list(orig) - # Make sure we have a slice of exactly the right length, - # but with (hopefully) different data. 
- data = L[start:stop:step] - data.reverse() - L[start:stop:step] = data - s[start:stop:step] = "".join(data) - self.assertEquals(s, "".join(L)) - - del L[start:stop:step] - del s[start:stop:step] - self.assertEquals(s, "".join(L)) - - def test_immutable(self): - s = self.type2test("foobar") - s2 = s.immutable() - self.assertEqual(s, s2) - self.assert_(isinstance(s2, UserString)) - - def test_iadd(self): - s = self.type2test("foo") - s += "bar" - self.assertEqual(s, "foobar") - s += UserString("baz") - self.assertEqual(s, "foobarbaz") - s += 42 - self.assertEqual(s, "foobarbaz42") - - def test_imul(self): - s = self.type2test("foo") - s *= 1 - self.assertEqual(s, "foo") - s *= 2 - self.assertEqual(s, "foofoo") - s *= -1 - self.assertEqual(s, "") def test_main(): - test_support.run_unittest(UserStringTest, MutableStringTest) + test_support.run_unittest(UserStringTest) if __name__ == "__main__": test_main() diff --git a/Misc/NEWS b/Misc/NEWS index 26cc2db..1a3ae21 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -1650,7 +1650,8 @@ Library XXX their API still needs to be modernized (i.e. eliminate the iter methods). - Created new UserDict class in collections module. This one inherits from and - complies with the MutableMapping ABC. + complies with the MutableMapping ABC. Also, moved UserString and UserList + to the collections module. The MutableString class was removed. - Removed UserDict.DictMixin. Replaced all its uses with collections.MutableMapping. -- cgit v0.12