diff options
Diffstat (limited to 'Lib/test/string_tests.py')
| -rw-r--r-- | Lib/test/string_tests.py | 160 | 
1 file changed, 80 insertions, 80 deletions
| diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 242a931..cd3ee48 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1,5 +1,5 @@  """ -Common tests shared by test_unicode, test_userstring and test_string. +Common tests shared by test_unicode, test_userstring and test_bytes.  """  import unittest, string, sys, struct @@ -51,6 +51,9 @@ class BaseTest:          else:              return obj +    def test_fixtype(self): +        self.assertIs(type(self.fixtype("123")), self.type2test) +      # check that obj.method(*args) returns result      def checkequal(self, result, obj, methodname, *args, **kwargs):          result = self.fixtype(result) @@ -365,6 +368,8 @@ class BaseTest:                          sys.maxsize-2)          self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)          self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2) +        self.checkequal(['abcd'], 'abcd', 'split', '|') +        self.checkequal([''], '', 'split', '|')          self.checkequal(['endcase ', ''], 'endcase |', 'split', '|')          self.checkequal(['', ' startcase'], '| startcase', 'split', '|')          self.checkequal(['', 'bothcase', ''], '|bothcase|', 'split', '|') @@ -432,6 +437,8 @@ class BaseTest:                          sys.maxsize-100)          self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)          self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2) +        self.checkequal(['abcd'], 'abcd', 'rsplit', '|') +        self.checkequal([''], '', 'rsplit', '|')          self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')          self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')          self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|') @@ -638,14 +645,6 @@ class BaseTest:          EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")          EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby") -        # XXX Commented out. 
Is there any reason to support buffer objects -        # as arguments for str.replace()?  GvR -##         ba = bytearray('a') -##         bb = bytearray('b') -##         EQ("bbc", "abc", "replace", ba, bb) -##         EQ("aac", "abc", "replace", bb, ba) - -        #          self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)          self.checkequal('onetwothree', 'one!two!three!', 'replace', '!', '')          self.checkequal('one@two@three!', 'one!two!three!', 'replace', '!', '@', 2) @@ -682,22 +681,6 @@ class BaseTest:          self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)          self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) - - -class CommonTest(BaseTest): -    # This testcase contains test that can be used in all -    # stringlike classes. Currently this is str, unicode -    # UserString and the string module. - -    def test_hash(self): -        # SF bug 1054139:  += optimization was not invalidating cached hash value -        a = self.type2test('DNSSEC') -        b = self.type2test('') -        for c in a: -            b += c -            hash(b) -        self.assertEqual(hash(a), hash(b)) -      def test_capitalize(self):          self.checkequal(' hello ', ' hello ', 'capitalize')          self.checkequal('Hello ', 'Hello ','capitalize') @@ -705,23 +688,6 @@ class CommonTest(BaseTest):          self.checkequal('Aaaa', 'aaaa', 'capitalize')          self.checkequal('Aaaa', 'AaAa', 'capitalize') -        # check that titlecased chars are lowered correctly -        # \u1ffc is the titlecased char -        self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3', -                        '\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize') -        # check with cased non-letter chars -        self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', -                        '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize') -        self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', -                        
'\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize') -        self.checkequal('\u2160\u2171\u2172', -                        '\u2160\u2161\u2162', 'capitalize') -        self.checkequal('\u2160\u2171\u2172', -                        '\u2170\u2171\u2172', 'capitalize') -        # check with Ll chars with no upper - nothing changes here -        self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', -                        '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') -          self.checkraises(TypeError, 'hello', 'capitalize', 42)      def test_additional_split(self): @@ -744,16 +710,21 @@ class CommonTest(BaseTest):          self.checkequal(['a'], '  a    ', 'split')          self.checkequal(['a', 'b'], '  a    b   ', 'split')          self.checkequal(['a', 'b   '], '  a    b   ', 'split', None, 1) +        self.checkequal(['a    b   c   '], '  a    b   c   ', 'split', None, 0)          self.checkequal(['a', 'b   c   '], '  a    b   c   ', 'split', None, 1)          self.checkequal(['a', 'b', 'c   '], '  a    b   c   ', 'split', None, 2) +        self.checkequal(['a', 'b', 'c'], '  a    b   c   ', 'split', None, 3)          self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'split')          aaa = ' a '*20          self.checkequal(['a']*20, aaa, 'split')          self.checkequal(['a'] + [aaa[4:]], aaa, 'split', None, 1)          self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19) -        # mixed use of str and unicode -        self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', ' ', 2) +        for b in ('arf\tbarf', 'arf\nbarf', 'arf\rbarf', +                  'arf\fbarf', 'arf\vbarf'): +            self.checkequal(['arf', 'barf'], b, 'split') +            self.checkequal(['arf', 'barf'], b, 'split', None) +            self.checkequal(['arf', 'barf'], b, 'split', None, 2)      def test_additional_rsplit(self):          self.checkequal(['this', 'is', 'the', 'rsplit', 'function'], @@ -775,36 +746,53 @@ class CommonTest(BaseTest):          
self.checkequal(['a'], '  a    ', 'rsplit')          self.checkequal(['a', 'b'], '  a    b   ', 'rsplit')          self.checkequal(['  a', 'b'], '  a    b   ', 'rsplit', None, 1) +        self.checkequal(['  a    b   c'], '  a    b   c   ', 'rsplit', +                        None, 0)          self.checkequal(['  a    b','c'], '  a    b   c   ', 'rsplit',                          None, 1)          self.checkequal(['  a', 'b', 'c'], '  a    b   c   ', 'rsplit',                          None, 2) +        self.checkequal(['a', 'b', 'c'], '  a    b   c   ', 'rsplit', +                        None, 3)          self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88)          aaa = ' a '*20          self.checkequal(['a']*20, aaa, 'rsplit')          self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1)          self.checkequal([' a  a'] + ['a']*18, aaa, 'rsplit', None, 18) -        # mixed use of str and unicode -        self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', ' ', 2) +        for b in ('arf\tbarf', 'arf\nbarf', 'arf\rbarf', +                  'arf\fbarf', 'arf\vbarf'): +            self.checkequal(['arf', 'barf'], b, 'rsplit') +            self.checkequal(['arf', 'barf'], b, 'rsplit', None) +            self.checkequal(['arf', 'barf'], b, 'rsplit', None, 2) -    def test_strip(self): +    def test_strip_whitespace(self):          self.checkequal('hello', '   hello   ', 'strip')          self.checkequal('hello   ', '   hello   ', 'lstrip')          self.checkequal('   hello', '   hello   ', 'rstrip')          self.checkequal('hello', 'hello', 'strip') +        b = ' \t\n\r\f\vabc \t\n\r\f\v' +        self.checkequal('abc', b, 'strip') +        self.checkequal('abc \t\n\r\f\v', b, 'lstrip') +        self.checkequal(' \t\n\r\f\vabc', b, 'rstrip') +          # strip/lstrip/rstrip with None arg          self.checkequal('hello', '   hello   ', 'strip', None)          self.checkequal('hello   ', '   hello   ', 'lstrip', None)          
self.checkequal('   hello', '   hello   ', 'rstrip', None)          self.checkequal('hello', 'hello', 'strip', None) +    def test_strip(self):          # strip/lstrip/rstrip with str arg          self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz')          self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', 'xyz')          self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')          self.checkequal('hello', 'hello', 'strip', 'xyz') +        self.checkequal('', 'mississippi', 'strip', 'mississippi') + +        # only trim the start and end; does not strip internal characters +        self.checkequal('mississipp', 'mississippi', 'strip', 'i')          self.checkraises(TypeError, 'hello', 'strip', 42, 42)          self.checkraises(TypeError, 'hello', 'lstrip', 42, 42) @@ -855,11 +843,6 @@ class CommonTest(BaseTest):          self.checkraises(TypeError, '123', 'zfill') -class MixinStrUnicodeUserStringTest: -    # additional tests that only work for -    # stringlike objects, i.e. str, unicode, UserString -    # (but not the string module) -      def test_islower(self):          self.checkequal(False, '', 'islower')          self.checkequal(True, 'a', 'islower') @@ -962,6 +945,43 @@ class MixinStrUnicodeUserStringTest:          self.checkraises(TypeError, 'abc', 'splitlines', 42, 42) + +class CommonTest(BaseTest): +    # This testcase contains tests that can be used in all +    # stringlike classes. Currently this is str and UserString. 
+ +    def test_hash(self): +        # SF bug 1054139:  += optimization was not invalidating cached hash value +        a = self.type2test('DNSSEC') +        b = self.type2test('') +        for c in a: +            b += c +            hash(b) +        self.assertEqual(hash(a), hash(b)) + +    def test_capitalize_nonascii(self): +        # check that titlecased chars are lowered correctly +        # \u1ffc is the titlecased char +        self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3', +                        '\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize') +        # check with cased non-letter chars +        self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', +                        '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize') +        self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd', +                        '\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize') +        self.checkequal('\u2160\u2171\u2172', +                        '\u2160\u2161\u2162', 'capitalize') +        self.checkequal('\u2160\u2171\u2172', +                        '\u2170\u2171\u2172', 'capitalize') +        # check with Ll chars with no upper - nothing changes here +        self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', +                        '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') + + +class MixinStrUnicodeUserStringTest: +    # additional tests that only work for +    # stringlike objects, i.e. 
str, UserString +      def test_startswith(self):          self.checkequal(True, 'hello', 'startswith', 'he')          self.checkequal(True, 'hello', 'startswith', 'hello') @@ -976,6 +996,9 @@ class MixinStrUnicodeUserStringTest:          self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3)          self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3, 7)          self.checkequal(False, 'helloworld', 'startswith', 'lowo', 3, 6) +        self.checkequal(True, '', 'startswith', '', 0, 1) +        self.checkequal(True, '', 'startswith', '', 0, 0) +        self.checkequal(False, '', 'startswith', '', 1, 0)          # test negative indices          self.checkequal(True, 'hello', 'startswith', 'he', 0, -1) @@ -1022,6 +1045,9 @@ class MixinStrUnicodeUserStringTest:          self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, 8)          self.checkequal(False, 'ab', 'endswith', 'ab', 0, 1)          self.checkequal(False, 'ab', 'endswith', 'ab', 0, 0) +        self.checkequal(True, '', 'endswith', '', 0, 1) +        self.checkequal(True, '', 'endswith', '', 0, 0) +        self.checkequal(False, '', 'endswith', '', 1, 0)          # test negative indices          self.checkequal(True, 'hello', 'endswith', 'lo', -2) @@ -1176,8 +1202,7 @@ class MixinStrUnicodeUserStringTest:          self.checkraises(TypeError, 'abc', '__mod__')          self.checkraises(TypeError, '%(foo)s', '__mod__', 42)          self.checkraises(TypeError, '%s%s', '__mod__', (42,)) -        with self.assertWarns(DeprecationWarning): -            self.checkraises(TypeError, '%c', '__mod__', (None,)) +        self.checkraises(TypeError, '%c', '__mod__', (None,))          self.checkraises(ValueError, '%(foo', '__mod__', {})          self.checkraises(TypeError, '%(foo)s %(bar)s', '__mod__', ('foo', 42))          self.checkraises(TypeError, '%d', '__mod__', "42") # not numeric @@ -1338,7 +1363,7 @@ class MixinStrUnicodeUserStringTest:  class MixinStrUnicodeTest: -    # Additional tests 
that only work with str and unicode. +    # Additional tests that only work with str.      def test_bug1001011(self):          # Make sure join returns a NEW object for single item sequences @@ -1356,28 +1381,3 @@ class MixinStrUnicodeTest:          s1 = t("abcd")          s2 = t().join([s1])          self.assertIs(s1, s2) - -        # Should also test mixed-type join. -        if t is str: -            s1 = subclass("abcd") -            s2 = "".join([s1]) -            self.assertIsNot(s1, s2) -            self.assertIs(type(s2), t) - -            s1 = t("abcd") -            s2 = "".join([s1]) -            self.assertIs(s1, s2) - -##         elif t is str8: -##             s1 = subclass("abcd") -##             s2 = "".join([s1]) -##             self.assertIsNot(s1, s2) -##             self.assertIs(type(s2), str) # promotes! - -##             s1 = t("abcd") -##             s2 = "".join([s1]) -##             self.assertIsNot(s1, s2) -##             self.assertIs(type(s2), str) # promotes! - -        else: -            self.fail("unexpected type for MixinStrUnicodeTest %r" % t) | 
