diff options
author | Raymond Hettinger <python@rcn.com> | 2005-02-20 04:07:08 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2005-02-20 04:07:08 (GMT) |
commit | 7cbf1bcb3e55c61617352ec1b20176603dacbafe (patch) | |
tree | fa39e30071d423e8c3de3574de4ac11f5998887d /Lib/test/string_tests.py | |
parent | 54c273c703957e37100900b3e8a25f94c4c17003 (diff) | |
download | cpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.zip cpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.tar.gz cpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.tar.bz2 |
* Beef up testing of str.__contains__() and str.find().
* Speed up "x in y" where x has more than one character.
The existing code made excessive calls to the expensive memcmp() function.
The new code uses memchr() to rapidly find a start point for memcmp().
In addition to knowing that the first character is a match, the new code
also checks that the last character is a match. This significantly reduces
the incidence of false starts (saving memcmp() calls and making quadratic
behavior less likely).
Improves the timings on:
python -m timeit -r7 -s"x='a'*1000" "'ab' in x"
python -m timeit -r7 -s"x='a'*1000" "'bc' in x"
Once this code has proven itself, string_find_internal() should call
it rather than running its own version. Something similar may also
apply to unicode objects.
Diffstat (limited to 'Lib/test/string_tests.py')
-rw-r--r-- | Lib/test/string_tests.py | 24 |
1 file changed, 24 insertions, 0 deletions
```diff
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index c8ed07c..0ce9618 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -122,6 +122,30 @@ class CommonTest(unittest.TestCase):
         self.checkraises(TypeError, 'hello', 'find')
         self.checkraises(TypeError, 'hello', 'find', 42)
 
+        # For a variety of combinations,
+        # verify that str.find() matches __contains__
+        # and that the found substring is really at that location
+        charset = ['', 'a', 'b', 'c']
+        digits = 5
+        base = len(charset)
+        teststrings = set()
+        for i in xrange(base ** digits):
+            entry = []
+            for j in xrange(digits):
+                i, m = divmod(i, base)
+                entry.append(charset[m])
+            teststrings.add(''.join(entry))
+        for i in teststrings:
+            i = self.fixtype(i)
+            for j in teststrings:
+                loc = i.find(j)
+                r1 = (loc != -1)
+                r2 = j in i
+                if r1 != r2:
+                    self.assertEqual(r1, r2)
+                if loc != -1:
+                    self.assertEqual(i[loc:loc+len(j)], j)
+
     def test_rfind(self):
         self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
         self.checkequal(12, 'abcdefghiabc', 'rfind', '')
```