summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2005-02-20 04:07:08 (GMT)
committerRaymond Hettinger <python@rcn.com>2005-02-20 04:07:08 (GMT)
commit7cbf1bcb3e55c61617352ec1b20176603dacbafe (patch)
treefa39e30071d423e8c3de3574de4ac11f5998887d /Lib
parent54c273c703957e37100900b3e8a25f94c4c17003 (diff)
downloadcpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.zip
cpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.tar.gz
cpython-7cbf1bcb3e55c61617352ec1b20176603dacbafe.tar.bz2
* Beef up testing of str.__contains__() and str.find().
* Speed up "x in y" where x has more than one character. The existing code made excessive calls to the expensive memcmp() function. The new code uses memchr() to rapidly find a start point for memcmp(). In addition to knowing that the first character is a match, the new code also checks that the last character is a match. This significantly reduces the incidence of false starts (saving memcmp() calls and making quadratic behavior less likely). Improves the timings on:
    python -m timeit -r7 -s"x='a'*1000" "'ab' in x"
    python -m timeit -r7 -s"x='a'*1000" "'bc' in x"
  Once this code has proven itself, string_find_internal() should refer to it rather than running its own version. Also, something similar may apply to unicode objects.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/string_tests.py24
1 file changed, 24 insertions, 0 deletions
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index c8ed07c..0ce9618 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -122,6 +122,30 @@ class CommonTest(unittest.TestCase):
self.checkraises(TypeError, 'hello', 'find')
self.checkraises(TypeError, 'hello', 'find', 42)
+ # For a variety of combinations,
+ # verify that str.find() matches __contains__
+ # and that the found substring is really at that location
+ charset = ['', 'a', 'b', 'c']
+ digits = 5
+ base = len(charset)
+ teststrings = set()
+ for i in xrange(base ** digits):
+ entry = []
+ for j in xrange(digits):
+ i, m = divmod(i, base)
+ entry.append(charset[m])
+ teststrings.add(''.join(entry))
+ for i in teststrings:
+ i = self.fixtype(i)
+ for j in teststrings:
+ loc = i.find(j)
+ r1 = (loc != -1)
+ r2 = j in i
+ if r1 != r2:
+ self.assertEqual(r1, r2)
+ if loc != -1:
+ self.assertEqual(i[loc:loc+len(j)], j)
+
def test_rfind(self):
self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
self.checkequal(12, 'abcdefghiabc', 'rfind', '')