summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/string_tests.py24
-rw-r--r--Objects/stringobject.c39
2 files changed, 50 insertions, 13 deletions
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index c8ed07c..0ce9618 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -122,6 +122,30 @@ class CommonTest(unittest.TestCase):
self.checkraises(TypeError, 'hello', 'find')
self.checkraises(TypeError, 'hello', 'find', 42)
+ # For a variety of combinations,
+ # verify that str.find() matches __contains__
+ # and that the found substring is really at that location
+ charset = ['', 'a', 'b', 'c']
+ digits = 5
+ base = len(charset)
+ teststrings = set()
+ for i in xrange(base ** digits):
+ entry = []
+ for j in xrange(digits):
+ i, m = divmod(i, base)
+ entry.append(charset[m])
+ teststrings.add(''.join(entry))
+ for i in teststrings:
+ i = self.fixtype(i)
+ for j in teststrings:
+ loc = i.find(j)
+ r1 = (loc != -1)
+ r2 = j in i
+ if r1 != r2:
+ self.assertEqual(r1, r2)
+ if loc != -1:
+ self.assertEqual(i[loc:loc+len(j)], j)
+
def test_rfind(self):
self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
self.checkequal(12, 'abcdefghiabc', 'rfind', '')
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index b90221a..0cbf439 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1002,8 +1002,12 @@ string_slice(register PyStringObject *a, register int i, register int j)
static int
string_contains(PyObject *a, PyObject *el)
{
- const char *lhs, *rhs, *end;
- int size;
+ char *s = PyString_AS_STRING(a);
+ const char *sub = PyString_AS_STRING(el);
+ char *last;
+ int len_sub = PyString_GET_SIZE(el);
+ int shortsub;
+ char firstchar, lastchar;
if (!PyString_CheckExact(el)) {
#ifdef Py_USING_UNICODE
@@ -1016,20 +1020,29 @@ string_contains(PyObject *a, PyObject *el)
return -1;
}
}
- size = PyString_GET_SIZE(el);
- rhs = PyString_AS_STRING(el);
- lhs = PyString_AS_STRING(a);
- /* optimize for a single character */
- if (size == 1)
- return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
-
- end = lhs + (PyString_GET_SIZE(a) - size);
- while (lhs <= end) {
- if (memcmp(lhs++, rhs, size) == 0)
+ if (len_sub == 0)
+ return 1;
+ /* last points to one char beyond the start of the rightmost
+ substring. When s<last, there is still room for a possible match
+ and s[0] through s[len_sub-1] will be in bounds.
+ shortsub is len_sub minus the last character which is checked
+ separately just before the memcmp(). That check helps prevent
+ false starts and saves the setup time for memcmp().
+ */
+ firstchar = sub[0];
+ shortsub = len_sub - 1;
+ lastchar = sub[shortsub];
+ last = s + PyString_GET_SIZE(a) - len_sub + 1;
+ while (s < last) {
+ s = memchr(s, firstchar, last-s);
+ if (s == NULL)
+ return 0;
+ assert(s < last);
+ if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
return 1;
+ s++;
}
-
return 0;
}