summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Include/cpython/bytesobject.h19
-rw-r--r--Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst3
-rw-r--r--Modules/mmapmodule.c32
-rw-r--r--Objects/bytesobject.c18
4 files changed, 53 insertions, 19 deletions
diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
index 6b3f552..38a0fe0 100644
--- a/Include/cpython/bytesobject.h
+++ b/Include/cpython/bytesobject.h
@@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
+
+/* Substring Search.
+
+ Returns the index of the first occurrence of
+ a substring ("needle") in a larger text ("haystack").
+ If the needle is not found, return -1.
+ If the needle is found, return its index within haystack plus offset.
+*/
+
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset);
+
+/* Same as above, but search right-to-left */
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset);
diff --git a/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst b/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
new file mode 100644
index 0000000..bd20a84
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-01-01-16-13.bpo-46848.BB01Fr.rst
@@ -0,0 +1,3 @@
+For performance, use the optimized string-searching implementations
+from :meth:`~bytes.find` and :meth:`~bytes.rfind`
+for :meth:`~mmap.mmap.find` and :meth:`~mmap.mmap.rfind`.
diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c
index 26cedf1..6a038e7 100644
--- a/Modules/mmapmodule.c
+++ b/Modules/mmapmodule.c
@@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
&view, &start, &end)) {
return NULL;
- } else {
- const char *p, *start_p, *end_p;
- int sign = reverse ? -1 : 1;
- const char *needle = view.buf;
- Py_ssize_t len = view.len;
-
+ }
+ else {
if (start < 0)
start += self->size;
if (start < 0)
@@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
else if (end > self->size)
end = self->size;
- start_p = self->data + start;
- end_p = self->data + end;
-
- for (p = (reverse ? end_p - len : start_p);
- (p >= start_p) && (p + len <= end_p); p += sign) {
- Py_ssize_t i;
- for (i = 0; i < len && needle[i] == p[i]; ++i)
- /* nothing */;
- if (i == len) {
- PyBuffer_Release(&view);
- return PyLong_FromSsize_t(p - self->data);
- }
+ Py_ssize_t res;
+ if (reverse) {
+ res = _PyBytes_ReverseFind(
+ self->data + start, end - start,
+ view.buf, view.len, start);
+ }
+ else {
+ res = _PyBytes_Find(
+ self->data + start, end - start,
+ view.buf, view.len, start);
}
PyBuffer_Release(&view);
- return PyLong_FromLong(-1);
+ return PyLong_FromSsize_t(res);
}
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 3d8a216..4c67b8f 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
#undef STRINGLIB_GET_EMPTY
+Py_ssize_t
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset)
+{
+ return stringlib_find(haystack, len_haystack,
+ needle, len_needle, offset);
+}
+
+Py_ssize_t
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+ const char *needle, Py_ssize_t len_needle,
+ Py_ssize_t offset)
+{
+ return stringlib_rfind(haystack, len_haystack,
+ needle, len_needle, offset);
+}
+
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{