summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2015-11-14 13:42:17 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2015-11-14 13:42:17 (GMT)
commit413fdcea21908055cb8acad28a94b8f72eb2ffec (patch)
tree59d06812d65d6fab8c8a0bea714f0b2cc0736110
parent0304729ec49f4673bc8c88740537e66643a6c44a (diff)
downloadcpython-413fdcea21908055cb8acad28a94b8f72eb2ffec.zip
cpython-413fdcea21908055cb8acad28a94b8f72eb2ffec.tar.gz
cpython-413fdcea21908055cb8acad28a94b8f72eb2ffec.tar.bz2
Issue #24821: Refactor STRINGLIB(fastsearch_memchr_1char) and split it on
STRINGLIB(find_char) and STRINGLIB(rfind_char) that can be used independedly without special preconditions.
-rw-r--r--Objects/bytearrayobject.c19
-rw-r--r--Objects/bytesobject.c19
-rw-r--r--Objects/stringlib/fastsearch.h150
-rw-r--r--Objects/unicodeobject.c33
4 files changed, 121 insertions, 100 deletions
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 2103147..131e04d 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
ADJUST_INDICES(start, end, len);
if (end - start < sub_len)
res = -1;
- else if (sub_len == 1
-#ifndef HAVE_MEMRCHR
- && dir > 0
-#endif
- ) {
- unsigned char needle = *sub;
- int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
- res = stringlib_fastsearch_memchr_1char(
- PyByteArray_AS_STRING(self) + start, end - start,
- needle, needle, mode);
+ else if (sub_len == 1) {
+ if (dir > 0)
+ res = stringlib_find_char(
+ PyByteArray_AS_STRING(self) + start, end - start,
+ *sub);
+ else
+ res = stringlib_rfind_char(
+ PyByteArray_AS_STRING(self) + start, end - start,
+ *sub);
if (res >= 0)
res += start;
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index c10dbdf..6a9e062 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
ADJUST_INDICES(start, end, len);
if (end - start < sub_len)
res = -1;
- else if (sub_len == 1
-#ifndef HAVE_MEMRCHR
- && dir > 0
-#endif
- ) {
- unsigned char needle = *sub;
- int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
- res = stringlib_fastsearch_memchr_1char(
- PyBytes_AS_STRING(self) + start, end - start,
- needle, needle, mode);
+ else if (sub_len == 1) {
+ if (dir > 0)
+ res = stringlib_find_char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ *sub);
+ else
+ res = stringlib_rfind_char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ *sub);
if (res >= 0)
res += start;
}
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index cda68e7..98165ad 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -32,52 +32,98 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
- STRINGLIB_CHAR ch, unsigned char needle,
- int mode)
+STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
- if (mode == FAST_SEARCH) {
- const STRINGLIB_CHAR *ptr = s;
- const STRINGLIB_CHAR *e = s + n;
- while (ptr < e) {
- void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch)
- return (ptr - s);
- /* False positive */
- ptr++;
- }
+ const STRINGLIB_CHAR *p, *e;
+
+ p = s;
+ e = s + n;
+ if (n > 10) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
return -1;
+#else
+ /* use memchr if we can choose a needle without two many likely
+ false positives */
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ while (p < e) {
+ void *candidate = memchr(p, needle,
+ (e - p) * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ if (*p == ch)
+ return (p - s);
+ /* False positive */
+ p++;
+ }
+ return -1;
+ }
+#endif
}
+ while (p < e) {
+ if (*p == ch)
+ return (p - s);
+ p++;
+ }
+ return -1;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+ const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
/* memrchr() is a GNU extension, available since glibc 2.1.91.
it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop in FASTSEARCH below */
- else if (mode == FAST_RSEARCH) {
- while (n > 0) {
- const STRINGLIB_CHAR *found;
- void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- n = found - s;
- if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch)
- return n;
- /* False positive */
- }
+ faster than our hand-written loop below */
+
+ if (n > 10) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memrchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
return -1;
- }
+#else
+ /* use memrchr if we can choose a needle without two many likely
+ false positives */
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ while (n > 0) {
+ void *candidate = memrchr(s, needle,
+ n * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ n = p - s;
+ if (*p == ch)
+ return n;
+ /* False positive */
+ }
+ return -1;
+ }
#endif
- else {
- assert(0); /* Should never get here */
- return 0;
}
-
-#undef DO_MEMCHR
+#endif /* HAVE_MEMRCHR */
+ p = s + n;
+ while (p > s) {
+ p--;
+ if (*p == ch)
+ return (p - s);
+ }
+ return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
@@ -99,25 +145,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (m <= 0)
return -1;
/* use special case for 1-character strings */
- if (n > 10 && (mode == FAST_SEARCH
-#ifdef HAVE_MEMRCHR
- || mode == FAST_RSEARCH
-#endif
- )) {
- /* use memchr if we can choose a needle without two many likely
- false positives */
- unsigned char needle;
- needle = p[0] & 0xff;
-#if STRINGLIB_SIZEOF_CHAR > 1
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0)
-#endif
- return STRINGLIB(fastsearch_memchr_1char)
- (s, n, p[0], needle, mode);
- }
- if (mode == FAST_COUNT) {
+ if (mode == FAST_SEARCH)
+ return STRINGLIB(find_char)(s, n, p[0]);
+ else if (mode == FAST_RSEARCH)
+ return STRINGLIB(rfind_char)(s, n, p[0]);
+ else { /* FAST_COUNT */
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
@@ -125,14 +157,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return maxcount;
}
return count;
- } else if (mode == FAST_SEARCH) {
- for (i = 0; i < n; i++)
- if (s[i] == p[0])
- return i;
- } else { /* FAST_RSEARCH */
- for (i = n - 1; i > -1; i--)
- if (s[i] == p[0])
- return i;
}
return -1;
}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18a30e2..c2b6f64 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch,
int direction)
{
- int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
-
switch (kind) {
case PyUnicode_1BYTE_KIND:
- {
- Py_UCS1 ch1 = (Py_UCS1) ch;
- if (ch1 == ch)
- return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
- else
- return -1;
- }
+ if ((Py_UCS1) ch != ch)
+ return -1;
+ if (direction > 0)
+ return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
+ else
+ return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
case PyUnicode_2BYTE_KIND:
- {
- Py_UCS2 ch2 = (Py_UCS2) ch;
- if (ch2 == ch)
- return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
- else
- return -1;
- }
+ if ((Py_UCS2) ch != ch)
+ return -1;
+ if (direction > 0)
+ return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
+ else
+ return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
case PyUnicode_4BYTE_KIND:
- return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
+ if (direction > 0)
+ return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
+ else
+ return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
default:
assert(0);
return -1;