summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib/fastsearch.h
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib/fastsearch.h')
-rw-r--r--Objects/stringlib/fastsearch.h155
1 files changed, 16 insertions, 139 deletions
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 56a4467..e231c58 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -1,5 +1,6 @@
/* stringlib: fastsearch implementation */
+#ifndef STRINGLIB_FASTSEARCH_H
#define STRINGLIB_FASTSEARCH_H
/* fast search/count implementation, based on a mix between boyer-
@@ -32,136 +33,8 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-#if STRINGLIB_SIZEOF_CHAR == 1
-# define MEMCHR_CUT_OFF 15
-#else
-# define MEMCHR_CUT_OFF 40
-#endif
-
Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p, *e;
-
- p = s;
- e = s + n;
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
- /* use memchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1, *e1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memchr(p, needle,
- (e - p) * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- s1 = p;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- if (*p == ch)
- return (p - s);
- /* False positive */
- p++;
- if (p - s1 > MEMCHR_CUT_OFF)
- continue;
- if (e - p <= MEMCHR_CUT_OFF)
- break;
- e1 = p + MEMCHR_CUT_OFF;
- while (p != e1) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- }
- while (e - p > MEMCHR_CUT_OFF);
- }
-#endif
- }
- while (p < e) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- return -1;
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p;
-#ifdef HAVE_MEMRCHR
- /* memrchr() is a GNU extension, available since glibc 2.1.91.
- it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop below */
-
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memrchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
- /* use memrchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1;
- Py_ssize_t n1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memrchr(s, needle,
- n * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- n1 = n;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- n = p - s;
- if (*p == ch)
- return n;
- /* False positive */
- if (n1 - n > MEMCHR_CUT_OFF)
- continue;
- if (n <= MEMCHR_CUT_OFF)
- break;
- s1 = p - MEMCHR_CUT_OFF;
- while (p > s1) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- n = p - s;
- }
- while (n > MEMCHR_CUT_OFF);
- }
-#endif
- }
-#endif /* HAVE_MEMRCHR */
- p = s + n;
- while (p > s) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- return -1;
-}
-
-#undef MEMCHR_CUT_OFF
-
-Py_LOCAL_INLINE(Py_ssize_t)
-FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
+fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -179,11 +52,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (m <= 0)
return -1;
/* use special case for 1-character strings */
- if (mode == FAST_SEARCH)
- return STRINGLIB(find_char)(s, n, p[0]);
- else if (mode == FAST_RSEARCH)
- return STRINGLIB(rfind_char)(s, n, p[0]);
- else { /* FAST_COUNT */
+ if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
if (s[i] == p[0]) {
count++;
@@ -191,7 +60,16 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return maxcount;
}
return count;
+ } else if (mode == FAST_SEARCH) {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ return i;
+ } else { /* FAST_RSEARCH */
+ for (i = n - 1; i > -1; i--)
+ if (s[i] == p[0])
+ return i;
}
+ return -1;
}
mlast = m - 1;
@@ -199,8 +77,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
mask = 0;
if (mode != FAST_RSEARCH) {
- const STRINGLIB_CHAR *ss = s + m - 1;
- const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
@@ -215,7 +91,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
- if (ss[i] == pp[0]) {
+ if (s[i+m-1] == p[m-1]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
@@ -231,13 +107,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ if (!STRINGLIB_BLOOM(mask, s[i+m]))
i = i + m;
}
}
@@ -281,3 +157,4 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
return count;
}
+#endif