diff options
Diffstat (limited to 'Objects/stringlib/fastsearch.h')
-rw-r--r-- | Objects/stringlib/fastsearch.h | 155 |
1 files changed, 16 insertions, 139 deletions
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 56a4467..e231c58 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -1,5 +1,6 @@ /* stringlib: fastsearch implementation */ +#ifndef STRINGLIB_FASTSEARCH_H #define STRINGLIB_FASTSEARCH_H /* fast search/count implementation, based on a mix between boyer- @@ -32,136 +33,8 @@ #define STRINGLIB_BLOOM(mask, ch) \ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) -#if STRINGLIB_SIZEOF_CHAR == 1 -# define MEMCHR_CUT_OFF 15 -#else -# define MEMCHR_CUT_OFF 40 -#endif - Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) -{ - const STRINGLIB_CHAR *p, *e; - - p = s; - e = s + n; - if (n > MEMCHR_CUT_OFF) { -#if STRINGLIB_SIZEOF_CHAR == 1 - p = memchr(s, ch, n); - if (p != NULL) - return (p - s); - return -1; -#else - /* use memchr if we can choose a needle without too many likely - false positives */ - const STRINGLIB_CHAR *s1, *e1; - unsigned char needle = ch & 0xff; - /* If looking for a multiple of 256, we'd have too - many false positives looking for the '\0' byte in UCS2 - and UCS4 representations. */ - if (needle != 0) { - do { - void *candidate = memchr(p, needle, - (e - p) * sizeof(STRINGLIB_CHAR)); - if (candidate == NULL) - return -1; - s1 = p; - p = (const STRINGLIB_CHAR *) - _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); - if (*p == ch) - return (p - s); - /* False positive */ - p++; - if (p - s1 > MEMCHR_CUT_OFF) - continue; - if (e - p <= MEMCHR_CUT_OFF) - break; - e1 = p + MEMCHR_CUT_OFF; - while (p != e1) { - if (*p == ch) - return (p - s); - p++; - } - } - while (e - p > MEMCHR_CUT_OFF); - } -#endif - } - while (p < e) { - if (*p == ch) - return (p - s); - p++; - } - return -1; -} - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) -{ - const STRINGLIB_CHAR *p; -#ifdef HAVE_MEMRCHR - /* memrchr() is a GNU extension, available since glibc 2.1.91. - it doesn't seem as optimized as memchr(), but is still quite - faster than our hand-written loop below */ - - if (n > MEMCHR_CUT_OFF) { -#if STRINGLIB_SIZEOF_CHAR == 1 - p = memrchr(s, ch, n); - if (p != NULL) - return (p - s); - return -1; -#else - /* use memrchr if we can choose a needle without too many likely - false positives */ - const STRINGLIB_CHAR *s1; - Py_ssize_t n1; - unsigned char needle = ch & 0xff; - /* If looking for a multiple of 256, we'd have too - many false positives looking for the '\0' byte in UCS2 - and UCS4 representations. */ - if (needle != 0) { - do { - void *candidate = memrchr(s, needle, - n * sizeof(STRINGLIB_CHAR)); - if (candidate == NULL) - return -1; - n1 = n; - p = (const STRINGLIB_CHAR *) - _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); - n = p - s; - if (*p == ch) - return n; - /* False positive */ - if (n1 - n > MEMCHR_CUT_OFF) - continue; - if (n <= MEMCHR_CUT_OFF) - break; - s1 = p - MEMCHR_CUT_OFF; - while (p > s1) { - p--; - if (*p == ch) - return (p - s); - } - n = p - s; - } - while (n > MEMCHR_CUT_OFF); - } -#endif - } -#endif /* HAVE_MEMRCHR */ - p = s + n; - while (p > s) { - p--; - if (*p == ch) - return (p - s); - } - return -1; -} - -#undef MEMCHR_CUT_OFF - -Py_LOCAL_INLINE(Py_ssize_t) -FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, +fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, Py_ssize_t maxcount, int mode) { @@ -179,11 +52,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, if (m <= 0) return -1; /* use special case for 1-character strings */ - if (mode == FAST_SEARCH) - return STRINGLIB(find_char)(s, n, p[0]); - else if (mode == FAST_RSEARCH) - return STRINGLIB(rfind_char)(s, n, p[0]); - else { /* FAST_COUNT */ + if (mode == FAST_COUNT) { for (i = 0; i < n; i++) if (s[i] == p[0]) { count++; @@ -191,7 +60,16 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, return maxcount; } return count; + } else if (mode == FAST_SEARCH) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + return i; + } else { /* FAST_RSEARCH */ + for (i = n - 1; i > -1; i--) + if (s[i] == p[0]) + return i; } + return -1; } mlast = m - 1; @@ -199,8 +77,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, mask = 0; if (mode != FAST_RSEARCH) { - const STRINGLIB_CHAR *ss = s + m - 1; - const STRINGLIB_CHAR *pp = p + m - 1; /* create compressed boyer-moore delta 1 table */ @@ -215,7 +91,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, for (i = 0; i <= w; i++) { /* note: using mlast in the skip path slows things down on x86 */ - if (ss[i] == pp[0]) { + if (s[i+m-1] == p[m-1]) { /* candidate match */ for (j = 0; j < mlast; j++) if (s[i+j] != p[j]) @@ -231,13 +107,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, continue; } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) + if (!STRINGLIB_BLOOM(mask, s[i+m])) i = i + m; else i = i + skip; } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, ss[i+1])) + if (!STRINGLIB_BLOOM(mask, s[i+m])) i = i + m; } } @@ -281,3 +157,4 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, return count; } +#endif |