diff options
-rw-r--r-- | Objects/stringlib/fastsearch.h | 34 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 16 |
2 files changed, 36 insertions, 14 deletions
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 21cf3a2..7525951 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -18,15 +18,27 @@
 #define FAST_SEARCH 1
 #define FAST_RSEARCH 2
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
-#define BLOOM(mask, ch) ((mask & (1 << ((ch) & (LONG_BIT - 1)))))
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch) \
+    ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
 
 Py_LOCAL_INLINE(Py_ssize_t)
 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
            const STRINGLIB_CHAR* p, Py_ssize_t m,
            Py_ssize_t maxcount, int mode)
 {
-    long mask;
+    unsigned long mask;
     Py_ssize_t skip, count = 0;
     Py_ssize_t i, j, mlast, w;
 
@@ -70,12 +82,12 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
 
         /* process pattern[:-1] */
         for (i = 0; i < mlast; i++) {
-            BLOOM_ADD(mask, p[i]);
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
             if (p[i] == p[mlast])
                 skip = mlast - i - 1;
         }
         /* process pattern[-1] outside the loop */
-        BLOOM_ADD(mask, p[mlast]);
+        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
 
         for (i = 0; i <= w; i++) {
             /* note: using mlast in the skip path slows things down on x86 */
@@ -95,13 +107,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
                     continue;
                 }
                 /* miss: check if next character is part of pattern */
-                if (!BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
                     i = i + m;
                 else
                     i = i + skip;
             } else {
                 /* skip: check if next character is part of pattern */
-                if (!BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
                     i = i + m;
             }
         }
@@ -110,10 +122,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
         /* create compressed boyer-moore delta 1 table */
 
         /* process pattern[0] outside the loop */
-        BLOOM_ADD(mask, p[0]);
+        STRINGLIB_BLOOM_ADD(mask, p[0]);
         /* process pattern[:0:-1] */
         for (i = mlast; i > 0; i--) {
-            BLOOM_ADD(mask, p[i]);
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
             if (p[i] == p[0])
                 skip = i - 1;
         }
@@ -128,13 +140,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
                     /* got a match! */
                     return i;
                 /* miss: check if previous character is part of pattern */
-                if (!BLOOM(mask, s[i-1]))
+                if (!STRINGLIB_BLOOM(mask, s[i-1]))
                     i = i - m;
                 else
                     i = i - skip;
             } else {
                 /* skip: check if previous character is part of pattern */
-                if (!BLOOM(mask, s[i-1]))
+                if (!STRINGLIB_BLOOM(mask, s[i-1]))
                     i = i - m;
             }
         }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d8d9c35..35683d0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -206,12 +206,22 @@ PyUnicode_GetMax(void)
 
 /* the linebreak mask is set up by Unicode_Init below */
 
+#if LONG_BIT >= 128
+#define BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
 #define BLOOM_MASK unsigned long
 
 static BLOOM_MASK bloom_linebreak;
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
-#define BLOOM(mask, ch) ((mask & (1 << ((ch) & (LONG_BIT - 1)))))
+#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
+#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 
 #define BLOOM_LINEBREAK(ch) \
     ((ch) < 128U ? ascii_linebreak[(ch)] : \
@@ -221,7 +231,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
 {
     /* calculate simple bloom-style bitmask for a given unicode string */
 
-    long mask;
+    BLOOM_MASK mask;
     Py_ssize_t i;
 
     mask = 0;