diff options
Diffstat (limited to 'Objects/stringlib/fastsearch.h')
| -rw-r--r-- | Objects/stringlib/fastsearch.h | 40 | 
1 files changed, 21 insertions, 19 deletions
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 7b9dd47..21cf3a2 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -18,10 +18,13 @@  #define FAST_SEARCH 1  #define FAST_RSEARCH 2 +#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1))))) +#define BLOOM(mask, ch)     ((mask &  (1 << ((ch) & (LONG_BIT - 1))))) +  Py_LOCAL_INLINE(Py_ssize_t)  fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,             const STRINGLIB_CHAR* p, Py_ssize_t m, -           int mode) +           Py_ssize_t maxcount, int mode)  {      long mask;      Py_ssize_t skip, count = 0; @@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,      w = n - m; -    if (w < 0) +    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))          return -1;      /* look for special cases */ @@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,          /* use special case for 1-character strings */          if (mode == FAST_COUNT) {              for (i = 0; i < n; i++) -                if (s[i] == p[0]) +                if (s[i] == p[0]) {                      count++; +                    if (count == maxcount) +                        return maxcount; +                }              return count;          } else if (mode == FAST_SEARCH) {              for (i = 0; i < n; i++) @@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,      mlast = m - 1;      skip = mlast - 1; +    mask = 0;      if (mode != FAST_RSEARCH) {          /* create compressed boyer-moore delta 1 table */          /* process pattern[:-1] */ -        for (mask = i = 0; i < mlast; i++) { -            mask |= (1 << (p[i] & 0x1F)); +        for (i = 0; i < mlast; i++) { +            BLOOM_ADD(mask, p[i]);              if (p[i] == p[mlast])                  skip = mlast - i - 1;          }          /* process pattern[-1] outside the loop */ -        mask |= (1 << (p[mlast] & 0x1F)); +        BLOOM_ADD(mask, p[mlast]);          for (i = 0; i <= w; i++) {              /* note: using mlast in the skip path slows things down on x86 */ @@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,                      if (mode != FAST_COUNT)                          return i;                      count++; +                    if (count == maxcount) +                        return maxcount;                      i = i + mlast;                      continue;                  }                  /* miss: check if next character is part of pattern */ -                if (!(mask & (1 << (s[i+m] & 0x1F)))) +                if (!BLOOM(mask, s[i+m]))                      i = i + m;                  else                      i = i + skip;              } else {                  /* skip: check if next character is part of pattern */ -                if (!(mask & (1 << (s[i+m] & 0x1F)))) +                if (!BLOOM(mask, s[i+m]))                      i = i + m;              }          } @@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,          /* create compressed boyer-moore delta 1 table */          /* process pattern[0] outside the loop */ -        mask = (1 << (p[0] & 0x1F)); +        BLOOM_ADD(mask, p[0]);          /* process pattern[:0:-1] */          for (i = mlast; i > 0; i--) { -            mask |= (1 << (p[i] & 0x1F)); +            BLOOM_ADD(mask, p[i]);              if (p[i] == p[0])                  skip = i - 1;          } @@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,                      /* got a match! */                      return i;                  /* miss: check if previous character is part of pattern */ -                if (!(mask & (1 << (s[i-1] & 0x1F)))) +                if (!BLOOM(mask, s[i-1]))                      i = i - m;                  else                      i = i - skip;              } else {                  /* skip: check if previous character is part of pattern */ -                if (!(mask & (1 << (s[i-1] & 0x1F)))) +                if (!BLOOM(mask, s[i-1]))                      i = i - m;              }          } @@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,  }  #endif - -/* -Local variables: -c-basic-offset: 4 -indent-tabs-mode: nil -End: -*/  | 
