diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2006-05-26 17:04:58 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2006-05-26 17:04:58 (GMT) |
commit | a50d201bd9178bc37ef791685ad192bdbc8bf23f (patch) | |
tree | 418b020632234b8e4df4cd54ebfdbaa5dc0b1aa1 /Objects | |
parent | 877ab9bc235f3f98b6fa1a68c98e3c8968168ab3 (diff) | |
download | cpython-a50d201bd9178bc37ef791685ad192bdbc8bf23f.zip cpython-a50d201bd9178bc37ef791685ad192bdbc8bf23f.tar.gz cpython-a50d201bd9178bc37ef791685ad192bdbc8bf23f.tar.bz2 |
needforspeed: stringlib refactoring (in progress)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringlib/README.txt | 5 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 97 | ||||
-rw-r--r-- | Objects/stringobject.c | 99 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 89 |
4 files changed, 111 insertions, 179 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt new file mode 100644 index 0000000..051197c --- /dev/null +++ b/Objects/stringlib/README.txt @@ -0,0 +1,5 @@ +bits shared by the stringobject and unicodeobject implementations (and +possibly other modules, in a not too distant future). + +the stuff in here is included into relevant places; see the individual +source files for details. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h new file mode 100644 index 0000000..1ce7f13 --- /dev/null +++ b/Objects/stringlib/fastsearch.h @@ -0,0 +1,97 @@ +/* stringlib: fastsearch implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#define STRINGLIB_FASTSEARCH_H + +/* fast search/count implementation, based on a mix between boyer- + moore and horspool, with a few more bells and whistles on the top. + for some more background, see: http://effbot.org/stringlib */ + +/* note: fastsearch may access s[n], which isn't a problem when using + Python's ordinary string types, but may cause problems if you're + using this code in other contexts. also, the count mode returns -1 + if there cannot possible be a match in the target string, and 0 if + it has actually checked for matches, but didn't find any. callers + beware! */ + +#define FAST_COUNT 0 +#define FAST_SEARCH 1 + +Py_LOCAL(Py_ssize_t) +fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, + const STRINGLIB_CHAR* p, Py_ssize_t m, + int mode) +{ + long mask; + Py_ssize_t skip, count = 0; + Py_ssize_t i, j, mlast, w; + + w = n - m; + + if (w < 0) + return -1; + + /* look for special cases */ + if (m <= 1) { + if (m <= 0) + return -1; + /* use special case for 1-character strings */ + if (mode == FAST_COUNT) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + count++; + return count; + } else { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + return i; + } + return -1; + } + + mlast = m - 1; + + /* create compressed boyer-moore delta 1 table */ + skip = mlast - 1; + /* process pattern[:-1] */ + for (mask = i = 0; i < mlast; i++) { + mask |= (1 << (p[i] & 0x1F)); + if (p[i] == p[mlast]) + skip = mlast - i - 1; + } + /* process pattern[-1] outside the loop */ + mask |= (1 << (p[mlast] & 0x1F)); + + for (i = 0; i <= w; i++) { + /* note: using mlast in the skip path slows things down on x86 */ + if (s[i+m-1] == p[m-1]) { + /* candidate match */ + for (j = 0; j < mlast; j++) + if (s[i+j] != p[j]) + break; + if (j == mlast) { + /* got a match! */ + if (mode != FAST_COUNT) + return i; + count++; + i = i + mlast; + continue; + } + /* miss: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + else + i = i + skip; + } else { + /* skip: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + } + } + + if (mode != FAST_COUNT) + return -1; + return count; +} + +#endif diff --git a/Objects/stringobject.c b/Objects/stringobject.c index fcd50cd..138ebfe 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -765,102 +765,17 @@ PyString_AsStringAndSize(register PyObject *obj, } /* -------------------------------------------------------------------- */ -/* Helpers */ +/* stringlib components */ -#define USE_FAST /* experimental fast search implementation */ +#define USE_FAST -/* XXX - this code is copied from unicodeobject.c. we really should - refactor the core implementations (see _sre.c for how this can be - done), but that'll have to wait -- fredrik */ - -/* fast search/count implementation, based on a mix between boyer- - moore and horspool, with a few more bells and whistles on the top. - for some more background, see: http://effbot.org/stringlib */ - -/* note: fastsearch may access s[n], which isn't a problem when using - Python's ordinary string types, but may cause problems if you're - using this code in other contexts. also, the count mode returns -1 - if there cannot possibly be a match in the target string, and 0 if - it has actually checked for matches, but didn't find any. callers - beware! */ - -#define FAST_COUNT 0 -#define FAST_SEARCH 1 - -Py_LOCAL(Py_ssize_t) -fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode) -{ - long mask; - Py_ssize_t skip, count = 0; - Py_ssize_t i, j, mlast, w; - - w = n - m; - - if (w < 0) - return -1; +#ifdef USE_FAST - /* look for special cases */ - if (m <= 1) { - if (m <= 0) - return -1; - /* use special case for 1-character strings */ - if (mode == FAST_COUNT) { - for (i = 0; i < n; i++) - if (s[i] == p[0]) - count++; - return count; - } else { - for (i = 0; i < n; i++) - if (s[i] == p[0]) - return i; - } - return -1; - } +#define STRINGLIB_CHAR char - mlast = m - 1; - - /* create compressed boyer-moore delta 1 table */ - skip = mlast - 1; - /* process pattern[:-1] */ - for (mask = i = 0; i < mlast; i++) { - mask |= (1 << (p[i] & 0x1F)); - if (p[i] == p[mlast]) - skip = mlast - i - 1; - } - /* process pattern[-1] outside the loop */ - mask |= (1 << (p[mlast] & 0x1F)); - - for (i = 0; i <= w; i++) { - /* note: using mlast in the skip path slows things down on x86 */ - if (s[i+m-1] == p[m-1]) { - /* candidate match */ - for (j = 0; j < mlast; j++) - if (s[i+j] != p[j]) - break; - if (j == mlast) { - /* got a match! */ - if (mode != FAST_COUNT) - return i; - count++; - i = i + mlast; - continue; - } - /* miss: check if next character is part of pattern */ - if (!(mask & (1 << (s[i+m] & 0x1F)))) - i = i + m; - else - i = i + skip; - } else { - /* skip: check if next character is part of pattern */ - if (!(mask & (1 << (s[i+m] & 0x1F)))) - i = i + m; - } - } +#include "stringlib/fastsearch.h" - if (mode != FAST_COUNT) - return -1; - return count; -} +#endif /* -------------------------------------------------------------------- */ /* Methods */ @@ -2416,7 +2331,7 @@ string_count(PyStringObject *self, PyObject *args) #else r = 0; while (i < m) { - const char *t + const char *t; if (!memcmp(s+i, sub, n)) { r++; i += n; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ab638350..7ce9215 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3854,94 +3854,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, /* --- Helpers ------------------------------------------------------------ */ -/* fast search/count implementation, based on a mix between boyer- - moore and horspool, with a few more bells and whistles on the top. - for some more background, see: http://effbot.org/stringlib */ +#define STRINGLIB_CHAR Py_UNICODE -/* note: fastsearch may access s[n], which isn't a problem when using - Python's ordinary string types, but may cause problems if you're - using this code in other contexts. also, the count mode returns -1 - if there cannot possible be a match in the target string, and 0 if - it has actually checked for matches, but didn't find any. callers - beware! */ - -#define FAST_COUNT 0 -#define FAST_SEARCH 1 - -Py_LOCAL(Py_ssize_t) -fastsearch(Py_UNICODE* s, Py_ssize_t n, Py_UNICODE* p, Py_ssize_t m, int mode) -{ - long mask; - Py_ssize_t skip, count = 0; - Py_ssize_t i, j, mlast, w; - - w = n - m; - - if (w < 0) - return -1; - - /* look for special cases */ - if (m <= 1) { - if (m <= 0) - return -1; - /* use special case for 1-character strings */ - if (mode == FAST_COUNT) { - for (i = 0; i < n; i++) - if (s[i] == p[0]) - count++; - return count; - } else { - for (i = 0; i < n; i++) - if (s[i] == p[0]) - return i; - } - return -1; - } - - mlast = m - 1; - - /* create compressed boyer-moore delta 1 table */ - skip = mlast - 1; - /* process pattern[:-1] */ - for (mask = i = 0; i < mlast; i++) { - mask |= (1 << (p[i] & 0x1F)); - if (p[i] == p[mlast]) - skip = mlast - i - 1; - } - /* process pattern[-1] outside the loop */ - mask |= (1 << (p[mlast] & 0x1F)); - - for (i = 0; i <= w; i++) { - /* note: using mlast in the skip path slows things down on x86 */ - if (s[i+m-1] == p[m-1]) { - /* candidate match */ - for (j = 0; j < mlast; j++) - if (s[i+j] != p[j]) - break; - if (j == mlast) { - /* got a match! */ - if (mode != FAST_COUNT) - return i; - count++; - i = i + mlast; - continue; - } - /* miss: check if next character is part of pattern */ - if (!(mask & (1 << (s[i+m] & 0x1F)))) - i = i + m; - else - i = i + skip; - } else { - /* skip: check if next character is part of pattern */ - if (!(mask & (1 << (s[i+m] & 0x1F)))) - i = i + m; - } - } - - if (mode != FAST_COUNT) - return -1; - return count; -} +#include "stringlib/fastsearch.h" Py_LOCAL(Py_ssize_t) count(PyUnicodeObject *self, Py_ssize_t start, |