diff options
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/README.txt | 34 | ||||
-rw-r--r-- | Objects/stringlib/count.h | 34 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 104 | ||||
-rw-r--r-- | Objects/stringlib/find.h | 112 | ||||
-rw-r--r-- | Objects/stringlib/partition.h | 111 |
5 files changed, 395 insertions, 0 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt new file mode 100644 index 0000000..82a8774 --- /dev/null +++ b/Objects/stringlib/README.txt @@ -0,0 +1,34 @@ +bits shared by the stringobject and unicodeobject implementations (and +possibly other modules, in a not too distant future). + +the stuff in here is included into relevant places; see the individual +source files for details. + +-------------------------------------------------------------------- +the following defines used by the different modules: + +STRINGLIB_CHAR + + the type used to hold a character (char or Py_UNICODE) + +STRINGLIB_EMPTY + + a PyObject representing the empty string + +int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t) + + compares two strings. returns 0 if they match, and non-zero if not. + +Py_ssize_t STRINGLIB_LEN(PyObject*) + + returns the length of the given string object (which must be of the + right type) + +PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t) + + creates a new string object + +STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) + + returns the pointer to the character data for the given string + object (which must be of the right type) diff --git a/Objects/stringlib/count.h b/Objects/stringlib/count.h new file mode 100644 index 0000000..0bd02b5 --- /dev/null +++ b/Objects/stringlib/count.h @@ -0,0 +1,34 @@ +/* stringlib: count implementation */ + +#ifndef STRINGLIB_COUNT_H +#define STRINGLIB_COUNT_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len) +{ + Py_ssize_t count; + + if (sub_len == 0) + return str_len + 1; + + count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT); + + if (count < 0) + count = 0; /* no match */ + + return count; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h new file mode 100644 index 0000000..8f79c36 --- /dev/null +++ b/Objects/stringlib/fastsearch.h @@ -0,0 +1,104 @@ +/* stringlib: fastsearch implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#define STRINGLIB_FASTSEARCH_H + +/* fast search/count implementation, based on a mix between boyer- + moore and horspool, with a few more bells and whistles on the top. + for some more background, see: http://effbot.org/stringlib */ + +/* note: fastsearch may access s[n], which isn't a problem when using + Python's ordinary string types, but may cause problems if you're + using this code in other contexts. also, the count mode returns -1 + if there cannot possible be a match in the target string, and 0 if + it has actually checked for matches, but didn't find any. callers + beware! */ + +#define FAST_COUNT 0 +#define FAST_SEARCH 1 + +Py_LOCAL_INLINE(Py_ssize_t) +fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, + const STRINGLIB_CHAR* p, Py_ssize_t m, + int mode) +{ + long mask; + Py_ssize_t skip, count = 0; + Py_ssize_t i, j, mlast, w; + + w = n - m; + + if (w < 0) + return -1; + + /* look for special cases */ + if (m <= 1) { + if (m <= 0) + return -1; + /* use special case for 1-character strings */ + if (mode == FAST_COUNT) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + count++; + return count; + } else { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + return i; + } + return -1; + } + + mlast = m - 1; + + /* create compressed boyer-moore delta 1 table */ + skip = mlast - 1; + /* process pattern[:-1] */ + for (mask = i = 0; i < mlast; i++) { + mask |= (1 << (p[i] & 0x1F)); + if (p[i] == p[mlast]) + skip = mlast - i - 1; + } + /* process pattern[-1] outside the loop */ + mask |= (1 << (p[mlast] & 0x1F)); + + for (i = 0; i <= w; i++) { + /* note: using mlast in the skip path slows things down on x86 */ + if (s[i+m-1] == p[m-1]) { + /* candidate match */ + for (j = 0; j < mlast; j++) + if (s[i+j] != p[j]) + break; + if (j == mlast) { + /* got a match! */ + if (mode != FAST_COUNT) + return i; + count++; + i = i + mlast; + continue; + } + /* miss: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + else + i = i + skip; + } else { + /* skip: check if next character is part of pattern */ + if (!(mask & (1 << (s[i+m] & 0x1F)))) + i = i + m; + } + } + + if (mode != FAST_COUNT) + return -1; + return count; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h new file mode 100644 index 0000000..4cea2db --- /dev/null +++ b/Objects/stringlib/find.h @@ -0,0 +1,112 @@ +/* stringlib: find/index implementation */ + +#ifndef STRINGLIB_FIND_H +#define STRINGLIB_FIND_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + if (sub_len == 0) + return offset; + + pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH); + + if (pos >= 0) + pos += offset; + + return pos; +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + /* XXX - create reversefastsearch helper! */ + if (sub_len == 0) + pos = str_len + offset; + else { + Py_ssize_t j; + pos = -1; + for (j = str_len - sub_len; j >= 0; --j) + if (STRINGLIB_CMP(str+j, sub, sub_len) == 0) { + pos = j + offset; + break; + } + } + + return pos; +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + if (start < 0) + start += str_len; + if (start < 0) + start = 0; + if (end > str_len) + end = str_len; + if (end < 0) + end += str_len; + if (end < 0) + end = 0; + + return stringlib_find( + str + start, end - start, + sub, sub_len, start + ); +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + if (start < 0) + start += str_len; + if (start < 0) + start = 0; + if (end > str_len) + end = str_len; + if (end < 0) + end += str_len; + if (end < 0) + end = 0; + + return stringlib_rfind(str + start, end - start, sub, sub_len, start); +} + +#ifdef STRINGLIB_STR + +Py_LOCAL_INLINE(int) +stringlib_contains_obj(PyObject* str, PyObject* sub) +{ + return stringlib_find( + STRINGLIB_STR(str), STRINGLIB_LEN(str), + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0 + ) != -1; +} + +#endif /* STRINGLIB_STR */ + +#endif /* STRINGLIB_FIND_H */ + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h new file mode 100644 index 0000000..1486347 --- /dev/null +++ b/Objects/stringlib/partition.h @@ -0,0 +1,111 @@ +/* stringlib: partition implementation */ + +#ifndef STRINGLIB_PARTITION_H +#define STRINGLIB_PARTITION_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(PyObject*) +stringlib_partition( + PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, + PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len + ) +{ + PyObject* out; + Py_ssize_t pos; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH); + + if (pos < 0) { + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +Py_LOCAL_INLINE(PyObject*) +stringlib_rpartition( + PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len, + PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len + ) +{ + PyObject* out; + Py_ssize_t pos, j; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + /* XXX - create reversefastsearch helper! */ + pos = -1; + for (j = str_len - sep_len; j >= 0; --j) + if (STRINGLIB_CMP(str+j, sep, sep_len) == 0) { + pos = j; + break; + } + + if (pos < 0) { + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +#endif + +/* +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ |