diff options
-rw-r--r-- | Objects/stringlib/README.txt | 29 | ||||
-rw-r--r-- | Objects/stringlib/find.h | 33 | ||||
-rw-r--r-- | Objects/stringobject.c | 94 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 152 |
4 files changed, 161 insertions, 147 deletions
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index 051197c..82a8774 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -3,3 +3,32 @@ possibly other modules, in a not too distant future). the stuff in here is included into relevant places; see the individual source files for details. + +-------------------------------------------------------------------- +the following defines are used by the different modules: + +STRINGLIB_CHAR + + the type used to hold a character (char or Py_UNICODE) + +STRINGLIB_EMPTY + + a PyObject representing the empty string + +int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t) + + compares two strings. returns 0 if they match, and non-zero if not. + +Py_ssize_t STRINGLIB_LEN(PyObject*) + + returns the length of the given string object (which must be of the + right type) + +PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t) + + creates a new string object + +STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) + + returns the pointer to the character data for the given string + object (which must be of the right type) diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index d5394b4..9db633d 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -48,6 +48,39 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return pos; } +#ifdef STRINGLIB_STR + +Py_LOCAL(Py_ssize_t) +stringlib_find_obj(PyObject* str, PyObject* sub, + Py_ssize_t start, Py_ssize_t end) +{ + return stringlib_find( + STRINGLIB_STR(str) + start, end - start, + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start + ); +} + +Py_LOCAL(int) +stringlib_contains_obj(PyObject* str, PyObject* sub) +{ + return stringlib_find( + STRINGLIB_STR(str), STRINGLIB_LEN(str), + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0 + ) != -1; +} + +Py_LOCAL(Py_ssize_t) +stringlib_rfind_obj(PyObject* str, PyObject* sub, + Py_ssize_t start, Py_ssize_t end) +{ + return stringlib_rfind( + STRINGLIB_STR(str) + start, end 
- start, + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start + ); +} + +#endif + #endif /* diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 7e93783..c881927 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -690,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s, return NULL; } +/* -------------------------------------------------------------------- */ +/* object api */ + static Py_ssize_t string_getsize(register PyObject *op) { @@ -765,22 +768,23 @@ PyString_AsStringAndSize(register PyObject *obj, } /* -------------------------------------------------------------------- */ -/* stringlib components */ +/* Methods */ #define STRINGLIB_CHAR char -#define STRINGLIB_NEW PyString_FromStringAndSize #define STRINGLIB_CMP memcmp +#define STRINGLIB_LEN PyString_GET_SIZE +#define STRINGLIB_NEW PyString_FromStringAndSize +#define STRINGLIB_STR PyString_AS_STRING #define STRINGLIB_EMPTY nullstring #include "stringlib/fastsearch.h" +#include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/partition.h" -/* -------------------------------------------------------------------- */ -/* Methods */ static int string_print(PyStringObject *op, FILE *fp, int flags) @@ -1048,49 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i, } static int -string_contains(PyObject *a, PyObject *el) +string_contains(PyObject *str_obj, PyObject *sub_obj) { - char *s = PyString_AS_STRING(a); - const char *sub = PyString_AS_STRING(el); - Py_ssize_t len_sub = PyString_GET_SIZE(el); - Py_ssize_t pos; - - if (!PyString_CheckExact(el)) { + if (!PyString_CheckExact(sub_obj)) { #ifdef Py_USING_UNICODE - if (PyUnicode_Check(el)) - return PyUnicode_Contains(a, el); + if (PyUnicode_Check(sub_obj)) + return PyUnicode_Contains(str_obj, sub_obj); #endif - if (!PyString_Check(el)) { + if (!PyString_Check(sub_obj)) { PyErr_SetString(PyExc_TypeError, "'in <string>' requires string as left operand"); return -1; } } - if (len_sub == 0) - 
return 1; - - pos = fastsearch( - s, PyString_GET_SIZE(a), - sub, len_sub, FAST_SEARCH - ); - - return (pos != -1); + return stringlib_contains_obj(str_obj, sub_obj); } static PyObject * string_item(PyStringObject *a, register Py_ssize_t i) { + char pchar; PyObject *v; - char *pchar; if (i < 0 || i >= a->ob_size) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } - pchar = a->ob_sval + i; - v = (PyObject *)characters[*pchar & UCHAR_MAX]; + pchar = a->ob_sval[i]; + v = (PyObject *)characters[pchar & UCHAR_MAX]; if (v == NULL) - v = PyString_FromStringAndSize(pchar, 1); + v = PyString_FromStringAndSize(&pchar, 1); else { #ifdef COUNT_ALLOCS one_strings++; @@ -1166,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op) int _PyString_Eq(PyObject *o1, PyObject *o2) { - PyStringObject *a, *b; - a = (PyStringObject*)o1; - b = (PyStringObject*)o2; + PyStringObject *a = (PyStringObject*) o1; + PyStringObject *b = (PyStringObject*) o2; return a->ob_size == b->ob_size && *a->ob_sval == *b->ob_sval && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0; @@ -2264,43 +2254,37 @@ as in slice notation."); static PyObject * string_count(PyStringObject *self, PyObject *args) { - const char *s = PyString_AS_STRING(self), *sub; - Py_ssize_t len = PyString_GET_SIZE(self), n; - Py_ssize_t i = 0, last = PY_SSIZE_T_MAX; - Py_ssize_t m, r; - PyObject *subobj; + PyObject *sub_obj; + const char *str = PyString_AS_STRING(self), *sub; + Py_ssize_t sub_len; + Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; - if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj, - _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) + if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, + _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); - n = PyString_GET_SIZE(subobj); + if (PyString_Check(sub_obj)) { + sub = PyString_AS_STRING(sub_obj); + sub_len = PyString_GET_SIZE(sub_obj); } 
#ifdef Py_USING_UNICODE - else if (PyUnicode_Check(subobj)) { + else if (PyUnicode_Check(sub_obj)) { Py_ssize_t count; - count = PyUnicode_Count((PyObject *)self, subobj, i, last); + count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); if (count == -1) return NULL; else - return PyInt_FromLong((long) count); + return PyInt_FromSsize_t(count); } #endif - else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) return NULL; - string_adjust_indices(&i, &last, len); + string_adjust_indices(&start, &end, PyString_GET_SIZE(self)); - m = last + 1 - n; - if (n == 0) - return PyInt_FromSsize_t(m-i); - - r = fastsearch(s + i, last - i, sub, n, FAST_COUNT); - if (r < 0) - r = 0; /* no match */ - return PyInt_FromSsize_t(r); + return PyInt_FromSsize_t( + stringlib_count(str + start, end - start, sub, sub_len) + ); } PyDoc_STRVAR(swapcase__doc__, @@ -2477,7 +2461,7 @@ return_self(PyStringObject *self) } Py_LOCAL(Py_ssize_t) - countchar(char *target, int target_len, char c, Py_ssize_t maxcount) +countchar(char *target, int target_len, char c, Py_ssize_t maxcount) { Py_ssize_t count=0; char *start=target; @@ -2580,7 +2564,7 @@ countstring(char *target, Py_ssize_t target_len, } -/* Algorithms for difference cases of string replacement */ +/* Algorithms for different cases of string replacement */ /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ Py_LOCAL(PyStringObject *) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 26cf521..8cb32e1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3857,7 +3857,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, #define STRINGLIB_CHAR Py_UNICODE +#define STRINGLIB_LEN PyUnicode_GET_SIZE #define STRINGLIB_NEW PyUnicode_FromUnicode +#define STRINGLIB_STR PyUnicode_AS_UNICODE Py_LOCAL(int) STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) @@ -3918,67 +3920,33 @@ Py_ssize_t PyUnicode_Count(PyObject *str, return result; 
} -static Py_ssize_t findstring(PyUnicodeObject *self, - PyUnicodeObject *substring, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - FIX_START_END(self); - - if (substring->length == 0) - return (direction > 0) ? start : end; - - if (direction > 0) { - Py_ssize_t pos = fastsearch( - PyUnicode_AS_UNICODE(self) + start, end - start, - substring->str, substring->length, FAST_SEARCH - ); - if (pos >= 0) - return pos + start; - } else { - end -= substring->length; - for (; end >= start; end--) - if (Py_UNICODE_MATCH(self, end, substring)) - return end; - } - return -1; -} - Py_ssize_t PyUnicode_Find(PyObject *str, - PyObject *substr, + PyObject *sub, Py_ssize_t start, Py_ssize_t end, int direction) { Py_ssize_t result; - PyUnicodeObject* str_obj; - PyUnicodeObject* sub_obj; - str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str); - if (!str_obj) + str = PyUnicode_FromObject(str); + if (!str) return -2; - sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr); - if (!sub_obj) { - Py_DECREF(str_obj); + sub = PyUnicode_FromObject(sub); + if (!sub) { + Py_DECREF(str); return -2; } - FIX_START_END(str_obj); + FIX_START_END((PyUnicodeObject*) str); if (direction > 0) - result = stringlib_find( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length, - start - ); + result = stringlib_find_obj(str, sub, start, end); else - result = stringlib_rfind( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length, - start - ); + result = stringlib_rfind_obj(str, sub, start, end); + + Py_DECREF(str); + Py_DECREF(sub); - Py_DECREF(str_obj); - Py_DECREF(sub_obj); return result; } @@ -5046,39 +5014,29 @@ onError: int PyUnicode_Contains(PyObject *container, PyObject *element) { - PyUnicodeObject *u, *v; - Py_ssize_t size; - Py_ssize_t pos; + PyObject *str, *sub; + int result; /* Coerce the two arguments */ - v = (PyUnicodeObject *) PyUnicode_FromObject(element); - if (!v) { + sub = PyUnicode_FromObject(element); + if (!sub) { 
PyErr_SetString(PyExc_TypeError, "'in <string>' requires string as left operand"); return -1; } - u = (PyUnicodeObject *) PyUnicode_FromObject(container); - if (!u) { - Py_DECREF(v); + str = PyUnicode_FromObject(container); + if (!str) { + Py_DECREF(sub); return -1; } - size = PyUnicode_GET_SIZE(v); - if (!size) { - pos = 0; - goto done; - } + result = stringlib_contains_obj(str, sub); - pos = fastsearch( - PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u), - PyUnicode_AS_UNICODE(v), size, FAST_SEARCH - ); + Py_DECREF(str); + Py_DECREF(sub); -done: - Py_DECREF(u); - Py_DECREF(v); - return (pos != -1); + return result; } /* Concat to string or Unicode object giving a new Unicode object. */ @@ -5305,23 +5263,26 @@ Return -1 on failure."); static PyObject * unicode_find(PyUnicodeObject *self, PyObject *args) { - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1)); + FIX_START_END(self); + + result = stringlib_find_obj((PyObject*) self, substring, start, end); Py_DECREF(substring); - return result; + + return PyInt_FromSsize_t(result); } static PyObject * @@ -5371,7 +5332,7 @@ static PyObject * unicode_index(PyUnicodeObject *self, PyObject *args) { Py_ssize_t result; - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; @@ -5379,18 +5340,21 @@ unicode_index(PyUnicodeObject *self, PyObject *args) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if 
(substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = findstring(self, substring, start, end, 1); + FIX_START_END(self); + + result = stringlib_find_obj((PyObject*) self, substring, start, end); Py_DECREF(substring); + if (result < 0) { PyErr_SetString(PyExc_ValueError, "substring not found"); return NULL; } + return PyInt_FromSsize_t(result); } @@ -6038,23 +6002,25 @@ Return -1 on failure."); static PyObject * unicode_rfind(PyUnicodeObject *self, PyObject *args) { - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1)); + FIX_START_END(self); + + result = stringlib_rfind_obj((PyObject*)self, substring, start, end); Py_DECREF(substring); - return result; + + return PyInt_FromSsize_t(result); } PyDoc_STRVAR(rindex__doc__, @@ -6065,22 +6031,24 @@ Like S.rfind() but raise ValueError when the substring is not found."); static PyObject * unicode_rindex(PyUnicodeObject *self, PyObject *args) { - Py_ssize_t result; - PyUnicodeObject *substring; + PyObject *substring; Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; + Py_ssize_t result; if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - substring = (PyUnicodeObject *)PyUnicode_FromObject( - (PyObject *)substring); - if (substring == NULL) + substring = PyUnicode_FromObject(substring); + if (!substring) return NULL; - result = findstring(self, substring, start, end, -1); + FIX_START_END(self); + + result = 
stringlib_rfind_obj((PyObject*)self, substring, start, end); Py_DECREF(substring); + if (result < 0) { PyErr_SetString(PyExc_ValueError, "substring not found"); return NULL; |