diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2003-12-15 18:49:53 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2003-12-15 18:49:53 (GMT) |
commit | 3ae811b57d227a220f207869487fd9251e278608 (patch) | |
tree | ccbc4b81578fa69e6bc65df8da4994bf9a4b6e49 /Objects | |
parent | dce391cb398f4ce266d98130d10810a6a36617b3 (diff) | |
download | cpython-3ae811b57d227a220f207869487fd9251e278608.zip cpython-3ae811b57d227a220f207869487fd9251e278608.tar.gz cpython-3ae811b57d227a220f207869487fd9251e278608.tar.bz2 |
Add rsplit method for str and unicode builtin types.
SF feature request #801847.
Original patch is written by Sean Reifschneider.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringobject.c | 124 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 192 |
2 files changed, 315 insertions, 1 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c index d3351df..9512059 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1407,6 +1407,129 @@ string_split(PyStringObject *self, PyObject *args) return NULL; } +static PyObject * +rsplit_whitespace(const char *s, int len, int maxsplit) +{ + int i, j, err; + PyObject* item; + PyObject *list = PyList_New(0); + + if (list == NULL) + return NULL; + + for (i = j = len - 1; i >= 0; ) { + while (i >= 0 && isspace(Py_CHARMASK(s[i]))) + i--; + j = i; + while (i >= 0 && !isspace(Py_CHARMASK(s[i]))) + i--; + if (j > i) { + if (maxsplit-- <= 0) + break; + item = PyString_FromStringAndSize(s+i+1, (int)(j-i)); + if (item == NULL) + goto finally; + err = PyList_Insert(list, 0, item); + Py_DECREF(item); + if (err < 0) + goto finally; + while (i >= 0 && isspace(Py_CHARMASK(s[i]))) + i--; + j = i; + } + } + if (j >= 0) { + item = PyString_FromStringAndSize(s, (int)(j + 1)); + if (item == NULL) + goto finally; + err = PyList_Insert(list, 0, item); + Py_DECREF(item); + if (err < 0) + goto finally; + } + return list; + finally: + Py_DECREF(list); + return NULL; +} + + +PyDoc_STRVAR(rsplit__doc__, +"S.rsplit([sep [,maxsplit]]) -> list of strings\n\ +\n\ +Return a list of the words in the string S, using sep as the\n\ +delimiter string, starting at the end of the string and working\n\ +to the front. If maxsplit is given, at most maxsplit splits are\n\ +done. If sep is not specified or is None, any whitespace string\n\ +is a separator."); + +static PyObject * +string_rsplit(PyStringObject *self, PyObject *args) +{ + int len = PyString_GET_SIZE(self), n, i, j, err; + int maxsplit = -1; + const char *s = PyString_AS_STRING(self), *sub; + PyObject *list, *item, *subobj = Py_None; + + if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit)) + return NULL; + if (maxsplit < 0) + maxsplit = INT_MAX; + if (subobj == Py_None) + return rsplit_whitespace(s, len, maxsplit); + if (PyString_Check(subobj)) { + sub = PyString_AS_STRING(subobj); + n = PyString_GET_SIZE(subobj); + } +#ifdef Py_USING_UNICODE + else if (PyUnicode_Check(subobj)) + return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); +#endif + else if (PyObject_AsCharBuffer(subobj, &sub, &n)) + return NULL; + if (n == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + list = PyList_New(0); + if (list == NULL) + return NULL; + + j = len; + i = j - n; + while (i >= 0) { + if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) { + if (maxsplit-- <= 0) + break; + item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n)); + if (item == NULL) + goto fail; + err = PyList_Insert(list, 0, item); + Py_DECREF(item); + if (err < 0) + goto fail; + j = i; + i -= n; + } + else + i--; + } + item = PyString_FromStringAndSize(s, j); + if (item == NULL) + goto fail; + err = PyList_Insert(list, 0, item); + Py_DECREF(item); + if (err < 0) + goto fail; + + return list; + + fail: + Py_DECREF(list); + return NULL; +} + PyDoc_STRVAR(join__doc__, "S.join(sequence) -> string\n\ @@ -3064,6 +3187,7 @@ string_methods[] = { string.maketrans(). */ {"join", (PyCFunction)string_join, METH_O, join__doc__}, {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, + {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f87d749..19d0353 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4053,7 +4053,7 @@ PyUnicodeObject *pad(PyUnicodeObject *self, } #define SPLIT_APPEND(data, left, right) \ - str = PyUnicode_FromUnicode(data + left, right - left); \ + str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \ if (!str) \ goto onError; \ if (PyList_Append(list, str)) { \ @@ -4063,6 +4063,17 @@ PyUnicodeObject *pad(PyUnicodeObject *self, else \ Py_DECREF(str); +#define SPLIT_INSERT(data, left, right) \ + str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \ + if (!str) \ + goto onError; \ + if (PyList_Insert(list, 0, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); + static PyObject *split_whitespace(PyUnicodeObject *self, PyObject *list, @@ -4214,7 +4225,106 @@ PyObject *split_substring(PyUnicodeObject *self, return NULL; } +static +PyObject *rsplit_whitespace(PyUnicodeObject *self, + PyObject *list, + int maxcount) +{ + register int i; + register int j; + int len = self->length; + PyObject *str; + + for (i = j = len - 1; i >= 0; ) { + /* find a token */ + while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i])) + i--; + j = i; + while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i])) + i--; + if (j > i) { + if (maxcount-- <= 0) + break; + SPLIT_INSERT(self->str, i + 1, j + 1); + while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i])) + i--; + j = i; + } + } + if (j >= 0) { + SPLIT_INSERT(self->str, 0, j + 1); + } + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +static +PyObject *rsplit_char(PyUnicodeObject *self, + PyObject *list, + Py_UNICODE ch, + int maxcount) +{ + register int i; + register int j; + int len = self->length; + PyObject *str; + + for (i = j = len - 1; i >= 0; ) { + if (self->str[i] == ch) { + if (maxcount-- <= 0) + break; + SPLIT_INSERT(self->str, i + 1, j + 1); + j = i = i - 1; + } else + i--; + } + if (j >= 0) { + SPLIT_INSERT(self->str, 0, j + 1); + } + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +static +PyObject *rsplit_substring(PyUnicodeObject *self, + PyObject *list, + PyUnicodeObject *substring, + int maxcount) +{ + register int i; + register int j; + int len = self->length; + int sublen = substring->length; + PyObject *str; + + for (i = len - sublen, j = len; i >= 0; ) { + if (Py_UNICODE_MATCH(self, i, substring)) { + if (maxcount-- <= 0) + break; + SPLIT_INSERT(self->str, i + sublen, j); + j = i; + i -= sublen; + } else + i--; + } + if (j >= 0) { + SPLIT_INSERT(self->str, 0, j); + } + return list; + + onError: + Py_DECREF(list); + return NULL; +} + #undef SPLIT_APPEND +#undef SPLIT_INSERT static PyObject *split(PyUnicodeObject *self, @@ -4246,6 +4356,35 @@ PyObject *split(PyUnicodeObject *self, } static +PyObject *rsplit(PyUnicodeObject *self, + PyUnicodeObject *substring, + int maxcount) +{ + PyObject *list; + + if (maxcount < 0) + maxcount = INT_MAX; + + list = PyList_New(0); + if (!list) + return NULL; + + if (substring == NULL) + return rsplit_whitespace(self,list,maxcount); + + else if (substring->length == 1) + return rsplit_char(self,list,substring->str[0],maxcount); + + else if (substring->length == 0) { + Py_DECREF(list); + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + else + return rsplit_substring(self,list,substring,maxcount); +} + +static PyObject *replace(PyUnicodeObject *self, PyUnicodeObject *str1, PyUnicodeObject *str2, @@ -5675,6 +5814,56 @@ unicode_split(PyUnicodeObject *self, PyObject *args) return PyUnicode_Split((PyObject *)self, substring, maxcount); } +PyObject *PyUnicode_RSplit(PyObject *s, + PyObject *sep, + int maxsplit) +{ + PyObject *result; + + s = PyUnicode_FromObject(s); + if (s == NULL) + return NULL; + if (sep != NULL) { + sep = PyUnicode_FromObject(sep); + if (sep == NULL) { + Py_DECREF(s); + return NULL; + } + } + + result = rsplit((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit); + + Py_DECREF(s); + Py_XDECREF(sep); + return result; +} + +PyDoc_STRVAR(rsplit__doc__, +"S.rsplit([sep [,maxsplit]]) -> list of strings\n\ +\n\ +Return a list of the words in S, using sep as the\n\ +delimiter string, starting at the end of the string and\n\ +working to the front. If maxsplit is given, at most maxsplit\n\ +splits are done. If sep is not specified, any whitespace string\n\ +is a separator."); + +static PyObject* +unicode_rsplit(PyUnicodeObject *self, PyObject *args) +{ + PyObject *substring = Py_None; + int maxcount = -1; + + if (!PyArg_ParseTuple(args, "|Oi:rsplit", &substring, &maxcount)) + return NULL; + + if (substring == Py_None) + return rsplit(self, NULL, maxcount); + else if (PyUnicode_Check(substring)) + return rsplit(self, (PyUnicodeObject *)substring, maxcount); + else + return PyUnicode_RSplit((PyObject *)self, substring, maxcount); +} + PyDoc_STRVAR(splitlines__doc__, "S.splitlines([keepends]]) -> list of strings\n\ \n\ @@ -5870,6 +6059,7 @@ static PyMethodDef unicode_methods[] = { {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__}, {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, + {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, |