diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2006-05-25 23:27:53 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2006-05-25 23:27:53 (GMT) |
commit | fe5bb7e6d90e29affb02a654893a2a18d31b369e (patch) | |
tree | b8a10921c53e4b0619bbe1dfee94a2ba17f5607f | |
parent | d89fc22dc6300ef3f1a8447a34f9ad2d15c722d5 (diff) | |
download | cpython-fe5bb7e6d90e29affb02a654893a2a18d31b369e.zip cpython-fe5bb7e6d90e29affb02a654893a2a18d31b369e.tar.gz cpython-fe5bb7e6d90e29affb02a654893a2a18d31b369e.tar.bz2 |
needforspeed: partition for 8-bit strings. for some simple tests,
this is on par with a corresponding find, and nearly twice as fast
as split(sep, 1)
full tests, a unicode version, and documentation will follow to-
morrow.
-rw-r--r-- | Objects/stringobject.c | 71 |
1 files changed, 66 insertions, 5 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 1088d3f..2dfac03 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -799,8 +799,7 @@ PyString_AsStringAndSize(register PyObject *obj, #define FAST_SEARCH 1 LOCAL(Py_ssize_t) - fastsearch(const char* s, Py_ssize_t n, const char* p, - Py_ssize_t m, int mode) +fastsearch(const char* s, Py_ssize_t n, const char* p, Py_ssize_t m, int mode) { long mask; int skip, count = 0; @@ -860,10 +859,8 @@ LOCAL(Py_ssize_t) /* miss: check if next character is part of pattern */ if (!(mask & (1 << (s[i+m] & 0x1F)))) i = i + m; - else { + else i = i + skip; - continue; - } } else { /* skip: check if next character is part of pattern */ if (!(mask & (1 << (s[i+m] & 0x1F)))) @@ -1601,6 +1598,68 @@ string_split(PyStringObject *self, PyObject *args) return NULL; } +PyDoc_STRVAR(partition__doc__, +"S.partition(sep) -> (head, sep, tail)\n\ +\n\ +Searches for the separator sep in S, and returns the part before it,\n\ +the separator itself, and the part after it. If the separator is not\n\ +found, returns S and two empty strings."); + +static PyObject * +string_partition(PyStringObject *self, PyObject *args) +{ + Py_ssize_t len = PyString_GET_SIZE(self), sep_len, pos; + const char *str = PyString_AS_STRING(self), *sep; + PyObject *sepobj; + PyObject * out; + + if (!PyArg_ParseTuple(args, "O:partition", &sepobj)) + return NULL; + if (PyString_Check(sepobj)) { + sep = PyString_AS_STRING(sepobj); + sep_len = PyString_GET_SIZE(sepobj); + } +#ifdef Py_USING_UNICODE_NOTYET + else if (PyUnicode_Check(sepobj)) + return PyUnicode_Partition((PyObject *)self, sepobj); +#endif + else if (PyObject_AsCharBuffer(sepobj, &sep, &sep_len)) + return NULL; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH); + if (pos < 0) { + Py_INCREF(self); + PyTuple_SET_ITEM(out, 0, (PyObject*) self); + Py_INCREF(nullstring); + PyTuple_SET_ITEM(out, 1, (PyObject*) nullstring); + Py_INCREF(nullstring); + PyTuple_SET_ITEM(out, 2, (PyObject*) nullstring); + } else { + Py_INCREF(sepobj); + PyTuple_SET_ITEM(out, 0, PyString_FromStringAndSize(str, pos)); + PyTuple_SET_ITEM(out, 1, sepobj); + PyTuple_SET_ITEM(out, 2, + PyString_FromStringAndSize(str + sep_len + pos, + len - sep_len - pos) + ); + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + } + + return out; +} + static PyObject * rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { @@ -3910,6 +3969,8 @@ string_methods[] = { {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__}, + {"partition", (PyCFunction)string_partition, METH_VARARGS, + partition__doc__}, {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, |