diff options
author | Skip Montanaro <skip@pobox.com> | 2007-12-08 15:33:24 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2007-12-08 15:33:24 (GMT) |
commit | 26015494f2bbbae9910e94240f190ce9551c3549 (patch) | |
tree | ceb9f04c72b0308c32995c90584e6b9e4f0492af /Objects | |
parent | 546624609e814d91a4d4fc6ad65c100309bac160 (diff) | |
download | cpython-26015494f2bbbae9910e94240f190ce9551c3549.zip cpython-26015494f2bbbae9910e94240f190ce9551c3549.tar.gz cpython-26015494f2bbbae9910e94240f190ce9551c3549.tar.bz2 |
When splitting, avoid making a copy of the string if the split doesn't find
anything (issue 1538).
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringobject.c | 53 |
1 files changed, 42 insertions, 11 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c index a0e2837..2c8c989 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1403,8 +1403,9 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; } Py_LOCAL_INLINE(PyObject *) -split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) +split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit) { + const char *s = PyString_AS_STRING(self); Py_ssize_t i, j, count=0; PyObject *str; PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); @@ -1419,6 +1420,13 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) if (i==len) break; j = i; i++; SKIP_NONSPACE(s, i, len); + if (j == 0 && i == len && PyString_CheckExact(self)) { + /* No whitespace in self, so just use it as list[0] */ + Py_INCREF(self); + PyList_SET_ITEM(list, 0, (PyObject *)self); + count++; + break; + } SPLIT_ADD(s, j, i); } @@ -1437,8 +1445,9 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) } Py_LOCAL_INLINE(PyObject *) -split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) +split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) { + const char *s = PyString_AS_STRING(self); register Py_ssize_t i, j, count=0; PyObject *str; PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); @@ -1457,7 +1466,13 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) } } } - if (i <= len) { + if (i == 0 && count == 0 && PyString_CheckExact(self)) { + /* ch not in self, so just use self as list[0] */ + Py_INCREF(self); + PyList_SET_ITEM(list, 0, (PyObject *)self); + count++; + } + else if (i <= len) { SPLIT_ADD(s, i, len); } FIX_PREALLOC_SIZE(list); @@ -1492,7 +1507,7 @@ string_split(PyStringObject *self, PyObject *args) if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return split_whitespace(s, len, maxsplit); + return split_whitespace(self, len, maxsplit); if (PyString_Check(subobj)) { sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); @@ -1509,7 +1524,7 @@ string_split(PyStringObject *self, PyObject *args) return NULL; } else if (n == 1) - return split_char(s, len, sub[0], maxsplit); + return split_char(self, len, sub[0], maxsplit); list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) @@ -1609,8 +1624,9 @@ string_rpartition(PyStringObject *self, PyObject *sep_obj) } Py_LOCAL_INLINE(PyObject *) -rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) +rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit) { + const char *s = PyString_AS_STRING(self); Py_ssize_t i, j, count=0; PyObject *str; PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); @@ -1625,6 +1641,13 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) if (i<0) break; j = i; i--; RSKIP_NONSPACE(s, i); + if (j == len-1 && i < 0 && PyString_CheckExact(self)) { + /* No whitespace in self, so just use it as list[0] */ + Py_INCREF(self); + PyList_SET_ITEM(list, 0, (PyObject *)self); + count++; + break; + } SPLIT_ADD(s, i + 1, j + 1); } if (i >= 0) { @@ -1645,8 +1668,9 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) } Py_LOCAL_INLINE(PyObject *) -rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) +rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) { + const char *s = PyString_AS_STRING(self); register Py_ssize_t i, j, count=0; PyObject *str; PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); @@ -1664,7 +1688,13 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) } } } - if (j >= -1) { + if (i < 0 && count == 0 && PyString_CheckExact(self)) { + /* ch not in self, so just use self as list[0] */ + Py_INCREF(self); + PyList_SET_ITEM(list, 0, (PyObject *)self); + count++; + } + else if (j >= -1) { SPLIT_ADD(s, 0, j + 1); } FIX_PREALLOC_SIZE(list); @@ -1691,7 +1721,7 @@ string_rsplit(PyStringObject *self, PyObject *args) { Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; Py_ssize_t maxsplit = -1, count=0; - const char *s = PyString_AS_STRING(self), *sub; + const char *s, *sub; PyObject *list, *str, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) @@ -1699,7 +1729,7 @@ string_rsplit(PyStringObject *self, PyObject *args) if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return rsplit_whitespace(s, len, maxsplit); + return rsplit_whitespace(self, len, maxsplit); if (PyString_Check(subobj)) { sub = PyString_AS_STRING(subobj); n = PyString_GET_SIZE(subobj); @@ -1716,7 +1746,7 @@ string_rsplit(PyStringObject *self, PyObject *args) return NULL; } else if (n == 1) - return rsplit_char(s, len, sub[0], maxsplit); + return rsplit_char(self, len, sub[0], maxsplit); list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) @@ -1725,6 +1755,7 @@ string_rsplit(PyStringObject *self, PyObject *args) j = len; i = j - n; + s = PyString_AS_STRING(self); while ( (i >= 0) && (maxsplit-- > 0) ) { for (; i>=0; i--) { if (Py_STRING_MATCH(s, i, sub, n)) { |