diff options
author | Andrew Dalke <dalke@dalkescientific.com> | 2006-05-26 15:21:01 (GMT) |
---|---|---|
committer | Andrew Dalke <dalke@dalkescientific.com> | 2006-05-26 15:21:01 (GMT) |
commit | 02758d66cef8be4cdbe262a8f27837ba85e40cce (patch) | |
tree | bbd7b2f0a4417315ee258b4f65a7b589d1d05caf /Objects/stringobject.c | |
parent | 1d2b0e3f615874aae9b267f341f3461881a1c054 (diff) | |
download | cpython-02758d66cef8be4cdbe262a8f27837ba85e40cce.zip cpython-02758d66cef8be4cdbe262a8f27837ba85e40cce.tar.gz cpython-02758d66cef8be4cdbe262a8f27837ba85e40cce.tar.bz2 |
Eked out another 3% or so performance in split whitespace by cleaning up the algorithm.
Diffstat (limited to 'Objects/stringobject.c')
-rw-r--r-- | Objects/stringobject.c | 73 |
1 files changed, 38 insertions, 35 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 70a2e77..3f1e482 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -1460,7 +1460,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; else \ Py_DECREF(str); -#define SPLIT_ADD(data, left, right) \ +#define SPLIT_ADD(data, left, right) { \ str = PyString_FromStringAndSize((data) + (left), \ (right) - (left)); \ if (str == NULL) \ @@ -1475,11 +1475,16 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; else \ Py_DECREF(str); \ } \ - count++; + count++; } /* Always force the list to the expected size. */ #define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count; +#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; } +#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; } +#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; } +#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; } + static PyObject * split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { @@ -1490,23 +1495,22 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) if (list == NULL) return NULL; - for (i = j = 0; i < len; ) { - while (i < len && isspace(Py_CHARMASK(s[i]))) - i++; - j = i; - while (i < len && !isspace(Py_CHARMASK(s[i]))) - i++; - if (j < i) { - if (maxsplit-- <= 0) - break; - SPLIT_ADD(s, j, i); - while (i < len && isspace(Py_CHARMASK(s[i]))) - i++; - j = i; - } + i = j = 0; + + while (maxsplit-- > 0) { + SKIP_SPACE(s, i, len); + if (i==len) break; + j = i; i++; + SKIP_NONSPACE(s, i, len); + SPLIT_ADD(s, j, i); } - if (j < len) { - SPLIT_ADD(s, j, len); + + if (i < len) { + /* Only occurs when maxsplit was reached */ + /* Skip any remaining whitespace and copy to end of string */ + SKIP_SPACE(s, i, len); + if (i != len) + SPLIT_ADD(s, i, len); } FIX_PREALLOC_SIZE(list); return list; @@ -1680,23 
+1684,22 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) if (list == NULL) return NULL; - for (i = j = len - 1; i >= 0; ) { - while (i >= 0 && isspace(Py_CHARMASK(s[i]))) - i--; - j = i; - while (i >= 0 && !isspace(Py_CHARMASK(s[i]))) - i--; - if (j > i) { - if (maxsplit-- <= 0) - break; - SPLIT_ADD(s, i + 1, j + 1); - while (i >= 0 && isspace(Py_CHARMASK(s[i]))) - i--; - j = i; - } - } - if (j >= 0) { - SPLIT_ADD(s, 0, j + 1); + i = j = len-1; + + while (maxsplit-- > 0) { + RSKIP_SPACE(s, i); + if (i<0) break; + j = i; i--; + RSKIP_NONSPACE(s, i); + SPLIT_ADD(s, i + 1, j + 1); + } + if (i >= 0) { + /* Only occurs when maxsplit was reached */ + /* Skip any remaining whitespace and copy to beginning of string */ + RSKIP_SPACE(s, i); + if (i >= 0) + SPLIT_ADD(s, 0, i + 1); + } FIX_PREALLOC_SIZE(list); if (PyList_Reverse(list) < 0) |