diff options
author | Victor Stinner <vstinner@wyplay.com> | 2011-10-05 12:13:28 (GMT) |
---|---|---|
committer | Victor Stinner <vstinner@wyplay.com> | 2011-10-05 12:13:28 (GMT) |
commit | c80d6d20d57090016372ba6be7325b1fc0649413 (patch) | |
tree | d28fdb1b07492ebabfc8133405d3cee32e7431f5 /Objects | |
parent | ae86485517cab27d6cef96d036e870888660a144 (diff) | |
download | cpython-c80d6d20d57090016372ba6be7325b1fc0649413.zip cpython-c80d6d20d57090016372ba6be7325b1fc0649413.tar.gz cpython-c80d6d20d57090016372ba6be7325b1fc0649413.tar.bz2 |
Speed up str[a:b:step] for step != 1
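Stop scanning for the maximum character as soon as a character reaches a limit
that depends on the source kind (e.g. 256 for PyUnicode_2BYTE_KIND), instead of
always scanning to the end of the slice: once that limit is reached, later
characters cannot change the representation needed for the result.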
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 26 |
1 file changed, 23 insertions, 3 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93459a7..417f955 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1520,6 +1520,22 @@ unicode_fromascii(const unsigned char* u, Py_ssize_t size) return res; } +static Py_UCS4 +kind_maxchar_limit(unsigned int kind) +{ + switch(kind) { + case PyUnicode_1BYTE_KIND: + return 0x80; + case PyUnicode_2BYTE_KIND: + return 0x100; + case PyUnicode_4BYTE_KIND: + return 0x10000; + default: + assert(0 && "invalid kind"); + return 0x10ffff; + } +} + static PyObject* _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size) { @@ -12335,7 +12351,7 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item) PyObject *result; void *src_data, *dest_data; int src_kind, dest_kind; - Py_UCS4 ch, max_char; + Py_UCS4 ch, max_char, kind_limit; if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self), &start, &stop, &step, &slicelength) < 0) { @@ -12354,13 +12370,17 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item) start, start + slicelength); } /* General case */ - max_char = 127; + max_char = 0; src_kind = PyUnicode_KIND(self); + kind_limit = kind_maxchar_limit(src_kind); src_data = PyUnicode_DATA(self); for (cur = start, i = 0; i < slicelength; cur += step, i++) { ch = PyUnicode_READ(src_kind, src_data, cur); - if (ch > max_char) + if (ch > max_char) { max_char = ch; + if (max_char >= kind_limit) + break; + } } result = PyUnicode_New(slicelength, max_char); if (result == NULL) |