diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-04 18:00:49 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2011-10-04 18:00:49 (GMT) |
commit | 875f29bb95ffa76b6dbbf87cacfe525f9bc5031d (patch) | |
tree | 465ad03be5e275cb598b9e6aa742269b4e9a780b /Objects | |
parent | 2b72f83877e1758676ca1ee157451466e7e2dad2 (diff) | |
download | cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.zip cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.tar.gz cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.tar.bz2 |
Fix naïve heuristic in unicode slicing (followup to 1b4f886dc9e2)
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/unicodeobject.c | 22 |
1 files changed, 15 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 594623c..a958951 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12258,7 +12258,8 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
         Py_ssize_t start, stop, step, slicelength, cur, i;
         PyObject *result;
         void *src_data, *dest_data;
-        int kind;
+        int src_kind, dest_kind;
+        Py_UCS4 ch, max_char;
 
         if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self),
                                  &start, &stop, &step, &slicelength) < 0) {
@@ -12276,17 +12277,24 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
             return PyUnicode_Substring((PyObject*)self,
                                        start, start + slicelength);
         }
-        /* General (less optimized) case */
-        result = PyUnicode_New(slicelength, PyUnicode_MAX_CHAR_VALUE(self));
+        /* General case */
+        max_char = 127;
+        src_kind = PyUnicode_KIND(self);
+        src_data = PyUnicode_DATA(self);
+        for (cur = start, i = 0; i < slicelength; cur += step, i++) {
+            ch = PyUnicode_READ(src_kind, src_data, cur);
+            if (ch > max_char)
+                max_char = ch;
+        }
+        result = PyUnicode_New(slicelength, max_char);
         if (result == NULL)
             return NULL;
-        kind = PyUnicode_KIND(self);
-        src_data = PyUnicode_DATA(self);
+        dest_kind = PyUnicode_KIND(result);
         dest_data = PyUnicode_DATA(result);
 
         for (cur = start, i = 0; i < slicelength; cur += step, i++) {
-            Py_UCS4 ch = PyUnicode_READ(kind, src_data, cur);
-            PyUnicode_WRITE(kind, dest_data, i, ch);
+            Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur);
+            PyUnicode_WRITE(dest_kind, dest_data, i, ch);
         }
         return result;
     } else {