summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net>, 2011-10-04 18:00:49 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net>, 2011-10-04 18:00:49 (GMT)
commit: 875f29bb95ffa76b6dbbf87cacfe525f9bc5031d (patch)
tree: 465ad03be5e275cb598b9e6aa742269b4e9a780b /Objects
parent: 2b72f83877e1758676ca1ee157451466e7e2dad2 (diff)
download: cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.zip
          cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.tar.gz
          cpython-875f29bb95ffa76b6dbbf87cacfe525f9bc5031d.tar.bz2
Fix naïve heuristic in unicode slicing (followup to 1b4f886dc9e2)
Diffstat (limited to 'Objects')
-rw-r--r--  Objects/unicodeobject.c  22
1 file changed, 15 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 594623c..a958951 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12258,7 +12258,8 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
Py_ssize_t start, stop, step, slicelength, cur, i;
PyObject *result;
void *src_data, *dest_data;
- int kind;
+ int src_kind, dest_kind;
+ Py_UCS4 ch, max_char;
if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self),
&start, &stop, &step, &slicelength) < 0) {
@@ -12276,17 +12277,24 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
return PyUnicode_Substring((PyObject*)self,
start, start + slicelength);
}
- /* General (less optimized) case */
- result = PyUnicode_New(slicelength, PyUnicode_MAX_CHAR_VALUE(self));
+ /* General case */
+ max_char = 127;
+ src_kind = PyUnicode_KIND(self);
+ src_data = PyUnicode_DATA(self);
+ for (cur = start, i = 0; i < slicelength; cur += step, i++) {
+ ch = PyUnicode_READ(src_kind, src_data, cur);
+ if (ch > max_char)
+ max_char = ch;
+ }
+ result = PyUnicode_New(slicelength, max_char);
if (result == NULL)
return NULL;
- kind = PyUnicode_KIND(self);
- src_data = PyUnicode_DATA(self);
+ dest_kind = PyUnicode_KIND(result);
dest_data = PyUnicode_DATA(result);
for (cur = start, i = 0; i < slicelength; cur += step, i++) {
- Py_UCS4 ch = PyUnicode_READ(kind, src_data, cur);
- PyUnicode_WRITE(kind, dest_data, i, ch);
+ Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur);
+ PyUnicode_WRITE(dest_kind, dest_data, i, ch);
}
return result;
} else {