diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-05-03 00:17:04 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-05-03 00:17:04 (GMT) |
commit | b6cd014d75394ca48e6113d614d36ed16fbe7b13 (patch) | |
tree | cf5b9082e64ee3fc8cde78204d836a781cd762d4 /Objects/unicodeobject.c | |
parent | bff7c9683442a6297bda2fb1ebedf73c9e4a265f (diff) | |
download | cpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.zip cpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.tar.gz cpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.tar.bz2 |
Unicode: optimize creating of 1-character strings
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 58 |
1 files changed, 50 insertions, 8 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e22fcfd..f427fd3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) return unicode_empty; } assert(size > 0); - if (size == 1 && u[0] < 256) - return get_latin1_char((unsigned char)u[0]); + if (size == 1) { + Py_UCS4 ch = u[0]; + if (ch < 256) + return get_latin1_char((unsigned char)ch); + + res = PyUnicode_New(1, ch); + if (res == NULL) + return NULL; + PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; + } max_char = ucs2lib_find_max_char(u, u + size); res = PyUnicode_New(size, max_char); @@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) return unicode_empty; } assert(size > 0); - if (size == 1 && u[0] < 256) - return get_latin1_char((unsigned char)u[0]); + if (size == 1) { + Py_UCS4 ch = u[0]; + if (ch < 256) + return get_latin1_char((unsigned char)ch); + + res = PyUnicode_New(1, ch); + if (res == NULL) + return NULL; + PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; + } max_char = ucs4lib_find_max_char(u, u + size); res = PyUnicode_New(size, max_char); @@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args) static PyObject * unicode_getitem(PyObject *self, Py_ssize_t index) { - Py_UCS4 ch = PyUnicode_ReadChar(self, index); - if (ch == (Py_UCS4)-1) + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 ch; + PyObject *res; + + if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) { + PyErr_BadArgument(); + return NULL; + } + if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; - return PyUnicode_FromOrdinal(ch); + } + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + ch = PyUnicode_READ(kind, data, index); + if (ch < 256) + return get_latin1_char(ch); + + res = PyUnicode_New(1, ch); + if (res == NULL) + return NULL; + kind = PyUnicode_KIND(res); + data = PyUnicode_DATA(res); + PyUnicode_WRITE(kind, data, 0, ch); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; } /* Believe it or not, this produces the same value for ASCII strings @@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) } if (PyUnicode_IS_ASCII(self)) { - kind = PyUnicode_KIND(self); data = PyUnicode_1BYTE_DATA(self); return unicode_fromascii(data + start, length); } |