summaryrefslogtreecommitdiffstats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-05-03 00:17:04 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-05-03 00:17:04 (GMT)
commitb6cd014d75394ca48e6113d614d36ed16fbe7b13 (patch)
treecf5b9082e64ee3fc8cde78204d836a781cd762d4 /Objects/unicodeobject.c
parentbff7c9683442a6297bda2fb1ebedf73c9e4a265f (diff)
downloadcpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.zip
cpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.tar.gz
cpython-b6cd014d75394ca48e6113d614d36ed16fbe7b13.tar.bz2
Unicode: optimize creating of 1-character strings
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c58
1 files changed, 50 insertions, 8 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e22fcfd..f427fd3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
return unicode_empty;
}
assert(size > 0);
- if (size == 1 && u[0] < 256)
- return get_latin1_char((unsigned char)u[0]);
+ if (size == 1) {
+ Py_UCS4 ch = u[0];
+ if (ch < 256)
+ return get_latin1_char((unsigned char)ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
+ }
max_char = ucs2lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
return unicode_empty;
}
assert(size > 0);
- if (size == 1 && u[0] < 256)
- return get_latin1_char((unsigned char)u[0]);
+ if (size == 1) {
+ Py_UCS4 ch = u[0];
+ if (ch < 256)
+ return get_latin1_char((unsigned char)ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
+ }
max_char = ucs4lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args)
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{
- Py_UCS4 ch = PyUnicode_ReadChar(self, index);
- if (ch == (Py_UCS4)-1)
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 ch;
+ PyObject *res;
+
+ if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
+ PyErr_BadArgument();
+ return NULL;
+ }
+ if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
- return PyUnicode_FromOrdinal(ch);
+ }
+ kind = PyUnicode_KIND(self);
+ data = PyUnicode_DATA(self);
+ ch = PyUnicode_READ(kind, data, index);
+ if (ch < 256)
+ return get_latin1_char(ch);
+
+ res = PyUnicode_New(1, ch);
+ if (res == NULL)
+ return NULL;
+ kind = PyUnicode_KIND(res);
+ data = PyUnicode_DATA(res);
+ PyUnicode_WRITE(kind, data, 0, ch);
+ assert(_PyUnicode_CheckConsistency(res, 1));
+ return res;
}
/* Believe it or not, this produces the same value for ASCII strings
@@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
}
if (PyUnicode_IS_ASCII(self)) {
- kind = PyUnicode_KIND(self);
data = PyUnicode_1BYTE_DATA(self);
return unicode_fromascii(data + start, length);
}