diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 61 |
1 files changed, 54 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 902dfe4..7e29a03 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9124,7 +9124,7 @@ PyObject * PyUnicode_Join(PyObject *separator, PyObject *seq) { PyObject *sep = NULL; - Py_ssize_t seplen = 1; + Py_ssize_t seplen; PyObject *res = NULL; /* the result */ PyObject *fseq; /* PySequence_Fast(seq) */ Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ @@ -9133,6 +9133,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) Py_ssize_t sz, i, res_offset; Py_UCS4 maxchar; Py_UCS4 item_maxchar; + int use_memcpy; + unsigned char *res_data = NULL, *sep_data = NULL; + PyObject *last_obj; + unsigned int kind = 0; fseq = PySequence_Fast(seq, ""); if (fseq == NULL) { @@ -9153,6 +9157,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) } /* If singleton sequence with an exact Unicode, return that. */ + last_obj = NULL; items = PySequence_Fast_ITEMS(fseq); if (seqlen == 1) { if (PyUnicode_CheckExact(items[0])) { @@ -9161,7 +9166,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) Py_DECREF(fseq); return res; } - sep = NULL; + seplen = 0; maxchar = 0; } else { @@ -9171,6 +9176,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) sep = PyUnicode_FromOrdinal(' '); if (!sep) goto onError; + seplen = 1; maxchar = 32; } else { @@ -9190,6 +9196,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) above case of a blank separator */ Py_INCREF(sep); } + last_obj = sep; } /* There are at least two things to join, or else we have a subclass @@ -9198,6 +9205,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) * need (sz), and see whether all argument are strings. */ sz = 0; +#ifdef Py_DEBUG + use_memcpy = 0; +#else + use_memcpy = 1; +#endif for (i = 0; i < seqlen; i++) { const Py_ssize_t old_sz = sz; item = items[i]; @@ -9220,6 +9232,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) "join() result is too long for a Python string"); goto onError; } + if (use_memcpy && last_obj != NULL) { + if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item)) + use_memcpy = 0; + } + last_obj = item; } res = PyUnicode_New(sz, maxchar); @@ -9227,21 +9244,51 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) goto onError; /* Catenate everything. */ +#ifdef Py_DEBUG + use_memcpy = 0; +#else + if (use_memcpy) { + res_data = PyUnicode_1BYTE_DATA(res); + kind = PyUnicode_KIND(res); + if (seplen != 0) + sep_data = PyUnicode_1BYTE_DATA(sep); + } +#endif for (i = 0, res_offset = 0; i < seqlen; ++i) { Py_ssize_t itemlen; item = items[i]; /* Copy item, and maybe the separator. */ if (i && seplen != 0) { - copy_characters(res, res_offset, sep, 0, seplen); - res_offset += seplen; + if (use_memcpy) { + Py_MEMCPY(res_data, + sep_data, + PyUnicode_KIND_SIZE(kind, seplen)); + res_data += PyUnicode_KIND_SIZE(kind, seplen); + } + else { + copy_characters(res, res_offset, sep, 0, seplen); + res_offset += seplen; + } } itemlen = PyUnicode_GET_LENGTH(item); if (itemlen != 0) { - copy_characters(res, res_offset, item, 0, itemlen); - res_offset += itemlen; + if (use_memcpy) { + Py_MEMCPY(res_data, + PyUnicode_DATA(item), + PyUnicode_KIND_SIZE(kind, itemlen)); + res_data += PyUnicode_KIND_SIZE(kind, itemlen); + } + else { + copy_characters(res, res_offset, item, 0, itemlen); + res_offset += itemlen; + } } } - assert(res_offset == PyUnicode_GET_LENGTH(res)); + if (use_memcpy) + assert(res_data == PyUnicode_1BYTE_DATA(res) + + PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res))); + else + assert(res_offset == PyUnicode_GET_LENGTH(res)); Py_DECREF(fseq); Py_XDECREF(sep); |