1 file changed, 54 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 902dfe4..7e29a03 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9124,7 +9124,7 @@ PyObject *
 PyUnicode_Join(PyObject *separator, PyObject *seq)
 {
     PyObject *sep = NULL;
-    Py_ssize_t seplen = 1;
+    Py_ssize_t seplen;
     PyObject *res = NULL; /* the result */
     PyObject *fseq;          /* PySequence_Fast(seq) */
     Py_ssize_t seqlen;       /* len(fseq) -- number of items in sequence */
@@ -9133,6 +9133,10 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     Py_ssize_t sz, i, res_offset;
     Py_UCS4 maxchar;
     Py_UCS4 item_maxchar;
+    int use_memcpy;
+    unsigned char *res_data = NULL, *sep_data = NULL;
+    PyObject *last_obj;
+    unsigned int kind = 0;
 
     fseq = PySequence_Fast(seq, "");
     if (fseq == NULL) {
@@ -9153,6 +9157,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
     }
 
     /* If singleton sequence with an exact Unicode, return that. */
+    last_obj = NULL;
     items = PySequence_Fast_ITEMS(fseq);
     if (seqlen == 1) {
         if (PyUnicode_CheckExact(items[0])) {
@@ -9161,7 +9166,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
             Py_DECREF(fseq);
             return res;
         }
-        sep = NULL;
+        seplen = 0;
         maxchar = 0;
     }
     else {
@@ -9171,6 +9176,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
             sep = PyUnicode_FromOrdinal(' ');
             if (!sep)
                 goto onError;
+            seplen = 1;
             maxchar = 32;
         }
         else {
@@ -9190,6 +9196,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
                above case of a blank separator */
             Py_INCREF(sep);
         }
+        last_obj = sep;
     }
 
     /* There are at least two things to join, or else we have a subclass
@@ -9198,6 +9205,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
      * need (sz), and see whether all argument are strings.
      */
     sz = 0;
+#ifdef Py_DEBUG
+    use_memcpy = 0;
+#else
+    use_memcpy = 1;
+#endif
     for (i = 0; i < seqlen; i++) {
         const Py_ssize_t old_sz = sz;
         item = items[i];
@@ -9220,6 +9232,11 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
                             "join() result is too long for a Python string");
             goto onError;
         }
+        if (use_memcpy && last_obj != NULL) {
+            if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
+                use_memcpy = 0;
+        }
+        last_obj = item;
     }
 
     res = PyUnicode_New(sz, maxchar);
@@ -9227,21 +9244,51 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
         goto onError;
 
     /* Catenate everything. */
+#ifdef Py_DEBUG
+    use_memcpy = 0;
+#else
+    if (use_memcpy) {
+        res_data = PyUnicode_1BYTE_DATA(res);
+        kind = PyUnicode_KIND(res);
+        if (seplen != 0)
+            sep_data = PyUnicode_1BYTE_DATA(sep);
+    }
+#endif
     for (i = 0, res_offset = 0; i < seqlen; ++i) {
         Py_ssize_t itemlen;
         item = items[i];
         /* Copy item, and maybe the separator. */
         if (i && seplen != 0) {
-            copy_characters(res, res_offset, sep, 0, seplen);
-            res_offset += seplen;
+            if (use_memcpy) {
+                Py_MEMCPY(res_data,
+                          sep_data,
+                          PyUnicode_KIND_SIZE(kind, seplen));
+                res_data += PyUnicode_KIND_SIZE(kind, seplen);
+            }
+            else {
+                copy_characters(res, res_offset, sep, 0, seplen);
+                res_offset += seplen;
+            }
         }
         itemlen = PyUnicode_GET_LENGTH(item);
         if (itemlen != 0) {
-            copy_characters(res, res_offset, item, 0, itemlen);
-            res_offset += itemlen;
+            if (use_memcpy) {
+                Py_MEMCPY(res_data,
+                          PyUnicode_DATA(item),
+                          PyUnicode_KIND_SIZE(kind, itemlen));
+                res_data += PyUnicode_KIND_SIZE(kind, itemlen);
+            }
+            else {
+                copy_characters(res, res_offset, item, 0, itemlen);
+                res_offset += itemlen;
+            }
         }
     }
-    assert(res_offset == PyUnicode_GET_LENGTH(res));
+    if (use_memcpy)
+        assert(res_data == PyUnicode_1BYTE_DATA(res)
+                           + PyUnicode_KIND_SIZE(kind, PyUnicode_GET_LENGTH(res)));
+    else
+        assert(res_offset == PyUnicode_GET_LENGTH(res));
 
     Py_DECREF(fseq);
     Py_XDECREF(sep);