summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
authorBruce Merry <bmerry@gmail.com>2020-01-29 07:09:24 (GMT)
committerGitHub <noreply@github.com>2020-01-29 07:09:24 (GMT)
commitd07d9f4c43bc85a77021bcc7d77643f8ebb605cf (patch)
tree1a34ea430e52347e71547a55e4fd994634bd3358 /Objects/stringlib
parent6a65eba44bfd82ccc8bed4b5c6dd6637549955d5 (diff)
downloadcpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.zip
cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.gz
cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.bz2
bpo-36051: Drop GIL during large bytes.join() (GH-17757)
Improve multi-threaded performance by dropping the GIL in the fast path of bytes.join. To avoid increasing overhead for small joins, it is only done if the output size exceeds a threshold.
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/join.h57
1 files changed, 40 insertions, 17 deletions
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
index 6f314e1..4d023ed 100644
--- a/Objects/stringlib/join.h
+++ b/Objects/stringlib/join.h
@@ -18,6 +18,9 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
Py_buffer *buffers = NULL;
#define NB_STATIC_BUFFERS 10
Py_buffer static_buffers[NB_STATIC_BUFFERS];
+#define GIL_THRESHOLD 1048576
+ int drop_gil = 1;
+ PyThreadState *save;
seq = PySequence_Fast(iterable, "can only join an iterable");
if (seq == NULL) {
@@ -65,12 +68,21 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
buffers[i].buf = PyBytes_AS_STRING(item);
buffers[i].len = PyBytes_GET_SIZE(item);
}
- else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected a bytes-like object, "
- "%.80s found",
- i, Py_TYPE(item)->tp_name);
- goto error;
+ else {
+ if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected a bytes-like object, "
+ "%.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ /* If the backing objects are mutable, then dropping the GIL
+ * opens up race conditions where another thread tries to modify
+ * the object which we hold a buffer on it. Such code has data
+ * races anyway, but this is a conservative approach that avoids
+ * changing the behaviour of that data race.
+ */
+ drop_gil = 0;
}
nbufs = i + 1; /* for error cleanup */
itemlen = buffers[i].len;
@@ -102,6 +114,12 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
/* Catenate everything. */
p = STRINGLIB_STR(res);
+ if (sz < GIL_THRESHOLD) {
+ drop_gil = 0; /* Benefits are likely outweighed by the overheads */
+ }
+ if (drop_gil) {
+ save = PyEval_SaveThread();
+ }
if (!seplen) {
/* fast path */
for (i = 0; i < nbufs; i++) {
@@ -110,19 +128,23 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
memcpy(p, q, n);
p += n;
}
- goto done;
}
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n;
- char *q;
- if (i) {
- memcpy(p, sepstr, seplen);
- p += seplen;
+ else {
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) {
+ memcpy(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ memcpy(p, q, n);
+ p += n;
}
- n = buffers[i].len;
- q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
+ }
+ if (drop_gil) {
+ PyEval_RestoreThread(save);
}
goto done;
@@ -138,3 +160,4 @@ done:
}
#undef NB_STATIC_BUFFERS
+#undef GIL_THRESHOLD