summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruce Merry <bmerry@gmail.com>2020-01-29 07:09:24 (GMT)
committerGitHub <noreply@github.com>2020-01-29 07:09:24 (GMT)
commitd07d9f4c43bc85a77021bcc7d77643f8ebb605cf (patch)
tree1a34ea430e52347e71547a55e4fd994634bd3358
parent6a65eba44bfd82ccc8bed4b5c6dd6637549955d5 (diff)
downloadcpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.zip
cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.gz
cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.bz2
bpo-36051: Drop GIL during large bytes.join() (GH-17757)
Improve multi-threaded performance by dropping the GIL in the fast path of bytes.join. To avoid increasing overhead for small joins, it is only done if the output size exceeds a threshold.
-rw-r--r--Lib/test/test_bytes.py8
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst1
-rw-r--r--Objects/stringlib/join.h57
4 files changed, 48 insertions, 19 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index ddcf367..770e2c5 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -547,9 +547,13 @@ class BaseBytesTest:
self.assertEqual(dot_join([bytearray(b"ab"), b"cd"]), b"ab.:cd")
self.assertEqual(dot_join([b"ab", bytearray(b"cd")]), b"ab.:cd")
# Stress it with many items
- seq = [b"abc"] * 1000
- expected = b"abc" + b".:abc" * 999
+ seq = [b"abc"] * 100000
+ expected = b"abc" + b".:abc" * 99999
self.assertEqual(dot_join(seq), expected)
+ # Stress test with empty separator
+ seq = [b"abc"] * 100000
+ expected = b"abc" * 100000
+ self.assertEqual(self.type2test(b"").join(seq), expected)
self.assertRaises(TypeError, self.type2test(b" ").join, None)
# Error handling and cleanup when some item in the middle of the
# sequence has the wrong type.
diff --git a/Misc/ACKS b/Misc/ACKS
index 7e4b81b..f3e3680 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1106,6 +1106,7 @@ Ezio Melotti
Doug Mennella
Dimitri Merejkowsky
Brian Merrell
+Bruce Merry
Alexis Métaireau
Luke Mewburn
Carl Meyer
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst b/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst
new file mode 100644
index 0000000..f9d4492
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst
@@ -0,0 +1 @@
+Drop the GIL during large ``bytes.join`` operations. Patch by Bruce Merry.
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
index 6f314e1..4d023ed 100644
--- a/Objects/stringlib/join.h
+++ b/Objects/stringlib/join.h
@@ -18,6 +18,9 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
Py_buffer *buffers = NULL;
#define NB_STATIC_BUFFERS 10
Py_buffer static_buffers[NB_STATIC_BUFFERS];
+#define GIL_THRESHOLD 1048576
+ int drop_gil = 1;
+ PyThreadState *save;
seq = PySequence_Fast(iterable, "can only join an iterable");
if (seq == NULL) {
@@ -65,12 +68,21 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
buffers[i].buf = PyBytes_AS_STRING(item);
buffers[i].len = PyBytes_GET_SIZE(item);
}
- else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected a bytes-like object, "
- "%.80s found",
- i, Py_TYPE(item)->tp_name);
- goto error;
+ else {
+ if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected a bytes-like object, "
+ "%.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ /* If the backing objects are mutable, then dropping the GIL
+ * opens up race conditions where another thread tries to modify
+ * the object which we hold a buffer on it. Such code has data
+ * races anyway, but this is a conservative approach that avoids
+ * changing the behaviour of that data race.
+ */
+ drop_gil = 0;
}
nbufs = i + 1; /* for error cleanup */
itemlen = buffers[i].len;
@@ -102,6 +114,12 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
/* Catenate everything. */
p = STRINGLIB_STR(res);
+ if (sz < GIL_THRESHOLD) {
+ drop_gil = 0; /* Benefits are likely outweighed by the overheads */
+ }
+ if (drop_gil) {
+ save = PyEval_SaveThread();
+ }
if (!seplen) {
/* fast path */
for (i = 0; i < nbufs; i++) {
@@ -110,19 +128,23 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
memcpy(p, q, n);
p += n;
}
- goto done;
}
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n;
- char *q;
- if (i) {
- memcpy(p, sepstr, seplen);
- p += seplen;
+ else {
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) {
+ memcpy(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ memcpy(p, q, n);
+ p += n;
}
- n = buffers[i].len;
- q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
+ }
+ if (drop_gil) {
+ PyEval_RestoreThread(save);
}
goto done;
@@ -138,3 +160,4 @@ done:
}
#undef NB_STATIC_BUFFERS
+#undef GIL_THRESHOLD