diff options
author | Bruce Merry <bmerry@gmail.com> | 2020-01-29 07:09:24 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-29 07:09:24 (GMT) |
commit | d07d9f4c43bc85a77021bcc7d77643f8ebb605cf (patch) | |
tree | 1a34ea430e52347e71547a55e4fd994634bd3358 | |
parent | 6a65eba44bfd82ccc8bed4b5c6dd6637549955d5 (diff) | |
download | cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.zip cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.gz cpython-d07d9f4c43bc85a77021bcc7d77643f8ebb605cf.tar.bz2 |
bpo-36051: Drop GIL during large bytes.join() (GH-17757)
Improve multi-threaded performance by dropping the GIL in the fast path
of bytes.join. To avoid increasing overhead for small joins, it is only
done if the output size exceeds a threshold.
-rw-r--r-- | Lib/test/test_bytes.py | 8 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst | 1 | ||||
-rw-r--r-- | Objects/stringlib/join.h | 57 |
4 files changed, 48 insertions, 19 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index ddcf367..770e2c5 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -547,9 +547,13 @@ class BaseBytesTest: self.assertEqual(dot_join([bytearray(b"ab"), b"cd"]), b"ab.:cd") self.assertEqual(dot_join([b"ab", bytearray(b"cd")]), b"ab.:cd") # Stress it with many items - seq = [b"abc"] * 1000 - expected = b"abc" + b".:abc" * 999 + seq = [b"abc"] * 100000 + expected = b"abc" + b".:abc" * 99999 self.assertEqual(dot_join(seq), expected) + # Stress test with empty separator + seq = [b"abc"] * 100000 + expected = b"abc" * 100000 + self.assertEqual(self.type2test(b"").join(seq), expected) self.assertRaises(TypeError, self.type2test(b" ").join, None) # Error handling and cleanup when some item in the middle of the # sequence has the wrong type. @@ -1106,6 +1106,7 @@ Ezio Melotti Doug Mennella Dimitri Merejkowsky Brian Merrell +Bruce Merry Alexis Métaireau Luke Mewburn Carl Meyer diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst b/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst new file mode 100644 index 0000000..f9d4492 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-12-30-15-56-07.bpo-36051.imaVlq.rst @@ -0,0 +1 @@ +Drop the GIL during large ``bytes.join`` operations. Patch by Bruce Merry. diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index 6f314e1..4d023ed 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -18,6 +18,9 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) Py_buffer *buffers = NULL; #define NB_STATIC_BUFFERS 10 Py_buffer static_buffers[NB_STATIC_BUFFERS]; +#define GIL_THRESHOLD 1048576 + int drop_gil = 1; + PyThreadState *save; seq = PySequence_Fast(iterable, "can only join an iterable"); if (seq == NULL) { @@ -65,12 +68,21 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) buffers[i].buf = PyBytes_AS_STRING(item); buffers[i].len = PyBytes_GET_SIZE(item); } - else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { - PyErr_Format(PyExc_TypeError, - "sequence item %zd: expected a bytes-like object, " - "%.80s found", - i, Py_TYPE(item)->tp_name); - goto error; + else { + if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected a bytes-like object, " + "%.80s found", + i, Py_TYPE(item)->tp_name); + goto error; + } + /* If the backing objects are mutable, then dropping the GIL + * opens up race conditions where another thread tries to modify + * the object which we hold a buffer on it. Such code has data + * races anyway, but this is a conservative approach that avoids + * changing the behaviour of that data race. + */ + drop_gil = 0; } nbufs = i + 1; /* for error cleanup */ itemlen = buffers[i].len; @@ -102,6 +114,12 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) /* Catenate everything. */ p = STRINGLIB_STR(res); + if (sz < GIL_THRESHOLD) { + drop_gil = 0; /* Benefits are likely outweighed by the overheads */ + } + if (drop_gil) { + save = PyEval_SaveThread(); + } if (!seplen) { /* fast path */ for (i = 0; i < nbufs; i++) { @@ -110,19 +128,23 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) memcpy(p, q, n); p += n; } - goto done; } - for (i = 0; i < nbufs; i++) { - Py_ssize_t n; - char *q; - if (i) { - memcpy(p, sepstr, seplen); - p += seplen; + else { + for (i = 0; i < nbufs; i++) { + Py_ssize_t n; + char *q; + if (i) { + memcpy(p, sepstr, seplen); + p += seplen; + } + n = buffers[i].len; + q = buffers[i].buf; + memcpy(p, q, n); + p += n; } - n = buffers[i].len; - q = buffers[i].buf; - memcpy(p, q, n); - p += n; + } + if (drop_gil) { + PyEval_RestoreThread(save); } goto done; @@ -138,3 +160,4 @@ done: } #undef NB_STATIC_BUFFERS +#undef GIL_THRESHOLD |