From bcd5cbe01ef4306a82f85d0500f9a9f04113f804 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Jan 2009 21:17:16 +0000 Subject: Issue #4751: hashlib now releases the GIL when hashing large buffers (with a hardwired threshold of 2048 bytes), allowing better parallelization on multi-CPU systems. Contributed by Lukas Lueg (ebfe) and Victor Stinner. --- Doc/library/hashlib.rst | 5 + Lib/test/test_hashlib.py | 13 +++ Misc/NEWS | 4 + Modules/_hashopenssl.c | 245 +++++++++++++++++++++++++++++------------------ 4 files changed, 176 insertions(+), 91 deletions(-) diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index ad2bfa5..7a65d7d 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -37,6 +37,11 @@ concatenation of the data fed to it so far using the :meth:`digest` or .. note:: + For better multithreading performance, the Python GIL is released for + strings of more than 2047 bytes at object creation or on update. + +.. note:: + Feeding string objects is to :meth:`update` is not supported, as hashes work on bytes, not on characters. diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 10fe3be..e69c704 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -198,6 +198,19 @@ class HashLibTestCase(unittest.TestCase): "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") + def test_gil(self): + # Check things work fine with an input larger than the size required + # for multithreaded operation (which is hardwired to 2048). 
+ gil_minsize = 2048 + + m = hashlib.md5() + m.update(b'1') + m.update(b'#' * gil_minsize) + m.update(b'1') + self.assertEquals(m.hexdigest(), 'cb1e1a2cbc80be75e19935d621fb9b21') + + m = hashlib.md5(b'x' * gil_minsize) + self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958') def test_main(): support.run_unittest(HashLibTestCase) diff --git a/Misc/NEWS b/Misc/NEWS index f34cb91..685033c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -256,6 +256,10 @@ C-API Extension Modules ----------------- +- Issue #4751: hashlib now releases the GIL when hashing large buffers + (with a hardwired threshold of 2048 bytes), allowing better parallelization + on multi-CPU systems. Contributed by Lukas Lueg (ebfe) and Victor Stinner. + - Issue #4051: Prevent conflict of UNICODE macros in cPickle. - Issue #4738: Each zlib object now has a separate lock, allowing to compress diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 1b3ac6d..2284c5c 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -26,15 +26,35 @@ #define HASH_OBJ_CONSTRUCTOR 0 #endif +#define HASHLIB_GIL_MINSIZE 2048 + +#ifdef WITH_THREAD + #include "pythread.h" + + #define ENTER_HASHLIB(obj) \ + if ((obj)->lock) { \ + if (!PyThread_acquire_lock((obj)->lock, 0)) { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock((obj)->lock, 1); \ + Py_END_ALLOW_THREADS \ + } \ + } + #define LEAVE_HASHLIB(obj) \ + if ((obj)->lock) { \ + PyThread_release_lock((obj)->lock); \ + } +#else + #define ENTER_HASHLIB(obj) + #define LEAVE_HASHLIB(obj) +#endif + typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ - EVP_MD_CTX ctx; /* OpenSSL message digest context */ - /* - * TODO investigate performance impact of including a lock for this object - * here and releasing the Python GIL while hash updates are in progress. - * (perhaps only release GIL if input length will take long to process?) 
- */ + EVP_MD_CTX ctx; /* OpenSSL message digest context */ +#ifdef WITH_THREAD + PyThread_type_lock lock; /* OpenSSL context lock */ +#endif } EVPobject; @@ -63,19 +83,42 @@ newEVPobject(PyObject *name) if (retval != NULL) { Py_INCREF(name); retval->name = name; +#ifdef WITH_THREAD + retval->lock = NULL; +#endif } return retval; } +static void +EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len) +{ + unsigned int process; + const unsigned char *cp = (const unsigned char *)vp; + while (0 < len) { + if (len > (Py_ssize_t)MUNCH_SIZE) + process = MUNCH_SIZE; + else + process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int); + EVP_DigestUpdate(&self->ctx, (const void*)cp, process); + len -= process; + cp += process; + } +} + /* Internal methods for a hash object */ static void -EVP_dealloc(PyObject *ptr) +EVP_dealloc(EVPobject *self) { - EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx); - Py_XDECREF(((EVPobject *)ptr)->name); - PyObject_Del(ptr); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + EVP_MD_CTX_cleanup(&self->ctx); + Py_XDECREF(self->name); + PyObject_Del(self); } @@ -91,7 +134,9 @@ EVP_copy(EVPobject *self, PyObject *unused) if ( (newobj = newEVPobject(self->name))==NULL) return NULL; + ENTER_HASHLIB(self); EVP_MD_CTX_copy(&newobj->ctx, &self->ctx); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -106,7 +151,9 @@ EVP_digest(EVPobject *self, PyObject *unused) PyObject *retval; unsigned int digest_size; + ENTER_HASHLIB(self); EVP_MD_CTX_copy(&temp_ctx, &self->ctx); + LEAVE_HASHLIB(self); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -128,7 +175,9 @@ EVP_hexdigest(EVPobject *self, PyObject *unused) unsigned int i, j, digest_size; /* Get the raw (binary) digest value */ + ENTER_HASHLIB(self); EVP_MD_CTX_copy(&temp_ctx, &self->ctx); + LEAVE_HASHLIB(self); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -137,16 +186,16 @@ 
EVP_hexdigest(EVPobject *self, PyObject *unused) /* Allocate a new buffer */ hex_digest = PyMem_Malloc(digest_size * 2 + 1); if (!hex_digest) - return PyErr_NoMemory(); + return PyErr_NoMemory(); /* Make hex version of the digest */ for(i=j=0; i<digest_size; i++) { char c; c = (digest[i] >> 4) & 0xf; - c = (c>9) ? c+'a'-10 : c + '0'; + c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; c = (digest[i] & 0xf); - c = (c>9) ? c+'a'-10 : c + '0'; + c = (c>9) ? c+'a'-10 : c + '0'; hex_digest[j++] = c; } retval = PyUnicode_FromStringAndSize(hex_digest, digest_size * 2); @@ -155,21 +204,26 @@ EVP_hexdigest(EVPobject *self, PyObject *unused) } #define MY_GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) do { \ - if (PyUnicode_Check(obj) || !PyObject_CheckBuffer((obj))) { \ - PyErr_SetString(PyExc_TypeError, \ - "object supporting the buffer API required"); \ - return NULL; \ - } \ - if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \ - return NULL; \ - } \ - if ((viewp)->ndim > 1) { \ - PyErr_SetString(PyExc_BufferError, \ - "Buffer must be single dimension"); \ - PyBuffer_Release((viewp)); \ - return NULL; \ - } \ - } while(0); + if (PyUnicode_Check((obj))) { \ + PyErr_SetString(PyExc_TypeError, \ + "Unicode-objects must be encoded before hashing");\ + return NULL; \ + } \ + if (!PyObject_CheckBuffer((obj))) { \ + PyErr_SetString(PyExc_TypeError, \ + "object supporting the buffer API required"); \ + return NULL; \ + } \ + if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \ + return NULL; \ + } \ + if ((viewp)->ndim > 1) { \ + PyErr_SetString(PyExc_BufferError, \ + "Buffer must be single dimension"); \ + PyBuffer_Release((viewp)); \ + return NULL; \ + } \ + } while(0); PyDoc_STRVAR(EVP_update__doc__, "Update this hash object's state with the provided string."); @@ -184,41 +238,60 @@ EVP_update(EVPobject *self, PyObject *args) return NULL; MY_GET_BUFFER_VIEW_OR_ERROUT(obj, &view); - if (view.len > 0 && view.len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, view.buf, view.len); - } else { - Py_ssize_t 
offset = 0, len = view.len; - while (len) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf + offset, process); - len -= process; - offset += process; + +#ifdef WITH_THREAD + if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyBuffer_Release(&view); + PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); + return NULL; } } - PyBuffer_Release(&view); - Py_INCREF(Py_None); - return Py_None; + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + EVP_hash(self, view.buf, view.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + EVP_hash(self, view.buf, view.len); + } +#else + EVP_hash(self, view.buf, view.len); +#endif + + PyBuffer_Release(&view); + Py_RETURN_NONE; } static PyMethodDef EVP_methods[] = { - {"update", (PyCFunction)EVP_update, METH_VARARGS, EVP_update__doc__}, - {"digest", (PyCFunction)EVP_digest, METH_NOARGS, EVP_digest__doc__}, + {"update", (PyCFunction)EVP_update, METH_VARARGS, EVP_update__doc__}, + {"digest", (PyCFunction)EVP_digest, METH_NOARGS, EVP_digest__doc__}, {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS, EVP_hexdigest__doc__}, - {"copy", (PyCFunction)EVP_copy, METH_NOARGS, EVP_copy__doc__}, - {NULL, NULL} /* sentinel */ + {"copy", (PyCFunction)EVP_copy, METH_NOARGS, EVP_copy__doc__}, + {NULL, NULL} /* sentinel */ }; static PyObject * EVP_get_block_size(EVPobject *self, void *closure) { - return PyLong_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx)); + long block_size; + ENTER_HASHLIB(self); + block_size = EVP_MD_CTX_block_size(&self->ctx); + LEAVE_HASHLIB(self); + return PyLong_FromLong(block_size); } static PyObject * EVP_get_digest_size(EVPobject *self, void *closure) { - return PyLong_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx)); + long size; + ENTER_HASHLIB(self); + size = 
EVP_MD_CTX_size(&self->ctx); + LEAVE_HASHLIB(self); + return PyLong_FromLong(size); } static PyMemberDef EVP_members[] = { @@ -246,11 +319,11 @@ static PyGetSetDef EVP_getseters[] = { static PyObject * -EVP_repr(PyObject *self) +EVP_repr(EVPobject *self) { char buf[100]; PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>", - _PyUnicode_AsString(((EVPobject *)self)->name), self); + _PyUnicode_AsString(self->name), self); return PyUnicode_FromString(buf); } @@ -293,21 +366,16 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds) Py_INCREF(self->name); if (data_obj) { - if (len > 0 && len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t, - unsigned int)); + if (view.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + EVP_hash(self, view.buf, view.len); + Py_END_ALLOW_THREADS } else { - Py_ssize_t offset = 0, len = view.len; - while (len) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf + offset, process); - len -= process; - offset += process; - } + EVP_hash(self, view.buf, view.len); } PyBuffer_Release(&view); } - + return 0; } #endif @@ -332,15 +400,15 @@ digest_size -- number of bytes in this hashes output\n"); static PyTypeObject EVPtype = { PyVarObject_HEAD_INIT(NULL, 0) "_hashlib.HASH", /*tp_name*/ - sizeof(EVPobject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ + sizeof(EVPobject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ /* methods */ - EVP_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ + (destructor)EVP_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ - EVP_repr, /*tp_repr*/ + (reprfunc)EVP_repr, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ @@ -353,13 +421,13 @@ static PyTypeObject EVPtype = { Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ hashtype_doc, /*tp_doc*/ 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, 
/*tp_iter*/ - 0, /*tp_iternext*/ - EVP_methods, /* tp_methods */ - EVP_members, /* tp_members */ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + EVP_methods, /* tp_methods */ + EVP_members, /* tp_members */ EVP_getseters, /* tp_getset */ #if 1 0, /* tp_base */ @@ -395,17 +463,12 @@ EVPnew(PyObject *name_obj, } if (cp && len) { - if (len > 0 && len <= MUNCH_SIZE) { - EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t, - unsigned int)); + if (len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + EVP_hash(self, cp, len); + Py_END_ALLOW_THREADS } else { - Py_ssize_t offset = 0; - while (len) { - unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len; - EVP_DigestUpdate(&self->ctx, cp + offset, process); - len -= process; - offset += process; - } + EVP_hash(self, cp, len); } } @@ -522,7 +585,7 @@ static struct PyMethodDef EVP_functions[] = { CONSTRUCTOR_METH_DEF(sha256), CONSTRUCTOR_METH_DEF(sha384), CONSTRUCTOR_METH_DEF(sha512), - {NULL, NULL} /* Sentinel */ + {NULL, NULL} /* Sentinel */ }; @@ -530,15 +593,15 @@ static struct PyMethodDef EVP_functions[] = { static struct PyModuleDef _hashlibmodule = { - PyModuleDef_HEAD_INIT, - "_hashlib", - NULL, - -1, - EVP_functions, - NULL, - NULL, - NULL, - NULL + PyModuleDef_HEAD_INIT, + "_hashlib", + NULL, + -1, + EVP_functions, + NULL, + NULL, + NULL, + NULL }; PyMODINIT_FUNC -- cgit v0.12