From 2e5d8a90aa633ff0bebc9b2b8e21eea389937b19 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 22 May 2023 17:06:41 -0700 Subject: gh-99108: Release the GIL around hashlib built-in computation (#104675) This matches the GIL releasing behavior of our existing `_hashopenssl` module, extending it to the HACL* built-ins. Includes adding comments to better describe the ENTER/LEAVE macros purpose and explain the lock strategy in both existing and new code. --- .../2023-05-19-19-46-22.gh-issue-99108.wqCg0t.rst | 3 + Modules/_hashopenssl.c | 6 ++ Modules/hashlib.h | 9 ++- Modules/md5module.c | 37 ++++++++- Modules/sha1module.c | 37 ++++++++- Modules/sha2module.c | 94 ++++++++++++++++++++-- Modules/sha3module.c | 36 ++++++++- 7 files changed, 207 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-19-19-46-22.gh-issue-99108.wqCg0t.rst diff --git a/Misc/NEWS.d/next/Library/2023-05-19-19-46-22.gh-issue-99108.wqCg0t.rst b/Misc/NEWS.d/next/Library/2023-05-19-19-46-22.gh-issue-99108.wqCg0t.rst new file mode 100644 index 0000000..b595f18 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-19-19-46-22.gh-issue-99108.wqCg0t.rst @@ -0,0 +1,3 @@ +We now release the GIL around built-in :mod:`hashlib` computations of +reasonable size for the SHA families and MD5 hash functions, matching +what our OpenSSL backed hash computations already does. diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 99d0b72..4b425f4 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -227,12 +227,16 @@ get_hashlib_state(PyObject *module) typedef struct { PyObject_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. PyThread_type_lock lock; /* OpenSSL context lock */ } EVPobject; typedef struct { PyObject_HEAD HMAC_CTX *ctx; /* OpenSSL hmac context */ + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. PyThread_type_lock lock; /* HMAC context lock */ } HMACobject; @@ -896,6 +900,8 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, if (view.buf && view.len) { if (view.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ Py_BEGIN_ALLOW_THREADS result = EVP_hash(self, view.buf, view.len); Py_END_ALLOW_THREADS diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 56ae7a5..a8bad9d 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -37,6 +37,13 @@ * LEAVE_HASHLIB block or explicitly acquire and release the lock inside * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for * an operation. + * + * These only drop the GIL if the lock acquisition itself is likely to + * block. Thus the non-blocking acquire gating the GIL release for a + * blocking lock acquisition. The intent of these macros is to surround + * the assumed always "fast" operations that you aren't releasing the + * GIL around. Otherwise use code similar to what you see in hash + * function update() methods. */ #include "pythread.h" @@ -53,7 +60,7 @@ PyThread_release_lock((obj)->lock); \ } -/* TODO(gps): We should probably make this a module or EVPobject attribute +/* TODO(gpshead): We should make this a module or class attribute * to allow the user to optimize based on the platform they're using. */ #define HASHLIB_GIL_MINSIZE 2048 diff --git a/Modules/md5module.c b/Modules/md5module.c index 8660577..2122f8b 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -49,7 +49,9 @@ typedef long long MD5_INT64; /* 64-bit integer */ typedef struct { PyObject_HEAD - + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. + PyThread_type_lock lock; Hacl_Streaming_MD5_state *hash_state; } MD5object; @@ -72,6 +74,7 @@ static MD5object * newMD5object(MD5State * st) { MD5object *md5 = (MD5object *)PyObject_GC_New(MD5object, st->md5_type); + md5->lock = NULL; PyObject_GC_Track(md5); return md5; } @@ -88,6 +91,9 @@ static void MD5_dealloc(MD5object *ptr) { Hacl_Streaming_MD5_legacy_free(ptr->hash_state); + if (ptr->lock != NULL) { + PyThread_free_lock(ptr->lock); + } PyTypeObject *tp = Py_TYPE(ptr); PyObject_GC_UnTrack(ptr); PyObject_GC_Del(ptr); @@ -115,7 +121,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) if ((newobj = newMD5object(st))==NULL) return NULL; + ENTER_HASHLIB(self); newobj->hash_state = Hacl_Streaming_MD5_legacy_copy(self->hash_state); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -130,7 +138,9 @@ MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { unsigned char digest[MD5_DIGESTSIZE]; + ENTER_HASHLIB(self); Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -145,7 +155,9 @@ MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { unsigned char digest[MD5_DIGESTSIZE]; + ENTER_HASHLIB(self); Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return _Py_strhex((const char*)digest, MD5_DIGESTSIZE); } @@ -177,7 +189,18 @@ MD5Type_update(MD5object *self, PyObject *obj) GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - update(self->hash_state, buf.buf, buf.len); + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + update(self->hash_state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + update(self->hash_state, buf.buf, buf.len); + } PyBuffer_Release(&buf); Py_RETURN_NONE; @@ -279,7 +302,15 @@ _md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update(new->hash_state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update(new->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update(new->hash_state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index bdb76c5..c66269b 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -48,7 +48,9 @@ typedef long long SHA1_INT64; /* 64-bit integer */ typedef struct { PyObject_HEAD - + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. + PyThread_type_lock lock; Hacl_Streaming_SHA1_state *hash_state; } SHA1object; @@ -71,6 +73,7 @@ static SHA1object * newSHA1object(SHA1State *st) { SHA1object *sha = (SHA1object *)PyObject_GC_New(SHA1object, st->sha1_type); + sha->lock = NULL; PyObject_GC_Track(sha); return sha; } @@ -88,6 +91,9 @@ static void SHA1_dealloc(SHA1object *ptr) { Hacl_Streaming_SHA1_legacy_free(ptr->hash_state); + if (ptr->lock != NULL) { + PyThread_free_lock(ptr->lock); + } PyTypeObject *tp = Py_TYPE(ptr); PyObject_GC_UnTrack(ptr); PyObject_GC_Del(ptr); @@ -115,7 +121,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) if ((newobj = newSHA1object(st)) == NULL) return NULL; + ENTER_HASHLIB(self); newobj->hash_state = Hacl_Streaming_SHA1_legacy_copy(self->hash_state); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -130,7 +138,9 @@ SHA1Type_digest_impl(SHA1object *self) /*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/ { unsigned char digest[SHA1_DIGESTSIZE]; + ENTER_HASHLIB(self); Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE); } @@ -145,7 +155,9 @@ SHA1Type_hexdigest_impl(SHA1object *self) /*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/ { unsigned char digest[SHA1_DIGESTSIZE]; + ENTER_HASHLIB(self); Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } @@ -177,7 +189,18 @@ SHA1Type_update(SHA1object *self, PyObject *obj) GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - update(self->hash_state, buf.buf, buf.len); + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + update(self->hash_state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + update(self->hash_state, buf.buf, buf.len); + } PyBuffer_Release(&buf); Py_RETURN_NONE; @@ -279,7 +302,15 @@ _sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update(new->hash_state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update(new->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update(new->hash_state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 37d9b5c..6c7c391 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -52,12 +52,18 @@ class SHA512Type "SHA512object *" "&PyType_Type" typedef struct { PyObject_HEAD int digestsize; + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. + PyThread_type_lock lock; Hacl_Streaming_SHA2_state_sha2_256 *state; } SHA256object; typedef struct { PyObject_HEAD int digestsize; + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. + PyThread_type_lock lock; Hacl_Streaming_SHA2_state_sha2_512 *state; } SHA512object; @@ -100,6 +106,7 @@ newSHA224object(sha2_state *state) if (!sha) { return NULL; } + sha->lock = NULL; PyObject_GC_Track(sha); return sha; } @@ -112,6 +119,7 @@ newSHA256object(sha2_state *state) if (!sha) { return NULL; } + sha->lock = NULL; PyObject_GC_Track(sha); return sha; } @@ -124,6 +132,7 @@ newSHA384object(sha2_state *state) if (!sha) { return NULL; } + sha->lock = NULL; PyObject_GC_Track(sha); return sha; } @@ -136,6 +145,7 @@ newSHA512object(sha2_state *state) if (!sha) { return NULL; } + sha->lock = NULL; PyObject_GC_Track(sha); return sha; } @@ -153,6 +163,9 @@ static void SHA256_dealloc(SHA256object *ptr) { Hacl_Streaming_SHA2_free_256(ptr->state); + if (ptr->lock != NULL) { + PyThread_free_lock(ptr->lock); + } PyTypeObject *tp = Py_TYPE(ptr); PyObject_GC_UnTrack(ptr); PyObject_GC_Del(ptr); @@ -163,6 +176,9 @@ static void SHA512_dealloc(SHA512object *ptr) { Hacl_Streaming_SHA2_free_512(ptr->state); + if (ptr->lock != NULL) { + PyThread_free_lock(ptr->lock); + } PyTypeObject *tp = Py_TYPE(ptr); PyObject_GC_UnTrack(ptr); PyObject_GC_Del(ptr); @@ -229,7 +245,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) } } + ENTER_HASHLIB(self); SHA256copy(self, newobj); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -259,7 +277,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls) } } + ENTER_HASHLIB(self); SHA512copy(self, newobj); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -275,9 +295,11 @@ SHA256Type_digest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); + ENTER_HASHLIB(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Streaming_SHA2_finish_256(self->state, digest); + LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -293,9 +315,11 @@ SHA512Type_digest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); + ENTER_HASHLIB(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Streaming_SHA2_finish_512(self->state, digest); + LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -311,7 +335,9 @@ SHA256Type_hexdigest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); + ENTER_HASHLIB(self); Hacl_Streaming_SHA2_finish_256(self->state, digest); + LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -327,7 +353,9 @@ SHA512Type_hexdigest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); + ENTER_HASHLIB(self); Hacl_Streaming_SHA2_finish_512(self->state, digest); + LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -348,7 +376,18 @@ SHA256Type_update(SHA256object *self, PyObject *obj) GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - update_256(self->state, buf.buf, buf.len); + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + update_256(self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + update_256(self->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); Py_RETURN_NONE; @@ -371,7 +410,18 @@ SHA512Type_update(SHA512object *self, PyObject *obj) GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - update_512(self->state, buf.buf, buf.len); + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + update_512(self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + update_512(self->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); Py_RETURN_NONE; @@ -560,7 +610,15 @@ _sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update_256(new->state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update_256(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update_256(new->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } @@ -606,7 +664,15 @@ _sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update_256(new->state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update_256(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update_256(new->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } @@ -651,7 +717,15 @@ _sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update_512(new->state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update_512(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update_512(new->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } @@ -696,7 +770,15 @@ _sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity) return NULL; } if (string) { - update_512(new->state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + update_512(new->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update_512(new->state, buf.buf, buf.len); + } PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index f051874..558d200 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -60,6 +60,9 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" typedef struct { PyObject_HEAD + // Prevents undefined behavior via multiple threads entering the C API. + // The lock will be NULL before threaded access has been enabled. + PyThread_type_lock lock; Hacl_Streaming_Keccak_state *hash_state; } SHA3object; @@ -73,6 +76,7 @@ newSHA3object(PyTypeObject *type) if (newobj == NULL) { return NULL; } + newobj->lock = NULL; return newobj; } @@ -133,7 +137,15 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) if (data) { GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - sha3_update(self->hash_state, buf.buf, buf.len); + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* We do not initialize self->lock here as this is the constructor + * where it is not yet possible to have concurrent access. */ + Py_BEGIN_ALLOW_THREADS + sha3_update(self->hash_state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + sha3_update(self->hash_state, buf.buf, buf.len); + } } PyBuffer_Release(&buf); @@ -157,6 +169,9 @@ static void SHA3_dealloc(SHA3object *self) { Hacl_Streaming_Keccak_free(self->hash_state); + if (self->lock != NULL) { + PyThread_free_lock(self->lock); + } PyTypeObject *tp = Py_TYPE(self); PyObject_Free(self); Py_DECREF(tp); @@ -181,7 +196,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self) if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { return NULL; } + ENTER_HASHLIB(self); newobj->hash_state = Hacl_Streaming_Keccak_copy(self->hash_state); + LEAVE_HASHLIB(self); return (PyObject *)newobj; } @@ -199,7 +216,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self) unsigned char digest[SHA3_MAX_DIGESTSIZE]; // This function errors out if the algorithm is Shake. Here, we know this // not to be the case, and therefore do not perform error checking. + ENTER_HASHLIB(self); Hacl_Streaming_Keccak_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return PyBytes_FromStringAndSize((const char *)digest, Hacl_Streaming_Keccak_hash_len(self->hash_state)); } @@ -216,7 +235,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) /*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/ { unsigned char digest[SHA3_MAX_DIGESTSIZE]; + ENTER_HASHLIB(self); Hacl_Streaming_Keccak_finish(self->hash_state, digest); + LEAVE_HASHLIB(self); return _Py_strhex((const char *)digest, Hacl_Streaming_Keccak_hash_len(self->hash_state)); } @@ -237,7 +258,18 @@ _sha3_sha3_224_update(SHA3object *self, PyObject *data) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - sha3_update(self->hash_state, buf.buf, buf.len); + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + sha3_update(self->hash_state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + sha3_update(self->hash_state, buf.buf, buf.len); + } PyBuffer_Release(&buf); Py_RETURN_NONE; } -- cgit v0.12