From 3f61d61b353c08525348a018294fa05bba1bdfe2 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 4 May 2009 00:45:33 +0000 Subject: Merge refactoring I did when committing r72267 to trunk into the already committed issue4751 support in py3k r68411. --- Doc/library/hashlib.rst | 6 +++++ Lib/test/test_hashlib.py | 46 ++++++++++++++++++++++++++++++++++++- Modules/_hashopenssl.c | 60 ++++++++++++++++++++++-------------------------- 3 files changed, 78 insertions(+), 34 deletions(-) diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index 955afb8..36f386c 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -105,6 +105,12 @@ A hash object has the following methods: concatenation of all the arguments: ``m.update(a); m.update(b)`` is equivalent to ``m.update(a+b)``. + .. versionchanged:: 2.7 + + The Python GIL is released to allow other threads to run while + hash updates on data larger than 2048 bytes is taking place when + using hash algorithms supplied by OpenSSL. + .. method:: hash.digest() diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 9b51459..594f5dd 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -2,11 +2,16 @@ # # $Id$ # -# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org) +# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org) # Licensed to PSF under a Contributor Agreement. # import hashlib +from io import StringIO +try: + import threading +except ImportError: + threading = None import unittest from test import support from test.support import _4G, precisionbigmemtest @@ -224,6 +229,45 @@ class HashLibTestCase(unittest.TestCase): m = hashlib.md5(b'x' * gil_minsize) self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958') + def test_threaded_hashing(self): + if not threading: + raise unittest.SkipTest('No threading module.') + + # Updating the same hash object from several threads at once + # using data chunk sizes containing the same byte sequences. 
+ # + # If the internal locks are working to prevent multiple + # updates on the same object from running at once, the resulting + # hash will be the same as doing it single threaded upfront. + hasher = hashlib.sha1() + num_threads = 5 + smallest_data = b'swineflu' + data = smallest_data*200000 + expected_hash = hashlib.sha1(data*num_threads).hexdigest() + + def hash_in_chunks(chunk_size, event): + index = 0 + while index < len(data): + hasher.update(data[index:index+chunk_size]) + index += chunk_size + event.set() + + events = [] + for threadnum in range(num_threads): + chunk_size = len(data) // (10**threadnum) + assert chunk_size > 0 + assert chunk_size % len(smallest_data) == 0 + event = threading.Event() + events.append(event) + threading.Thread(target=hash_in_chunks, + args=(chunk_size, event)).start() + + for event in events: + event.wait() + + self.assertEqual(expected_hash, hasher.hexdigest()) + + def test_main(): support.run_unittest(HashLibTestCase) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 569d441..0dae515 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -1,7 +1,7 @@ /* Module that wraps all OpenSSL hash algorithms */ /* - * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) + * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org) * Licensed to PSF under a Contributor Agreement. 
* * Derived from a skeleton of shamodule.c containing work performed by: @@ -17,21 +17,8 @@ #include "structmember.h" #include "hashlib.h" -/* EVP is the preferred interface to hashing in OpenSSL */ -#include <openssl/evp.h> - -#define MUNCH_SIZE INT_MAX - - -#ifndef HASH_OBJ_CONSTRUCTOR -#define HASH_OBJ_CONSTRUCTOR 0 -#endif - -#define HASHLIB_GIL_MINSIZE 2048 - #ifdef WITH_THREAD - #include "pythread.h" - +#include "pythread.h" #define ENTER_HASHLIB(obj) \ if ((obj)->lock) { \ if (!PyThread_acquire_lock((obj)->lock, 0)) { \ @@ -49,6 +36,20 @@ #define LEAVE_HASHLIB(obj) #endif +/* EVP is the preferred interface to hashing in OpenSSL */ +#include <openssl/evp.h> + +#define MUNCH_SIZE INT_MAX + +/* TODO(gps): We should probably make this a module or EVPobject attribute + * to allow the user to optimize based on the platform they're using. */ +#define HASHLIB_GIL_MINSIZE 2048 + +#ifndef HASH_OBJ_CONSTRUCTOR +#define HASH_OBJ_CONSTRUCTOR 0 +#endif + + typedef struct { PyObject_HEAD PyObject *name; /* name of this hash algorithm */ @@ -122,11 +123,18 @@ EVP_dealloc(EVPobject *self) PyObject_Del(self); } +static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self) +{ + ENTER_HASHLIB(self); + EVP_MD_CTX_copy(new_ctx_p, &self->ctx); + LEAVE_HASHLIB(self); +} /* External methods for a hash object */ PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object."); + static PyObject * EVP_copy(EVPobject *self, PyObject *unused) { @@ -135,9 +143,7 @@ EVP_copy(EVPobject *self, PyObject *unused) if ( (newobj = newEVPobject(self->name))==NULL) return NULL; - ENTER_HASHLIB(self); - EVP_MD_CTX_copy(&newobj->ctx, &self->ctx); - LEAVE_HASHLIB(self); + locked_EVP_MD_CTX_copy(&newobj->ctx, self); return (PyObject *)newobj; } @@ -152,9 +158,7 @@ EVP_digest(EVPobject *self, PyObject *unused) PyObject *retval; unsigned int digest_size; - ENTER_HASHLIB(self); - EVP_MD_CTX_copy(&temp_ctx, &self->ctx); - LEAVE_HASHLIB(self); + locked_EVP_MD_CTX_copy(&temp_ctx, self); digest_size = 
EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -176,9 +180,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused) unsigned int i, j, digest_size; /* Get the raw (binary) digest value */ - ENTER_HASHLIB(self); - EVP_MD_CTX_copy(&temp_ctx, &self->ctx); - LEAVE_HASHLIB(self); + locked_EVP_MD_CTX_copy(&temp_ctx, self); digest_size = EVP_MD_CTX_size(&temp_ctx); EVP_DigestFinal(&temp_ctx, digest, NULL); @@ -221,11 +223,7 @@ EVP_update(EVPobject *self, PyObject *args) #ifdef WITH_THREAD if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) { self->lock = PyThread_allocate_lock(); - if (self->lock == NULL) { - PyBuffer_Release(&view); - PyErr_SetString(PyExc_MemoryError, "unable to allocate lock"); - return NULL; - } + /* fail? lock = NULL and we fail over to non-threaded code. */ } if (self->lock != NULL) { @@ -257,9 +255,7 @@ static PyObject * EVP_get_block_size(EVPobject *self, void *closure) { long block_size; - ENTER_HASHLIB(self); block_size = EVP_MD_CTX_block_size(&self->ctx); - LEAVE_HASHLIB(self); return PyLong_FromLong(block_size); } @@ -267,9 +263,7 @@ static PyObject * EVP_get_digest_size(EVPobject *self, void *closure) { long size; - ENTER_HASHLIB(self); size = EVP_MD_CTX_size(&self->ctx); - LEAVE_HASHLIB(self); return PyLong_FromLong(size); } -- cgit v0.12