summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/hashlib.rst6
-rw-r--r--Lib/test/test_hashlib.py49
-rw-r--r--Misc/NEWS3
-rw-r--r--Modules/_hashopenssl.c155
4 files changed, 161 insertions, 52 deletions
diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst
index 73e6e4e..b7b1371 100644
--- a/Doc/library/hashlib.rst
+++ b/Doc/library/hashlib.rst
@@ -95,6 +95,12 @@ A hash object has the following methods:
a single call with the concatenation of all the arguments: ``m.update(a);
m.update(b)`` is equivalent to ``m.update(a+b)``.
+ .. versionchanged:: 2.7
+
+ The Python GIL is released to allow other threads to run while
+ hash updates on data larger than 2047 bytes are taking place when
+ using hash algorithms supplied by OpenSSL.
+
.. method:: hash.digest()
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py
index e7ce198..4ba07b1 100644
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -2,11 +2,16 @@
#
# $Id$
#
-# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
+# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
# Licensed to PSF under a Contributor Agreement.
#
import hashlib
+import StringIO
+try:
+ import threading
+except ImportError:
+ threading = None
import unittest
from test import test_support
from test.test_support import _4G, precisionbigmemtest
@@ -61,10 +66,10 @@ class HashLibTestCase(unittest.TestCase):
def check(self, name, data, digest):
# test the direct constructors
computed = getattr(hashlib, name)(data).hexdigest()
- self.assert_(computed == digest)
+ self.assertEqual(computed, digest)
# test the general new() interface
computed = hashlib.new(name, data).hexdigest()
- self.assert_(computed == digest)
+ self.assertEqual(computed, digest)
def check_no_unicode(self, algorithm_name):
# Unicode objects are not allowed as input.
@@ -211,6 +216,44 @@ class HashLibTestCase(unittest.TestCase):
"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
"de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
+ def test_threaded_hashing(self):
+ if not threading:
+ raise unittest.SkipTest('No threading module.')
+
+ # Update one hash object from several threads at once; every chunk is a
+ # whole number of copies of the same byte sequence, so order is irrelevant.
+ #
+ # If the internal locks are working to prevent multiple
+ # updates on the same object from running at once, the resulting
+ # hash will be the same as doing it single threaded upfront.
+ hasher = hashlib.sha1()
+ num_threads = 5
+ smallest_data = 'swineflu'
+ data = smallest_data*200000
+ expected_hash = hashlib.sha1(data*num_threads).hexdigest()
+
+ def hash_in_chunks(chunk_size, event):
+ index = 0
+ while index < len(data):
+ hasher.update(data[index:index+chunk_size])
+ index += chunk_size
+ event.set()
+
+ events = []
+ for threadnum in xrange(num_threads):
+ chunk_size = len(data) // (10**threadnum)
+ assert chunk_size > 0
+ assert chunk_size % len(smallest_data) == 0
+ event = threading.Event()
+ events.append(event)
+ threading.Thread(target=hash_in_chunks,
+ args=(chunk_size, event)).start()
+
+ for event in events:
+ event.wait()
+
+ self.assertEqual(expected_hash, hasher.hexdigest())
+
def test_main():
test_support.run_unittest(HashLibTestCase)
diff --git a/Misc/NEWS b/Misc/NEWS
index 64e473a..e372a62 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -905,6 +905,9 @@ C-API
Extension Modules
-----------------
+- Issue #4751: For hashlib algorithms provided by OpenSSL, the Python
+ GIL is now released during computation on data lengths >= 2048 bytes.
+
- Issue #3745: Fix hashlib to always reject unicode and non buffer-api
supporting objects as input no matter how it was compiled (built in
implementations or external openssl library).
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 7b5a2e5..8dbaa20 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -1,7 +1,7 @@
/* Module that wraps all OpenSSL hash algorithms */
/*
- * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
+ * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
* Licensed to PSF under a Contributor Agreement.
*
* Derived from a skeleton of shamodule.c containing work performed by:
@@ -17,25 +17,49 @@
#include "structmember.h"
#include "hashlib.h"
+#ifdef WITH_THREAD
+#include "pythread.h"
+ #define ENTER_HASHLIB(obj) \
+ if ((obj)->lock) \
+ { \
+ if (!PyThread_acquire_lock((obj)->lock, 0)) \
+ { \
+ Py_BEGIN_ALLOW_THREADS \
+ PyThread_acquire_lock((obj)->lock, 1); \
+ Py_END_ALLOW_THREADS \
+ } \
+ }
+ #define LEAVE_HASHLIB(obj) \
+ if ((obj)->lock) \
+ { \
+ PyThread_release_lock((obj)->lock); \
+ }
+#else
+ #define ENTER_HASHLIB(obj)
+ #define LEAVE_HASHLIB(obj)
+#endif
+
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
#define MUNCH_SIZE INT_MAX
+/* TODO(gps): We should probably make this a module or EVPobject attribute
+ * to allow the user to optimize based on the platform they're using. */
+#define HASHLIB_GIL_MINSIZE 2048
#ifndef HASH_OBJ_CONSTRUCTOR
#define HASH_OBJ_CONSTRUCTOR 0
#endif
+
typedef struct {
PyObject_HEAD
PyObject *name; /* name of this hash algorithm */
EVP_MD_CTX ctx; /* OpenSSL message digest context */
- /*
- * TODO investigate performance impact of including a lock for this object
- * here and releasing the Python GIL while hash updates are in progress.
- * (perhaps only release GIL if input length will take long to process?)
- */
+#ifdef WITH_THREAD
+ PyThread_type_lock lock; /* OpenSSL context lock */
+#endif
} EVPobject;
@@ -64,26 +88,57 @@ newEVPobject(PyObject *name)
if (retval != NULL) {
Py_INCREF(name);
retval->name = name;
+#ifdef WITH_THREAD
+ retval->lock = NULL;
+#endif
}
return retval;
}
+static void
+EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
+{
+ unsigned int process;
+ const unsigned char *cp = (const unsigned char *)vp;
+ while (0 < len)
+ {
+ if (len > (Py_ssize_t)MUNCH_SIZE)
+ process = MUNCH_SIZE;
+ else
+ process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int);
+ EVP_DigestUpdate(&self->ctx, (const void*)cp, process);
+ len -= process;
+ cp += process;
+ }
+}
+
/* Internal methods for a hash object */
static void
-EVP_dealloc(PyObject *ptr)
+EVP_dealloc(EVPobject *self)
{
- EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx);
- Py_XDECREF(((EVPobject *)ptr)->name);
- PyObject_Del(ptr);
+#ifdef WITH_THREAD
+ if (self->lock != NULL)
+ PyThread_free_lock(self->lock);
+#endif
+ EVP_MD_CTX_cleanup(&self->ctx);
+ Py_XDECREF(self->name);
+ PyObject_Del(self);
}
+static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
+{
+ ENTER_HASHLIB(self);
+ EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
+ LEAVE_HASHLIB(self);
+}
/* External methods for a hash object */
PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
+
static PyObject *
EVP_copy(EVPobject *self, PyObject *unused)
{
@@ -92,7 +147,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
if ( (newobj = newEVPobject(self->name))==NULL)
return NULL;
- EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
+ locked_EVP_MD_CTX_copy(&newobj->ctx, self);
return (PyObject *)newobj;
}
@@ -107,7 +162,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
PyObject *retval;
unsigned int digest_size;
- EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
+ locked_EVP_MD_CTX_copy(&temp_ctx, self);
digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -129,7 +184,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
unsigned int i, j, digest_size;
/* Get the raw (binary) digest value */
- EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
+ locked_EVP_MD_CTX_copy(&temp_ctx, self);
digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -174,19 +229,26 @@ EVP_update(EVPobject *self, PyObject *args)
GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL);
- if (view.len > 0 && view.len <= MUNCH_SIZE) {
- EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
- Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
+#ifdef WITH_THREAD
+ if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE)
+ {
+ self->lock = PyThread_allocate_lock();
+ /* fail? lock = NULL and we fail over to non-threaded code. */
+ }
+
+ if (self->lock != NULL)
+ {
+ Py_BEGIN_ALLOW_THREADS
+ PyThread_acquire_lock(self->lock, 1);
+ EVP_hash(self, view.buf, view.len);
+ PyThread_release_lock(self->lock);
+ Py_END_ALLOW_THREADS
} else {
- Py_ssize_t len = view.len;
- unsigned char *cp = (unsigned char *)view.buf;
- while (len > 0) {
- unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
- EVP_DigestUpdate(&self->ctx, cp, process);
- len -= process;
- cp += process;
- }
+ EVP_hash(self, view.buf, view.len);
}
+#else
+ EVP_hash(self, view.buf, view.len);
+#endif
PyBuffer_Release(&view);
@@ -205,13 +267,17 @@ static PyMethodDef EVP_methods[] = {
static PyObject *
EVP_get_block_size(EVPobject *self, void *closure)
{
- return PyInt_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx));
+ long block_size;
+ block_size = EVP_MD_CTX_block_size(&self->ctx);
+ return PyLong_FromLong(block_size);
}
static PyObject *
EVP_get_digest_size(EVPobject *self, void *closure)
{
- return PyInt_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx));
+ long size;
+ size = EVP_MD_CTX_size(&self->ctx);
+ return PyLong_FromLong(size);
}
static PyMemberDef EVP_members[] = {
@@ -286,19 +352,14 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
Py_INCREF(self->name);
if (data_obj) {
- if (view.len > 0 && view.len <= MUNCH_SIZE) {
- EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
- Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
+ if (view.len >= HASHLIB_GIL_MINSIZE)
+ {
+ Py_BEGIN_ALLOW_THREADS
+ EVP_hash(self, view.buf, view.len);
+ Py_END_ALLOW_THREADS
} else {
- Py_ssize_t len = view.len;
- unsigned char *cp = (unsigned char*)view.buf;
- while (len > 0) {
- unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
- EVP_DigestUpdate(&self->ctx, cp, process);
- len -= process;
- cp += process;
- }
- }
+ EVP_hash(self, view.buf, view.len);
+ }
PyBuffer_Release(&view);
}
@@ -329,7 +390,7 @@ static PyTypeObject EVPtype = {
sizeof(EVPobject), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
- EVP_dealloc, /*tp_dealloc*/
+ (destructor)EVP_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@@ -389,17 +450,13 @@ EVPnew(PyObject *name_obj,
}
if (cp && len) {
- if (len > 0 && len <= MUNCH_SIZE) {
- EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t,
- unsigned int));
+ if (len >= HASHLIB_GIL_MINSIZE)
+ {
+ Py_BEGIN_ALLOW_THREADS
+ EVP_hash(self, cp, len);
+ Py_END_ALLOW_THREADS
} else {
- Py_ssize_t offset = 0;
- while (len > 0) {
- unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
- EVP_DigestUpdate(&self->ctx, cp + offset, process);
- len -= process;
- offset += process;
- }
+ EVP_hash(self, cp, len);
}
}