summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGregory P. Smith <greg@mad-scientist.com>2009-05-04 00:45:33 (GMT)
committerGregory P. Smith <greg@mad-scientist.com>2009-05-04 00:45:33 (GMT)
commit3f61d61b353c08525348a018294fa05bba1bdfe2 (patch)
tree25258dd4c53962cd25ae584b079839369465df74
parentc1651a0b968390ef6b722d3c2e1ca72c5a7c9cec (diff)
downloadcpython-3f61d61b353c08525348a018294fa05bba1bdfe2.zip
cpython-3f61d61b353c08525348a018294fa05bba1bdfe2.tar.gz
cpython-3f61d61b353c08525348a018294fa05bba1bdfe2.tar.bz2
Merge refactoring I did when committing r72267 to trunk into the
already committed issue4751 support in py3k r68411.
-rw-r--r--Doc/library/hashlib.rst6
-rw-r--r--Lib/test/test_hashlib.py46
-rw-r--r--Modules/_hashopenssl.c60
3 files changed, 78 insertions, 34 deletions
diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst
index 955afb8..36f386c 100644
--- a/Doc/library/hashlib.rst
+++ b/Doc/library/hashlib.rst
@@ -105,6 +105,12 @@ A hash object has the following methods:
concatenation of all the arguments: ``m.update(a); m.update(b)`` is
equivalent to ``m.update(a+b)``.
+ .. versionchanged:: 2.7
+
+ The Python GIL is released to allow other threads to run while
+ hash updates on data of 2048 bytes or more are taking place when
+ using hash algorithms supplied by OpenSSL.
+
.. method:: hash.digest()
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py
index 9b51459..594f5dd 100644
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -2,11 +2,16 @@
#
# $Id$
#
-# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
+# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
# Licensed to PSF under a Contributor Agreement.
#
import hashlib
+from io import StringIO
+try:
+ import threading
+except ImportError:
+ threading = None
import unittest
from test import support
from test.support import _4G, precisionbigmemtest
@@ -224,6 +229,45 @@ class HashLibTestCase(unittest.TestCase):
m = hashlib.md5(b'x' * gil_minsize)
self.assertEquals(m.hexdigest(), 'cfb767f225d58469c5de3632a8803958')
+ def test_threaded_hashing(self):
+ if not threading:
+ raise unittest.SkipTest('No threading module.')
+
+ # Update the same hash object from several threads at once, each
+ # thread using a different chunk size over the same byte sequence.
+ #
+ # If the internal locks are working to prevent multiple
+ # updates on the same object from running at once, the resulting
+ # hash will be the same as doing it single threaded upfront.
+ hasher = hashlib.sha1()
+ num_threads = 5
+ smallest_data = b'swineflu'
+ data = smallest_data*200000
+ expected_hash = hashlib.sha1(data*num_threads).hexdigest()
+
+ def hash_in_chunks(chunk_size, event):
+ index = 0
+ while index < len(data):
+ hasher.update(data[index:index+chunk_size])
+ index += chunk_size
+ event.set()
+
+ events = []
+ for threadnum in range(num_threads):
+ chunk_size = len(data) // (10**threadnum)
+ assert chunk_size > 0
+ assert chunk_size % len(smallest_data) == 0
+ event = threading.Event()
+ events.append(event)
+ threading.Thread(target=hash_in_chunks,
+ args=(chunk_size, event)).start()
+
+ for event in events:
+ event.wait()
+
+ self.assertEqual(expected_hash, hasher.hexdigest())
+
+
def test_main():
support.run_unittest(HashLibTestCase)
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
index 569d441..0dae515 100644
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -1,7 +1,7 @@
/* Module that wraps all OpenSSL hash algorithms */
/*
- * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
+ * Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
* Licensed to PSF under a Contributor Agreement.
*
* Derived from a skeleton of shamodule.c containing work performed by:
@@ -17,21 +17,8 @@
#include "structmember.h"
#include "hashlib.h"
-/* EVP is the preferred interface to hashing in OpenSSL */
-#include <openssl/evp.h>
-
-#define MUNCH_SIZE INT_MAX
-
-
-#ifndef HASH_OBJ_CONSTRUCTOR
-#define HASH_OBJ_CONSTRUCTOR 0
-#endif
-
-#define HASHLIB_GIL_MINSIZE 2048
-
#ifdef WITH_THREAD
- #include "pythread.h"
-
+#include "pythread.h"
#define ENTER_HASHLIB(obj) \
if ((obj)->lock) { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -49,6 +36,20 @@
#define LEAVE_HASHLIB(obj)
#endif
+/* EVP is the preferred interface to hashing in OpenSSL */
+#include <openssl/evp.h>
+
+#define MUNCH_SIZE INT_MAX
+
+/* TODO(gps): We should probably make this a module or EVPobject attribute
+ * to allow the user to optimize based on the platform they're using. */
+#define HASHLIB_GIL_MINSIZE 2048
+
+#ifndef HASH_OBJ_CONSTRUCTOR
+#define HASH_OBJ_CONSTRUCTOR 0
+#endif
+
+
typedef struct {
PyObject_HEAD
PyObject *name; /* name of this hash algorithm */
@@ -122,11 +123,18 @@ EVP_dealloc(EVPobject *self)
PyObject_Del(self);
}
+static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
+{
+ ENTER_HASHLIB(self);
+ EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
+ LEAVE_HASHLIB(self);
+}
/* External methods for a hash object */
PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
+
static PyObject *
EVP_copy(EVPobject *self, PyObject *unused)
{
@@ -135,9 +143,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
if ( (newobj = newEVPobject(self->name))==NULL)
return NULL;
- ENTER_HASHLIB(self);
- EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
- LEAVE_HASHLIB(self);
+ locked_EVP_MD_CTX_copy(&newobj->ctx, self);
return (PyObject *)newobj;
}
@@ -152,9 +158,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
PyObject *retval;
unsigned int digest_size;
- ENTER_HASHLIB(self);
- EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
- LEAVE_HASHLIB(self);
+ locked_EVP_MD_CTX_copy(&temp_ctx, self);
digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -176,9 +180,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
unsigned int i, j, digest_size;
/* Get the raw (binary) digest value */
- ENTER_HASHLIB(self);
- EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
- LEAVE_HASHLIB(self);
+ locked_EVP_MD_CTX_copy(&temp_ctx, self);
digest_size = EVP_MD_CTX_size(&temp_ctx);
EVP_DigestFinal(&temp_ctx, digest, NULL);
@@ -221,11 +223,7 @@ EVP_update(EVPobject *self, PyObject *args)
#ifdef WITH_THREAD
if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE) {
self->lock = PyThread_allocate_lock();
- if (self->lock == NULL) {
- PyBuffer_Release(&view);
- PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
- return NULL;
- }
+ /* If allocation fails, lock stays NULL and we fall back to the non-threaded code. */
}
if (self->lock != NULL) {
@@ -257,9 +255,7 @@ static PyObject *
EVP_get_block_size(EVPobject *self, void *closure)
{
long block_size;
- ENTER_HASHLIB(self);
block_size = EVP_MD_CTX_block_size(&self->ctx);
- LEAVE_HASHLIB(self);
return PyLong_FromLong(block_size);
}
@@ -267,9 +263,7 @@ static PyObject *
EVP_get_digest_size(EVPobject *self, void *closure)
{
long size;
- ENTER_HASHLIB(self);
size = EVP_MD_CTX_size(&self->ctx);
- LEAVE_HASHLIB(self);
return PyLong_FromLong(size);
}