From 2f050c7e1b36bf641e7023f7b28b451454c6b98a Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 27 Jan 2018 09:53:43 +0100 Subject: bpo-32433: Optimized HMAC digest (#5023) The hmac module now has hmac.digest(), which provides an optimized HMAC digest for short messages. hmac.digest() is up to three times faster than hmac.HMAC().digest(). Signed-off-by: Christian Heimes --- Doc/library/hmac.rst | 15 ++++++ Doc/whatsnew/3.7.rst | 7 +++ Lib/hmac.py | 42 +++++++++++++++ Lib/test/test_hmac.py | 38 ++++++++++++++ .../2017-12-27-20-09-27.bpo-32433.vmxsVI.rst | 2 + Modules/_hashopenssl.c | 59 +++++++++++++++++++++- Modules/clinic/_hashopenssl.c.h | 44 +++++++++++++++- 7 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst index adbf78a..fcda86c 100644 --- a/Doc/library/hmac.rst +++ b/Doc/library/hmac.rst @@ -31,6 +31,21 @@ This module implements the HMAC algorithm as described by :rfc:`2104`. MD5 as implicit default digest for *digestmod* is deprecated. +.. function:: digest(key, msg, digest) + + Return digest of *msg* for given secret *key* and *digest*. The + function is equivalent to ``HMAC(key, msg, digest).digest()``, but + uses an optimized C or inline implementation, which is faster for messages + that fit into memory. The parameters *key*, *msg*, and *digest* have + the same meaning as in :func:`~hmac.new`. + + CPython implementation detail, the optimized C implementation is only used + when *digest* is a string and name of a digest algorithm, which is + supported by OpenSSL. + + .. versionadded:: 3.7 + + An HMAC object has the following methods: .. method:: HMAC.update(msg) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 43fbd01..133975a 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -492,6 +492,13 @@ and the ``--directory`` to the command line of the module :mod:`~http.server`. 
With this parameter, the server serves the specified directory, by default it uses the current working directory. (Contributed by Stéphane Wirtel and Julien Palard in :issue:`28707`.) +hmac +---- + +The hmac module now has an optimized one-shot :func:`~hmac.digest` function, +which is up to three times faster than :func:`~hmac.HMAC`. +(Contributed by Christian Heimes in :issue:`32433`.) + importlib --------- diff --git a/Lib/hmac.py b/Lib/hmac.py index 121029a..93c084e 100644 --- a/Lib/hmac.py +++ b/Lib/hmac.py @@ -5,6 +5,13 @@ Implements the HMAC algorithm as described by RFC 2104. import warnings as _warnings from _operator import _compare_digest as compare_digest +try: + import _hashlib as _hashopenssl +except ImportError: + _hashopenssl = None + _openssl_md_meths = None +else: + _openssl_md_meths = frozenset(_hashopenssl.openssl_md_meth_names) import hashlib as _hashlib trans_5C = bytes((x ^ 0x5C) for x in range(256)) @@ -142,3 +149,38 @@ def new(key, msg = None, digestmod = None): method. """ return HMAC(key, msg, digestmod) + + +def digest(key, msg, digest): + """Fast inline implementation of HMAC + + key: key for the keyed hash object. + msg: input message + digest: A hash name suitable for hashlib.new() for best performance. *OR* + A hashlib constructor returning a new hash object. *OR* + A module supporting PEP 247. + + Note: key and msg must be a bytes or bytearray objects. 
+ """ + if (_hashopenssl is not None and + isinstance(digest, str) and digest in _openssl_md_meths): + return _hashopenssl.hmac_digest(key, msg, digest) + + if callable(digest): + digest_cons = digest + elif isinstance(digest, str): + digest_cons = lambda d=b'': _hashlib.new(digest, d) + else: + digest_cons = lambda d=b'': digest.new(d) + + inner = digest_cons() + outer = digest_cons() + blocksize = getattr(inner, 'block_size', 64) + if len(key) > blocksize: + key = digest_cons(key).digest() + key = key + b'\x00' * (blocksize - len(key)) + inner.update(key.translate(trans_36)) + outer.update(key.translate(trans_5C)) + inner.update(msg) + outer.update(inner.digest()) + return outer.digest() diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py index 067e13f..4e4ef0e 100644 --- a/Lib/test/test_hmac.py +++ b/Lib/test/test_hmac.py @@ -1,7 +1,9 @@ +import binascii import functools import hmac import hashlib import unittest +import unittest.mock import warnings @@ -23,16 +25,27 @@ class TestVectorsTestCase(unittest.TestCase): def md5test(key, data, digest): h = hmac.HMAC(key, data, digestmod=hashlib.md5) self.assertEqual(h.hexdigest().upper(), digest.upper()) + self.assertEqual(h.digest(), binascii.unhexlify(digest)) self.assertEqual(h.name, "hmac-md5") self.assertEqual(h.digest_size, 16) self.assertEqual(h.block_size, 64) h = hmac.HMAC(key, data, digestmod='md5') self.assertEqual(h.hexdigest().upper(), digest.upper()) + self.assertEqual(h.digest(), binascii.unhexlify(digest)) self.assertEqual(h.name, "hmac-md5") self.assertEqual(h.digest_size, 16) self.assertEqual(h.block_size, 64) + self.assertEqual( + hmac.digest(key, data, digest='md5'), + binascii.unhexlify(digest) + ) + with unittest.mock.patch('hmac._openssl_md_meths', {}): + self.assertEqual( + hmac.digest(key, data, digest='md5'), + binascii.unhexlify(digest) + ) md5test(b"\x0b" * 16, b"Hi There", @@ -67,16 +80,23 @@ class TestVectorsTestCase(unittest.TestCase): def shatest(key, data, digest): h = 
hmac.HMAC(key, data, digestmod=hashlib.sha1) self.assertEqual(h.hexdigest().upper(), digest.upper()) + self.assertEqual(h.digest(), binascii.unhexlify(digest)) self.assertEqual(h.name, "hmac-sha1") self.assertEqual(h.digest_size, 20) self.assertEqual(h.block_size, 64) h = hmac.HMAC(key, data, digestmod='sha1') self.assertEqual(h.hexdigest().upper(), digest.upper()) + self.assertEqual(h.digest(), binascii.unhexlify(digest)) self.assertEqual(h.name, "hmac-sha1") self.assertEqual(h.digest_size, 20) self.assertEqual(h.block_size, 64) + self.assertEqual( + hmac.digest(key, data, digest='sha1'), + binascii.unhexlify(digest) + ) + shatest(b"\x0b" * 20, b"Hi There", @@ -122,6 +142,24 @@ class TestVectorsTestCase(unittest.TestCase): self.assertEqual(h.digest_size, digest_size) self.assertEqual(h.block_size, block_size) + self.assertEqual( + hmac.digest(key, data, digest=hashfunc), + binascii.unhexlify(hexdigests[hashfunc]) + ) + self.assertEqual( + hmac.digest(key, data, digest=hash_name), + binascii.unhexlify(hexdigests[hashfunc]) + ) + + with unittest.mock.patch('hmac._openssl_md_meths', {}): + self.assertEqual( + hmac.digest(key, data, digest=hashfunc), + binascii.unhexlify(hexdigests[hashfunc]) + ) + self.assertEqual( + hmac.digest(key, data, digest=hash_name), + binascii.unhexlify(hexdigests[hashfunc]) + ) # 4.2. Test Case 1 hmactest(key = b'\x0b'*20, diff --git a/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst b/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst new file mode 100644 index 0000000..d9b326e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-12-27-20-09-27.bpo-32433.vmxsVI.rst @@ -0,0 +1,2 @@ +The hmac module now has hmac.digest(), which provides an optimized HMAC +digest. 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index c8d1758..50fe9d5 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -21,6 +21,7 @@ /* EVP is the preferred interface to hashing in OpenSSL */ #include <openssl/evp.h> +#include <openssl/hmac.h> /* We use the object interface to discover what hashes OpenSSL supports. */ #include <openssl/objects.h> #include "openssl/err.h" @@ -528,8 +529,6 @@ EVP_new(PyObject *self, PyObject *args, PyObject *kwdict) return ret_obj; } - - #if (OPENSSL_VERSION_NUMBER >= 0x10000000 && !defined(OPENSSL_NO_HMAC) \ && !defined(OPENSSL_NO_SHA)) @@ -849,6 +848,61 @@ _hashlib_scrypt_impl(PyObject *module, Py_buffer *password, Py_buffer *salt, } #endif +/* Fast HMAC for hmac.digest() + */ + +/*[clinic input] +_hashlib.hmac_digest + + key: Py_buffer + msg: Py_buffer + digest: str + +Single-shot HMAC +[clinic start generated code]*/ + +static PyObject * +_hashlib_hmac_digest_impl(PyObject *module, Py_buffer *key, Py_buffer *msg, + const char *digest) +/*[clinic end generated code: output=75630e684cdd8762 input=10e964917921e2f2]*/ +{ + unsigned char md[EVP_MAX_MD_SIZE] = {0}; + unsigned int md_len = 0; + unsigned char *result; + const EVP_MD *evp; + + evp = EVP_get_digestbyname(digest); + if (evp == NULL) { + PyErr_SetString(PyExc_ValueError, "unsupported hash type"); + return NULL; + } + if (key->len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "key is too long."); + return NULL; + } + if (msg->len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "msg is too long."); + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + result = HMAC( + evp, + (const void*)key->buf, (int)key->len, + (const unsigned char*)msg->buf, (int)msg->len, + md, &md_len + ); + Py_END_ALLOW_THREADS + + if (result == NULL) { + _setException(PyExc_ValueError); + return NULL; + } + return PyBytes_FromStringAndSize((const char*)md, md_len); +} + /* State for our callback function so that it can accumulate a result. 
*/ typedef struct _internal_name_mapper_state { PyObject *set; @@ -982,6 +1036,7 @@ static struct PyMethodDef EVP_functions[] = { pbkdf2_hmac__doc__}, #endif _HASHLIB_SCRYPT_METHODDEF + _HASHLIB_HMAC_DIGEST_METHODDEF CONSTRUCTOR_METH_DEF(md5), CONSTRUCTOR_METH_DEF(sha1), CONSTRUCTOR_METH_DEF(sha224), diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index f08d7f3..cbc8638 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -54,7 +54,49 @@ exit: #endif /* (OPENSSL_VERSION_NUMBER > 0x10100000L && !defined(OPENSSL_NO_SCRYPT) && !defined(LIBRESSL_VERSION_NUMBER)) */ +PyDoc_STRVAR(_hashlib_hmac_digest__doc__, +"hmac_digest($module, /, key, msg, digest)\n" +"--\n" +"\n" +"Single-shot HMAC"); + +#define _HASHLIB_HMAC_DIGEST_METHODDEF \ + {"hmac_digest", (PyCFunction)_hashlib_hmac_digest, METH_FASTCALL|METH_KEYWORDS, _hashlib_hmac_digest__doc__}, + +static PyObject * +_hashlib_hmac_digest_impl(PyObject *module, Py_buffer *key, Py_buffer *msg, + const char *digest); + +static PyObject * +_hashlib_hmac_digest(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"key", "msg", "digest", NULL}; + static _PyArg_Parser _parser = {"y*y*s:hmac_digest", _keywords, 0}; + Py_buffer key = {NULL, NULL}; + Py_buffer msg = {NULL, NULL}; + const char *digest; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &key, &msg, &digest)) { + goto exit; + } + return_value = _hashlib_hmac_digest_impl(module, &key, &msg, digest); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for msg */ + if (msg.obj) { + PyBuffer_Release(&msg); + } + + return return_value; +} + #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=1ea7d0397f38e2c2 input=a9049054013a1b77]*/ +/*[clinic 
end generated code: output=b5b90821caf05391 input=a9049054013a1b77]*/ -- cgit v0.12