From 121b9487d13b7dcd5a864a6067c4a5c88ba15b5d Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Tue, 6 Sep 2016 22:03:25 +0200 Subject: Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib. --- Doc/library/crypto.rst | 1 + Doc/library/hashlib-blake2-tree.png | Bin 0 -> 11148 bytes Doc/library/hashlib-blake2.rst | 443 ++++++++++++++++++++++++++++ Doc/library/hashlib.rst | 14 +- Lib/hashlib.py | 32 ++- Lib/test/test_hashlib.py | 201 ++++++++++++- Makefile.pre.in | 8 +- Misc/NEWS | 2 + Modules/_blake2/blake2b2s.py | 49 ++++ Modules/_blake2/blake2b_impl.c | 460 ++++++++++++++++++++++++++++++ Modules/_blake2/blake2module.c | 105 +++++++ Modules/_blake2/blake2ns.h | 32 +++ Modules/_blake2/blake2s_impl.c | 460 ++++++++++++++++++++++++++++++ Modules/_blake2/clinic/blake2b_impl.c.h | 125 ++++++++ Modules/_blake2/clinic/blake2s_impl.c.h | 125 ++++++++ Modules/_blake2/impl/blake2-config.h | 74 +++++ Modules/_blake2/impl/blake2-impl.h | 139 +++++++++ Modules/_blake2/impl/blake2.h | 161 +++++++++++ Modules/_blake2/impl/blake2b-load-sse2.h | 70 +++++ Modules/_blake2/impl/blake2b-load-sse41.h | 404 ++++++++++++++++++++++++++ Modules/_blake2/impl/blake2b-ref.c | 416 +++++++++++++++++++++++++++ Modules/_blake2/impl/blake2b-round.h | 159 +++++++++++ Modules/_blake2/impl/blake2b.c | 450 +++++++++++++++++++++++++++++ Modules/_blake2/impl/blake2s-load-sse2.h | 61 ++++ Modules/_blake2/impl/blake2s-load-sse41.h | 231 +++++++++++++++ Modules/_blake2/impl/blake2s-load-xop.h | 191 +++++++++++++ Modules/_blake2/impl/blake2s-ref.c | 406 ++++++++++++++++++++++++++ Modules/_blake2/impl/blake2s-round.h | 90 ++++++ Modules/_blake2/impl/blake2s.c | 431 ++++++++++++++++++++++++++++ Modules/hashlib.h | 19 +- PCbuild/pythoncore.vcxproj | 3 + PCbuild/pythoncore.vcxproj.filters | 9 + setup.py | 16 ++ 33 files changed, 5361 insertions(+), 26 deletions(-) create mode 100644 Doc/library/hashlib-blake2-tree.png create mode 100644 Doc/library/hashlib-blake2.rst create mode 100755 Modules/_blake2/blake2b2s.py create mode 100644 Modules/_blake2/blake2b_impl.c create mode 100644 Modules/_blake2/blake2module.c create mode 100644 Modules/_blake2/blake2ns.h create mode 100644 Modules/_blake2/blake2s_impl.c create mode 100644 Modules/_blake2/clinic/blake2b_impl.c.h create mode 100644 Modules/_blake2/clinic/blake2s_impl.c.h create mode 100644 Modules/_blake2/impl/blake2-config.h create mode 100644 Modules/_blake2/impl/blake2-impl.h create mode 100644 Modules/_blake2/impl/blake2.h create mode 100644 Modules/_blake2/impl/blake2b-load-sse2.h create mode 100644 Modules/_blake2/impl/blake2b-load-sse41.h create mode 100644 Modules/_blake2/impl/blake2b-ref.c create mode 100644 Modules/_blake2/impl/blake2b-round.h create mode 100644 Modules/_blake2/impl/blake2b.c create mode 100644 Modules/_blake2/impl/blake2s-load-sse2.h create mode 100644 Modules/_blake2/impl/blake2s-load-sse41.h create mode 100644 Modules/_blake2/impl/blake2s-load-xop.h create mode 100644 Modules/_blake2/impl/blake2s-ref.c create mode 100644 Modules/_blake2/impl/blake2s-round.h create mode 100644 Modules/_blake2/impl/blake2s.c diff --git a/Doc/library/crypto.rst b/Doc/library/crypto.rst index ae45549..8eb4b81 100644 --- a/Doc/library/crypto.rst +++ b/Doc/library/crypto.rst @@ -15,5 +15,6 @@ Here's an overview: .. toctree:: hashlib.rst + hashlib-blake2.rst hmac.rst secrets.rst diff --git a/Doc/library/hashlib-blake2-tree.png b/Doc/library/hashlib-blake2-tree.png new file mode 100644 index 0000000..010dcba Binary files /dev/null and b/Doc/library/hashlib-blake2-tree.png differ diff --git a/Doc/library/hashlib-blake2.rst b/Doc/library/hashlib-blake2.rst new file mode 100644 index 0000000..40c594b --- /dev/null +++ b/Doc/library/hashlib-blake2.rst @@ -0,0 +1,443 @@ +.. _hashlib-blake2: + +:mod:`hashlib` --- BLAKE2 hash functions +======================================== + +.. module:: hashlib + :synopsis: BLAKE2 hash function for Python +.. sectionauthor:: Dmitry Chestnykh + +.. index:: + single: blake2b, blake2s + +BLAKE2_ is a cryptographic hash function, which offers highest security while +being as fast as MD5 or SHA-1, and comes in two flavors: + +* **BLAKE2b**, optimized for 64-bit platforms and produces digests of any size + between 1 and 64 bytes, + +* **BLAKE2s**, optimized for 8- to 32-bit platforms and produces digests of any + size between 1 and 32 bytes. + +BLAKE2 supports **keyed mode** (a faster and simpler replacement for HMAC_), +**salted hashing**, **personalization**, and **tree hashing**. + +Hash objects from this module follow the API of standard library's +:mod:`hashlib` objects. + + +Module +====== + +Creating hash objects +--------------------- + +New hash objects are created by calling constructor functions: + + +.. function:: blake2b(data=b'', digest_size=64, key=b'', salt=b'', \ + person=b'', fanout=1, depth=1, leaf_size=0, node_offset=0, \ + node_depth=0, inner_size=0, last_node=False) + +.. function:: blake2s(data=b'', digest_size=32, key=b'', salt=b'', \ + person=b'', fanout=1, depth=1, leaf_size=0, node_offset=0, \ + node_depth=0, inner_size=0, last_node=False) + + +These functions return the corresponding hash objects for calculating +BLAKE2b or BLAKE2s. They optionally take these general parameters: + +* *data*: initial chunk of data to hash, which must be interpretable as buffer + of bytes. + +* *digest_size*: size of output digest in bytes. + +* *key*: key for keyed hashing (up to 64 bytes for BLAKE2b, up to 32 bytes for + BLAKE2s). + +* *salt*: salt for randomized hashing (up to 16 bytes for BLAKE2b, up to 8 + bytes for BLAKE2s). + +* *person*: personalization string (up to 16 bytes for BLAKE2b, up to 8 bytes + for BLAKE2s). + +The following table shows limits for general parameters (in bytes): + +======= =========== ======== ========= =========== +Hash digest_size len(key) len(salt) len(person) +======= =========== ======== ========= =========== +BLAKE2b 64 64 16 16 +BLAKE2s 32 32 8 8 +======= =========== ======== ========= =========== + +.. note:: + + BLAKE2 specification defines constant lengths for salt and personalization + parameters, however, for convenience, this implementation accepts byte + strings of any size up to the specified length. If the length of the + parameter is less than specified, it is padded with zeros, thus, for + example, ``b'salt'`` and ``b'salt\x00'`` is the same value. (This is not + the case for *key*.) + +These sizes are available as module `constants`_ described below. + +Constructor functions also accept the following tree hashing parameters: + +* *fanout*: fanout (0 to 255, 0 if unlimited, 1 in sequential mode). + +* *depth*: maximal depth of tree (1 to 255, 255 if unlimited, 1 in + sequential mode). + +* *leaf_size*: maximal byte length of leaf (0 to 2**32-1, 0 if unlimited or in + sequential mode). + +* *node_offset*: node offset (0 to 2**64-1 for BLAKE2b, 0 to 2**48-1 for + BLAKE2s, 0 for the first, leftmost, leaf, or in sequential mode). + +* *node_depth*: node depth (0 to 255, 0 for leaves, or in sequential mode). + +* *inner_size*: inner digest size (0 to 64 for BLAKE2b, 0 to 32 for + BLAKE2s, 0 in sequential mode). + +* *last_node*: boolean indicating whether the processed node is the last + one (`False` for sequential mode). + +.. figure:: hashlib-blake2-tree.png + :alt: Explanation of tree mode parameters. + +See section 2.10 in `BLAKE2 specification +`_ for comprehensive review of tree +hashing. + + +Constants +--------- + +.. data:: blake2b.SALT_SIZE +.. data:: blake2s.SALT_SIZE + +Salt length (maximum length accepted by constructors). + + +.. data:: blake2b.PERSON_SIZE +.. data:: blake2s.PERSON_SIZE + +Personalization string length (maximum length accepted by constructors). + + +.. data:: blake2b.MAX_KEY_SIZE +.. data:: blake2s.MAX_KEY_SIZE + +Maximum key size. + + +.. data:: blake2b.MAX_DIGEST_SIZE +.. data:: blake2s.MAX_DIGEST_SIZE + +Maximum digest size that the hash function can output. + + +Examples +======== + +Simple hashing +-------------- + +To calculate hash of some data, you should first construct a hash object by +calling the appropriate constructor function (:func:`blake2b` or +:func:`blake2s`), then update it with the data by calling :meth:`update` on the +object, and, finally, get the digest out of the object by calling +:meth:`digest` (or :meth:`hexdigest` for hex-encoded string). + + >>> from hashlib import blake2b + >>> h = blake2b() + >>> h.update(b'Hello world') + >>> h.hexdigest() + '6ff843ba685842aa82031d3f53c48b66326df7639a63d128974c5c14f31a0f33343a8c65551134ed1ae0f2b0dd2bb495dc81039e3eeb0aa1bb0388bbeac29183' + + +As a shortcut, you can pass the first chunk of data to update directly to the +constructor as the first argument (or as *data* keyword argument): + + >>> from hashlib import blake2b + >>> blake2b(b'Hello world').hexdigest() + '6ff843ba685842aa82031d3f53c48b66326df7639a63d128974c5c14f31a0f33343a8c65551134ed1ae0f2b0dd2bb495dc81039e3eeb0aa1bb0388bbeac29183' + +You can call :meth:`hash.update` as many times as you need to iteratively +update the hash: + + >>> from hashlib import blake2b + >>> items = [b'Hello', b' ', b'world'] + >>> h = blake2b() + >>> for item in items: + ... h.update(item) + >>> h.hexdigest() + '6ff843ba685842aa82031d3f53c48b66326df7639a63d128974c5c14f31a0f33343a8c65551134ed1ae0f2b0dd2bb495dc81039e3eeb0aa1bb0388bbeac29183' + + +Using different digest sizes +---------------------------- + +BLAKE2 has configurable size of digests up to 64 bytes for BLAKE2b and up to 32 +bytes for BLAKE2s. For example, to replace SHA-1 with BLAKE2b without changing +the size of output, we can tell BLAKE2b to produce 20-byte digests: + + >>> from hashlib import blake2b + >>> h = blake2b(digest_size=20) + >>> h.update(b'Replacing SHA1 with the more secure function') + >>> h.hexdigest() + 'd24f26cf8de66472d58d4e1b1774b4c9158b1f4c' + >>> h.digest_size + 20 + >>> len(h.digest()) + 20 + +Hash objects with different digest sizes have completely different outputs +(shorter hashes are *not* prefixes of longer hashes); BLAKE2b and BLAKE2s +produce different outputs even if the output length is the same: + + >>> from hashlib import blake2b, blake2s + >>> blake2b(digest_size=10).hexdigest() + '6fa1d8fcfd719046d762' + >>> blake2b(digest_size=11).hexdigest() + 'eb6ec15daf9546254f0809' + >>> blake2s(digest_size=10).hexdigest() + '1bf21a98c78a1c376ae9' + >>> blake2s(digest_size=11).hexdigest() + '567004bf96e4a25773ebf4' + + +Keyed hashing +------------- + +Keyed hashing can be used for authentication as a faster and simpler +replacement for `Hash-based message authentication code +`_ (HMAC). +BLAKE2 can be securely used in prefix-MAC mode thanks to the +indifferentiability property inherited from BLAKE. + +This example shows how to get a (hex-encoded) 128-bit authentication code for +message ``b'message data'`` with key ``b'pseudorandom key'``: + + >>> from hashlib import blake2b + >>> h = blake2b(key=b'pseudorandom key', digest_size=16) + >>> h.update(b'message data') + >>> h.hexdigest() + '3d363ff7401e02026f4a4687d4863ced' + + +As a practical example, a web application can symmetrically sign cookies sent +to users and later verify them to make sure they weren't tampered with: + + >>> from hashlib import blake2b + >>> from hmac import compare_digest + >>> + >>> SECRET_KEY = b'pseudorandomly generated server secret key' + >>> AUTH_SIZE = 16 + >>> + >>> def sign(cookie): + ... h = blake2b(data=cookie, digest_size=AUTH_SIZE, key=SECRET_KEY) + ... return h.hexdigest() + >>> + >>> cookie = b'user:vatrogasac' + >>> sig = sign(cookie) + >>> print("{0},{1}".format(cookie.decode('utf-8'), sig)) + user:vatrogasac,349cf904533767ed2d755279a8df84d0 + >>> compare_digest(cookie, sig) + True + >>> compare_digest(b'user:policajac', sig) + False + >>> compare_digesty(cookie, '0102030405060708090a0b0c0d0e0f00') + False + +Even though there's a native keyed hashing mode, BLAKE2 can, of course, be used +in HMAC construction with :mod:`hmac` module: + + >>> import hmac, hashlib + >>> m = hmac.new(b'secret key', digestmod=hashlib.blake2s) + >>> m.update(b'message') + >>> m.hexdigest() + 'e3c8102868d28b5ff85fc35dda07329970d1a01e273c37481326fe0c861c8142' + + +Randomized hashing +------------------ + +By setting *salt* parameter users can introduce randomization to the hash +function. Randomized hashing is useful for protecting against collision attacks +on the hash function used in digital signatures. + + Randomized hashing is designed for situations where one party, the message + preparer, generates all or part of a message to be signed by a second + party, the message signer. If the message preparer is able to find + cryptographic hash function collisions (i.e., two messages producing the + same hash value), then she might prepare meaningful versions of the message + that would produce the same hash value and digital signature, but with + different results (e.g., transferring $1,000,000 to an account, rather than + $10). Cryptographic hash functions have been designed with collision + resistance as a major goal, but the current concentration on attacking + cryptographic hash functions may result in a given cryptographic hash + function providing less collision resistance than expected. Randomized + hashing offers the signer additional protection by reducing the likelihood + that a preparer can generate two or more messages that ultimately yield the + same hash value during the digital signature generation process – even if + it is practical to find collisions for the hash function. However, the use + of randomized hashing may reduce the amount of security provided by a + digital signature when all portions of the message are prepared + by the signer. + + (`NIST SP-800-106 "Randomized Hashing for Digital Signatures" + `_) + +In BLAKE2 the salt is processed as a one-time input to the hash function during +initialization, rather than as an input to each compression function. + +.. warning:: + + *Salted hashing* (or just hashing) with BLAKE2 or any other general-purpose + cryptographic hash function, such as SHA-256, is not suitable for hashing + passwords. See `BLAKE2 FAQ `_ for more + information. +.. + + >>> import os + >>> from hashlib import blake2b + >>> msg = b'some message' + >>> # Calculate the first hash with a random salt. + >>> salt1 = os.urandom(blake2b.SALT_SIZE) + >>> h1 = blake2b(salt=salt1) + >>> h1.update(msg) + >>> # Calculate the second hash with a different random salt. + >>> salt2 = os.urandom(blake2b.SALT_SIZE) + >>> h2 = blake2b(salt=salt2) + >>> h2.update(msg) + >>> # The digests are different. + >>> h1.digest() != h2.digest() + True + + +Personalization +--------------- + +Sometimes it is useful to force hash function to produce different digests for +the same input for different purposes. Quoting the authors of the Skein hash +function: + + We recommend that all application designers seriously consider doing this; + we have seen many protocols where a hash that is computed in one part of + the protocol can be used in an entirely different part because two hash + computations were done on similar or related data, and the attacker can + force the application to make the hash inputs the same. Personalizing each + hash function used in the protocol summarily stops this type of attack. + + (`The Skein Hash Function Family + `_, + p. 21) + +BLAKE2 can be personalized by passing bytes to the *person* argument: + + >>> from hashlib import blake2b + >>> FILES_HASH_PERSON = b'MyApp Files Hash' + >>> BLOCK_HASH_PERSON = b'MyApp Block Hash' + >>> h = blake2b(digest_size=32, person=FILES_HASH_PERSON) + >>> h.update(b'the same content') + >>> h.hexdigest() + '20d9cd024d4fb086aae819a1432dd2466de12947831b75c5a30cf2676095d3b4' + >>> h = blake2b(digest_size=32, person=BLOCK_HASH_PERSON) + >>> h.update(b'the same content') + >>> h.hexdigest() + 'cf68fb5761b9c44e7878bfb2c4c9aea52264a80b75005e65619778de59f383a3' + +Personalization together with the keyed mode can also be used to derive different +keys from a single one. + + >>> from hashlib import blake2s + >>> from base64 import b64decode, b64encode + >>> orig_key = b64decode(b'Rm5EPJai72qcK3RGBpW3vPNfZy5OZothY+kHY6h21KM=') + >>> enc_key = blake2s(key=orig_key, person=b'kEncrypt').digest() + >>> mac_key = blake2s(key=orig_key, person=b'kMAC').digest() + >>> print(b64encode(enc_key).decode('utf-8')) + rbPb15S/Z9t+agffno5wuhB77VbRi6F9Iv2qIxU7WHw= + >>> print(b64encode(mac_key).decode('utf-8')) + G9GtHFE1YluXY1zWPlYk1e/nWfu0WSEb0KRcjhDeP/o= + +Tree mode +--------- + +Here's an example of hashing a minimal tree with two leaf nodes:: + + 10 + / \ + 00 01 + +The example uses 64-byte internal digests, and returns the 32-byte final +digest. + + >>> from hashlib import blake2b + >>> + >>> FANOUT = 2 + >>> DEPTH = 2 + >>> LEAF_SIZE = 4096 + >>> INNER_SIZE = 64 + >>> + >>> buf = bytearray(6000) + >>> + >>> # Left leaf + ... h00 = blake2b(buf[0:LEAF_SIZE], fanout=FANOUT, depth=DEPTH, + ... leaf_size=LEAF_SIZE, inner_size=INNER_SIZE, + ... node_offset=0, node_depth=0, last_node=False) + >>> # Right leaf + ... h01 = blake2b(buf[LEAF_SIZE:], fanout=FANOUT, depth=DEPTH, + ... leaf_size=LEAF_SIZE, inner_size=INNER_SIZE, + ... node_offset=1, node_depth=0, last_node=True) + >>> # Root node + ... h10 = blake2b(digest_size=32, fanout=FANOUT, depth=DEPTH, + ... leaf_size=LEAF_SIZE, inner_size=INNER_SIZE, + ... node_offset=0, node_depth=1, last_node=True) + >>> h10.update(h00.digest()) + >>> h10.update(h01.digest()) + >>> h10.hexdigest() + '3ad2a9b37c6070e374c7a8c508fe20ca86b6ed54e286e93a0318e95e881db5aa' + +Credits +======= + +BLAKE2_ was designed by *Jean-Philippe Aumasson*, *Samuel Neves*, *Zooko +Wilcox-O'Hearn*, and *Christian Winnerlein* based on SHA-3_ finalist BLAKE_ +created by *Jean-Philippe Aumasson*, *Luca Henzen*, *Willi Meier*, and +*Raphael C.-W. Phan*. + +It uses core algorithm from ChaCha_ cipher designed by *Daniel J. Bernstein*. + +The stdlib implementation is based on pyblake2_ module. It was written by +*Dmitry Chestnykh* based on C implementation written by *Samuel Neves*. The +documentation was copied from pyblake2_ and written by *Dmitry Chestnykh*. + +The C code was partly rewritten for Python by *Christian Heimes*. + +The following public domain dedication applies for both C hash function +implementation, extension code, and this documentation: + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with this software. If not, see + http://creativecommons.org/publicdomain/zero/1.0/. + +The following people have helped with development or contributed their changes +to the project and the public domain according to the Creative Commons Public +Domain Dedication 1.0 Universal: + +* *Alexandr Sokolovskiy* + +.. seealso:: Official BLAKE2 website: https://blake2.net + +.. _BLAKE2: https://blake2.net +.. _HMAC: http://en.wikipedia.org/wiki/Hash-based_message_authentication_code +.. _BLAKE: https://131002.net/blake/ +.. _SHA-3: http://en.wikipedia.org/wiki/NIST_hash_function_competition +.. _ChaCha: http://cr.yp.to/chacha.html +.. _pyblake2: https://pythonhosted.org/pyblake2/ + diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index cf6b8ff..6ca5307 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -65,11 +65,15 @@ concatenation of the data fed to it so far using the :meth:`digest` or Constructors for hash algorithms that are always present in this module are :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, -and :func:`sha512`. :func:`md5` is normally available as well, though it +:func:`sha512`, :func:`blake2b`, and :func:`blake2s`. +:func:`md5` is normally available as well, though it may be missing if you are using a rare "FIPS compliant" build of Python. Additional algorithms may also be available depending upon the OpenSSL library that Python uses on your platform. +.. versionadded:: 3.6 + :func:`blake2b` and :func:`blake2s` were added. + For example, to obtain the digest of the byte string ``b'Nobody inspects the spammish repetition'``:: @@ -243,6 +247,12 @@ include a `salt `_. .. versionadded:: 3.6 +BLAKE2 +------ + +BLAKE2 takes additional arguments, see :ref:`hashlib-blake2`. + + .. seealso:: Module :mod:`hmac` @@ -251,6 +261,8 @@ include a `salt `_. Module :mod:`base64` Another way to encode binary hashes for non-binary environments. + See :ref:`hashlib-blake2`. + http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf The FIPS 180-2 publication on Secure Hash Algorithms. diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 348ea14..40ccdec 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -4,14 +4,14 @@ __doc__ = """hashlib module - A common interface to many hash functions. -new(name, data=b'') - returns a new hash object implementing the - given hash function; initializing the hash - using the given binary data. +new(name, data=b'', **kwargs) - returns a new hash object implementing the + given hash function; initializing the hash + using the given binary data. Named constructor functions are also available, these are faster than using new(name): -md5(), sha1(), sha224(), sha256(), sha384(), and sha512() +md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), and blake2s() More algorithms may be available on your platform but the above are guaranteed to exist. See the algorithms_guaranteed and algorithms_available attributes @@ -54,7 +54,8 @@ More condensed: # This tuple and __get_builtin_constructor() must be modified if a new # always available algorithm is added. -__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') +__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', + 'blake2b', 'blake2s') algorithms_guaranteed = set(__always_supported) algorithms_available = set(__always_supported) @@ -85,6 +86,10 @@ def __get_builtin_constructor(name): import _sha512 cache['SHA384'] = cache['sha384'] = _sha512.sha384 cache['SHA512'] = cache['sha512'] = _sha512.sha512 + elif name in ('blake2b', 'blake2s'): + import _blake2 + cache['blake2b'] = _blake2.blake2b + cache['blake2s'] = _blake2.blake2s except ImportError: pass # no extension module, this hash is unsupported. @@ -107,17 +112,23 @@ def __get_openssl_constructor(name): return __get_builtin_constructor(name) -def __py_new(name, data=b''): - """new(name, data=b'') - Return a new hashing object using the named algorithm; - optionally initialized with data (which must be bytes). +def __py_new(name, data=b'', **kwargs): + """new(name, data=b'', **kwargs) - Return a new hashing object using the + named algorithm; optionally initialized with data (which must be bytes). """ - return __get_builtin_constructor(name)(data) + return __get_builtin_constructor(name)(data, **kwargs) -def __hash_new(name, data=b''): +def __hash_new(name, data=b'', **kwargs): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be bytes). """ + if name in {'blake2b', 'blake2s'}: + # Prefer our blake2 implementation. + # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. + # It does neither support keyed blake2 nor advanced features like + # salt, personal, tree hashing or SSE. + return __get_builtin_constructor(name)(data, **kwargs) try: return _hashlib.new(name, data) except ValueError: @@ -218,6 +229,7 @@ for __func_name in __always_supported: import logging logging.exception('code for hash %s was not found.', __func_name) + # Cleanup locals() del __always_supported, __func_name, __get_hash del __py_new, __hash_new, __get_openssl_constructor diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index b010a74..e6df09c 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -27,6 +27,14 @@ COMPILED_WITH_PYDEBUG = hasattr(sys, 'gettotalrefcount') c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib']) +try: + import _blake2 +except ImportError: + _blake2 = None + +requires_blake2 = unittest.skipUnless(_blake2, 'requires _blake2') + + def hexstr(s): assert isinstance(s, bytes), repr(s) h = "0123456789abcdef" @@ -36,10 +44,24 @@ def hexstr(s): return r +URL = "https://raw.githubusercontent.com/tiran/python_vectors/master/{}.txt" + +def read_vectors(hash_name): + with support.open_urlresource(URL.format(hash_name)) as f: + for line in f: + line = line.strip() + if line.startswith('#') or not line: + continue + parts = line.split(',') + parts[0] = bytes.fromhex(parts[0]) + yield parts + + class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', - 'sha384', 'SHA384', 'sha512', 'SHA512') + 'sha384', 'SHA384', 'sha512', 'SHA512', + 'blake2b', 'blake2s') # Issue #14693: fallback modules are always compiled under POSIX _warn_on_extension_import = os.name == 'posix' or COMPILED_WITH_PYDEBUG @@ -57,6 +79,11 @@ class HashLibTestCase(unittest.TestCase): algorithms = set() for algorithm in self.supported_hash_names: algorithms.add(algorithm.lower()) + + _blake2 = self._conditional_import_module('_blake2') + if _blake2: + algorithms.update({'blake2b', 'blake2s'}) + self.constructors_to_test = {} for algorithm in algorithms: self.constructors_to_test[algorithm] = set() @@ -65,10 +92,10 @@ class HashLibTestCase(unittest.TestCase): # of hashlib.new given the algorithm name. for algorithm, constructors in self.constructors_to_test.items(): constructors.add(getattr(hashlib, algorithm)) - def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm): + def _test_algorithm_via_hashlib_new(data=None, _alg=algorithm, **kwargs): if data is None: - return hashlib.new(_alg) - return hashlib.new(_alg, data) + return hashlib.new(_alg, **kwargs) + return hashlib.new(_alg, data, **kwargs) constructors.add(_test_algorithm_via_hashlib_new) _hashlib = self._conditional_import_module('_hashlib') @@ -100,6 +127,9 @@ class HashLibTestCase(unittest.TestCase): if _sha512: add_builtin_constructor('sha384') add_builtin_constructor('sha512') + if _blake2: + add_builtin_constructor('blake2s') + add_builtin_constructor('blake2b') super(HashLibTestCase, self).__init__(*args, **kwargs) @@ -194,13 +224,13 @@ class HashLibTestCase(unittest.TestCase): self.assertEqual(m1.digest(), m4_copy.digest()) self.assertEqual(m4.digest(), m4_digest) - def check(self, name, data, hexdigest): + def check(self, name, data, hexdigest, **kwargs): hexdigest = hexdigest.lower() constructors = self.constructors_to_test[name] # 2 is for hashlib.name(...) and hashlib.new(name, ...) self.assertGreaterEqual(len(constructors), 2) for hash_object_constructor in constructors: - m = hash_object_constructor(data) + m = hash_object_constructor(data, **kwargs) computed = m.hexdigest() self.assertEqual( computed, hexdigest, @@ -227,6 +257,11 @@ class HashLibTestCase(unittest.TestCase): self.check_no_unicode('sha384') self.check_no_unicode('sha512') + @requires_blake2 + def test_no_unicode_blake2(self): + self.check_no_unicode('blake2b') + self.check_no_unicode('blake2s') + def check_blocksize_name(self, name, block_size=0, digest_size=0): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: @@ -246,6 +281,11 @@ class HashLibTestCase(unittest.TestCase): self.check_blocksize_name('sha384', 128, 48) self.check_blocksize_name('sha512', 128, 64) + @requires_blake2 + def test_blocksize_name_blake2(self): + self.check_blocksize_name('blake2b', 128, 64) + self.check_blocksize_name('blake2s', 64, 32) + def test_case_md5_0(self): self.check('md5', b'', 'd41d8cd98f00b204e9800998ecf8427e') @@ -374,6 +414,155 @@ class HashLibTestCase(unittest.TestCase): "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") + def check_blake2(self, constructor, salt_size, person_size, key_size, + digest_size, max_offset): + self.assertEqual(constructor.SALT_SIZE, salt_size) + for i in range(salt_size + 1): + constructor(salt=b'a' * i) + salt = b'a' * (salt_size + 1) + self.assertRaises(ValueError, constructor, salt=salt) + + self.assertEqual(constructor.PERSON_SIZE, person_size) + for i in range(person_size+1): + constructor(person=b'a' * i) + person = b'a' * (person_size + 1) + self.assertRaises(ValueError, constructor, person=person) + + self.assertEqual(constructor.MAX_DIGEST_SIZE, digest_size) + for i in range(1, digest_size + 1): + constructor(digest_size=i) + self.assertRaises(ValueError, constructor, digest_size=-1) + self.assertRaises(ValueError, constructor, digest_size=0) + self.assertRaises(ValueError, constructor, digest_size=digest_size+1) + + self.assertEqual(constructor.MAX_KEY_SIZE, key_size) + for i in range(key_size+1): + constructor(key=b'a' * i) + key = b'a' * (key_size + 1) + self.assertRaises(ValueError, constructor, key=key) + self.assertEqual(constructor().hexdigest(), + constructor(key=b'').hexdigest()) + + for i in range(0, 256): + constructor(fanout=i) + self.assertRaises(ValueError, constructor, fanout=-1) + self.assertRaises(ValueError, constructor, fanout=256) + + for i in range(1, 256): + constructor(depth=i) + self.assertRaises(ValueError, constructor, depth=-1) + self.assertRaises(ValueError, constructor, depth=0) + self.assertRaises(ValueError, constructor, depth=256) + + for i in range(0, 256): + constructor(node_depth=i) + self.assertRaises(ValueError, constructor, node_depth=-1) + self.assertRaises(ValueError, constructor, node_depth=256) + + for i in range(0, digest_size + 1): + constructor(inner_size=i) + self.assertRaises(ValueError, constructor, inner_size=-1) + self.assertRaises(ValueError, constructor, inner_size=digest_size+1) + + constructor(leaf_size=0) + constructor(leaf_size=(1<<32)-1) + self.assertRaises(OverflowError, constructor, leaf_size=-1) + self.assertRaises(OverflowError, constructor, leaf_size=1<<32) + + constructor(node_offset=0) + constructor(node_offset=max_offset) + self.assertRaises(OverflowError, constructor, node_offset=-1) + self.assertRaises(OverflowError, constructor, node_offset=max_offset+1) + + constructor( + string=b'', + key=b'', + salt=b'', + person=b'', + digest_size=17, + fanout=1, + depth=1, + leaf_size=256, + node_offset=512, + node_depth=1, + inner_size=7, + last_node=True + ) + + def blake2_rfc7693(self, constructor, md_len, in_len): + def selftest_seq(length, seed): + mask = (1<<32)-1 + a = (0xDEAD4BAD * seed) & mask + b = 1 + out = bytearray(length) + for i in range(length): + t = (a + b) & mask + a, b = b, t + out[i] = (t >> 24) & 0xFF + return out + outer = constructor(digest_size=32) + for outlen in md_len: + for inlen in in_len: + indata = selftest_seq(inlen, inlen) + key = selftest_seq(outlen, outlen) + unkeyed = constructor(indata, digest_size=outlen) + outer.update(unkeyed.digest()) + keyed = constructor(indata, key=key, digest_size=outlen) + outer.update(keyed.digest()) + return outer.hexdigest() + + @requires_blake2 + def test_blake2b(self): + self.check_blake2(hashlib.blake2b, 16, 16, 64, 64, (1<<64)-1) + b2b_md_len = [20, 32, 48, 64] + b2b_in_len = [0, 3, 128, 129, 255, 1024] + self.assertEqual( + self.blake2_rfc7693(hashlib.blake2b, b2b_md_len, b2b_in_len), + "c23a7800d98123bd10f506c61e29da5603d763b8bbad2e737f5e765a7bccd475") + + @requires_blake2 + def test_case_blake2b_0(self): + self.check('blake2b', b"", + "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419"+ + "d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce") + + @requires_blake2 + def test_case_blake2b_1(self): + self.check('blake2b', b"abc", + "ba80a53f981c4d0d6a2797b69f12f6e94c212f14685ac4b74b12bb6fdbffa2d1"+ + "7d87c5392aab792dc252d5de4533cc9518d38aa8dbf1925ab92386edd4009923") + + @requires_blake2 + def test_blake2b_vectors(self): + for msg, key, md in read_vectors('blake2b'): + key = bytes.fromhex(key) + self.check('blake2b', msg, md, key=key) + + @requires_blake2 + def test_blake2s(self): + self.check_blake2(hashlib.blake2s, 8, 8, 32, 32, (1<<48)-1) + b2s_md_len = [16, 20, 28, 32] + b2s_in_len = [0, 3, 64, 65, 255, 1024] + self.assertEqual( + self.blake2_rfc7693(hashlib.blake2s, b2s_md_len, b2s_in_len), + "6a411f08ce25adcdfb02aba641451cec53c598b24f4fc787fbdc88797f4c1dfe") + + @requires_blake2 + def test_case_blake2s_0(self): + self.check('blake2s', b"", + "69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9") + + @requires_blake2 + def test_case_blake2s_1(self): + self.check('blake2s', b"abc", + "508c5e8c327c14e2e1a72ba34eeb452f37458b209ed63a294d999b4c86675982") + + @requires_blake2 + def test_blake2s_vectors(self): + for msg, key, md in read_vectors('blake2s'): + key = bytes.fromhex(key) + self.check('blake2s', msg, md, key=key) + def test_gil(self): # Check things work fine with an input larger than the size required # for multithreaded operation (which is hardwired to 2048). diff --git a/Makefile.pre.in b/Makefile.pre.in index 9e9b1ad..e17fe77 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -541,7 +541,7 @@ coverage-report: # Run "Argument Clinic" over all source files # (depends on python having already been built) .PHONY=clinic -clinic: $(BUILDPYTHON) +clinic: $(BUILDPYTHON) Modules/_blake2/blake2s_impl.c $(RUNSHARED) $(PYTHON_FOR_BUILD) ./Tools/clinic/clinic.py --make # Build the interpreter @@ -571,6 +571,11 @@ pybuilddir.txt: $(BUILDPYTHON) Modules/_math.o: Modules/_math.c Modules/_math.h $(CC) -c $(CCSHARED) $(PY_CORE_CFLAGS) -o $@ $< +# blake2s is auto-generated from blake2b +Modules/_blake2/blake2s_impl.c: $(BUILDPYTHON) Modules/_blake2/blake2b_impl.c Modules/_blake2/blake2b2s.py + $(RUNSHARED) $(PYTHON_FOR_BUILD) Modules/_blake2/blake2b2s.py + $(RUNSHARED) $(PYTHON_FOR_BUILD) Tools/clinic/clinic.py -f $@ + # Build the shared modules # Under GNU make, MAKEFLAGS are sorted and normalized; the 's' for # -s, --silent or --quiet is always the first char. @@ -584,6 +589,7 @@ sharedmods: $(BUILDPYTHON) pybuilddir.txt Modules/_math.o _TCLTK_INCLUDES='$(TCLTK_INCLUDES)' _TCLTK_LIBS='$(TCLTK_LIBS)' \ $(PYTHON_FOR_BUILD) $(srcdir)/setup.py $$quiet build + # Build static library # avoid long command lines, same as LIBRARY_OBJS $(LIBRARY): $(LIBRARY_OBJS) diff --git a/Misc/NEWS b/Misc/NEWS index f34c6859..9198e31 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -89,6 +89,8 @@ Core and Builtins Library ------- +- Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib. + - Issue #25596: Optimized glob() and iglob() functions in the glob module; they are now about 3--6 times faster. diff --git a/Modules/_blake2/blake2b2s.py b/Modules/_blake2/blake2b2s.py new file mode 100755 index 0000000..01cf265 --- /dev/null +++ b/Modules/_blake2/blake2b2s.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 + +import os +import re + +HERE = os.path.dirname(os.path.abspath(__file__)) +BLAKE2 = os.path.join(HERE, 'impl') + +PUBLIC_SEARCH = re.compile(r'\ int (blake2[bs]p?[a-z_]*)\(') + + +def getfiles(): + for name in os.listdir(BLAKE2): + name = os.path.join(BLAKE2, name) + if os.path.isfile(name): + yield name + + +def find_public(): + public_funcs = set() + for name in getfiles(): + with open(name) as f: + for line in f: + # find public functions + mo = PUBLIC_SEARCH.search(line) + if mo: + public_funcs.add(mo.group(1)) + + for f in sorted(public_funcs): + print('#define {0:<18} PyBlake2_{0}'.format(f)) + + return public_funcs + + +def main(): + lines = [] + with open(os.path.join(HERE, 'blake2b_impl.c')) as f: + for line in f: + line = line.replace('blake2b', 'blake2s') + line = line.replace('BLAKE2b', 'BLAKE2s') + line = line.replace('BLAKE2B', 'BLAKE2S') + lines.append(line) + with open(os.path.join(HERE, 'blake2s_impl.c'), 'w') as f: + f.write(''.join(lines)) + # find_public() + + +if __name__ == '__main__': + main() diff --git a/Modules/_blake2/blake2b_impl.c b/Modules/_blake2/blake2b_impl.c new file mode 100644 index 0000000..1c67744 --- /dev/null +++ b/Modules/_blake2/blake2b_impl.c @@ -0,0 +1,460 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +/* WARNING: autogenerated file! + * + * The blake2s_impl.c is autogenerated from blake2b_impl.c. + */ + +#include "Python.h" +#include "pystrhex.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include "../hashlib.h" +#include "blake2ns.h" + +#define HAVE_BLAKE2B 1 +#define BLAKE2_LOCAL_INLINE(type) Py_LOCAL_INLINE(type) + +#include "impl/blake2.h" +#include "impl/blake2-impl.h" /* for secure_zero_memory() and store48() */ + +#ifdef BLAKE2_USE_SSE +#include "impl/blake2b.c" +#else +#include "impl/blake2b-ref.c" +#endif + + +extern PyTypeObject PyBlake2_BLAKE2bType; + +typedef struct { + PyObject_HEAD + blake2b_param param; + blake2b_state state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BLAKE2bObject; + +#include "clinic/blake2b_impl.c.h" + +/*[clinic input] +module _blake2b +class _blake2b.blake2b "BLAKE2bObject *" "&PyBlake2_BLAKE2bType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6893358c6622aecf]*/ + + +static BLAKE2bObject * +new_BLAKE2bObject(PyTypeObject *type) +{ + BLAKE2bObject *self; + self = (BLAKE2bObject *)type->tp_alloc(type, 0); +#ifdef WITH_THREAD + if (self != NULL) { + self->lock = NULL; + } +#endif + return self; +} + +/*[clinic input] +@classmethod +_blake2b.blake2b.__new__ as py_blake2b_new + string as data: object = NULL + * + digest_size: int(c_default="BLAKE2B_OUTBYTES") = _blake2b.blake2b.MAX_DIGEST_SIZE + key: Py_buffer = None + salt: Py_buffer = None + person: Py_buffer = None + fanout: int = 1 + depth: int = 1 + leaf_size as leaf_size_obj: object = NULL + node_offset as node_offset_obj: object = NULL + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + +Return a new BLAKE2b hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node) +/*[clinic end generated code: output=7506d8d890e5f13b input=e41548dfa0866031]*/ +{ + BLAKE2bObject *self = NULL; + Py_buffer buf; + + unsigned long leaf_size = 0; + unsigned PY_LONG_LONG node_offset = 0; + + self = new_BLAKE2bObject(type); + if (self == NULL) { + goto error; + } + + /* Zero parameter block. */ + memset(&self->param, 0, sizeof(self->param)); + + /* Set digest size. */ + if (digest_size <= 0 || digest_size > BLAKE2B_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "digest_size must be between 1 and %d bytes", + BLAKE2B_OUTBYTES); + goto error; + } + self->param.digest_length = digest_size; + + /* Set salt parameter. */ + if ((salt->obj != NULL) && salt->len) { + if (salt->len > BLAKE2B_SALTBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum salt length is %d bytes", + BLAKE2B_SALTBYTES); + goto error; + } + memcpy(self->param.salt, salt->buf, salt->len); + } + + /* Set personalization parameter. */ + if ((person->obj != NULL) && person->len) { + if (person->len > BLAKE2B_PERSONALBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum person length is %d bytes", + BLAKE2B_PERSONALBYTES); + goto error; + } + memcpy(self->param.personal, person->buf, person->len); + } + + /* Set tree parameters. */ + if (fanout < 0 || fanout > 255) { + PyErr_SetString(PyExc_ValueError, + "fanout must be between 0 and 255"); + goto error; + } + self->param.fanout = (uint8_t)fanout; + + if (depth <= 0 || depth > 255) { + PyErr_SetString(PyExc_ValueError, + "depth must be between 1 and 255"); + goto error; + } + self->param.depth = (uint8_t)depth; + + if (leaf_size_obj != NULL) { + leaf_size = PyLong_AsUnsignedLong(leaf_size_obj); + if (leaf_size == (unsigned long) -1 && PyErr_Occurred()) { + goto error; + } + if (leaf_size > 0xFFFFFFFFU) { + PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); + goto error; + } + } + self->param.leaf_length = (unsigned int)leaf_size; + + if (node_offset_obj != NULL) { + node_offset = PyLong_AsUnsignedLongLong(node_offset_obj); + if (node_offset == (unsigned PY_LONG_LONG) -1 && PyErr_Occurred()) { + goto error; + } + } +#ifdef HAVE_BLAKE2S + if (node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); + goto error; + } + store48(&(self->param.node_offset), node_offset); +#else + self->param.node_offset = node_offset; +#endif + + if (node_depth < 0 || node_depth > 255) { + PyErr_SetString(PyExc_ValueError, + "node_depth must be between 0 and 255"); + goto error; + } + self->param.node_depth = node_depth; + + if (inner_size < 0 || inner_size > BLAKE2B_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "inner_size must be between 0 and is %d", + BLAKE2B_OUTBYTES); + goto error; + } + self->param.inner_length = inner_size; + + /* Set key length. */ + if ((key->obj != NULL) && key->len) { + if (key->len > BLAKE2B_KEYBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum key length is %d bytes", + BLAKE2B_KEYBYTES); + goto error; + } + self->param.key_length = key->len; + } + + /* Initialize hash state. */ + if (blake2b_init_param(&self->state, &self->param) < 0) { + PyErr_SetString(PyExc_RuntimeError, + "error initializing hash state"); + goto error; + } + + /* Set last node flag (must come after initialization). */ + self->state.last_node = last_node; + + /* Process key block if any. */ + if (self->param.key_length) { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset(block, 0, sizeof(block)); + memcpy(block, key->buf, key->len); + blake2b_update(&self->state, block, sizeof(block)); + secure_zero_memory(block, sizeof(block)); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + blake2b_update(&self->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + blake2b_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self != NULL) { + Py_DECREF(self); + } + return NULL; +} + +/*[clinic input] +_blake2b.blake2b.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_copy_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=c89cd33550ab1543 input=4c9c319f18f10747]*/ +{ + BLAKE2bObject *cpy; + + if ((cpy = new_BLAKE2bObject(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + cpy->param = self->param; + cpy->state = self->state; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +/*[clinic input] +_blake2b.blake2b.update + + obj: object + / + +Update this hash object's state with the provided string. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_update(BLAKE2bObject *self, PyObject *obj) +/*[clinic end generated code: output=a888f07c4cddbe94 input=3ecb8c13ee4260f2]*/ +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + +#ifdef WITH_THREAD + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) + self->lock = PyThread_allocate_lock(); + + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + blake2b_update(&self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + blake2b_update(&self->state, buf.buf, buf.len); + } +#else + blake2b_update(&self->state, buf.buf, buf.len); +#endif /* !WITH_THREAD */ + PyBuffer_Release(&buf); + + Py_INCREF(Py_None); + return Py_None; +} + +/*[clinic input] +_blake2b.blake2b.digest + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_digest_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=b13a79360d984740 input=ac2fa462ebb1b9c7]*/ +{ + uint8_t digest[BLAKE2B_OUTBYTES]; + blake2b_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2b_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return PyBytes_FromStringAndSize((const char *)digest, + self->param.digest_length); +} + +/*[clinic input] +_blake2b.blake2b.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_blake2b_blake2b_hexdigest_impl(BLAKE2bObject *self) +/*[clinic end generated code: output=6a503611715b24bd input=d58f0b2f37812e33]*/ +{ + uint8_t digest[BLAKE2B_OUTBYTES]; + blake2b_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2b_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return _Py_strhex((const char *)digest, self->param.digest_length); +} + + +static PyMethodDef py_blake2b_methods[] = { + _BLAKE2B_BLAKE2B_COPY_METHODDEF + _BLAKE2B_BLAKE2B_DIGEST_METHODDEF + _BLAKE2B_BLAKE2B_HEXDIGEST_METHODDEF + _BLAKE2B_BLAKE2B_UPDATE_METHODDEF + {NULL, NULL} +}; + + + +static PyObject * +py_blake2b_get_name(BLAKE2bObject *self, void *closure) +{ + return PyUnicode_FromString("blake2b"); +} + + + +static PyObject * +py_blake2b_get_block_size(BLAKE2bObject *self, void *closure) +{ + return PyLong_FromLong(BLAKE2B_BLOCKBYTES); +} + + + +static PyObject * +py_blake2b_get_digest_size(BLAKE2bObject *self, void *closure) +{ + return PyLong_FromLong(self->param.digest_length); +} + + +static PyGetSetDef py_blake2b_getsetters[] = { + {"name", (getter)py_blake2b_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake2b_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake2b_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake2b_dealloc(PyObject *self) +{ + BLAKE2bObject *obj = (BLAKE2bObject *)self; + + /* Try not to leave state in memory. */ + secure_zero_memory(&obj->param, sizeof(obj->param)); + secure_zero_memory(&obj->state, sizeof(obj->state)); +#ifdef WITH_THREAD + if (obj->lock) { + PyThread_free_lock(obj->lock); + obj->lock = NULL; + } +#endif + PyObject_Del(self); +} + + +PyTypeObject PyBlake2_BLAKE2bType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_blake2.blake2b", /* tp_name */ + sizeof(BLAKE2bObject), /* tp_size */ + 0, /* tp_itemsize */ + py_blake2b_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + py_blake2b_new__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + py_blake2b_methods, /* tp_methods */ + 0, /* tp_members */ + py_blake2b_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + py_blake2b_new, /* tp_new */ +}; diff --git a/Modules/_blake2/blake2module.c b/Modules/_blake2/blake2module.c new file mode 100644 index 0000000..e2a3d42 --- /dev/null +++ b/Modules/_blake2/blake2module.c @@ -0,0 +1,105 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +#include "Python.h" + +#include "impl/blake2.h" + +extern PyTypeObject PyBlake2_BLAKE2bType; +extern PyTypeObject PyBlake2_BLAKE2sType; + + +PyDoc_STRVAR(blake2mod__doc__, +"_blake2b provides BLAKE2b for hashlib\n" +); + + +static struct PyMethodDef blake2mod_functions[] = { + {NULL, NULL} +}; + +static struct PyModuleDef blake2_module = { + PyModuleDef_HEAD_INIT, + "_blake2", + blake2mod__doc__, + -1, + blake2mod_functions, + NULL, + NULL, + NULL, + NULL +}; + +#define ADD_INT(d, name, value) do { \ + PyObject *x = PyLong_FromLong(value); \ + if (!x) { \ + Py_DECREF(m); \ + return NULL; \ + } \ + if (PyDict_SetItemString(d, name, x) < 0) { \ + Py_DECREF(m); \ + return NULL; \ + } \ + Py_DECREF(x); \ +} while(0) + + +PyMODINIT_FUNC +PyInit__blake2(void) +{ + PyObject *m; + PyObject *d; + + m = PyModule_Create(&blake2_module); + if (m == NULL) + return NULL; + + /* BLAKE2b */ + Py_TYPE(&PyBlake2_BLAKE2bType) = &PyType_Type; + if (PyType_Ready(&PyBlake2_BLAKE2bType) < 0) { + return NULL; + } + + Py_INCREF(&PyBlake2_BLAKE2bType); + PyModule_AddObject(m, "blake2b", (PyObject *)&PyBlake2_BLAKE2bType); + + d = PyBlake2_BLAKE2bType.tp_dict; + ADD_INT(d, "SALT_SIZE", BLAKE2B_SALTBYTES); + ADD_INT(d, "PERSON_SIZE", BLAKE2B_PERSONALBYTES); + ADD_INT(d, "MAX_KEY_SIZE", BLAKE2B_KEYBYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); + + PyModule_AddIntConstant(m, "BLAKE2B_SALT_SIZE", BLAKE2B_SALTBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_PERSON_SIZE", BLAKE2B_PERSONALBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_MAX_KEY_SIZE", BLAKE2B_KEYBYTES); + PyModule_AddIntConstant(m, "BLAKE2B_MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); + + /* BLAKE2s */ + Py_TYPE(&PyBlake2_BLAKE2sType) = &PyType_Type; + if (PyType_Ready(&PyBlake2_BLAKE2sType) < 0) { + return NULL; + } + + Py_INCREF(&PyBlake2_BLAKE2sType); + PyModule_AddObject(m, "blake2s", (PyObject *)&PyBlake2_BLAKE2sType); + + d = PyBlake2_BLAKE2sType.tp_dict; + ADD_INT(d, "SALT_SIZE", BLAKE2S_SALTBYTES); + ADD_INT(d, "PERSON_SIZE", BLAKE2S_PERSONALBYTES); + ADD_INT(d, "MAX_KEY_SIZE", BLAKE2S_KEYBYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); + + PyModule_AddIntConstant(m, "BLAKE2S_SALT_SIZE", BLAKE2S_SALTBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_PERSON_SIZE", BLAKE2S_PERSONALBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_MAX_KEY_SIZE", BLAKE2S_KEYBYTES); + PyModule_AddIntConstant(m, "BLAKE2S_MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); + + return m; +} diff --git a/Modules/_blake2/blake2ns.h b/Modules/_blake2/blake2ns.h new file mode 100644 index 0000000..53bce8e --- /dev/null +++ b/Modules/_blake2/blake2ns.h @@ -0,0 +1,32 @@ +/* Prefix all public blake2 symbols with PyBlake2_ + */ + +#ifndef Py_BLAKE2_NS +#define Py_BLAKE2_NS + +#define blake2b PyBlake2_blake2b +#define blake2b_compress PyBlake2_blake2b_compress +#define blake2b_final PyBlake2_blake2b_final +#define blake2b_init PyBlake2_blake2b_init +#define blake2b_init_key PyBlake2_blake2b_init_key +#define blake2b_init_param PyBlake2_blake2b_init_param +#define blake2b_update PyBlake2_blake2b_update +#define blake2bp PyBlake2_blake2bp +#define blake2bp_final PyBlake2_blake2bp_final +#define blake2bp_init PyBlake2_blake2bp_init +#define blake2bp_init_key PyBlake2_blake2bp_init_key +#define blake2bp_update PyBlake2_blake2bp_update +#define blake2s PyBlake2_blake2s +#define blake2s_compress PyBlake2_blake2s_compress +#define blake2s_final PyBlake2_blake2s_final +#define blake2s_init PyBlake2_blake2s_init +#define blake2s_init_key PyBlake2_blake2s_init_key +#define blake2s_init_param PyBlake2_blake2s_init_param +#define blake2s_update PyBlake2_blake2s_update +#define blake2sp PyBlake2_blake2sp +#define blake2sp_final PyBlake2_blake2sp_final +#define blake2sp_init PyBlake2_blake2sp_init +#define blake2sp_init_key PyBlake2_blake2sp_init_key +#define blake2sp_update PyBlake2_blake2sp_update + +#endif /* Py_BLAKE2_NS */ diff --git a/Modules/_blake2/blake2s_impl.c b/Modules/_blake2/blake2s_impl.c new file mode 100644 index 0000000..8b38270 --- /dev/null +++ b/Modules/_blake2/blake2s_impl.c @@ -0,0 +1,460 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +/* WARNING: autogenerated file! + * + * The blake2s_impl.c is autogenerated from blake2s_impl.c. + */ + +#include "Python.h" +#include "pystrhex.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include "../hashlib.h" +#include "blake2ns.h" + +#define HAVE_BLAKE2S 1 +#define BLAKE2_LOCAL_INLINE(type) Py_LOCAL_INLINE(type) + +#include "impl/blake2.h" +#include "impl/blake2-impl.h" /* for secure_zero_memory() and store48() */ + +#ifdef BLAKE2_USE_SSE +#include "impl/blake2s.c" +#else +#include "impl/blake2s-ref.c" +#endif + + +extern PyTypeObject PyBlake2_BLAKE2sType; + +typedef struct { + PyObject_HEAD + blake2s_param param; + blake2s_state state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} BLAKE2sObject; + +#include "clinic/blake2s_impl.c.h" + +/*[clinic input] +module _blake2s +class _blake2s.blake2s "BLAKE2sObject *" "&PyBlake2_BLAKE2sType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=edbfcf7557a685a7]*/ + + +static BLAKE2sObject * +new_BLAKE2sObject(PyTypeObject *type) +{ + BLAKE2sObject *self; + self = (BLAKE2sObject *)type->tp_alloc(type, 0); +#ifdef WITH_THREAD + if (self != NULL) { + self->lock = NULL; + } +#endif + return self; +} + +/*[clinic input] +@classmethod +_blake2s.blake2s.__new__ as py_blake2s_new + string as data: object = NULL + * + digest_size: int(c_default="BLAKE2S_OUTBYTES") = _blake2s.blake2s.MAX_DIGEST_SIZE + key: Py_buffer = None + salt: Py_buffer = None + person: Py_buffer = None + fanout: int = 1 + depth: int = 1 + leaf_size as leaf_size_obj: object = NULL + node_offset as node_offset_obj: object = NULL + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + +Return a new BLAKE2s hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node) +/*[clinic end generated code: output=fe060b258a8cbfc6 input=458cfdcb3d0d47ff]*/ +{ + BLAKE2sObject *self = NULL; + Py_buffer buf; + + unsigned long leaf_size = 0; + unsigned PY_LONG_LONG node_offset = 0; + + self = new_BLAKE2sObject(type); + if (self == NULL) { + goto error; + } + + /* Zero parameter block. */ + memset(&self->param, 0, sizeof(self->param)); + + /* Set digest size. */ + if (digest_size <= 0 || digest_size > BLAKE2S_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "digest_size must be between 1 and %d bytes", + BLAKE2S_OUTBYTES); + goto error; + } + self->param.digest_length = digest_size; + + /* Set salt parameter. */ + if ((salt->obj != NULL) && salt->len) { + if (salt->len > BLAKE2S_SALTBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum salt length is %d bytes", + BLAKE2S_SALTBYTES); + goto error; + } + memcpy(self->param.salt, salt->buf, salt->len); + } + + /* Set personalization parameter. */ + if ((person->obj != NULL) && person->len) { + if (person->len > BLAKE2S_PERSONALBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum person length is %d bytes", + BLAKE2S_PERSONALBYTES); + goto error; + } + memcpy(self->param.personal, person->buf, person->len); + } + + /* Set tree parameters. */ + if (fanout < 0 || fanout > 255) { + PyErr_SetString(PyExc_ValueError, + "fanout must be between 0 and 255"); + goto error; + } + self->param.fanout = (uint8_t)fanout; + + if (depth <= 0 || depth > 255) { + PyErr_SetString(PyExc_ValueError, + "depth must be between 1 and 255"); + goto error; + } + self->param.depth = (uint8_t)depth; + + if (leaf_size_obj != NULL) { + leaf_size = PyLong_AsUnsignedLong(leaf_size_obj); + if (leaf_size == (unsigned long) -1 && PyErr_Occurred()) { + goto error; + } + if (leaf_size > 0xFFFFFFFFU) { + PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); + goto error; + } + } + self->param.leaf_length = (unsigned int)leaf_size; + + if (node_offset_obj != NULL) { + node_offset = PyLong_AsUnsignedLongLong(node_offset_obj); + if (node_offset == (unsigned PY_LONG_LONG) -1 && PyErr_Occurred()) { + goto error; + } + } +#ifdef HAVE_BLAKE2S + if (node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); + goto error; + } + store48(&(self->param.node_offset), node_offset); +#else + self->param.node_offset = node_offset; +#endif + + if (node_depth < 0 || node_depth > 255) { + PyErr_SetString(PyExc_ValueError, + "node_depth must be between 0 and 255"); + goto error; + } + self->param.node_depth = node_depth; + + if (inner_size < 0 || inner_size > BLAKE2S_OUTBYTES) { + PyErr_Format(PyExc_ValueError, + "inner_size must be between 0 and is %d", + BLAKE2S_OUTBYTES); + goto error; + } + self->param.inner_length = inner_size; + + /* Set key length. */ + if ((key->obj != NULL) && key->len) { + if (key->len > BLAKE2S_KEYBYTES) { + PyErr_Format(PyExc_ValueError, + "maximum key length is %d bytes", + BLAKE2S_KEYBYTES); + goto error; + } + self->param.key_length = key->len; + } + + /* Initialize hash state. */ + if (blake2s_init_param(&self->state, &self->param) < 0) { + PyErr_SetString(PyExc_RuntimeError, + "error initializing hash state"); + goto error; + } + + /* Set last node flag (must come after initialization). */ + self->state.last_node = last_node; + + /* Process key block if any. */ + if (self->param.key_length) { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset(block, 0, sizeof(block)); + memcpy(block, key->buf, key->len); + blake2s_update(&self->state, block, sizeof(block)); + secure_zero_memory(block, sizeof(block)); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + blake2s_update(&self->state, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + blake2s_update(&self->state, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self != NULL) { + Py_DECREF(self); + } + return NULL; +} + +/*[clinic input] +_blake2s.blake2s.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_copy_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=6c5bada404b7aed7 input=c8858e887ae4a07a]*/ +{ + BLAKE2sObject *cpy; + + if ((cpy = new_BLAKE2sObject(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + cpy->param = self->param; + cpy->state = self->state; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +/*[clinic input] +_blake2s.blake2s.update + + obj: object + / + +Update this hash object's state with the provided string. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_update(BLAKE2sObject *self, PyObject *obj) +/*[clinic end generated code: output=fe8438a1d3cede87 input=47a408b9a3cc05c5]*/ +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + +#ifdef WITH_THREAD + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) + self->lock = PyThread_allocate_lock(); + + if (self->lock != NULL) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + blake2s_update(&self->state, buf.buf, buf.len); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } else { + blake2s_update(&self->state, buf.buf, buf.len); + } +#else + blake2s_update(&self->state, buf.buf, buf.len); +#endif /* !WITH_THREAD */ + PyBuffer_Release(&buf); + + Py_INCREF(Py_None); + return Py_None; +} + +/*[clinic input] +_blake2s.blake2s.digest + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_digest_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=80e81a48c6f79cf9 input=feb9a220135bdeba]*/ +{ + uint8_t digest[BLAKE2S_OUTBYTES]; + blake2s_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2s_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return PyBytes_FromStringAndSize((const char *)digest, + self->param.digest_length); +} + +/*[clinic input] +_blake2s.blake2s.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_blake2s_blake2s_hexdigest_impl(BLAKE2sObject *self) +/*[clinic end generated code: output=db6c5028c0a3c2e5 input=4e4877b8bd7aea91]*/ +{ + uint8_t digest[BLAKE2S_OUTBYTES]; + blake2s_state state_cpy; + + ENTER_HASHLIB(self); + state_cpy = self->state; + blake2s_final(&state_cpy, digest, self->param.digest_length); + LEAVE_HASHLIB(self); + return _Py_strhex((const char *)digest, self->param.digest_length); +} + + +static PyMethodDef py_blake2s_methods[] = { + _BLAKE2S_BLAKE2S_COPY_METHODDEF + _BLAKE2S_BLAKE2S_DIGEST_METHODDEF + _BLAKE2S_BLAKE2S_HEXDIGEST_METHODDEF + _BLAKE2S_BLAKE2S_UPDATE_METHODDEF + {NULL, NULL} +}; + + + +static PyObject * +py_blake2s_get_name(BLAKE2sObject *self, void *closure) +{ + return PyUnicode_FromString("blake2s"); +} + + + +static PyObject * +py_blake2s_get_block_size(BLAKE2sObject *self, void *closure) +{ + return PyLong_FromLong(BLAKE2S_BLOCKBYTES); +} + + + +static PyObject * +py_blake2s_get_digest_size(BLAKE2sObject *self, void *closure) +{ + return PyLong_FromLong(self->param.digest_length); +} + + +static PyGetSetDef py_blake2s_getsetters[] = { + {"name", (getter)py_blake2s_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake2s_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake2s_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake2s_dealloc(PyObject *self) +{ + BLAKE2sObject *obj = (BLAKE2sObject *)self; + + /* Try not to leave state in memory. */ + secure_zero_memory(&obj->param, sizeof(obj->param)); + secure_zero_memory(&obj->state, sizeof(obj->state)); +#ifdef WITH_THREAD + if (obj->lock) { + PyThread_free_lock(obj->lock); + obj->lock = NULL; + } +#endif + PyObject_Del(self); +} + + +PyTypeObject PyBlake2_BLAKE2sType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_blake2.blake2s", /* tp_name */ + sizeof(BLAKE2sObject), /* tp_size */ + 0, /* tp_itemsize */ + py_blake2s_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + py_blake2s_new__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + py_blake2s_methods, /* tp_methods */ + 0, /* tp_members */ + py_blake2s_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + py_blake2s_new, /* tp_new */ +}; diff --git a/Modules/_blake2/clinic/blake2b_impl.c.h b/Modules/_blake2/clinic/blake2b_impl.c.h new file mode 100644 index 0000000..71c073a --- /dev/null +++ b/Modules/_blake2/clinic/blake2b_impl.c.h @@ -0,0 +1,125 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(py_blake2b_new__doc__, +"blake2b(string=None, *, digest_size=_blake2b.blake2b.MAX_DIGEST_SIZE,\n" +" key=None, salt=None, person=None, fanout=1, depth=1,\n" +" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n" +" last_node=False)\n" +"--\n" +"\n" +"Return a new BLAKE2b hash object."); + +static PyObject * +py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node); + +static PyObject * +py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL}; + static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2b", _keywords, 0}; + PyObject *data = NULL; + int digest_size = BLAKE2B_OUTBYTES; + Py_buffer key = {NULL, NULL}; + Py_buffer salt = {NULL, NULL}; + Py_buffer person = {NULL, NULL}; + int fanout = 1; + int depth = 1; + PyObject *leaf_size_obj = NULL; + PyObject *node_offset_obj = NULL; + int node_depth = 0; + int inner_size = 0; + int last_node = 0; + + if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser, + &data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) { + goto exit; + } + return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for salt */ + if (salt.obj) { + PyBuffer_Release(&salt); + } + /* Cleanup for person */ + if (person.obj) { + PyBuffer_Release(&person); + } + + return return_value; +} + +PyDoc_STRVAR(_blake2b_blake2b_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _BLAKE2B_BLAKE2B_COPY_METHODDEF \ + {"copy", (PyCFunction)_blake2b_blake2b_copy, METH_NOARGS, _blake2b_blake2b_copy__doc__}, + +static PyObject * +_blake2b_blake2b_copy_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_copy(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_copy_impl(self); +} + +PyDoc_STRVAR(_blake2b_blake2b_update__doc__, +"update($self, obj, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided string."); + +#define _BLAKE2B_BLAKE2B_UPDATE_METHODDEF \ + {"update", (PyCFunction)_blake2b_blake2b_update, METH_O, _blake2b_blake2b_update__doc__}, + +PyDoc_STRVAR(_blake2b_blake2b_digest__doc__, +"digest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _BLAKE2B_BLAKE2B_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_blake2b_blake2b_digest, METH_NOARGS, _blake2b_blake2b_digest__doc__}, + +static PyObject * +_blake2b_blake2b_digest_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_digest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_digest_impl(self); +} + +PyDoc_STRVAR(_blake2b_blake2b_hexdigest__doc__, +"hexdigest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _BLAKE2B_BLAKE2B_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_blake2b_blake2b_hexdigest, METH_NOARGS, _blake2b_blake2b_hexdigest__doc__}, + +static PyObject * +_blake2b_blake2b_hexdigest_impl(BLAKE2bObject *self); + +static PyObject * +_blake2b_blake2b_hexdigest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2b_blake2b_hexdigest_impl(self); +} +/*[clinic end generated code: output=535a54852c98e51c input=a9049054013a1b77]*/ diff --git a/Modules/_blake2/clinic/blake2s_impl.c.h b/Modules/_blake2/clinic/blake2s_impl.c.h new file mode 100644 index 0000000..ca908d3 --- /dev/null +++ b/Modules/_blake2/clinic/blake2s_impl.c.h @@ -0,0 +1,125 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(py_blake2s_new__doc__, +"blake2s(string=None, *, digest_size=_blake2s.blake2s.MAX_DIGEST_SIZE,\n" +" key=None, salt=None, person=None, fanout=1, depth=1,\n" +" leaf_size=None, node_offset=None, node_depth=0, inner_size=0,\n" +" last_node=False)\n" +"--\n" +"\n" +"Return a new BLAKE2s hash object."); + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, PyObject *leaf_size_obj, + PyObject *node_offset_obj, int node_depth, + int inner_size, int last_node); + +static PyObject * +py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", NULL}; + static _PyArg_Parser _parser = {"|O$iy*y*y*iiOOiip:blake2s", _keywords, 0}; + PyObject *data = NULL; + int digest_size = BLAKE2S_OUTBYTES; + Py_buffer key = {NULL, NULL}; + Py_buffer salt = {NULL, NULL}; + Py_buffer person = {NULL, NULL}; + int fanout = 1; + int depth = 1; + PyObject *leaf_size_obj = NULL; + PyObject *node_offset_obj = NULL; + int node_depth = 0; + int inner_size = 0; + int last_node = 0; + + if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser, + &data, &digest_size, &key, &salt, &person, &fanout, &depth, &leaf_size_obj, &node_offset_obj, &node_depth, &inner_size, &last_node)) { + goto exit; + } + return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size_obj, node_offset_obj, node_depth, inner_size, last_node); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for salt */ + if (salt.obj) { + PyBuffer_Release(&salt); + } + /* Cleanup for person */ + if (person.obj) { + PyBuffer_Release(&person); + } + + return return_value; +} + +PyDoc_STRVAR(_blake2s_blake2s_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _BLAKE2S_BLAKE2S_COPY_METHODDEF \ + {"copy", (PyCFunction)_blake2s_blake2s_copy, METH_NOARGS, _blake2s_blake2s_copy__doc__}, + +static PyObject * +_blake2s_blake2s_copy_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_copy(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_copy_impl(self); +} + +PyDoc_STRVAR(_blake2s_blake2s_update__doc__, +"update($self, obj, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided string."); + +#define _BLAKE2S_BLAKE2S_UPDATE_METHODDEF \ + {"update", (PyCFunction)_blake2s_blake2s_update, METH_O, _blake2s_blake2s_update__doc__}, + +PyDoc_STRVAR(_blake2s_blake2s_digest__doc__, +"digest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _BLAKE2S_BLAKE2S_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_blake2s_blake2s_digest, METH_NOARGS, _blake2s_blake2s_digest__doc__}, + +static PyObject * +_blake2s_blake2s_digest_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_digest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_digest_impl(self); +} + +PyDoc_STRVAR(_blake2s_blake2s_hexdigest__doc__, +"hexdigest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _BLAKE2S_BLAKE2S_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_blake2s_blake2s_hexdigest, METH_NOARGS, _blake2s_blake2s_hexdigest__doc__}, + +static PyObject * +_blake2s_blake2s_hexdigest_impl(BLAKE2sObject *self); + +static PyObject * +_blake2s_blake2s_hexdigest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _blake2s_blake2s_hexdigest_impl(self); +} +/*[clinic end generated code: output=535ea7903f9ccf76 input=a9049054013a1b77]*/ diff --git a/Modules/_blake2/impl/blake2-config.h b/Modules/_blake2/impl/blake2-config.h new file mode 100644 index 0000000..40455b1 --- /dev/null +++ b/Modules/_blake2/impl/blake2-config.h @@ -0,0 +1,74 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_CONFIG_H__ +#define __BLAKE2_CONFIG_H__ + +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) +#define HAVE_SSE2 +#endif + +#if defined(__SSSE3__) +#define HAVE_SSSE3 +#endif + +#if defined(__SSE4_1__) +#define HAVE_SSE41 +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#endif + +#if defined(__XOP__) +#define HAVE_XOP +#endif + + +#ifdef HAVE_AVX2 +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_XOP +#ifndef HAVE_AVX +#define HAVE_AVX +#endif +#endif + +#ifdef HAVE_AVX +#ifndef HAVE_SSE41 +#define HAVE_SSE41 +#endif +#endif + +#ifdef HAVE_SSE41 +#ifndef HAVE_SSSE3 +#define HAVE_SSSE3 +#endif +#endif + +#ifdef HAVE_SSSE3 +#define HAVE_SSE2 +#endif + +#if !defined(HAVE_SSE2) +#error "This code requires at least SSE2." +#endif + +#endif + diff --git a/Modules/_blake2/impl/blake2-impl.h b/Modules/_blake2/impl/blake2-impl.h new file mode 100644 index 0000000..bbe3c0f --- /dev/null +++ b/Modules/_blake2/impl/blake2-impl.h @@ -0,0 +1,139 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_IMPL_H__ +#define __BLAKE2_IMPL_H__ + +#include +#include + +BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint32_t w = *p++; + w |= ( uint32_t )( *p++ ) << 8; + w |= ( uint32_t )( *p++ ) << 16; + w |= ( uint32_t )( *p++ ) << 24; + return w; +#endif +} + +BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + w |= ( uint64_t )( *p++ ) << 48; + w |= ( uint64_t )( *p++ ) << 56; + return w; +#endif +} + +BLAKE2_LOCAL_INLINE(void) store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +BLAKE2_LOCAL_INLINE(void) store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +BLAKE2_LOCAL_INLINE(uint64_t) load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + uint64_t w = *p++; + w |= ( uint64_t )( *p++ ) << 8; + w |= ( uint64_t )( *p++ ) << 16; + w |= ( uint64_t )( *p++ ) << 24; + w |= ( uint64_t )( *p++ ) << 32; + w |= ( uint64_t )( *p++ ) << 40; + return w; +} + +BLAKE2_LOCAL_INLINE(void) store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +} + +BLAKE2_LOCAL_INLINE(uint32_t) rotl32( const uint32_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 32 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint64_t) rotl64( const uint64_t w, const unsigned c ) +{ + return ( w << c ) | ( w >> ( 64 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint32_t) rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +BLAKE2_LOCAL_INLINE(uint64_t) rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +BLAKE2_LOCAL_INLINE(void) secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif + diff --git a/Modules/_blake2/impl/blake2.h b/Modules/_blake2/impl/blake2.h new file mode 100644 index 0000000..1a9fdf4 --- /dev/null +++ b/Modules/_blake2/impl/blake2.h @@ -0,0 +1,161 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2_H__ +#define __BLAKE2_H__ + +#include +#include + +#ifdef BLAKE2_NO_INLINE +#define BLAKE2_LOCAL_INLINE(type) static type +#endif + +#ifndef BLAKE2_LOCAL_INLINE +#define BLAKE2_LOCAL_INLINE(type) static inline type +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + + typedef struct __blake2s_state + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2s_state; + + typedef struct __blake2b_state + { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; + } blake2b_state; + + typedef struct __blake2sp_state + { + blake2s_state S[8][1]; + blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + } blake2sp_state; + + typedef struct __blake2bp_state + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + } blake2bp_state; + + +#pragma pack(push, 1) + typedef struct __blake2s_param + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint8_t node_offset[6];// 14 + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + } blake2s_param; + + typedef struct __blake2b_param + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint64_t node_offset; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + } blake2b_param; +#pragma pack(pop) + + /* Streaming API */ + int blake2s_init( blake2s_state *S, const uint8_t outlen ); + int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); + int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + + int blake2b_init( blake2b_state *S, const uint8_t outlen ); + int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); + int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + + int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); + int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + + int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); + int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); + int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + + /* Simple API */ + int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + + static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) + { + return blake2b( out, in, key, outlen, inlen, keylen ); + } + +#if defined(__cplusplus) +} +#endif + +#endif + diff --git a/Modules/_blake2/impl/blake2b-load-sse2.h b/Modules/_blake2/impl/blake2b-load-sse2.h new file mode 100644 index 0000000..0004a98 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-load-sse2.h @@ -0,0 +1,70 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE2_H__ +#define __BLAKE2B_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) +#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) +#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) +#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) +#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) +#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) +#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) +#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) +#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) +#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) +#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) +#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11) +#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) +#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) +#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) +#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) +#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) +#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) +#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) +#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) +#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) +#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) +#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) +#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) +#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) +#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) +#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) +#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) +#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) +#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) +#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) +#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) +#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) +#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) +#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) +#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) +#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) +#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + + +#endif + diff --git a/Modules/_blake2/impl/blake2b-load-sse41.h b/Modules/_blake2/impl/blake2b-load-sse41.h new file mode 100644 index 0000000..42a1349 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-load-sse41.h @@ -0,0 +1,404 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_LOAD_SSE41_H__ +#define __BLAKE2B_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_0_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_0_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_1_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_1_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_1_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_1_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#define LOAD_MSG_2_1(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m5, 8); \ +b1 = _mm_unpackhi_epi64(m2, m7); \ +} while(0) + + +#define LOAD_MSG_2_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m0); \ +b1 = _mm_blend_epi16(m1, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_2_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m5, m1, 0xF0); \ +b1 = _mm_unpackhi_epi64(m3, m4); \ +} while(0) + + +#define LOAD_MSG_2_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m3); \ +b1 = _mm_alignr_epi8(m2, m0, 8); \ +} while(0) + + +#define LOAD_MSG_3_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_unpackhi_epi64(m6, m5); \ +} while(0) + + +#define LOAD_MSG_3_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m0); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_3_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m2, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_3_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m5); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_4_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m2); \ +b1 = _mm_unpacklo_epi64(m1, m5); \ +} while(0) + + +#define LOAD_MSG_4_2(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m0, m3, 0xF0); \ +b1 = _mm_blend_epi16(m2, m7, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m7, m5, 0xF0); \ +b1 = _mm_blend_epi16(m3, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_4_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m6, m0, 8); \ +b1 = _mm_blend_epi16(m4, m6, 0xF0); \ +} while(0) + + +#define LOAD_MSG_5_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m3); \ +b1 = _mm_unpacklo_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_5_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m5); \ +b1 = _mm_unpackhi_epi64(m5, m1); \ +} while(0) + + +#define LOAD_MSG_5_3(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m2, m3, 0xF0); \ +b1 = _mm_unpackhi_epi64(m7, m0); \ +} while(0) + + +#define LOAD_MSG_5_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m2); \ +b1 = _mm_blend_epi16(m7, m4, 0xF0); \ +} while(0) + + +#define LOAD_MSG_6_1(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m6, m0, 0xF0); \ +b1 = _mm_unpacklo_epi64(m7, m2); \ +} while(0) + + +#define LOAD_MSG_6_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_alignr_epi8(m5, m6, 8); \ +} while(0) + + +#define LOAD_MSG_6_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m3); \ +b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ +} while(0) + + +#define LOAD_MSG_6_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m3, m1); \ +b1 = _mm_blend_epi16(m1, m5, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m6, m3); \ +b1 = _mm_blend_epi16(m6, m1, 0xF0); \ +} while(0) + + +#define LOAD_MSG_7_2(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpackhi_epi64(m0, m4); \ +} while(0) + + +#define LOAD_MSG_7_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m2, m7); \ +b1 = _mm_unpacklo_epi64(m4, m1); \ +} while(0) + + +#define LOAD_MSG_7_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m2); \ +b1 = _mm_unpacklo_epi64(m3, m5); \ +} while(0) + + +#define LOAD_MSG_8_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m3, m7); \ +b1 = _mm_alignr_epi8(m0, m5, 8); \ +} while(0) + + +#define LOAD_MSG_8_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_alignr_epi8(m4, m1, 8); \ +} while(0) + + +#define LOAD_MSG_8_3(b0, b1) \ +do \ +{ \ +b0 = m6; \ +b1 = _mm_alignr_epi8(m5, m0, 8); \ +} while(0) + + +#define LOAD_MSG_8_4(b0, b1) \ +do \ +{ \ +b0 = _mm_blend_epi16(m1, m3, 0xF0); \ +b1 = m2; \ +} while(0) + + +#define LOAD_MSG_9_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_unpackhi_epi64(m3, m0); \ +} while(0) + + +#define LOAD_MSG_9_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m1, m2); \ +b1 = _mm_blend_epi16(m3, m2, 0xF0); \ +} while(0) + + +#define LOAD_MSG_9_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m7, m4); \ +b1 = _mm_unpackhi_epi64(m1, m6); \ +} while(0) + + +#define LOAD_MSG_9_4(b0, b1) \ +do \ +{ \ +b0 = _mm_alignr_epi8(m7, m5, 8); \ +b1 = _mm_unpacklo_epi64(m6, m0); \ +} while(0) + + +#define LOAD_MSG_10_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m0, m1); \ +b1 = _mm_unpacklo_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m0, m1); \ +b1 = _mm_unpackhi_epi64(m2, m3); \ +} while(0) + + +#define LOAD_MSG_10_3(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m4, m5); \ +b1 = _mm_unpacklo_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_10_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpackhi_epi64(m4, m5); \ +b1 = _mm_unpackhi_epi64(m6, m7); \ +} while(0) + + +#define LOAD_MSG_11_1(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m7, m2); \ +b1 = _mm_unpackhi_epi64(m4, m6); \ +} while(0) + + +#define LOAD_MSG_11_2(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m5, m4); \ +b1 = _mm_alignr_epi8(m3, m7, 8); \ +} while(0) + + +#define LOAD_MSG_11_3(b0, b1) \ +do \ +{ \ +b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ +b1 = _mm_unpackhi_epi64(m5, m2); \ +} while(0) + + +#define LOAD_MSG_11_4(b0, b1) \ +do \ +{ \ +b0 = _mm_unpacklo_epi64(m6, m1); \ +b1 = _mm_unpackhi_epi64(m3, m1); \ +} while(0) + + +#endif + diff --git a/Modules/_blake2/impl/blake2b-ref.c b/Modules/_blake2/impl/blake2b-ref.c new file mode 100644 index 0000000..ac91568 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-ref.c @@ -0,0 +1,416 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) +{ + S->f[1] = 0; + return 0; +} + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) +{ + store32( &P->leaf_length, leaf_length ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) +{ + store64( &P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) +{ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; + + return 0; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + const uint8_t *p = ( const uint8_t * )( P ); + + blake2b_init0( S ); + + /* IV XOR ParamBlock */ + for( size_t i = 0; i < 8; ++i ) + S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + + return 0; +} + + + +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2b_init_param( S, P ); +} + + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; + + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + uint64_t m[16]; + uint64_t v[16]; + int i; + + for( i = 0; i < 16; ++i ) + m[i] = load64( block + i * sizeof( m[i] ) ); + + for( i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = S->t[0] ^ blake2b_IV[4]; + v[13] = S->t[1] ^ blake2b_IV[5]; + v[14] = S->f[0] ^ blake2b_IV[6]; + v[15] = S->f[1] ^ blake2b_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + + for( i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + return 0; +} + +/* inlen now in bytes */ +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2B_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +/* Is this correct? */ +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + + if( out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + diff --git a/Modules/_blake2/impl/blake2b-round.h b/Modules/_blake2/impl/blake2b-round.h new file mode 100644 index 0000000..4ce2255 --- /dev/null +++ b/Modules/_blake2/impl/blake2b-round.h @@ -0,0 +1,159 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2B_ROUND_H__ +#define __BLAKE2B_ROUND_H__ + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) +#endif +#else +/* ... */ +#endif + + + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE41) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif + diff --git a/Modules/_blake2/impl/blake2b.c b/Modules/_blake2/impl/blake2b.c new file mode 100644 index 0000000..f9090a1 --- /dev/null +++ b/Modules/_blake2/impl/blake2b.c @@ -0,0 +1,450 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + +#ifdef _MSC_VER +#include /* for _mm_set_epi64x */ +#endif +#include +#if defined(HAVE_SSSE3) +#include +#endif +#if defined(HAVE_SSE41) +#include +#endif +#if defined(HAVE_AVX) +#include +#endif +#if defined(HAVE_XOP) +#include +#endif + +#include "blake2b-round.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) +{ + S->f[1] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + + +BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ +#if __x86_64__ + /* ADD/ADC chain */ + __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; + t += inc; + S->t[0] = ( uint64_t )( t >> 0 ); + S->t[1] = ( uint64_t )( t >> 64 ); +#else + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +#endif + return 0; +} + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) +{ + P->leaf_length = leaf_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) +{ + P->node_offset = node_offset; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) +{ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; + + return 0; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + /*blake2b_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2b_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2b_state ) ); + + for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + + +/* Some sort of default parameter block initialization, for sequential blake2b */ +int blake2b_init( blake2b_state *S, const uint8_t outlen ) +{ + const blake2b_param P = + { + outlen, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + return blake2b_init_param( S, &P ); +} + +int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + const blake2b_param P = + { + outlen, + keylen, + 1, + 1, + 0, + 0, + 0, + 0, + {0}, + {0}, + {0} + }; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; + + if( blake2b_init_param( S, &P ) < 0 ) + return 0; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + __m128i row1l, row1h; + __m128i row2l, row2h; + __m128i row3l, row3h; + __m128i row4l, row4h; + __m128i b0, b1; + __m128i t0, t1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); + const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); + const __m128i m4 = LOADU( block + 64 ); + const __m128i m5 = LOADU( block + 80 ); + const __m128i m6 = LOADU( block + 96 ); + const __m128i m7 = LOADU( block + 112 ); +#else + const uint64_t m0 = ( ( uint64_t * )block )[ 0]; + const uint64_t m1 = ( ( uint64_t * )block )[ 1]; + const uint64_t m2 = ( ( uint64_t * )block )[ 2]; + const uint64_t m3 = ( ( uint64_t * )block )[ 3]; + const uint64_t m4 = ( ( uint64_t * )block )[ 4]; + const uint64_t m5 = ( ( uint64_t * )block )[ 5]; + const uint64_t m6 = ( ( uint64_t * )block )[ 6]; + const uint64_t m7 = ( ( uint64_t * )block )[ 7]; + const uint64_t m8 = ( ( uint64_t * )block )[ 8]; + const uint64_t m9 = ( ( uint64_t * )block )[ 9]; + const uint64_t m10 = ( ( uint64_t * )block )[10]; + const uint64_t m11 = ( ( uint64_t * )block )[11]; + const uint64_t m12 = ( ( uint64_t * )block )[12]; + const uint64_t m13 = ( ( uint64_t * )block )[13]; + const uint64_t m14 = ( ( uint64_t * )block )[14]; + const uint64_t m15 = ( ( uint64_t * )block )[15]; +#endif + row1l = LOADU( &S->h[0] ); + row1h = LOADU( &S->h[2] ); + row2l = LOADU( &S->h[4] ); + row2h = LOADU( &S->h[6] ); + row3l = LOADU( &blake2b_IV[0] ); + row3h = LOADU( &blake2b_IV[2] ); + row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); + row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + row1l = _mm_xor_si128( row3l, row1l ); + row1h = _mm_xor_si128( row3h, row1h ); + STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); + STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); + row2l = _mm_xor_si128( row4l, row2l ); + row2h = _mm_xor_si128( row4h, row2h ); + STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); + STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); + return 0; +} + + +int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2B_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + + +int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +{ + if( outlen > BLAKE2B_OUTBYTES ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2B_BLOCKBYTES ) + { + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); + S->buflen -= BLAKE2B_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); + } + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + memcpy( out, &S->h[0], outlen ); + return 0; +} + + +int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + diff --git a/Modules/_blake2/impl/blake2s-load-sse2.h b/Modules/_blake2/impl/blake2s-load-sse2.h new file mode 100644 index 0000000..eadefa7 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-sse2.h @@ -0,0 +1,61 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_SSE2_H__ +#define __BLAKE2S_LOAD_SSE2_H__ + +#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) +#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) +#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8) +#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9) +#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) +#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) +#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1) +#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12) +#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) +#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) +#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10) +#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14) +#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) +#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9) +#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2) +#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6) +#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) +#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) +#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14) +#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1) +#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) +#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) +#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4) +#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13) +#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) +#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) +#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0) +#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7) +#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) +#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) +#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5) +#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0) +#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) +#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) +#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12) +#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2) +#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) +#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) +#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15) +#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11) + + +#endif diff --git a/Modules/_blake2/impl/blake2s-load-sse41.h b/Modules/_blake2/impl/blake2s-load-sse41.h new file mode 100644 index 0000000..54bf0cd --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-sse41.h @@ -0,0 +1,231 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_SSE41_H__ +#define __BLAKE2S_LOAD_SSE41_H__ + +#define LOAD_MSG_0_1(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_2(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_0_3(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0))); + +#define LOAD_MSG_0_4(buf) \ +buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1))); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_blend_epi16(m1, m2, 0x0C); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); + +#define LOAD_MSG_1_2(buf) \ +t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ +t1 = _mm_blend_epi16(m1,m3,0xC0); \ +t2 = _mm_blend_epi16(t0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_slli_si128(m1, 4); \ +t1 = _mm_blend_epi16(m2, t0, 0x30); \ +t2 = _mm_blend_epi16(m0, t1, 0xF0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_1_4(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_slli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0, t1, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_unpackhi_epi32(m2,m3); \ +t1 = _mm_blend_epi16(m3,m1,0x0C); \ +t2 = _mm_blend_epi16(t0, t1, 0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_2_2(buf) \ +t0 = _mm_unpacklo_epi32(m2,m0); \ +t1 = _mm_blend_epi16(t0, m0, 0xF0); \ +t2 = _mm_slli_si128(m3, 8); \ +buf = _mm_blend_epi16(t1, t2, 0xC0); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_blend_epi16(m0, m2, 0x3C); \ +t1 = _mm_srli_si128(m1, 12); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_2_4(buf) \ +t0 = _mm_slli_si128(m3, 4); \ +t1 = _mm_blend_epi16(m0, m1, 0x33); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3)); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(t0, m2); \ +t2 = _mm_blend_epi16(t1, m3, 0x0C); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); + +#define LOAD_MSG_3_2(buf) \ +t0 = _mm_slli_si128(m2, 8); \ +t1 = _mm_blend_epi16(m3,m0,0x0C); \ +t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_blend_epi16(m0,m1,0x0F); \ +t1 = _mm_blend_epi16(t0, m3, 0xC0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_3_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m2); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +buf = _mm_unpacklo_epi64(t1,t0); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x33); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); + +#define LOAD_MSG_4_2(buf) \ +t0 = _mm_unpackhi_epi64(m1,m3); \ +t1 = _mm_unpacklo_epi64(m0,m1); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_unpackhi_epi64(m3,m1); \ +t1 = _mm_unpackhi_epi64(m2,m0); \ +buf = _mm_blend_epi16(t1,t0,0x33); + +#define LOAD_MSG_4_4(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_slli_si128(t0, 8); \ +t2 = _mm_blend_epi16(t1,m3,0x0F); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_unpacklo_epi32(m0,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_5_2(buf) \ +t0 = _mm_srli_si128(m2, 4); \ +t1 = _mm_blend_epi16(m0,m3,0x03); \ +buf = _mm_blend_epi16(t1,t0,0x3C); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x0C); \ +t1 = _mm_srli_si128(m3, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x30); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); + +#define LOAD_MSG_5_4(buf) \ +t0 = _mm_unpacklo_epi64(m1,m2); \ +t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \ +buf = _mm_blend_epi16(t0,t1,0x33); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_slli_si128(m1, 12); \ +t1 = _mm_blend_epi16(m0,m3,0x33); \ +buf = _mm_blend_epi16(t1,t0,0xC0); + +#define LOAD_MSG_6_2(buf) \ +t0 = _mm_blend_epi16(m3,m2,0x30); \ +t1 = _mm_srli_si128(m1, 4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_unpacklo_epi64(m0,m2); \ +t1 = _mm_srli_si128(m1, 4); \ +buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0)); + +#define LOAD_MSG_6_4(buf) \ +t0 = _mm_unpackhi_epi32(m1,m2); \ +t1 = _mm_unpackhi_epi64(m0,t0); \ +buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_unpackhi_epi32(m0,m1); \ +t1 = _mm_blend_epi16(t0,m3,0x0F); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); + +#define LOAD_MSG_7_2(buf) \ +t0 = _mm_blend_epi16(m2,m3,0x30); \ +t1 = _mm_srli_si128(m0,4); \ +t2 = _mm_blend_epi16(t0,t1,0x03); \ +buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_unpackhi_epi64(m0,m3); \ +t1 = _mm_unpacklo_epi64(m1,m2); \ +t2 = _mm_blend_epi16(t0,t1,0x3C); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1)); + +#define LOAD_MSG_7_4(buf) \ +t0 = _mm_unpacklo_epi32(m0,m1); \ +t1 = _mm_unpackhi_epi32(m1,m2); \ +buf = _mm_unpacklo_epi64(t0,t1); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_unpackhi_epi32(m1,m3); \ +t1 = _mm_unpacklo_epi64(t0,m0); \ +t2 = _mm_blend_epi16(t1,m2,0xC0); \ +buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_8_2(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_blend_epi16(m2,t0,0xF0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_blend_epi16(m2,m0,0x0C); \ +t1 = _mm_slli_si128(t0,4); \ +buf = _mm_blend_epi16(t1,m3,0x0F); + +#define LOAD_MSG_8_4(buf) \ +t0 = _mm_blend_epi16(m1,m0,0x30); \ +buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2)); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_blend_epi16(m0,m2,0x03); \ +t1 = _mm_blend_epi16(m1,m2,0x30); \ +t2 = _mm_blend_epi16(t1,t0,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); + +#define LOAD_MSG_9_2(buf) \ +t0 = _mm_slli_si128(m0,4); \ +t1 = _mm_blend_epi16(m1,t0,0xC0); \ +buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_unpackhi_epi32(m0,m3); \ +t1 = _mm_unpacklo_epi32(m2,m3); \ +t2 = _mm_unpackhi_epi64(t0,t1); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1)); + +#define LOAD_MSG_9_4(buf) \ +t0 = _mm_blend_epi16(m3,m2,0xC0); \ +t1 = _mm_unpacklo_epi32(m0,m3); \ +t2 = _mm_blend_epi16(t0,t1,0x0F); \ +buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3)); + +#endif + diff --git a/Modules/_blake2/impl/blake2s-load-xop.h b/Modules/_blake2/impl/blake2s-load-xop.h new file mode 100644 index 0000000..a3b5d65 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-load-xop.h @@ -0,0 +1,191 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_LOAD_XOP_H__ +#define __BLAKE2S_LOAD_XOP_H__ + +#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ + +/* Basic VPPERM emulation, for testing purposes */ +/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) +{ + const __m128i sixteen = _mm_set1_epi8(16); + const __m128i t0 = _mm_shuffle_epi8(src1, sel); + const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen)); + const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), + _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */ + return _mm_blendv_epi8(t0, s1, mask); +}*/ + +#define LOAD_MSG_0_1(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); + +#define LOAD_MSG_0_2(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); + +#define LOAD_MSG_0_3(buf) \ +buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); + +#define LOAD_MSG_0_4(buf) \ +buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); + +#define LOAD_MSG_1_1(buf) \ +t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_1_2(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_1_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_1_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); + +#define LOAD_MSG_2_1(buf) \ +t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) ); + +#define LOAD_MSG_2_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_2_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_2_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_3_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_3_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_4_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) ); + +#define LOAD_MSG_4_2(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_4_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); + +#define LOAD_MSG_4_4(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) ); + +#define LOAD_MSG_5_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_5_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); + +#define LOAD_MSG_5_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); + +#define LOAD_MSG_5_4(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) ); + +#define LOAD_MSG_6_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) ); + +#define LOAD_MSG_6_2(buf) \ +t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) ); + +#define LOAD_MSG_6_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) ); + +#define LOAD_MSG_6_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_7_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) ); + +#define LOAD_MSG_7_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_7_3(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); + +#define LOAD_MSG_7_4(buf) \ +t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) ); + +#define LOAD_MSG_8_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ +t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); + +#define LOAD_MSG_8_2(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); + +#define LOAD_MSG_8_3(buf) \ +t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \ + +#define LOAD_MSG_8_4(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) ); + +#define LOAD_MSG_9_1(buf) \ +t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) ); + +#define LOAD_MSG_9_2(buf) \ +buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) ); + +#define LOAD_MSG_9_3(buf) \ +t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \ +buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) ); + +#define LOAD_MSG_9_4(buf) \ +t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ +buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) ); + +#endif + diff --git a/Modules/_blake2/impl/blake2s-ref.c b/Modules/_blake2/impl/blake2s-ref.c new file mode 100644 index 0000000..0e246c3 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-ref.c @@ -0,0 +1,406 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint32_t blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8_t blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) +{ + S->f[1] = 0; + return 0; +} + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); + return 0; +} + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) +{ + store32( &P->leaf_length, leaf_length ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) +{ + store48( P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) +{ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; + + return 0; +} + +/* init2 xors IV with input parameter block */ +int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + const uint32_t *p = ( const uint32_t * )( P ); + + blake2s_init0( S ); + + /* IV XOR ParamBlock */ + for( size_t i = 0; i < 8; ++i ) + S->h[i] ^= load32( &p[i] ); + + return 0; +} + + +/* Sequential blake2s initialization */ +int blake2s_init( blake2s_state *S, const uint8_t outlen ) +{ + blake2s_param P[1]; + + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + P->digest_length = outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2s_init_param( S, P ); +} + +int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + blake2s_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; + + P->digest_length = outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset( block, 0, BLAKE2S_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +{ + uint32_t m[16]; + uint32_t v[16]; + + for( size_t i = 0; i < 16; ++i ) + m[i] = load32( block + i * sizeof( m[i] ) ); + + for( size_t i = 0; i < 8; ++i ) + v[i] = S->h[i]; + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + + for( size_t i = 0; i < 8; ++i ) + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + +#undef G +#undef ROUND + return 0; +} + + +int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + + if( out == NULL || outlen == 0 || outlen > BLAKE2S_OUTBYTES ) + return -1; + + if( blake2s_is_lastblock( S ) ) + return -1; + + + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); + S->buflen -= BLAKE2S_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); + } + + blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if ( NULL == key && keylen > 0) return -1; + + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; + + if( keylen > BLAKE2S_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2s_init( S, outlen ) < 0 ) return -1; + } + + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); + + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + diff --git a/Modules/_blake2/impl/blake2s-round.h b/Modules/_blake2/impl/blake2s-round.h new file mode 100644 index 0000000..7470d92 --- /dev/null +++ b/Modules/_blake2/impl/blake2s-round.h @@ -0,0 +1,90 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#pragma once +#ifndef __BLAKE2S_ROUND_H__ +#define __BLAKE2S_ROUND_H__ + +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi32(r, c) ( \ + (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ + : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ + : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) +#else +#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) +#endif +#else +/* ... */ +#endif + + +#define G1(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -12); + +#define G2(row1,row2,row3,row4,buf) \ + row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ + row2 = _mm_roti_epi32(row2, -7); + +#define DIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); + +#define UNDIAGONALIZE(row1,row2,row3,row4) \ + row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \ + row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ + row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); + +#if defined(HAVE_XOP) +#include "blake2s-load-xop.h" +#elif defined(HAVE_SSE41) +#include "blake2s-load-sse41.h" +#else +#include "blake2s-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(buf1); \ + G1(row1,row2,row3,row4,buf1); \ + LOAD_MSG_ ##r ##_2(buf2); \ + G2(row1,row2,row3,row4,buf2); \ + DIAGONALIZE(row1,row2,row3,row4); \ + LOAD_MSG_ ##r ##_3(buf3); \ + G1(row1,row2,row3,row4,buf3); \ + LOAD_MSG_ ##r ##_4(buf4); \ + G2(row1,row2,row3,row4,buf4); \ + UNDIAGONALIZE(row1,row2,row3,row4); \ + +#endif + diff --git a/Modules/_blake2/impl/blake2s.c b/Modules/_blake2/impl/blake2s.c new file mode 100644 index 0000000..030a85e --- /dev/null +++ b/Modules/_blake2/impl/blake2s.c @@ -0,0 +1,431 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +#include "blake2-config.h" + + +#include +#if defined(HAVE_SSSE3) +#include +#endif +#if defined(HAVE_SSE41) +#include +#endif +#if defined(HAVE_AVX) +#include +#endif +#if defined(HAVE_XOP) +#include +#endif + +#include "blake2s-round.h" + +static const uint32_t blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static const uint8_t blake2s_sigma[10][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , +}; + + +/* Some helper functions, not necessarily useful */ +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) +{ + S->f[1] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) +{ + S->f[1] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +{ + return S->f[0] != 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_set_lastnode( S ); + + S->f[0] = -1; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) +{ + if( S->last_node ) blake2s_clear_lastnode( S ); + + S->f[0] = 0; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +{ + uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; + t += inc; + S->t[0] = ( uint32_t )( t >> 0 ); + S->t[1] = ( uint32_t )( t >> 32 ); + return 0; +} + + +/* Parameter-related functions */ +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) +{ + P->digest_length = digest_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) +{ + P->fanout = fanout; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) +{ + P->depth = depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) +{ + P->leaf_length = leaf_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) +{ + store48( P->node_offset, node_offset ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) +{ + P->node_depth = node_depth; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) +{ + P->inner_length = inner_length; + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) +{ + memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) +{ + memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); + return 0; +} + +BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) +{ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; + + return 0; +} + +/* init2 xors IV with input parameter block */ +int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) +{ + /*blake2s_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2s_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); + /* IV XOR ParamBlock */ + memset( S, 0, sizeof( blake2s_state ) ); + + for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + + return 0; +} + + +/* Some sort of default parameter block initialization, for sequential blake2s */ +int blake2s_init( blake2s_state *S, const uint8_t outlen ) +{ + const blake2s_param P = + { + outlen, + 0, + 1, + 1, + 0, + {0}, + 0, + 0, + {0}, + {0} + }; + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + return blake2s_init_param( S, &P ); +} + + +int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +{ + const blake2s_param P = + { + outlen, + keylen, + 1, + 1, + 0, + {0}, + 0, + 0, + {0}, + {0} + }; + + /* Move interval verification here? */ + if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; + + if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; + + if( blake2s_init_param( S, &P ) < 0 ) + return -1; + + { + uint8_t block[BLAKE2S_BLOCKBYTES]; + memset( block, 0, BLAKE2S_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + + +BLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +{ + __m128i row1, row2, row3, row4; + __m128i buf1, buf2, buf3, buf4; +#if defined(HAVE_SSE41) + __m128i t0, t1; +#if !defined(HAVE_XOP) + __m128i t2; +#endif +#endif + __m128i ff0, ff1; +#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) + const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); + const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); +#endif +#if defined(HAVE_SSE41) + const __m128i m0 = LOADU( block + 00 ); + const __m128i m1 = LOADU( block + 16 ); + const __m128i m2 = LOADU( block + 32 ); + const __m128i m3 = LOADU( block + 48 ); +#else + const uint32_t m0 = ( ( uint32_t * )block )[ 0]; + const uint32_t m1 = ( ( uint32_t * )block )[ 1]; + const uint32_t m2 = ( ( uint32_t * )block )[ 2]; + const uint32_t m3 = ( ( uint32_t * )block )[ 3]; + const uint32_t m4 = ( ( uint32_t * )block )[ 4]; + const uint32_t m5 = ( ( uint32_t * )block )[ 5]; + const uint32_t m6 = ( ( uint32_t * )block )[ 6]; + const uint32_t m7 = ( ( uint32_t * )block )[ 7]; + const uint32_t m8 = ( ( uint32_t * )block )[ 8]; + const uint32_t m9 = ( ( uint32_t * )block )[ 9]; + const uint32_t m10 = ( ( uint32_t * )block )[10]; + const uint32_t m11 = ( ( uint32_t * )block )[11]; + const uint32_t m12 = ( ( uint32_t * )block )[12]; + const uint32_t m13 = ( ( uint32_t * )block )[13]; + const uint32_t m14 = ( ( uint32_t * )block )[14]; + const uint32_t m15 = ( ( uint32_t * )block )[15]; +#endif + row1 = ff0 = LOADU( &S->h[0] ); + row2 = ff1 = LOADU( &S->h[4] ); + row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); + row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); + STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); + return 0; +} + +/* inlen now in bytes */ +int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +{ + while( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if( inlen > fill ) + { + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + S->buflen += fill; + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); /* Compress */ + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ + S->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } + else /* inlen <= fill */ + { + memcpy( S->buf + left, in, inlen ); + S->buflen += inlen; /* Be lazy, do not compress */ + in += inlen; + inlen -= inlen; + } + } + + return 0; +} + +/* Is this correct? */ +int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +{ + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + + if( outlen > BLAKE2S_OUTBYTES ) + return -1; + + if( blake2s_is_lastblock( S ) ) + return -1; + + if( S->buflen > BLAKE2S_BLOCKBYTES ) + { + blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); + blake2s_compress( S, S->buf ); + S->buflen -= BLAKE2S_BLOCKBYTES; + memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); + } + + blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_set_lastblock( S ); + memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + blake2s_compress( S, S->buf ); + + for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, outlen ); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +{ + blake2s_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if ( NULL == key && keylen > 0) return -1; + + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; + + if( keylen > BLAKE2S_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2s_init( S, outlen ) < 0 ) return -1; + } + + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); +} +#endif + +#if defined(BLAKE2S_SELFTEST) +#include +#include "blake2-kat.h" +int main( int argc, char **argv ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[KAT_LENGTH]; + + for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + for( size_t i = 0; i < KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + + if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + puts( "error" ); + return -1; + } + } + + puts( "ok" ); + return 0; +} +#endif + + diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 3580453..530b6b1 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -2,30 +2,33 @@ /* * Given a PyObject* obj, fill in the Py_buffer* viewp with the result - * of PyObject_GetBuffer. Sets an exception and issues a return NULL - * on any errors. + * of PyObject_GetBuffer. Sets an exception and issues the erraction + * on any errors, e.g. 'return NULL' or 'goto error'. */ -#define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) do { \ +#define GET_BUFFER_VIEW_OR_ERROR(obj, viewp, erraction) do { \ if (PyUnicode_Check((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "Unicode-objects must be encoded before hashing");\ - return NULL; \ + erraction; \ } \ if (!PyObject_CheckBuffer((obj))) { \ PyErr_SetString(PyExc_TypeError, \ "object supporting the buffer API required"); \ - return NULL; \ + erraction; \ } \ if (PyObject_GetBuffer((obj), (viewp), PyBUF_SIMPLE) == -1) { \ - return NULL; \ + erraction; \ } \ if ((viewp)->ndim > 1) { \ PyErr_SetString(PyExc_BufferError, \ "Buffer must be single dimension"); \ PyBuffer_Release((viewp)); \ - return NULL; \ + erraction; \ } \ - } while(0); + } while(0) + +#define GET_BUFFER_VIEW_OR_ERROUT(obj, viewp) \ + GET_BUFFER_VIEW_OR_ERROR(obj, viewp, return NULL) /* * Helper code to synchronize access to the hash object when the GIL is diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 9dfe9e3..68b216e 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -214,6 +214,9 @@ + + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index ce2ea60..8e3ccf8 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -449,6 +449,15 @@ Modules + + Modules + + + Modules + + + Modules + Modules diff --git a/setup.py b/setup.py index 8c13cf2..bbb6bb7 100644 --- a/setup.py +++ b/setup.py @@ -889,6 +889,22 @@ class PyBuildExt(build_ext): exts.append( Extension('_sha1', ['sha1module.c'], depends=['hashlib.h']) ) + blake2_deps = [os.path.join('_blake2', 'impl', name) + for name in os.listdir('Modules/_blake2/impl')] + blake2_deps.append('hashlib.h') + + blake2_macros = [] + if os.uname().machine == "x86_64": + # Every x86_64 machine has at least SSE2. + blake2_macros.append(('BLAKE2_USE_SSE', '1')) + + exts.append( Extension('_blake2', + ['_blake2/blake2module.c', + '_blake2/blake2b_impl.c', + '_blake2/blake2s_impl.c'], + define_macros=blake2_macros, + depends=blake2_deps) ) + # Modules that provide persistent dictionary-like semantics. You will # probably want to arrange for at least one of them to be available on # your machine, though none are defined by default because of library -- cgit v0.12