diff options
26 files changed, 6495 insertions, 19 deletions
diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index 6ca5307..2cb3c78 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -69,7 +69,13 @@ Constructors for hash algorithms that are always present in this module are :func:`md5` is normally available as well, though it may be missing if you are using a rare "FIPS compliant" build of Python. Additional algorithms may also be available depending upon the OpenSSL -library that Python uses on your platform. +library that Python uses on your platform. On most platforms the +:func:`sha3_224`, :func:`sha3_256`, :func:`sha3_384`, :func:`sha3_512`, +:func:`shake_128`, :func:`shake_256` are also available. + +.. versionadded:: 3.6 + SHA3 (Keccak) and SHAKE constructors :func:`sha3_224`, :func:`sha3_256`, + :func:`sha3_384`, :func:`sha3_512`, :func:`shake_128`, :func:`shake_256`. .. versionadded:: 3.6 :func:`blake2b` and :func:`blake2s` were added. @@ -189,6 +195,28 @@ A hash object has the following methods: compute the digests of data sharing a common initial substring. +SHAKE variable length digests +----------------------------- + +The :func:`shake_128` and :func:`shake_256` algorithms provide variable +length digests with length_in_bits//2 up to 128 or 256 bits of security. +As such, their digest methods require a length. Maximum length is not limited +by the SHAKE algorithm. + +.. method:: shake.digest(length) + + Return the digest of the data passed to the :meth:`update` method so far. + This is a bytes object of size ``length`` which may contain bytes in + the whole range from 0 to 255. + + +.. method:: shake.hexdigest(length) + + Like :meth:`digest` except the digest is returned as a string object of + double length, containing only hexadecimal digits. This may be used to + exchange the value safely in email or other non-binary environments. + + Key derivation -------------- diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 2d5e92e..053a7ad 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -11,7 +11,8 @@ new(name, data=b'', **kwargs) - returns a new hash object implementing the Named constructor functions are also available, these are faster than using new(name): -md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), and blake2s() +md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(), +sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256. More algorithms may be available on your platform but the above are guaranteed to exist. See the algorithms_guaranteed and algorithms_available attributes @@ -55,7 +56,10 @@ More condensed: # This tuple and __get_builtin_constructor() must be modified if a new # always available algorithm is added. __always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', - 'blake2b', 'blake2s') + 'blake2b', 'blake2s', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256') + algorithms_guaranteed = set(__always_supported) algorithms_available = set(__always_supported) @@ -90,6 +94,15 @@ def __get_builtin_constructor(name): import _blake2 cache['blake2b'] = _blake2.blake2b cache['blake2s'] = _blake2.blake2s + elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256'}: + import _sha3 + cache['sha3_224'] = _sha3.sha3_224 + cache['sha3_256'] = _sha3.sha3_256 + cache['sha3_384'] = _sha3.sha3_384 + cache['sha3_512'] = _sha3.sha3_512 + cache['shake_128'] = _sha3.shake_128 + cache['shake_256'] = _sha3.shake_256 except ImportError: pass # no extension module, this hash is unsupported. diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index e94fc3f..21260f6 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -34,6 +34,13 @@ except ImportError: requires_blake2 = unittest.skipUnless(_blake2, 'requires _blake2') +try: + import _sha3 +except ImportError: + _sha3 = None + +requires_sha3 = unittest.skipUnless(_sha3, 'requires _sha3') + def hexstr(s): assert isinstance(s, bytes), repr(s) @@ -61,7 +68,11 @@ class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', 'sha384', 'SHA384', 'sha512', 'SHA512', - 'blake2b', 'blake2s') + 'blake2b', 'blake2s', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'shake_128', 'shake_256') + + shakes = {'shake_128', 'shake_256'} # Issue #14693: fallback modules are always compiled under POSIX _warn_on_extension_import = os.name == 'posix' or COMPILED_WITH_PYDEBUG @@ -131,6 +142,15 @@ class HashLibTestCase(unittest.TestCase): add_builtin_constructor('blake2s') add_builtin_constructor('blake2b') + _sha3 = self._conditional_import_module('_sha3') + if _sha3: + add_builtin_constructor('sha3_224') + add_builtin_constructor('sha3_256') + add_builtin_constructor('sha3_384') + add_builtin_constructor('sha3_512') + add_builtin_constructor('shake_128') + add_builtin_constructor('shake_256') + super(HashLibTestCase, self).__init__(*args, **kwargs) @property @@ -142,7 +162,10 @@ class HashLibTestCase(unittest.TestCase): a = array.array("b", range(10)) for cons in self.hash_constructors: c = cons(a) - c.hexdigest() + if c.name in self.shakes: + c.hexdigest(16) + else: + c.hexdigest() def test_algorithms_guaranteed(self): self.assertEqual(hashlib.algorithms_guaranteed, @@ -186,14 +209,21 @@ class HashLibTestCase(unittest.TestCase): def test_hexdigest(self): for cons in self.hash_constructors: h = cons() - self.assertIsInstance(h.digest(), bytes) - self.assertEqual(hexstr(h.digest()), h.hexdigest()) + if h.name in self.shakes: + self.assertIsInstance(h.digest(16), bytes) + self.assertEqual(hexstr(h.digest(16)), h.hexdigest(16)) + else: + self.assertIsInstance(h.digest(), bytes) + self.assertEqual(hexstr(h.digest()), h.hexdigest()) def test_name_attribute(self): for cons in self.hash_constructors: h = cons() self.assertIsInstance(h.name, str) - self.assertIn(h.name, self.supported_hash_names) + if h.name in self.supported_hash_names: + self.assertIn(h.name, self.supported_hash_names) + else: + self.assertNotIn(h.name, self.supported_hash_names) self.assertEqual(h.name, hashlib.new(h.name).name) def test_large_update(self): @@ -208,40 +238,46 @@ class HashLibTestCase(unittest.TestCase): m1.update(bees) m1.update(cees) m1.update(dees) + if m1.name in self.shakes: + args = (16,) + else: + args = () m2 = cons() m2.update(aas + bees + cees + dees) - self.assertEqual(m1.digest(), m2.digest()) + self.assertEqual(m1.digest(*args), m2.digest(*args)) m3 = cons(aas + bees + cees + dees) - self.assertEqual(m1.digest(), m3.digest()) + self.assertEqual(m1.digest(*args), m3.digest(*args)) # verify copy() doesn't touch original m4 = cons(aas + bees + cees) - m4_digest = m4.digest() + m4_digest = m4.digest(*args) m4_copy = m4.copy() m4_copy.update(dees) - self.assertEqual(m1.digest(), m4_copy.digest()) - self.assertEqual(m4.digest(), m4_digest) + self.assertEqual(m1.digest(*args), m4_copy.digest(*args)) + self.assertEqual(m4.digest(*args), m4_digest) - def check(self, name, data, hexdigest, **kwargs): + def check(self, name, data, hexdigest, shake=False, **kwargs): + length = len(hexdigest)//2 hexdigest = hexdigest.lower() constructors = self.constructors_to_test[name] # 2 is for hashlib.name(...) and hashlib.new(name, ...) self.assertGreaterEqual(len(constructors), 2) for hash_object_constructor in constructors: m = hash_object_constructor(data, **kwargs) - computed = m.hexdigest() + computed = m.hexdigest() if not shake else m.hexdigest(length) self.assertEqual( computed, hexdigest, "Hash algorithm %s constructed using %s returned hexdigest" " %r for %d byte input data that should have hashed to %r." % (name, hash_object_constructor, computed, len(data), hexdigest)) - computed = m.digest() + computed = m.digest() if not shake else m.digest(length) digest = bytes.fromhex(hexdigest) self.assertEqual(computed, digest) - self.assertEqual(len(digest), m.digest_size) + if not shake: + self.assertEqual(len(digest), m.digest_size) def check_no_unicode(self, algorithm_name): # Unicode objects are not allowed as input. @@ -262,13 +298,30 @@ class HashLibTestCase(unittest.TestCase): self.check_no_unicode('blake2b') self.check_no_unicode('blake2s') - def check_blocksize_name(self, name, block_size=0, digest_size=0): + @requires_sha3 + def test_no_unicode_sha3(self): + self.check_no_unicode('sha3_224') + self.check_no_unicode('sha3_256') + self.check_no_unicode('sha3_384') + self.check_no_unicode('sha3_512') + self.check_no_unicode('shake_128') + self.check_no_unicode('shake_256') + + def check_blocksize_name(self, name, block_size=0, digest_size=0, + digest_length=None): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: m = hash_object_constructor() self.assertEqual(m.block_size, block_size) self.assertEqual(m.digest_size, digest_size) - self.assertEqual(len(m.digest()), digest_size) + if digest_length: + self.assertEqual(len(m.digest(digest_length)), + digest_length) + self.assertEqual(len(m.hexdigest(digest_length)), + 2*digest_length) + else: + self.assertEqual(len(m.digest()), digest_size) + self.assertEqual(len(m.hexdigest()), 2*digest_size) self.assertEqual(m.name, name) # split for sha3_512 / _sha3.sha3 object self.assertIn(name.split("_")[0], repr(m)) @@ -280,6 +333,12 @@ class HashLibTestCase(unittest.TestCase): self.check_blocksize_name('sha256', 64, 32) self.check_blocksize_name('sha384', 128, 48) self.check_blocksize_name('sha512', 128, 64) + self.check_blocksize_name('sha3_224', 144, 28) + self.check_blocksize_name('sha3_256', 136, 32) + self.check_blocksize_name('sha3_384', 104, 48) + self.check_blocksize_name('sha3_512', 72, 64) + self.check_blocksize_name('shake_128', 168, 0, 32) + self.check_blocksize_name('shake_256', 136, 0, 64) @requires_blake2 def test_blocksize_name_blake2(self): @@ -563,6 +622,72 @@ class HashLibTestCase(unittest.TestCase): key = bytes.fromhex(key) self.check('blake2s', msg, md, key=key) + @requires_sha3 + def test_case_sha3_224_0(self): + self.check('sha3_224', b"", + "6b4e03423667dbb73b6e15454f0eb1abd4597f9a1b078e3f5b5a6bc7") + + @requires_sha3 + def test_case_sha3_224_vector(self): + for msg, md in read_vectors('sha3_224'): + self.check('sha3_224', msg, md) + + @requires_sha3 + def test_case_sha3_256_0(self): + self.check('sha3_256', b"", + "a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a") + + @requires_sha3 + def test_case_sha3_256_vector(self): + for msg, md in read_vectors('sha3_256'): + self.check('sha3_256', msg, md) + + @requires_sha3 + def test_case_sha3_384_0(self): + self.check('sha3_384', b"", + "0c63a75b845e4f7d01107d852e4c2485c51a50aaaa94fc61995e71bbee983a2a"+ + "c3713831264adb47fb6bd1e058d5f004") + + @requires_sha3 + def test_case_sha3_384_vector(self): + for msg, md in read_vectors('sha3_384'): + self.check('sha3_384', msg, md) + + @requires_sha3 + def test_case_sha3_512_0(self): + self.check('sha3_512', b"", + "a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a6"+ + "15b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26") + + @requires_sha3 + def test_case_sha3_512_vector(self): + for msg, md in read_vectors('sha3_512'): + self.check('sha3_512', msg, md) + + @requires_sha3 + def test_case_shake_128_0(self): + self.check('shake_128', b"", + "7f9c2ba4e88f827d616045507605853ed73b8093f6efbc88eb1a6eacfa66ef26", + True) + self.check('shake_128', b"", "7f9c", True) + + @requires_sha3 + def test_case_shake128_vector(self): + for msg, md in read_vectors('shake_128'): + self.check('shake_128', msg, md, True) + + @requires_sha3 + def test_case_shake_256_0(self): + self.check('shake_256', b"", + "46b9dd2b0ba88d13233b3feb743eeb243fcd52ea62b81b82b50c27646ed5762f", + True) + self.check('shake_256', b"", "46b9", True) + + @requires_sha3 + def test_case_shake256_vector(self): + for msg, md in read_vectors('shake_256'): + self.check('shake_256', msg, md, True) + def test_gil(self): # Check things work fine with an input larger than the size required # for multithreaded operation (which is hardwired to 2048). @@ -91,6 +91,8 @@ Core and Builtins Library ------- +- Issue #16113: Add SHA-3 and SHAKE support to hashlib module. + - Issue #27776: The :func:`os.urandom` function does now block on Linux 3.17 and newer until the system urandom entropy pool is initialized to increase the security. This change is part of the :pep:`524`. diff --git a/Modules/_sha3/README.txt b/Modules/_sha3/README.txt new file mode 100644 index 0000000..e34b1d1 --- /dev/null +++ b/Modules/_sha3/README.txt @@ -0,0 +1,11 @@ +Keccak Code Package +=================== + +The files in kcp are taken from the Keccak Code Package. They have been +slightly to be C89 compatible. The architecture specific header file +KeccakP-1600-SnP.h ha been renamed to KeccakP-1600-SnP-opt32.h or +KeccakP-1600-SnP-opt64.h. + +The 64bit files were generated with generic64lc/libkeccak.a.pack target, the +32bit files with generic32lc/libkeccak.a.pack. + diff --git a/Modules/_sha3/cleanup.py b/Modules/_sha3/cleanup.py new file mode 100755 index 0000000..17c56b3 --- /dev/null +++ b/Modules/_sha3/cleanup.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Copyright (C) 2012 Christian Heimes (christian@python.org) +# Licensed to PSF under a Contributor Agreement. +# +# cleanup Keccak sources + +import os +import re + +CPP1 = re.compile("^//(.*)") +CPP2 = re.compile("\ //(.*)") + +STATICS = ("void ", "int ", "HashReturn ", + "const UINT64 ", "UINT16 ", " int prefix##") + +HERE = os.path.dirname(os.path.abspath(__file__)) +KECCAK = os.path.join(HERE, "kcp") + +def getfiles(): + for name in os.listdir(KECCAK): + name = os.path.join(KECCAK, name) + if os.path.isfile(name): + yield name + +def cleanup(f): + buf = [] + for line in f: + # mark all functions and global data as static + #if line.startswith(STATICS): + # buf.append("static " + line) + # continue + # remove UINT64 typedef, we have our own + if line.startswith("typedef unsigned long long int"): + buf.append("/* %s */\n" % line.strip()) + continue + ## remove #include "brg_endian.h" + if "brg_endian.h" in line: + buf.append("/* %s */\n" % line.strip()) + continue + # transform C++ comments into ANSI C comments + line = CPP1.sub(r"/*\1 */\n", line) + line = CPP2.sub(r" /*\1 */\n", line) + buf.append(line) + return "".join(buf) + +for name in getfiles(): + with open(name) as f: + res = cleanup(f) + with open(name, "w") as f: + f.write(res) diff --git a/Modules/_sha3/clinic/sha3module.c.h b/Modules/_sha3/clinic/sha3module.c.h new file mode 100644 index 0000000..bfd95cd --- /dev/null +++ b/Modules/_sha3/clinic/sha3module.c.h @@ -0,0 +1,148 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +PyDoc_STRVAR(py_sha3_new__doc__, +"sha3_224(string=None)\n" +"--\n" +"\n" +"Return a new SHA3 hash object with a hashbit length of 28 bytes."); + +static PyObject * +py_sha3_new_impl(PyTypeObject *type, PyObject *data); + +static PyObject * +py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static char *_keywords[] = {"string", NULL}; + PyObject *data = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:sha3_224", _keywords, + &data)) + goto exit; + return_value = py_sha3_new_impl(type, data); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sha3_sha3_224_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a copy of the hash object."); + +#define _SHA3_SHA3_224_COPY_METHODDEF \ + {"copy", (PyCFunction)_sha3_sha3_224_copy, METH_NOARGS, _sha3_sha3_224_copy__doc__}, + +static PyObject * +_sha3_sha3_224_copy_impl(SHA3object *self); + +static PyObject * +_sha3_sha3_224_copy(SHA3object *self, PyObject *Py_UNUSED(ignored)) +{ + return _sha3_sha3_224_copy_impl(self); +} + +PyDoc_STRVAR(_sha3_sha3_224_digest__doc__, +"digest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _SHA3_SHA3_224_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_sha3_sha3_224_digest, METH_NOARGS, _sha3_sha3_224_digest__doc__}, + +static PyObject * +_sha3_sha3_224_digest_impl(SHA3object *self); + +static PyObject * +_sha3_sha3_224_digest(SHA3object *self, PyObject *Py_UNUSED(ignored)) +{ + return _sha3_sha3_224_digest_impl(self); +} + +PyDoc_STRVAR(_sha3_sha3_224_hexdigest__doc__, +"hexdigest($self, /)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _SHA3_SHA3_224_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_sha3_sha3_224_hexdigest, METH_NOARGS, _sha3_sha3_224_hexdigest__doc__}, + +static PyObject * +_sha3_sha3_224_hexdigest_impl(SHA3object *self); + +static PyObject * +_sha3_sha3_224_hexdigest(SHA3object *self, PyObject *Py_UNUSED(ignored)) +{ + return _sha3_sha3_224_hexdigest_impl(self); +} + +PyDoc_STRVAR(_sha3_sha3_224_update__doc__, +"update($self, obj, /)\n" +"--\n" +"\n" +"Update this hash object\'s state with the provided string."); + +#define _SHA3_SHA3_224_UPDATE_METHODDEF \ + {"update", (PyCFunction)_sha3_sha3_224_update, METH_O, _sha3_sha3_224_update__doc__}, + +PyDoc_STRVAR(_sha3_shake_128_digest__doc__, +"digest($self, /, length)\n" +"--\n" +"\n" +"Return the digest value as a string of binary data."); + +#define _SHA3_SHAKE_128_DIGEST_METHODDEF \ + {"digest", (PyCFunction)_sha3_shake_128_digest, METH_VARARGS|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, + +static PyObject * +_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); + +static PyObject * +_sha3_shake_128_digest(SHA3object *self, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static char *_keywords[] = {"length", NULL}; + unsigned long length; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "k:digest", _keywords, + &length)) + goto exit; + return_value = _sha3_shake_128_digest_impl(self, length); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, +"hexdigest($self, /, length)\n" +"--\n" +"\n" +"Return the digest value as a string of hexadecimal digits."); + +#define _SHA3_SHAKE_128_HEXDIGEST_METHODDEF \ + {"hexdigest", (PyCFunction)_sha3_shake_128_hexdigest, METH_VARARGS|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, + +static PyObject * +_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); + +static PyObject * +_sha3_shake_128_hexdigest(SHA3object *self, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static char *_keywords[] = {"length", NULL}; + unsigned long length; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "k:hexdigest", _keywords, + &length)) + goto exit; + return_value = _sha3_shake_128_hexdigest_impl(self, length); + +exit: + return return_value; +} +/*[clinic end generated code: output=2eb6db41778eeb50 input=a9049054013a1b77]*/ diff --git a/Modules/_sha3/kcp/KeccakHash.c b/Modules/_sha3/kcp/KeccakHash.c new file mode 100644 index 0000000..e09fb43 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakHash.c @@ -0,0 +1,82 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include "KeccakHash.h" + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix) +{ + HashReturn result; + + if (delimitedSuffix == 0) + return FAIL; + result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity); + if (result != SUCCESS) + return result; + instance->fixedOutputLength = hashbitlen; + instance->delimitedSuffix = delimitedSuffix; + return SUCCESS; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) == 0) + return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + else { + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + if (ret == SUCCESS) { + /* The last partial byte is assumed to be aligned on the least significant bits */ + + unsigned char lastByte = data[databitlen/8]; + /* Concatenate the last few bits provided here with those of the suffix */ + + unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8))); + if ((delimitedLastBytes & 0xFF00) == 0x0000) { + instance->delimitedSuffix = delimitedLastBytes & 0xFF; + } + else { + unsigned char oneByte[1]; + oneByte[0] = delimitedLastBytes & 0xFF; + ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1); + instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF; + } + } + return ret; + } +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) +{ + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix); + if (ret == SUCCESS) + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8); + else + return ret; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) != 0) + return FAIL; + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8); +} diff --git a/Modules/_sha3/kcp/KeccakHash.h b/Modules/_sha3/kcp/KeccakHash.h new file mode 100644 index 0000000..bbd3dc6 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakHash.h @@ -0,0 +1,114 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakHashInterface_h_ +#define _KeccakHashInterface_h_ + +#ifndef KeccakP1600_excluded + +#include "KeccakSponge.h" +#include <string.h> + +typedef unsigned char BitSequence; +typedef size_t DataLength; +typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; + +typedef struct { + KeccakWidth1600_SpongeInstance sponge; + unsigned int fixedOutputLength; + unsigned char delimitedSuffix; +} Keccak_HashInstance; + +/** + * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode. + * @param hashInstance Pointer to the hash instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param hashbitlen The desired number of output bits, + * or 0 for an arbitrarily-long output. + * @param delimitedSuffix Bits that will be automatically appended to the end + * of the input message, as in domain separation. + * This is a byte containing from 0 to 7 bits + * formatted like the @a delimitedData parameter of + * the Keccak_SpongeAbsorbLastFewBits() function. + * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); + +/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) + +/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) + +/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) + +/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) + +/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) + +/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) + +/** + * Function to give input data to be absorbed. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the least significant bits of the last byte (little-endian convention). + * @param databitLen The number of input bits provided in the input data. + * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen); + +/** + * Function to call after all input blocks have been input and to get + * output bits if the length was specified when calling Keccak_HashInitialize(). + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of + * output bits is equal to @a hashbitlen. + * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits + * must be extracted using the Keccak_HashSqueeze() function. + * @param state Pointer to the state of the sponge function initialized by Init(). + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); + + /** + * Function to squeeze output data. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param databitlen The number of output bits desired (must be a multiple of 8). + * @pre Keccak_HashFinal() must have been already called. + * @pre @a databitlen is a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen); + +#endif + +#endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-64.macros b/Modules/_sha3/kcp/KeccakP-1600-64.macros new file mode 100644 index 0000000..1f11fe3 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-64.macros @@ -0,0 +1,2208 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ + +/* --- Code for round, with prepare-theta */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + +#define copyFromStateAndAdd(X, state, input, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba = state[ 0]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + } \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + if (laneCount < 3) { \ + X##bi = state[ 2]; \ + } \ + else { \ + X##bi = state[ 2]^input[ 2]; \ + } \ + } \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu = state[ 4]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + } \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + if (laneCount < 7) { \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ge = state[ 6]^input[ 6]; \ + } \ + } \ + } \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go = state[ 8]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + } \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + if (laneCount < 11) { \ + X##ka = state[10]; \ + } \ + else { \ + X##ka = state[10]^input[10]; \ + } \ + } \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki = state[12]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + } \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + if (laneCount < 15) { \ + X##ku = state[14]; \ + } \ + else { \ + X##ku = state[14]^input[14]; \ + } \ + } \ + } \ + } \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me = state[16]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + } \ + X##mi = state[17]; \ + X##mo = state[18]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + if (laneCount < 19) { \ + X##mo = state[18]; \ + } \ + else { \ + X##mo = state[18]^input[18]; \ + } \ + } \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa = state[20]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + } \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + if (laneCount < 23) { \ + X##si = state[22]; \ + } \ + else { \ + X##si = state[22]^input[22]; \ + } \ + } \ + } \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + X##si = state[22]^input[22]; \ + X##so = state[23]^input[23]; \ + if (laneCount < 25) { \ + X##su = state[24]; \ + } \ + else { \ + X##su = state[24]^input[24]; \ + } \ + } \ + } + +#define addInput(X, input, laneCount) \ + if (laneCount == 21) { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + } \ + else if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + } \ + else { \ + X##ba ^= input[ 0]; \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + if (laneCount < 3) { \ + } \ + else { \ + X##bi ^= input[ 2]; \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + } \ + else { \ + X##bu ^= input[ 4]; \ + } \ + } \ + else { \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + if (laneCount < 7) { \ + } \ + else { \ + X##ge ^= input[ 6]; \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + } \ + else { \ + X##go ^= input[ 8]; \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + if (laneCount < 11) { \ + } \ + else { \ + X##ka ^= input[10]; \ + } \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + } \ + else { \ + X##ki ^= input[12]; \ + } \ + } \ + else { \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + if (laneCount < 15) { \ + } \ + else { \ + X##ku ^= input[14]; \ + } \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + } \ + else { \ + X##me ^= input[16]; \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + if (laneCount < 19) { \ + } \ + else { \ + X##mo ^= input[18]; \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + } \ + else { \ + X##sa ^= input[20]; \ + } \ + } \ + else { \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + if (laneCount < 23) { \ + } \ + else { \ + X##si ^= input[22]; \ + } \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + X##si ^= input[22]; \ + X##so ^= input[23]; \ + if (laneCount < 25) { \ + } \ + else { \ + X##su ^= input[24]; \ + } \ + } \ + } + +#ifdef UseBebigokimisa + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + } \ + else { \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + } \ + else { \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = ~X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = ~(input[index] ^ X##name); \ + X##name ^= output[index]; \ + +#else /* UseBebigokimisa */ + + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12]= X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = X##ki; \ + } \ + } \ + else { \ + output[12] = X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + } \ + else { \ + output[20] = X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#endif + +#define wrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + wrapOne(X, input, output, 14, ku) \ + wrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + wrapOne(X, input, output, 22, si) \ + wrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 24, su) \ + } \ + } \ + } + +#define unwrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + unwrapOne(X, input, output, 14, ku) \ + unwrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + unwrapOne(X, input, output, 22, si) \ + unwrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 24, su) \ + } \ + } \ + } diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h new file mode 100644 index 0000000..6cf765e --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h @@ -0,0 +1,37 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +/** For the documentation, see SnP-documentation.h. + */ + +#define KeccakP1600_implementation "in-place 32-bit optimized implementation" +#define KeccakP1600_stateSizeInBytes 200 +#define KeccakP1600_stateAlignment 8 + +#define KeccakP1600_StaticInitialize() +void KeccakP1600_Initialize(void *state); +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_12rounds(void *state); +void KeccakP1600_Permute_24rounds(void *state); +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); + +#endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h new file mode 100644 index 0000000..889a31a --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h @@ -0,0 +1,49 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +/** For the documentation, see SnP-documentation.h. + */ + +/* #include "brg_endian.h" */ +#include "KeccakP-1600-opt64-config.h" + +#define KeccakP1600_implementation "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")" +#define KeccakP1600_stateSizeInBytes 200 +#define KeccakP1600_stateAlignment 8 +#define KeccakF1600_FastLoop_supported + +#include <stddef.h> + +#define KeccakP1600_StaticInitialize() +void KeccakP1600_Initialize(void *state); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define KeccakP1600_AddByte(state, byte, offset) \ + ((unsigned char*)(state))[(offset)] ^= (byte) +#else +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); +#endif +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_12rounds(void *state); +void KeccakP1600_Permute_24rounds(void *state); +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); + +#endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP.h b/Modules/_sha3/kcp/KeccakP-1600-SnP.h new file mode 100644 index 0000000..0b23f09 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-SnP.h @@ -0,0 +1,7 @@ +#if KeccakOpt == 64 + #include "KeccakP-1600-SnP-opt64.h" +#elif KeccakOpt == 32 + #include "KeccakP-1600-SnP-opt32.h" +#else + #error "No KeccakOpt" +#endif diff --git a/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c new file mode 100644 index 0000000..886dc44 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c @@ -0,0 +1,1160 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +/* #include "brg_endian.h" */ +#include "KeccakP-1600-SnP.h" +#include "SnP-Relaned.h" + +typedef unsigned char UINT8; +typedef unsigned int UINT32; +/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */ + +/*typedef unsigned long UINT32; */ + + +#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ + +#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + temp0 = (low); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp1 = (high); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); + +#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd ^= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even &= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd &= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd = (temp0 >> 16) | (temp1 & 0xFFFF0000); + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ + +#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + temp0 = (even); \ + temp1 = (odd); \ + temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \ + temp0 = temp; \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); + +#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + low = temp0; \ + high = temp1; + +#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + lowOut = lowIn ^ temp0; \ + highOut = highIn ^ temp1; + +void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0xFF, offset); + memset(laneAsBytes+offset, 0x00, length); + memset(laneAsBytes+offset+length, 0xFF, 8-offset-length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) +{ + unsigned int lanePosition = offset/8; + unsigned int offsetInLane = offset%8; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + if (offsetInLane < 4) { + low = (UINT32)byte << (offsetInLane*8); + high = 0; + } + else { + low = 0; + high = (UINT32)byte << ((offsetInLane-4)*8); + } + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0, 8); + memcpy(laneAsBytes+offset, data, length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + const UINT32 * pI = (const UINT32 *)data; + UINT32 * pS = (UINT32*)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition<laneCount; lanePosition++) { + UINT8 laneAsBytes[8]; + memcpy(laneAsBytes, data+lanePosition*8, 8); + UINT32 low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + UINT32 high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); + UINT32 even, odd, temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ + KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length); + KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + const UINT32 * pI = (const UINT32 *)data; + UINT32 * pS = (UINT32 *)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition<laneCount; lanePosition++) { + UINT8 laneAsBytes[8]; + memcpy(laneAsBytes, data+lanePosition*8, 8); + UINT32 low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + UINT32 high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); + UINT32 even, odd, temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount) +{ + UINT32 *stateAsHalfLanes = (UINT32*)state; + unsigned int i; + + for(i=0; i<byteCount/8; i++) { + stateAsHalfLanes[i*2+0] = 0; + stateAsHalfLanes[i*2+1] = 0; + } + if (byteCount%8 != 0) + KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + UINT8 laneAsBytes[8]; + + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + *((UINT32*)(laneAsBytes+0)) = low; + *((UINT32*)(laneAsBytes+4)) = high; +#else + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + memcpy(data, laneAsBytes+offset, length); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT32 * pI = (UINT32 *)data; + const UINT32 * pS = ( const UINT32 *)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + memcpy(pI++, &low, 4); + memcpy(pI++, &high, 4); +#else + fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition<laneCount; lanePosition++) { + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); + UINT8 laneAsBytes[8]; + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + memcpy(data+lanePosition*8, laneAsBytes, 8); + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + UINT8 laneAsBytes[8]; + unsigned int i; + + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + *((UINT32*)(laneAsBytes+0)) = low; + *((UINT32*)(laneAsBytes+4)) = high; +#else + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + for(i=0; i<length; i++) + output[i] = input[i] ^ laneAsBytes[offset+i]; +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + const UINT32 * pI = (const UINT32 *)input; + UINT32 * pO = (UINT32 *)output; + const UINT32 * pS = (const UINT32 *)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + *(pO++) = *(pI++) ^ low; + *(pO++) = *(pI++) ^ high; +#else + fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition<laneCount; lanePosition++) { + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); + UINT8 laneAsBytes[8]; + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0)); + ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4)); + } +#endif +} +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] = +{ + 0x00000001UL, 0x00000000UL, + 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, + 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, + 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, + 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, + 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, + 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, + 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, + 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, + 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, + 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, + 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, + 0x00000000UL, 0x80008082UL, + 0x000000FFUL +}; + +#define KeccakAtoD_round0() \ + Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ + Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ + Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Da1 = Cz^Du0; \ +\ + Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Do1 = Cy^Cx; \ +\ + Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ + De1 = Cz^Cw; \ +\ + Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ + +#define KeccakAtoD_round1() \ + Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \ + Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \ + Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \ + Da1 = Cz^Du0; \ +\ + Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \ + Do1 = Cy^Cx; \ +\ + Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \ + De1 = Cz^Cw; \ +\ + Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ + +#define KeccakAtoD_round2() \ + Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \ + Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \ + Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \ + Da1 = Cz^Du0; \ +\ + Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \ + Do1 = Cy^Cx; \ +\ + Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \ + De1 = Cz^Cw; \ +\ + Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ + +#define KeccakAtoD_round3() \ + Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \ + Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \ + Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \ + Da1 = Cz^Du0; \ +\ + Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \ + Do1 = Cy^Cx; \ +\ + Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \ + De1 = Cz^Cw; \ +\ + Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ + +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds) +{ + { + UINT32 Da0, De0, Di0, Do0, Du0; + UINT32 Da1, De1, Di1, Do1, Du1; + UINT32 Ca0, Ce0, Ci0, Co0, Cu0; + UINT32 Cx, Cy, Cz, Cw; + #define Ba Ca0 + #define Be Ce0 + #define Bi Ci0 + #define Bo Co0 + #define Bu Cu0 + const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2; + UINT32 *stateAsHalfLanes = (UINT32*)state; + #define Aba0 stateAsHalfLanes[ 0] + #define Aba1 stateAsHalfLanes[ 1] + #define Abe0 stateAsHalfLanes[ 2] + #define Abe1 stateAsHalfLanes[ 3] + #define Abi0 stateAsHalfLanes[ 4] + #define Abi1 stateAsHalfLanes[ 5] + #define Abo0 stateAsHalfLanes[ 6] + #define Abo1 stateAsHalfLanes[ 7] + #define Abu0 stateAsHalfLanes[ 8] + #define Abu1 stateAsHalfLanes[ 9] + #define Aga0 stateAsHalfLanes[10] + #define Aga1 stateAsHalfLanes[11] + #define Age0 stateAsHalfLanes[12] + #define Age1 stateAsHalfLanes[13] + #define Agi0 stateAsHalfLanes[14] + #define Agi1 stateAsHalfLanes[15] + #define Ago0 stateAsHalfLanes[16] + #define Ago1 stateAsHalfLanes[17] + #define Agu0 stateAsHalfLanes[18] + #define Agu1 stateAsHalfLanes[19] + #define Aka0 stateAsHalfLanes[20] + #define Aka1 stateAsHalfLanes[21] + #define Ake0 stateAsHalfLanes[22] + #define Ake1 stateAsHalfLanes[23] + #define Aki0 stateAsHalfLanes[24] + #define Aki1 stateAsHalfLanes[25] + #define Ako0 stateAsHalfLanes[26] + #define Ako1 stateAsHalfLanes[27] + #define Aku0 stateAsHalfLanes[28] + #define Aku1 stateAsHalfLanes[29] + #define Ama0 stateAsHalfLanes[30] + #define Ama1 stateAsHalfLanes[31] + #define Ame0 stateAsHalfLanes[32] + #define Ame1 stateAsHalfLanes[33] + #define Ami0 stateAsHalfLanes[34] + #define Ami1 stateAsHalfLanes[35] + #define Amo0 stateAsHalfLanes[36] + #define Amo1 stateAsHalfLanes[37] + #define Amu0 stateAsHalfLanes[38] + #define Amu1 stateAsHalfLanes[39] + #define Asa0 stateAsHalfLanes[40] + #define Asa1 stateAsHalfLanes[41] + #define Ase0 stateAsHalfLanes[42] + #define Ase1 stateAsHalfLanes[43] + #define Asi0 stateAsHalfLanes[44] + #define Asi1 stateAsHalfLanes[45] + #define Aso0 stateAsHalfLanes[46] + #define Aso1 stateAsHalfLanes[47] + #define Asu0 stateAsHalfLanes[48] + #define Asu1 stateAsHalfLanes[49] + + do + { + /* --- Code for 4 rounds */ + + /* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ + + KeccakAtoD_round0(); + + Ba = (Aba0^Da0); + Be = ROL32((Age0^De0), 22); + Bi = ROL32((Aki1^Di1), 22); + Bo = ROL32((Amo1^Do1), 11); + Bu = ROL32((Asu0^Du0), 7); + Aba0 = Ba ^((~Be)& Bi ); + Aba0 ^= *(pRoundConstants++); + Age0 = Be ^((~Bi)& Bo ); + Aki1 = Bi ^((~Bo)& Bu ); + Amo1 = Bo ^((~Bu)& Ba ); + Asu0 = Bu ^((~Ba)& Be ); + + Ba = (Aba1^Da1); + Be = ROL32((Age1^De1), 22); + Bi = ROL32((Aki0^Di0), 21); + Bo = ROL32((Amo0^Do0), 10); + Bu = ROL32((Asu1^Du1), 7); + Aba1 = Ba ^((~Be)& Bi ); + Aba1 ^= *(pRoundConstants++); + Age1 = Be ^((~Bi)& Bo ); + Aki0 = Bi ^((~Bo)& Bu ); + Amo0 = Bo ^((~Bu)& Ba ); + Asu1 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Aka1^Da1), 2); + Bo = ROL32((Ame1^De1), 23); + Bu = ROL32((Asi1^Di1), 31); + Ba = ROL32((Abo0^Do0), 14); + Be = ROL32((Agu0^Du0), 10); + Aka1 = Ba ^((~Be)& Bi ); + Ame1 = Be ^((~Bi)& Bo ); + Asi1 = Bi ^((~Bo)& Bu ); + Abo0 = Bo ^((~Bu)& Ba ); + Agu0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Aka0^Da0), 1); + Bo = ROL32((Ame0^De0), 22); + Bu = ROL32((Asi0^Di0), 30); + Ba = ROL32((Abo1^Do1), 14); + Be = ROL32((Agu1^Du1), 10); + Aka0 = Ba ^((~Be)& Bi ); + Ame0 = Be ^((~Bi)& Bo ); + Asi0 = Bi ^((~Bo)& Bu ); + Abo1 = Bo ^((~Bu)& Ba ); + Agu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Asa0^Da0), 9); + Ba = ROL32((Abe1^De1), 1); + Be = ROL32((Agi0^Di0), 3); + Bi = ROL32((Ako1^Do1), 13); + Bo = ROL32((Amu0^Du0), 4); + Asa0 = Ba ^((~Be)& Bi ); + Abe1 = Be ^((~Bi)& Bo ); + Agi0 = Bi ^((~Bo)& Bu ); + Ako1 = Bo ^((~Bu)& Ba ); + Amu0 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Asa1^Da1), 9); + Ba = (Abe0^De0); + Be = ROL32((Agi1^Di1), 3); + Bi = ROL32((Ako0^Do0), 12); + Bo = ROL32((Amu1^Du1), 4); + Asa1 = Ba ^((~Be)& Bi ); + Abe0 = Be ^((~Bi)& Bo ); + Agi1 = Bi ^((~Bo)& Bu ); + Ako0 = Bo ^((~Bu)& Ba ); + Amu1 = Bu ^((~Ba)& Be ); + + Be = ROL32((Aga0^Da0), 18); + Bi = ROL32((Ake0^De0), 5); + Bo = ROL32((Ami1^Di1), 8); + Bu = ROL32((Aso0^Do0), 28); + Ba = ROL32((Abu1^Du1), 14); + Aga0 = Ba ^((~Be)& Bi ); + Ake0 = Be ^((~Bi)& Bo ); + Ami1 = Bi ^((~Bo)& Bu ); + Aso0 = Bo ^((~Bu)& Ba ); + Abu1 = Bu ^((~Ba)& Be ); + + Be = ROL32((Aga1^Da1), 18); + Bi = ROL32((Ake1^De1), 5); + Bo = ROL32((Ami0^Di0), 7); + Bu = ROL32((Aso1^Do1), 28); + Ba = ROL32((Abu0^Du0), 13); + Aga1 = Ba ^((~Be)& Bi ); + Ake1 = Be ^((~Bi)& Bo ); + Ami0 = Bi ^((~Bo)& Bu ); + Aso1 = Bo ^((~Bu)& Ba ); + Abu0 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Ama1^Da1), 21); + Bu = ROL32((Ase0^De0), 1); + Ba = ROL32((Abi0^Di0), 31); + Be = ROL32((Ago1^Do1), 28); + Bi = ROL32((Aku1^Du1), 20); + Ama1 = Ba ^((~Be)& Bi ); + Ase0 = Be ^((~Bi)& Bo ); + Abi0 = Bi ^((~Bo)& Bu ); + Ago1 = Bo ^((~Bu)& Ba ); + Aku1 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Ama0^Da0), 20); + Bu = ROL32((Ase1^De1), 1); + Ba = ROL32((Abi1^Di1), 31); + Be = ROL32((Ago0^Do0), 27); + Bi = ROL32((Aku0^Du0), 19); + Ama0 = Ba ^((~Be)& Bi ); + Ase1 = Be ^((~Bi)& Bo ); + Abi1 = Bi ^((~Bo)& Bu ); + Ago0 = Bo ^((~Bu)& Ba ); + Aku0 = Bu ^((~Ba)& Be ); + + KeccakAtoD_round1(); + + Ba = (Aba0^Da0); + Be = ROL32((Ame1^De0), 22); + Bi = ROL32((Agi1^Di1), 22); + Bo = ROL32((Aso1^Do1), 11); + Bu = ROL32((Aku1^Du0), 7); + Aba0 = Ba ^((~Be)& Bi ); + Aba0 ^= *(pRoundConstants++); + Ame1 = Be ^((~Bi)& Bo ); + Agi1 = Bi ^((~Bo)& Bu ); + Aso1 = Bo ^((~Bu)& Ba ); + Aku1 = Bu ^((~Ba)& Be ); + + Ba = (Aba1^Da1); + Be = ROL32((Ame0^De1), 22); + Bi = ROL32((Agi0^Di0), 21); + Bo = ROL32((Aso0^Do0), 10); + Bu = ROL32((Aku0^Du1), 7); + Aba1 = Ba ^((~Be)& Bi ); + Aba1 ^= *(pRoundConstants++); + Ame0 = Be ^((~Bi)& Bo ); + Agi0 = Bi ^((~Bo)& Bu ); + Aso0 = Bo ^((~Bu)& Ba ); + Aku0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Asa1^Da1), 2); + Bo = ROL32((Ake1^De1), 23); + Bu = ROL32((Abi1^Di1), 31); + Ba = ROL32((Amo1^Do0), 14); + Be = ROL32((Agu0^Du0), 10); + Asa1 = Ba ^((~Be)& Bi ); + Ake1 = Be ^((~Bi)& Bo ); + Abi1 = Bi ^((~Bo)& Bu ); + Amo1 = Bo ^((~Bu)& Ba ); + Agu0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Asa0^Da0), 1); + Bo = ROL32((Ake0^De0), 22); + Bu = ROL32((Abi0^Di0), 30); + Ba = ROL32((Amo0^Do1), 14); + Be = ROL32((Agu1^Du1), 10); + Asa0 = Ba ^((~Be)& Bi ); + Ake0 = Be ^((~Bi)& Bo ); + Abi0 = Bi ^((~Bo)& Bu ); + Amo0 = Bo ^((~Bu)& Ba ); + Agu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Ama1^Da0), 9); + Ba = ROL32((Age1^De1), 1); + Be = ROL32((Asi1^Di0), 3); + Bi = ROL32((Ako0^Do1), 13); + Bo = ROL32((Abu1^Du0), 4); + Ama1 = Ba ^((~Be)& Bi ); + Age1 = Be ^((~Bi)& Bo ); + Asi1 = Bi ^((~Bo)& Bu ); + Ako0 = Bo ^((~Bu)& Ba ); + Abu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Ama0^Da1), 9); + Ba = (Age0^De0); + Be = ROL32((Asi0^Di1), 3); + Bi = ROL32((Ako1^Do0), 12); + Bo = ROL32((Abu0^Du1), 4); + Ama0 = Ba ^((~Be)& Bi ); + Age0 = Be ^((~Bi)& Bo ); + Asi0 = Bi ^((~Bo)& Bu ); + Ako1 = Bo ^((~Bu)& Ba ); + Abu0 = Bu ^((~Ba)& Be ); + + Be = ROL32((Aka1^Da0), 18); + Bi = ROL32((Abe1^De0), 5); + Bo = ROL32((Ami0^Di1), 8); + Bu = ROL32((Ago1^Do0), 28); + Ba = ROL32((Asu1^Du1), 14); + Aka1 = Ba ^((~Be)& Bi ); + Abe1 = Be ^((~Bi)& Bo ); + Ami0 = Bi ^((~Bo)& Bu ); + Ago1 = Bo ^((~Bu)& Ba ); + Asu1 = Bu ^((~Ba)& Be ); + + Be = ROL32((Aka0^Da1), 18); + Bi = ROL32((Abe0^De1), 5); + Bo = ROL32((Ami1^Di0), 7); + Bu = ROL32((Ago0^Do1), 28); + Ba = ROL32((Asu0^Du0), 13); + Aka0 = Ba ^((~Be)& Bi ); + Abe0 = Be ^((~Bi)& Bo ); + Ami1 = Bi ^((~Bo)& Bu ); + Ago0 = Bo ^((~Bu)& Ba ); + Asu0 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Aga1^Da1), 21); + Bu = ROL32((Ase0^De0), 1); + Ba = ROL32((Aki1^Di0), 31); + Be = ROL32((Abo1^Do1), 28); + Bi = ROL32((Amu1^Du1), 20); + Aga1 = Ba ^((~Be)& Bi ); + Ase0 = Be ^((~Bi)& Bo ); + Aki1 = Bi ^((~Bo)& Bu ); + Abo1 = Bo ^((~Bu)& Ba ); + Amu1 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Aga0^Da0), 20); + Bu = ROL32((Ase1^De1), 1); + Ba = ROL32((Aki0^Di1), 31); + Be = ROL32((Abo0^Do0), 27); + Bi = ROL32((Amu0^Du0), 19); + Aga0 = Ba ^((~Be)& Bi ); + Ase1 = Be ^((~Bi)& Bo ); + Aki0 = Bi ^((~Bo)& Bu ); + Abo0 = Bo ^((~Bu)& Ba ); + Amu0 = Bu ^((~Ba)& Be ); + + KeccakAtoD_round2(); + + Ba = (Aba0^Da0); + Be = ROL32((Ake1^De0), 22); + Bi = ROL32((Asi0^Di1), 22); + Bo = ROL32((Ago0^Do1), 11); + Bu = ROL32((Amu1^Du0), 7); + Aba0 = Ba ^((~Be)& Bi ); + Aba0 ^= *(pRoundConstants++); + Ake1 = Be ^((~Bi)& Bo ); + Asi0 = Bi ^((~Bo)& Bu ); + Ago0 = Bo ^((~Bu)& Ba ); + Amu1 = Bu ^((~Ba)& Be ); + + Ba = (Aba1^Da1); + Be = ROL32((Ake0^De1), 22); + Bi = ROL32((Asi1^Di0), 21); + Bo = ROL32((Ago1^Do0), 10); + Bu = ROL32((Amu0^Du1), 7); + Aba1 = Ba ^((~Be)& Bi ); + Aba1 ^= *(pRoundConstants++); + Ake0 = Be ^((~Bi)& Bo ); + Asi1 = Bi ^((~Bo)& Bu ); + Ago1 = Bo ^((~Bu)& Ba ); + Amu0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Ama0^Da1), 2); + Bo = ROL32((Abe0^De1), 23); + Bu = ROL32((Aki0^Di1), 31); + Ba = ROL32((Aso1^Do0), 14); + Be = ROL32((Agu0^Du0), 10); + Ama0 = Ba ^((~Be)& Bi ); + Abe0 = Be ^((~Bi)& Bo ); + Aki0 = Bi ^((~Bo)& Bu ); + Aso1 = Bo ^((~Bu)& Ba ); + Agu0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Ama1^Da0), 1); + Bo = ROL32((Abe1^De0), 22); + Bu = ROL32((Aki1^Di0), 30); + Ba = ROL32((Aso0^Do1), 14); + Be = ROL32((Agu1^Du1), 10); + Ama1 = Ba ^((~Be)& Bi ); + Abe1 = Be ^((~Bi)& Bo ); + Aki1 = Bi ^((~Bo)& Bu ); + Aso0 = Bo ^((~Bu)& Ba ); + Agu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Aga1^Da0), 9); + Ba = ROL32((Ame0^De1), 1); + Be = ROL32((Abi1^Di0), 3); + Bi = ROL32((Ako1^Do1), 13); + Bo = ROL32((Asu1^Du0), 4); + Aga1 = Ba ^((~Be)& Bi ); + Ame0 = Be ^((~Bi)& Bo ); + Abi1 = Bi ^((~Bo)& Bu ); + Ako1 = Bo ^((~Bu)& Ba ); + Asu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Aga0^Da1), 9); + Ba = (Ame1^De0); + Be = ROL32((Abi0^Di1), 3); + Bi = ROL32((Ako0^Do0), 12); + Bo = ROL32((Asu0^Du1), 4); + Aga0 = Ba ^((~Be)& Bi ); + Ame1 = Be ^((~Bi)& Bo ); + Abi0 = Bi ^((~Bo)& Bu ); + Ako0 = Bo ^((~Bu)& Ba ); + Asu0 = Bu ^((~Ba)& Be ); + + Be = ROL32((Asa1^Da0), 18); + Bi = ROL32((Age1^De0), 5); + Bo = ROL32((Ami1^Di1), 8); + Bu = ROL32((Abo1^Do0), 28); + Ba = ROL32((Aku0^Du1), 14); + Asa1 = Ba ^((~Be)& Bi ); + Age1 = Be ^((~Bi)& Bo ); + Ami1 = Bi ^((~Bo)& Bu ); + Abo1 = Bo ^((~Bu)& Ba ); + Aku0 = Bu ^((~Ba)& Be ); + + Be = ROL32((Asa0^Da1), 18); + Bi = ROL32((Age0^De1), 5); + Bo = ROL32((Ami0^Di0), 7); + Bu = ROL32((Abo0^Do1), 28); + Ba = ROL32((Aku1^Du0), 13); + Asa0 = Ba ^((~Be)& Bi ); + Age0 = Be ^((~Bi)& Bo ); + Ami0 = Bi ^((~Bo)& Bu ); + Abo0 = Bo ^((~Bu)& Ba ); + Aku1 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Aka0^Da1), 21); + Bu = ROL32((Ase0^De0), 1); + Ba = ROL32((Agi1^Di0), 31); + Be = ROL32((Amo0^Do1), 28); + Bi = ROL32((Abu0^Du1), 20); + Aka0 = Ba ^((~Be)& Bi ); + Ase0 = Be ^((~Bi)& Bo ); + Agi1 = Bi ^((~Bo)& Bu ); + Amo0 = Bo ^((~Bu)& Ba ); + Abu0 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Aka1^Da0), 20); + Bu = ROL32((Ase1^De1), 1); + Ba = ROL32((Agi0^Di1), 31); + Be = ROL32((Amo1^Do0), 27); + Bi = ROL32((Abu1^Du0), 19); + Aka1 = Ba ^((~Be)& Bi ); + Ase1 = Be ^((~Bi)& Bo ); + Agi0 = Bi ^((~Bo)& Bu ); + Amo1 = Bo ^((~Bu)& Ba ); + Abu1 = Bu ^((~Ba)& Be ); + + KeccakAtoD_round3(); + + Ba = (Aba0^Da0); + Be = ROL32((Abe0^De0), 22); + Bi = ROL32((Abi0^Di1), 22); + Bo = ROL32((Abo0^Do1), 11); + Bu = ROL32((Abu0^Du0), 7); + Aba0 = Ba ^((~Be)& Bi ); + Aba0 ^= *(pRoundConstants++); + Abe0 = Be ^((~Bi)& Bo ); + Abi0 = Bi ^((~Bo)& Bu ); + Abo0 = Bo ^((~Bu)& Ba ); + Abu0 = Bu ^((~Ba)& Be ); + + Ba = (Aba1^Da1); + Be = ROL32((Abe1^De1), 22); + Bi = ROL32((Abi1^Di0), 21); + Bo = ROL32((Abo1^Do0), 10); + Bu = ROL32((Abu1^Du1), 7); + Aba1 = Ba ^((~Be)& Bi ); + Aba1 ^= *(pRoundConstants++); + Abe1 = Be ^((~Bi)& Bo ); + Abi1 = Bi ^((~Bo)& Bu ); + Abo1 = Bo ^((~Bu)& Ba ); + Abu1 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Aga0^Da1), 2); + Bo = ROL32((Age0^De1), 23); + Bu = ROL32((Agi0^Di1), 31); + Ba = ROL32((Ago0^Do0), 14); + Be = ROL32((Agu0^Du0), 10); + Aga0 = Ba ^((~Be)& Bi ); + Age0 = Be ^((~Bi)& Bo ); + Agi0 = Bi ^((~Bo)& Bu ); + Ago0 = Bo ^((~Bu)& Ba ); + Agu0 = Bu ^((~Ba)& Be ); + + Bi = ROL32((Aga1^Da0), 1); + Bo = ROL32((Age1^De0), 22); + Bu = ROL32((Agi1^Di0), 30); + Ba = ROL32((Ago1^Do1), 14); + Be = ROL32((Agu1^Du1), 10); + Aga1 = Ba ^((~Be)& Bi ); + Age1 = Be ^((~Bi)& Bo ); + Agi1 = Bi ^((~Bo)& Bu ); + Ago1 = Bo ^((~Bu)& Ba ); + Agu1 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Aka0^Da0), 9); + Ba = ROL32((Ake0^De1), 1); + Be = ROL32((Aki0^Di0), 3); + Bi = ROL32((Ako0^Do1), 13); + Bo = ROL32((Aku0^Du0), 4); + Aka0 = Ba ^((~Be)& Bi ); + Ake0 = Be ^((~Bi)& Bo ); + Aki0 = Bi ^((~Bo)& Bu ); + Ako0 = Bo ^((~Bu)& Ba ); + Aku0 = Bu ^((~Ba)& Be ); + + Bu = ROL32((Aka1^Da1), 9); + Ba = (Ake1^De0); + Be = ROL32((Aki1^Di1), 3); + Bi = ROL32((Ako1^Do0), 12); + Bo = ROL32((Aku1^Du1), 4); + Aka1 = Ba ^((~Be)& Bi ); + Ake1 = Be ^((~Bi)& Bo ); + Aki1 = Bi ^((~Bo)& Bu ); + Ako1 = Bo ^((~Bu)& Ba ); + Aku1 = Bu ^((~Ba)& Be ); + + Be = ROL32((Ama0^Da0), 18); + Bi = ROL32((Ame0^De0), 5); + Bo = ROL32((Ami0^Di1), 8); + Bu = ROL32((Amo0^Do0), 28); + Ba = ROL32((Amu0^Du1), 14); + Ama0 = Ba ^((~Be)& Bi ); + Ame0 = Be ^((~Bi)& Bo ); + Ami0 = Bi ^((~Bo)& Bu ); + Amo0 = Bo ^((~Bu)& Ba ); + Amu0 = Bu ^((~Ba)& Be ); + + Be = ROL32((Ama1^Da1), 18); + Bi = ROL32((Ame1^De1), 5); + Bo = ROL32((Ami1^Di0), 7); + Bu = ROL32((Amo1^Do1), 28); + Ba = ROL32((Amu1^Du0), 13); + Ama1 = Ba ^((~Be)& Bi ); + Ame1 = Be ^((~Bi)& Bo ); + Ami1 = Bi ^((~Bo)& Bu ); + Amo1 = Bo ^((~Bu)& Ba ); + Amu1 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Asa0^Da1), 21); + Bu = ROL32((Ase0^De0), 1); + Ba = ROL32((Asi0^Di0), 31); + Be = ROL32((Aso0^Do1), 28); + Bi = ROL32((Asu0^Du1), 20); + Asa0 = Ba ^((~Be)& Bi ); + Ase0 = Be ^((~Bi)& Bo ); + Asi0 = Bi ^((~Bo)& Bu ); + Aso0 = Bo ^((~Bu)& Ba ); + Asu0 = Bu ^((~Ba)& Be ); + + Bo = ROL32((Asa1^Da0), 20); + Bu = ROL32((Ase1^De1), 1); + Ba = ROL32((Asi1^Di1), 31); + Be = ROL32((Aso1^Do0), 27); + Bi = ROL32((Asu1^Du0), 19); + Asa1 = Ba ^((~Be)& Bi ); + Ase1 = Be ^((~Bi)& Bo ); + Asi1 = Bi ^((~Bo)& Bu ); + Aso1 = Bo ^((~Bu)& Ba ); + Asu1 = Bu ^((~Ba)& Be ); + } + while ( *pRoundConstants != 0xFF ); + + #undef Aba0 + #undef Aba1 + #undef Abe0 + #undef Abe1 + #undef Abi0 + #undef Abi1 + #undef Abo0 + #undef Abo1 + #undef Abu0 + #undef Abu1 + #undef Aga0 + #undef Aga1 + #undef Age0 + #undef Age1 + #undef Agi0 + #undef Agi1 + #undef Ago0 + #undef Ago1 + #undef Agu0 + #undef Agu1 + #undef Aka0 + #undef Aka1 + #undef Ake0 + #undef Ake1 + #undef Aki0 + #undef Aki1 + #undef Ako0 + #undef Ako1 + #undef Aku0 + #undef Aku1 + #undef Ama0 + #undef Ama1 + #undef Ame0 + #undef Ame1 + #undef Ami0 + #undef Ami1 + #undef Amo0 + #undef Amo1 + #undef Amu0 + #undef Amu1 + #undef Asa0 + #undef Asa1 + #undef Ase0 + #undef Ase1 + #undef Asi0 + #undef Asi1 + #undef Aso0 + #undef Aso1 + #undef Asu0 + #undef Asu1 + } +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 12); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 24); +} diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h b/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h new file mode 100644 index 0000000..9501c64 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h @@ -0,0 +1,3 @@ +#define KeccakP1600_implementation_config "lane complementing, all rounds unrolled" +#define KeccakP1600_fullUnrolling +#define KeccakP1600_useLaneComplementing diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64.c b/Modules/_sha3/kcp/KeccakP-1600-opt64.c new file mode 100644 index 0000000..c90010d --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-opt64.c @@ -0,0 +1,474 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include <stdlib.h> +/* #include "brg_endian.h" */ +#include "KeccakP-1600-opt64-config.h" + +#if NOT_PYTHON +typedef unsigned char UINT8; +/* typedef unsigned long long int UINT64; */ +#endif + +#if defined(KeccakP1600_useLaneComplementing) +#define UseBebigokimisa +#endif + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#ifdef KeccakP1600_fullUnrolling +#define FullUnrolling +#else +#define Unrolling KeccakP1600_unrolling +#endif +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +#ifdef KeccakP1600_useLaneComplementing + ((UINT64*)state)[ 1] = ~(UINT64)0; + ((UINT64*)state)[ 2] = ~(UINT64)0; + ((UINT64*)state)[ 8] = ~(UINT64)0; + ((UINT64*)state)[12] = ~(UINT64)0; + ((UINT64*)state)[17] = ~(UINT64)0; + ((UINT64*)state)[20] = ~(UINT64)0; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT64 lane; + if (length == 0) + return; + if (length == 1) + lane = data[0]; + else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset*8; +#else + UINT64 lane = 0; + unsigned int i; + for(i=0; i<length; i++) + lane |= ((UINT64)data[i]) << ((i+offset)*8); +#endif + ((UINT64*)state)[lanePosition] ^= lane; +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + unsigned int i = 0; +#ifdef NO_MISALIGNED_ACCESSES + /* If either pointer is misaligned, fall back to byte-wise xor. */ + + if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) { + for (i = 0; i < laneCount * 8; i++) { + ((unsigned char*)state)[i] ^= data[i]; + } + } + else +#endif + { + /* Otherwise... */ + + for( ; (i+8)<=laneCount; i+=8) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; + ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; + ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4]; + ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5]; + ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6]; + ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7]; + } + for( ; (i+4)<=laneCount; i+=4) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; + ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; + } + for( ; (i+2)<=laneCount; i+=2) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + } + if (i<laneCount) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + } + } +#else + unsigned int i; + UINT8 *curData = data; + for(i=0; i<laneCount; i++, curData+=8) { + UINT64 lane = (UINT64)curData[0] + | ((UINT64)curData[1] << 8) + | ((UINT64)curData[2] << 16) + | ((UINT64)curData[3] << 24) + | ((UINT64)curData[4] <<32) + | ((UINT64)curData[5] << 40) + | ((UINT64)curData[6] << 48) + | ((UINT64)curData[7] << 56); + ((UINT64*)state)[i] ^= lane; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) +{ + UINT64 lane = byte; + lane <<= (offset%8)*8; + ((UINT64*)state)[offset/8] ^= lane; +} +#endif + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + unsigned int i; + for(i=0; i<length; i++) + ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i]; + } + else +#endif + { + memcpy((unsigned char*)state+lanePosition*8+offset, data, length); + } +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + unsigned int lanePosition; + + for(lanePosition=0; lanePosition<laneCount; lanePosition++) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition]; + else + ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition]; +#else + memcpy(state, data, laneCount*8); +#endif +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + unsigned int lanePosition; + + for(lanePosition=0; lanePosition<byteCount/8; lanePosition++) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + ((UINT64*)state)[lanePosition] = ~0; + else + ((UINT64*)state)[lanePosition] = 0; + if (byteCount%8 != 0) { + lanePosition = byteCount/8; + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8); + else + memset((unsigned char*)state+lanePosition*8, 0, byteCount%8); + } +#else + memset(state, 0, byteCount); +#endif +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) +{ + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + + copyFromState(A, stateAsLanes) + rounds24 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) +{ + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + + copyFromState(A, stateAsLanes) + rounds12 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + UINT64 lane1[1]; + lane1[0] = lane; + memcpy(data, (UINT8*)lane1+offset, length); + } +#else + unsigned int i; + lane >>= offset*8; + for(i=0; i<length; i++) { + data[i] = lane & 0xFF; + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +void fromWordToBytes(UINT8 *bytes, const UINT64 word) +{ + unsigned int i; + + for(i=0; i<(64/8); i++) + bytes[i] = (word >> (8*i)) & 0xFF; +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i<laneCount; i++) + fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); +#endif +#ifdef KeccakP1600_useLaneComplementing + if (laneCount > 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + UINT64 lane1[1]; + lane1[0] = lane; + for(i=0; i<length; i++) + output[i] = input[i] ^ ((UINT8*)lane1)[offset+i]; + } +#else + unsigned int i; + lane >>= offset*8; + for(i=0; i<length; i++) { + output[i] = input[i] ^ (lane & 0xFF); + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#endif + + for(i=0; i<laneCount; i++) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i]; +#else + fromWordToBytes(temp, ((const UINT64*)state)[i]); + for(j=0; j<8; j++) + output[i*8+j] = input[i*8+j] ^ temp[j]; +#endif + } +#ifdef KeccakP1600_useLaneComplementing + if (laneCount > 1) { + ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; + if (laneCount > 2) { + ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; + if (laneCount > 8) { + ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; + if (laneCount > 12) { + ((UINT64*)output)[12] = ~((UINT64*)output)[12]; + if (laneCount > 17) { + ((UINT64*)output)[17] = ~((UINT64*)output)[17]; + if (laneCount > 20) { + ((UINT64*)output)[20] = ~((UINT64*)output)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + UINT64 *inDataAsLanes = (UINT64*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds24 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros b/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros new file mode 100644 index 0000000..405ce29 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros @@ -0,0 +1,185 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (defined(FullUnrolling)) +#define rounds24 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 12) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 6) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#elif (Unrolling == 4) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#elif (Unrolling == 3) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#elif (Unrolling == 2) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 1) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#else +#error "Unrolling is not correctly specified!" +#endif diff --git a/Modules/_sha3/kcp/KeccakSponge.c b/Modules/_sha3/kcp/KeccakSponge.c new file mode 100644 index 0000000..afdb731 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakSponge.c @@ -0,0 +1,92 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSponge.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + + #define prefix KeccakWidth200 + #define SnP KeccakP200 + #define SnP_width 200 + #define SnP_Permute KeccakP200_Permute_18rounds + #if defined(KeccakF200_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + + #define prefix KeccakWidth400 + #define SnP KeccakP400 + #define SnP_width 400 + #define SnP_Permute KeccakP400_Permute_20rounds + #if defined(KeccakF400_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + + #define prefix KeccakWidth800 + #define SnP KeccakP800 + #define SnP_width 800 + #define SnP_Permute KeccakP800_Permute_22rounds + #if defined(KeccakF800_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/Modules/_sha3/kcp/KeccakSponge.h b/Modules/_sha3/kcp/KeccakSponge.h new file mode 100644 index 0000000..0f4badc --- /dev/null +++ b/Modules/_sha3/kcp/KeccakSponge.h @@ -0,0 +1,172 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +/** General information + * + * The following type and functions are not actually implemented. Their + * documentation is generic, with the prefix Prefix replaced by + * - KeccakWidth200 for a sponge function based on Keccak-f[200] + * - KeccakWidth400 for a sponge function based on Keccak-f[400] + * - KeccakWidth800 for a sponge function based on Keccak-f[800] + * - KeccakWidth1600 for a sponge function based on Keccak-f[1600] + * + * In all these functions, the rate and capacity must sum to the width of the + * chosen permutation. For instance, to use the sponge function + * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination + * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(), + * KeccakWidth1600_SpongeAbsorbLastFewBits() and + * KeccakWidth1600_SpongeSqueeze(). + * + * The Prefix_SpongeInstance contains the sponge instance attributes for use + * with the Prefix_Sponge* functions. + * It gathers the state processed by the permutation as well as the rate, + * the position of input/output bytes in the state and the phase + * (absorbing or squeezing). + */ + +#ifdef DontReallyInclude_DocumentationOnly +/** Function to evaluate the sponge function Keccak[r, c] in a single call. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param input Pointer to the input message (before the suffix). + * @param inputByteLen The length of the input message in bytes. + * @param suffix Byte containing from 0 to 7 suffix bits + * that must be absorbed after @a input. + * These <i>n</i> bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position <i>n</i> + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position <i>n</i>+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a suffix must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B. + * . + * @param output Pointer to the output buffer. + * @param outputByteLen The desired number of output bytes. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @pre @a suffix ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The phase of the sponge function is set to absorbing. + * @param spongeInstance Pointer to the sponge instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); + +/** + * Function to give input data bytes for the sponge function to absorb. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the input data. + * @param dataByteLen The number of input bytes provided in the input data. + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); + +/** + * Function to give input data bits for the sponge function to absorb + * and then to switch to the squeezing phase. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param delimitedData Byte containing from 0 to 7 trailing bits + * that must be absorbed. + * These <i>n</i> bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position <i>n</i> + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position <i>n</i>+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a delimitedData must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B. + * . + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @pre @a delimitedData ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData); + +/** + * Function to squeeze output data from the sponge function. + * If the sponge function was in the absorbing phase, this function + * switches it to the squeezing phase + * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param dataByteLen The number of output bytes desired. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); +#endif + +#include <string.h> +#include "align.h" + +#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define KCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth200) +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth400) +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth800) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600) +#endif + +#endif diff --git a/Modules/_sha3/kcp/KeccakSponge.inc b/Modules/_sha3/kcp/KeccakSponge.inc new file mode 100644 index 0000000..e10739d --- /dev/null +++ b/Modules/_sha3/kcp/KeccakSponge.inc @@ -0,0 +1,332 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define Sponge JOIN(prefix, _Sponge) +#define SpongeInstance JOIN(prefix, _SpongeInstance) +#define SpongeInitialize JOIN(prefix, _SpongeInitialize) +#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb) +#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits) +#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze) + +#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) +#define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) + +int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen) +{ + ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes]; + unsigned int partialBlock; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + unsigned int rateInBytes = rate/8; + + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + if (suffix == 0) + return 1; + + /* Initialize the state */ + + SnP_StaticInitialize(); + SnP_Initialize(state); + + /* First, absorb whole blocks */ + +#ifdef SnP_FastLoop_Absorb + if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { + /* fast lane: whole lane rate */ + + size_t j; + j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); + curInput += j; + inputByteLen -= j; + } +#endif + while(inputByteLen >= (size_t)rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curInput, rateInBytes); + #endif + SnP_AddBytes(state, curInput, 0, rateInBytes); + SnP_Permute(state); + curInput += rateInBytes; + inputByteLen -= rateInBytes; + } + + /* Then, absorb what remains */ + + partialBlock = (unsigned int)inputByteLen; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); + #endif + SnP_AddBytes(state, curInput, 0, partialBlock); + + /* Finally, absorb the suffix */ + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = suffix; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + + SnP_AddByte(state, suffix, partialBlock); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + + if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1))) + SnP_Permute(state); + /* Second bit of padding */ + + SnP_AddByte(state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(state); + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + + /* First, output whole blocks */ + + while(outputByteLen > (size_t)rateInBytes) { + SnP_ExtractBytes(state, curOutput, 0, rateInBytes); + SnP_Permute(state); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curOutput, rateInBytes); + #endif + curOutput += rateInBytes; + outputByteLen -= rateInBytes; + } + + /* Finally, output what remains */ + + partialBlock = (unsigned int)outputByteLen; + SnP_ExtractBytes(state, curOutput, 0, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curOutput, partialBlock); + #endif + + return 0; +} + +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ + +int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->rate = rate; + instance->byteIOIndex = 0; + instance->squeezing = 0; + + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + const unsigned char *curData; + unsigned int rateInBytes = instance->rate/8; + + if (instance->squeezing) + return 1; /* Too late for additional input */ + + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { +#ifdef SnP_FastLoop_Absorb + /* processing full blocks first */ + + if ((rateInBytes % (SnP_width/200)) == 0) { + /* fast lane: whole lane rate */ + + j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); + i += j; + curData += j; + } + else { +#endif + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, rateInBytes); + #endif + SnP_AddBytes(instance->state, curData, 0, rateInBytes); + SnP_Permute(instance->state); + curData+=rateInBytes; + } + i = dataByteLen - j; +#ifdef SnP_FastLoop_Absorb + } +#endif + } + else { + /* normal lane: using the message queue */ + + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); + #endif + i += partialBlock; + + SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + curData += partialBlock; + instance->byteIOIndex += partialBlock; + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData) +{ + unsigned int rateInBytes = instance->rate/8; + + if (delimitedData == 0) + return 1; + if (instance->squeezing) + return 1; /* Too late for additional input */ + + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = delimitedData; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + + SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + + if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) + SnP_Permute(instance->state); + /* Second bit of padding */ + + SnP_AddByte(instance->state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + instance->squeezing = 1; + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + unsigned int rateInBytes = instance->rate/8; + unsigned char *curData; + + if (!instance->squeezing) + SpongeAbsorbLastFewBits(instance, 0x01); + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + SnP_Permute(instance->state); + SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curData, rateInBytes); + #endif + curData+=rateInBytes; + } + i = dataByteLen - j; + } + else { + /* normal lane: using the message queue */ + + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + i += partialBlock; + + SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curData, partialBlock); + #endif + curData += partialBlock; + instance->byteIOIndex += partialBlock; + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +#undef Sponge +#undef SpongeInstance +#undef SpongeInitialize +#undef SpongeAbsorb +#undef SpongeAbsorbLastFewBits +#undef SpongeSqueeze +#undef SnP_stateSizeInBytes +#undef SnP_stateAlignment +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_ExtractBytes diff --git a/Modules/_sha3/kcp/PlSnP-Fallback.inc b/Modules/_sha3/kcp/PlSnP-Fallback.inc new file mode 100644 index 0000000..3a9119a --- /dev/null +++ b/Modules/_sha3/kcp/PlSnP-Fallback.inc @@ -0,0 +1,257 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */ + +/* expect SnP_stateSizeInBytes, SnP_stateAlignment */ + +/* expect prefix */ + +/* expect SnP_* */ + + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize) +#define PlSnP_InitializeAll JOIN(prefix, _InitializeAll) +#define PlSnP_AddByte JOIN(prefix, _AddByte) +#define PlSnP_AddBytes JOIN(prefix, _AddBytes) +#define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll) +#define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes) +#define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll) +#define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes) +#define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes) +#define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll) +#define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes) +#define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll) + +#if (PlSnP_baseParallelism == 1) + #define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) + #define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#else + #define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes) + #define SnP_stateAlignment JOIN(SnP, _statesAlignment) +#endif +#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism)) +#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment) +#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset)) + +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_InitializeAll JOIN(SnP, _InitializeAll) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_AddLanesAll JOIN(SnP, _AddLanesAll) +#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes) +#define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll) +#define SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) +#define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll) +#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes) +#define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll) + +void PlSnP_StaticInitialize( void ) +{ + SnP_StaticInitialize(); +} + +void PlSnP_InitializeAll(void *states) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) + #if (PlSnP_baseParallelism == 1) + SnP_Initialize(stateWithIndex(i)); + #else + SnP_InitializeAll(stateWithIndex(i)); + #endif +} + +void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset) +{ + #if (PlSnP_baseParallelism == 1) + SnP_AddByte(stateWithIndex(instanceIndex), byte, offset); + #else + SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset); + #endif +} + +void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) +{ + #if (PlSnP_baseParallelism == 1) + SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length); + #else + SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length); + #endif +} + +void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_AddBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes); + #else + SnP_AddLanesAll(stateWithIndex(i), data, laneCount, laneOffset); + #endif + data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes; + } +} + +void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) +{ + #if (PlSnP_baseParallelism == 1) + SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length); + #else + SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length); + #endif +} + +void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_OverwriteBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes); + #else + SnP_OverwriteLanesAll(stateWithIndex(i), data, laneCount, laneOffset); + #endif + data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes; + } +} + +void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount) +{ + #if (PlSnP_baseParallelism == 1) + SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount); + #else + SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount); + #endif +} + +void PlSnP_PermuteAll(void *states) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_Permute(stateWithIndex(i)); + #else + SnP_PermuteAll(stateWithIndex(i)); + #endif + } +} + +#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds)) +void PlSnP_PermuteAll_12rounds(void *states) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_Permute_12rounds(stateWithIndex(i)); + #else + SnP_PermuteAll_12rounds(stateWithIndex(i)); + #endif + } +} +#endif + +void PlSnP_ExtractBytes(void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length) +{ + #if (PlSnP_baseParallelism == 1) + SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length); + #else + SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length); + #endif +} + +void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_ExtractBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes); + #else + SnP_ExtractLanesAll(stateWithIndex(i), data, laneCount, laneOffset); + #endif + data += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism; + } +} + +void PlSnP_ExtractAndAddBytes(void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + #if (PlSnP_baseParallelism == 1) + SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length); + #else + SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length); + #endif +} + +void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset) +{ + unsigned int i; + + for(i=0; i<PlSnP_factor; i++) { + #if (PlSnP_baseParallelism == 1) + SnP_ExtractAndAddBytes(stateWithIndex(i), input, output, 0, laneCount*SnP_laneLengthInBytes); + #else + SnP_ExtractAndAddLanesAll(stateWithIndex(i), input, output, laneCount, laneOffset); + #endif + input += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism; + output += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism; + } +} + +#undef PlSnP_factor +#undef SnP_stateOffset +#undef stateWithIndex +#undef JOIN0 +#undef JOIN +#undef PlSnP_StaticInitialize +#undef PlSnP_InitializeAll +#undef PlSnP_AddByte +#undef PlSnP_AddBytes +#undef PlSnP_AddLanesAll +#undef PlSnP_OverwriteBytes +#undef PlSnP_OverwriteLanesAll +#undef PlSnP_OverwriteWithZeroes +#undef PlSnP_PermuteAll +#undef PlSnP_ExtractBytes +#undef PlSnP_ExtractLanesAll +#undef PlSnP_ExtractAndAddBytes +#undef PlSnP_ExtractAndAddLanesAll +#undef SnP_stateAlignment +#undef SnP_stateSizeInBytes +#undef PlSnP_factor +#undef SnP_stateOffset +#undef stateWithIndex +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_InitializeAll +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_AddLanesAll +#undef SnP_OverwriteBytes +#undef SnP_OverwriteWithZeroes +#undef SnP_OverwriteLanesAll +#undef SnP_ExtractBytes +#undef SnP_ExtractLanesAll +#undef SnP_ExtractAndAddBytes +#undef SnP_ExtractAndAddLanesAll diff --git a/Modules/_sha3/kcp/SnP-Relaned.h b/Modules/_sha3/kcp/SnP-Relaned.h new file mode 100644 index 0000000..086e635 --- /dev/null +++ b/Modules/_sha3/kcp/SnP-Relaned.h @@ -0,0 +1,134 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/Modules/_sha3/kcp/align.h b/Modules/_sha3/kcp/align.h new file mode 100644 index 0000000..6650fe8 --- /dev/null +++ b/Modules/_sha3/kcp/align.h @@ -0,0 +1,35 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ + +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/Modules/_sha3/sha3module.c b/Modules/_sha3/sha3module.c new file mode 100644 index 0000000..67c69f2 --- /dev/null +++ b/Modules/_sha3/sha3module.c @@ -0,0 +1,749 @@ +/* SHA3 module + * + * This module provides an interface to the SHA3 algorithm + * + * See below for information about the original code this module was + * based upon. Additional work performed by: + * + * Andrew Kuchling (amk@amk.ca) + * Greg Stein (gstein@lyra.org) + * Trevor Perrin (trevp@trevp.net) + * Gregory P. Smith (greg@krypto.org) + * + * Copyright (C) 2012-2016 Christian Heimes (christian@python.org) + * Licensed to PSF under a Contributor Agreement. + * + */ + +#include "Python.h" +#include "pystrhex.h" +#include "../hashlib.h" + +/* ************************************************************************** + * SHA-3 (Keccak) and SHAKE + * + * The code is based on KeccakCodePackage from 2016-04-23 + * commit 647f93079afc4ada3d23737477a6e52511ca41fd + * + * The reference implementation is altered in this points: + * - C++ comments are converted to ANSI C comments. + * - all function names are mangled + * - typedef for UINT64 is commented out. + * - brg_endian.h is removed + * + * *************************************************************************/ + +#ifdef __sparc + /* opt64 uses un-aligned memory access that causes a BUS error with msg + * 'invalid address alignment' on SPARC. */ + #define KeccakOpt 32 +#elif SIZEOF_VOID_P == 8 && defined(PY_UINT64_T) + /* opt64 works only for 64bit platforms with unsigned int64 */ + #define KeccakOpt 64 +#else + /* opt32 is used for the remaining 32 and 64bit platforms */ + #define KeccakOpt 32 +#endif + +#if KeccakOpt == 64 && defined(PY_UINT64_T) + /* 64bit platforms with unsigned int64 */ + typedef PY_UINT64_T UINT64; + typedef unsigned char UINT8; +#endif + +/* replacement for brg_endian.h */ +#define IS_LITTLE_ENDIAN 1234 +#define IS_BIG_ENDIAN 4321 +#if PY_LITTLE_ENDIAN +#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif +#if PY_BIG_ENDIAN +#define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#endif + +/* mangle names */ +#define KeccakF1600_FastLoop_Absorb _PySHA3_KeccakF1600_FastLoop_Absorb +#define Keccak_HashFinal _PySHA3_Keccak_HashFinal +#define Keccak_HashInitialize _PySHA3_Keccak_HashInitialize +#define Keccak_HashSqueeze _PySHA3_Keccak_HashSqueeze +#define Keccak_HashUpdate _PySHA3_Keccak_HashUpdate +#define KeccakP1600_AddBytes _PySHA3_KeccakP1600_AddBytes +#define KeccakP1600_AddBytesInLane _PySHA3_KeccakP1600_AddBytesInLane +#define KeccakP1600_AddLanes _PySHA3_KeccakP1600_AddLanes +#define KeccakP1600_ExtractAndAddBytes _PySHA3_KeccakP1600_ExtractAndAddBytes +#define KeccakP1600_ExtractAndAddBytesInLane _PySHA3_KeccakP1600_ExtractAndAddBytesInLane +#define KeccakP1600_ExtractAndAddLanes _PySHA3_KeccakP1600_ExtractAndAddLanes +#define KeccakP1600_ExtractBytes _PySHA3_KeccakP1600_ExtractBytes +#define KeccakP1600_ExtractBytesInLane _PySHA3_KeccakP1600_ExtractBytesInLane +#define KeccakP1600_ExtractLanes _PySHA3_KeccakP1600_ExtractLanes +#define KeccakP1600_Initialize _PySHA3_KeccakP1600_Initialize +#define KeccakP1600_OverwriteBytes _PySHA3_KeccakP1600_OverwriteBytes +#define KeccakP1600_OverwriteBytesInLane _PySHA3_KeccakP1600_OverwriteBytesInLane +#define KeccakP1600_OverwriteLanes _PySHA3_KeccakP1600_OverwriteLanes +#define KeccakP1600_OverwriteWithZeroes _PySHA3_KeccakP1600_OverwriteWithZeroes +#define KeccakP1600_Permute_12rounds _PySHA3_KeccakP1600_Permute_12rounds +#define KeccakP1600_Permute_24rounds _PySHA3_KeccakP1600_Permute_24rounds +#define KeccakWidth1600_Sponge _PySHA3_KeccakWidth1600_Sponge +#define KeccakWidth1600_SpongeAbsorb _PySHA3_KeccakWidth1600_SpongeAbsorb +#define KeccakWidth1600_SpongeAbsorbLastFewBits _PySHA3_KeccakWidth1600_SpongeAbsorbLastFewBits +#define KeccakWidth1600_SpongeInitialize _PySHA3_KeccakWidth1600_SpongeInitialize +#define KeccakWidth1600_SpongeSqueeze _PySHA3_KeccakWidth1600_SpongeSqueeze +#if KeccakOpt == 32 +#define KeccakP1600_AddByte _PySHA3_KeccakP1600_AddByte +#define KeccakP1600_Permute_Nrounds _PySHA3_KeccakP1600_Permute_Nrounds +#define KeccakP1600_SetBytesInLaneToZero _PySHA3_KeccakP1600_SetBytesInLaneToZero +#endif + +/* we are only interested in KeccakP1600 */ +#define KeccakP200_excluded 1 +#define KeccakP400_excluded 1 +#define KeccakP800_excluded 1 + +/* inline all Keccak dependencies */ +#include "kcp/KeccakHash.h" +#include "kcp/KeccakSponge.h" +#include "kcp/KeccakHash.c" +#include "kcp/KeccakSponge.c" +#if KeccakOpt == 64 + #include "kcp/KeccakP-1600-opt64.c" +#elif KeccakOpt == 32 + #include "kcp/KeccakP-1600-inplace32BI.c" +#endif + +#define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ +#define SHA3_state Keccak_HashInstance +#define SHA3_init Keccak_HashInitialize +#define SHA3_process Keccak_HashUpdate +#define SHA3_done Keccak_HashFinal +#define SHA3_squeeze Keccak_HashSqueeze +#define SHA3_copystate(dest, src) memcpy(&(dest), &(src), sizeof(SHA3_state)) + + +/*[clinic input] +module _sha3 +class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ" +class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ" +class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ" +class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ" +class _sha3.shake_128 "SHA3object *" "&SHAKE128type" +class _sha3.shake_256 "SHA3object *" "&SHAKE256type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/ + +/* The structure for storing SHA3 info */ + +#define PY_WITH_KECCAK 0 + +typedef struct { + PyObject_HEAD + SHA3_state hash_state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} SHA3object; + +static PyTypeObject SHA3_224type; +static PyTypeObject SHA3_256type; +static PyTypeObject SHA3_384type; +static PyTypeObject SHA3_512type; +#ifdef PY_WITH_KECCAK +static PyTypeObject Keccak_224type; +static PyTypeObject Keccak_256type; +static PyTypeObject Keccak_384type; +static PyTypeObject Keccak_512type; +#endif +static PyTypeObject SHAKE128type; +static PyTypeObject SHAKE256type; + +#include "clinic/sha3module.c.h" + +static SHA3object * +newSHA3object(PyTypeObject *type) +{ + SHA3object *newobj; + newobj = (SHA3object *)PyObject_New(SHA3object, type); + if (newobj == NULL) { + return NULL; + } +#ifdef WITH_THREAD + newobj->lock = NULL; +#endif + return newobj; +} + + +/*[clinic input] +@classmethod +_sha3.sha3_224.__new__ as py_sha3_new + string as data: object = NULL + +Return a new SHA3 hash object with a hashbit length of 28 bytes. +[clinic start generated code]*/ + +static PyObject * +py_sha3_new_impl(PyTypeObject *type, PyObject *data) +/*[clinic end generated code: output=8d5c34279e69bf09 input=d7c582b950a858b6]*/ +{ + SHA3object *self = NULL; + Py_buffer buf = {NULL, NULL}; + HashReturn res; + + self = newSHA3object(type); + if (self == NULL) { + goto error; + } + + if (type == &SHA3_224type) { + res = Keccak_HashInitialize_SHA3_224(&self->hash_state); + } else if (type == &SHA3_256type) { + res = Keccak_HashInitialize_SHA3_256(&self->hash_state); + } else if (type == &SHA3_384type) { + res = Keccak_HashInitialize_SHA3_384(&self->hash_state); + } else if (type == &SHA3_512type) { + res = Keccak_HashInitialize_SHA3_512(&self->hash_state); +#ifdef PY_WITH_KECCAK + } else if (type == &Keccak_224type) { + res = Keccak_HashInitialize(&self->hash_state, 1152, 448, 224, 0x01); + } else if (type == &Keccak_256type) { + res = Keccak_HashInitialize(&self->hash_state, 1088, 512, 256, 0x01); + } else if (type == &Keccak_384type) { + res = Keccak_HashInitialize(&self->hash_state, 832, 768, 384, 0x01); + } else if (type == &Keccak_512type) { + res = Keccak_HashInitialize(&self->hash_state, 576, 1024, 512, 0x01); +#endif + } else if (type == &SHAKE128type) { + res = Keccak_HashInitialize_SHAKE128(&self->hash_state); + } else if (type == &SHAKE256type) { + res = Keccak_HashInitialize_SHAKE256(&self->hash_state); + } else { + PyErr_BadInternalCall(); + goto error; + } + + if (data) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); +#ifdef WITH_THREAD + if (buf.len >= HASHLIB_GIL_MINSIZE) { + /* invariant: New objects can't be accessed by other code yet, + * thus it's safe to release the GIL without locking the object. + */ + Py_BEGIN_ALLOW_THREADS + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); + Py_END_ALLOW_THREADS + } + else { + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); + } +#else + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); +#endif + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, + "internal error in SHA3 Update()"); + goto error; + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; + + error: + if (self) { + Py_DECREF(self); + } + if (data && buf.obj) { + PyBuffer_Release(&buf); + } + return NULL; +} + + +/* Internal methods for a hash object */ + +static void +SHA3_dealloc(SHA3object *self) +{ +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + } +#endif + PyObject_Del(self); +} + + +/* External methods for a hash object */ + + +/*[clinic input] +_sha3.sha3_224.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_sha3_sha3_224_copy_impl(SHA3object *self) +/*[clinic end generated code: output=6c537411ecdcda4c input=93a44aaebea51ba8]*/ +{ + SHA3object *newobj; + + if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { + return NULL; + } + ENTER_HASHLIB(self); + SHA3_copystate(newobj->hash_state, self->hash_state); + LEAVE_HASHLIB(self); + return (PyObject *)newobj; +} + + +/*[clinic input] +_sha3.sha3_224.digest + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_sha3_sha3_224_digest_impl(SHA3object *self) +/*[clinic end generated code: output=fd531842e20b2d5b input=a5807917d219b30e]*/ +{ + unsigned char digest[SHA3_MAX_DIGESTSIZE]; + SHA3_state temp; + HashReturn res; + + ENTER_HASHLIB(self); + SHA3_copystate(temp, self->hash_state); + LEAVE_HASHLIB(self); + res = SHA3_done(&temp, digest); + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); + return NULL; + } + return PyBytes_FromStringAndSize((const char *)digest, + self->hash_state.fixedOutputLength / 8); +} + + +/*[clinic input] +_sha3.sha3_224.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_sha3_sha3_224_hexdigest_impl(SHA3object *self) +/*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/ +{ + unsigned char digest[SHA3_MAX_DIGESTSIZE]; + SHA3_state temp; + HashReturn res; + + /* Get the raw (binary) digest value */ + ENTER_HASHLIB(self); + SHA3_copystate(temp, self->hash_state); + LEAVE_HASHLIB(self); + res = SHA3_done(&temp, digest); + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); + return NULL; + } + return _Py_strhex((const char *)digest, + self->hash_state.fixedOutputLength / 8); +} + + +/*[clinic input] +_sha3.sha3_224.update + + obj: object + / + +Update this hash object's state with the provided string. +[clinic start generated code]*/ + +static PyObject * +_sha3_sha3_224_update(SHA3object *self, PyObject *obj) +/*[clinic end generated code: output=06721d55b483e0af input=be44bf0d1c279791]*/ +{ + Py_buffer buf; + HashReturn res; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + + /* add new data, the function takes the length in bits not bytes */ +#ifdef WITH_THREAD + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + /* Once a lock exists all code paths must be synchronized. We have to + * release the GIL even for small buffers as acquiring the lock may take + * an unlimited amount of time when another thread updates this object + * with lots of data. */ + if (self->lock) { + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); + PyThread_release_lock(self->lock); + Py_END_ALLOW_THREADS + } + else { + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); + } +#else + res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8); +#endif + + if (res != SUCCESS) { + PyBuffer_Release(&buf); + PyErr_SetString(PyExc_RuntimeError, + "internal error in SHA3 Update()"); + return NULL; + } + + PyBuffer_Release(&buf); + Py_INCREF(Py_None); + return Py_None; +} + + +static PyMethodDef SHA3_methods[] = { + _SHA3_SHA3_224_COPY_METHODDEF + _SHA3_SHA3_224_DIGEST_METHODDEF + _SHA3_SHA3_224_HEXDIGEST_METHODDEF + _SHA3_SHA3_224_UPDATE_METHODDEF + {NULL, NULL} /* sentinel */ +}; + + +static PyObject * +SHA3_get_block_size(SHA3object *self, void *closure) +{ + int rate = self->hash_state.sponge.rate; + return PyLong_FromLong(rate / 8); +} + + +static PyObject * +SHA3_get_name(SHA3object *self, void *closure) +{ + PyTypeObject *type = Py_TYPE(self); + if (type == &SHA3_224type) { + return PyUnicode_FromString("sha3_224"); + } else if (type == &SHA3_256type) { + return PyUnicode_FromString("sha3_256"); + } else if (type == &SHA3_384type) { + return PyUnicode_FromString("sha3_384"); + } else if (type == &SHA3_512type) { + return PyUnicode_FromString("sha3_512"); +#ifdef PY_WITH_KECCAK + } else if (type == &Keccak_224type) { + return PyUnicode_FromString("keccak_224"); + } else if (type == &Keccak_256type) { + return PyUnicode_FromString("keccak_256"); + } else if (type == &Keccak_384type) { + return PyUnicode_FromString("keccak_384"); + } else if (type == &Keccak_512type) { + return PyUnicode_FromString("keccak_512"); +#endif + } else if (type == &SHAKE128type) { + return PyUnicode_FromString("shake_128"); + } else if (type == &SHAKE256type) { + return PyUnicode_FromString("shake_256"); + } else { + PyErr_BadInternalCall(); + return NULL; + } +} + + +static PyObject * +SHA3_get_digest_size(SHA3object *self, void *closure) +{ + return PyLong_FromLong(self->hash_state.fixedOutputLength / 8); +} + + +static PyObject * +SHA3_get_capacity_bits(SHA3object *self, void *closure) +{ + int capacity = 1600 - self->hash_state.sponge.rate; + return PyLong_FromLong(capacity); +} + + +static PyObject * +SHA3_get_rate_bits(SHA3object *self, void *closure) +{ + unsigned int rate = self->hash_state.sponge.rate; + return PyLong_FromLong(rate); +} + +static PyObject * +SHA3_get_suffix(SHA3object *self, void *closure) +{ + unsigned char suffix[2]; + suffix[0] = self->hash_state.delimitedSuffix; + suffix[1] = 0; + return PyBytes_FromStringAndSize((const char *)suffix, 1); +} + + +static PyGetSetDef SHA3_getseters[] = { + {"block_size", (getter)SHA3_get_block_size, NULL, NULL, NULL}, + {"name", (getter)SHA3_get_name, NULL, NULL, NULL}, + {"digest_size", (getter)SHA3_get_digest_size, NULL, NULL, NULL}, + {"_capacity_bits", (getter)SHA3_get_capacity_bits, NULL, NULL, NULL}, + {"_rate_bits", (getter)SHA3_get_rate_bits, NULL, NULL, NULL}, + {"_suffix", (getter)SHA3_get_suffix, NULL, NULL, NULL}, + {NULL} /* Sentinel */ +}; + + +#define SHA3_TYPE(type_obj, type_name, type_doc, type_methods) \ + static PyTypeObject type_obj = { \ + PyVarObject_HEAD_INIT(NULL, 0) \ + type_name, /* tp_name */ \ + sizeof(SHA3object), /* tp_size */ \ + 0, /* tp_itemsize */ \ + /* methods */ \ + (destructor)SHA3_dealloc, /* tp_dealloc */ \ + 0, /* tp_print */ \ + 0, /* tp_getattr */ \ + 0, /* tp_setattr */ \ + 0, /* tp_reserved */ \ + 0, /* tp_repr */ \ + 0, /* tp_as_number */ \ + 0, /* tp_as_sequence */ \ + 0, /* tp_as_mapping */ \ + 0, /* tp_hash */ \ + 0, /* tp_call */ \ + 0, /* tp_str */ \ + 0, /* tp_getattro */ \ + 0, /* tp_setattro */ \ + 0, /* tp_as_buffer */ \ + Py_TPFLAGS_DEFAULT, /* tp_flags */ \ + type_doc, /* tp_doc */ \ + 0, /* tp_traverse */ \ + 0, /* tp_clear */ \ + 0, /* tp_richcompare */ \ + 0, /* tp_weaklistoffset */ \ + 0, /* tp_iter */ \ + 0, /* tp_iternext */ \ + type_methods, /* tp_methods */ \ + NULL, /* tp_members */ \ + SHA3_getseters, /* tp_getset */ \ + 0, /* tp_base */ \ + 0, /* tp_dict */ \ + 0, /* tp_descr_get */ \ + 0, /* tp_descr_set */ \ + 0, /* tp_dictoffset */ \ + 0, /* tp_init */ \ + 0, /* tp_alloc */ \ + py_sha3_new, /* tp_new */ \ + } + +PyDoc_STRVAR(sha3_256__doc__, +"sha3_256([string]) -> SHA3 object\n\ +\n\ +Return a new SHA3 hash object with a hashbit length of 32 bytes."); + +PyDoc_STRVAR(sha3_384__doc__, +"sha3_384([string]) -> SHA3 object\n\ +\n\ +Return a new SHA3 hash object with a hashbit length of 48 bytes."); + +PyDoc_STRVAR(sha3_512__doc__, +"sha3_512([string]) -> SHA3 object\n\ +\n\ +Return a new SHA3 hash object with a hashbit length of 64 bytes."); + +SHA3_TYPE(SHA3_224type, "_sha3.sha3_224", py_sha3_new__doc__, SHA3_methods); +SHA3_TYPE(SHA3_256type, "_sha3.sha3_256", sha3_256__doc__, SHA3_methods); +SHA3_TYPE(SHA3_384type, "_sha3.sha3_384", sha3_384__doc__, SHA3_methods); +SHA3_TYPE(SHA3_512type, "_sha3.sha3_512", sha3_512__doc__, SHA3_methods); + +#ifdef PY_WITH_KECCAK +PyDoc_STRVAR(keccak_224__doc__, +"keccak_224([string]) -> Keccak object\n\ +\n\ +Return a new Keccak hash object with a hashbit length of 28 bytes."); + +PyDoc_STRVAR(keccak_256__doc__, +"keccak_256([string]) -> Keccak object\n\ +\n\ +Return a new Keccak hash object with a hashbit length of 32 bytes."); + +PyDoc_STRVAR(keccak_384__doc__, +"keccak_384([string]) -> Keccak object\n\ +\n\ +Return a new Keccak hash object with a hashbit length of 48 bytes."); + +PyDoc_STRVAR(keccak_512__doc__, +"keccak_512([string]) -> Keccak object\n\ +\n\ +Return a new Keccak hash object with a hashbit length of 64 bytes."); + +SHA3_TYPE(Keccak_224type, "_sha3.keccak_224", keccak_224__doc__, SHA3_methods); +SHA3_TYPE(Keccak_256type, "_sha3.keccak_256", keccak_256__doc__, SHA3_methods); +SHA3_TYPE(Keccak_384type, "_sha3.keccak_384", keccak_384__doc__, SHA3_methods); +SHA3_TYPE(Keccak_512type, "_sha3.keccak_512", keccak_512__doc__, SHA3_methods); +#endif + + +static PyObject * +_SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex) +{ + unsigned char *digest = NULL; + SHA3_state temp; + int res; + PyObject *result = NULL; + + if ((digest = (unsigned char*)PyMem_Malloc(digestlen)) == NULL) { + return PyErr_NoMemory(); + } + + /* Get the raw (binary) digest value */ + ENTER_HASHLIB(self); + SHA3_copystate(temp, self->hash_state); + LEAVE_HASHLIB(self); + res = SHA3_done(&temp, NULL); + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 done()"); + goto error; + } + res = SHA3_squeeze(&temp, digest, digestlen * 8); + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Squeeze()"); + return NULL; + } + if (hex) { + result = _Py_strhex((const char *)digest, digestlen); + } else { + result = PyBytes_FromStringAndSize((const char *)digest, + digestlen); + } + error: + if (digest != NULL) { + PyMem_Free(digest); + } + return result; +} + + +/*[clinic input] +_sha3.shake_128.digest + + length: unsigned_long(bitwise=True) + \ + +Return the digest value as a string of binary data. +[clinic start generated code]*/ + +static PyObject * +_sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) +/*[clinic end generated code: output=2313605e2f87bb8f input=608c8ca80ae9d115]*/ +{ + return _SHAKE_digest(self, length, 0); +} + + +/*[clinic input] +_sha3.shake_128.hexdigest + + length: unsigned_long(bitwise=True) + \ + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) +/*[clinic end generated code: output=bf8e2f1e490944a8 input=64e56b4760db4573]*/ +{ + return _SHAKE_digest(self, length, 1); +} + + +static PyMethodDef SHAKE_methods[] = { + _SHA3_SHA3_224_COPY_METHODDEF + _SHA3_SHAKE_128_DIGEST_METHODDEF + _SHA3_SHAKE_128_HEXDIGEST_METHODDEF + _SHA3_SHA3_224_UPDATE_METHODDEF + {NULL, NULL} /* sentinel */ +}; + +PyDoc_STRVAR(shake_128__doc__, +"shake_128([string]) -> SHAKE object\n\ +\n\ +Return a new SHAKE hash object."); + +PyDoc_STRVAR(shake_256__doc__, +"shake_256([string]) -> SHAKE object\n\ +\n\ +Return a new SHAKE hash object."); + +SHA3_TYPE(SHAKE128type, "_sha3.shake_128", shake_128__doc__, SHAKE_methods); +SHA3_TYPE(SHAKE256type, "_sha3.shake_256", shake_256__doc__, SHAKE_methods); + + +/* Initialize this module. */ +static struct PyModuleDef _SHA3module = { + PyModuleDef_HEAD_INIT, + "_sha3", + NULL, + -1, + NULL, + NULL, + NULL, + NULL, + NULL +}; + + +PyMODINIT_FUNC +PyInit__sha3(void) +{ + PyObject *m = NULL; + + m = PyModule_Create(&_SHA3module); + +#define init_sha3type(name, type) \ + do { \ + Py_TYPE(type) = &PyType_Type; \ + if (PyType_Ready(type) < 0) { \ + goto error; \ + } \ + Py_INCREF((PyObject *)type); \ + if (PyModule_AddObject(m, name, (PyObject *)type) < 0) { \ + goto error; \ + } \ + } while(0) + + init_sha3type("sha3_224", &SHA3_224type); + init_sha3type("sha3_256", &SHA3_256type); + init_sha3type("sha3_384", &SHA3_384type); + init_sha3type("sha3_512", &SHA3_512type); +#ifdef PY_WITH_KECCAK + init_sha3type("keccak_224", &Keccak_224type); + init_sha3type("keccak_256", &Keccak_256type); + init_sha3type("keccak_384", &Keccak_384type); + init_sha3type("keccak_512", &Keccak_512type); +#endif + init_sha3type("shake_128", &SHAKE128type); + init_sha3type("shake_256", &SHAKE256type); + +#undef init_sha3type + + if (PyModule_AddIntConstant(m, "keccakopt", KeccakOpt) < 0) { + goto error; + } + if (PyModule_AddStringConstant(m, "implementation", + KeccakP1600_implementation) < 0) { + goto error; + } + + return m; + error: + Py_DECREF(m); + return NULL; +} diff --git a/PC/config.c b/PC/config.c index e007d4b..43d9e20 100644 --- a/PC/config.c +++ b/PC/config.c @@ -23,6 +23,7 @@ extern PyObject* PyInit__signal(void); extern PyObject* PyInit__sha1(void); extern PyObject* PyInit__sha256(void); extern PyObject* PyInit__sha512(void); +extern PyObject* PyInit__sha3(void); extern PyObject* PyInit__blake2(void); extern PyObject* PyInit_time(void); extern PyObject* PyInit__thread(void); @@ -97,6 +98,7 @@ struct _inittab _PyImport_Inittab[] = { {"_sha1", PyInit__sha1}, {"_sha256", PyInit__sha256}, {"_sha512", PyInit__sha512}, + {"_sha3", PyInit__sha3}, {"_blake2", PyInit__blake2}, {"time", PyInit_time}, #ifdef WITH_THREAD @@ -905,6 +905,13 @@ class PyBuildExt(build_ext): define_macros=blake2_macros, depends=blake2_deps) ) + sha3_deps = glob(os.path.join(os.getcwd(), srcdir, + 'Modules/_sha3/kcp/*')) + sha3_deps.append('hashlib.h') + exts.append( Extension('_sha3', + ['_sha3/sha3module.c'], + depends=sha3_deps)) + # Modules that provide persistent dictionary-like semantics. You will # probably want to arrange for at least one of them to be available on # your machine, though none are defined by default because of library |