From 4a0270d82bfd782c89a8ae2b869102dafb81ffea Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Sat, 6 Oct 2012 02:23:36 +0200 Subject: Issue #16113: integrate SHA-3 (Keccak) patch from http://hg.python.org/sandbox/cheimes --- Doc/library/hashlib.rst | 10 +- Doc/license.rst | 19 + Doc/whatsnew/3.4.rst | 2 +- Lib/hashlib.py | 15 +- Lib/test/test_hashlib.py | 127 ++- Modules/_hashopenssl.c | 22 - Modules/_sha3/cleanup.py | 49 + Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros | 555 +++++++++ Modules/_sha3/keccak/KeccakF-1600-32-s1.macros | 1187 ++++++++++++++++++++ Modules/_sha3/keccak/KeccakF-1600-32-s2.macros | 1187 ++++++++++++++++++++ Modules/_sha3/keccak/KeccakF-1600-32.macros | 26 + Modules/_sha3/keccak/KeccakF-1600-64.macros | 728 ++++++++++++ Modules/_sha3/keccak/KeccakF-1600-int-set.h | 6 + Modules/_sha3/keccak/KeccakF-1600-interface.h | 46 + Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h | 6 + Modules/_sha3/keccak/KeccakF-1600-opt32.c | 524 +++++++++ Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h | 9 + Modules/_sha3/keccak/KeccakF-1600-opt64.c | 508 +++++++++ Modules/_sha3/keccak/KeccakF-1600-simd128.macros | 651 +++++++++++ Modules/_sha3/keccak/KeccakF-1600-simd64.macros | 517 +++++++++ Modules/_sha3/keccak/KeccakF-1600-unrolling.macros | 124 ++ Modules/_sha3/keccak/KeccakF-1600-xop.macros | 573 ++++++++++ Modules/_sha3/keccak/KeccakNISTInterface.c | 83 ++ Modules/_sha3/keccak/KeccakNISTInterface.h | 72 ++ Modules/_sha3/keccak/KeccakSponge.c | 266 +++++ Modules/_sha3/keccak/KeccakSponge.h | 76 ++ Modules/_sha3/keccak/crypto_hash.h | 0 Modules/_sha3/sha3module.c | 569 ++++++++++ Modules/hashlib.h | 33 + setup.py | 9 + 30 files changed, 7971 insertions(+), 28 deletions(-) create mode 100755 Modules/_sha3/cleanup.py create mode 100644 Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-32-s1.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-32-s2.macros create mode 100644 
Modules/_sha3/keccak/KeccakF-1600-32.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-64.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-int-set.h create mode 100644 Modules/_sha3/keccak/KeccakF-1600-interface.h create mode 100644 Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h create mode 100644 Modules/_sha3/keccak/KeccakF-1600-opt32.c create mode 100644 Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h create mode 100644 Modules/_sha3/keccak/KeccakF-1600-opt64.c create mode 100644 Modules/_sha3/keccak/KeccakF-1600-simd128.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-simd64.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-unrolling.macros create mode 100644 Modules/_sha3/keccak/KeccakF-1600-xop.macros create mode 100644 Modules/_sha3/keccak/KeccakNISTInterface.c create mode 100644 Modules/_sha3/keccak/KeccakNISTInterface.h create mode 100644 Modules/_sha3/keccak/KeccakSponge.c create mode 100644 Modules/_sha3/keccak/KeccakSponge.h create mode 100644 Modules/_sha3/keccak/crypto_hash.h create mode 100644 Modules/_sha3/sha3module.c diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index bc8ab2c..4f5aac9 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -51,9 +51,13 @@ concatenation of the data fed to it so far using the :meth:`digest` or .. index:: single: OpenSSL; (use in module hashlib) Constructors for hash algorithms that are always present in this module are -:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, and -:func:`sha512`. Additional algorithms may also be available depending upon the -OpenSSL library that Python uses on your platform. +:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, +:func:`sha512`, :func:`sha3_224`, :func:`sha3_256`, :func:`sha3_384`, and +:func:`sha3_512`. Additional algorithms may also be available depending upon +the OpenSSL library that Python uses on your platform. + + .. 
versionchanged:: 3.4 + Add sha3 family of hash algorithms. For example, to obtain the digest of the byte string ``b'Nobody inspects the spammish repetition'``:: diff --git a/Doc/license.rst b/Doc/license.rst index cf1d96f..e56ca5b 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -658,6 +658,25 @@ The :mod:`select` and contains the following notice for the kqueue interface:: SUCH DAMAGE. +SHA-3 +----- + +The modules :mod:`_sha3` and :mod:`hashlib` use the reference +implementation of Keccak. The files at :file:`Modules/_sha3/keccak/` contain +the following note:: + + The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, + Michaël Peeters and Gilles Van Assche. For more information, feedback or + questions, please refer to our website: http://keccak.noekeon.org/ + + Implementation by the designers, + hereby denoted as "the implementer". + + To the extent possible under law, the implementer has waived all copyright + and related or neighboring rights to the source code in this file. + http://creativecommons.org/publicdomain/zero/1.0/ + + strtod and dtoa --------------- diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 6f9db35..931a333 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -101,7 +101,7 @@ Implementation improvements: Significantly Improved Library Modules: -* None yet. +* SHA-3 (Keccak) support for :mod:`hashlib`. Security improvements: diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 21454c7..a1bd8b2 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -54,7 +54,8 @@ More condensed: # This tuple and __get_builtin_constructor() must be modified if a new # always available algorithm is added. 
-__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') +__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512', + 'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512') algorithms_guaranteed = set(__always_supported) algorithms_available = set(__always_supported) @@ -85,6 +86,18 @@ def __get_builtin_constructor(name): return _sha512.sha512 elif bs == '384': return _sha512.sha384 + elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512', + 'SHA3_224', 'SHA3_256', 'SHA3_384', 'SHA3_512'}: + import _sha3 + bs = name[5:] + if bs == '224': + return _sha3.sha3_224 + elif bs == '256': + return _sha3.sha3_256 + elif bs == '384': + return _sha3.sha3_384 + elif bs == '512': + return _sha3.sha3_512 except ImportError: pass # no extension module, this hash is unsupported. diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 32f85e9..54201a1 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -36,7 +36,10 @@ def hexstr(s): class HashLibTestCase(unittest.TestCase): supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1', 'sha224', 'SHA224', 'sha256', 'SHA256', - 'sha384', 'SHA384', 'sha512', 'SHA512' ) + 'sha384', 'SHA384', 'sha512', 'SHA512', + 'sha3_224', 'sha3_256', 'sha3_384', + 'sha3_512', 'SHA3_224', 'SHA3_256', + 'SHA3_384', 'SHA3_512' ) # Issue #14693: fallback modules are always compiled under POSIX _warn_on_extension_import = os.name == 'posix' or COMPILED_WITH_PYDEBUG @@ -93,6 +96,12 @@ class HashLibTestCase(unittest.TestCase): if _sha512: self.constructors_to_test['sha384'].add(_sha512.sha384) self.constructors_to_test['sha512'].add(_sha512.sha512) + _sha3 = self._conditional_import_module('_sha3') + if _sha3: + self.constructors_to_test['sha3_224'].add(_sha3.sha3_224) + self.constructors_to_test['sha3_256'].add(_sha3.sha3_256) + self.constructors_to_test['sha3_384'].add(_sha3.sha3_384) + self.constructors_to_test['sha3_512'].add(_sha3.sha3_512) super(HashLibTestCase, self).__init__(*args, 
**kwargs) @@ -158,6 +167,7 @@ class HashLibTestCase(unittest.TestCase): self.assertEqual(m1.digest(), m2.digest()) def check(self, name, data, digest): + digest = digest.lower() constructors = self.constructors_to_test[name] # 2 is for hashlib.name(...) and hashlib.new(name, ...) self.assertGreaterEqual(len(constructors), 2) @@ -183,6 +193,10 @@ class HashLibTestCase(unittest.TestCase): self.check_no_unicode('sha256') self.check_no_unicode('sha384') self.check_no_unicode('sha512') + self.check_no_unicode('sha3_224') + self.check_no_unicode('sha3_256') + self.check_no_unicode('sha3_384') + self.check_no_unicode('sha3_512') def test_case_md5_0(self): self.check('md5', b'', 'd41d8cd98f00b204e9800998ecf8427e') @@ -318,11 +332,122 @@ class HashLibTestCase(unittest.TestCase): "e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+ "de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b") + # SHA-3 family + def test_case_sha3_224_0(self): + self.check('sha3_224', b"", + "F71837502BA8E10837BDD8D365ADB85591895602FC552B48B7390ABD") + + def test_case_sha3_224_1(self): + self.check('sha3_224', bytes.fromhex("CC"), + "A9CAB59EB40A10B246290F2D6086E32E3689FAF1D26B470C899F2802") + + def test_case_sha3_224_2(self): + self.check('sha3_224', bytes.fromhex("41FB"), + "615BA367AFDC35AAC397BC7EB5D58D106A734B24986D5D978FEFD62C") + + def test_case_sha3_224_3(self): + self.check('sha3_224', bytes.fromhex( + "433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+ + "8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+ + "444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+ + "E7E0846DCBB4CE"), + "62B10F1B6236EBC2DA72957742A8D4E48E213B5F8934604BFD4D2C3A") + + @bigmemtest(size=_4G + 5, memuse=1) + def test_case_sha3_224_huge(self, size): + if size == _4G + 5: + try: + self.check('sha3_224', b'A'*size, + '58ef60057c9dddb6a87477e9ace5a26f0d9db01881cf9b10a9f8c224') + except OverflowError: + pass # 32-bit arch + + + def 
test_case_sha3_256_0(self): + self.check('sha3_256', b"", + "C5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470") + + def test_case_sha3_256_1(self): + self.check('sha3_256', bytes.fromhex("CC"), + "EEAD6DBFC7340A56CAEDC044696A168870549A6A7F6F56961E84A54BD9970B8A") + + def test_case_sha3_256_2(self): + self.check('sha3_256', bytes.fromhex("41FB"), + "A8EACEDA4D47B3281A795AD9E1EA2122B407BAF9AABCB9E18B5717B7873537D2") + + def test_case_sha3_256_3(self): + self.check('sha3_256', bytes.fromhex( + "433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+ + "8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+ + "444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+ + "E7E0846DCBB4CE"), + "CE87A5173BFFD92399221658F801D45C294D9006EE9F3F9D419C8D427748DC41") + + + def test_case_sha3_384_0(self): + self.check('sha3_384', b"", + "2C23146A63A29ACF99E73B88F8C24EAA7DC60AA771780CCC006AFBFA8FE2479B"+ + "2DD2B21362337441AC12B515911957FF") + + def test_case_sha3_384_1(self): + self.check('sha3_384', bytes.fromhex("CC"), + "1B84E62A46E5A201861754AF5DC95C4A1A69CAF4A796AE405680161E29572641"+ + "F5FA1E8641D7958336EE7B11C58F73E9") + + def test_case_sha3_384_2(self): + self.check('sha3_384', bytes.fromhex("41FB"), + "495CCE2714CD72C8C53C3363D22C58B55960FE26BE0BF3BBC7A3316DD563AD1D"+ + "B8410E75EEFEA655E39D4670EC0B1792") + + def test_case_sha3_384_3(self): + self.check('sha3_384', bytes.fromhex( + "433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+ + "8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+ + "444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+ + "E7E0846DCBB4CE"), + "135114508DD63E279E709C26F7817C0482766CDE49132E3EDF2EEDD8996F4E35"+ + "96D184100B384868249F1D8B8FDAA2C9") + + + def test_case_sha3_512_0(self): + self.check('sha3_512', b"", + "0EAB42DE4C3CEB9235FC91ACFFE746B29C29A8C366B7C60E4E67C466F36A4304"+ + "C00FA9CAF9D87976BA469BCBE06713B435F091EF2769FB160CDAB33D3670680E") + + def 
test_case_sha3_512_1(self): + self.check('sha3_512', bytes.fromhex("CC"), + "8630C13CBD066EA74BBE7FE468FEC1DEE10EDC1254FB4C1B7C5FD69B646E4416"+ + "0B8CE01D05A0908CA790DFB080F4B513BC3B6225ECE7A810371441A5AC666EB9") + + def test_case_sha3_512_2(self): + self.check('sha3_512', bytes.fromhex("41FB"), + "551DA6236F8B96FCE9F97F1190E901324F0B45E06DBBB5CDB8355D6ED1DC34B3"+ + "F0EAE7DCB68622FF232FA3CECE0D4616CDEB3931F93803662A28DF1CD535B731") + + def test_case_sha3_512_3(self): + self.check('sha3_512', bytes.fromhex( + "433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+ + "8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+ + "444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+ + "E7E0846DCBB4CE"), + "527D28E341E6B14F4684ADB4B824C496C6482E51149565D3D17226828884306B"+ + "51D6148A72622C2B75F5D3510B799D8BDC03EAEDE453676A6EC8FE03A1AD0EAB") + + def test_gil(self): # Check things work fine with an input larger than the size required # for multithreaded operation (which is hardwired to 2048). 
gil_minsize = 2048 + for name in self.supported_hash_names: + m = hashlib.new(name) + m.update(b'1') + m.update(b'#' * gil_minsize) + m.update(b'1') + + m = hashlib.new(name, b'x' * gil_minsize) + m.update(b'1') + m = hashlib.md5() m.update(b'1') m.update(b'#' * gil_minsize) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index d37689e..5f38cc9 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -17,24 +17,6 @@ #include "structmember.h" #include "hashlib.h" -#ifdef WITH_THREAD -#include "pythread.h" - #define ENTER_HASHLIB(obj) \ - if ((obj)->lock) { \ - if (!PyThread_acquire_lock((obj)->lock, 0)) { \ - Py_BEGIN_ALLOW_THREADS \ - PyThread_acquire_lock((obj)->lock, 1); \ - Py_END_ALLOW_THREADS \ - } \ - } - #define LEAVE_HASHLIB(obj) \ - if ((obj)->lock) { \ - PyThread_release_lock((obj)->lock); \ - } -#else - #define ENTER_HASHLIB(obj) - #define LEAVE_HASHLIB(obj) -#endif /* EVP is the preferred interface to hashing in OpenSSL */ #include @@ -43,10 +25,6 @@ #define MUNCH_SIZE INT_MAX -/* TODO(gps): We should probably make this a module or EVPobject attribute - * to allow the user to optimize based on the platform they're using. */ -#define HASHLIB_GIL_MINSIZE 2048 - #ifndef HASH_OBJ_CONSTRUCTOR #define HASH_OBJ_CONSTRUCTOR 0 #endif diff --git a/Modules/_sha3/cleanup.py b/Modules/_sha3/cleanup.py new file mode 100755 index 0000000..aabcb04 --- /dev/null +++ b/Modules/_sha3/cleanup.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright (C) 2012 Christian Heimes (christian@python.org) +# Licensed to PSF under a Contributor Agreement. 
+# +# cleanup Keccak sources + +import os +import re + +CPP1 = re.compile("^//(.*)") +CPP2 = re.compile("\ //(.*)") + +STATICS = ("void ", "int ", "HashReturn ", "const UINT64 ", "UINT16 ") + +HERE = os.path.dirname(os.path.abspath(__file__)) +KECCAK = os.path.join(HERE, "keccak") + +def getfiles(): + for name in os.listdir(KECCAK): + name = os.path.join(KECCAK, name) + if os.path.isfile(name): + yield name + +def cleanup(f): + buf = [] + for line in f: + # mark all functions and global data as static + if line.startswith(STATICS): + buf.append("static " + line) + continue + # remove UINT64 typedef, we have our own + if line.startswith("typedef unsigned long long int"): + buf.append("/* %s */\n" % line.strip()) + continue + # remove #include "brg_endian.h" + if "brg_endian.h" in line: + buf.append("/* %s */\n" % line.strip()) + continue + # transform C++ comments into ANSI C comments + line = CPP1.sub(r"/* \1 */", line) + line = CPP2.sub(r" /* \1 */", line) + buf.append(line) + return "".join(buf) + +for name in getfiles(): + with open(name) as f: + res = cleanup(f) + with open(name, "w") as f: + f.write(res) diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros b/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros new file mode 100644 index 0000000..c0c9029 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-32-rvk.macros @@ -0,0 +1,555 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by Ronny Van Keer, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +static const UINT32 KeccakF1600RoundConstants_int2[2*24] = +{ + 0x00000001UL, 0x00000000UL, + 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, + 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, + 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, + 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, + 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, + 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, + 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, + 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, + 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, + 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, + 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, + 0x00000000UL, 0x80008082UL +}; + +#undef rounds + +#define rounds \ +{ \ + UINT32 Da0, De0, Di0, Do0, Du0; \ + UINT32 Da1, De1, Di1, Do1, Du1; \ + UINT32 Ba, Be, Bi, Bo, Bu; \ + UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ + UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ + UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ + UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ + UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ + UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ + UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ + UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ + UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ + UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ + UINT32 Cw, Cx, Cy, Cz; \ + UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ + UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ + UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ + UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ + UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ + UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ + UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ + UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ + UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ + UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ + const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \ + UINT32 i; \ +\ + copyFromState(A, state) \ +\ + for( i = 12; i != 0; --i ) { \ + Cx = 
Abu0^Agu0^Aku0^Amu0^Asu0; \ + Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ + Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Da1 = Cz^Du0; \ +\ + Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Do1 = Cy^Cx; \ +\ + Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ + De1 = Cz^Cw; \ +\ + Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Aba0 ^= Da0; \ + Ba = Aba0; \ + Age0 ^= De0; \ + Be = ROL32(Age0, 22); \ + Aki1 ^= Di1; \ + Bi = ROL32(Aki1, 22); \ + Amo1 ^= Do1; \ + Bo = ROL32(Amo1, 11); \ + Asu0 ^= Du0; \ + Bu = ROL32(Asu0, 7); \ + Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \ + Ebe0 = Be ^((~Bi)& Bo ); \ + Ebi0 = Bi ^((~Bo)& Bu ); \ + Ebo0 = Bo ^((~Bu)& Ba ); \ + Ebu0 = Bu ^((~Ba)& Be ); \ +\ + Abo0 ^= Do0; \ + Ba = ROL32(Abo0, 14); \ + Agu0 ^= Du0; \ + Be = ROL32(Agu0, 10); \ + Aka1 ^= Da1; \ + Bi = ROL32(Aka1, 2); \ + Ame1 ^= De1; \ + Bo = ROL32(Ame1, 23); \ + Asi1 ^= Di1; \ + Bu = ROL32(Asi1, 31); \ + Ega0 = Ba ^((~Be)& Bi ); \ + Ege0 = Be ^((~Bi)& Bo ); \ + Egi0 = Bi ^((~Bo)& Bu ); \ + Ego0 = Bo ^((~Bu)& Ba ); \ + Egu0 = Bu ^((~Ba)& Be ); \ +\ + Abe1 ^= De1; \ + Ba = ROL32(Abe1, 1); \ + Agi0 ^= Di0; \ + Be = ROL32(Agi0, 3); \ + Ako1 ^= Do1; \ + Bi = ROL32(Ako1, 13); \ + Amu0 ^= Du0; \ + Bo = ROL32(Amu0, 4); \ + Asa0 ^= Da0; \ + Bu = ROL32(Asa0, 9); \ + Eka0 = Ba ^((~Be)& Bi ); \ + Eke0 = Be ^((~Bi)& Bo ); \ + Eki0 = Bi ^((~Bo)& Bu ); \ + Eko0 = Bo ^((~Bu)& Ba ); \ + Eku0 = Bu ^((~Ba)& Be ); \ +\ + Abu1 ^= Du1; \ + Ba = ROL32(Abu1, 14); \ + Aga0 ^= Da0; \ + Be = ROL32(Aga0, 18); \ + Ake0 ^= De0; \ + Bi = ROL32(Ake0, 5); \ + Ami1 ^= Di1; \ + Bo = ROL32(Ami1, 8); \ + Aso0 ^= Do0; \ + Bu = ROL32(Aso0, 28); \ + Ema0 = Ba ^((~Be)& Bi ); \ + Eme0 = Be ^((~Bi)& Bo ); \ + Emi0 = Bi ^((~Bo)& Bu ); \ + Emo0 = Bo ^((~Bu)& 
Ba ); \ + Emu0 = Bu ^((~Ba)& Be ); \ +\ + Abi0 ^= Di0; \ + Ba = ROL32(Abi0, 31); \ + Ago1 ^= Do1; \ + Be = ROL32(Ago1, 28); \ + Aku1 ^= Du1; \ + Bi = ROL32(Aku1, 20); \ + Ama1 ^= Da1; \ + Bo = ROL32(Ama1, 21); \ + Ase0 ^= De0; \ + Bu = ROL32(Ase0, 1); \ + Esa0 = Ba ^((~Be)& Bi ); \ + Ese0 = Be ^((~Bi)& Bo ); \ + Esi0 = Bi ^((~Bo)& Bu ); \ + Eso0 = Bo ^((~Bu)& Ba ); \ + Esu0 = Bu ^((~Ba)& Be ); \ +\ + Aba1 ^= Da1; \ + Ba = Aba1; \ + Age1 ^= De1; \ + Be = ROL32(Age1, 22); \ + Aki0 ^= Di0; \ + Bi = ROL32(Aki0, 21); \ + Amo0 ^= Do0; \ + Bo = ROL32(Amo0, 10); \ + Asu1 ^= Du1; \ + Bu = ROL32(Asu1, 7); \ + Eba1 = Ba ^((~Be)& Bi ); \ + Eba1 ^= *(pRoundConstants++); \ + Ebe1 = Be ^((~Bi)& Bo ); \ + Ebi1 = Bi ^((~Bo)& Bu ); \ + Ebo1 = Bo ^((~Bu)& Ba ); \ + Ebu1 = Bu ^((~Ba)& Be ); \ +\ + Abo1 ^= Do1; \ + Ba = ROL32(Abo1, 14); \ + Agu1 ^= Du1; \ + Be = ROL32(Agu1, 10); \ + Aka0 ^= Da0; \ + Bi = ROL32(Aka0, 1); \ + Ame0 ^= De0; \ + Bo = ROL32(Ame0, 22); \ + Asi0 ^= Di0; \ + Bu = ROL32(Asi0, 30); \ + Ega1 = Ba ^((~Be)& Bi ); \ + Ege1 = Be ^((~Bi)& Bo ); \ + Egi1 = Bi ^((~Bo)& Bu ); \ + Ego1 = Bo ^((~Bu)& Ba ); \ + Egu1 = Bu ^((~Ba)& Be ); \ +\ + Abe0 ^= De0; \ + Ba = Abe0; \ + Agi1 ^= Di1; \ + Be = ROL32(Agi1, 3); \ + Ako0 ^= Do0; \ + Bi = ROL32(Ako0, 12); \ + Amu1 ^= Du1; \ + Bo = ROL32(Amu1, 4); \ + Asa1 ^= Da1; \ + Bu = ROL32(Asa1, 9); \ + Eka1 = Ba ^((~Be)& Bi ); \ + Eke1 = Be ^((~Bi)& Bo ); \ + Eki1 = Bi ^((~Bo)& Bu ); \ + Eko1 = Bo ^((~Bu)& Ba ); \ + Eku1 = Bu ^((~Ba)& Be ); \ +\ + Abu0 ^= Du0; \ + Ba = ROL32(Abu0, 13); \ + Aga1 ^= Da1; \ + Be = ROL32(Aga1, 18); \ + Ake1 ^= De1; \ + Bi = ROL32(Ake1, 5); \ + Ami0 ^= Di0; \ + Bo = ROL32(Ami0, 7); \ + Aso1 ^= Do1; \ + Bu = ROL32(Aso1, 28); \ + Ema1 = Ba ^((~Be)& Bi ); \ + Eme1 = Be ^((~Bi)& Bo ); \ + Emi1 = Bi ^((~Bo)& Bu ); \ + Emo1 = Bo ^((~Bu)& Ba ); \ + Emu1 = Bu ^((~Ba)& Be ); \ +\ + Abi1 ^= Di1; \ + Ba = ROL32(Abi1, 31); \ + Ago0 ^= Do0; \ + Be = ROL32(Ago0, 27); \ + Aku0 ^= Du0; \ + Bi = ROL32(Aku0, 19); \ + Ama0 ^= 
Da0; \ + Bo = ROL32(Ama0, 20); \ + Ase1 ^= De1; \ + Bu = ROL32(Ase1, 1); \ + Esa1 = Ba ^((~Be)& Bi ); \ + Ese1 = Be ^((~Bi)& Bo ); \ + Esi1 = Bi ^((~Bo)& Bu ); \ + Eso1 = Bo ^((~Bu)& Ba ); \ + Esu1 = Bu ^((~Ba)& Be ); \ +\ + Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \ + Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \ + Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \ + Da1 = Cz^Du0; \ +\ + Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \ + Do1 = Cy^Cx; \ +\ + Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \ + De1 = Cz^Cw; \ +\ + Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \ + Di1 = Du1^Cw; \ +\ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Eba0 ^= Da0; \ + Ba = Eba0; \ + Ege0 ^= De0; \ + Be = ROL32(Ege0, 22); \ + Eki1 ^= Di1; \ + Bi = ROL32(Eki1, 22); \ + Emo1 ^= Do1; \ + Bo = ROL32(Emo1, 11); \ + Esu0 ^= Du0; \ + Bu = ROL32(Esu0, 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ +\ + Ebo0 ^= Do0; \ + Ba = ROL32(Ebo0, 14); \ + Egu0 ^= Du0; \ + Be = ROL32(Egu0, 10); \ + Eka1 ^= Da1; \ + Bi = ROL32(Eka1, 2); \ + Eme1 ^= De1; \ + Bo = ROL32(Eme1, 23); \ + Esi1 ^= Di1; \ + Bu = ROL32(Esi1, 31); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ +\ + Ebe1 ^= De1; \ + Ba = ROL32(Ebe1, 1); \ + Egi0 ^= Di0; \ + Be = ROL32(Egi0, 3); \ + Eko1 ^= Do1; \ + Bi = ROL32(Eko1, 13); \ + Emu0 ^= Du0; \ + Bo = ROL32(Emu0, 4); \ + Esa0 ^= Da0; \ + Bu = ROL32(Esa0, 9); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ +\ + Ebu1 ^= Du1; \ + Ba = ROL32(Ebu1, 14); \ + Ega0 ^= Da0; \ + Be = ROL32(Ega0, 
18); \ + Eke0 ^= De0; \ + Bi = ROL32(Eke0, 5); \ + Emi1 ^= Di1; \ + Bo = ROL32(Emi1, 8); \ + Eso0 ^= Do0; \ + Bu = ROL32(Eso0, 28); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ +\ + Ebi0 ^= Di0; \ + Ba = ROL32(Ebi0, 31); \ + Ego1 ^= Do1; \ + Be = ROL32(Ego1, 28); \ + Eku1 ^= Du1; \ + Bi = ROL32(Eku1, 20); \ + Ema1 ^= Da1; \ + Bo = ROL32(Ema1, 21); \ + Ese0 ^= De0; \ + Bu = ROL32(Ese0, 1); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ +\ + Eba1 ^= Da1; \ + Ba = Eba1; \ + Ege1 ^= De1; \ + Be = ROL32(Ege1, 22); \ + Eki0 ^= Di0; \ + Bi = ROL32(Eki0, 21); \ + Emo0 ^= Do0; \ + Bo = ROL32(Emo0, 10); \ + Esu1 ^= Du1; \ + Bu = ROL32(Esu1, 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ +\ + Ebo1 ^= Do1; \ + Ba = ROL32(Ebo1, 14); \ + Egu1 ^= Du1; \ + Be = ROL32(Egu1, 10); \ + Eka0 ^= Da0; \ + Bi = ROL32(Eka0, 1); \ + Eme0 ^= De0; \ + Bo = ROL32(Eme0, 22); \ + Esi0 ^= Di0; \ + Bu = ROL32(Esi0, 30); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ +\ + Ebe0 ^= De0; \ + Ba = Ebe0; \ + Egi1 ^= Di1; \ + Be = ROL32(Egi1, 3); \ + Eko0 ^= Do0; \ + Bi = ROL32(Eko0, 12); \ + Emu1 ^= Du1; \ + Bo = ROL32(Emu1, 4); \ + Esa1 ^= Da1; \ + Bu = ROL32(Esa1, 9); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ +\ + Ebu0 ^= Du0; \ + Ba = ROL32(Ebu0, 13); \ + Ega1 ^= Da1; \ + Be = ROL32(Ega1, 18); \ + Eke1 ^= De1; \ + Bi = ROL32(Eke1, 5); \ + Emi0 ^= Di0; \ + Bo = ROL32(Emi0, 7); \ + Eso1 ^= Do1; \ + Bu = ROL32(Eso1, 28); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be 
^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ +\ + Ebi1 ^= Di1; \ + Ba = ROL32(Ebi1, 31); \ + Ego0 ^= Do0; \ + Be = ROL32(Ego0, 27); \ + Eku0 ^= Du0; \ + Bi = ROL32(Eku0, 19); \ + Ema0 ^= Da0; \ + Bo = ROL32(Ema0, 20); \ + Ese1 ^= De1; \ + Bu = ROL32(Ese1, 1); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + } \ + copyToState(state, A) \ +} + +#define copyFromState(X, state) \ + X##ba0 = state[ 0]; \ + X##ba1 = state[ 1]; \ + X##be0 = state[ 2]; \ + X##be1 = state[ 3]; \ + X##bi0 = state[ 4]; \ + X##bi1 = state[ 5]; \ + X##bo0 = state[ 6]; \ + X##bo1 = state[ 7]; \ + X##bu0 = state[ 8]; \ + X##bu1 = state[ 9]; \ + X##ga0 = state[10]; \ + X##ga1 = state[11]; \ + X##ge0 = state[12]; \ + X##ge1 = state[13]; \ + X##gi0 = state[14]; \ + X##gi1 = state[15]; \ + X##go0 = state[16]; \ + X##go1 = state[17]; \ + X##gu0 = state[18]; \ + X##gu1 = state[19]; \ + X##ka0 = state[20]; \ + X##ka1 = state[21]; \ + X##ke0 = state[22]; \ + X##ke1 = state[23]; \ + X##ki0 = state[24]; \ + X##ki1 = state[25]; \ + X##ko0 = state[26]; \ + X##ko1 = state[27]; \ + X##ku0 = state[28]; \ + X##ku1 = state[29]; \ + X##ma0 = state[30]; \ + X##ma1 = state[31]; \ + X##me0 = state[32]; \ + X##me1 = state[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba0; \ + state[ 1] = X##ba1; \ + state[ 2] = X##be0; \ + state[ 3] = X##be1; \ + state[ 4] = X##bi0; \ + state[ 5] = X##bi1; \ + state[ 6] = X##bo0; \ + state[ 7] = X##bo1; \ + state[ 8] = X##bu0; \ + state[ 
9] = X##bu1; \ + state[10] = X##ga0; \ + state[11] = X##ga1; \ + state[12] = X##ge0; \ + state[13] = X##ge1; \ + state[14] = X##gi0; \ + state[15] = X##gi1; \ + state[16] = X##go0; \ + state[17] = X##go1; \ + state[18] = X##gu0; \ + state[19] = X##gu1; \ + state[20] = X##ka0; \ + state[21] = X##ka1; \ + state[22] = X##ke0; \ + state[23] = X##ke1; \ + state[24] = X##ki0; \ + state[25] = X##ki1; \ + state[26] = X##ko0; \ + state[27] = X##ko1; \ + state[28] = X##ku0; \ + state[29] = X##ku1; \ + state[30] = X##ma0; \ + state[31] = X##ma1; \ + state[32] = X##me0; \ + state[33] = X##me1; \ + state[34] = X##mi0; \ + state[35] = X##mi1; \ + state[36] = X##mo0; \ + state[37] = X##mo1; \ + state[38] = X##mu0; \ + state[39] = X##mu1; \ + state[40] = X##sa0; \ + state[41] = X##sa1; \ + state[42] = X##se0; \ + state[43] = X##se1; \ + state[44] = X##si0; \ + state[45] = X##si1; \ + state[46] = X##so0; \ + state[47] = X##so1; \ + state[48] = X##su0; \ + state[49] = X##su1; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros b/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros new file mode 100644 index 0000000..373d61d --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-32-s1.macros @@ -0,0 +1,1187 @@ +/* +Code automatically generated by KeccakTools! + +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ + UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ + UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ + UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ + UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ + UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ + UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ + UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ + UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ + UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ + UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \ + UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \ + UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \ + UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \ + UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \ + UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \ + UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \ + UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \ + UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \ + UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \ + UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \ + UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \ + UINT32 Da0, De0, Di0, Do0, Du0; \ + UINT32 Da1, De1, Di1, Do1, Du1; \ + UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ + UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ + UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ + UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ + UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ + UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ + UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ + UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ + UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ + UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ + +#define prepareTheta \ + Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \ + Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \ + Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \ + Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- using factor 2 interleaving, 
64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + Ca0 = E##ba0; \ + E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ + Ce0 = E##be0; \ + E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ + Ci0 = E##bi0; \ + E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ + Co0 = E##bo0; \ + E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ + Cu0 = E##bu0; \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + Ca1 = E##ba1; \ + E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ + Ce1 = E##be1; \ + E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ + Ci1 = E##bi1; \ + E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ + Co1 = E##bo1; \ + E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ + Cu1 = E##bu1; \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ + Ca0 ^= E##ga0; \ + E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ + Ce0 ^= E##ge0; \ + E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ + Ci0 ^= E##gi0; \ + E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ + Co0 ^= E##go0; \ + E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ + Cu0 ^= E##gu0; \ +\ + A##bo1 ^= Do1; \ + Bga1 = 
ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ + Ca1 ^= E##ga1; \ + E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ + Ce1 ^= E##ge1; \ + E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ + Ci1 ^= E##gi1; \ + E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ + Co1 ^= E##go1; \ + E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ + Cu1 ^= E##gu1; \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ + Ca0 ^= E##ka0; \ + E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ + Ce0 ^= E##ke0; \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + Ci0 ^= E##ki0; \ + E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ + Co0 ^= E##ko0; \ + E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ + Cu0 ^= E##ku0; \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ + Ca1 ^= E##ka1; \ + E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ + Ce1 ^= E##ke1; \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + Ci1 ^= E##ki1; \ + E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ + Co1 ^= E##ko1; \ + E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ + Cu1 ^= E##ku1; \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ + Ca0 ^= E##ma0; \ + E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ + Ce0 ^= E##me0; \ + E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ + Ci0 ^= E##mi0; \ + E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ + Co0 ^= E##mo0; \ + 
E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ + Cu0 ^= E##mu0; \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ + Ca1 ^= E##ma1; \ + E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ + Ce1 ^= E##me1; \ + E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ + Ci1 ^= E##mi1; \ + E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ + Co1 ^= E##mo1; \ + E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ + Cu1 ^= E##mu1; \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + Ca0 ^= E##sa0; \ + E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ + Ce0 ^= E##se0; \ + E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ + Ci0 ^= E##si0; \ + E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ + Co0 ^= E##so0; \ + E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ + Cu0 ^= E##su0; \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + Ca1 ^= E##sa1; \ + E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ + Ce1 ^= E##se1; \ + E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ + Ci1 ^= E##si1; \ + E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ + Co1 ^= E##so1; \ + E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ + Cu1 ^= E##su1; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIota(i, A, E) /* i: index into KeccakF1600RoundConstants_int2_0/_1; A: name prefix of the input state words, which are XORed in place with the D words; E: name prefix of the output state words. Ca0..Cu1 are read but not updated. */ \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 =
Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ + E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ + E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ + E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ + E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ + E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ + E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ + E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ + E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ + E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ + E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ + E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ + E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ + E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ + E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ 
+ A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ + E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ + E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ + E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ + E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ + E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ + E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ + E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ + E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ + E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ + E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ + E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ + E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ + E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ + E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ + E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ +\ + A##bi1 ^= 
Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ + E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ + E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ + E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) /* i: index into KeccakF1600RoundConstants_int2_0/_1; A: name prefix of the input state words, which are XORed in place with the D words; E: name prefix of the output state words. Reads Ca0..Cu1 on entry and refills each of them with the XOR of the five E words sharing its last letter and half index. */ \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + Ca0 = E##ba0; \ + E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ + Ce0 = E##be0; \ + E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ + Ci0 = E##bi0; \ + E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ + Co0 = E##bo0; \ + E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ + Cu0 = E##bu0; \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + Ca1 = E##ba1; \ + E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ + Ce1 = E##be1; \ + E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ + Ci1 = E##bi1; \ + E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ + Co1 = E##bo1; \ + E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ + Cu1 = E##bu1; \ +\ + A##bo0 ^=
Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ + Ca0 ^= E##ga0; \ + E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ + Ce0 ^= E##ge0; \ + E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ + Ci0 ^= E##gi0; \ + E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ + Co0 ^= E##go0; \ + E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ + Cu0 ^= E##gu0; \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ + Ca1 ^= E##ga1; \ + E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ + Ce1 ^= E##ge1; \ + E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ + Ci1 ^= E##gi1; \ + E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ + Co1 ^= E##go1; \ + E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ + Cu1 ^= E##gu1; \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ + Ca0 ^= E##ka0; \ + E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ + Ce0 ^= E##ke0; \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + Ci0 ^= E##ki0; \ + E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ + Co0 ^= E##ko0; \ + E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ + Cu0 ^= E##ku0; \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ + Ca1 ^= E##ka1; \ + E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ + Ce1 ^= E##ke1; \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + Ci1 ^= E##ki1; \ + E##ko1 = Bko1 ^((~Bku1)& Bka1 ); 
\ + Co1 ^= E##ko1; \ + E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ + Cu1 ^= E##ku1; \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ + Ca0 ^= E##ma0; \ + E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ + Ce0 ^= E##me0; \ + E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ + Ci0 ^= E##mi0; \ + E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ + Co0 ^= E##mo0; \ + E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ + Cu0 ^= E##mu0; \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ + Ca1 ^= E##ma1; \ + E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ + Ce1 ^= E##me1; \ + E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ + Ci1 ^= E##mi1; \ + E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ + Co1 ^= E##mo1; \ + E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ + Cu1 ^= E##mu1; \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + Ca0 ^= E##sa0; \ + E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ + Ce0 ^= E##se0; \ + E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ + Ci0 ^= E##si0; \ + E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ + Co0 ^= E##so0; \ + E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ + Cu0 ^= E##su0; \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + Ca1 ^= E##sa1; \ + E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ + 
Ce1 ^= E##se1; \ + E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ + Ci1 ^= E##si1; \ + E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ + Co1 ^= E##so1; \ + E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ + Cu1 ^= E##su1; \ +\ + +/* --- Code for round */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIota(i, A, E) /* i: index into KeccakF1600RoundConstants_int2_0/_1; A: name prefix of the input state words, which are XORed in place with the D words; E: name prefix of the output state words. Ca0..Cu1 are read but not updated. */ \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ + E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ + E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ + E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ + E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ + E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ + E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ + E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ + E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ + E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ + E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); \ +
A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ + E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ + E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ + E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ + E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ + E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ + E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ + E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ + E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ + E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ + E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ + E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ + E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ + E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ + E##mi1 = Bmi1 
^((~Bmo1)& Bmu1 ); \ + E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ + E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ + E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ + E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ + E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ + E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ + E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ + E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ +\ + +#endif /* UseBebigokimisa */ + +const UINT32 KeccakF1600RoundConstants_int2_0[24] = { /* entry [i] is XORed into E##ba0 by the thetaRhoPiChiIota* macros; 24 entries */ + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL }; + +const UINT32 KeccakF1600RoundConstants_int2_1[24] = { /* entry [i] is XORed into E##ba1 by the thetaRhoPiChiIota* macros; 24 entries */ + 0x00000000UL, + 0x00000089UL, + 0x8000008bUL, + 0x80008080UL, + 0x0000008bUL, + 0x00008000UL, + 0x80008088UL, + 0x80000082UL, + 0x0000000bUL, + 0x0000000aUL, + 0x00008082UL, + 0x00008003UL, + 0x0000808bUL, + 0x8000000bUL, + 0x8000008aUL, + 0x80000081UL, + 0x80000081UL, + 0x80000008UL, + 0x00000083UL, + 0x80008003UL, + 0x80008088UL, + 0x80000088UL, + 0x00008000UL, + 0x80008082UL }; + +#define copyFromStateAndXor1024bits(X, state, input) /* X##ba0..X##ma1 = state[0..31] ^ input[0..31]; X##me0..X##su1 = state[32..49] copied without XOR */ \ + X##ba0 = state[ 0]^input[ 0]; \ + X##ba1 = state[ 1]^input[
1]; \ + X##be0 = state[ 2]^input[ 2]; \ + X##be1 = state[ 3]^input[ 3]; \ + X##bi0 = state[ 4]^input[ 4]; \ + X##bi1 = state[ 5]^input[ 5]; \ + X##bo0 = state[ 6]^input[ 6]; \ + X##bo1 = state[ 7]^input[ 7]; \ + X##bu0 = state[ 8]^input[ 8]; \ + X##bu1 = state[ 9]^input[ 9]; \ + X##ga0 = state[10]^input[10]; \ + X##ga1 = state[11]^input[11]; \ + X##ge0 = state[12]^input[12]; \ + X##ge1 = state[13]^input[13]; \ + X##gi0 = state[14]^input[14]; \ + X##gi1 = state[15]^input[15]; \ + X##go0 = state[16]^input[16]; \ + X##go1 = state[17]^input[17]; \ + X##gu0 = state[18]^input[18]; \ + X##gu1 = state[19]^input[19]; \ + X##ka0 = state[20]^input[20]; \ + X##ka1 = state[21]^input[21]; \ + X##ke0 = state[22]^input[22]; \ + X##ke1 = state[23]^input[23]; \ + X##ki0 = state[24]^input[24]; \ + X##ki1 = state[25]^input[25]; \ + X##ko0 = state[26]^input[26]; \ + X##ko1 = state[27]^input[27]; \ + X##ku0 = state[28]^input[28]; \ + X##ku1 = state[29]^input[29]; \ + X##ma0 = state[30]^input[30]; \ + X##ma1 = state[31]^input[31]; \ + X##me0 = state[32]; \ + X##me1 = state[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyFromStateAndXor1088bits(X, state, input) /* X##ba0..X##me1 = state[0..33] ^ input[0..33]; X##mi0..X##su1 = state[34..49] copied without XOR */ \ + X##ba0 = state[ 0]^input[ 0]; \ + X##ba1 = state[ 1]^input[ 1]; \ + X##be0 = state[ 2]^input[ 2]; \ + X##be1 = state[ 3]^input[ 3]; \ + X##bi0 = state[ 4]^input[ 4]; \ + X##bi1 = state[ 5]^input[ 5]; \ + X##bo0 = state[ 6]^input[ 6]; \ + X##bo1 = state[ 7]^input[ 7]; \ + X##bu0 = state[ 8]^input[ 8]; \ + X##bu1 = state[ 9]^input[ 9]; \ + X##ga0 = state[10]^input[10]; \ + X##ga1 = state[11]^input[11]; \ + X##ge0 = state[12]^input[12]; \ + X##ge1 = state[13]^input[13]; \ +
X##gi0 = state[14]^input[14]; \ + X##gi1 = state[15]^input[15]; \ + X##go0 = state[16]^input[16]; \ + X##go1 = state[17]^input[17]; \ + X##gu0 = state[18]^input[18]; \ + X##gu1 = state[19]^input[19]; \ + X##ka0 = state[20]^input[20]; \ + X##ka1 = state[21]^input[21]; \ + X##ke0 = state[22]^input[22]; \ + X##ke1 = state[23]^input[23]; \ + X##ki0 = state[24]^input[24]; \ + X##ki1 = state[25]^input[25]; \ + X##ko0 = state[26]^input[26]; \ + X##ko1 = state[27]^input[27]; \ + X##ku0 = state[28]^input[28]; \ + X##ku1 = state[29]^input[29]; \ + X##ma0 = state[30]^input[30]; \ + X##ma1 = state[31]^input[31]; \ + X##me0 = state[32]^input[32]; \ + X##me1 = state[33]^input[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyFromState(X, state) /* X##ba0..X##su1 = state[0..49], in the order ba0, ba1, be0, be1, ..., su0, su1; no input is XORed in */ \ + X##ba0 = state[ 0]; \ + X##ba1 = state[ 1]; \ + X##be0 = state[ 2]; \ + X##be1 = state[ 3]; \ + X##bi0 = state[ 4]; \ + X##bi1 = state[ 5]; \ + X##bo0 = state[ 6]; \ + X##bo1 = state[ 7]; \ + X##bu0 = state[ 8]; \ + X##bu1 = state[ 9]; \ + X##ga0 = state[10]; \ + X##ga1 = state[11]; \ + X##ge0 = state[12]; \ + X##ge1 = state[13]; \ + X##gi0 = state[14]; \ + X##gi1 = state[15]; \ + X##go0 = state[16]; \ + X##go1 = state[17]; \ + X##gu0 = state[18]; \ + X##gu1 = state[19]; \ + X##ka0 = state[20]; \ + X##ka1 = state[21]; \ + X##ke0 = state[22]; \ + X##ke1 = state[23]; \ + X##ki0 = state[24]; \ + X##ki1 = state[25]; \ + X##ko0 = state[26]; \ + X##ko1 = state[27]; \ + X##ku0 = state[28]; \ + X##ku1 = state[29]; \ + X##ma0 = state[30]; \ + X##ma1 = state[31]; \ + X##me0 = state[32]; \ + X##me1 = state[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 =
state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyToState(state, X) /* state[0..49] = X##ba0..X##su1, in the order ba0, ba1, be0, be1, ..., su0, su1 */ \ + state[ 0] = X##ba0; \ + state[ 1] = X##ba1; \ + state[ 2] = X##be0; \ + state[ 3] = X##be1; \ + state[ 4] = X##bi0; \ + state[ 5] = X##bi1; \ + state[ 6] = X##bo0; \ + state[ 7] = X##bo1; \ + state[ 8] = X##bu0; \ + state[ 9] = X##bu1; \ + state[10] = X##ga0; \ + state[11] = X##ga1; \ + state[12] = X##ge0; \ + state[13] = X##ge1; \ + state[14] = X##gi0; \ + state[15] = X##gi1; \ + state[16] = X##go0; \ + state[17] = X##go1; \ + state[18] = X##gu0; \ + state[19] = X##gu1; \ + state[20] = X##ka0; \ + state[21] = X##ka1; \ + state[22] = X##ke0; \ + state[23] = X##ke1; \ + state[24] = X##ki0; \ + state[25] = X##ki1; \ + state[26] = X##ko0; \ + state[27] = X##ko1; \ + state[28] = X##ku0; \ + state[29] = X##ku1; \ + state[30] = X##ma0; \ + state[31] = X##ma1; \ + state[32] = X##me0; \ + state[33] = X##me1; \ + state[34] = X##mi0; \ + state[35] = X##mi1; \ + state[36] = X##mo0; \ + state[37] = X##mo1; \ + state[38] = X##mu0; \ + state[39] = X##mu1; \ + state[40] = X##sa0; \ + state[41] = X##sa1; \ + state[42] = X##se0; \ + state[43] = X##se1; \ + state[44] = X##si0; \ + state[45] = X##si1; \ + state[46] = X##so0; \ + state[47] = X##so1; \ + state[48] = X##su0; \ + state[49] = X##su1; \ + +#define copyStateVariables(X, Y) /* assigns each of the 50 Y##-prefixed words to the X##-prefixed word with the same suffix */ \ + X##ba0 = Y##ba0; \ + X##ba1 = Y##ba1; \ + X##be0 = Y##be0; \ + X##be1 = Y##be1; \ + X##bi0 = Y##bi0; \ + X##bi1 = Y##bi1; \ + X##bo0 = Y##bo0; \ + X##bo1 = Y##bo1; \ + X##bu0 = Y##bu0; \ + X##bu1 = Y##bu1; \ + X##ga0 = Y##ga0; \ + X##ga1 = Y##ga1; \ + X##ge0 = Y##ge0; \ + X##ge1 = Y##ge1; \ + X##gi0 = Y##gi0; \ + X##gi1 = Y##gi1; \ + X##go0 = Y##go0; \ + X##go1 = Y##go1; \ + X##gu0 = Y##gu0; \ + X##gu1 = Y##gu1; \ +
X##ka0 = Y##ka0; \ + X##ka1 = Y##ka1; \ + X##ke0 = Y##ke0; \ + X##ke1 = Y##ke1; \ + X##ki0 = Y##ki0; \ + X##ki1 = Y##ki1; \ + X##ko0 = Y##ko0; \ + X##ko1 = Y##ko1; \ + X##ku0 = Y##ku0; \ + X##ku1 = Y##ku1; \ + X##ma0 = Y##ma0; \ + X##ma1 = Y##ma1; \ + X##me0 = Y##me0; \ + X##me1 = Y##me1; \ + X##mi0 = Y##mi0; \ + X##mi1 = Y##mi1; \ + X##mo0 = Y##mo0; \ + X##mo1 = Y##mo1; \ + X##mu0 = Y##mu0; \ + X##mu1 = Y##mu1; \ + X##sa0 = Y##sa0; \ + X##sa1 = Y##sa1; \ + X##se0 = Y##se0; \ + X##se1 = Y##se1; \ + X##si0 = Y##si0; \ + X##si1 = Y##si1; \ + X##so0 = Y##so0; \ + X##so1 = Y##so1; \ + X##su0 = Y##su0; \ + X##su1 = Y##su1; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros b/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros new file mode 100644 index 0000000..fa11762 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-32-s2.macros @@ -0,0 +1,1187 @@ +/* +Code automatically generated by KeccakTools! + +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE /* declares, as UINT32 variables, the 50 A-, 50 B- and 50 E-prefixed state words plus the 10 C and 10 D words used by the round macros */ \ + UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ + UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ + UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ + UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ + UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ + UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ + UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ + UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ + UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ + UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ + UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \ + UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \ + UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \ + UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \ + UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \ + UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \ + UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \ + UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \ + UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \ + UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \ + UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \ + UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \ + UINT32 Da0, De0, Di0, Do0, Du0; \ + UINT32 Da1, De1, Di1, Do1, Du1; \ + UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ + UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ + UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ + UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ + UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ + UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ + UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ + UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ + UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ + UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ + +#define prepareTheta /* sets each C word to the XOR of the five A words sharing its last letter and half index, e.g. Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; the round macros read these C words to form the D words */ \ + Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \ + Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \ + Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \ + Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- using factor 2 interleaving,
64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) /* i: index into KeccakF1600RoundConstants_int2_0/_1; A: name prefix of the input state words, which are XORed in place with the D words; E: name prefix of the output state words. Reads Ca0..Cu1 on entry and refills each of them with the XOR of the five E words sharing its last letter and half index. */ \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + Ca0 = E##ba0; \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ + Ce0 = E##be0; \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ + Ci0 = E##bi0; \ + E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ + Co0 = E##bo0; \ + E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ + Cu0 = E##bu0; \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + Ca1 = E##ba1; \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ + Ce1 = E##be1; \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ + Ci1 = E##bi1; \ + E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ + Co1 = E##bo1; \ + E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ + Cu1 = E##bu1; \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ + Ca0 ^= E##ga0; \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ + Ce0 ^= E##ge0; \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ + Ci0 ^= E##gi0; \ + E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ + Co0 ^= E##go0; \ + E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ + Cu0 ^= E##gu0; \ +\ + A##bo1 ^= Do1; \ + Bga1 =
ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ + Ca1 ^= E##ga1; \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ + Ce1 ^= E##ge1; \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ + Ci1 ^= E##gi1; \ + E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ + Co1 ^= E##go1; \ + E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ + Cu1 ^= E##gu1; \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ + Ca0 ^= E##ka0; \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ + Ce0 ^= E##ke0; \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + Ci0 ^= E##ki0; \ + E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ + Co0 ^= E##ko0; \ + E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ + Cu0 ^= E##ku0; \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ + Ca1 ^= E##ka1; \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ + Ce1 ^= E##ke1; \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + Ci1 ^= E##ki1; \ + E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ + Co1 ^= E##ko1; \ + E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ + Cu1 ^= E##ku1; \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ + Ca0 ^= E##ma0; \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ + Ce0 ^= E##me0; \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ + Ci0 ^= E##mi0; \ + E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ + Co0 ^= E##mo0; \ + 
E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ + Cu0 ^= E##mu0; \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ + Ca1 ^= E##ma1; \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ + Ce1 ^= E##me1; \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ + Ci1 ^= E##mi1; \ + E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ + Co1 ^= E##mo1; \ + E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ + Cu1 ^= E##mu1; \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + Ca0 ^= E##sa0; \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ + Ce0 ^= E##se0; \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ + Ci0 ^= E##si0; \ + E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ + Co0 ^= E##so0; \ + E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ + Cu0 ^= E##su0; \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + Ca1 ^= E##sa1; \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ + Ce1 ^= E##se1; \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ + Ci1 ^= E##si1; \ + E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ + Co1 ^= E##so1; \ + E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ + Cu1 ^= E##su1; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = 
Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ + E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ + E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ + E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ + E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ + E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ + E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ + E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ + E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ 
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ + E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ + E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ + E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ + E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ + E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ + E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ + E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ + E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ +\ + A##bi1 ^= 
Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ + E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ + E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + Ca0 = E##ba0; \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ + Ce0 = E##be0; \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ + Ci0 = E##bi0; \ + E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ + Co0 = E##bo0; \ + E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ + Cu0 = E##bu0; \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + Ca1 = E##ba1; \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ + Ce1 = E##be1; \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ + Ci1 = E##bi1; \ + E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ + Co1 = E##bo1; \ + E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ + Cu1 = E##bu1; \ +\ + A##bo0 ^= 
Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ + Ca0 ^= E##ga0; \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ + Ce0 ^= E##ge0; \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ + Ci0 ^= E##gi0; \ + E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ + Co0 ^= E##go0; \ + E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ + Cu0 ^= E##gu0; \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); \ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ + Ca1 ^= E##ga1; \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ + Ce1 ^= E##ge1; \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ + Ci1 ^= E##gi1; \ + E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ + Co1 ^= E##go1; \ + E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ + Cu1 ^= E##gu1; \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ + Ca0 ^= E##ka0; \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ + Ce0 ^= E##ke0; \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + Ci0 ^= E##ki0; \ + E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ + Co0 ^= E##ko0; \ + E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ + Cu0 ^= E##ku0; \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ + Ca1 ^= E##ka1; \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ + Ce1 ^= E##ke1; \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + Ci1 ^= E##ki1; \ + E##ko1 = Bko1 ^((~Bku1)& Bka1 ); 
\ + Co1 ^= E##ko1; \ + E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ + Cu1 ^= E##ku1; \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ + Ca0 ^= E##ma0; \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ + Ce0 ^= E##me0; \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ + Ci0 ^= E##mi0; \ + E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ + Co0 ^= E##mo0; \ + E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ + Cu0 ^= E##mu0; \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ + Ca1 ^= E##ma1; \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ + Ce1 ^= E##me1; \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ + Ci1 ^= E##mi1; \ + E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ + Co1 ^= E##mo1; \ + E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ + Cu1 ^= E##mu1; \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + Ca0 ^= E##sa0; \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ + Ce0 ^= E##se0; \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ + Ci0 ^= E##si0; \ + E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ + Co0 ^= E##so0; \ + E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ + Cu0 ^= E##su0; \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + Ca1 ^= E##sa1; \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ + Ce1 ^= E##se1; \ + A##se1 ^= De1; \ + Bsu1 = 
ROL32(A##se1, 1); \ + E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ + Ci1 ^= E##si1; \ + E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ + Co1 ^= E##so1; \ + E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ + Cu1 ^= E##su1; \ +\ + +/* --- Code for round */ +/* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da0 = Cu0^ROL32(Ce1, 1); \ + Da1 = Cu1^Ce0; \ + De0 = Ca0^ROL32(Ci1, 1); \ + De1 = Ca1^Ci0; \ + Di0 = Ce0^ROL32(Co1, 1); \ + Di1 = Ce1^Co0; \ + Do0 = Ci0^ROL32(Cu1, 1); \ + Do1 = Ci1^Cu0; \ + Du0 = Co0^ROL32(Ca1, 1); \ + Du1 = Co1^Ca0; \ +\ + A##ba0 ^= Da0; \ + Bba0 = A##ba0; \ + A##ge0 ^= De0; \ + Bbe0 = ROL32(A##ge0, 22); \ + A##ki1 ^= Di1; \ + Bbi0 = ROL32(A##ki1, 22); \ + E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ + E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ + A##mo1 ^= Do1; \ + Bbo0 = ROL32(A##mo1, 11); \ + E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ + A##su0 ^= Du0; \ + Bbu0 = ROL32(A##su0, 7); \ + E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ + E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ + E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ +\ + A##ba1 ^= Da1; \ + Bba1 = A##ba1; \ + A##ge1 ^= De1; \ + Bbe1 = ROL32(A##ge1, 22); \ + A##ki0 ^= Di0; \ + Bbi1 = ROL32(A##ki0, 21); \ + E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ + E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ + A##mo0 ^= Do0; \ + Bbo1 = ROL32(A##mo0, 10); \ + E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ + A##su1 ^= Du1; \ + Bbu1 = ROL32(A##su1, 7); \ + E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ + E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ + E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ +\ + A##bo0 ^= Do0; \ + Bga0 = ROL32(A##bo0, 14); \ + A##gu0 ^= Du0; \ + Bge0 = ROL32(A##gu0, 10); \ + A##ka1 ^= Da1; \ + Bgi0 = ROL32(A##ka1, 2); \ + E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ + A##me1 ^= De1; \ + Bgo0 = ROL32(A##me1, 23); \ + E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ + A##si1 ^= Di1; \ + Bgu0 = ROL32(A##si1, 31); \ + E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ + E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ + E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ +\ + A##bo1 ^= Do1; \ + Bga1 = ROL32(A##bo1, 14); 
\ + A##gu1 ^= Du1; \ + Bge1 = ROL32(A##gu1, 10); \ + A##ka0 ^= Da0; \ + Bgi1 = ROL32(A##ka0, 1); \ + E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ + A##me0 ^= De0; \ + Bgo1 = ROL32(A##me0, 22); \ + E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ + A##si0 ^= Di0; \ + Bgu1 = ROL32(A##si0, 30); \ + E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ + E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ + E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ +\ + A##be1 ^= De1; \ + Bka0 = ROL32(A##be1, 1); \ + A##gi0 ^= Di0; \ + Bke0 = ROL32(A##gi0, 3); \ + A##ko1 ^= Do1; \ + Bki0 = ROL32(A##ko1, 13); \ + E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ + A##mu0 ^= Du0; \ + Bko0 = ROL32(A##mu0, 4); \ + E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ + A##sa0 ^= Da0; \ + Bku0 = ROL32(A##sa0, 9); \ + E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ + E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ + E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ +\ + A##be0 ^= De0; \ + Bka1 = A##be0; \ + A##gi1 ^= Di1; \ + Bke1 = ROL32(A##gi1, 3); \ + A##ko0 ^= Do0; \ + Bki1 = ROL32(A##ko0, 12); \ + E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ + A##mu1 ^= Du1; \ + Bko1 = ROL32(A##mu1, 4); \ + E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ + A##sa1 ^= Da1; \ + Bku1 = ROL32(A##sa1, 9); \ + E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ + E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ + E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ +\ + A##bu1 ^= Du1; \ + Bma0 = ROL32(A##bu1, 14); \ + A##ga0 ^= Da0; \ + Bme0 = ROL32(A##ga0, 18); \ + A##ke0 ^= De0; \ + Bmi0 = ROL32(A##ke0, 5); \ + E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ + A##mi1 ^= Di1; \ + Bmo0 = ROL32(A##mi1, 8); \ + E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ + A##so0 ^= Do0; \ + Bmu0 = ROL32(A##so0, 28); \ + E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ + E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ + E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ +\ + A##bu0 ^= Du0; \ + Bma1 = ROL32(A##bu0, 13); \ + A##ga1 ^= Da1; \ + Bme1 = ROL32(A##ga1, 18); \ + A##ke1 ^= De1; \ + Bmi1 = ROL32(A##ke1, 5); \ + E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ + A##mi0 ^= Di0; \ + Bmo1 = ROL32(A##mi0, 7); \ + E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ + A##so1 ^= Do1; \ + Bmu1 = ROL32(A##so1, 28); \ + E##mi1 = Bmi1 
^((~Bmo1)& Bmu1 ); \ + E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ + E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ +\ + A##bi0 ^= Di0; \ + Bsa0 = ROL32(A##bi0, 31); \ + A##go1 ^= Do1; \ + Bse0 = ROL32(A##go1, 28); \ + A##ku1 ^= Du1; \ + Bsi0 = ROL32(A##ku1, 20); \ + E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ + A##ma1 ^= Da1; \ + Bso0 = ROL32(A##ma1, 21); \ + E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ + A##se0 ^= De0; \ + Bsu0 = ROL32(A##se0, 1); \ + E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ + E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ + E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ +\ + A##bi1 ^= Di1; \ + Bsa1 = ROL32(A##bi1, 31); \ + A##go0 ^= Do0; \ + Bse1 = ROL32(A##go0, 27); \ + A##ku0 ^= Du0; \ + Bsi1 = ROL32(A##ku0, 19); \ + E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ + A##ma0 ^= Da0; \ + Bso1 = ROL32(A##ma0, 20); \ + E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ + A##se1 ^= De1; \ + Bsu1 = ROL32(A##se1, 1); \ + E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ + E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ + E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ +\ + +#endif /* UseBebigokimisa */ + +const UINT32 KeccakF1600RoundConstants_int2_0[24] = { + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000001UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL, + 0x00000001UL, + 0x00000000UL }; + +const UINT32 KeccakF1600RoundConstants_int2_1[24] = { + 0x00000000UL, + 0x00000089UL, + 0x8000008bUL, + 0x80008080UL, + 0x0000008bUL, + 0x00008000UL, + 0x80008088UL, + 0x80000082UL, + 0x0000000bUL, + 0x0000000aUL, + 0x00008082UL, + 0x00008003UL, + 0x0000808bUL, + 0x8000000bUL, + 0x8000008aUL, + 0x80000081UL, + 0x80000081UL, + 0x80000008UL, + 0x00000083UL, + 0x80008003UL, + 0x80008088UL, + 0x80000088UL, + 0x00008000UL, + 0x80008082UL }; + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##ba0 = state[ 0]^input[ 0]; \ + X##ba1 = state[ 1]^input[ 
1]; \ + X##be0 = state[ 2]^input[ 2]; \ + X##be1 = state[ 3]^input[ 3]; \ + X##bi0 = state[ 4]^input[ 4]; \ + X##bi1 = state[ 5]^input[ 5]; \ + X##bo0 = state[ 6]^input[ 6]; \ + X##bo1 = state[ 7]^input[ 7]; \ + X##bu0 = state[ 8]^input[ 8]; \ + X##bu1 = state[ 9]^input[ 9]; \ + X##ga0 = state[10]^input[10]; \ + X##ga1 = state[11]^input[11]; \ + X##ge0 = state[12]^input[12]; \ + X##ge1 = state[13]^input[13]; \ + X##gi0 = state[14]^input[14]; \ + X##gi1 = state[15]^input[15]; \ + X##go0 = state[16]^input[16]; \ + X##go1 = state[17]^input[17]; \ + X##gu0 = state[18]^input[18]; \ + X##gu1 = state[19]^input[19]; \ + X##ka0 = state[20]^input[20]; \ + X##ka1 = state[21]^input[21]; \ + X##ke0 = state[22]^input[22]; \ + X##ke1 = state[23]^input[23]; \ + X##ki0 = state[24]^input[24]; \ + X##ki1 = state[25]^input[25]; \ + X##ko0 = state[26]^input[26]; \ + X##ko1 = state[27]^input[27]; \ + X##ku0 = state[28]^input[28]; \ + X##ku1 = state[29]^input[29]; \ + X##ma0 = state[30]^input[30]; \ + X##ma1 = state[31]^input[31]; \ + X##me0 = state[32]; \ + X##me1 = state[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##ba0 = state[ 0]^input[ 0]; \ + X##ba1 = state[ 1]^input[ 1]; \ + X##be0 = state[ 2]^input[ 2]; \ + X##be1 = state[ 3]^input[ 3]; \ + X##bi0 = state[ 4]^input[ 4]; \ + X##bi1 = state[ 5]^input[ 5]; \ + X##bo0 = state[ 6]^input[ 6]; \ + X##bo1 = state[ 7]^input[ 7]; \ + X##bu0 = state[ 8]^input[ 8]; \ + X##bu1 = state[ 9]^input[ 9]; \ + X##ga0 = state[10]^input[10]; \ + X##ga1 = state[11]^input[11]; \ + X##ge0 = state[12]^input[12]; \ + X##ge1 = state[13]^input[13]; \ + 
X##gi0 = state[14]^input[14]; \ + X##gi1 = state[15]^input[15]; \ + X##go0 = state[16]^input[16]; \ + X##go1 = state[17]^input[17]; \ + X##gu0 = state[18]^input[18]; \ + X##gu1 = state[19]^input[19]; \ + X##ka0 = state[20]^input[20]; \ + X##ka1 = state[21]^input[21]; \ + X##ke0 = state[22]^input[22]; \ + X##ke1 = state[23]^input[23]; \ + X##ki0 = state[24]^input[24]; \ + X##ki1 = state[25]^input[25]; \ + X##ko0 = state[26]^input[26]; \ + X##ko1 = state[27]^input[27]; \ + X##ku0 = state[28]^input[28]; \ + X##ku1 = state[29]^input[29]; \ + X##ma0 = state[30]^input[30]; \ + X##ma1 = state[31]^input[31]; \ + X##me0 = state[32]^input[32]; \ + X##me1 = state[33]^input[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyFromState(X, state) \ + X##ba0 = state[ 0]; \ + X##ba1 = state[ 1]; \ + X##be0 = state[ 2]; \ + X##be1 = state[ 3]; \ + X##bi0 = state[ 4]; \ + X##bi1 = state[ 5]; \ + X##bo0 = state[ 6]; \ + X##bo1 = state[ 7]; \ + X##bu0 = state[ 8]; \ + X##bu1 = state[ 9]; \ + X##ga0 = state[10]; \ + X##ga1 = state[11]; \ + X##ge0 = state[12]; \ + X##ge1 = state[13]; \ + X##gi0 = state[14]; \ + X##gi1 = state[15]; \ + X##go0 = state[16]; \ + X##go1 = state[17]; \ + X##gu0 = state[18]; \ + X##gu1 = state[19]; \ + X##ka0 = state[20]; \ + X##ka1 = state[21]; \ + X##ke0 = state[22]; \ + X##ke1 = state[23]; \ + X##ki0 = state[24]; \ + X##ki1 = state[25]; \ + X##ko0 = state[26]; \ + X##ko1 = state[27]; \ + X##ku0 = state[28]; \ + X##ku1 = state[29]; \ + X##ma0 = state[30]; \ + X##ma1 = state[31]; \ + X##me0 = state[32]; \ + X##me1 = state[33]; \ + X##mi0 = state[34]; \ + X##mi1 = state[35]; \ + X##mo0 = state[36]; \ + X##mo1 = 
state[37]; \ + X##mu0 = state[38]; \ + X##mu1 = state[39]; \ + X##sa0 = state[40]; \ + X##sa1 = state[41]; \ + X##se0 = state[42]; \ + X##se1 = state[43]; \ + X##si0 = state[44]; \ + X##si1 = state[45]; \ + X##so0 = state[46]; \ + X##so1 = state[47]; \ + X##su0 = state[48]; \ + X##su1 = state[49]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba0; \ + state[ 1] = X##ba1; \ + state[ 2] = X##be0; \ + state[ 3] = X##be1; \ + state[ 4] = X##bi0; \ + state[ 5] = X##bi1; \ + state[ 6] = X##bo0; \ + state[ 7] = X##bo1; \ + state[ 8] = X##bu0; \ + state[ 9] = X##bu1; \ + state[10] = X##ga0; \ + state[11] = X##ga1; \ + state[12] = X##ge0; \ + state[13] = X##ge1; \ + state[14] = X##gi0; \ + state[15] = X##gi1; \ + state[16] = X##go0; \ + state[17] = X##go1; \ + state[18] = X##gu0; \ + state[19] = X##gu1; \ + state[20] = X##ka0; \ + state[21] = X##ka1; \ + state[22] = X##ke0; \ + state[23] = X##ke1; \ + state[24] = X##ki0; \ + state[25] = X##ki1; \ + state[26] = X##ko0; \ + state[27] = X##ko1; \ + state[28] = X##ku0; \ + state[29] = X##ku1; \ + state[30] = X##ma0; \ + state[31] = X##ma1; \ + state[32] = X##me0; \ + state[33] = X##me1; \ + state[34] = X##mi0; \ + state[35] = X##mi1; \ + state[36] = X##mo0; \ + state[37] = X##mo1; \ + state[38] = X##mu0; \ + state[39] = X##mu1; \ + state[40] = X##sa0; \ + state[41] = X##sa1; \ + state[42] = X##se0; \ + state[43] = X##se1; \ + state[44] = X##si0; \ + state[45] = X##si1; \ + state[46] = X##so0; \ + state[47] = X##so1; \ + state[48] = X##su0; \ + state[49] = X##su1; \ + +#define copyStateVariables(X, Y) \ + X##ba0 = Y##ba0; \ + X##ba1 = Y##ba1; \ + X##be0 = Y##be0; \ + X##be1 = Y##be1; \ + X##bi0 = Y##bi0; \ + X##bi1 = Y##bi1; \ + X##bo0 = Y##bo0; \ + X##bo1 = Y##bo1; \ + X##bu0 = Y##bu0; \ + X##bu1 = Y##bu1; \ + X##ga0 = Y##ga0; \ + X##ga1 = Y##ga1; \ + X##ge0 = Y##ge0; \ + X##ge1 = Y##ge1; \ + X##gi0 = Y##gi0; \ + X##gi1 = Y##gi1; \ + X##go0 = Y##go0; \ + X##go1 = Y##go1; \ + X##gu0 = Y##gu0; \ + X##gu1 = Y##gu1; \ + 
X##ka0 = Y##ka0; \ + X##ka1 = Y##ka1; \ + X##ke0 = Y##ke0; \ + X##ke1 = Y##ke1; \ + X##ki0 = Y##ki0; \ + X##ki1 = Y##ki1; \ + X##ko0 = Y##ko0; \ + X##ko1 = Y##ko1; \ + X##ku0 = Y##ku0; \ + X##ku1 = Y##ku1; \ + X##ma0 = Y##ma0; \ + X##ma1 = Y##ma1; \ + X##me0 = Y##me0; \ + X##me1 = Y##me1; \ + X##mi0 = Y##mi0; \ + X##mi1 = Y##mi1; \ + X##mo0 = Y##mo0; \ + X##mo1 = Y##mo1; \ + X##mu0 = Y##mu0; \ + X##mu1 = Y##mu1; \ + X##sa0 = Y##sa0; \ + X##sa1 = Y##sa1; \ + X##se0 = Y##se0; \ + X##se1 = Y##se1; \ + X##si0 = Y##si0; \ + X##si1 = Y##si1; \ + X##so0 = Y##so0; \ + X##so1 = Y##so1; \ + X##su0 = Y##su0; \ + X##su1 = Y##su1; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-32.macros b/Modules/_sha3/keccak/KeccakF-1600-32.macros new file mode 100644 index 0000000..9ade600 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-32.macros @@ -0,0 +1,26 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifdef UseSchedule + #if (UseSchedule == 1) + #include "KeccakF-1600-32-s1.macros" + #elif (UseSchedule == 2) + #include "KeccakF-1600-32-s2.macros" + #elif (UseSchedule == 3) + #include "KeccakF-1600-32-rvk.macros" + #else + #error "This schedule is not supported." + #endif +#else + #include "KeccakF-1600-32-s1.macros" +#endif diff --git a/Modules/_sha3/keccak/KeccakF-1600-64.macros b/Modules/_sha3/keccak/KeccakF-1600-64.macros new file mode 100644 index 0000000..dc0f789 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-64.macros @@ -0,0 +1,728 @@ +/* +Code automatically generated by KeccakTools! 
+ +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi 
= Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu 
^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 
55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); 
\ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = 
Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, 
state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + 
X##ma = state[15]^input[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]^input[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 
2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + X##sa = state[20]^input[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ 
+ X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-int-set.h b/Modules/_sha3/keccak/KeccakF-1600-int-set.h new file mode 100644 index 0000000..0ed1d80 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-int-set.h @@ -0,0 +1,6 @@ +#define ProvideFast576 +#define ProvideFast832 +#define ProvideFast1024 +#define ProvideFast1088 +#define ProvideFast1152 +#define ProvideFast1344 diff --git a/Modules/_sha3/keccak/KeccakF-1600-interface.h b/Modules/_sha3/keccak/KeccakF-1600-interface.h new file mode 100644 index 0000000..ce2710e --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-interface.h @@ -0,0 +1,46 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakPermutationInterface_h_ +#define _KeccakPermutationInterface_h_ + +#include "KeccakF-1600-int-set.h" + +static void KeccakInitialize( void ); +static void KeccakInitializeState(unsigned char *state); +static void KeccakPermutation(unsigned char *state); +#ifdef ProvideFast576 +static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast832 +static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1024 +static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1088 +static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1152 +static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data); +#endif +#ifdef ProvideFast1344 +static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data); +#endif +static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount); +#ifdef ProvideFast1024 +static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data); +#endif +static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount); + +#endif diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h b/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h new file mode 100644 index 0000000..615c782 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-opt32-settings.h @@ -0,0 +1,6 @@ +/* +#define Unrolling 2 +#define UseBebigokimisa +#define UseInterleaveTables +#define UseSchedule 3 +*/ diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt32.c b/Modules/_sha3/keccak/KeccakF-1600-opt32.c new file mode 100644 index 0000000..dba6d59 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-opt32.c @@ -0,0 +1,524 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 
+Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +/* #include "brg_endian.h" */ +#include "KeccakF-1600-opt32-settings.h" +#include "KeccakF-1600-interface.h" + +typedef unsigned char UINT8; +typedef unsigned short UINT16; +typedef unsigned int UINT32; +/* typedef unsigned long long int UINT64; */ + +#ifdef UseInterleaveTables +static int interleaveTablesBuilt = 0; +static UINT16 interleaveTable[65536]; +static UINT16 deinterleaveTable[65536]; + +static void buildInterleaveTables() +{ + UINT32 i, j; + UINT16 x; + + if (!interleaveTablesBuilt) { + for(i=0; i<65536; i++) { + x = 0; + for(j=0; j<16; j++) { + if (i & (1 << j)) + x |= (1 << (j/2 + 8*(j%2))); + } + interleaveTable[i] = x; + deinterleaveTable[x] = (UINT16)i; + } + interleaveTablesBuilt = 1; + } +} + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + +#define xor2bytesIntoInterleavedWords(even, odd, source, j) \ + i##j = interleaveTable[((const UINT16*)source)[j]]; \ + ((UINT8*)even)[j] ^= i##j & 0xFF; \ + ((UINT8*)odd)[j] ^= i##j >> 8; + +#define setInterleavedWordsInto2bytes(dest, even, odd, j) \ + d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ + ((UINT16*)dest)[j] = d##j; + +#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */ + +#define xor2bytesIntoInterleavedWords(even, odd, source, j) \ + i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \ + *even ^= (i##j & 0xFF) << (j*8); \ + *odd ^= ((i##j >> 8) & 0xFF) << (j*8); + +#define setInterleavedWordsInto2bytes(dest, even, odd, j) \ + d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ + 
dest[2*j] = d##j & 0xFF; \ + dest[2*j+1] = d##j >> 8; + +#endif /* Endianness */ + +static void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source) +{ + UINT16 i0, i1, i2, i3; + + xor2bytesIntoInterleavedWords(even, odd, source, 0) + xor2bytesIntoInterleavedWords(even, odd, source, 1) + xor2bytesIntoInterleavedWords(even, odd, source, 2) + xor2bytesIntoInterleavedWords(even, odd, source, 3) +} + +#define xorLanesIntoState(laneCount, state, input) \ + { \ + int i; \ + for(i=0; i<(laneCount); i++) \ + xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \ + } + +static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd) +{ + UINT16 d0, d1, d2, d3; + + setInterleavedWordsInto2bytes(dest, even, odd, 0) + setInterleavedWordsInto2bytes(dest, even, odd, 1) + setInterleavedWordsInto2bytes(dest, even, odd, 2) + setInterleavedWordsInto2bytes(dest, even, odd, 3) +} + +#define extractLanes(laneCount, state, data) \ + { \ + int i; \ + for(i=0; i<(laneCount); i++) \ + setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \ + } + +#else /* No interleaving tables */ + +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + +/* Credit: Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +#define xorInterleavedLE(rateInLanes, state, input) \ + { \ + const UINT32 * pI = (const UINT32 *)input; \ + UINT32 * pS = state; \ + UINT32 t, x0, x1; \ + int i; \ + for (i = (rateInLanes)-1; i >= 0; --i) \ + { \ + x0 = *(pI++); \ + t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \ + t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \ + t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \ + t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \ + x1 = *(pI++); \ + t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \ + t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \ + t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \ + t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \ + *(pS++) ^= (UINT16)x0 | (x1 << 16); \ + *(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \ + } \ + } + +#define xorLanesIntoState(laneCount, state, input) \ + xorInterleavedLE(laneCount, state, input) + +#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */ + +/* Credit: Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +UINT64 toInterleaving(UINT64 x) +{ + UINT64 t; + + t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); + t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); + t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); + t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); + t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); + + return x; +} + +static void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source) +{ + /* This can be optimized */ + UINT64 sourceWord = + (UINT64)source[0] + ^ (((UINT64)source[1]) << 8) + ^ (((UINT64)source[2]) << 16) + ^ (((UINT64)source[3]) << 24) + ^ (((UINT64)source[4]) << 32) + ^ (((UINT64)source[5]) << 40) + ^ (((UINT64)source[6]) << 48) + ^ (((UINT64)source[7]) << 56); + UINT64 evenAndOddWord = toInterleaving(sourceWord); + evenAndOdd[0] ^= (UINT32)evenAndOddWord; + evenAndOdd[1] ^= (UINT32)(evenAndOddWord >> 32); +} + +#define xorLanesIntoState(laneCount, state, input) \ + { \ + int i; \ + for(i=0; i<(laneCount); i++) \ + xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \ + } + +#endif /* Endianness */ + +/* Credit: Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +UINT64 fromInterleaving(UINT64 x) +{ + UINT64 t; + + t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); + t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); + t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); + t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); + t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); + + return x; +} + +static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + ((UINT64*)dest)[0] = fromInterleaving(*(UINT64*)evenAndOdd); +#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */ + /* This can be optimized */ + UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32); + UINT64 destWord = fromInterleaving(evenAndOddWord); + dest[0] = destWord & 0xFF; + dest[1] = (destWord >> 8) & 0xFF; + dest[2] = (destWord >> 16) & 0xFF; + dest[3] = (destWord >> 24) & 0xFF; + dest[4] = (destWord >> 32) & 0xFF; + dest[5] = (destWord >> 40) & 0xFF; + dest[6] = (destWord >> 48) & 0xFF; + dest[7] = (destWord >> 56) & 0xFF; +#endif /* Endianness */ +} + +#define extractLanes(laneCount, state, data) \ + { \ + int i; \ + for(i=0; i<(laneCount); i++) \ + setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \ + } + +#endif /* With or without interleaving tables */ + +#if defined(_MSC_VER) +#define ROL32(a, offset) _rotl(a, offset) +#elif (defined (__arm__) && defined(__ARMCC_VERSION)) +#define ROL32(a, offset) __ror(a, 32-(offset)) +#else +#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) +#endif + +#include "KeccakF-1600-unrolling.macros" +#include "KeccakF-1600-32.macros" + +#if (UseSchedule == 3) + +#ifdef UseBebigokimisa +#error "No lane complementing with schedule 3." +#endif + +#if (Unrolling != 2) +#error "Only unrolling 2 is supported by schedule 3." 
+#endif + +static void KeccakPermutationOnWords(UINT32 *state) +{ + rounds +} + +static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) +{ + xorLanesIntoState(laneCount, state, input) + rounds +} + +#ifdef ProvideFast576 +static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(9, state, input) + rounds +} +#endif + +#ifdef ProvideFast832 +static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(13, state, input) + rounds +} +#endif + +#ifdef ProvideFast1024 +static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(16, state, input) + rounds +} +#endif + +#ifdef ProvideFast1088 +static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(17, state, input) + rounds +} +#endif + +#ifdef ProvideFast1152 +static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(18, state, input) + rounds +} +#endif + +#ifdef ProvideFast1344 +static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) +{ + xorLanesIntoState(21, state, input) + rounds +} +#endif + +#else /* (Schedule != 3) */ + +static void KeccakPermutationOnWords(UINT32 *state) +{ + declareABCDE +#if (Unrolling != 24) + unsigned int i; +#endif + + copyFromState(A, state) + rounds +} + +static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(laneCount, state, input) + copyFromState(A, state) + rounds +} + +#ifdef ProvideFast576 +static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(9, state, input) + copyFromState(A, state) + rounds +} +#endif + +#ifdef 
ProvideFast832 +static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(13, state, input) + copyFromState(A, state) + rounds +} +#endif + +#ifdef ProvideFast1024 +static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(16, state, input) + copyFromState(A, state) + rounds +} +#endif + +#ifdef ProvideFast1088 +static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(17, state, input) + copyFromState(A, state) + rounds +} +#endif + +#ifdef ProvideFast1152 +static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(18, state, input) + copyFromState(A, state) + rounds +} +#endif + +#ifdef ProvideFast1344 +static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) +{ + declareABCDE + unsigned int i; + + xorLanesIntoState(21, state, input) + copyFromState(A, state) + rounds +} +#endif + +#endif + +static void KeccakInitialize() +{ +#ifdef UseInterleaveTables + buildInterleaveTables(); +#endif +} + +static void KeccakInitializeState(unsigned char *state) +{ + memset(state, 0, 200); +#ifdef UseBebigokimisa + ((UINT32*)state)[ 2] = ~(UINT32)0; + ((UINT32*)state)[ 3] = ~(UINT32)0; + ((UINT32*)state)[ 4] = ~(UINT32)0; + ((UINT32*)state)[ 5] = ~(UINT32)0; + ((UINT32*)state)[16] = ~(UINT32)0; + ((UINT32*)state)[17] = ~(UINT32)0; + ((UINT32*)state)[24] = ~(UINT32)0; + ((UINT32*)state)[25] = ~(UINT32)0; + ((UINT32*)state)[34] = ~(UINT32)0; + ((UINT32*)state)[35] = ~(UINT32)0; + ((UINT32*)state)[40] = ~(UINT32)0; + ((UINT32*)state)[41] = ~(UINT32)0; +#endif +} + +static void KeccakPermutation(unsigned char *state) +{ + /* We assume the state is always stored as interleaved 32-bit words */ + 
KeccakPermutationOnWords((UINT32*)state); +} + +#ifdef ProvideFast576 +static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data); +} +#endif + +#ifdef ProvideFast832 +static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data); +} +#endif + +#ifdef ProvideFast1024 +static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data); +} +#endif + +#ifdef ProvideFast1088 +static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data); +} +#endif + +#ifdef ProvideFast1152 +static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data); +} +#endif + +#ifdef ProvideFast1344 +static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data) +{ + KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data); +} +#endif + +static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) +{ + KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount); +} + +#ifdef ProvideFast1024 +static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) +{ + extractLanes(16, state, data) +#ifdef UseBebigokimisa + ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; + ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; + ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; + ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; + ((UINT32*)data)[16] = ~((UINT32*)data)[16]; + ((UINT32*)data)[17] = ~((UINT32*)data)[17]; + ((UINT32*)data)[24] = ~((UINT32*)data)[24]; + ((UINT32*)data)[25] = ~((UINT32*)data)[25]; +#endif +} +#endif + +static void KeccakExtract(const unsigned char *state, unsigned char 
*data, unsigned int laneCount) +{ + extractLanes(laneCount, state, data) +#ifdef UseBebigokimisa + if (laneCount > 1) { + ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; + ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; + if (laneCount > 2) { + ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; + ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; + if (laneCount > 8) { + ((UINT32*)data)[16] = ~((UINT32*)data)[16]; + ((UINT32*)data)[17] = ~((UINT32*)data)[17]; + if (laneCount > 12) { + ((UINT32*)data)[24] = ~((UINT32*)data)[24]; + ((UINT32*)data)[25] = ~((UINT32*)data)[25]; + if (laneCount > 17) { + ((UINT32*)data)[34] = ~((UINT32*)data)[34]; + ((UINT32*)data)[35] = ~((UINT32*)data)[35]; + if (laneCount > 20) { + ((UINT32*)data)[40] = ~((UINT32*)data)[40]; + ((UINT32*)data)[41] = ~((UINT32*)data)[41]; + } + } + } + } + } + } +#endif +} diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h b/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h new file mode 100644 index 0000000..df83e63 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-opt64-settings.h @@ -0,0 +1,9 @@ +/* +#define Unrolling 24 +#define UseBebigokimisa +#define UseSSE +#define UseOnlySIMD64 +#define UseMMX +#define UseSHLD +#define UseXOP +*/ diff --git a/Modules/_sha3/keccak/KeccakF-1600-opt64.c b/Modules/_sha3/keccak/KeccakF-1600-opt64.c new file mode 100644 index 0000000..a68f951 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-opt64.c @@ -0,0 +1,508 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +/* #include "brg_endian.h" */ +#include "KeccakF-1600-opt64-settings.h" +#include "KeccakF-1600-interface.h" + +typedef unsigned char UINT8; +/* typedef unsigned long long int UINT64; */ + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + +#if defined(UseSSE) + #include + typedef __m128i V64; + typedef __m128i V128; + typedef union { + V128 v128; + UINT64 v64[2]; + } V6464; + + #define ANDnu64(a, b) _mm_andnot_si128(a, b) + #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b) + #define XOR64(a, b) _mm_xor_si128(a, b) + #define XOReq64(a, b) a = _mm_xor_si128(a, b) + #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b) + + #define ANDnu128(a, b) _mm_andnot_si128(a, b) + #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) + #define CONST128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) + #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) + #define XOR128(a, b) _mm_xor_si128(a, b) + #define XOReq128(a, b) a = _mm_xor_si128(a, b) + #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b) + #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b) + #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE) + #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44) + #define ZERO128() _mm_setzero_si128() + + #ifdef UseOnlySIMD64 + #include "KeccakF-1600-simd64.macros" + #else +ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09}; + #include "KeccakF-1600-simd128.macros" + #endif + + #ifdef UseBebigokimisa + 
#error "UseBebigokimisa cannot be used in combination with UseSSE" + #endif +#elif defined(UseXOP) + #include + typedef __m128i V64; + typedef __m128i V128; + + #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a)) + #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b) + #define XOR64(a, b) _mm_xor_si128(a, b) + #define XOReq64(a, b) a = _mm_xor_si128(a, b) + + #define ANDnu128(a, b) _mm_andnot_si128(a, b) + #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) + #define CONST128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128(a) _mm_load_si128((const V128 *)&(a)) + #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) + #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) + #define XOR128(a, b) _mm_xor_si128(a, b) + #define XOReq128(a, b) a = _mm_xor_si128(a, b) + #define ZERO128() _mm_setzero_si128() + + #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E) + #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b) + #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b) + #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2)) + #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a)) + #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE) + #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44) + + #define ROL6464same(a, o) _mm_roti_epi64(a, o) + #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 )) +ALIGN const UINT64 rot_0_20[2] = { 0, 20}; +ALIGN const UINT64 rot_44_3[2] = {44, 3}; +ALIGN const UINT64 rot_43_45[2] = {43, 45}; +ALIGN const UINT64 rot_21_61[2] = {21, 61}; +ALIGN const UINT64 rot_14_28[2] = {14, 28}; +ALIGN const UINT64 rot_1_36[2] = { 1, 36}; +ALIGN const UINT64 rot_6_10[2] = { 6, 10}; +ALIGN const UINT64 rot_25_15[2] = {25, 15}; +ALIGN const UINT64 rot_8_56[2] = { 8, 56}; +ALIGN const UINT64 rot_18_27[2] = {18, 27}; +ALIGN const UINT64 rot_62_55[2] = {62, 55}; +ALIGN const UINT64 rot_39_41[2] = {39, 41}; + +#if defined(UseSimulatedXOP) + /* For debugging purposes, when 
XOP is not available */ + #undef ROL6464 + #undef ROL6464same + #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) + V128 ROL6464(V128 a, int r0, int r1) + { + V128 a0 = ROL64(a, r0); + V128 a1 = COPY64HI2LO(ROL64(a, r1)); + return GET64LOLO(a0, a1); + } +#endif + + #include "KeccakF-1600-xop.macros" + + #ifdef UseBebigokimisa + #error "UseBebigokimisa cannot be used in combination with UseXOP" + #endif +#elif defined(UseMMX) + #include + typedef __m64 V64; + #define ANDnu64(a, b) _mm_andnot_si64(a, b) + + #if (defined(_MSC_VER) || defined (__INTEL_COMPILER)) + #define LOAD64(a) *(V64*)&(a) + #define CONST64(a) *(V64*)&(a) + #define STORE64(a, b) *(V64*)&(a) = b + #else + #define LOAD64(a) (V64)a + #define CONST64(a) (V64)a + #define STORE64(a, b) a = (UINT64)b + #endif + #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o))) + #define XOR64(a, b) _mm_xor_si64(a, b) + #define XOReq64(a, b) a = _mm_xor_si64(a, b) + + #include "KeccakF-1600-simd64.macros" + + #ifdef UseBebigokimisa + #error "UseBebigokimisa cannot be used in combination with UseMMX" + #endif +#else + #if defined(_MSC_VER) + #define ROL64(a, offset) _rotl64(a, offset) + #elif defined(UseSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) + #else + #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) + #endif + + #include "KeccakF-1600-64.macros" +#endif + +#include "KeccakF-1600-unrolling.macros" + +static void KeccakPermutationOnWords(UINT64 *state) +{ + declareABCDE +#if (Unrolling != 24) + unsigned int i; +#endif + + copyFromState(A, state) + rounds +#if defined(UseMMX) + _mm_empty(); +#endif +} + +static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount) +{ + declareABCDE +#if (Unrolling != 24) + unsigned int i; +#endif + unsigned int j; + + 
for(j=0; j> (8*i)) & 0xFF; +} +*/ + +#ifdef ProvideFast1024 +static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, 128); +#else + unsigned int i; + + for(i=0; i<16; i++) + fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); +#endif +#ifdef UseBebigokimisa + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; +#endif +} +#endif + +static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} diff --git a/Modules/_sha3/keccak/KeccakF-1600-simd128.macros b/Modules/_sha3/keccak/KeccakF-1600-simd128.macros new file mode 100644 index 0000000..98e47f5 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-simd128.macros @@ -0,0 +1,651 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + V6464 Abage, Abegi, Abigo, Abogu, Abuga; \ + V6464 Akame, Akemi, Akimo, Akomu, Akuma; \ + V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \ + V64 Aba, Abe, Abi, Abo, Abu; \ + V64 Aga, Age, Agi, Ago, Agu; \ + V64 Aka, Ake, Aki, Ako, Aku; \ + V64 Ama, Ame, Ami, Amo, Amu; \ + V64 Asa, Ase, Asi, Aso, Asu; \ + V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \ + V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \ + V64 Bba, Bbe, Bbi, Bbo, Bbu; \ + V64 Bga, Bge, Bgi, Bgo, Bgu; \ + V64 Bka, Bke, Bki, Bko, Bku; \ + V64 Bma, Bme, Bmi, Bmo, Bmu; \ + V64 Bsa, Bse, Bsi, Bso, Bsu; \ + V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \ + V64 Ca, Ce, Ci, Co, Cu; \ + V64 Da, De, Di, Do, Du; \ + V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \ + V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \ + V64 Eba, Ebe, Ebi, Ebo, Ebu; \ + V64 Ega, Ege, Egi, Ego, Egu; \ + V64 Eka, Eke, Eki, Eko, Eku; \ + V64 Ema, Eme, Emi, Emo, Emu; \ + V64 Esa, Ese, Esi, Eso, Esu; \ + V128 Zero; + +#define prepareTheta + +#define computeD \ + Cua = GET64LOLO(Cu, Cae); \ + Dei = XOR128(Cae, ROL64in128(Cio, 1)); \ + Dou = XOR128(Cio, ROL64in128(Cua, 1)); \ + Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \ + De = Dei; \ + Di = COPY64HI2LO(Dei); \ + Do = Dou; \ + Du = COPY64HI2LO(Dou); + +/* --- Theta Rho Pi Chi Iota Prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit and 128-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + computeD \ + \ + A##ba = LOAD64(A##bage.v64[0]); \ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + Bbage = GET64LOLO(Bba, Bge); \ + A##ge = LOAD64(A##bage.v64[1]); \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + A##ka = LOAD64(A##kame.v64[0]); \ + XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + Bbegi = GET64LOLO(Bbe, Bgi); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + A##me = LOAD64(A##kame.v64[1]); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 
45); \ + Bbigo = GET64LOLO(Bbi, Bgo); \ + E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \ + XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \ + Cae = E##bage.v128; \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + Bbogu = GET64LOLO(Bbo, Bgu); \ + E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \ + Cei = E##begi.v128; \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + Bbuga = GET64LOLO(Bbu, Bga); \ + E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \ + E##bi = E##bigo.v128; \ + E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \ + Cio = E##bigo.v128; \ + E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \ + E##bo = E##bogu.v128; \ + E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \ + Cou = E##bogu.v128; \ + E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \ + E##bu = E##buga.v128; \ + E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \ + Cua = E##buga.v128; \ +\ + A##be = LOAD64(A##begi.v64[0]); \ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + Bkame = GET64LOLO(Bka, Bme); \ + A##gi = LOAD64(A##begi.v64[1]); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + A##ke = LOAD64(A##kemi.v64[0]); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + Bkemi = GET64LOLO(Bke, Bmi); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + A##mi = LOAD64(A##kemi.v64[1]); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + Bkimo = GET64LOLO(Bki, Bmo); \ + E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \ + XOReq128(Cae, E##kame.v128); \ + Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \ + Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \ + E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \ + XOReq128(Cei, E##kemi.v128); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + Bkuma = 
GET64LOLO(Bku, Bma); \ + E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \ + E##ki = E##kimo.v128; \ + E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \ + XOReq128(Cio, E##kimo.v128); \ + E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \ + E##ko = E##komu.v128; \ + E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \ + XOReq128(Cou, E##komu.v128); \ + E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \ + E##ku = E##kuma.v128; \ + E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \ + XOReq128(Cua, E##kuma.v128); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + Ca = E##sa; \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ + Ce = E##se; \ + XOReq128(Cae, GET64LOLO(Ca, Ce)); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + Ci = E##si; \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + Co = E##so; \ + XOReq128(Cio, GET64LOLO(Ci, Co)); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ + Cu = E##su; \ +\ + Zero = ZERO128(); \ + XOReq128(Cae, GET64HIHI(Cua, Zero)); \ + XOReq128(Cae, GET64LOLO(Zero, Cei)); \ + XOReq128(Cio, GET64HIHI(Cei, Zero)); \ + XOReq128(Cio, GET64LOLO(Zero, Cou)); \ + XOReq128(Cua, GET64HIHI(Cou, Zero)); \ + XOReq64(Cu, Cua); \ + +/* --- Theta Rho Pi Chi Iota */ +/* --- 64-bit lanes mapped to 64-bit and 128-bit words */ +#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E) + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 
0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = LOAD64(state[ 9]); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = LOAD128(state[10]); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = LOAD128(state[12]); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = 
LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + 
X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \ + X##ma = X##mae.v128; \ + X##me = 
GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + 
XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = 
GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cu = X##bu; \ + X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae.v128); \ + X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = 
GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyFromState(X, state) \ + X##bae.v128 = LOAD128(state[ 0]); \ + X##ba = X##bae.v128; \ + X##be = GET64HIHI(X##bae.v128, X##bae.v128); \ + Cae = X##bae.v128; \ + X##bio.v128 = LOAD128(state[ 2]); \ + X##bi = X##bio.v128; \ + X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \ + Cio = X##bio.v128; \ + X##bu = LOAD64(state[ 4]); \ + Cu = X##bu; \ + X##gae.v128 = LOAD128u(state[ 5]); \ + X##ga = X##gae.v128; \ + X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \ + X##bage.v128 = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, 
X##gae.v128); \ + X##gio.v128 = LOAD128u(state[ 7]); \ + X##gi = X##gio.v128; \ + X##begi.v128 = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio.v128, X##gio.v128); \ + XOReq128(Cio, X##gio.v128); \ + X##gu = LOAD64(state[ 9]); \ + XOReq64(Cu, X##gu); \ + X##kae.v128 = LOAD128(state[10]); \ + X##ka = X##kae.v128; \ + X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \ + XOReq128(Cae, X##kae.v128); \ + X##kio.v128 = LOAD128(state[12]); \ + X##ki = X##kio.v128; \ + X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \ + XOReq128(Cio, X##kio.v128); \ + X##ku = LOAD64(state[14]); \ + XOReq64(Cu, X##ku); \ + X##mae.v128 = LOAD128u(state[15]); \ + X##ma = X##mae.v128; \ + X##me = GET64HIHI(X##mae.v128, X##mae.v128); \ + X##kame.v128 = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, X##mae.v128); \ + X##mio.v128 = LOAD128u(state[17]); \ + X##mi = X##mio.v128; \ + X##kemi.v128 = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \ + XOReq128(Cio, X##mio.v128); \ + X##mu = LOAD64(state[19]); \ + XOReq64(Cu, X##mu); \ + X##sae.v128 = LOAD128(state[20]); \ + X##sa = X##sae.v128; \ + X##se = GET64HIHI(X##sae.v128, X##sae.v128); \ + XOReq128(Cae, X##sae.v128); \ + X##sio.v128 = LOAD128(state[22]); \ + X##si = X##sio.v128; \ + X##so = GET64HIHI(X##sio.v128, X##sio.v128); \ + XOReq128(Cio, X##sio.v128); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cu, X##su); \ + +#define copyToState(state, X) \ + state[ 0] = A##bage.v64[0]; \ + state[ 1] = A##begi.v64[0]; \ + STORE64(state[ 2], X##bi); \ + STORE64(state[ 3], X##bo); \ + STORE64(state[ 4], X##bu); \ + STORE64(state[ 5], X##ga); \ + state[ 6] = A##bage.v64[1]; \ + state[ 7] = A##begi.v64[1]; \ + STORE64(state[ 8], X##go); \ + STORE64(state[ 9], X##gu); \ + state[10] = X##kame.v64[0]; \ + state[11] = X##kemi.v64[0]; \ + STORE64(state[12], X##ki); \ + STORE64(state[13], X##ko); \ + STORE64(state[14], X##ku); \ + STORE64(state[15], X##ma); \ + state[16] = X##kame.v64[1]; \ + state[17] = X##kemi.v64[1]; \ + 
STORE64(state[18], X##mo); \ + STORE64(state[19], X##mu); \ + STORE64(state[20], X##sa); \ + STORE64(state[21], X##se); \ + STORE64(state[22], X##si); \ + STORE64(state[23], X##so); \ + STORE64(state[24], X##su); \ + +#define copyStateVariables(X, Y) \ + X##bage = Y##bage; \ + X##begi = Y##begi; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##kame = Y##kame; \ + X##kemi = Y##kemi; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-simd64.macros b/Modules/_sha3/keccak/KeccakF-1600-simd64.macros new file mode 100644 index 0000000..06a30e2 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-simd64.macros @@ -0,0 +1,517 @@ +/* +Code automatically generated by KeccakTools! + +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + V64 Aba, Abe, Abi, Abo, Abu; \ + V64 Aga, Age, Agi, Ago, Agu; \ + V64 Aka, Ake, Aki, Ako, Aku; \ + V64 Ama, Ame, Ami, Amo, Amu; \ + V64 Asa, Ase, Asi, Aso, Asu; \ + V64 Bba, Bbe, Bbi, Bbo, Bbu; \ + V64 Bga, Bge, Bgi, Bgo, Bgu; \ + V64 Bka, Bke, Bki, Bko, Bku; \ + V64 Bma, Bme, Bmi, Bmo, Bmu; \ + V64 Bsa, Bse, Bsi, Bso, Bsu; \ + V64 Ca, Ce, Ci, Co, Cu; \ + V64 Da, De, Di, Do, Du; \ + V64 Eba, Ebe, Ebi, Ebo, Ebu; \ + V64 Ega, Ege, Egi, Ego, Egu; \ + V64 Eka, Eke, Eki, Eko, Eku; \ + V64 Ema, Eme, Emi, Emo, Emu; \ + V64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \ + Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \ + Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \ + Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \ + Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \ + +/* --- Code for round, with prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = XOR64(Cu, ROL64(Ce, 1)); \ + De = XOR64(Ca, ROL64(Ci, 1)); \ + Di = XOR64(Ce, ROL64(Co, 1)); \ + Do = XOR64(Ci, ROL64(Cu, 1)); \ + Du = XOR64(Co, ROL64(Ca, 1)); \ +\ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ + XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ + Ca = E##ba; \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ + Ce = E##be; \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ + Ci = E##bi; \ + E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ + Co = E##bo; \ + E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ + Cu = E##bu; \ +\ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + 
XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ + XOReq64(Ca, E##ga); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 45); \ + E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ + XOReq64(Ce, E##ge); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ + XOReq64(Ci, E##gi); \ + E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ + XOReq64(Co, E##go); \ + E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ + XOReq64(Cu, E##gu); \ +\ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ + XOReq64(Ca, E##ka); \ + XOReq64(A##mu, Du); \ + Bko = ROL64(A##mu, 8); \ + E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ + XOReq64(Ce, E##ke); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ + XOReq64(Ci, E##ki); \ + E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \ + XOReq64(Co, E##ko); \ + E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ + XOReq64(Cu, E##ku); \ +\ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ + XOReq64(Ca, E##ma); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ + XOReq64(Ce, E##me); \ + XOReq64(A##so, Do); \ + Bmu = ROL64(A##so, 56); \ + E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ + XOReq64(Ci, E##mi); \ + E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ + XOReq64(Co, E##mo); \ + E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ + XOReq64(Cu, E##mu); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + XOReq64(Ca, E##sa); \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); 
\ + XOReq64(Ce, E##se); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + XOReq64(Ci, E##si); \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + XOReq64(Co, E##so); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ + XOReq64(Cu, E##su); \ +\ + +/* --- Code for round: theta, rho, pi, chi and iota for round i. Reads the A##xx variables (XORing the matching D value into each of them in place) and writes the E##xx variables; KeccakF1600RoundConstants[i] is XORed into E##ba. Ca..Cu must already hold the values that Da..Du are derived from; this macro does not update Ca..Cu itself. */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = XOR64(Cu, ROL64(Ce, 1)); \ + De = XOR64(Ca, ROL64(Ci, 1)); \ + Di = XOR64(Ce, ROL64(Co, 1)); \ + Do = XOR64(Ci, ROL64(Cu, 1)); \ + Du = XOR64(Co, ROL64(Ca, 1)); \ +\ + XOReq64(A##ba, Da); \ + Bba = A##ba; \ + XOReq64(A##ge, De); \ + Bbe = ROL64(A##ge, 44); \ + XOReq64(A##ki, Di); \ + Bbi = ROL64(A##ki, 43); \ + E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ + XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ + XOReq64(A##mo, Do); \ + Bbo = ROL64(A##mo, 21); \ + E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ + XOReq64(A##su, Du); \ + Bbu = ROL64(A##su, 14); \ + E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ + E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ + E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ +\ + XOReq64(A##bo, Do); \ + Bga = ROL64(A##bo, 28); \ + XOReq64(A##gu, Du); \ + Bge = ROL64(A##gu, 20); \ + XOReq64(A##ka, Da); \ + Bgi = ROL64(A##ka, 3); \ + E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ + XOReq64(A##me, De); \ + Bgo = ROL64(A##me, 45); \ + E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ + XOReq64(A##si, Di); \ + Bgu = ROL64(A##si, 61); \ + E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ + E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ + E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ +\ + XOReq64(A##be, De); \ + Bka = ROL64(A##be, 1); \ + XOReq64(A##gi, Di); \ + Bke = ROL64(A##gi, 6); \ + XOReq64(A##ko, Do); \ + Bki = ROL64(A##ko, 25); \ + E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ + XOReq64(A##mu, Du); \ + Bko = ROL64(A##mu, 8); \ + E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ + XOReq64(A##sa, Da); \ + Bku = ROL64(A##sa, 18); \ + E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ + E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
+ E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ +\ + XOReq64(A##bu, Du); \ + Bma = ROL64(A##bu, 27); \ + XOReq64(A##ga, Da); \ + Bme = ROL64(A##ga, 36); \ + XOReq64(A##ke, De); \ + Bmi = ROL64(A##ke, 10); \ + E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ + XOReq64(A##mi, Di); \ + Bmo = ROL64(A##mi, 15); \ + E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ + XOReq64(A##so, Do); \ + Bmu = ROL64(A##so, 56); \ + E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ + E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ + E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ +\ + XOReq64(A##bi, Di); \ + Bsa = ROL64(A##bi, 62); \ + XOReq64(A##go, Do); \ + Bse = ROL64(A##go, 55); \ + XOReq64(A##ku, Du); \ + Bsi = ROL64(A##ku, 39); \ + E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ + XOReq64(A##ma, Da); \ + Bso = ROL64(A##ma, 41); \ + E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ + XOReq64(A##se, De); \ + Bsu = ROL64(A##se, 2); \ + E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ + E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ + E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ +\ + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +#define copyFromStateAndXor576bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); 
\ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = LOAD64(state[ 9]); \ + X##ka = LOAD64(state[10]); \ + X##ke = LOAD64(state[11]); \ + X##ki = LOAD64(state[12]); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), 
LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1088bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), 
LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ + X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ + X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ + X##bo = 
XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ + X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ + X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ + X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ + X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ + X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ + X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ + X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ + X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ + X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \ + X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ + X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyFromState(X, state) \ + X##ba = LOAD64(state[ 0]); \ + X##be = LOAD64(state[ 1]); \ + X##bi = LOAD64(state[ 2]); \ + X##bo = LOAD64(state[ 3]); \ + X##bu = LOAD64(state[ 4]); \ + X##ga = LOAD64(state[ 5]); \ + X##ge = LOAD64(state[ 6]); \ + X##gi = LOAD64(state[ 7]); \ + X##go = LOAD64(state[ 8]); \ + X##gu = LOAD64(state[ 9]); \ + X##ka = LOAD64(state[10]); \ + X##ke = LOAD64(state[11]); \ + X##ki = LOAD64(state[12]); \ + X##ko = LOAD64(state[13]); \ + X##ku = LOAD64(state[14]); \ + X##ma = LOAD64(state[15]); \ + X##me = LOAD64(state[16]); \ + X##mi = LOAD64(state[17]); \ + X##mo = LOAD64(state[18]); \ + X##mu = LOAD64(state[19]); \ + X##sa = LOAD64(state[20]); \ + X##se = LOAD64(state[21]); \ + X##si = LOAD64(state[22]); \ + X##so = LOAD64(state[23]); \ + X##su = LOAD64(state[24]); \ + +#define copyToState(state, X) \ + 
STORE64(state[ 0], X##ba); \ + STORE64(state[ 1], X##be); \ + STORE64(state[ 2], X##bi); \ + STORE64(state[ 3], X##bo); \ + STORE64(state[ 4], X##bu); \ + STORE64(state[ 5], X##ga); \ + STORE64(state[ 6], X##ge); \ + STORE64(state[ 7], X##gi); \ + STORE64(state[ 8], X##go); \ + STORE64(state[ 9], X##gu); \ + STORE64(state[10], X##ka); \ + STORE64(state[11], X##ke); \ + STORE64(state[12], X##ki); \ + STORE64(state[13], X##ko); \ + STORE64(state[14], X##ku); \ + STORE64(state[15], X##ma); \ + STORE64(state[16], X##me); \ + STORE64(state[17], X##mi); \ + STORE64(state[18], X##mo); \ + STORE64(state[19], X##mu); \ + STORE64(state[20], X##sa); \ + STORE64(state[21], X##se); \ + STORE64(state[22], X##si); \ + STORE64(state[23], X##so); \ + STORE64(state[24], X##su); \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + diff --git a/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros b/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros new file mode 100644 index 0000000..83c694c --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-unrolling.macros @@ -0,0 +1,124 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ +/* rounds: expands to prepareTheta, the 24 round invocations, and finally copyToState(state, A). Unrolling selects how many rounds are written out per loop iteration; for values below 24 the expansion is a for loop over i, so the expanding scope has to provide the variable i. Successive rounds alternate the (A, E) and (E, A) argument order. Only the fully unrolled variant ends with thetaRhoPiChiIota(23, E, A); the loop variants use thetaRhoPiChiIotaPrepareTheta for every round. */ +#if (Unrolling == 24) +#define rounds \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + copyToState(state, A) +#elif (Unrolling == 12) +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 8) +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=8) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ +
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 6) +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 4) +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 3) /* odd number of rounds per iteration: the last one writes E, so E is copied back to A before the next iteration */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + copyToState(state, A) +#elif (Unrolling == 2) +#define rounds \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + copyToState(state, A) +#elif (Unrolling == 1) /* single round per iteration writes E, which is copied back to A each time */ +#define rounds \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + copyToState(state, A) +#else +#error "Unrolling is not correctly specified!"
+#endif diff --git a/Modules/_sha3/keccak/KeccakF-1600-xop.macros b/Modules/_sha3/keccak/KeccakF-1600-xop.macros new file mode 100644 index 0000000..823c946 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakF-1600-xop.macros @@ -0,0 +1,573 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ +/* declareABCDE: declares the locals that the macros in this file operate on - V128 and V64 state variables A* and E*, the temporaries B*, C* and D*, and Zero. None of them is given an initial value here. */ +#define declareABCDE \ + V128 Abage, Abegi, Abigo, Abogu, Abuga; \ + V128 Akame, Akemi, Akimo, Akomu, Akuma; \ + V128 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio; \ + V64 Aba, Abe, Abi, Abo, Abu; \ + V64 Aga, Age, Agi, Ago, Agu; \ + V64 Aka, Ake, Aki, Ako, Aku; \ + V64 Ama, Ame, Ami, Amo, Amu; \ + V128 Asase, Asiso; \ + V64 Asu; \ + V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \ + V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \ + V128 Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \ + V128 Cae, Cei, Cio, Cou, Cua; \ + V128 Dau, Dea, Die, Doi, Duo; \ + V128 Dua, Dae, Dei, Dio, Dou; \ + V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \ + V128 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \ + V128 Esase, Esiso; \ + V64 Esu; \ + V128 Zero; +/* prepareTheta is deliberately empty in this variant; presumably no separate step is needed because the copyFromState* macros of this file already fill Cae, Cio and Cua while loading the state - TODO confirm. */ +#define prepareTheta +/* computeD: derives Dei, Dou and Dae from Cae, Cio and Cua, then builds the rearranged copies Dau, Dea, Die, Doi and Duo from them. Cua and Cei are overwritten in the process. */ +#define computeD \ + Cua = GET64LOLO(Cua, Cae); \ + Dei = XOR128(Cae, ROL6464same(Cio, 1)); \ + Dou = XOR128(Cio, ROL6464same(Cua, 1)); \ + Cei = GET64HILO(Cae, Cio); \ + Dae = XOR128(Cua, ROL6464same(Cei, 1)); \ + Dau = GET64LOHI(Dae, Dou); \ + Dea = SWAP64(Dae); \ + Die = SWAP64(Dei); \ + Doi = GET64LOLO(Dou, Die); \ + Duo = SWAP64(Dou); + +/* --- Theta Rho Pi Chi Iota Prepare-theta: one round i that starts with computeD, reads the A##* variables, writes the E##* variables and rebuilds Cae, Cio and Cua from the new E##* values for the round that follows */ +/* --- 64-bit lanes mapped to 64-bit and 128-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + computeD \ + \ + Bbage =
XOR128(GET64LOHI(A##bage, A##bogu), Dau); \ + Bbage = ROL6464(Bbage, 0, 20); \ + Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \ + Bbegi = ROL6464(Bbegi, 44, 3); \ + Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \ + Bbigo = ROL6464(Bbigo, 43, 45); \ + E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \ + XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \ + Cae = E##bage; \ + Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \ + Bbogu = ROL6464(Bbogu, 21, 61); \ + E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \ + Cei = E##begi; \ + Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \ + Bbuga = ROL6464(Bbuga, 14, 28); \ + E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \ + Cio = E##bigo; \ + E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \ + Cou = E##bogu; \ + E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \ + Cua = E##buga; \ +\ + Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \ + Bkame = ROL6464(Bkame, 1, 36); \ + Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \ + Bkemi = ROL6464(Bkemi, 6, 10); \ + Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \ + Bkimo = ROL6464(Bkimo, 25, 15); \ + E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \ + XOReq128(Cae, E##kame); \ + Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \ + Bkomu = ROL6464(Bkomu, 8, 56); \ + E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \ + XOReq128(Cei, E##kemi); \ + Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \ + Bkuma = ROL6464(Bkuma, 18, 27); \ + E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \ + XOReq128(Cio, E##kimo); \ + E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \ + XOReq128(Cou, E##komu); \ + E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \ + XOReq128(Cua, E##kuma); \ +\ + Bsase = XOR128(A##bigo, SWAP64(Doi)); \ + Bsase = ROL6464(Bsase, 62, 55); \ + Bsiso = XOR128(A##kuma, SWAP64(Dau)); \ + Bsiso = ROL6464(Bsiso, 39, 41); \ + Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \ + Bsusa = ROL6464same(Bsusa, 2); \ + Bsusa = GET64LOLO(Bsusa, Bsase); \ + Bsesi = 
GET64HILO(Bsase, Bsiso); \ + Bsosu = GET64HILO(Bsiso, Bsusa); \ + E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \ + XOReq128(Cae, E##sase); \ + E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \ + XOReq128(Cio, E##siso); \ + E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \ + XOReq128(Cua, E##su); \ +/* NOTE(review): Zero is read by the E##su statement above before the assignment below; declareABCDE declares it without an initial value, so unless the including code sets it first, the first expansion combines an indeterminate value into E##su - confirm that half of E##su is never consumed. */ \ + Zero = ZERO128(); \ + XOReq128(Cae, GET64HIHI(Cua, Zero)); \ + XOReq128(Cae, GET64LOLO(Zero, Cei)); \ + XOReq128(Cio, GET64HIHI(Cei, Zero)); \ + XOReq128(Cio, GET64LOLO(Zero, Cou)); \ + XOReq128(Cua, GET64HIHI(Cou, Zero)); \ + +/* --- Theta Rho Pi Chi Iota: in this variant simply an alias for thetaRhoPiChiIotaPrepareTheta, so the round it is used for also updates Cae, Cio and Cua */ +/* --- 64-bit lanes mapped to 64-bit and 128-bit words */ +#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E) +/* Per-round constants, indexed by the round number i; thetaRhoPiChiIotaPrepareTheta XORs entry i into E##bage. */ +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; +/* copyFromStateAndXor576bits: loads state[0] through state[24] into the X##* variables, XORing the leading part of input into the matching positions (input[0] through input[8], assuming each 128-bit load spans two consecutive elements); the remaining positions are loaded from state unchanged. Cae, Cio and Cua are accumulated from the loaded values along the way. */ +#define copyFromStateAndXor576bits(X, state, input) \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ +
XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, X##gio); \ + X##gu = LOAD64(state[ 9]); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = LOAD128(state[10]); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = LOAD128(state[12]); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = LOAD128(state[14]); \ + XOReq64(Cua, X##kuma); \ + X##me = LOAD64(state[16]); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = LOAD128u(state[17]); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromStateAndXor832bits(X, state, input) \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + 
X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, X##gio); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = LOAD128(state[14]); \ + XOReq64(Cua, X##kuma); \ + X##me = LOAD64(state[16]); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = LOAD128u(state[17]); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromStateAndXor1024bits(X, state, input) \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, 
X##go); \ + XOReq128(Cio, X##gio); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); /* NOTE(review): input[14] is read with LOAD128 (here and in the larger-rate variants), while every other 128-bit read of input uses LOAD128u. The definitions of both loaders are outside this file; if LOAD128 requires an aligned address, confirm that callers guarantee the alignment of input. */ \ + XOReq64(Cua, X##kuma); \ + X##me = LOAD64(state[16]); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = LOAD128u(state[17]); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromStateAndXor1088bits(X, state, input) /* same statement sequence as the 1024-bit variant, except that input[16] is additionally XORed into X##me */ \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, X##gio); \ +
X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \ + XOReq64(Cua, X##kuma); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = LOAD128u(state[17]); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromStateAndXor1152bits(X, state, input) \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, X##gio); \ + X##gu = 
XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \ + XOReq64(Cua, X##kuma); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromStateAndXor1344bits(X, state, input) \ + X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ + Cua = X##bu; \ + X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, 
X##gio); \ + X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + XOReq64(Cua, X##gu); \ + X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = XOR128(LOAD128(state[14]), LOAD128(input[14])); \ + XOReq64(Cua, X##kuma); \ + X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyFromState(X, state) \ + X##bae = LOAD128(state[ 0]); \ + X##ba = X##bae; \ + X##be = GET64HIHI(X##bae, X##bae); \ + Cae = X##bae; \ + X##bio = LOAD128(state[ 2]); \ + X##bi = X##bio; \ + X##bo = GET64HIHI(X##bio, X##bio); \ + Cio = X##bio; \ + X##bu = LOAD64(state[ 4]); \ + Cua = X##bu; \ + X##gae = LOAD128u(state[ 5]); \ + X##ga = X##gae; \ + X##buga = GET64LOLO(X##bu, X##ga); \ + X##ge = GET64HIHI(X##gae, X##gae); \ + X##bage = GET64LOLO(X##ba, X##ge); \ + XOReq128(Cae, X##gae); \ + X##gio = LOAD128u(state[ 7]); \ + X##gi = X##gio; \ + X##begi = GET64LOLO(X##be, X##gi); \ + X##go = GET64HIHI(X##gio, X##gio); \ + X##bigo = GET64LOLO(X##bi, X##go); \ + XOReq128(Cio, X##gio); \ + X##gu = LOAD64(state[ 9]); \ + X##bogu = GET64LOLO(X##bo, X##gu); \ + 
XOReq64(Cua, X##gu); \ + X##kae = LOAD128(state[10]); \ + X##ka = X##kae; \ + X##ke = GET64HIHI(X##kae, X##kae); \ + XOReq128(Cae, X##kae); \ + X##kio = LOAD128(state[12]); \ + X##ki = X##kio; \ + X##ko = GET64HIHI(X##kio, X##kio); \ + XOReq128(Cio, X##kio); \ + X##kuma = LOAD128(state[14]); \ + XOReq64(Cua, X##kuma); \ + X##me = LOAD64(state[16]); \ + X##kame = GET64LOLO(X##ka, X##me); \ + XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ + X##mio = LOAD128u(state[17]); \ + X##mi = X##mio; \ + X##kemi = GET64LOLO(X##ke, X##mi); \ + X##mo = GET64HIHI(X##mio, X##mio); \ + X##kimo = GET64LOLO(X##ki, X##mo); \ + XOReq128(Cio, X##mio); \ + X##mu = LOAD64(state[19]); \ + X##komu = GET64LOLO(X##ko, X##mu); \ + XOReq64(Cua, X##mu); \ + X##sase = LOAD128(state[20]); \ + XOReq128(Cae, X##sase); \ + X##siso = LOAD128(state[22]); \ + XOReq128(Cio, X##siso); \ + X##su = LOAD64(state[24]); \ + XOReq64(Cua, X##su); \ + +#define copyToState(state, X) \ + STORE64(state[ 0], X##bage); \ + STORE64(state[ 1], X##begi); \ + STORE64(state[ 2], X##bigo); \ + STORE64(state[ 3], X##bogu); \ + STORE128(state[ 4], X##buga); \ + STORE64(state[ 6], COPY64HI2LO(X##bage)); \ + STORE64(state[ 7], COPY64HI2LO(X##begi)); \ + STORE64(state[ 8], COPY64HI2LO(X##bigo)); \ + STORE64(state[ 9], COPY64HI2LO(X##bogu)); \ + STORE64(state[10], X##kame); \ + STORE64(state[11], X##kemi); \ + STORE64(state[12], X##kimo); \ + STORE64(state[13], X##komu); \ + STORE128(state[14], X##kuma); \ + STORE64(state[16], COPY64HI2LO(X##kame)); \ + STORE64(state[17], COPY64HI2LO(X##kemi)); \ + STORE64(state[18], COPY64HI2LO(X##kimo)); \ + STORE64(state[19], COPY64HI2LO(X##komu)); \ + STORE128(state[20], X##sase); \ + STORE128(state[22], X##siso); \ + STORE64(state[24], X##su); \ + +#define copyStateVariables(X, Y) \ + X##bage = Y##bage; \ + X##begi = Y##begi; \ + X##bigo = Y##bigo; \ + X##bogu = Y##bogu; \ + X##buga = Y##buga; \ + X##kame = Y##kame; \ + X##kemi = Y##kemi; \ + X##kimo = Y##kimo; \ + X##komu = Y##komu; \ + 
X##kuma = Y##kuma; \ + X##sase = Y##sase; \ + X##siso = Y##siso; \ + X##su = Y##su; \ + diff --git a/Modules/_sha3/keccak/KeccakNISTInterface.c b/Modules/_sha3/keccak/KeccakNISTInterface.c new file mode 100644 index 0000000..e94082b --- /dev/null +++ b/Modules/_sha3/keccak/KeccakNISTInterface.c @@ -0,0 +1,83 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include "KeccakNISTInterface.h" +#include "KeccakF-1600-interface.h" + +static HashReturn Init(hashState *state, int hashbitlen) +{ + switch(hashbitlen) { + case 0: /* Default parameters, arbitrary length output */ + InitSponge((spongeState*)state, 1024, 576); + break; + case 224: + InitSponge((spongeState*)state, 1152, 448); + break; + case 256: + InitSponge((spongeState*)state, 1088, 512); + break; + case 384: + InitSponge((spongeState*)state, 832, 768); + break; + case 512: + InitSponge((spongeState*)state, 576, 1024); + break; + default: + return BAD_HASHLEN; + } + state->fixedOutputLength = hashbitlen; + return SUCCESS; +} + +static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) == 0) + return Absorb((spongeState*)state, data, databitlen); + else { + HashReturn ret = Absorb((spongeState*)state, data, databitlen - (databitlen % 8)); + if (ret == SUCCESS) { + unsigned char lastByte; + /* Align the last partial byte to the least significant bits */ + lastByte = data[databitlen/8] >> (8 - (databitlen % 8)); + return Absorb((spongeState*)state, &lastByte, databitlen % 8); + } + else + return ret; + }
+} + +static HashReturn Final(hashState *state, BitSequence *hashval) +{ + return Squeeze(state, hashval, state->fixedOutputLength); +} + +/* +static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval) +{ + hashState state; + HashReturn result; + + if ((hashbitlen != 224) && (hashbitlen != 256) && (hashbitlen != 384) && (hashbitlen != 512)) + return BAD_HASHLEN; * Only the four fixed output lengths available through this API * + result = Init(&state, hashbitlen); + if (result != SUCCESS) + return result; + result = Update(&state, data, databitlen); + if (result != SUCCESS) + return result; + result = Final(&state, hashval); + return result; +} +*/ + diff --git a/Modules/_sha3/keccak/KeccakNISTInterface.h b/Modules/_sha3/keccak/KeccakNISTInterface.h new file mode 100644 index 0000000..244431b --- /dev/null +++ b/Modules/_sha3/keccak/KeccakNISTInterface.h @@ -0,0 +1,72 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakNISTInterface_h_ +#define _KeccakNISTInterface_h_ + +#include "KeccakSponge.h" + +typedef unsigned char BitSequence; +typedef unsigned long long DataLength; +typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; + +typedef spongeState hashState; + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The rate r and capacity c values are determined from @a hashbitlen. + * @param state Pointer to the state of the sponge function to be initialized. 
+ * @param hashbitlen The desired number of output bits, + * or 0 for Keccak[] with default parameters + * and arbitrarily-long output. + * @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512. + * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect. + */ +static HashReturn Init(hashState *state, int hashbitlen); +/** + * Function to give input data for the sponge function to absorb. + * @param state Pointer to the state of the sponge function initialized by Init(). + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the most significant bits of the last byte. + * @param databitLen The number of input bits provided in the input data. + * @pre In the previous call to Absorb(), databitLen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); +/** + * Function to squeeze output data from the sponge function. + * If @a hashbitlen was not 0 in the call to Init(), the number of output bits is equal to @a hashbitlen. + * If @a hashbitlen was 0 in the call to Init(), the output bits must be extracted using the Squeeze() function. + * @param state Pointer to the state of the sponge function initialized by Init(). + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +static HashReturn Final(hashState *state, BitSequence *hashval); +/** + * Function to compute a hash using the Keccak[r, c] sponge function. + * The rate r and capacity c values are determined from @a hashbitlen. + * @param hashbitlen The desired number of output bits. + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the most significant bits of the last byte. 
+ * @param databitLen The number of input bits provided in the input data. + * @param hashval Pointer to the buffer where to store the output data. + * @pre The value of hashbitlen must be one of 224, 256, 384 and 512. + * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect. + */ +/* +static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); +*/ + +#endif diff --git a/Modules/_sha3/keccak/KeccakSponge.c b/Modules/_sha3/keccak/KeccakSponge.c new file mode 100644 index 0000000..1ca6bf0 --- /dev/null +++ b/Modules/_sha3/keccak/KeccakSponge.c @@ -0,0 +1,266 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include "KeccakSponge.h" +#include "KeccakF-1600-interface.h" +#ifdef KeccakReference +#include "displayIntermediateValues.h" +#endif + +static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != 1600) + return 1; + if ((rate <= 0) || (rate >= 1600) || ((rate % 64) != 0)) + return 1; + KeccakInitialize(); + state->rate = rate; + state->capacity = capacity; + state->fixedOutputLength = 0; + KeccakInitializeState(state->state); + memset(state->dataQueue, 0, KeccakMaximumRateInBytes); + state->bitsInQueue = 0; + state->squeezing = 0; + state->bitsAvailableForSqueezing = 0; + + return 0; +} + +static void AbsorbQueue(spongeState *state) +{ + /* state->bitsInQueue is assumed to be equal to state->rate */ + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8); + #endif +#ifdef ProvideFast576 + if (state->rate == 576) + KeccakAbsorb576bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast832 + if (state->rate == 832) + KeccakAbsorb832bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1024 + if (state->rate == 1024) + KeccakAbsorb1024bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1088 + if (state->rate == 1088) + KeccakAbsorb1088bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1152 + if (state->rate == 1152) + KeccakAbsorb1152bits(state->state, state->dataQueue); + else +#endif +#ifdef ProvideFast1344 + if (state->rate == 1344) + KeccakAbsorb1344bits(state->state, state->dataQueue); + else +#endif + KeccakAbsorb(state->state, state->dataQueue, state->rate/64); + state->bitsInQueue = 0; +} + +static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen) +{ + unsigned long long i, j, wholeBlocks; + unsigned int partialBlock, partialByte; + const unsigned char *curData; + + if
((state->bitsInQueue % 8) != 0) + return 1; /* Only the last call may contain a partial byte */ + if (state->squeezing) + return 1; /* Too late for additional input */ + + i = 0; + while(i < databitlen) { + if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) { + wholeBlocks = (databitlen-i)/state->rate; + curData = data+i/8; +#ifdef ProvideFast576 + if (state->rate == 576) { + for(j=0; j<wholeBlocks; j++, curData+=576/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb576bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast832 + if (state->rate == 832) { + for(j=0; j<wholeBlocks; j++, curData+=832/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb832bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1024 + if (state->rate == 1024) { + for(j=0; j<wholeBlocks; j++, curData+=1024/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1024bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1088 + if (state->rate == 1088) { + for(j=0; j<wholeBlocks; j++, curData+=1088/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1088bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1152 + if (state->rate == 1152) { + for(j=0; j<wholeBlocks; j++, curData+=1152/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1152bits(state->state, curData); + } + } + else +#endif +#ifdef ProvideFast1344 + if (state->rate == 1344) { + for(j=0; j<wholeBlocks; j++, curData+=1344/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb1344bits(state->state, curData); + } + } + else +#endif + { + for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, state->rate/8); + #endif + KeccakAbsorb(state->state, curData, state->rate/64); + } + } + i += wholeBlocks*state->rate; + } + else { + partialBlock = (unsigned int)(databitlen - i); + if (partialBlock+state->bitsInQueue > state->rate) + partialBlock = state->rate-state->bitsInQueue; + partialByte = partialBlock % 8; + partialBlock -= partialByte; + memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8); + state->bitsInQueue += partialBlock; + i += partialBlock; + if (state->bitsInQueue == state->rate) + AbsorbQueue(state); + if (partialByte > 0) { + unsigned char mask = (1 <<
partialByte)-1; + state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask; + state->bitsInQueue += partialByte; + i += partialByte; + } + } + } + return 0; +} + +static void PadAndSwitchToSqueezingPhase(spongeState *state) +{ + /* Note: the bits are numbered from 0=LSB to 7=MSB */ + if (state->bitsInQueue + 1 == state->rate) { + state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); + AbsorbQueue(state); + memset(state->dataQueue, 0, state->rate/8); + } + else { + memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8); + state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); + } + state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8); + AbsorbQueue(state); + + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif +#ifdef ProvideFast1024 + if (state->rate == 1024) { + KeccakExtract1024bits(state->state, state->dataQueue); + state->bitsAvailableForSqueezing = 1024; + } + else +#endif + { + KeccakExtract(state->state, state->dataQueue, state->rate/64); + state->bitsAvailableForSqueezing = state->rate; + } + #ifdef KeccakReference + displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8); + #endif + state->squeezing = 1; +} + +static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength) +{ + unsigned long long i; + unsigned int partialBlock; + + if (!state->squeezing) + PadAndSwitchToSqueezingPhase(state); + if ((outputLength % 8) != 0) + return 1; /* Only multiple of 8 bits are allowed, truncation can be done at user level */ + + i = 0; + while(i < outputLength) { + if (state->bitsAvailableForSqueezing == 0) { + KeccakPermutation(state->state); +#ifdef ProvideFast1024 + if (state->rate == 1024) { + KeccakExtract1024bits(state->state, state->dataQueue); + state->bitsAvailableForSqueezing = 1024; + } + else +#endif + { + KeccakExtract(state->state, 
state->dataQueue, state->rate/64); + state->bitsAvailableForSqueezing = state->rate; + } + #ifdef KeccakReference + displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8); + #endif + } + partialBlock = state->bitsAvailableForSqueezing; + if ((unsigned long long)partialBlock > outputLength - i) + partialBlock = (unsigned int)(outputLength - i); + memcpy(output+i/8, state->dataQueue+(state->rate-state->bitsAvailableForSqueezing)/8, partialBlock/8); + state->bitsAvailableForSqueezing -= partialBlock; + i += partialBlock; + } + return 0; +} diff --git a/Modules/_sha3/keccak/KeccakSponge.h b/Modules/_sha3/keccak/KeccakSponge.h new file mode 100644 index 0000000..a545cac --- /dev/null +++ b/Modules/_sha3/keccak/KeccakSponge.h @@ -0,0 +1,76 @@ +/* +The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +Michaël Peeters and Gilles Van Assche. For more information, feedback or +questions, please refer to our website: http://keccak.noekeon.org/ + +Implementation by the designers, +hereby denoted as "the implementer". + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +#define KeccakPermutationSize 1600 +#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8) +#define KeccakMaximumRate 1536 +#define KeccakMaximumRateInBytes (KeccakMaximumRate/8) + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + +ALIGN typedef struct spongeStateStruct { + ALIGN unsigned char state[KeccakPermutationSizeInBytes]; + ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes]; + unsigned int rate; + unsigned int capacity; + unsigned int bitsInQueue; + unsigned int fixedOutputLength; + int squeezing; + unsigned int bitsAvailableForSqueezing; +} spongeState; + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The sponge function is set to the absorbing phase. + * @param state Pointer to the state of the sponge function to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation. + * @return Zero if successful, 1 otherwise. + */ +static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity); +/** + * Function to give input data for the sponge function to absorb. + * @param state Pointer to the state of the sponge function initialized by InitSponge(). + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the least significant bits of the last byte. + * @param databitLen The number of input bits provided in the input data. + * @pre In the previous call to Absorb(), databitLen was a multiple of 8. + * @pre The sponge function must be in the absorbing phase, + * i.e., Squeeze() must not have been called before. + * @return Zero if successful, 1 otherwise. 
+ */ +static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen); +/** + * Function to squeeze output data from the sponge function. + * If the sponge function was in the absorbing phase, this function + * switches it to the squeezing phase. + * @param state Pointer to the state of the sponge function initialized by InitSponge(). + * @param output Pointer to the buffer where to store the output data. + * @param outputLength The number of output bits desired. + * It must be a multiple of 8. + * @return Zero if successful, 1 otherwise. + */ +static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength); + +#endif diff --git a/Modules/_sha3/keccak/crypto_hash.h b/Modules/_sha3/keccak/crypto_hash.h new file mode 100644 index 0000000..e69de29 diff --git a/Modules/_sha3/sha3module.c b/Modules/_sha3/sha3module.c new file mode 100644 index 0000000..446fe92 --- /dev/null +++ b/Modules/_sha3/sha3module.c @@ -0,0 +1,569 @@ +/* SHA3 module + * + * This module provides an interface to the SHA3 algorithm + * + * See below for information about the original code this module was + * based upon. Additional work performed by: + * + * Andrew Kuchling (amk@amk.ca) + * Greg Stein (gstein@lyra.org) + * Trevor Perrin (trevp@trevp.net) + * Gregory P. Smith (greg@krypto.org) + * + * Copyright (C) 2012 Christian Heimes (christian@python.org) + * Licensed to PSF under a Contributor Agreement. + * + */ + +#include "Python.h" +#include "../hashlib.h" + +/* ************************************************************************** + * SHA-3 (Keccak) + * + * The code is based on KeccakReferenceAndOptimized-3.2.zip from 29 May 2012. + * + * The reference implementation is altered in this points: + * - C++ comments are converted to ANSI C comments. + * - All functions and globals are declared static. + * - The typedef for UINT64 is commented out. + * - brg_endian.h is removed. 
+ * - KeccakF-1600-opt[32|64]-settings.h are commented out + * - Some unused functions are commented out to silence compiler warnings. + * + * In order to avoid name clashes with other software I have to declare all + * Keccak functions and global data as static. The C code is directly + * included into this file in order to access the static functions. + * + * Keccak can be tuned with several parameters. I try to explain all options + * as far as I understand them. The reference implementation also contains + * assembler code for ARM platforms (NEON instructions). + * + * Common + * ====== + * + * Options: + * UseBebigokimisa, Unrolling + * + * - Unrolling: loop unrolling (24, 12, 8, 6, 4, 3, 2, 1) + * - UseBebigokimisa: lane complementing + * + * 64bit platforms + * =============== + * + * Additional options: + * UseSSE, UseOnlySIMD64, UseMMX, UseXOP, UseSHLD + * + * Optimized instructions (disabled by default): + * - UseSSE: use Streaming SIMD Extensions + * o UseOnlySIMD64: limit to 64bit instructions, otherwise 128bit + * o w/o UseOnlySIMD64: requires compiler argument -mssse3 or -mtune + * - UseMMX: use 64bit MMX instructions + * - UseXOP: use AMD's eXtended Operations (128bit SSE extension) + * + * Other: + * - Unrolling: default 24 + * - UseBebigokimisa: default 1 + * + * When neither UseSSE, UseMMX nor UseXOP is configured, ROL64 (rotate left + * 64) is implemented as: + * - Windows: _rotl64() + * - UseSHLD: use shld (shift left) asm optimization + * - otherwise: shift and xor + * + * UseBebigokimisa can't be used in combination with UseSSE, UseMMX or + * UseXOP. UseOnlySIMD64 has no effect unless UseSSE is specified. + * + * Tests have shown that UseSSE + UseOnlySIMD64 is about three to four + * times SLOWER than UseBebigokimisa. UseSSE and UseMMX are about two times + * slower.
(tested by CH and AP) + * + * 32bit platforms + * =============== + * + * Additional options: + * UseInterleaveTables, UseSchedule + * + * - Unrolling: default 2 + * - UseBebigokimisa: default n/a + * - UseSchedule: ???, (1, 2, 3; default 3) + * - UseInterleaveTables: use two 64k lookup tables for (de)interleaving + * default: n/a + * + * schedules: + * - 3: no UseBebigokimisa, Unrolling must be 2 + * - 2 + 1: ??? + * + * *************************************************************************/ + +#if SIZEOF_VOID_P == 8 && defined(PY_UINT64_T) + /* 64bit platforms with unsigned int64 */ + #define KeccakImplementation 64 + #define Unrolling 24 + #define UseBebigokimisa + typedef PY_UINT64_T UINT64; +#elif SIZEOF_VOID_P == 4 && defined(PY_UINT64_T) + /* 32bit platforms with unsigned int64 */ + #define KeccakImplementation 32 + #define Unrolling 2 + #define UseSchedule 3 + typedef PY_UINT64_T UINT64; +#else + /* 32 or 64bit platforms without unsigned int64 */ + #warning no uint64_t available, force Keccak opt32 with interleave tables + #define KeccakImplementation 32 + #define Unrolling 2 + #define UseSchedule 3 + #define UseInterleaveTables +#endif + +/* replacement for brg_endian.h */ +#define IS_BIG_ENDIAN BIG_ENDIAN +#define IS_LITTLE_ENDIAN LITTLE_ENDIAN +#define PLATFORM_BYTE_ORDER BYTE_ORDER + +/* inline all Keccak dependencies */ +#include "keccak/KeccakNISTInterface.h" +#include "keccak/KeccakNISTInterface.c" +#include "keccak/KeccakSponge.c" +#if KeccakImplementation == 64 + #include "keccak/KeccakF-1600-opt64.c" +#elif KeccakImplementation == 32 + #include "keccak/KeccakF-1600-opt32.c" +#endif + +#define SHA3_BLOCKSIZE 200 /* 1600 bits */ +#define SHA3_MAX_DIGESTSIZE 64 /* 512 bits */ +#define SHA3_state hashState +#define SHA3_init Init +#define SHA3_process Update +#define SHA3_done Final +#define SHA3_copystate(dest, src) memcpy(&(dest), &(src), sizeof(SHA3_state)) +#define SHA3_clearstate(state) memset(&(state), 0, sizeof(SHA3_state)) + +/* The 
structure for storing SHA3 info */ + +typedef struct { + PyObject_HEAD + int hashbitlen; + SHA3_state hash_state; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif + +} SHA3object; + +static PyTypeObject SHA3type; + + +static SHA3object * +newSHA3object(int hashbitlen) +{ + SHA3object *newobj; + + /* check hashbitlen */ + switch(hashbitlen) { + /* supported hash length */ + case 224: + break; + case 256: + break; + case 384: + break; + case 512: + break; + case 0: + /* arbitrarily-long output isn't supported by this module */ + default: + /* everything else is an error */ + PyErr_SetString(PyExc_ValueError, + "hashbitlen must be one of 224, 256, 384 or 512."); + return NULL; + } + newobj = (SHA3object *)PyObject_New(SHA3object, &SHA3type); + if (newobj == NULL) { + return NULL; + } + newobj->hashbitlen = hashbitlen; +#ifdef WITH_THREAD + newobj->lock = NULL; +#endif + return newobj; +} + + +/* Internal methods for a hash object */ + +static void +SHA3_dealloc(SHA3object *self) +{ + SHA3_clearstate(self->hash_state); +#ifdef WITH_THREAD + if (self->lock) { + PyThread_free_lock(self->lock); + } +#endif + PyObject_Del(self); +} + + +/* External methods for a hash object */ + +PyDoc_STRVAR(SHA3_copy__doc__, "Return a copy of the hash object."); + +static PyObject * +SHA3_copy(SHA3object *self, PyObject *unused) +{ + SHA3object *newobj; + + if ((newobj = newSHA3object(self->hashbitlen)) == NULL) { + return NULL; + } + ENTER_HASHLIB(self); + SHA3_copystate(newobj->hash_state, self->hash_state); + LEAVE_HASHLIB(self); + return (PyObject *)newobj; +} + + +PyDoc_STRVAR(SHA3_digest__doc__, +"Return the digest value as a string of binary data."); + +static PyObject * +SHA3_digest(SHA3object *self, PyObject *unused) +{ + unsigned char digest[SHA3_MAX_DIGESTSIZE]; + SHA3_state temp; + HashReturn res; + + ENTER_HASHLIB(self); + SHA3_copystate(temp, self->hash_state); + LEAVE_HASHLIB(self); + res = SHA3_done(&temp, digest); + SHA3_clearstate(temp); + if (res != SUCCESS) { + 
PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); + return NULL; + } + return PyBytes_FromStringAndSize((const char *)digest, + self->hashbitlen / 8); +} + + +PyDoc_STRVAR(SHA3_hexdigest__doc__, +"Return the digest value as a string of hexadecimal digits."); + +static PyObject * +SHA3_hexdigest(SHA3object *self, PyObject *unused) +{ + unsigned char digest[SHA3_MAX_DIGESTSIZE]; + SHA3_state temp; + HashReturn res; + PyObject *retval; + Py_UCS1 *hex_digest; + int digestlen, i, j; + + /* Get the raw (binary) digest value */ + ENTER_HASHLIB(self); + SHA3_copystate(temp, self->hash_state); + LEAVE_HASHLIB(self); + res = SHA3_done(&temp, digest); + SHA3_clearstate(temp); + if (res != SUCCESS) { + PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()"); + return NULL; + } + + /* Create a new string */ + digestlen = self->hashbitlen / 8; + retval = PyUnicode_New(digestlen * 2, 127); + if (!retval) + return NULL; + hex_digest = PyUnicode_1BYTE_DATA(retval); + + /* Make hex version of the digest */ + for(i=j=0; i < digestlen; i++) { + unsigned char c; + c = (digest[i] >> 4) & 0xf; + hex_digest[j++] = Py_hexdigits[c]; + c = (digest[i] & 0xf); + hex_digest[j++] = Py_hexdigits[c]; + } + assert(_PyUnicode_CheckConsistency(retval, 1)); + return retval; +} + +PyDoc_STRVAR(SHA3_update__doc__, +"Update this hash object's state with the provided string."); + +static PyObject * +SHA3_update(SHA3object *self, PyObject *args) +{ + PyObject *obj; + Py_buffer buf; + HashReturn res; + + if (!PyArg_ParseTuple(args, "O:update", &obj)) + return NULL; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + + /* add new data, the function takes the length in bits not bytes */ +#ifdef WITH_THREADS + if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) { + self->lock = PyThread_allocate_lock(); + } + /* Once a lock exists all code paths must be synchronized. 
We have to
+     * release the GIL even for small buffers as acquiring the lock may take
+     * an unlimited amount of time when another thread updates this object
+     * with lots of data. */
+    if (self->lock) {
+        Py_BEGIN_ALLOW_THREADS
+        PyThread_acquire_lock(self->lock, 1);
+        res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+        PyThread_release_lock(self->lock);
+        Py_END_ALLOW_THREADS
+    }
+    else {
+        res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+    }
+#else
+    res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+#endif
+    LEAVE_HASHLIB(self);
+
+    if (res != SUCCESS) {
+        PyBuffer_Release(&buf);
+        PyErr_SetString(PyExc_RuntimeError,
+                        "internal error in SHA3 Update()");
+        return NULL;
+    }
+
+    PyBuffer_Release(&buf);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* Method table of SHA3 hash objects (installed as SHA3type.tp_methods). */
+static PyMethodDef SHA3_methods[] = {
+    {"copy",      (PyCFunction)SHA3_copy,      METH_NOARGS,
+         SHA3_copy__doc__},
+    {"digest",    (PyCFunction)SHA3_digest,    METH_NOARGS,
+         SHA3_digest__doc__},
+    {"hexdigest", (PyCFunction)SHA3_hexdigest, METH_NOARGS,
+         SHA3_hexdigest__doc__},
+    {"update",    (PyCFunction)SHA3_update,    METH_VARARGS,
+         SHA3_update__doc__},
+    {NULL,        NULL}         /* sentinel */
+};
+
+/* Getter for "block_size": returns SHA3_BLOCKSIZE as a Python int. */
+static PyObject *
+SHA3_get_block_size(SHA3object *self, void *closure)
+{
+    return PyLong_FromLong(SHA3_BLOCKSIZE);
+}
+
+/* Getter for "name": returns the str "sha3_<hashbitlen>", e.g. "sha3_256". */
+static PyObject *
+SHA3_get_name(SHA3object *self, void *closure)
+{
+    return PyUnicode_FromFormat("sha3_%i", self->hashbitlen);
+}
+
+/* Getter for "digest_size": the digest length in bytes (hashbitlen / 8). */
+static PyObject *
+SHA3_get_digest_size(SHA3object *self, void *closure)
+{
+    return PyLong_FromLong(self->hashbitlen / 8);
+}
+
+
+/* Read-only attributes of SHA3 hash objects; no setters are provided. */
+static PyGetSetDef SHA3_getseters[] = {
+    {"block_size", (getter)SHA3_get_block_size, NULL, NULL, NULL},
+    {"name", (getter)SHA3_get_name, NULL, NULL, NULL},
+    {"digest_size", (getter)SHA3_get_digest_size, NULL, NULL, NULL},
+    {NULL}  /* Sentinel */
+};
+
+/* Type object shared by all four digest sizes.  It has no tp_new/tp_init:
+ * instances are only created through the sha3_*() module functions below. */
+static PyTypeObject SHA3type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_sha3.SHA3",       /* tp_name */
+    sizeof(SHA3object), /* tp_basicsize */
+    0,                  /* tp_itemsize */
+    /*  methods  */
+    (destructor)SHA3_dealloc, /* tp_dealloc */
+    0,                  /* tp_print */
+    0,                  /* tp_getattr */
+    0,                  /* tp_setattr */
+    0,                  /* tp_reserved */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT, /* tp_flags */
+    0,                  /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    0,                  /* tp_weaklistoffset */
+    0,                  /* tp_iter */
+    0,                  /* tp_iternext */
+    SHA3_methods,       /* tp_methods */
+    NULL,               /* tp_members */
+    SHA3_getseters,     /* tp_getset */
+};
+
+
+/* constructor helper
+ *
+ * Shared implementation of sha3_224() ... sha3_512().
+ *
+ *   args, kwdict  arguments of the Python-level call; the only accepted
+ *                 parameter is the optional "string" (initial data).
+ *   fmt           PyArg_ParseTupleAndKeywords() format, e.g. "|O:sha3_256".
+ *   hashbitlen    digest length in bits, forwarded to newSHA3object() and
+ *                 SHA3_init().
+ *
+ * Returns a new reference to the hash object, or NULL with an exception set.
+ */
+static PyObject *
+SHA3_factory(PyObject *args, PyObject *kwdict, const char *fmt,
+             int hashbitlen)
+{
+    SHA3object *newobj = NULL;
+    static char *kwlist[] = {"string", NULL};
+    PyObject *data_obj = NULL;
+    Py_buffer buf;
+    HashReturn res;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, fmt, kwlist,
+                                     &data_obj)) {
+        return NULL;
+    }
+
+    /* buf is only acquired (and must only be released) when data was given;
+     * the macro returns NULL from this function on failure. */
+    if (data_obj)
+        GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf);
+
+    if ((newobj = newSHA3object(hashbitlen)) == NULL) {
+        goto error;
+    }
+
+    if (SHA3_init(&newobj->hash_state, hashbitlen) != SUCCESS) {
+        /* Name the step that actually failed (was a copy of the Update()
+         * message). */
+        PyErr_SetString(PyExc_RuntimeError,
+                        "internal error in SHA3 initialize()");
+        goto error;
+    }
+
+    if (data_obj) {
+        /* The configuration macro is WITH_THREAD (see hashlib.h); the former
+         * WITH_THREADS spelling is never defined, so the GIL-releasing
+         * branch below was silently compiled out. */
+#ifdef WITH_THREAD
+        if (buf.len >= HASHLIB_GIL_MINSIZE) {
+            /* invariant: New objects can't be accessed by other code yet,
+             * thus it's safe to release the GIL without locking the object.
+             */
+            Py_BEGIN_ALLOW_THREADS
+            res = SHA3_process(&newobj->hash_state, buf.buf, buf.len * 8);
+            Py_END_ALLOW_THREADS
+        }
+        else {
+            res = SHA3_process(&newobj->hash_state, buf.buf, buf.len * 8);
+        }
+#else
+        res = SHA3_process(&newobj->hash_state, buf.buf, buf.len * 8);
+#endif
+        if (res != SUCCESS) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "internal error in SHA3 Update()");
+            goto error;
+        }
+        PyBuffer_Release(&buf);
+    }
+
+    return (PyObject *)newobj;
+
+  error:
+    if (newobj) {
+        SHA3_clearstate(newobj->hash_state);
+        /* self->lock is always NULL */
+        /* Drop the only reference so that tp_dealloc (SHA3_dealloc) frees
+         * the half-built object; without this it leaked on every error. */
+        Py_DECREF(newobj);
+    }
+    if (data_obj) {
+        PyBuffer_Release(&buf);
+    }
+    return NULL;
+
+}
+
+PyDoc_STRVAR(sha3_224__doc__,
+"sha3_224([string]) -> SHA3 object\n\
+\n\
+Return a new SHA3 hash object with a hashbit length of 28 bytes.");
+
+static PyObject *
+sha3_224(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    return SHA3_factory(args, kwdict, "|O:sha3_224", 224);
+}
+
+
+PyDoc_STRVAR(sha3_256__doc__,
+"sha3_256([string]) -> SHA3 object\n\
+\n\
+Return a new SHA3 hash object with a hashbit length of 32 bytes.");
+
+static PyObject *
+sha3_256(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    return SHA3_factory(args, kwdict, "|O:sha3_256", 256);
+}
+
+PyDoc_STRVAR(sha3_384__doc__,
+"sha3_384([string]) -> SHA3 object\n\
+\n\
+Return a new SHA3 hash object with a hashbit length of 48 bytes.");
+
+static PyObject *
+sha3_384(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    return SHA3_factory(args, kwdict, "|O:sha3_384", 384);
+}
+
+PyDoc_STRVAR(sha3_512__doc__,
+"sha3_512([string]) -> SHA3 object\n\
+\n\
+Return a new SHA3 hash object with a hashbit length of 64 bytes.");
+
+static PyObject *
+sha3_512(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    return SHA3_factory(args, kwdict, "|O:sha3_512", 512);
+}
+
+
+/* List of functions exported by this module */
+static struct PyMethodDef SHA3_functions[] = {
+    {"sha3_224", (PyCFunction)sha3_224, METH_VARARGS|METH_KEYWORDS,
+         sha3_224__doc__},
+    {"sha3_256", (PyCFunction)sha3_256, METH_VARARGS|METH_KEYWORDS,
+         sha3_256__doc__},
+    {"sha3_384", (PyCFunction)sha3_384, METH_VARARGS|METH_KEYWORDS,
+         sha3_384__doc__},
+    {"sha3_512", (PyCFunction)sha3_512, METH_VARARGS|METH_KEYWORDS,
+         sha3_512__doc__},
+    {NULL,      NULL}            /* Sentinel */
+};
+
+
+/* Initialize this module. */
+static struct PyModuleDef _SHA3module = {
+        PyModuleDef_HEAD_INIT,
+        "_sha3",                /* m_name */
+        NULL,                   /* m_doc */
+        -1,                     /* m_size: no per-module state */
+        SHA3_functions,         /* m_methods */
+        NULL,
+        NULL,
+        NULL,
+        NULL
+};
+
+/* Module entry point: finalizes SHA3type, then creates the module.  Returns
+ * NULL if PyType_Ready() fails. */
+PyMODINIT_FUNC
+PyInit__sha3(void)
+{
+    Py_TYPE(&SHA3type) = &PyType_Type;
+    if (PyType_Ready(&SHA3type) < 0) {
+        return NULL;
+    }
+
+    return PyModule_Create(&_SHA3module);
+}
diff --git a/Modules/hashlib.h b/Modules/hashlib.h
index db39cea..7cb6ee5 100644
--- a/Modules/hashlib.h
+++ b/Modules/hashlib.h
@@ -26,3 +26,36 @@
             return NULL; \
         } \
     } while(0);
+
+/*
+ * Helper code to synchronize access to the hash object when the GIL is
+ * released around a CPU consuming hashlib operation. All code paths that
+ * access a mutable part of obj must be enclosed in a ENTER_HASHLIB /
+ * LEAVE_HASHLIB block or explicitly acquire and release the lock inside
+ * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for
+ * an operation.
+ */
+
+#ifdef WITH_THREAD
+#include "pythread.h"
+    #define ENTER_HASHLIB(obj) \
+        if ((obj)->lock) { \
+            if (!PyThread_acquire_lock((obj)->lock, 0)) { \
+                Py_BEGIN_ALLOW_THREADS \
+                PyThread_acquire_lock((obj)->lock, 1); \
+                Py_END_ALLOW_THREADS \
+            } \
+        }
+    #define LEAVE_HASHLIB(obj) \
+        if ((obj)->lock) { \
+            PyThread_release_lock((obj)->lock); \
+        }
+#else
+    #define ENTER_HASHLIB(obj)
+    #define LEAVE_HASHLIB(obj)
+#endif
+
+/* TODO(gps): We should probably make this a module or EVPobject attribute
+ * to allow the user to optimize based on the platform they're using. */
+#define HASHLIB_GIL_MINSIZE 2048
+
diff --git a/setup.py b/setup.py
index 4cd3204..b030c4d 100644
--- a/setup.py
+++ b/setup.py
@@ -838,6 +838,15 @@ class PyBuildExt(build_ext):
             exts.append( Extension('_sha1', ['sha1module.c'],
                             depends=['hashlib.h']) )
 
+        # SHA-3 (Keccak) module
+        sha3_depends = ['hashlib.h']
+        keccak = os.path.join(os.getcwd(), srcdir, 'Modules', '_sha3',
+                              'keccak')
+        for pattern in ('*.c', '*.h', '*.macros'):
+            sha3_depends.extend(glob(os.path.join(keccak, pattern)))
+        exts.append(Extension("_sha3", ["_sha3/sha3module.c"],
+                              depends=sha3_depends))
+
         # Modules that provide persistent dictionary-like semantics.  You will
         # probably want to arrange for at least one of them to be available on
         # your machine, though none are defined by default because of library
-- 
cgit v0.12