diff options
Diffstat (limited to 'Modules/_sha3/kcp/KeccakP-1600-opt64.c')
-rw-r--r-- | Modules/_sha3/kcp/KeccakP-1600-opt64.c | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64.c b/Modules/_sha3/kcp/KeccakP-1600-opt64.c new file mode 100644 index 0000000..c90010d --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-opt64.c @@ -0,0 +1,474 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaƫl Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include <string.h> +#include <stdlib.h> +/* #include "brg_endian.h" */ +#include "KeccakP-1600-opt64-config.h" + +#if NOT_PYTHON +typedef unsigned char UINT8; +/* typedef unsigned long long int UINT64; */ +#endif + +#if defined(KeccakP1600_useLaneComplementing) +#define UseBebigokimisa +#endif + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#ifdef KeccakP1600_fullUnrolling +#define FullUnrolling +#else +#define Unrolling KeccakP1600_unrolling +#endif +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +#ifdef KeccakP1600_useLaneComplementing + ((UINT64*)state)[ 1] = ~(UINT64)0; + ((UINT64*)state)[ 2] = ~(UINT64)0; + ((UINT64*)state)[ 8] = ~(UINT64)0; + ((UINT64*)state)[12] = ~(UINT64)0; + ((UINT64*)state)[17] = ~(UINT64)0; + ((UINT64*)state)[20] = ~(UINT64)0; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT64 lane; + if (length == 0) + return; + if (length == 1) + lane = data[0]; + else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset*8; +#else + UINT64 lane = 0; + unsigned int i; + for(i=0; i<length; i++) + lane |= ((UINT64)data[i]) << ((i+offset)*8); +#endif + ((UINT64*)state)[lanePosition] ^= lane; +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + unsigned int i = 0; +#ifdef NO_MISALIGNED_ACCESSES + /* If either pointer is misaligned, fall back to byte-wise xor. */ + + if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) { + for (i = 0; i < laneCount * 8; i++) { + ((unsigned char*)state)[i] ^= data[i]; + } + } + else +#endif + { + /* Otherwise... */ + + for( ; (i+8)<=laneCount; i+=8) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; + ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; + ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4]; + ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5]; + ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6]; + ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7]; + } + for( ; (i+4)<=laneCount; i+=4) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2]; + ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3]; + } + for( ; (i+2)<=laneCount; i+=2) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1]; + } + if (i<laneCount) { + ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0]; + } + } +#else + unsigned int i; + UINT8 *curData = data; + for(i=0; i<laneCount; i++, curData+=8) { + UINT64 lane = (UINT64)curData[0] + | ((UINT64)curData[1] << 8) + | ((UINT64)curData[2] << 16) + | ((UINT64)curData[3] << 24) + | ((UINT64)curData[4] <<32) + | ((UINT64)curData[5] << 40) + | ((UINT64)curData[6] << 48) + | ((UINT64)curData[7] << 56); + ((UINT64*)state)[i] ^= lane; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) +{ + UINT64 lane = byte; + lane <<= (offset%8)*8; + ((UINT64*)state)[offset/8] ^= lane; +} +#endif + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) { + unsigned int i; + for(i=0; i<length; i++) + ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i]; + } + else +#endif + { + memcpy((unsigned char*)state+lanePosition*8+offset, data, length); + } +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + unsigned int lanePosition; + + for(lanePosition=0; lanePosition<laneCount; lanePosition++) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition]; + else + ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition]; +#else + memcpy(state, data, laneCount*8); +#endif +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#ifdef KeccakP1600_useLaneComplementing + unsigned int lanePosition; + + for(lanePosition=0; lanePosition<byteCount/8; lanePosition++) + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + ((UINT64*)state)[lanePosition] = ~0; + else + ((UINT64*)state)[lanePosition] = 0; + if (byteCount%8 != 0) { + lanePosition = byteCount/8; + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8); + else + memset((unsigned char*)state+lanePosition*8, 0, byteCount%8); + } +#else + memset(state, 0, byteCount); +#endif +#else +#error "Not yet implemented" +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) +{ + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + + copyFromState(A, stateAsLanes) + rounds24 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) +{ + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + + copyFromState(A, stateAsLanes) + rounds12 + copyToState(stateAsLanes, A) +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + UINT64 lane1[1]; + lane1[0] = lane; + memcpy(data, (UINT8*)lane1+offset, length); + } +#else + unsigned int i; + lane >>= offset*8; + for(i=0; i<length; i++) { + data[i] = lane & 0xFF; + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +void fromWordToBytes(UINT8 *bytes, const UINT64 word) +{ + unsigned int i; + + for(i=0; i<(64/8); i++) + bytes[i] = (word >> (8*i)) & 0xFF; +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i<laneCount; i++) + fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); +#endif +#ifdef KeccakP1600_useLaneComplementing + if (laneCount > 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + UINT64 lane1[1]; + lane1[0] = lane; + for(i=0; i<length; i++) + output[i] = input[i] ^ ((UINT8*)lane1)[offset+i]; + } +#else + unsigned int i; + lane >>= offset*8; + for(i=0; i<length; i++) { + output[i] = input[i] ^ (lane & 0xFF); + lane >>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#endif + + for(i=0; i<laneCount; i++) { +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i]; +#else + fromWordToBytes(temp, ((const UINT64*)state)[i]); + for(j=0; j<8; j++) + output[i*8+j] = input[i*8+j] ^ temp[j]; +#endif + } +#ifdef KeccakP1600_useLaneComplementing + if (laneCount > 1) { + ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; + if (laneCount > 2) { + ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; + if (laneCount > 8) { + ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; + if (laneCount > 12) { + ((UINT64*)output)[12] = ~((UINT64*)output)[12]; + if (laneCount > 17) { + ((UINT64*)output)[17] = ~((UINT64*)output)[17]; + if (laneCount > 20) { + ((UINT64*)output)[20] = ~((UINT64*)output)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + UINT64 *inDataAsLanes = (UINT64*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds24 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} |