diff options
Diffstat (limited to 'Modules/_sha3/kcp/KeccakP-1600-64.macros')
-rw-r--r-- | Modules/_sha3/kcp/KeccakP-1600-64.macros | 2208 |
1 files changed, 2208 insertions, 0 deletions
diff --git a/Modules/_sha3/kcp/KeccakP-1600-64.macros b/Modules/_sha3/kcp/KeccakP-1600-64.macros new file mode 100644 index 0000000..1f11fe3 --- /dev/null +++ b/Modules/_sha3/kcp/KeccakP-1600-64.macros @@ -0,0 +1,2208 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaƫl Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ + +/* --- Code for round, with prepare-theta */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ + +/* --- 64-bit lanes mapped to 64-bit words */ + +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + +#define copyFromStateAndAdd(X, state, input, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba = state[ 0]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + } \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + if (laneCount < 3) { \ + X##bi = state[ 2]; \ + } \ + else { \ + X##bi = state[ 2]^input[ 2]; \ + } \ + } \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu = state[ 4]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + } \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + if (laneCount < 7) { \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ge = state[ 6]^input[ 6]; \ + } \ + } \ + } \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go = state[ 8]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + } \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + if (laneCount < 11) { \ + X##ka = state[10]; \ + } \ + else { \ + X##ka = state[10]^input[10]; \ + } \ + } \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki = state[12]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + } \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + if (laneCount < 15) { \ + X##ku = state[14]; \ + } \ + else { \ + X##ku = state[14]^input[14]; \ + } \ + } \ + } \ + } \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me = state[16]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + } \ + X##mi = state[17]; \ + X##mo = state[18]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + if (laneCount < 19) { \ + X##mo = state[18]; \ + } \ + else { \ + X##mo = state[18]^input[18]; \ + } \ + } \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa = state[20]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + } \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + if (laneCount < 23) { \ + X##si = state[22]; \ + } \ + else { \ + X##si = state[22]^input[22]; \ + } \ + } \ + } \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + X##si = state[22]^input[22]; \ + X##so = state[23]^input[23]; \ + if (laneCount < 25) { \ + X##su = state[24]; \ + } \ + else { \ + X##su = state[24]^input[24]; \ + } \ + } \ + } + +#define addInput(X, input, laneCount) \ + if (laneCount == 21) { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + } \ + else if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + } \ + else { \ + X##ba ^= input[ 0]; \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + if (laneCount < 3) { \ + } \ + else { \ + X##bi ^= input[ 2]; \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + } \ + else { \ + X##bu ^= input[ 4]; \ + } \ + } \ + else { \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + if (laneCount < 7) { \ + } \ + else { \ + X##ge ^= input[ 6]; \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + } \ + else { \ + X##go ^= input[ 8]; \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + if (laneCount < 11) { \ + } \ + else { \ + X##ka ^= input[10]; \ + } \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + } \ + else { \ + X##ki ^= input[12]; \ + } \ + } \ + else { \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + if (laneCount < 15) { \ + } \ + else { \ + X##ku ^= input[14]; \ + } \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + } \ + else { \ + X##me ^= input[16]; \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + if (laneCount < 19) { \ + } \ + else { \ + X##mo ^= input[18]; \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + } \ + else { \ + X##sa ^= input[20]; \ + } \ + } \ + else { \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + if (laneCount < 23) { \ + } \ + else { \ + X##si ^= input[22]; \ + } \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + X##si ^= input[22]; \ + X##so ^= input[23]; \ + if (laneCount < 25) { \ + } \ + else { \ + X##su ^= input[24]; \ + } \ + } \ + } + +#ifdef UseBebigokimisa + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + } \ + else { \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + } \ + else { \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = ~X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = ~(input[index] ^ X##name); \ + X##name ^= output[index]; \ + +#else /* UseBebigokimisa */ + + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12]= X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = X##ki; \ + } \ + } \ + else { \ + output[12] = X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + } \ + else { \ + output[20] = X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#endif + +#define wrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + wrapOne(X, input, output, 14, ku) \ + wrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + wrapOne(X, input, output, 22, si) \ + wrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 24, su) \ + } \ + } \ + } + +#define unwrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + unwrapOne(X, input, output, 14, ku) \ + unwrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + unwrapOne(X, input, output, 22, si) \ + unwrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 24, su) \ + } \ + } \ + } |