├── rebar ├── .gitignore ├── test ├── data │ ├── LongMsgKAT.zip │ └── ShortMsgKAT.zip └── sha3_tests.erl ├── c_src ├── opt32 │ ├── KeccakF-1600-opt32-settings.h │ ├── KeccakF-1600-32.macros │ ├── KeccakF-1600-opt32.c │ ├── KeccakF-1600-32-rvk.macros │ └── KeccakF-1600-32-s1.macros ├── opt64 │ ├── KeccakF-1600-opt64-settings.h │ ├── KeccakF-1600-opt64.c │ ├── KeccakF-1600-simd64.macros │ ├── KeccakF-1600-64.macros │ └── KeccakF-1600-simd128.macros ├── KeccakF-1600-int-set.h ├── KeccakF-1600-interface.h ├── erl_nif_compat.h ├── KeccakNISTInterface.c ├── KeccakSponge.h ├── KeccakNISTInterface.h ├── KeccakF-1600-unrolling.macros ├── sha3_nifs.c ├── brg_endian.h ├── ref │ └── KeccakF-1600-reference.c └── KeccakSponge.c ├── Makefile ├── src ├── sha3.app.src └── sha3.erl ├── rebar.config └── README.md /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/sha3/master/rebar -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | c_src/*.o 2 | c_src/*/*.o 3 | .eunit 4 | ebin/ 5 | priv/*.so 6 | -------------------------------------------------------------------------------- /test/data/LongMsgKAT.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/sha3/master/test/data/LongMsgKAT.zip -------------------------------------------------------------------------------- /test/data/ShortMsgKAT.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/b/sha3/master/test/data/ShortMsgKAT.zip -------------------------------------------------------------------------------- /c_src/opt32/KeccakF-1600-opt32-settings.h: -------------------------------------------------------------------------------- 1 | #define Unrolling 2 2 | //#define UseBebigokimisa 3 | //#define UseInterleaveTables 4 | #define UseSchedule 3 5 | -------------------------------------------------------------------------------- /c_src/opt64/KeccakF-1600-opt64-settings.h: -------------------------------------------------------------------------------- 1 | #define Unrolling 24 2 | #define UseBebigokimisa 3 | //#define UseSSE 4 | //#define UseOnlySIMD64 5 | //#define UseMMX 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | ./rebar -v get-deps 3 | ./rebar -v compile 4 | 5 | clean: 6 | ./rebar -v delete-deps 7 | ./rebar -v clean 8 | 9 | eunit: 10 | ./rebar -v eunit 11 | -------------------------------------------------------------------------------- /c_src/KeccakF-1600-int-set.h: -------------------------------------------------------------------------------- 1 | #define ProvideFast576 2 | #define ProvideFast832 3 | #define ProvideFast1024 4 | #define ProvideFast1088 5 | #define ProvideFast1152 6 | #define ProvideFast1344 7 | -------------------------------------------------------------------------------- /src/sha3.app.src: -------------------------------------------------------------------------------- 1 | {application, sha3, 2 | [ 3 | {description, "Keccak (SHA-3) hash function NIF"}, 4 | {vsn, "1.0.0"}, 5 | {modules, [ 6 | sha3, 7 | hex 8 | ]}, 9 | {registered, []}, 10 | {applications, [ 11 | kernel, 12 | stdlib 13 | ]}, 14 | {env, []} 15 | ]}. 16 | -------------------------------------------------------------------------------- /c_src/opt32/KeccakF-1600-32.macros: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #ifdef UseSchedule 15 | #if (UseSchedule == 1) 16 | #include "KeccakF-1600-32-s1.macros" 17 | #elif (UseSchedule == 2) 18 | #include "KeccakF-1600-32-s2.macros" 19 | #elif (UseSchedule == 3) 20 | #include "KeccakF-1600-32-rvk.macros" 21 | #else 22 | #error "This schedule is not supported." 23 | #endif 24 | #else 25 | #include "KeccakF-1600-32-s1.macros" 26 | #endif 27 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {cover_enabled, true}. 2 | 3 | {erl_opts, [warnings_as_errors]}. 4 | 5 | {deps, [ 6 | {hex, ".*", {git, "git://github.com/b/hex", "HEAD"}} 7 | ]}. 8 | 9 | {port_specs, [ 10 | {"-32-unix$", "priv/sha3_nifs.so", ["c_src/*.c", "c_src/opt32/*.c"]}, 11 | {"-64-unix$", "priv/sha3_nifs.so", ["c_src/*.c", "c_src/opt64/*.c"]} 12 | ]}. 13 | 14 | {port_env, [ 15 | %% Make sure to link -lstdc++ on linux or solaris 16 | {"(linux|solaris)", "CXXFLAGS", "-O2"}, 17 | {"(linux|solaris)", "LDFLAGS", "$LDFLAGS -lstdc++"}, 18 | 19 | %% OS X Leopard flags for 64-bit 20 | {"darwin9\.*-64-unix", "CXXFLAGS", "-O2 -m64"}, 21 | {"darwin9\.*-64-unix", "LDFLAGS", "-arch x86_64 -lstdc++"}, 22 | 23 | %% OS X Snow Leopard flags for 32-bit 24 | {"darwin1?\.*-32-unix", "CXXFLAGS", "-O2 -m32"}, 25 | {"darwin1?\.*-32-unix", "LDFLAGS", "-arch i386"}, 26 | 27 | %% OS X Snow Leopard/Lion flags for 64-bit 28 | {"darwin1?\.*-64-unix", "CXXFLAGS", "-O2 -m64"}, 29 | {"darwin1?\.*-64-unix", "LDFLAGS", "-arch x86_64"} 30 | ]}. 31 | -------------------------------------------------------------------------------- /src/sha3.erl: -------------------------------------------------------------------------------- 1 | -module(sha3). 2 | -author('b@b3k.us'). 3 | 4 | -export([init/1, 5 | update/2, 6 | update/3, 7 | final/1, 8 | hash/2, 9 | hash/3, 10 | hexhash/2]). 11 | 12 | -on_load(init/0). 13 | 14 | init() -> 15 | case code:priv_dir(sha3) of 16 | {error, bad_name} -> 17 | SoName = filename:join("../priv", "sha3_nifs"); 18 | Dir -> 19 | SoName = filename:join(Dir, "sha3_nifs") 20 | end, 21 | case erlang:load_nif(SoName, 0) of 22 | ok -> ok; 23 | {error, {load, _}} -> ok; 24 | {error, {reload, _}} -> ok; 25 | {error, {upgrade, _}} -> ok; 26 | Error -> Error 27 | end. 28 | 29 | -spec init(non_neg_integer()) -> {ok, binary()} | {error, atom()}. 30 | init(_Bits) -> 31 | "NIF library not loaded". 32 | 33 | -spec update(binary(), binary()) -> {ok, binary()} | {error, atom()}. 34 | update(State, Data) -> update(State, Data, bit_size(Data)). 35 | 36 | -spec update(binary(), binary(), non_neg_integer()) -> {ok, binary()} | {error, atom()}. 37 | update(_State, _Data, _BitLength) -> 38 | "NIF library not loaded". 39 | 40 | -spec final(binary()) -> {ok, binary()} | {error, atom()}. 41 | final(_State) -> 42 | "NIF library not loaded". 43 | 44 | hexhash(Bits, Data) -> 45 | {ok, Hash} = hash(Bits, Data, bit_size(Data)), 46 | list_to_binary(hex:bin_to_hexstr(Hash)). 47 | 48 | -spec hash(non_neg_integer(), binary()) -> {ok, binary()} | {error, atom()}. 49 | hash(Bits, Data) -> hash(Bits, Data, bit_size(Data)). 50 | 51 | -spec hash(non_neg_integer(), binary(), non_neg_integer()) -> {ok, binary()} | {error, atom()}. 52 | hash(_Bits, _Data, _BitLength) -> 53 | "NIF library not loaded". 54 | -------------------------------------------------------------------------------- /c_src/KeccakF-1600-interface.h: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #ifndef _KeccakPermutationInterface_h_ 15 | #define _KeccakPermutationInterface_h_ 16 | 17 | #include "KeccakF-1600-int-set.h" 18 | 19 | void KeccakInitialize( void ); 20 | void KeccakInitializeState(unsigned char *state); 21 | void KeccakPermutation(unsigned char *state); 22 | #ifdef ProvideFast576 23 | void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data); 24 | #endif 25 | #ifdef ProvideFast832 26 | void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data); 27 | #endif 28 | #ifdef ProvideFast1024 29 | void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data); 30 | #endif 31 | #ifdef ProvideFast1088 32 | void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data); 33 | #endif 34 | #ifdef ProvideFast1152 35 | void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data); 36 | #endif 37 | #ifdef ProvideFast1344 38 | void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data); 39 | #endif 40 | void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount); 41 | #ifdef ProvideFast1024 42 | void KeccakExtract1024bits(const unsigned char *state, unsigned char *data); 43 | #endif 44 | void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount); 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### sha3 2 | 3 | sha3 is a NIF wrapper around SHA-3/KECCAK cryptographic hashing function 4 | 5 | ### Quick Start 6 | 7 | You must have Erlang/OTP R14B or later and a GNU-style build system to compile and run sha3. 8 | 9 | git clone git://github.com/b/sha3.git 10 | cd sha3 11 | make 12 | 13 | Start up an Erlang shell with the path to sha3 included. 14 | 15 | cd path/to/sha3/ebin 16 | erl 17 | 18 | Hash a binary by calling sha3:hash/2 with the desired number of bits for the resulting hash: 19 | 20 | 1> Bits = 256. 21 | 256 22 | 2> Data = <<"foobarbazquux">>. 23 | <<"foobarbazquux">> 24 | 3> {ok, Hash} = sha3:hash(Bits, Data). 25 | {ok,<<44,116,195,124,154,54,203,192,209,31,49,133,80,8, 26 | 197,88,60,97,25,146,40,128,231,75,121,226,1,...>>} 27 | 4> bit_size(Hash). 28 | 256 29 | 30 | Supported hash bit lengths are 224, 256, 384, and 512. 31 | 32 | You may find sha3:hexhash/2 more useful, as it returns a hexadecimal-encoded string representing the hash: 33 | 34 | 5> HexHash = sha3:hexhash(Bits, Data). 35 | <<"2C74C37C9A36CBC0D11F31855008C5583C6119922880E74B79E2014A28F862DA">> 36 | 37 | Alternatively, you might want to incrementally hash a longer message: 38 | 39 | 6> Bits = 256. 40 | 256 41 | 7> Data1 = <<"foobar">>. 42 | <<"foobar">> 43 | 8> Data2 = <<"bazquux">>. 44 | <<"bazquux">> 45 | 9> {ok, Handle} = sha3:init(Bits). 46 | {ok,<<>>} 47 | 10> {ok, Handle} = sha3:update(Handle, Data1). 48 | {ok,<<>>} 49 | 11> {ok, Handle} = sha3:update(Handle, Data2). 50 | {ok,<<>>} 51 | 12> {ok, Hash} = sha3:final(Handle). 52 | {ok,<<44,116,195,124,154,54,203,192,209,31,49,133,80,8, 53 | 197,88,60,97,25,146,40,128,231,75,121,226,1,...>>} 54 | 13> bit_size(Hash). 55 | 256 56 | 57 | ### The SHA-3 Hash 58 | 59 | The underlying hashing code in sha3 is the reference implementation of KECCAK, now SHA-3, from the official NIST submission. 60 | 61 | Details on the algorithm as submitted and known analysis can be found at http://keccak.noekeon.org/. 62 | 63 | -------------------------------------------------------------------------------- /c_src/erl_nif_compat.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2010-2011 Basho Technologies, Inc. 2 | * 3 | * This file is provided to you under the Apache License, 4 | * Version 2.0 (the "License"); you may not use this file 5 | * except in compliance with the License. You may obtain 6 | * a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, 11 | * software distributed under the License is distributed on an 12 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | * KIND, either express or implied. See the License for the 14 | * specific language governing permissions and limitations 15 | * under the License. 16 | */ 17 | 18 | #ifndef ERL_NIF_COMPAT_H_ 19 | #define ERL_NIF_COMPAT_H_ 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif /* __cplusplus */ 24 | 25 | #include "erl_nif.h" 26 | 27 | #if ERL_NIF_MAJOR_VERSION == 1 && ERL_NIF_MINOR_VERSION == 0 28 | 29 | #define enif_open_resource_type_compat enif_open_resource_type 30 | #define enif_alloc_resource_compat enif_alloc_resource 31 | #define enif_release_resource_compat enif_release_resource 32 | #define enif_alloc_binary_compat enif_alloc_binary 33 | #define enif_alloc_compat enif_alloc 34 | #define enif_free_compat enif_free 35 | #define enif_cond_create erl_drv_cond_create 36 | #define enif_cond_destroy erl_drv_cond_destroy 37 | #define enif_cond_signal erl_drv_cond_signal 38 | #define enif_cond_broadcast erl_drv_cond_broadcast 39 | #define enif_cond_wait erl_drv_cond_wait 40 | #define ErlNifCond ErlDrvCond 41 | #endif /* R13B04 */ 42 | 43 | #if ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 0 44 | 45 | #define enif_open_resource_type_compat(E, N, D, F, T) \ 46 | enif_open_resource_type(E, NULL, N, D, F, T) 47 | 48 | #define enif_alloc_resource_compat(E, T, S) \ 49 | enif_alloc_resource(T, S) 50 | 51 | #define enif_release_resource_compat(E, H) \ 52 | enif_release_resource(H) 53 | 54 | #define enif_alloc_binary_compat(E, S, B) \ 55 | enif_alloc_binary(S, B) 56 | 57 | #define enif_alloc_compat(E, S) \ 58 | enif_alloc(S) 59 | 60 | #define enif_free_compat(E, P) \ 61 | enif_free(P) 62 | 63 | #endif /* R14 */ 64 | 65 | 66 | 67 | #ifdef __cplusplus 68 | } 69 | #endif /* __cplusplus */ 70 | 71 | #endif /* ERL_NIF_COMPAT_H_ */ 72 | -------------------------------------------------------------------------------- /c_src/KeccakNISTInterface.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #include 15 | #include "KeccakNISTInterface.h" 16 | #include "KeccakF-1600-interface.h" 17 | 18 | HashReturn Init(hashState *state, int hashbitlen) 19 | { 20 | switch(hashbitlen) { 21 | case 0: // Default parameters, arbitrary length output 22 | InitSponge((spongeState*)state, 1024, 576); 23 | break; 24 | case 224: 25 | InitSponge((spongeState*)state, 1152, 448); 26 | break; 27 | case 256: 28 | InitSponge((spongeState*)state, 1088, 512); 29 | break; 30 | case 384: 31 | InitSponge((spongeState*)state, 832, 768); 32 | break; 33 | case 512: 34 | InitSponge((spongeState*)state, 576, 1024); 35 | break; 36 | default: 37 | return BAD_HASHLEN; 38 | } 39 | state->fixedOutputLength = hashbitlen; 40 | return SUCCESS; 41 | } 42 | 43 | HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) 44 | { 45 | if ((databitlen % 8) == 0) 46 | return Absorb((spongeState*)state, data, databitlen); 47 | else { 48 | HashReturn ret = Absorb((spongeState*)state, data, databitlen - (databitlen % 8)); 49 | if (ret == SUCCESS) { 50 | unsigned char lastByte; 51 | // Align the last partial byte to the least significant bits 52 | lastByte = data[databitlen/8] >> (8 - (databitlen % 8)); 53 | return Absorb((spongeState*)state, &lastByte, databitlen % 8); 54 | } 55 | else 56 | return ret; 57 | } 58 | } 59 | 60 | HashReturn Final(hashState *state, BitSequence *hashval) 61 | { 62 | return Squeeze(state, hashval, state->fixedOutputLength); 63 | } 64 | 65 | HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval) 66 | { 67 | hashState state; 68 | HashReturn result; 69 | 70 | if ((hashbitlen != 224) && (hashbitlen != 256) && (hashbitlen != 384) && (hashbitlen != 512)) 71 | return BAD_HASHLEN; // Only the four fixed output lengths available through this API 72 | result = Init(&state, hashbitlen); 73 | if (result != SUCCESS) 74 | return result; 75 | result = Update(&state, data, databitlen); 76 | if (result != SUCCESS) 77 | return result; 78 | result = Final(&state, hashval); 79 | return result; 80 | } 81 | 82 | -------------------------------------------------------------------------------- /c_src/KeccakSponge.h: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #ifndef _KeccakSponge_h_ 15 | #define _KeccakSponge_h_ 16 | 17 | #define KeccakPermutationSize 1600 18 | #define KeccakPermutationSizeInBytes (KeccakPermutationSize/8) 19 | #define KeccakMaximumRate 1536 20 | #define KeccakMaximumRateInBytes (KeccakMaximumRate/8) 21 | 22 | #if defined(__GNUC__) 23 | #define ALIGN __attribute__ ((aligned(32))) 24 | #elif defined(_MSC_VER) 25 | #define ALIGN __declspec(align(32)) 26 | #else 27 | #define ALIGN 28 | #endif 29 | 30 | ALIGN typedef struct spongeStateStruct { 31 | ALIGN unsigned char state[KeccakPermutationSizeInBytes]; 32 | ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes]; 33 | unsigned int rate; 34 | unsigned int capacity; 35 | unsigned int bitsInQueue; 36 | unsigned int fixedOutputLength; 37 | int squeezing; 38 | unsigned int bitsAvailableForSqueezing; 39 | } spongeState; 40 | 41 | /** 42 | * Function to initialize the state of the Keccak[r, c] sponge function. 43 | * The sponge function is set to the absorbing phase. 44 | * @param state Pointer to the state of the sponge function to be initialized. 45 | * @param rate The value of the rate r. 46 | * @param capacity The value of the capacity c. 47 | * @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation. 48 | * @return Zero if successful, 1 otherwise. 49 | */ 50 | int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity); 51 | /** 52 | * Function to give input data for the sponge function to absorb. 53 | * @param state Pointer to the state of the sponge function initialized by InitSponge(). 54 | * @param data Pointer to the input data. 55 | * When @a databitLen is not a multiple of 8, the last bits of data must be 56 | * in the least significant bits of the last byte. 57 | * @param databitLen The number of input bits provided in the input data. 58 | * @pre In the previous call to Absorb(), databitLen was a multiple of 8. 59 | * @pre The sponge function must be in the absorbing phase, 60 | * i.e., Squeeze() must not have been called before. 61 | * @return Zero if successful, 1 otherwise. 62 | */ 63 | int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen); 64 | /** 65 | * Function to squeeze output data from the sponge function. 66 | * If the sponge function was in the absorbing phase, this function 67 | * switches it to the squeezing phase. 68 | * @param state Pointer to the state of the sponge function initialized by InitSponge(). 69 | * @param output Pointer to the buffer where to store the output data. 70 | * @param outputLength The number of output bits desired. 71 | * It must be a multiple of 8. 72 | * @return Zero if successful, 1 otherwise. 73 | */ 74 | int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength); 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /c_src/KeccakNISTInterface.h: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #ifndef _KeccakNISTInterface_h_ 15 | #define _KeccakNISTInterface_h_ 16 | 17 | #include "KeccakSponge.h" 18 | 19 | typedef unsigned char BitSequence; 20 | typedef unsigned long long DataLength; 21 | typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; 22 | 23 | typedef spongeState hashState; 24 | 25 | /** 26 | * Function to initialize the state of the Keccak[r, c] sponge function. 27 | * The rate r and capacity c values are determined from @a hashbitlen. 28 | * @param state Pointer to the state of the sponge function to be initialized. 29 | * @param hashbitlen The desired number of output bits, 30 | * or 0 for Keccak[] with default parameters 31 | * and arbitrarily-long output. 32 | * @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512. 33 | * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect. 34 | */ 35 | HashReturn Init(hashState *state, int hashbitlen); 36 | /** 37 | * Function to give input data for the sponge function to absorb. 38 | * @param state Pointer to the state of the sponge function initialized by Init(). 39 | * @param data Pointer to the input data. 40 | * When @a databitLen is not a multiple of 8, the last bits of data must be 41 | * in the most significant bits of the last byte. 42 | * @param databitLen The number of input bits provided in the input data. 43 | * @pre In the previous call to Absorb(), databitLen was a multiple of 8. 44 | * @return SUCCESS if successful, FAIL otherwise. 45 | */ 46 | HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); 47 | /** 48 | * Function to squeeze output data from the sponge function. 49 | * If @a hashbitlen was not 0 in the call to Init(), the number of output bits is equal to @a hashbitlen. 50 | * If @a hashbitlen was 0 in the call to Init(), the output bits must be extracted using the Squeeze() function. 51 | * @param state Pointer to the state of the sponge function initialized by Init(). 52 | * @param hashval Pointer to the buffer where to store the output data. 53 | * @return SUCCESS if successful, FAIL otherwise. 54 | */ 55 | HashReturn Final(hashState *state, BitSequence *hashval); 56 | /** 57 | * Function to compute a hash using the Keccak[r, c] sponge function. 58 | * The rate r and capacity c values are determined from @a hashbitlen. 59 | * @param hashbitlen The desired number of output bits. 60 | * @param data Pointer to the input data. 61 | * When @a databitLen is not a multiple of 8, the last bits of data must be 62 | * in the most significant bits of the last byte. 63 | * @param databitLen The number of input bits provided in the input data. 64 | * @param hashval Pointer to the buffer where to store the output data. 65 | * @pre The value of hashbitlen must be one of 224, 256, 384 and 512. 66 | * @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect. 67 | */ 68 | HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /test/sha3_tests.erl: -------------------------------------------------------------------------------- 1 | -module(sha3_tests). 2 | 3 | -include_lib("eunit/include/eunit.hrl"). 4 | 5 | test_hash(Bits, [Len, Msg, Md]) when Len > 0 -> 6 | {ok, Digest} = case Len rem 8 of 7 | 0 -> sha3:hash(Bits, hex:hexstr_to_bin(Msg)); 8 | _ -> sha3:hash(Bits, hex:hexstr_to_bin(Msg), Len) 9 | end, 10 | ?assertEqual(Md, string:to_upper(hex:bin_to_hexstr(Digest))); 11 | 12 | test_hash(Bits, [Len, Msg, Md]) -> 13 | {ok, Digest} = sha3:hash(Bits, hex:hexstr_to_bin(Msg), Len), 14 | ?assertEqual(Md, string:to_upper(hex:bin_to_hexstr(Digest))). 15 | 16 | test_lifecycle(Bits, [Len, Msg, Md]) when Len > 8 -> 17 | {ok, State} = sha3:init(Bits), 18 | <> = hex:hexstr_to_bin(Msg), 19 | {ok, State} = sha3:update(State, Bin1, 8), 20 | {ok, State} = sha3:update(State, Bin2, Len - 8), 21 | {ok, Digest} = sha3:final(State), 22 | ?assertEqual(Md, string:to_upper(hex:bin_to_hexstr(Digest))); 23 | 24 | test_lifecycle(Bits, [Len, Msg, Md]) -> 25 | {ok, State} = sha3:init(Bits), 26 | {ok, State} = sha3:update(State, hex:hexstr_to_bin(Msg), Len), 27 | {ok, Digest} = sha3:final(State), 28 | ?assertEqual(Md, string:to_upper(hex:bin_to_hexstr(Digest))). 29 | 30 | parse_triples(Lines) -> 31 | parse_triples(Lines, []). 32 | 33 | parse_triples([], Acc) -> 34 | Acc; 35 | parse_triples([Len, Msg, Md|Lines], Acc) -> 36 | ["Len", "=", Length] = string:tokens(Len, " "), 37 | ["Msg", "=", Message] = string:tokens(Msg, " "), 38 | ["MD", "=", Digest] = string:tokens(Md, " "), 39 | parse_triples(Lines, [[list_to_integer(Length), Message, Digest]|Acc]). 40 | 41 | msgkat(Set, Bits, Fun) -> 42 | {ok, Cwd} = file:get_cwd(), 43 | Filename = filename:join([Cwd, "..", "test", "data", Set ++ "MsgKAT.zip"]), 44 | {ok, ZipHandle} = zip:zip_open(Filename, [memory]), 45 | {ok, {_, Data}} = zip:zip_get(Set ++ "MsgKAT_" ++ integer_to_list(Bits) ++ ".txt", ZipHandle), 46 | Lines = lists:filter( 47 | fun(Line) -> 48 | case Line of 49 | "L" ++ _ -> true; 50 | "M" ++ _ -> true; 51 | _ -> false 52 | end 53 | end, string:tokens(binary_to_list(Data), "\n")), 54 | 55 | lists:foreach( 56 | fun(T) -> 57 | Fun(Bits, T) 58 | end, parse_triples(Lines)). 59 | 60 | shortmsgkat_224_hash_test_() -> {timeout, 60, fun() -> msgkat("Short", 224, fun test_hash/2) end}. 61 | shortmsgkat_256_hash_test_() -> {timeout, 60, fun() -> msgkat("Short", 256, fun test_hash/2) end}. 62 | shortmsgkat_384_hash_test_() -> {timeout, 60, fun() -> msgkat("Short", 384, fun test_hash/2) end}. 63 | shortmsgkat_512_hash_test_() -> {timeout, 60, fun() -> msgkat("Short", 512, fun test_hash/2) end}. 64 | 65 | shortmsgkat_224_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Short", 224, fun test_lifecycle/2) end}. 66 | shortmsgkat_256_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Short", 256, fun test_lifecycle/2) end}. 67 | shortmsgkat_384_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Short", 384, fun test_lifecycle/2) end}. 68 | shortmsgkat_512_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Short", 512, fun test_lifecycle/2) end}. 69 | 70 | longmsgkat_224_hash_test_() -> {timeout, 60, fun() -> msgkat("Long", 224, fun test_hash/2) end}. 71 | longmsgkat_256_hash_test_() -> {timeout, 60, fun() -> msgkat("Long", 256, fun test_hash/2) end}. 72 | longmsgkat_384_hash_test_() -> {timeout, 60, fun() -> msgkat("Long", 384, fun test_hash/2) end}. 73 | longmsgkat_512_hash_test_() -> {timeout, 60, fun() -> msgkat("Long", 512, fun test_hash/2) end}. 74 | 75 | longmsgkat_224_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Long", 224, fun test_lifecycle/2) end}. 76 | longmsgkat_256_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Long", 256, fun test_lifecycle/2) end}. 77 | longmsgkat_384_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Long", 384, fun test_lifecycle/2) end}. 78 | longmsgkat_512_lifecycle_test_() -> {timeout, 60, fun() -> msgkat("Long", 512, fun test_lifecycle/2) end}. 79 | -------------------------------------------------------------------------------- /c_src/KeccakF-1600-unrolling.macros: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #if (Unrolling == 24) 15 | #define rounds \ 16 | prepareTheta \ 17 | thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ 18 | thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ 19 | thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ 20 | thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ 21 | thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ 22 | thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ 23 | thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ 24 | thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ 25 | thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ 26 | thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ 27 | thetaRhoPiChiIotaPrepareTheta(10, A, E) \ 28 | thetaRhoPiChiIotaPrepareTheta(11, E, A) \ 29 | thetaRhoPiChiIotaPrepareTheta(12, A, E) \ 30 | thetaRhoPiChiIotaPrepareTheta(13, E, A) \ 31 | thetaRhoPiChiIotaPrepareTheta(14, A, E) \ 32 | thetaRhoPiChiIotaPrepareTheta(15, E, A) \ 33 | thetaRhoPiChiIotaPrepareTheta(16, A, E) \ 34 | thetaRhoPiChiIotaPrepareTheta(17, E, A) \ 35 | thetaRhoPiChiIotaPrepareTheta(18, A, E) \ 36 | thetaRhoPiChiIotaPrepareTheta(19, E, A) \ 37 | thetaRhoPiChiIotaPrepareTheta(20, A, E) \ 38 | thetaRhoPiChiIotaPrepareTheta(21, E, A) \ 39 | thetaRhoPiChiIotaPrepareTheta(22, A, E) \ 40 | thetaRhoPiChiIota(23, E, A) \ 41 | copyToState(state, A) 42 | #elif (Unrolling == 12) 43 | #define rounds \ 44 | prepareTheta \ 45 | for(i=0; i<24; i+=12) { \ 46 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 47 | thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ 48 | thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ 49 | thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ 50 | thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ 51 | thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ 52 | thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ 53 | thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ 54 | thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ 55 | thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ 56 | thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ 57 | thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ 58 | } \ 59 | copyToState(state, A) 60 | #elif (Unrolling == 8) 61 | #define rounds \ 62 | prepareTheta \ 63 | for(i=0; i<24; i+=8) { \ 64 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 65 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 66 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 67 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 68 | thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ 69 | thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ 70 | thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \ 71 | thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \ 72 | } \ 73 | copyToState(state, A) 74 | #elif (Unrolling == 6) 75 | #define rounds \ 76 | prepareTheta \ 77 | for(i=0; i<24; i+=6) { \ 78 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 79 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 80 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 81 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 82 | thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ 83 | thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ 84 | } \ 85 | copyToState(state, A) 86 | #elif (Unrolling == 4) 87 | #define rounds \ 88 | prepareTheta \ 89 | for(i=0; i<24; i+=4) { \ 90 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 91 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 92 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 93 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 94 | } \ 95 | copyToState(state, A) 96 | #elif (Unrolling == 3) 97 | #define rounds \ 98 | prepareTheta \ 99 | for(i=0; i<24; i+=3) { \ 100 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 101 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 102 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 103 | copyStateVariables(A, E) \ 104 | } \ 105 | copyToState(state, A) 106 | #elif (Unrolling == 2) 107 | #define rounds \ 108 | prepareTheta \ 109 | for(i=0; i<24; i+=2) { \ 110 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 111 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 112 | } \ 113 | copyToState(state, A) 114 | #elif (Unrolling == 1) 115 | #define rounds \ 116 | prepareTheta \ 117 | for(i=0; i<24; i++) { \ 118 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 119 | copyStateVariables(A, E) \ 120 | } \ 121 | copyToState(state, A) 122 | #else 123 | #error "Unrolling is not correctly specified!" 124 | #endif 125 | -------------------------------------------------------------------------------- /c_src/sha3_nifs.c: -------------------------------------------------------------------------------- 1 | #include "erl_nif.h" 2 | #include "erl_nif_compat.h" 3 | #include "KeccakNISTInterface.h" 4 | 5 | static ErlNifResourceType* keccak_hashstate; 6 | 7 | typedef struct 8 | { 9 | } keccak_handle; 10 | 11 | // Prototypes 12 | ERL_NIF_TERM keccak_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); 13 | ERL_NIF_TERM keccak_update(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); 14 | ERL_NIF_TERM keccak_final(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); 15 | ERL_NIF_TERM keccak_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); 16 | 17 | // lifecycle 18 | int load(ErlNifEnv* env, void ** priv_data, ERL_NIF_TERM load_info); 19 | int reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info); 20 | int upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM load_info); 21 | void unload(ErlNifEnv* env, void* priv); 22 | 23 | static ErlNifFunc nif_funcs[] = 24 | { 25 | {"init", 1, keccak_init}, 26 | {"update", 3, keccak_update}, 27 | {"final", 1, keccak_final}, 28 | {"hash", 3, keccak_hash} 29 | }; 30 | 31 | ERL_NIF_INIT(sha3, nif_funcs, load, NULL, NULL, NULL); 32 | 33 | static char *hash_return_strings[] = {"success", "fail", "bad_hashlen"}; 34 | 35 | int valid_length(int bits, int bufbytes) 36 | { 37 | int numbytes = bits / 8; 38 | 39 | if (bits % 8 > 0) 40 | { 41 | numbytes++; 42 | } 43 | 44 | if (numbytes <= bufbytes) 45 | { 46 | return 0; 47 | } 48 | 49 | return -1; 50 | } 51 | 52 | int load(ErlNifEnv* env, void ** priv_data, ERL_NIF_TERM load_info) 53 | { 54 | keccak_hashstate = enif_open_resource_type_compat(env, "hashstate", NULL, ERL_NIF_RT_CREATE, NULL); 55 | return 0; 56 | } 57 | 58 | int reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) 59 | { 60 | return 0; 61 | } 62 | 63 | int upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM load_info) 64 | { 65 | return 0; 66 | } 67 | 68 | void unload(ErlNifEnv* env, void* priv) 69 | { 70 | return; 71 | } 72 | 73 | ERL_NIF_TERM keccak_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 74 | { 75 | ERL_NIF_TERM hash_state_term; 76 | int bits = 0; 77 | if(!enif_get_int(env, argv[0], &bits)) 78 | return enif_make_badarg(env); 79 | 80 | hashState *state = (hashState*) enif_alloc_resource_compat(env, keccak_hashstate, sizeof(hashState)); 81 | HashReturn r = Init(state, bits); 82 | if (r == SUCCESS) { 83 | hash_state_term = enif_make_resource(env, state); 84 | enif_release_resource_compat(env, state); 85 | return enif_make_tuple2(env, enif_make_atom(env, "ok"), hash_state_term); 86 | } else { 87 | enif_release_resource_compat(env, state); 88 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, hash_return_strings[r])); 89 | } 90 | } 91 | 92 | ERL_NIF_TERM keccak_update(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 93 | { 94 | hashState *state = NULL; 95 | enif_get_resource(env, argv[0], keccak_hashstate, (void**)&state); 96 | 97 | ErlNifBinary bin; 98 | enif_inspect_binary(env, argv[1], &bin); 99 | 100 | int bitlength = 0; 101 | if(!enif_get_int(env, argv[2], &bitlength)) 102 | return enif_make_badarg(env); 103 | 104 | if (valid_length(bitlength, bin.size) < 0) 105 | { 106 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, "invalid_length")); 107 | } 108 | 109 | HashReturn r = Update(state, (BitSequence *)(bin.data), bitlength); 110 | if (r == SUCCESS) 111 | { 112 | return enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_resource(env, state)); 113 | } else { 114 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, hash_return_strings[r])); 115 | } 116 | } 117 | 118 | ERL_NIF_TERM keccak_final(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 119 | { 120 | hashState *state = NULL; 121 | enif_get_resource(env, argv[0], keccak_hashstate, (void**)&state); 122 | 123 | ErlNifBinary out; 124 | enif_alloc_binary_compat(env, (size_t)(state->fixedOutputLength/8), &out); 125 | 126 | HashReturn r = Final(state, (BitSequence *)out.data); 127 | if (r == SUCCESS) { 128 | return enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_binary(env, &out)); 129 | } else { 130 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, hash_return_strings[r])); 131 | } 132 | } 133 | 134 | ERL_NIF_TERM keccak_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) 135 | { 136 | int bits = 0; 137 | enif_get_int(env, argv[0], &bits); 138 | 139 | ErlNifBinary bin, out; 140 | enif_inspect_binary(env, argv[1], &bin); 141 | enif_alloc_binary_compat(env, (size_t)(bits/8), &out); 142 | 143 | int bitlength = 0; 144 | if(!enif_get_int(env, argv[2], &bitlength)) 145 | return enif_make_badarg(env); 146 | 147 | if (valid_length(bitlength, bin.size) < 0) 148 | { 149 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, "invalid_length")); 150 | } 151 | 152 | HashReturn r = Hash(bits, (BitSequence *)(bin.data), bitlength, (BitSequence *)out.data); 153 | if (r == SUCCESS) { 154 | return enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_binary(env, &out)); 155 | } else { 156 | return enif_make_tuple2(env, enif_make_atom(env, "error"), enif_make_atom(env, hash_return_strings[r])); 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /c_src/brg_endian.h: -------------------------------------------------------------------------------- 1 | /* 2 | --------------------------------------------------------------------------- 3 | Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. 4 | 5 | LICENSE TERMS 6 | 7 | The redistribution and use of this software (with or without changes) 8 | is allowed without the payment of fees or royalties provided that: 9 | 10 | 1. source code distributions include the above copyright notice, this 11 | list of conditions and the following disclaimer; 12 | 13 | 2. binary distributions include the above copyright notice, this list 14 | of conditions and the following disclaimer in their documentation; 15 | 16 | 3. the name of the copyright holder is not used to endorse products 17 | built using this software without specific written permission. 18 | 19 | DISCLAIMER 20 | 21 | This software is provided 'as is' with no explicit or implied warranties 22 | in respect of its properties, including, but not limited to, correctness 23 | and/or fitness for purpose. 24 | --------------------------------------------------------------------------- 25 | Issue Date: 20/12/2007 26 | Changes for ARM 9/9/2010 27 | */ 28 | 29 | /* modified by justin@basho.com to include Solaris endianness test */ 30 | 31 | #ifndef _BRG_ENDIAN_H 32 | #define _BRG_ENDIAN_H 33 | 34 | #define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ 35 | #define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ 36 | 37 | #if 0 38 | /* Include files where endian defines and byteswap functions may reside */ 39 | #if defined( __sun ) 40 | # include 41 | #elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) || \ 42 | defined (__DragonFly__) 43 | # include 44 | # include 45 | #elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ 46 | defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) 47 | # include 48 | #elif defined (__SVR4) && defined (__sun) 49 | # ifdef __sparc__ 50 | # define BYTE_ORDER IS_BIG_ENDIAN 51 | # else 52 | # define BYTE_ORDER IS_LITTLE_ENDIAN 53 | # endif 54 | #elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) 55 | # if !defined( __MINGW32__ ) && !defined( _AIX ) 56 | # include 57 | # if !defined( __BEOS__ ) 58 | # include 59 | # endif 60 | # endif 61 | #endif 62 | #endif 63 | 64 | /* Now attempt to set the define for platform byte order using any */ 65 | /* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ 66 | /* seem to encompass most endian symbol definitions */ 67 | 68 | #if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) 69 | # if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN 70 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 71 | # elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN 72 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 73 | # endif 74 | #elif defined( BIG_ENDIAN ) 75 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 76 | #elif defined( LITTLE_ENDIAN ) 77 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 78 | #endif 79 | 80 | #if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) 81 | # if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN 82 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 83 | # elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN 84 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 85 | # endif 86 | #elif defined( _BIG_ENDIAN ) 87 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 88 | #elif defined( _LITTLE_ENDIAN ) 89 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 90 | #endif 91 | 92 | #if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) 93 | # if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN 94 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 95 | # elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN 96 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 97 | # endif 98 | #elif defined( __BIG_ENDIAN ) 99 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 100 | #elif defined( __LITTLE_ENDIAN ) 101 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 102 | #endif 103 | 104 | #if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) 105 | # if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ 106 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 107 | # elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ 108 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 109 | # endif 110 | #elif defined( __BIG_ENDIAN__ ) 111 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 112 | #elif defined( __LITTLE_ENDIAN__ ) 113 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 114 | #endif 115 | 116 | /* if the platform byte order could not be determined, then try to */ 117 | /* set this define using common machine defines */ 118 | #if !defined(PLATFORM_BYTE_ORDER) 119 | 120 | #if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ 121 | defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ 122 | defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ 123 | defined( vax ) || defined( vms ) || defined( VMS ) || \ 124 | defined( __VMS ) || defined( _M_X64 ) 125 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 126 | 127 | #elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ 128 | defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ 129 | defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ 130 | defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ 131 | defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ 132 | defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ 133 | defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) 134 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 135 | 136 | #elif defined(__arm__) 137 | # ifdef __BIG_ENDIAN 138 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 139 | # else 140 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 141 | # endif 142 | #elif 1 /* **** EDIT HERE IF NECESSARY **** */ 143 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 144 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */ 145 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 146 | #else 147 | # error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order 148 | #endif 149 | 150 | #endif 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /c_src/ref/KeccakF-1600-reference.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #include 15 | #include "../brg_endian.h" 16 | #include "../KeccakNISTInterface.h" 17 | #include "../KeccakF-1600-interface.h" 18 | 19 | typedef unsigned char UINT8; 20 | typedef unsigned long long int UINT64; 21 | 22 | #define nrRounds 24 23 | UINT64 KeccakRoundConstants[nrRounds]; 24 | #define nrLanes 25 25 | unsigned int KeccakRhoOffsets[nrLanes]; 26 | 27 | void KeccakPermutationOnWords(UINT64 *state); 28 | void keccak_theta(UINT64 *A); 29 | void keccak_rho(UINT64 *A); 30 | void keccak_pi(UINT64 *A); 31 | void keccak_chi(UINT64 *A); 32 | void keccak_iota(UINT64 *A, unsigned int indexRound); 33 | 34 | void fromBytesToWords(UINT64 *stateAsWords, const unsigned char *state) 35 | { 36 | unsigned int i, j; 37 | 38 | for(i=0; i<(KeccakPermutationSize/64); i++) { 39 | stateAsWords[i] = 0; 40 | for(j=0; j<(64/8); j++) 41 | stateAsWords[i] |= (UINT64)(state[i*(64/8)+j]) << (8*j); 42 | } 43 | } 44 | 45 | void fromWordsToBytes(unsigned char *state, const UINT64 *stateAsWords) 46 | { 47 | unsigned int i, j; 48 | 49 | for(i=0; i<(KeccakPermutationSize/64); i++) 50 | for(j=0; j<(64/8); j++) 51 | state[i*(64/8)+j] = (stateAsWords[i] >> (8*j)) & 0xFF; 52 | } 53 | 54 | void KeccakPermutation(unsigned char *state) 55 | { 56 | #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) 57 | UINT64 stateAsWords[KeccakPermutationSize/64]; 58 | #endif 59 | 60 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 61 | KeccakPermutationOnWords((UINT64*)state); 62 | #else 63 | fromBytesToWords(stateAsWords, state); 64 | KeccakPermutationOnWords(stateAsWords); 65 | fromWordsToBytes(state, stateAsWords); 66 | #endif 67 | } 68 | 69 | void KeccakPermutationAfterXor(unsigned char *state, const unsigned char *data, unsigned int dataLengthInBytes) 70 | { 71 | unsigned int i; 72 | 73 | for(i=0; i> (64-offset))) : a) 93 | 94 | void keccak_theta(UINT64 *A) 95 | { 96 | unsigned int x, y; 97 | UINT64 C[5], D[5]; 98 | 99 | for(x=0; x<5; x++) { 100 | C[x] = 0; 101 | for(y=0; y<5; y++) 102 | C[x] ^= A[index(x, y)]; 103 | } 104 | for(x=0; x<5; x++) 105 | D[x] = ROL64(C[(x+1)%5], 1) ^ C[(x+4)%5]; 106 | for(x=0; x<5; x++) 107 | for(y=0; y<5; y++) 108 | A[index(x, y)] ^= D[x]; 109 | } 110 | 111 | void keccak_rho(UINT64 *A) 112 | { 113 | unsigned int x, y; 114 | 115 | for(x=0; x<5; x++) for(y=0; y<5; y++) 116 | A[index(x, y)] = ROL64(A[index(x, y)], KeccakRhoOffsets[index(x, y)]); 117 | } 118 | 119 | void keccak_pi(UINT64 *A) 120 | { 121 | unsigned int x, y; 122 | UINT64 tempA[25]; 123 | 124 | for(x=0; x<5; x++) 125 | for(y=0; y<5; y++) 126 | tempA[index(x, y)] = A[index(x, y)]; 127 | for(x=0; x<5; x++) 128 | for(y=0; y<5; y++) 129 | A[index(0*x+1*y, 2*x+3*y)] = tempA[index(x, y)]; 130 | } 131 | 132 | void keccak_chi(UINT64 *A) 133 | { 134 | unsigned int x, y; 135 | UINT64 C[5]; 136 | 137 | for(y=0; y<5; y++) { 138 | for(x=0; x<5; x++) 139 | C[x] = A[index(x, y)] ^ ((~A[index(x+1, y)]) & A[index(x+2, y)]); 140 | for(x=0; x<5; x++) 141 | A[index(x, y)] = C[x]; 142 | } 143 | } 144 | 145 | void keccak_iota(UINT64 *A, unsigned int indexRound) 146 | { 147 | A[index(0, 0)] ^= KeccakRoundConstants[indexRound]; 148 | } 149 | 150 | int LFSR86540(UINT8 *LFSR) 151 | { 152 | int result = ((*LFSR) & 0x01) != 0; 153 | if (((*LFSR) & 0x80) != 0) 154 | // Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1 155 | (*LFSR) = ((*LFSR) << 1) ^ 0x71; 156 | else 157 | (*LFSR) <<= 1; 158 | return result; 159 | } 160 | 161 | void KeccakInitializeRoundConstants() 162 | { 163 | UINT8 LFSRstate = 0x01; 164 | unsigned int i, j, bitPosition; 165 | 166 | for(i=0; i 15 | #include "KeccakSponge.h" 16 | #include "KeccakF-1600-interface.h" 17 | 18 | int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity) 19 | { 20 | if (rate+capacity != 1600) 21 | return 1; 22 | if ((rate <= 0) || (rate >= 1600) || ((rate % 64) != 0)) 23 | return 1; 24 | KeccakInitialize(); 25 | state->rate = rate; 26 | state->capacity = capacity; 27 | state->fixedOutputLength = 0; 28 | KeccakInitializeState(state->state); 29 | memset(state->dataQueue, 0, KeccakMaximumRateInBytes); 30 | state->bitsInQueue = 0; 31 | state->squeezing = 0; 32 | state->bitsAvailableForSqueezing = 0; 33 | 34 | return 0; 35 | } 36 | 37 | void AbsorbQueue(spongeState *state) 38 | { 39 | // state->bitsInQueue is assumed to be equal to state->rate 40 | #ifdef ProvideFast576 41 | if (state->rate == 576) 42 | KeccakAbsorb576bits(state->state, state->dataQueue); 43 | else 44 | #endif 45 | #ifdef ProvideFast832 46 | if (state->rate == 832) 47 | KeccakAbsorb832bits(state->state, state->dataQueue); 48 | else 49 | #endif 50 | #ifdef ProvideFast1024 51 | if (state->rate == 1024) 52 | KeccakAbsorb1024bits(state->state, state->dataQueue); 53 | else 54 | #endif 55 | #ifdef ProvideFast1088 56 | if (state->rate == 1088) 57 | KeccakAbsorb1088bits(state->state, state->dataQueue); 58 | else 59 | #endif 60 | #ifdef ProvideFast1152 61 | if (state->rate == 1152) 62 | KeccakAbsorb1152bits(state->state, state->dataQueue); 63 | else 64 | #endif 65 | #ifdef ProvideFast1344 66 | if (state->rate == 1344) 67 | KeccakAbsorb1344bits(state->state, state->dataQueue); 68 | else 69 | #endif 70 | KeccakAbsorb(state->state, state->dataQueue, state->rate/64); 71 | state->bitsInQueue = 0; 72 | } 73 | 74 | int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen) 75 | { 76 | unsigned long long i, j, wholeBlocks; 77 | unsigned int partialBlock, partialByte; 78 | const unsigned char *curData; 79 | 80 | if ((state->bitsInQueue % 8) != 0) 81 | return 1; // Only the last call may contain a partial byte 82 | if (state->squeezing) 83 | return 1; // Too late for additional input 84 | 85 | i = 0; 86 | while(i < databitlen) { 87 | if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) { 88 | wholeBlocks = (databitlen-i)/state->rate; 89 | curData = data+i/8; 90 | #ifdef ProvideFast576 91 | if (state->rate == 576) { 92 | for(j=0; jstate, curData); 94 | } 95 | } 96 | else 97 | #endif 98 | #ifdef ProvideFast832 99 | if (state->rate == 832) { 100 | for(j=0; jstate, curData); 102 | } 103 | } 104 | else 105 | #endif 106 | #ifdef ProvideFast1024 107 | if (state->rate == 1024) { 108 | for(j=0; jstate, curData); 110 | } 111 | } 112 | else 113 | #endif 114 | #ifdef ProvideFast1088 115 | if (state->rate == 1088) { 116 | for(j=0; jstate, curData); 118 | } 119 | } 120 | else 121 | #endif 122 | #ifdef ProvideFast1152 123 | if (state->rate == 1152) { 124 | for(j=0; jstate, curData); 126 | } 127 | } 128 | else 129 | #endif 130 | #ifdef ProvideFast1344 131 | if (state->rate == 1344) { 132 | for(j=0; jstate, curData); 134 | } 135 | } 136 | else 137 | #endif 138 | { 139 | for(j=0; jrate/8) { 140 | KeccakAbsorb(state->state, curData, state->rate/64); 141 | } 142 | } 143 | i += wholeBlocks*state->rate; 144 | } 145 | else { 146 | partialBlock = (unsigned int)(databitlen - i); 147 | if (partialBlock+state->bitsInQueue > state->rate) 148 | partialBlock = state->rate-state->bitsInQueue; 149 | partialByte = partialBlock % 8; 150 | partialBlock -= partialByte; 151 | memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8); 152 | state->bitsInQueue += partialBlock; 153 | i += partialBlock; 154 | if (state->bitsInQueue == state->rate) 155 | AbsorbQueue(state); 156 | if (partialByte > 0) { 157 | unsigned char mask = (1 << partialByte)-1; 158 | state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask; 159 | state->bitsInQueue += partialByte; 160 | i += partialByte; 161 | } 162 | } 163 | } 164 | return 0; 165 | } 166 | 167 | void PadAndSwitchToSqueezingPhase(spongeState *state) 168 | { 169 | // Note: the bits are numbered from 0=LSB to 7=MSB 170 | if (state->bitsInQueue + 1 == state->rate) { 171 | state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); 172 | AbsorbQueue(state); 173 | memset(state->dataQueue, 0, state->rate/8); 174 | } 175 | else { 176 | memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8); 177 | state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8); 178 | } 179 | state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8); 180 | AbsorbQueue(state); 181 | 182 | #ifdef ProvideFast1024 183 | if (state->rate == 1024) { 184 | KeccakExtract1024bits(state->state, state->dataQueue); 185 | state->bitsAvailableForSqueezing = 1024; 186 | } 187 | else 188 | #endif 189 | { 190 | KeccakExtract(state->state, state->dataQueue, state->rate/64); 191 | state->bitsAvailableForSqueezing = state->rate; 192 | } 193 | state->squeezing = 1; 194 | } 195 | 196 | int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength) 197 | { 198 | unsigned long long i; 199 | unsigned int partialBlock; 200 | 201 | if (!state->squeezing) 202 | PadAndSwitchToSqueezingPhase(state); 203 | if ((outputLength % 8) != 0) 204 | return 1; // Only multiple of 8 bits are allowed, truncation can be done at user level 205 | 206 | i = 0; 207 | while(i < outputLength) { 208 | if (state->bitsAvailableForSqueezing == 0) { 209 | KeccakPermutation(state->state); 210 | #ifdef ProvideFast1024 211 | if (state->rate == 1024) { 212 | KeccakExtract1024bits(state->state, state->dataQueue); 213 | state->bitsAvailableForSqueezing = 1024; 214 | } 215 | else 216 | #endif 217 | { 218 | KeccakExtract(state->state, state->dataQueue, state->rate/64); 219 | state->bitsAvailableForSqueezing = state->rate; 220 | } 221 | } 222 | partialBlock = state->bitsAvailableForSqueezing; 223 | if ((unsigned long long)partialBlock > outputLength - i) 224 | partialBlock = (unsigned int)(outputLength - i); 225 | memcpy(output+i/8, state->dataQueue+(state->rate-state->bitsAvailableForSqueezing)/8, partialBlock/8); 226 | state->bitsAvailableForSqueezing -= partialBlock; 227 | i += partialBlock; 228 | } 229 | return 0; 230 | } 231 | -------------------------------------------------------------------------------- /c_src/opt64/KeccakF-1600-opt64.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #include 15 | #include "../brg_endian.h" 16 | #include "KeccakF-1600-opt64-settings.h" 17 | #include "../KeccakF-1600-interface.h" 18 | 19 | typedef unsigned char UINT8; 20 | typedef unsigned long long int UINT64; 21 | 22 | #if defined(UseSSE) 23 | #include 24 | typedef __m128i V64; 25 | typedef __m128i V128; 26 | typedef union { 27 | V128 v128; 28 | UINT64 v64[2]; 29 | } V6464; 30 | 31 | #define ANDnu64(a, b) _mm_andnot_si128(a, b) 32 | #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a)) 33 | #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a)) 34 | #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) 35 | #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b) 36 | #define XOR64(a, b) _mm_xor_si128(a, b) 37 | #define XOReq64(a, b) a = _mm_xor_si128(a, b) 38 | 39 | #define ANDnu128(a, b) _mm_andnot_si128(a, b) 40 | #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) 41 | #define LOAD128(a) _mm_load_si128((const V128 *)&(a)) 42 | #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) 43 | #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o))) 44 | #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) 45 | #define XOR128(a, b) _mm_xor_si128(a, b) 46 | #define XOReq128(a, b) a = _mm_xor_si128(a, b) 47 | #define GET64LO(a, b) _mm_unpacklo_epi64(a, b) 48 | #define GET64HI(a, b) _mm_unpackhi_epi64(a, b) 49 | #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE) 50 | #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44) 51 | #define ZERO128() _mm_setzero_si128() 52 | 53 | #ifdef UseOnlySIMD64 54 | #include "KeccakF-1600-simd64.macros" 55 | #else 56 | #include "KeccakF-1600-simd128.macros" 57 | #endif 58 | 59 | #ifdef UseBebigokimisa 60 | #error "UseBebigokimisa cannot be used in combination with UseSSE" 61 | #endif 62 | #elif defined(UseMMX) 63 | #include 64 | typedef __m64 V64; 65 | #define ANDnu64(a, b) _mm_andnot_si64(a, b) 66 | 67 | #if (defined(_MSC_VER) || defined (__INTEL_COMPILER)) 68 | #define LOAD64(a) *(V64*)&(a) 69 | #define CONST64(a) *(V64*)&(a) 70 | #define STORE64(a, b) *(V64*)&(a) = b 71 | #else 72 | #define LOAD64(a) (V64)a 73 | #define CONST64(a) (V64)a 74 | #define STORE64(a, b) a = (UINT64)b 75 | #endif 76 | #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o))) 77 | #define XOR64(a, b) _mm_xor_si64(a, b) 78 | #define XOReq64(a, b) a = _mm_xor_si64(a, b) 79 | 80 | #include "KeccakF-1600-simd64.macros" 81 | 82 | #ifdef UseBebigokimisa 83 | #error "UseBebigokimisa cannot be used in combination with UseMMX" 84 | #endif 85 | #else 86 | #if defined(_MSC_VER) 87 | #define ROL64(a, offset) _rotl64(a, offset) 88 | #else 89 | #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) 90 | #endif 91 | 92 | #include "KeccakF-1600-64.macros" 93 | #endif 94 | 95 | #include "../KeccakF-1600-unrolling.macros" 96 | 97 | void KeccakPermutationOnWords(UINT64 *state) 98 | { 99 | declareABCDE 100 | #if (Unrolling != 24) 101 | unsigned int i; 102 | #endif 103 | 104 | copyFromState(A, state) 105 | rounds 106 | #if defined(UseMMX) 107 | _mm_empty(); 108 | #endif 109 | } 110 | 111 | void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount) 112 | { 113 | declareABCDE 114 | #if (Unrolling != 24) 115 | unsigned int i; 116 | #endif 117 | unsigned int j; 118 | 119 | for(j=0; j> (8*i)) & 0xFF; 372 | } 373 | 374 | #ifdef ProvideFast1024 375 | void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) 376 | { 377 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 378 | memcpy(data, state, 128); 379 | #else 380 | unsigned int i; 381 | 382 | for(i=0; i<16; i++) 383 | fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]); 384 | #endif 385 | #ifdef UseBebigokimisa 386 | ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; 387 | ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; 388 | ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; 389 | ((UINT64*)data)[12] = ~((UINT64*)data)[12]; 390 | #endif 391 | } 392 | #endif 393 | 394 | void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) 395 | { 396 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 397 | memcpy(data, state, laneCount*8); 398 | #else 399 | unsigned int i; 400 | 401 | for(i=0; i 1) { 406 | ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; 407 | if (laneCount > 2) { 408 | ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; 409 | if (laneCount > 8) { 410 | ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; 411 | if (laneCount > 12) { 412 | ((UINT64*)data)[12] = ~((UINT64*)data)[12]; 413 | if (laneCount > 17) { 414 | ((UINT64*)data)[17] = ~((UINT64*)data)[17]; 415 | if (laneCount > 20) { 416 | ((UINT64*)data)[20] = ~((UINT64*)data)[20]; 417 | } 418 | } 419 | } 420 | } 421 | } 422 | } 423 | #endif 424 | } 425 | -------------------------------------------------------------------------------- /c_src/opt32/KeccakF-1600-opt32.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #include 15 | #include "../brg_endian.h" 16 | #include "KeccakF-1600-opt32-settings.h" 17 | #include "../KeccakF-1600-interface.h" 18 | 19 | typedef unsigned char UINT8; 20 | typedef unsigned short UINT16; 21 | typedef unsigned int UINT32; 22 | typedef unsigned long long int UINT64; 23 | 24 | #ifdef UseInterleaveTables 25 | int interleaveTablesBuilt = 0; 26 | UINT16 interleaveTable[65536]; 27 | UINT16 deinterleaveTable[65536]; 28 | 29 | void buildInterleaveTables() 30 | { 31 | UINT32 i, j; 32 | UINT16 x; 33 | 34 | if (!interleaveTablesBuilt) { 35 | for(i=0; i<65536; i++) { 36 | x = 0; 37 | for(j=0; j<16; j++) { 38 | if (i & (1 << j)) 39 | x |= (1 << (j/2 + 8*(j%2))); 40 | } 41 | interleaveTable[i] = x; 42 | deinterleaveTable[x] = (UINT16)i; 43 | } 44 | interleaveTablesBuilt = 1; 45 | } 46 | } 47 | 48 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 49 | 50 | #define xor2bytesIntoInterleavedWords(even, odd, source, j) \ 51 | i##j = interleaveTable[((const UINT16*)source)[j]]; \ 52 | ((UINT8*)even)[j] ^= i##j & 0xFF; \ 53 | ((UINT8*)odd)[j] ^= i##j >> 8; 54 | 55 | #define setInterleavedWordsInto2bytes(dest, even, odd, j) \ 56 | d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ 57 | ((UINT16*)dest)[j] = d##j; 58 | 59 | #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) 60 | 61 | #define xor2bytesIntoInterleavedWords(even, odd, source, j) \ 62 | i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \ 63 | *even ^= (i##j & 0xFF) << (j*8); \ 64 | *odd ^= ((i##j >> 8) & 0xFF) << (j*8); 65 | 66 | #define setInterleavedWordsInto2bytes(dest, even, odd, j) \ 67 | d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ 68 | dest[2*j] = d##j & 0xFF; \ 69 | dest[2*j+1] = d##j >> 8; 70 | 71 | #endif // Endianness 72 | 73 | void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source) 74 | { 75 | UINT16 i0, i1, i2, i3; 76 | 77 | xor2bytesIntoInterleavedWords(even, odd, source, 0) 78 | xor2bytesIntoInterleavedWords(even, odd, source, 1) 79 | xor2bytesIntoInterleavedWords(even, odd, source, 2) 80 | xor2bytesIntoInterleavedWords(even, odd, source, 3) 81 | } 82 | 83 | #define xorLanesIntoState(laneCount, state, input) \ 84 | { \ 85 | int i; \ 86 | for(i=0; i<(laneCount); i++) \ 87 | xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \ 88 | } 89 | 90 | void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd) 91 | { 92 | UINT16 d0, d1, d2, d3; 93 | 94 | setInterleavedWordsInto2bytes(dest, even, odd, 0) 95 | setInterleavedWordsInto2bytes(dest, even, odd, 1) 96 | setInterleavedWordsInto2bytes(dest, even, odd, 2) 97 | setInterleavedWordsInto2bytes(dest, even, odd, 3) 98 | } 99 | 100 | #define extractLanes(laneCount, state, data) \ 101 | { \ 102 | int i; \ 103 | for(i=0; i<(laneCount); i++) \ 104 | setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \ 105 | } 106 | 107 | #else // No interleaving tables 108 | 109 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 110 | 111 | // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 112 | #define xorInterleavedLE(rateInLanes, state, input) \ 113 | { \ 114 | const UINT32 * pI = (const UINT32 *)input; \ 115 | UINT32 * pS = state; \ 116 | UINT32 t, x0, x1; \ 117 | int i; \ 118 | for (i = (rateInLanes)-1; i >= 0; --i) \ 119 | { \ 120 | x0 = *(pI++); \ 121 | t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \ 122 | t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \ 123 | t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \ 124 | t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \ 125 | x1 = *(pI++); \ 126 | t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \ 127 | t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \ 128 | t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \ 129 | t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \ 130 | *(pS++) ^= (UINT16)x0 | (x1 << 16); \ 131 | *(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \ 132 | } \ 133 | } 134 | 135 | #define xorLanesIntoState(laneCount, state, input) \ 136 | xorInterleavedLE(laneCount, state, input) 137 | 138 | #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) 139 | 140 | // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 141 | UINT64 toInterleaving(UINT64 x) 142 | { 143 | UINT64 t; 144 | 145 | t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); 146 | t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); 147 | t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); 148 | t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); 149 | t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); 150 | 151 | return x; 152 | } 153 | 154 | void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source) 155 | { 156 | // This can be optimized 157 | UINT64 sourceWord = 158 | (UINT64)source[0] 159 | ^ (((UINT64)source[1]) << 8) 160 | ^ (((UINT64)source[2]) << 16) 161 | ^ (((UINT64)source[3]) << 24) 162 | ^ (((UINT64)source[4]) << 32) 163 | ^ (((UINT64)source[5]) << 40) 164 | ^ (((UINT64)source[6]) << 48) 165 | ^ (((UINT64)source[7]) << 56); 166 | UINT64 evenAndOddWord = toInterleaving(sourceWord); 167 | evenAndOdd[0] ^= (UINT32)evenAndOddWord; 168 | evenAndOdd[1] ^= (UINT32)(evenAndOddWord >> 32); 169 | } 170 | 171 | #define xorLanesIntoState(laneCount, state, input) \ 172 | { \ 173 | int i; \ 174 | for(i=0; i<(laneCount); i++) \ 175 | xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \ 176 | } 177 | 178 | #endif // Endianness 179 | 180 | // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 181 | UINT64 fromInterleaving(UINT64 x) 182 | { 183 | UINT64 t; 184 | 185 | t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); 186 | t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); 187 | t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); 188 | t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); 189 | t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); 190 | 191 | return x; 192 | } 193 | 194 | void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd) 195 | { 196 | #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) 197 | ((UINT64*)dest)[0] = fromInterleaving(*(UINT64*)evenAndOdd); 198 | #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) 199 | // This can be optimized 200 | UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32); 201 | UINT64 destWord = fromInterleaving(evenAndOddWord); 202 | dest[0] = destWord & 0xFF; 203 | dest[1] = (destWord >> 8) & 0xFF; 204 | dest[2] = (destWord >> 16) & 0xFF; 205 | dest[3] = (destWord >> 24) & 0xFF; 206 | dest[4] = (destWord >> 32) & 0xFF; 207 | dest[5] = (destWord >> 40) & 0xFF; 208 | dest[6] = (destWord >> 48) & 0xFF; 209 | dest[7] = (destWord >> 56) & 0xFF; 210 | #endif // Endianness 211 | } 212 | 213 | #define extractLanes(laneCount, state, data) \ 214 | { \ 215 | int i; \ 216 | for(i=0; i<(laneCount); i++) \ 217 | setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \ 218 | } 219 | 220 | #endif // With or without interleaving tables 221 | 222 | #if defined(_MSC_VER) 223 | #define ROL32(a, offset) _rotl(a, offset) 224 | #elif (defined (__arm__) && defined(__ARMCC_VERSION)) 225 | #define ROL32(a, offset) __ror(a, 32-(offset)) 226 | #else 227 | #define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) 228 | #endif 229 | 230 | #include "../KeccakF-1600-unrolling.macros" 231 | #include "KeccakF-1600-32.macros" 232 | 233 | #if (UseSchedule == 3) 234 | 235 | #ifdef UseBebigokimisa 236 | #error "No lane complementing with schedule 3." 237 | #endif 238 | 239 | #if (Unrolling != 2) 240 | #error "Only unrolling 2 is supported by schedule 3." 241 | #endif 242 | 243 | void KeccakPermutationOnWords(UINT32 *state) 244 | { 245 | rounds 246 | } 247 | 248 | void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) 249 | { 250 | xorLanesIntoState(laneCount, state, input) 251 | rounds 252 | } 253 | 254 | #ifdef ProvideFast576 255 | void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) 256 | { 257 | xorLanesIntoState(9, state, input) 258 | rounds 259 | } 260 | #endif 261 | 262 | #ifdef ProvideFast832 263 | void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) 264 | { 265 | xorLanesIntoState(13, state, input) 266 | rounds 267 | } 268 | #endif 269 | 270 | #ifdef ProvideFast1024 271 | void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) 272 | { 273 | xorLanesIntoState(16, state, input) 274 | rounds 275 | } 276 | #endif 277 | 278 | #ifdef ProvideFast1088 279 | void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) 280 | { 281 | xorLanesIntoState(17, state, input) 282 | rounds 283 | } 284 | #endif 285 | 286 | #ifdef ProvideFast1152 287 | void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) 288 | { 289 | xorLanesIntoState(18, state, input) 290 | rounds 291 | } 292 | #endif 293 | 294 | #ifdef ProvideFast1344 295 | void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) 296 | { 297 | xorLanesIntoState(21, state, input) 298 | rounds 299 | } 300 | #endif 301 | 302 | #else // (Schedule != 3) 303 | 304 | void KeccakPermutationOnWords(UINT32 *state) 305 | { 306 | declareABCDE 307 | #if (Unrolling != 24) 308 | unsigned int i; 309 | #endif 310 | 311 | copyFromState(A, state) 312 | rounds 313 | } 314 | 315 | void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) 316 | { 317 | declareABCDE 318 | unsigned int i; 319 | 320 | xorLanesIntoState(laneCount, state, input) 321 | copyFromState(A, state) 322 | rounds 323 | } 324 | 325 | #ifdef ProvideFast576 326 | void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) 327 | { 328 | declareABCDE 329 | unsigned int i; 330 | 331 | xorLanesIntoState(9, state, input) 332 | copyFromState(A, state) 333 | rounds 334 | } 335 | #endif 336 | 337 | #ifdef ProvideFast832 338 | void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) 339 | { 340 | declareABCDE 341 | unsigned int i; 342 | 343 | xorLanesIntoState(13, state, input) 344 | copyFromState(A, state) 345 | rounds 346 | } 347 | #endif 348 | 349 | #ifdef ProvideFast1024 350 | void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) 351 | { 352 | declareABCDE 353 | unsigned int i; 354 | 355 | xorLanesIntoState(16, state, input) 356 | copyFromState(A, state) 357 | rounds 358 | } 359 | #endif 360 | 361 | #ifdef ProvideFast1088 362 | void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) 363 | { 364 | declareABCDE 365 | unsigned int i; 366 | 367 | xorLanesIntoState(17, state, input) 368 | copyFromState(A, state) 369 | rounds 370 | } 371 | #endif 372 | 373 | #ifdef ProvideFast1152 374 | void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) 375 | { 376 | declareABCDE 377 | unsigned int i; 378 | 379 | xorLanesIntoState(18, state, input) 380 | copyFromState(A, state) 381 | rounds 382 | } 383 | #endif 384 | 385 | #ifdef ProvideFast1344 386 | void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) 387 | { 388 | declareABCDE 389 | unsigned int i; 390 | 391 | xorLanesIntoState(21, state, input) 392 | copyFromState(A, state) 393 | rounds 394 | } 395 | #endif 396 | 397 | #endif 398 | 399 | void KeccakInitialize() 400 | { 401 | #ifdef UseInterleaveTables 402 | buildInterleaveTables(); 403 | #endif 404 | } 405 | 406 | void KeccakInitializeState(unsigned char *state) 407 | { 408 | memset(state, 0, 200); 409 | #ifdef UseBebigokimisa 410 | ((UINT32*)state)[ 2] = ~(UINT32)0; 411 | ((UINT32*)state)[ 3] = ~(UINT32)0; 412 | ((UINT32*)state)[ 4] = ~(UINT32)0; 413 | ((UINT32*)state)[ 5] = ~(UINT32)0; 414 | ((UINT32*)state)[16] = ~(UINT32)0; 415 | ((UINT32*)state)[17] = ~(UINT32)0; 416 | ((UINT32*)state)[24] = ~(UINT32)0; 417 | ((UINT32*)state)[25] = ~(UINT32)0; 418 | ((UINT32*)state)[34] = ~(UINT32)0; 419 | ((UINT32*)state)[35] = ~(UINT32)0; 420 | ((UINT32*)state)[40] = ~(UINT32)0; 421 | ((UINT32*)state)[41] = ~(UINT32)0; 422 | #endif 423 | } 424 | 425 | void KeccakPermutation(unsigned char *state) 426 | { 427 | // We assume the state is always stored as interleaved 32-bit words 428 | KeccakPermutationOnWords((UINT32*)state); 429 | } 430 | 431 | #ifdef ProvideFast576 432 | void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data) 433 | { 434 | KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data); 435 | } 436 | #endif 437 | 438 | #ifdef ProvideFast832 439 | void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data) 440 | { 441 | KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data); 442 | } 443 | #endif 444 | 445 | #ifdef ProvideFast1024 446 | void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data) 447 | { 448 | KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data); 449 | } 450 | #endif 451 | 452 | #ifdef ProvideFast1088 453 | void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data) 454 | { 455 | KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data); 456 | } 457 | #endif 458 | 459 | #ifdef ProvideFast1152 460 | void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data) 461 | { 462 | KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data); 463 | } 464 | #endif 465 | 466 | #ifdef ProvideFast1344 467 | void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data) 468 | { 469 | KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data); 470 | } 471 | #endif 472 | 473 | void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) 474 | { 475 | KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount); 476 | } 477 | 478 | #ifdef ProvideFast1024 479 | void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) 480 | { 481 | extractLanes(16, state, data) 482 | #ifdef UseBebigokimisa 483 | ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; 484 | ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; 485 | ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; 486 | ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; 487 | ((UINT32*)data)[16] = ~((UINT32*)data)[16]; 488 | ((UINT32*)data)[17] = ~((UINT32*)data)[17]; 489 | ((UINT32*)data)[24] = ~((UINT32*)data)[24]; 490 | ((UINT32*)data)[25] = ~((UINT32*)data)[25]; 491 | #endif 492 | } 493 | #endif 494 | 495 | void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) 496 | { 497 | extractLanes(laneCount, state, data) 498 | #ifdef UseBebigokimisa 499 | if (laneCount > 1) { 500 | ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; 501 | ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; 502 | if (laneCount > 2) { 503 | ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; 504 | ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; 505 | if (laneCount > 8) { 506 | ((UINT32*)data)[16] = ~((UINT32*)data)[16]; 507 | ((UINT32*)data)[17] = ~((UINT32*)data)[17]; 508 | if (laneCount > 12) { 509 | ((UINT32*)data)[24] = ~((UINT32*)data)[24]; 510 | ((UINT32*)data)[25] = ~((UINT32*)data)[25]; 511 | if (laneCount > 17) { 512 | ((UINT32*)data)[34] = ~((UINT32*)data)[34]; 513 | ((UINT32*)data)[35] = ~((UINT32*)data)[35]; 514 | if (laneCount > 20) { 515 | ((UINT32*)data)[40] = ~((UINT32*)data)[40]; 516 | ((UINT32*)data)[41] = ~((UINT32*)data)[41]; 517 | } 518 | } 519 | } 520 | } 521 | } 522 | } 523 | #endif 524 | } 525 | -------------------------------------------------------------------------------- /c_src/opt32/KeccakF-1600-32-rvk.macros: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by Ronny Van Keer, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | static const UINT32 KeccakF1600RoundConstants_int2[2*24] = 15 | { 16 | 0x00000001UL, 0x00000000UL, 17 | 0x00000000UL, 0x00000089UL, 18 | 0x00000000UL, 0x8000008bUL, 19 | 0x00000000UL, 0x80008080UL, 20 | 0x00000001UL, 0x0000008bUL, 21 | 0x00000001UL, 0x00008000UL, 22 | 0x00000001UL, 0x80008088UL, 23 | 0x00000001UL, 0x80000082UL, 24 | 0x00000000UL, 0x0000000bUL, 25 | 0x00000000UL, 0x0000000aUL, 26 | 0x00000001UL, 0x00008082UL, 27 | 0x00000000UL, 0x00008003UL, 28 | 0x00000001UL, 0x0000808bUL, 29 | 0x00000001UL, 0x8000000bUL, 30 | 0x00000001UL, 0x8000008aUL, 31 | 0x00000001UL, 0x80000081UL, 32 | 0x00000000UL, 0x80000081UL, 33 | 0x00000000UL, 0x80000008UL, 34 | 0x00000000UL, 0x00000083UL, 35 | 0x00000000UL, 0x80008003UL, 36 | 0x00000001UL, 0x80008088UL, 37 | 0x00000000UL, 0x80000088UL, 38 | 0x00000001UL, 0x00008000UL, 39 | 0x00000000UL, 0x80008082UL 40 | }; 41 | 42 | #undef rounds 43 | 44 | #define rounds \ 45 | { \ 46 | UINT32 Da0, De0, Di0, Do0, Du0; \ 47 | UINT32 Da1, De1, Di1, Do1, Du1; \ 48 | UINT32 Ba, Be, Bi, Bo, Bu; \ 49 | UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ 50 | UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ 51 | UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ 52 | UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ 53 | UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ 54 | UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ 55 | UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ 56 | UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ 57 | UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ 58 | UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ 59 | UINT32 Cw, Cx, Cy, Cz; \ 60 | UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ 61 | UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ 62 | UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ 63 | UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ 64 | UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ 65 | UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ 66 | UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ 67 | UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ 68 | UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ 69 | UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ 70 | const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \ 71 | UINT32 i; \ 72 | \ 73 | copyFromState(A, state) \ 74 | \ 75 | for( i = 12; i != 0; --i ) { \ 76 | Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ 77 | Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ 78 | Da0 = Cx^ROL32(Du1, 1); \ 79 | Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ 80 | Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ 81 | Da1 = Cz^Du0; \ 82 | \ 83 | Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ 84 | Do0 = Cw^ROL32(Cz, 1); \ 85 | Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ 86 | Do1 = Cy^Cx; \ 87 | \ 88 | Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ 89 | De0 = Cx^ROL32(Cy, 1); \ 90 | Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ 91 | De1 = Cz^Cw; \ 92 | \ 93 | Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ 94 | Di0 = Du0^ROL32(Cy, 1); \ 95 | Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ 96 | Di1 = Du1^Cw; \ 97 | \ 98 | Du0 = Cw^ROL32(Cz, 1); \ 99 | Du1 = Cy^Cx; \ 100 | \ 101 | Aba0 ^= Da0; \ 102 | Ba = Aba0; \ 103 | Age0 ^= De0; \ 104 | Be = ROL32(Age0, 22); \ 105 | Aki1 ^= Di1; \ 106 | Bi = ROL32(Aki1, 22); \ 107 | Amo1 ^= Do1; \ 108 | Bo = ROL32(Amo1, 11); \ 109 | Asu0 ^= Du0; \ 110 | Bu = ROL32(Asu0, 7); \ 111 | Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \ 112 | Ebe0 = Be ^((~Bi)& Bo ); \ 113 | Ebi0 = Bi ^((~Bo)& Bu ); \ 114 | Ebo0 = Bo ^((~Bu)& Ba ); \ 115 | Ebu0 = Bu ^((~Ba)& Be ); \ 116 | \ 117 | Abo0 ^= Do0; \ 118 | Ba = ROL32(Abo0, 14); \ 119 | Agu0 ^= Du0; \ 120 | Be = ROL32(Agu0, 10); \ 121 | Aka1 ^= Da1; \ 122 | Bi = ROL32(Aka1, 2); \ 123 | Ame1 ^= De1; \ 124 | Bo = ROL32(Ame1, 23); \ 125 | Asi1 ^= Di1; \ 126 | Bu = ROL32(Asi1, 31); \ 127 | Ega0 = Ba ^((~Be)& Bi ); \ 128 | Ege0 = Be ^((~Bi)& Bo ); \ 129 | Egi0 = Bi ^((~Bo)& Bu ); \ 130 | Ego0 = Bo ^((~Bu)& Ba ); \ 131 | Egu0 = Bu ^((~Ba)& Be ); \ 132 | \ 133 | Abe1 ^= De1; \ 134 | Ba = ROL32(Abe1, 1); \ 135 | Agi0 ^= Di0; \ 136 | Be = ROL32(Agi0, 3); \ 137 | Ako1 ^= Do1; \ 138 | Bi = ROL32(Ako1, 13); \ 139 | Amu0 ^= Du0; \ 140 | Bo = ROL32(Amu0, 4); \ 141 | Asa0 ^= Da0; \ 142 | Bu = ROL32(Asa0, 9); \ 143 | Eka0 = Ba ^((~Be)& Bi ); \ 144 | Eke0 = Be ^((~Bi)& Bo ); \ 145 | Eki0 = Bi ^((~Bo)& Bu ); \ 146 | Eko0 = Bo ^((~Bu)& Ba ); \ 147 | Eku0 = Bu ^((~Ba)& Be ); \ 148 | \ 149 | Abu1 ^= Du1; \ 150 | Ba = ROL32(Abu1, 14); \ 151 | Aga0 ^= Da0; \ 152 | Be = ROL32(Aga0, 18); \ 153 | Ake0 ^= De0; \ 154 | Bi = ROL32(Ake0, 5); \ 155 | Ami1 ^= Di1; \ 156 | Bo = ROL32(Ami1, 8); \ 157 | Aso0 ^= Do0; \ 158 | Bu = ROL32(Aso0, 28); \ 159 | Ema0 = Ba ^((~Be)& Bi ); \ 160 | Eme0 = Be ^((~Bi)& Bo ); \ 161 | Emi0 = Bi ^((~Bo)& Bu ); \ 162 | Emo0 = Bo ^((~Bu)& Ba ); \ 163 | Emu0 = Bu ^((~Ba)& Be ); \ 164 | \ 165 | Abi0 ^= Di0; \ 166 | Ba = ROL32(Abi0, 31); \ 167 | Ago1 ^= Do1; \ 168 | Be = ROL32(Ago1, 28); \ 169 | Aku1 ^= Du1; \ 170 | Bi = ROL32(Aku1, 20); \ 171 | Ama1 ^= Da1; \ 172 | Bo = ROL32(Ama1, 21); \ 173 | Ase0 ^= De0; \ 174 | Bu = ROL32(Ase0, 1); \ 175 | Esa0 = Ba ^((~Be)& Bi ); \ 176 | Ese0 = Be ^((~Bi)& Bo ); \ 177 | Esi0 = Bi ^((~Bo)& Bu ); \ 178 | Eso0 = Bo ^((~Bu)& Ba ); \ 179 | Esu0 = Bu ^((~Ba)& Be ); \ 180 | \ 181 | Aba1 ^= Da1; \ 182 | Ba = Aba1; \ 183 | Age1 ^= De1; \ 184 | Be = ROL32(Age1, 22); \ 185 | Aki0 ^= Di0; \ 186 | Bi = ROL32(Aki0, 21); \ 187 | Amo0 ^= Do0; \ 188 | Bo = ROL32(Amo0, 10); \ 189 | Asu1 ^= Du1; \ 190 | Bu = ROL32(Asu1, 7); \ 191 | Eba1 = Ba ^((~Be)& Bi ); \ 192 | Eba1 ^= *(pRoundConstants++); \ 193 | Ebe1 = Be ^((~Bi)& Bo ); \ 194 | Ebi1 = Bi ^((~Bo)& Bu ); \ 195 | Ebo1 = Bo ^((~Bu)& Ba ); \ 196 | Ebu1 = Bu ^((~Ba)& Be ); \ 197 | \ 198 | Abo1 ^= Do1; \ 199 | Ba = ROL32(Abo1, 14); \ 200 | Agu1 ^= Du1; \ 201 | Be = ROL32(Agu1, 10); \ 202 | Aka0 ^= Da0; \ 203 | Bi = ROL32(Aka0, 1); \ 204 | Ame0 ^= De0; \ 205 | Bo = ROL32(Ame0, 22); \ 206 | Asi0 ^= Di0; \ 207 | Bu = ROL32(Asi0, 30); \ 208 | Ega1 = Ba ^((~Be)& Bi ); \ 209 | Ege1 = Be ^((~Bi)& Bo ); \ 210 | Egi1 = Bi ^((~Bo)& Bu ); \ 211 | Ego1 = Bo ^((~Bu)& Ba ); \ 212 | Egu1 = Bu ^((~Ba)& Be ); \ 213 | \ 214 | Abe0 ^= De0; \ 215 | Ba = Abe0; \ 216 | Agi1 ^= Di1; \ 217 | Be = ROL32(Agi1, 3); \ 218 | Ako0 ^= Do0; \ 219 | Bi = ROL32(Ako0, 12); \ 220 | Amu1 ^= Du1; \ 221 | Bo = ROL32(Amu1, 4); \ 222 | Asa1 ^= Da1; \ 223 | Bu = ROL32(Asa1, 9); \ 224 | Eka1 = Ba ^((~Be)& Bi ); \ 225 | Eke1 = Be ^((~Bi)& Bo ); \ 226 | Eki1 = Bi ^((~Bo)& Bu ); \ 227 | Eko1 = Bo ^((~Bu)& Ba ); \ 228 | Eku1 = Bu ^((~Ba)& Be ); \ 229 | \ 230 | Abu0 ^= Du0; \ 231 | Ba = ROL32(Abu0, 13); \ 232 | Aga1 ^= Da1; \ 233 | Be = ROL32(Aga1, 18); \ 234 | Ake1 ^= De1; \ 235 | Bi = ROL32(Ake1, 5); \ 236 | Ami0 ^= Di0; \ 237 | Bo = ROL32(Ami0, 7); \ 238 | Aso1 ^= Do1; \ 239 | Bu = ROL32(Aso1, 28); \ 240 | Ema1 = Ba ^((~Be)& Bi ); \ 241 | Eme1 = Be ^((~Bi)& Bo ); \ 242 | Emi1 = Bi ^((~Bo)& Bu ); \ 243 | Emo1 = Bo ^((~Bu)& Ba ); \ 244 | Emu1 = Bu ^((~Ba)& Be ); \ 245 | \ 246 | Abi1 ^= Di1; \ 247 | Ba = ROL32(Abi1, 31); \ 248 | Ago0 ^= Do0; \ 249 | Be = ROL32(Ago0, 27); \ 250 | Aku0 ^= Du0; \ 251 | Bi = ROL32(Aku0, 19); \ 252 | Ama0 ^= Da0; \ 253 | Bo = ROL32(Ama0, 20); \ 254 | Ase1 ^= De1; \ 255 | Bu = ROL32(Ase1, 1); \ 256 | Esa1 = Ba ^((~Be)& Bi ); \ 257 | Ese1 = Be ^((~Bi)& Bo ); \ 258 | Esi1 = Bi ^((~Bo)& Bu ); \ 259 | Eso1 = Bo ^((~Bu)& Ba ); \ 260 | Esu1 = Bu ^((~Ba)& Be ); \ 261 | \ 262 | Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \ 263 | Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \ 264 | Da0 = Cx^ROL32(Du1, 1); \ 265 | Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \ 266 | Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \ 267 | Da1 = Cz^Du0; \ 268 | \ 269 | Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \ 270 | Do0 = Cw^ROL32(Cz, 1); \ 271 | Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \ 272 | Do1 = Cy^Cx; \ 273 | \ 274 | Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \ 275 | De0 = Cx^ROL32(Cy, 1); \ 276 | Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \ 277 | De1 = Cz^Cw; \ 278 | \ 279 | Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \ 280 | Di0 = Du0^ROL32(Cy, 1); \ 281 | Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \ 282 | Di1 = Du1^Cw; \ 283 | \ 284 | Du0 = Cw^ROL32(Cz, 1); \ 285 | Du1 = Cy^Cx; \ 286 | \ 287 | Eba0 ^= Da0; \ 288 | Ba = Eba0; \ 289 | Ege0 ^= De0; \ 290 | Be = ROL32(Ege0, 22); \ 291 | Eki1 ^= Di1; \ 292 | Bi = ROL32(Eki1, 22); \ 293 | Emo1 ^= Do1; \ 294 | Bo = ROL32(Emo1, 11); \ 295 | Esu0 ^= Du0; \ 296 | Bu = ROL32(Esu0, 7); \ 297 | Aba0 = Ba ^((~Be)& Bi ); \ 298 | Aba0 ^= *(pRoundConstants++); \ 299 | Abe0 = Be ^((~Bi)& Bo ); \ 300 | Abi0 = Bi ^((~Bo)& Bu ); \ 301 | Abo0 = Bo ^((~Bu)& Ba ); \ 302 | Abu0 = Bu ^((~Ba)& Be ); \ 303 | \ 304 | Ebo0 ^= Do0; \ 305 | Ba = ROL32(Ebo0, 14); \ 306 | Egu0 ^= Du0; \ 307 | Be = ROL32(Egu0, 10); \ 308 | Eka1 ^= Da1; \ 309 | Bi = ROL32(Eka1, 2); \ 310 | Eme1 ^= De1; \ 311 | Bo = ROL32(Eme1, 23); \ 312 | Esi1 ^= Di1; \ 313 | Bu = ROL32(Esi1, 31); \ 314 | Aga0 = Ba ^((~Be)& Bi ); \ 315 | Age0 = Be ^((~Bi)& Bo ); \ 316 | Agi0 = Bi ^((~Bo)& Bu ); \ 317 | Ago0 = Bo ^((~Bu)& Ba ); \ 318 | Agu0 = Bu ^((~Ba)& Be ); \ 319 | \ 320 | Ebe1 ^= De1; \ 321 | Ba = ROL32(Ebe1, 1); \ 322 | Egi0 ^= Di0; \ 323 | Be = ROL32(Egi0, 3); \ 324 | Eko1 ^= Do1; \ 325 | Bi = ROL32(Eko1, 13); \ 326 | Emu0 ^= Du0; \ 327 | Bo = ROL32(Emu0, 4); \ 328 | Esa0 ^= Da0; \ 329 | Bu = ROL32(Esa0, 9); \ 330 | Aka0 = Ba ^((~Be)& Bi ); \ 331 | Ake0 = Be ^((~Bi)& Bo ); \ 332 | Aki0 = Bi ^((~Bo)& Bu ); \ 333 | Ako0 = Bo ^((~Bu)& Ba ); \ 334 | Aku0 = Bu ^((~Ba)& Be ); \ 335 | \ 336 | Ebu1 ^= Du1; \ 337 | Ba = ROL32(Ebu1, 14); \ 338 | Ega0 ^= Da0; \ 339 | Be = ROL32(Ega0, 18); \ 340 | Eke0 ^= De0; \ 341 | Bi = ROL32(Eke0, 5); \ 342 | Emi1 ^= Di1; \ 343 | Bo = ROL32(Emi1, 8); \ 344 | Eso0 ^= Do0; \ 345 | Bu = ROL32(Eso0, 28); \ 346 | Ama0 = Ba ^((~Be)& Bi ); \ 347 | Ame0 = Be ^((~Bi)& Bo ); \ 348 | Ami0 = Bi ^((~Bo)& Bu ); \ 349 | Amo0 = Bo ^((~Bu)& Ba ); \ 350 | Amu0 = Bu ^((~Ba)& Be ); \ 351 | \ 352 | Ebi0 ^= Di0; \ 353 | Ba = ROL32(Ebi0, 31); \ 354 | Ego1 ^= Do1; \ 355 | Be = ROL32(Ego1, 28); \ 356 | Eku1 ^= Du1; \ 357 | Bi = ROL32(Eku1, 20); \ 358 | Ema1 ^= Da1; \ 359 | Bo = ROL32(Ema1, 21); \ 360 | Ese0 ^= De0; \ 361 | Bu = ROL32(Ese0, 1); \ 362 | Asa0 = Ba ^((~Be)& Bi ); \ 363 | Ase0 = Be ^((~Bi)& Bo ); \ 364 | Asi0 = Bi ^((~Bo)& Bu ); \ 365 | Aso0 = Bo ^((~Bu)& Ba ); \ 366 | Asu0 = Bu ^((~Ba)& Be ); \ 367 | \ 368 | Eba1 ^= Da1; \ 369 | Ba = Eba1; \ 370 | Ege1 ^= De1; \ 371 | Be = ROL32(Ege1, 22); \ 372 | Eki0 ^= Di0; \ 373 | Bi = ROL32(Eki0, 21); \ 374 | Emo0 ^= Do0; \ 375 | Bo = ROL32(Emo0, 10); \ 376 | Esu1 ^= Du1; \ 377 | Bu = ROL32(Esu1, 7); \ 378 | Aba1 = Ba ^((~Be)& Bi ); \ 379 | Aba1 ^= *(pRoundConstants++); \ 380 | Abe1 = Be ^((~Bi)& Bo ); \ 381 | Abi1 = Bi ^((~Bo)& Bu ); \ 382 | Abo1 = Bo ^((~Bu)& Ba ); \ 383 | Abu1 = Bu ^((~Ba)& Be ); \ 384 | \ 385 | Ebo1 ^= Do1; \ 386 | Ba = ROL32(Ebo1, 14); \ 387 | Egu1 ^= Du1; \ 388 | Be = ROL32(Egu1, 10); \ 389 | Eka0 ^= Da0; \ 390 | Bi = ROL32(Eka0, 1); \ 391 | Eme0 ^= De0; \ 392 | Bo = ROL32(Eme0, 22); \ 393 | Esi0 ^= Di0; \ 394 | Bu = ROL32(Esi0, 30); \ 395 | Aga1 = Ba ^((~Be)& Bi ); \ 396 | Age1 = Be ^((~Bi)& Bo ); \ 397 | Agi1 = Bi ^((~Bo)& Bu ); \ 398 | Ago1 = Bo ^((~Bu)& Ba ); \ 399 | Agu1 = Bu ^((~Ba)& Be ); \ 400 | \ 401 | Ebe0 ^= De0; \ 402 | Ba = Ebe0; \ 403 | Egi1 ^= Di1; \ 404 | Be = ROL32(Egi1, 3); \ 405 | Eko0 ^= Do0; \ 406 | Bi = ROL32(Eko0, 12); \ 407 | Emu1 ^= Du1; \ 408 | Bo = ROL32(Emu1, 4); \ 409 | Esa1 ^= Da1; \ 410 | Bu = ROL32(Esa1, 9); \ 411 | Aka1 = Ba ^((~Be)& Bi ); \ 412 | Ake1 = Be ^((~Bi)& Bo ); \ 413 | Aki1 = Bi ^((~Bo)& Bu ); \ 414 | Ako1 = Bo ^((~Bu)& Ba ); \ 415 | Aku1 = Bu ^((~Ba)& Be ); \ 416 | \ 417 | Ebu0 ^= Du0; \ 418 | Ba = ROL32(Ebu0, 13); \ 419 | Ega1 ^= Da1; \ 420 | Be = ROL32(Ega1, 18); \ 421 | Eke1 ^= De1; \ 422 | Bi = ROL32(Eke1, 5); \ 423 | Emi0 ^= Di0; \ 424 | Bo = ROL32(Emi0, 7); \ 425 | Eso1 ^= Do1; \ 426 | Bu = ROL32(Eso1, 28); \ 427 | Ama1 = Ba ^((~Be)& Bi ); \ 428 | Ame1 = Be ^((~Bi)& Bo ); \ 429 | Ami1 = Bi ^((~Bo)& Bu ); \ 430 | Amo1 = Bo ^((~Bu)& Ba ); \ 431 | Amu1 = Bu ^((~Ba)& Be ); \ 432 | \ 433 | Ebi1 ^= Di1; \ 434 | Ba = ROL32(Ebi1, 31); \ 435 | Ego0 ^= Do0; \ 436 | Be = ROL32(Ego0, 27); \ 437 | Eku0 ^= Du0; \ 438 | Bi = ROL32(Eku0, 19); \ 439 | Ema0 ^= Da0; \ 440 | Bo = ROL32(Ema0, 20); \ 441 | Ese1 ^= De1; \ 442 | Bu = ROL32(Ese1, 1); \ 443 | Asa1 = Ba ^((~Be)& Bi ); \ 444 | Ase1 = Be ^((~Bi)& Bo ); \ 445 | Asi1 = Bi ^((~Bo)& Bu ); \ 446 | Aso1 = Bo ^((~Bu)& Ba ); \ 447 | Asu1 = Bu ^((~Ba)& Be ); \ 448 | } \ 449 | copyToState(state, A) \ 450 | } 451 | 452 | #define copyFromState(X, state) \ 453 | X##ba0 = state[ 0]; \ 454 | X##ba1 = state[ 1]; \ 455 | X##be0 = state[ 2]; \ 456 | X##be1 = state[ 3]; \ 457 | X##bi0 = state[ 4]; \ 458 | X##bi1 = state[ 5]; \ 459 | X##bo0 = state[ 6]; \ 460 | X##bo1 = state[ 7]; \ 461 | X##bu0 = state[ 8]; \ 462 | X##bu1 = state[ 9]; \ 463 | X##ga0 = state[10]; \ 464 | X##ga1 = state[11]; \ 465 | X##ge0 = state[12]; \ 466 | X##ge1 = state[13]; \ 467 | X##gi0 = state[14]; \ 468 | X##gi1 = state[15]; \ 469 | X##go0 = state[16]; \ 470 | X##go1 = state[17]; \ 471 | X##gu0 = state[18]; \ 472 | X##gu1 = state[19]; \ 473 | X##ka0 = state[20]; \ 474 | X##ka1 = state[21]; \ 475 | X##ke0 = state[22]; \ 476 | X##ke1 = state[23]; \ 477 | X##ki0 = state[24]; \ 478 | X##ki1 = state[25]; \ 479 | X##ko0 = state[26]; \ 480 | X##ko1 = state[27]; \ 481 | X##ku0 = state[28]; \ 482 | X##ku1 = state[29]; \ 483 | X##ma0 = state[30]; \ 484 | X##ma1 = state[31]; \ 485 | X##me0 = state[32]; \ 486 | X##me1 = state[33]; \ 487 | X##mi0 = state[34]; \ 488 | X##mi1 = state[35]; \ 489 | X##mo0 = state[36]; \ 490 | X##mo1 = state[37]; \ 491 | X##mu0 = state[38]; \ 492 | X##mu1 = state[39]; \ 493 | X##sa0 = state[40]; \ 494 | X##sa1 = state[41]; \ 495 | X##se0 = state[42]; \ 496 | X##se1 = state[43]; \ 497 | X##si0 = state[44]; \ 498 | X##si1 = state[45]; \ 499 | X##so0 = state[46]; \ 500 | X##so1 = state[47]; \ 501 | X##su0 = state[48]; \ 502 | X##su1 = state[49]; \ 503 | 504 | #define copyToState(state, X) \ 505 | state[ 0] = X##ba0; \ 506 | state[ 1] = X##ba1; \ 507 | state[ 2] = X##be0; \ 508 | state[ 3] = X##be1; \ 509 | state[ 4] = X##bi0; \ 510 | state[ 5] = X##bi1; \ 511 | state[ 6] = X##bo0; \ 512 | state[ 7] = X##bo1; \ 513 | state[ 8] = X##bu0; \ 514 | state[ 9] = X##bu1; \ 515 | state[10] = X##ga0; \ 516 | state[11] = X##ga1; \ 517 | state[12] = X##ge0; \ 518 | state[13] = X##ge1; \ 519 | state[14] = X##gi0; \ 520 | state[15] = X##gi1; \ 521 | state[16] = X##go0; \ 522 | state[17] = X##go1; \ 523 | state[18] = X##gu0; \ 524 | state[19] = X##gu1; \ 525 | state[20] = X##ka0; \ 526 | state[21] = X##ka1; \ 527 | state[22] = X##ke0; \ 528 | state[23] = X##ke1; \ 529 | state[24] = X##ki0; \ 530 | state[25] = X##ki1; \ 531 | state[26] = X##ko0; \ 532 | state[27] = X##ko1; \ 533 | state[28] = X##ku0; \ 534 | state[29] = X##ku1; \ 535 | state[30] = X##ma0; \ 536 | state[31] = X##ma1; \ 537 | state[32] = X##me0; \ 538 | state[33] = X##me1; \ 539 | state[34] = X##mi0; \ 540 | state[35] = X##mi1; \ 541 | state[36] = X##mo0; \ 542 | state[37] = X##mo1; \ 543 | state[38] = X##mu0; \ 544 | state[39] = X##mu1; \ 545 | state[40] = X##sa0; \ 546 | state[41] = X##sa1; \ 547 | state[42] = X##se0; \ 548 | state[43] = X##se1; \ 549 | state[44] = X##si0; \ 550 | state[45] = X##si1; \ 551 | state[46] = X##so0; \ 552 | state[47] = X##so1; \ 553 | state[48] = X##su0; \ 554 | state[49] = X##su1; \ 555 | 556 | -------------------------------------------------------------------------------- /c_src/opt64/KeccakF-1600-simd64.macros: -------------------------------------------------------------------------------- 1 | /* 2 | Code automatically generated by KeccakTools! 3 | 4 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 5 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 6 | questions, please refer to our website: http://keccak.noekeon.org/ 7 | 8 | Implementation by the designers, 9 | hereby denoted as "the implementer". 10 | 11 | To the extent possible under law, the implementer has waived all copyright 12 | and related or neighboring rights to the source code in this file. 13 | http://creativecommons.org/publicdomain/zero/1.0/ 14 | */ 15 | 16 | #define declareABCDE \ 17 | V64 Aba, Abe, Abi, Abo, Abu; \ 18 | V64 Aga, Age, Agi, Ago, Agu; \ 19 | V64 Aka, Ake, Aki, Ako, Aku; \ 20 | V64 Ama, Ame, Ami, Amo, Amu; \ 21 | V64 Asa, Ase, Asi, Aso, Asu; \ 22 | V64 Bba, Bbe, Bbi, Bbo, Bbu; \ 23 | V64 Bga, Bge, Bgi, Bgo, Bgu; \ 24 | V64 Bka, Bke, Bki, Bko, Bku; \ 25 | V64 Bma, Bme, Bmi, Bmo, Bmu; \ 26 | V64 Bsa, Bse, Bsi, Bso, Bsu; \ 27 | V64 Ca, Ce, Ci, Co, Cu; \ 28 | V64 Da, De, Di, Do, Du; \ 29 | V64 Eba, Ebe, Ebi, Ebo, Ebu; \ 30 | V64 Ega, Ege, Egi, Ego, Egu; \ 31 | V64 Eka, Eke, Eki, Eko, Eku; \ 32 | V64 Ema, Eme, Emi, Emo, Emu; \ 33 | V64 Esa, Ese, Esi, Eso, Esu; \ 34 | 35 | #define prepareTheta \ 36 | Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \ 37 | Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \ 38 | Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \ 39 | Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \ 40 | Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \ 41 | 42 | // --- Code for round, with prepare-theta 43 | // --- 64-bit lanes mapped to 64-bit words 44 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 45 | Da = XOR64(Cu, ROL64(Ce, 1)); \ 46 | De = XOR64(Ca, ROL64(Ci, 1)); \ 47 | Di = XOR64(Ce, ROL64(Co, 1)); \ 48 | Do = XOR64(Ci, ROL64(Cu, 1)); \ 49 | Du = XOR64(Co, ROL64(Ca, 1)); \ 50 | \ 51 | XOReq64(A##ba, Da); \ 52 | Bba = A##ba; \ 53 | XOReq64(A##ge, De); \ 54 | Bbe = ROL64(A##ge, 44); \ 55 | XOReq64(A##ki, Di); \ 56 | Bbi = ROL64(A##ki, 43); \ 57 | E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ 58 | XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ 59 | Ca = E##ba; \ 60 | XOReq64(A##mo, Do); \ 61 | Bbo = ROL64(A##mo, 21); \ 62 | E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ 63 | Ce = E##be; \ 64 | XOReq64(A##su, Du); \ 65 | Bbu = ROL64(A##su, 14); \ 66 | E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ 67 | Ci = E##bi; \ 68 | E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ 69 | Co = E##bo; \ 70 | E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ 71 | Cu = E##bu; \ 72 | \ 73 | XOReq64(A##bo, Do); \ 74 | Bga = ROL64(A##bo, 28); \ 75 | XOReq64(A##gu, Du); \ 76 | Bge = ROL64(A##gu, 20); \ 77 | XOReq64(A##ka, Da); \ 78 | Bgi = ROL64(A##ka, 3); \ 79 | E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ 80 | XOReq64(Ca, E##ga); \ 81 | XOReq64(A##me, De); \ 82 | Bgo = ROL64(A##me, 45); \ 83 | E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ 84 | XOReq64(Ce, E##ge); \ 85 | XOReq64(A##si, Di); \ 86 | Bgu = ROL64(A##si, 61); \ 87 | E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ 88 | XOReq64(Ci, E##gi); \ 89 | E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ 90 | XOReq64(Co, E##go); \ 91 | E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ 92 | XOReq64(Cu, E##gu); \ 93 | \ 94 | XOReq64(A##be, De); \ 95 | Bka = ROL64(A##be, 1); \ 96 | XOReq64(A##gi, Di); \ 97 | Bke = ROL64(A##gi, 6); \ 98 | XOReq64(A##ko, Do); \ 99 | Bki = ROL64(A##ko, 25); \ 100 | E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ 101 | XOReq64(Ca, E##ka); \ 102 | XOReq64(A##mu, Du); \ 103 | Bko = ROL64(A##mu, 8); \ 104 | E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ 105 | XOReq64(Ce, E##ke); \ 106 | XOReq64(A##sa, Da); \ 107 | Bku = ROL64(A##sa, 18); \ 108 | E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ 109 | XOReq64(Ci, E##ki); \ 110 | E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \ 111 | XOReq64(Co, E##ko); \ 112 | E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ 113 | XOReq64(Cu, E##ku); \ 114 | \ 115 | XOReq64(A##bu, Du); \ 116 | Bma = ROL64(A##bu, 27); \ 117 | XOReq64(A##ga, Da); \ 118 | Bme = ROL64(A##ga, 36); \ 119 | XOReq64(A##ke, De); \ 120 | Bmi = ROL64(A##ke, 10); \ 121 | E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ 122 | XOReq64(Ca, E##ma); \ 123 | XOReq64(A##mi, Di); \ 124 | Bmo = ROL64(A##mi, 15); \ 125 | E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ 126 | XOReq64(Ce, E##me); \ 127 | XOReq64(A##so, Do); \ 128 | Bmu = ROL64(A##so, 56); \ 129 | E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ 130 | XOReq64(Ci, E##mi); \ 131 | E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ 132 | XOReq64(Co, E##mo); \ 133 | E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ 134 | XOReq64(Cu, E##mu); \ 135 | \ 136 | XOReq64(A##bi, Di); \ 137 | Bsa = ROL64(A##bi, 62); \ 138 | XOReq64(A##go, Do); \ 139 | Bse = ROL64(A##go, 55); \ 140 | XOReq64(A##ku, Du); \ 141 | Bsi = ROL64(A##ku, 39); \ 142 | E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ 143 | XOReq64(Ca, E##sa); \ 144 | XOReq64(A##ma, Da); \ 145 | Bso = ROL64(A##ma, 41); \ 146 | E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ 147 | XOReq64(Ce, E##se); \ 148 | XOReq64(A##se, De); \ 149 | Bsu = ROL64(A##se, 2); \ 150 | E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ 151 | XOReq64(Ci, E##si); \ 152 | E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ 153 | XOReq64(Co, E##so); \ 154 | E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ 155 | XOReq64(Cu, E##su); \ 156 | \ 157 | 158 | // --- Code for round 159 | // --- 64-bit lanes mapped to 64-bit words 160 | #define thetaRhoPiChiIota(i, A, E) \ 161 | Da = XOR64(Cu, ROL64(Ce, 1)); \ 162 | De = XOR64(Ca, ROL64(Ci, 1)); \ 163 | Di = XOR64(Ce, ROL64(Co, 1)); \ 164 | Do = XOR64(Ci, ROL64(Cu, 1)); \ 165 | Du = XOR64(Co, ROL64(Ca, 1)); \ 166 | \ 167 | XOReq64(A##ba, Da); \ 168 | Bba = A##ba; \ 169 | XOReq64(A##ge, De); \ 170 | Bbe = ROL64(A##ge, 44); \ 171 | XOReq64(A##ki, Di); \ 172 | Bbi = ROL64(A##ki, 43); \ 173 | E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \ 174 | XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \ 175 | XOReq64(A##mo, Do); \ 176 | Bbo = ROL64(A##mo, 21); \ 177 | E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \ 178 | XOReq64(A##su, Du); \ 179 | Bbu = ROL64(A##su, 14); \ 180 | E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \ 181 | E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \ 182 | E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \ 183 | \ 184 | XOReq64(A##bo, Do); \ 185 | Bga = ROL64(A##bo, 28); \ 186 | XOReq64(A##gu, Du); \ 187 | Bge = ROL64(A##gu, 20); \ 188 | XOReq64(A##ka, Da); \ 189 | Bgi = ROL64(A##ka, 3); \ 190 | E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \ 191 | XOReq64(A##me, De); \ 192 | Bgo = ROL64(A##me, 45); \ 193 | E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \ 194 | XOReq64(A##si, Di); \ 195 | Bgu = ROL64(A##si, 61); \ 196 | E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \ 197 | E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \ 198 | E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \ 199 | \ 200 | XOReq64(A##be, De); \ 201 | Bka = ROL64(A##be, 1); \ 202 | XOReq64(A##gi, Di); \ 203 | Bke = ROL64(A##gi, 6); \ 204 | XOReq64(A##ko, Do); \ 205 | Bki = ROL64(A##ko, 25); \ 206 | E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \ 207 | XOReq64(A##mu, Du); \ 208 | Bko = ROL64(A##mu, 8); \ 209 | E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \ 210 | XOReq64(A##sa, Da); \ 211 | Bku = ROL64(A##sa, 18); \ 212 | E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \ 213 | E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \ 214 | E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \ 215 | \ 216 | XOReq64(A##bu, Du); \ 217 | Bma = ROL64(A##bu, 27); \ 218 | XOReq64(A##ga, Da); \ 219 | Bme = ROL64(A##ga, 36); \ 220 | XOReq64(A##ke, De); \ 221 | Bmi = ROL64(A##ke, 10); \ 222 | E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \ 223 | XOReq64(A##mi, Di); \ 224 | Bmo = ROL64(A##mi, 15); \ 225 | E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \ 226 | XOReq64(A##so, Do); \ 227 | Bmu = ROL64(A##so, 56); \ 228 | E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \ 229 | E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \ 230 | E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \ 231 | \ 232 | XOReq64(A##bi, Di); \ 233 | Bsa = ROL64(A##bi, 62); \ 234 | XOReq64(A##go, Do); \ 235 | Bse = ROL64(A##go, 55); \ 236 | XOReq64(A##ku, Du); \ 237 | Bsi = ROL64(A##ku, 39); \ 238 | E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ 239 | XOReq64(A##ma, Da); \ 240 | Bso = ROL64(A##ma, 41); \ 241 | E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ 242 | XOReq64(A##se, De); \ 243 | Bsu = ROL64(A##se, 2); \ 244 | E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ 245 | E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ 246 | E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ 247 | \ 248 | 249 | const UINT64 KeccakF1600RoundConstants[24] = { 250 | 0x0000000000000001ULL, 251 | 0x0000000000008082ULL, 252 | 0x800000000000808aULL, 253 | 0x8000000080008000ULL, 254 | 0x000000000000808bULL, 255 | 0x0000000080000001ULL, 256 | 0x8000000080008081ULL, 257 | 0x8000000000008009ULL, 258 | 0x000000000000008aULL, 259 | 0x0000000000000088ULL, 260 | 0x0000000080008009ULL, 261 | 0x000000008000000aULL, 262 | 0x000000008000808bULL, 263 | 0x800000000000008bULL, 264 | 0x8000000000008089ULL, 265 | 0x8000000000008003ULL, 266 | 0x8000000000008002ULL, 267 | 0x8000000000000080ULL, 268 | 0x000000000000800aULL, 269 | 0x800000008000000aULL, 270 | 0x8000000080008081ULL, 271 | 0x8000000000008080ULL, 272 | 0x0000000080000001ULL, 273 | 0x8000000080008008ULL }; 274 | 275 | #define copyFromStateAndXor576bits(X, state, input) \ 276 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 277 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 278 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 279 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 280 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 281 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 282 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 283 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 284 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 285 | X##gu = LOAD64(state[ 9]); \ 286 | X##ka = LOAD64(state[10]); \ 287 | X##ke = LOAD64(state[11]); \ 288 | X##ki = LOAD64(state[12]); \ 289 | X##ko = LOAD64(state[13]); \ 290 | X##ku = LOAD64(state[14]); \ 291 | X##ma = LOAD64(state[15]); \ 292 | X##me = LOAD64(state[16]); \ 293 | X##mi = LOAD64(state[17]); \ 294 | X##mo = LOAD64(state[18]); \ 295 | X##mu = LOAD64(state[19]); \ 296 | X##sa = LOAD64(state[20]); \ 297 | X##se = LOAD64(state[21]); \ 298 | X##si = LOAD64(state[22]); \ 299 | X##so = LOAD64(state[23]); \ 300 | X##su = LOAD64(state[24]); \ 301 | 302 | #define copyFromStateAndXor832bits(X, state, input) \ 303 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 304 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 305 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 306 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 307 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 308 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 309 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 310 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 311 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 312 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 313 | X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ 314 | X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ 315 | X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ 316 | X##ko = LOAD64(state[13]); \ 317 | X##ku = LOAD64(state[14]); \ 318 | X##ma = LOAD64(state[15]); \ 319 | X##me = LOAD64(state[16]); \ 320 | X##mi = LOAD64(state[17]); \ 321 | X##mo = LOAD64(state[18]); \ 322 | X##mu = LOAD64(state[19]); \ 323 | X##sa = LOAD64(state[20]); \ 324 | X##se = LOAD64(state[21]); \ 325 | X##si = LOAD64(state[22]); \ 326 | X##so = LOAD64(state[23]); \ 327 | X##su = LOAD64(state[24]); \ 328 | 329 | #define copyFromStateAndXor1024bits(X, state, input) \ 330 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 331 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 332 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 333 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 334 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 335 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 336 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 337 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 338 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 339 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 340 | X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ 341 | X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ 342 | X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ 343 | X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ 344 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 345 | X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ 346 | X##me = LOAD64(state[16]); \ 347 | X##mi = LOAD64(state[17]); \ 348 | X##mo = LOAD64(state[18]); \ 349 | X##mu = LOAD64(state[19]); \ 350 | X##sa = LOAD64(state[20]); \ 351 | X##se = LOAD64(state[21]); \ 352 | X##si = LOAD64(state[22]); \ 353 | X##so = LOAD64(state[23]); \ 354 | X##su = LOAD64(state[24]); \ 355 | 356 | #define copyFromStateAndXor1088bits(X, state, input) \ 357 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 358 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 359 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 360 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 361 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 362 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 363 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 364 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 365 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 366 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 367 | X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ 368 | X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ 369 | X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ 370 | X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ 371 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 372 | X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ 373 | X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ 374 | X##mi = LOAD64(state[17]); \ 375 | X##mo = LOAD64(state[18]); \ 376 | X##mu = LOAD64(state[19]); \ 377 | X##sa = LOAD64(state[20]); \ 378 | X##se = LOAD64(state[21]); \ 379 | X##si = LOAD64(state[22]); \ 380 | X##so = LOAD64(state[23]); \ 381 | X##su = LOAD64(state[24]); \ 382 | 383 | #define copyFromStateAndXor1152bits(X, state, input) \ 384 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 385 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 386 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 387 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 388 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 389 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 390 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 391 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 392 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 393 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 394 | X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ 395 | X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ 396 | X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ 397 | X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ 398 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 399 | X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ 400 | X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ 401 | X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ 402 | X##mo = LOAD64(state[18]); \ 403 | X##mu = LOAD64(state[19]); \ 404 | X##sa = LOAD64(state[20]); \ 405 | X##se = LOAD64(state[21]); \ 406 | X##si = LOAD64(state[22]); \ 407 | X##so = LOAD64(state[23]); \ 408 | X##su = LOAD64(state[24]); \ 409 | 410 | #define copyFromStateAndXor1344bits(X, state, input) \ 411 | X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \ 412 | X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \ 413 | X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \ 414 | X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \ 415 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 416 | X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \ 417 | X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \ 418 | X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \ 419 | X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \ 420 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 421 | X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \ 422 | X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \ 423 | X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \ 424 | X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \ 425 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 426 | X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \ 427 | X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \ 428 | X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \ 429 | X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \ 430 | X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ 431 | X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \ 432 | X##se = LOAD64(state[21]); \ 433 | X##si = LOAD64(state[22]); \ 434 | X##so = LOAD64(state[23]); \ 435 | X##su = LOAD64(state[24]); \ 436 | 437 | #define copyFromState(X, state) \ 438 | X##ba = LOAD64(state[ 0]); \ 439 | X##be = LOAD64(state[ 1]); \ 440 | X##bi = LOAD64(state[ 2]); \ 441 | X##bo = LOAD64(state[ 3]); \ 442 | X##bu = LOAD64(state[ 4]); \ 443 | X##ga = LOAD64(state[ 5]); \ 444 | X##ge = LOAD64(state[ 6]); \ 445 | X##gi = LOAD64(state[ 7]); \ 446 | X##go = LOAD64(state[ 8]); \ 447 | X##gu = LOAD64(state[ 9]); \ 448 | X##ka = LOAD64(state[10]); \ 449 | X##ke = LOAD64(state[11]); \ 450 | X##ki = LOAD64(state[12]); \ 451 | X##ko = LOAD64(state[13]); \ 452 | X##ku = LOAD64(state[14]); \ 453 | X##ma = LOAD64(state[15]); \ 454 | X##me = LOAD64(state[16]); \ 455 | X##mi = LOAD64(state[17]); \ 456 | X##mo = LOAD64(state[18]); \ 457 | X##mu = LOAD64(state[19]); \ 458 | X##sa = LOAD64(state[20]); \ 459 | X##se = LOAD64(state[21]); \ 460 | X##si = LOAD64(state[22]); \ 461 | X##so = LOAD64(state[23]); \ 462 | X##su = LOAD64(state[24]); \ 463 | 464 | #define copyToState(state, X) \ 465 | STORE64(state[ 0], X##ba); \ 466 | STORE64(state[ 1], X##be); \ 467 | STORE64(state[ 2], X##bi); \ 468 | STORE64(state[ 3], X##bo); \ 469 | STORE64(state[ 4], X##bu); \ 470 | STORE64(state[ 5], X##ga); \ 471 | STORE64(state[ 6], X##ge); \ 472 | STORE64(state[ 7], X##gi); \ 473 | STORE64(state[ 8], X##go); \ 474 | STORE64(state[ 9], X##gu); \ 475 | STORE64(state[10], X##ka); \ 476 | STORE64(state[11], X##ke); \ 477 | STORE64(state[12], X##ki); \ 478 | STORE64(state[13], X##ko); \ 479 | STORE64(state[14], X##ku); \ 480 | STORE64(state[15], X##ma); \ 481 | STORE64(state[16], X##me); \ 482 | STORE64(state[17], X##mi); \ 483 | STORE64(state[18], X##mo); \ 484 | STORE64(state[19], X##mu); \ 485 | STORE64(state[20], X##sa); \ 486 | STORE64(state[21], X##se); \ 487 | STORE64(state[22], X##si); \ 488 | STORE64(state[23], X##so); \ 489 | STORE64(state[24], X##su); \ 490 | 491 | #define copyStateVariables(X, Y) \ 492 | X##ba = Y##ba; \ 493 | X##be = Y##be; \ 494 | X##bi = Y##bi; \ 495 | X##bo = Y##bo; \ 496 | X##bu = Y##bu; \ 497 | X##ga = Y##ga; \ 498 | X##ge = Y##ge; \ 499 | X##gi = Y##gi; \ 500 | X##go = Y##go; \ 501 | X##gu = Y##gu; \ 502 | X##ka = Y##ka; \ 503 | X##ke = Y##ke; \ 504 | X##ki = Y##ki; \ 505 | X##ko = Y##ko; \ 506 | X##ku = Y##ku; \ 507 | X##ma = Y##ma; \ 508 | X##me = Y##me; \ 509 | X##mi = Y##mi; \ 510 | X##mo = Y##mo; \ 511 | X##mu = Y##mu; \ 512 | X##sa = Y##sa; \ 513 | X##se = Y##se; \ 514 | X##si = Y##si; \ 515 | X##so = Y##so; \ 516 | X##su = Y##su; \ 517 | 518 | -------------------------------------------------------------------------------- /c_src/opt64/KeccakF-1600-64.macros: -------------------------------------------------------------------------------- 1 | /* 2 | Code automatically generated by KeccakTools! 3 | 4 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 5 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 6 | questions, please refer to our website: http://keccak.noekeon.org/ 7 | 8 | Implementation by the designers, 9 | hereby denoted as "the implementer". 10 | 11 | To the extent possible under law, the implementer has waived all copyright 12 | and related or neighboring rights to the source code in this file. 13 | http://creativecommons.org/publicdomain/zero/1.0/ 14 | */ 15 | 16 | #define declareABCDE \ 17 | UINT64 Aba, Abe, Abi, Abo, Abu; \ 18 | UINT64 Aga, Age, Agi, Ago, Agu; \ 19 | UINT64 Aka, Ake, Aki, Ako, Aku; \ 20 | UINT64 Ama, Ame, Ami, Amo, Amu; \ 21 | UINT64 Asa, Ase, Asi, Aso, Asu; \ 22 | UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ 23 | UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ 24 | UINT64 Bka, Bke, Bki, Bko, Bku; \ 25 | UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ 26 | UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ 27 | UINT64 Ca, Ce, Ci, Co, Cu; \ 28 | UINT64 Da, De, Di, Do, Du; \ 29 | UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ 30 | UINT64 Ega, Ege, Egi, Ego, Egu; \ 31 | UINT64 Eka, Eke, Eki, Eko, Eku; \ 32 | UINT64 Ema, Eme, Emi, Emo, Emu; \ 33 | UINT64 Esa, Ese, Esi, Eso, Esu; \ 34 | 35 | #define prepareTheta \ 36 | Ca = Aba^Aga^Aka^Ama^Asa; \ 37 | Ce = Abe^Age^Ake^Ame^Ase; \ 38 | Ci = Abi^Agi^Aki^Ami^Asi; \ 39 | Co = Abo^Ago^Ako^Amo^Aso; \ 40 | Cu = Abu^Agu^Aku^Amu^Asu; \ 41 | 42 | #ifdef UseBebigokimisa 43 | // --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') 44 | // --- 64-bit lanes mapped to 64-bit words 45 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 46 | Da = Cu^ROL64(Ce, 1); \ 47 | De = Ca^ROL64(Ci, 1); \ 48 | Di = Ce^ROL64(Co, 1); \ 49 | Do = Ci^ROL64(Cu, 1); \ 50 | Du = Co^ROL64(Ca, 1); \ 51 | \ 52 | A##ba ^= Da; \ 53 | Bba = A##ba; \ 54 | A##ge ^= De; \ 55 | Bbe = ROL64(A##ge, 44); \ 56 | A##ki ^= Di; \ 57 | Bbi = ROL64(A##ki, 43); \ 58 | A##mo ^= Do; \ 59 | Bbo = ROL64(A##mo, 21); \ 60 | A##su ^= Du; \ 61 | Bbu = ROL64(A##su, 14); \ 62 | E##ba = Bba ^( Bbe | Bbi ); \ 63 | E##ba ^= KeccakF1600RoundConstants[i]; \ 64 | Ca = E##ba; \ 65 | E##be = Bbe ^((~Bbi)| Bbo ); \ 66 | Ce = E##be; \ 67 | E##bi = Bbi ^( Bbo & Bbu ); \ 68 | Ci = E##bi; \ 69 | E##bo = Bbo ^( Bbu | Bba ); \ 70 | Co = E##bo; \ 71 | E##bu = Bbu ^( Bba & Bbe ); \ 72 | Cu = E##bu; \ 73 | \ 74 | A##bo ^= Do; \ 75 | Bga = ROL64(A##bo, 28); \ 76 | A##gu ^= Du; \ 77 | Bge = ROL64(A##gu, 20); \ 78 | A##ka ^= Da; \ 79 | Bgi = ROL64(A##ka, 3); \ 80 | A##me ^= De; \ 81 | Bgo = ROL64(A##me, 45); \ 82 | A##si ^= Di; \ 83 | Bgu = ROL64(A##si, 61); \ 84 | E##ga = Bga ^( Bge | Bgi ); \ 85 | Ca ^= E##ga; \ 86 | E##ge = Bge ^( Bgi & Bgo ); \ 87 | Ce ^= E##ge; \ 88 | E##gi = Bgi ^( Bgo |(~Bgu)); \ 89 | Ci ^= E##gi; \ 90 | E##go = Bgo ^( Bgu | Bga ); \ 91 | Co ^= E##go; \ 92 | E##gu = Bgu ^( Bga & Bge ); \ 93 | Cu ^= E##gu; \ 94 | \ 95 | A##be ^= De; \ 96 | Bka = ROL64(A##be, 1); \ 97 | A##gi ^= Di; \ 98 | Bke = ROL64(A##gi, 6); \ 99 | A##ko ^= Do; \ 100 | Bki = ROL64(A##ko, 25); \ 101 | A##mu ^= Du; \ 102 | Bko = ROL64(A##mu, 8); \ 103 | A##sa ^= Da; \ 104 | Bku = ROL64(A##sa, 18); \ 105 | E##ka = Bka ^( Bke | Bki ); \ 106 | Ca ^= E##ka; \ 107 | E##ke = Bke ^( Bki & Bko ); \ 108 | Ce ^= E##ke; \ 109 | E##ki = Bki ^((~Bko)& Bku ); \ 110 | Ci ^= E##ki; \ 111 | E##ko = (~Bko)^( Bku | Bka ); \ 112 | Co ^= E##ko; \ 113 | E##ku = Bku ^( Bka & Bke ); \ 114 | Cu ^= E##ku; \ 115 | \ 116 | A##bu ^= Du; \ 117 | Bma = ROL64(A##bu, 27); \ 118 | A##ga ^= Da; \ 119 | Bme = ROL64(A##ga, 36); \ 120 | A##ke ^= De; \ 121 | Bmi = ROL64(A##ke, 10); \ 122 | A##mi ^= Di; \ 123 | Bmo = ROL64(A##mi, 15); \ 124 | A##so ^= Do; \ 125 | Bmu = ROL64(A##so, 56); \ 126 | E##ma = Bma ^( Bme & Bmi ); \ 127 | Ca ^= E##ma; \ 128 | E##me = Bme ^( Bmi | Bmo ); \ 129 | Ce ^= E##me; \ 130 | E##mi = Bmi ^((~Bmo)| Bmu ); \ 131 | Ci ^= E##mi; \ 132 | E##mo = (~Bmo)^( Bmu & Bma ); \ 133 | Co ^= E##mo; \ 134 | E##mu = Bmu ^( Bma | Bme ); \ 135 | Cu ^= E##mu; \ 136 | \ 137 | A##bi ^= Di; \ 138 | Bsa = ROL64(A##bi, 62); \ 139 | A##go ^= Do; \ 140 | Bse = ROL64(A##go, 55); \ 141 | A##ku ^= Du; \ 142 | Bsi = ROL64(A##ku, 39); \ 143 | A##ma ^= Da; \ 144 | Bso = ROL64(A##ma, 41); \ 145 | A##se ^= De; \ 146 | Bsu = ROL64(A##se, 2); \ 147 | E##sa = Bsa ^((~Bse)& Bsi ); \ 148 | Ca ^= E##sa; \ 149 | E##se = (~Bse)^( Bsi | Bso ); \ 150 | Ce ^= E##se; \ 151 | E##si = Bsi ^( Bso & Bsu ); \ 152 | Ci ^= E##si; \ 153 | E##so = Bso ^( Bsu | Bsa ); \ 154 | Co ^= E##so; \ 155 | E##su = Bsu ^( Bsa & Bse ); \ 156 | Cu ^= E##su; \ 157 | \ 158 | 159 | // --- Code for round (lane complementing pattern 'bebigokimisa') 160 | // --- 64-bit lanes mapped to 64-bit words 161 | #define thetaRhoPiChiIota(i, A, E) \ 162 | Da = Cu^ROL64(Ce, 1); \ 163 | De = Ca^ROL64(Ci, 1); \ 164 | Di = Ce^ROL64(Co, 1); \ 165 | Do = Ci^ROL64(Cu, 1); \ 166 | Du = Co^ROL64(Ca, 1); \ 167 | \ 168 | A##ba ^= Da; \ 169 | Bba = A##ba; \ 170 | A##ge ^= De; \ 171 | Bbe = ROL64(A##ge, 44); \ 172 | A##ki ^= Di; \ 173 | Bbi = ROL64(A##ki, 43); \ 174 | A##mo ^= Do; \ 175 | Bbo = ROL64(A##mo, 21); \ 176 | A##su ^= Du; \ 177 | Bbu = ROL64(A##su, 14); \ 178 | E##ba = Bba ^( Bbe | Bbi ); \ 179 | E##ba ^= KeccakF1600RoundConstants[i]; \ 180 | E##be = Bbe ^((~Bbi)| Bbo ); \ 181 | E##bi = Bbi ^( Bbo & Bbu ); \ 182 | E##bo = Bbo ^( Bbu | Bba ); \ 183 | E##bu = Bbu ^( Bba & Bbe ); \ 184 | \ 185 | A##bo ^= Do; \ 186 | Bga = ROL64(A##bo, 28); \ 187 | A##gu ^= Du; \ 188 | Bge = ROL64(A##gu, 20); \ 189 | A##ka ^= Da; \ 190 | Bgi = ROL64(A##ka, 3); \ 191 | A##me ^= De; \ 192 | Bgo = ROL64(A##me, 45); \ 193 | A##si ^= Di; \ 194 | Bgu = ROL64(A##si, 61); \ 195 | E##ga = Bga ^( Bge | Bgi ); \ 196 | E##ge = Bge ^( Bgi & Bgo ); \ 197 | E##gi = Bgi ^( Bgo |(~Bgu)); \ 198 | E##go = Bgo ^( Bgu | Bga ); \ 199 | E##gu = Bgu ^( Bga & Bge ); \ 200 | \ 201 | A##be ^= De; \ 202 | Bka = ROL64(A##be, 1); \ 203 | A##gi ^= Di; \ 204 | Bke = ROL64(A##gi, 6); \ 205 | A##ko ^= Do; \ 206 | Bki = ROL64(A##ko, 25); \ 207 | A##mu ^= Du; \ 208 | Bko = ROL64(A##mu, 8); \ 209 | A##sa ^= Da; \ 210 | Bku = ROL64(A##sa, 18); \ 211 | E##ka = Bka ^( Bke | Bki ); \ 212 | E##ke = Bke ^( Bki & Bko ); \ 213 | E##ki = Bki ^((~Bko)& Bku ); \ 214 | E##ko = (~Bko)^( Bku | Bka ); \ 215 | E##ku = Bku ^( Bka & Bke ); \ 216 | \ 217 | A##bu ^= Du; \ 218 | Bma = ROL64(A##bu, 27); \ 219 | A##ga ^= Da; \ 220 | Bme = ROL64(A##ga, 36); \ 221 | A##ke ^= De; \ 222 | Bmi = ROL64(A##ke, 10); \ 223 | A##mi ^= Di; \ 224 | Bmo = ROL64(A##mi, 15); \ 225 | A##so ^= Do; \ 226 | Bmu = ROL64(A##so, 56); \ 227 | E##ma = Bma ^( Bme & Bmi ); \ 228 | E##me = Bme ^( Bmi | Bmo ); \ 229 | E##mi = Bmi ^((~Bmo)| Bmu ); \ 230 | E##mo = (~Bmo)^( Bmu & Bma ); \ 231 | E##mu = Bmu ^( Bma | Bme ); \ 232 | \ 233 | A##bi ^= Di; \ 234 | Bsa = ROL64(A##bi, 62); \ 235 | A##go ^= Do; \ 236 | Bse = ROL64(A##go, 55); \ 237 | A##ku ^= Du; \ 238 | Bsi = ROL64(A##ku, 39); \ 239 | A##ma ^= Da; \ 240 | Bso = ROL64(A##ma, 41); \ 241 | A##se ^= De; \ 242 | Bsu = ROL64(A##se, 2); \ 243 | E##sa = Bsa ^((~Bse)& Bsi ); \ 244 | E##se = (~Bse)^( Bsi | Bso ); \ 245 | E##si = Bsi ^( Bso & Bsu ); \ 246 | E##so = Bso ^( Bsu | Bsa ); \ 247 | E##su = Bsu ^( Bsa & Bse ); \ 248 | \ 249 | 250 | #else // UseBebigokimisa 251 | // --- Code for round, with prepare-theta 252 | // --- 64-bit lanes mapped to 64-bit words 253 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 254 | Da = Cu^ROL64(Ce, 1); \ 255 | De = Ca^ROL64(Ci, 1); \ 256 | Di = Ce^ROL64(Co, 1); \ 257 | Do = Ci^ROL64(Cu, 1); \ 258 | Du = Co^ROL64(Ca, 1); \ 259 | \ 260 | A##ba ^= Da; \ 261 | Bba = A##ba; \ 262 | A##ge ^= De; \ 263 | Bbe = ROL64(A##ge, 44); \ 264 | A##ki ^= Di; \ 265 | Bbi = ROL64(A##ki, 43); \ 266 | A##mo ^= Do; \ 267 | Bbo = ROL64(A##mo, 21); \ 268 | A##su ^= Du; \ 269 | Bbu = ROL64(A##su, 14); \ 270 | E##ba = Bba ^((~Bbe)& Bbi ); \ 271 | E##ba ^= KeccakF1600RoundConstants[i]; \ 272 | Ca = E##ba; \ 273 | E##be = Bbe ^((~Bbi)& Bbo ); \ 274 | Ce = E##be; \ 275 | E##bi = Bbi ^((~Bbo)& Bbu ); \ 276 | Ci = E##bi; \ 277 | E##bo = Bbo ^((~Bbu)& Bba ); \ 278 | Co = E##bo; \ 279 | E##bu = Bbu ^((~Bba)& Bbe ); \ 280 | Cu = E##bu; \ 281 | \ 282 | A##bo ^= Do; \ 283 | Bga = ROL64(A##bo, 28); \ 284 | A##gu ^= Du; \ 285 | Bge = ROL64(A##gu, 20); \ 286 | A##ka ^= Da; \ 287 | Bgi = ROL64(A##ka, 3); \ 288 | A##me ^= De; \ 289 | Bgo = ROL64(A##me, 45); \ 290 | A##si ^= Di; \ 291 | Bgu = ROL64(A##si, 61); \ 292 | E##ga = Bga ^((~Bge)& Bgi ); \ 293 | Ca ^= E##ga; \ 294 | E##ge = Bge ^((~Bgi)& Bgo ); \ 295 | Ce ^= E##ge; \ 296 | E##gi = Bgi ^((~Bgo)& Bgu ); \ 297 | Ci ^= E##gi; \ 298 | E##go = Bgo ^((~Bgu)& Bga ); \ 299 | Co ^= E##go; \ 300 | E##gu = Bgu ^((~Bga)& Bge ); \ 301 | Cu ^= E##gu; \ 302 | \ 303 | A##be ^= De; \ 304 | Bka = ROL64(A##be, 1); \ 305 | A##gi ^= Di; \ 306 | Bke = ROL64(A##gi, 6); \ 307 | A##ko ^= Do; \ 308 | Bki = ROL64(A##ko, 25); \ 309 | A##mu ^= Du; \ 310 | Bko = ROL64(A##mu, 8); \ 311 | A##sa ^= Da; \ 312 | Bku = ROL64(A##sa, 18); \ 313 | E##ka = Bka ^((~Bke)& Bki ); \ 314 | Ca ^= E##ka; \ 315 | E##ke = Bke ^((~Bki)& Bko ); \ 316 | Ce ^= E##ke; \ 317 | E##ki = Bki ^((~Bko)& Bku ); \ 318 | Ci ^= E##ki; \ 319 | E##ko = Bko ^((~Bku)& Bka ); \ 320 | Co ^= E##ko; \ 321 | E##ku = Bku ^((~Bka)& Bke ); \ 322 | Cu ^= E##ku; \ 323 | \ 324 | A##bu ^= Du; \ 325 | Bma = ROL64(A##bu, 27); \ 326 | A##ga ^= Da; \ 327 | Bme = ROL64(A##ga, 36); \ 328 | A##ke ^= De; \ 329 | Bmi = ROL64(A##ke, 10); \ 330 | A##mi ^= Di; \ 331 | Bmo = ROL64(A##mi, 15); \ 332 | A##so ^= Do; \ 333 | Bmu = ROL64(A##so, 56); \ 334 | E##ma = Bma ^((~Bme)& Bmi ); \ 335 | Ca ^= E##ma; \ 336 | E##me = Bme ^((~Bmi)& Bmo ); \ 337 | Ce ^= E##me; \ 338 | E##mi = Bmi ^((~Bmo)& Bmu ); \ 339 | Ci ^= E##mi; \ 340 | E##mo = Bmo ^((~Bmu)& Bma ); \ 341 | Co ^= E##mo; \ 342 | E##mu = Bmu ^((~Bma)& Bme ); \ 343 | Cu ^= E##mu; \ 344 | \ 345 | A##bi ^= Di; \ 346 | Bsa = ROL64(A##bi, 62); \ 347 | A##go ^= Do; \ 348 | Bse = ROL64(A##go, 55); \ 349 | A##ku ^= Du; \ 350 | Bsi = ROL64(A##ku, 39); \ 351 | A##ma ^= Da; \ 352 | Bso = ROL64(A##ma, 41); \ 353 | A##se ^= De; \ 354 | Bsu = ROL64(A##se, 2); \ 355 | E##sa = Bsa ^((~Bse)& Bsi ); \ 356 | Ca ^= E##sa; \ 357 | E##se = Bse ^((~Bsi)& Bso ); \ 358 | Ce ^= E##se; \ 359 | E##si = Bsi ^((~Bso)& Bsu ); \ 360 | Ci ^= E##si; \ 361 | E##so = Bso ^((~Bsu)& Bsa ); \ 362 | Co ^= E##so; \ 363 | E##su = Bsu ^((~Bsa)& Bse ); \ 364 | Cu ^= E##su; \ 365 | \ 366 | 367 | // --- Code for round 368 | // --- 64-bit lanes mapped to 64-bit words 369 | #define thetaRhoPiChiIota(i, A, E) \ 370 | Da = Cu^ROL64(Ce, 1); \ 371 | De = Ca^ROL64(Ci, 1); \ 372 | Di = Ce^ROL64(Co, 1); \ 373 | Do = Ci^ROL64(Cu, 1); \ 374 | Du = Co^ROL64(Ca, 1); \ 375 | \ 376 | A##ba ^= Da; \ 377 | Bba = A##ba; \ 378 | A##ge ^= De; \ 379 | Bbe = ROL64(A##ge, 44); \ 380 | A##ki ^= Di; \ 381 | Bbi = ROL64(A##ki, 43); \ 382 | A##mo ^= Do; \ 383 | Bbo = ROL64(A##mo, 21); \ 384 | A##su ^= Du; \ 385 | Bbu = ROL64(A##su, 14); \ 386 | E##ba = Bba ^((~Bbe)& Bbi ); \ 387 | E##ba ^= KeccakF1600RoundConstants[i]; \ 388 | E##be = Bbe ^((~Bbi)& Bbo ); \ 389 | E##bi = Bbi ^((~Bbo)& Bbu ); \ 390 | E##bo = Bbo ^((~Bbu)& Bba ); \ 391 | E##bu = Bbu ^((~Bba)& Bbe ); \ 392 | \ 393 | A##bo ^= Do; \ 394 | Bga = ROL64(A##bo, 28); \ 395 | A##gu ^= Du; \ 396 | Bge = ROL64(A##gu, 20); \ 397 | A##ka ^= Da; \ 398 | Bgi = ROL64(A##ka, 3); \ 399 | A##me ^= De; \ 400 | Bgo = ROL64(A##me, 45); \ 401 | A##si ^= Di; \ 402 | Bgu = ROL64(A##si, 61); \ 403 | E##ga = Bga ^((~Bge)& Bgi ); \ 404 | E##ge = Bge ^((~Bgi)& Bgo ); \ 405 | E##gi = Bgi ^((~Bgo)& Bgu ); \ 406 | E##go = Bgo ^((~Bgu)& Bga ); \ 407 | E##gu = Bgu ^((~Bga)& Bge ); \ 408 | \ 409 | A##be ^= De; \ 410 | Bka = ROL64(A##be, 1); \ 411 | A##gi ^= Di; \ 412 | Bke = ROL64(A##gi, 6); \ 413 | A##ko ^= Do; \ 414 | Bki = ROL64(A##ko, 25); \ 415 | A##mu ^= Du; \ 416 | Bko = ROL64(A##mu, 8); \ 417 | A##sa ^= Da; \ 418 | Bku = ROL64(A##sa, 18); \ 419 | E##ka = Bka ^((~Bke)& Bki ); \ 420 | E##ke = Bke ^((~Bki)& Bko ); \ 421 | E##ki = Bki ^((~Bko)& Bku ); \ 422 | E##ko = Bko ^((~Bku)& Bka ); \ 423 | E##ku = Bku ^((~Bka)& Bke ); \ 424 | \ 425 | A##bu ^= Du; \ 426 | Bma = ROL64(A##bu, 27); \ 427 | A##ga ^= Da; \ 428 | Bme = ROL64(A##ga, 36); \ 429 | A##ke ^= De; \ 430 | Bmi = ROL64(A##ke, 10); \ 431 | A##mi ^= Di; \ 432 | Bmo = ROL64(A##mi, 15); \ 433 | A##so ^= Do; \ 434 | Bmu = ROL64(A##so, 56); \ 435 | E##ma = Bma ^((~Bme)& Bmi ); \ 436 | E##me = Bme ^((~Bmi)& Bmo ); \ 437 | E##mi = Bmi ^((~Bmo)& Bmu ); \ 438 | E##mo = Bmo ^((~Bmu)& Bma ); \ 439 | E##mu = Bmu ^((~Bma)& Bme ); \ 440 | \ 441 | A##bi ^= Di; \ 442 | Bsa = ROL64(A##bi, 62); \ 443 | A##go ^= Do; \ 444 | Bse = ROL64(A##go, 55); \ 445 | A##ku ^= Du; \ 446 | Bsi = ROL64(A##ku, 39); \ 447 | A##ma ^= Da; \ 448 | Bso = ROL64(A##ma, 41); \ 449 | A##se ^= De; \ 450 | Bsu = ROL64(A##se, 2); \ 451 | E##sa = Bsa ^((~Bse)& Bsi ); \ 452 | E##se = Bse ^((~Bsi)& Bso ); \ 453 | E##si = Bsi ^((~Bso)& Bsu ); \ 454 | E##so = Bso ^((~Bsu)& Bsa ); \ 455 | E##su = Bsu ^((~Bsa)& Bse ); \ 456 | \ 457 | 458 | #endif // UseBebigokimisa 459 | 460 | const UINT64 KeccakF1600RoundConstants[24] = { 461 | 0x0000000000000001ULL, 462 | 0x0000000000008082ULL, 463 | 0x800000000000808aULL, 464 | 0x8000000080008000ULL, 465 | 0x000000000000808bULL, 466 | 0x0000000080000001ULL, 467 | 0x8000000080008081ULL, 468 | 0x8000000000008009ULL, 469 | 0x000000000000008aULL, 470 | 0x0000000000000088ULL, 471 | 0x0000000080008009ULL, 472 | 0x000000008000000aULL, 473 | 0x000000008000808bULL, 474 | 0x800000000000008bULL, 475 | 0x8000000000008089ULL, 476 | 0x8000000000008003ULL, 477 | 0x8000000000008002ULL, 478 | 0x8000000000000080ULL, 479 | 0x000000000000800aULL, 480 | 0x800000008000000aULL, 481 | 0x8000000080008081ULL, 482 | 0x8000000000008080ULL, 483 | 0x0000000080000001ULL, 484 | 0x8000000080008008ULL }; 485 | 486 | #define copyFromStateAndXor576bits(X, state, input) \ 487 | X##ba = state[ 0]^input[ 0]; \ 488 | X##be = state[ 1]^input[ 1]; \ 489 | X##bi = state[ 2]^input[ 2]; \ 490 | X##bo = state[ 3]^input[ 3]; \ 491 | X##bu = state[ 4]^input[ 4]; \ 492 | X##ga = state[ 5]^input[ 5]; \ 493 | X##ge = state[ 6]^input[ 6]; \ 494 | X##gi = state[ 7]^input[ 7]; \ 495 | X##go = state[ 8]^input[ 8]; \ 496 | X##gu = state[ 9]; \ 497 | X##ka = state[10]; \ 498 | X##ke = state[11]; \ 499 | X##ki = state[12]; \ 500 | X##ko = state[13]; \ 501 | X##ku = state[14]; \ 502 | X##ma = state[15]; \ 503 | X##me = state[16]; \ 504 | X##mi = state[17]; \ 505 | X##mo = state[18]; \ 506 | X##mu = state[19]; \ 507 | X##sa = state[20]; \ 508 | X##se = state[21]; \ 509 | X##si = state[22]; \ 510 | X##so = state[23]; \ 511 | X##su = state[24]; \ 512 | 513 | #define copyFromStateAndXor832bits(X, state, input) \ 514 | X##ba = state[ 0]^input[ 0]; \ 515 | X##be = state[ 1]^input[ 1]; \ 516 | X##bi = state[ 2]^input[ 2]; \ 517 | X##bo = state[ 3]^input[ 3]; \ 518 | X##bu = state[ 4]^input[ 4]; \ 519 | X##ga = state[ 5]^input[ 5]; \ 520 | X##ge = state[ 6]^input[ 6]; \ 521 | X##gi = state[ 7]^input[ 7]; \ 522 | X##go = state[ 8]^input[ 8]; \ 523 | X##gu = state[ 9]^input[ 9]; \ 524 | X##ka = state[10]^input[10]; \ 525 | X##ke = state[11]^input[11]; \ 526 | X##ki = state[12]^input[12]; \ 527 | X##ko = state[13]; \ 528 | X##ku = state[14]; \ 529 | X##ma = state[15]; \ 530 | X##me = state[16]; \ 531 | X##mi = state[17]; \ 532 | X##mo = state[18]; \ 533 | X##mu = state[19]; \ 534 | X##sa = state[20]; \ 535 | X##se = state[21]; \ 536 | X##si = state[22]; \ 537 | X##so = state[23]; \ 538 | X##su = state[24]; \ 539 | 540 | #define copyFromStateAndXor1024bits(X, state, input) \ 541 | X##ba = state[ 0]^input[ 0]; \ 542 | X##be = state[ 1]^input[ 1]; \ 543 | X##bi = state[ 2]^input[ 2]; \ 544 | X##bo = state[ 3]^input[ 3]; \ 545 | X##bu = state[ 4]^input[ 4]; \ 546 | X##ga = state[ 5]^input[ 5]; \ 547 | X##ge = state[ 6]^input[ 6]; \ 548 | X##gi = state[ 7]^input[ 7]; \ 549 | X##go = state[ 8]^input[ 8]; \ 550 | X##gu = state[ 9]^input[ 9]; \ 551 | X##ka = state[10]^input[10]; \ 552 | X##ke = state[11]^input[11]; \ 553 | X##ki = state[12]^input[12]; \ 554 | X##ko = state[13]^input[13]; \ 555 | X##ku = state[14]^input[14]; \ 556 | X##ma = state[15]^input[15]; \ 557 | X##me = state[16]; \ 558 | X##mi = state[17]; \ 559 | X##mo = state[18]; \ 560 | X##mu = state[19]; \ 561 | X##sa = state[20]; \ 562 | X##se = state[21]; \ 563 | X##si = state[22]; \ 564 | X##so = state[23]; \ 565 | X##su = state[24]; \ 566 | 567 | #define copyFromStateAndXor1088bits(X, state, input) \ 568 | X##ba = state[ 0]^input[ 0]; \ 569 | X##be = state[ 1]^input[ 1]; \ 570 | X##bi = state[ 2]^input[ 2]; \ 571 | X##bo = state[ 3]^input[ 3]; \ 572 | X##bu = state[ 4]^input[ 4]; \ 573 | X##ga = state[ 5]^input[ 5]; \ 574 | X##ge = state[ 6]^input[ 6]; \ 575 | X##gi = state[ 7]^input[ 7]; \ 576 | X##go = state[ 8]^input[ 8]; \ 577 | X##gu = state[ 9]^input[ 9]; \ 578 | X##ka = state[10]^input[10]; \ 579 | X##ke = state[11]^input[11]; \ 580 | X##ki = state[12]^input[12]; \ 581 | X##ko = state[13]^input[13]; \ 582 | X##ku = state[14]^input[14]; \ 583 | X##ma = state[15]^input[15]; \ 584 | X##me = state[16]^input[16]; \ 585 | X##mi = state[17]; \ 586 | X##mo = state[18]; \ 587 | X##mu = state[19]; \ 588 | X##sa = state[20]; \ 589 | X##se = state[21]; \ 590 | X##si = state[22]; \ 591 | X##so = state[23]; \ 592 | X##su = state[24]; \ 593 | 594 | #define copyFromStateAndXor1152bits(X, state, input) \ 595 | X##ba = state[ 0]^input[ 0]; \ 596 | X##be = state[ 1]^input[ 1]; \ 597 | X##bi = state[ 2]^input[ 2]; \ 598 | X##bo = state[ 3]^input[ 3]; \ 599 | X##bu = state[ 4]^input[ 4]; \ 600 | X##ga = state[ 5]^input[ 5]; \ 601 | X##ge = state[ 6]^input[ 6]; \ 602 | X##gi = state[ 7]^input[ 7]; \ 603 | X##go = state[ 8]^input[ 8]; \ 604 | X##gu = state[ 9]^input[ 9]; \ 605 | X##ka = state[10]^input[10]; \ 606 | X##ke = state[11]^input[11]; \ 607 | X##ki = state[12]^input[12]; \ 608 | X##ko = state[13]^input[13]; \ 609 | X##ku = state[14]^input[14]; \ 610 | X##ma = state[15]^input[15]; \ 611 | X##me = state[16]^input[16]; \ 612 | X##mi = state[17]^input[17]; \ 613 | X##mo = state[18]; \ 614 | X##mu = state[19]; \ 615 | X##sa = state[20]; \ 616 | X##se = state[21]; \ 617 | X##si = state[22]; \ 618 | X##so = state[23]; \ 619 | X##su = state[24]; \ 620 | 621 | #define copyFromStateAndXor1344bits(X, state, input) \ 622 | X##ba = state[ 0]^input[ 0]; \ 623 | X##be = state[ 1]^input[ 1]; \ 624 | X##bi = state[ 2]^input[ 2]; \ 625 | X##bo = state[ 3]^input[ 3]; \ 626 | X##bu = state[ 4]^input[ 4]; \ 627 | X##ga = state[ 5]^input[ 5]; \ 628 | X##ge = state[ 6]^input[ 6]; \ 629 | X##gi = state[ 7]^input[ 7]; \ 630 | X##go = state[ 8]^input[ 8]; \ 631 | X##gu = state[ 9]^input[ 9]; \ 632 | X##ka = state[10]^input[10]; \ 633 | X##ke = state[11]^input[11]; \ 634 | X##ki = state[12]^input[12]; \ 635 | X##ko = state[13]^input[13]; \ 636 | X##ku = state[14]^input[14]; \ 637 | X##ma = state[15]^input[15]; \ 638 | X##me = state[16]^input[16]; \ 639 | X##mi = state[17]^input[17]; \ 640 | X##mo = state[18]^input[18]; \ 641 | X##mu = state[19]^input[19]; \ 642 | X##sa = state[20]^input[20]; \ 643 | X##se = state[21]; \ 644 | X##si = state[22]; \ 645 | X##so = state[23]; \ 646 | X##su = state[24]; \ 647 | 648 | #define copyFromState(X, state) \ 649 | X##ba = state[ 0]; \ 650 | X##be = state[ 1]; \ 651 | X##bi = state[ 2]; \ 652 | X##bo = state[ 3]; \ 653 | X##bu = state[ 4]; \ 654 | X##ga = state[ 5]; \ 655 | X##ge = state[ 6]; \ 656 | X##gi = state[ 7]; \ 657 | X##go = state[ 8]; \ 658 | X##gu = state[ 9]; \ 659 | X##ka = state[10]; \ 660 | X##ke = state[11]; \ 661 | X##ki = state[12]; \ 662 | X##ko = state[13]; \ 663 | X##ku = state[14]; \ 664 | X##ma = state[15]; \ 665 | X##me = state[16]; \ 666 | X##mi = state[17]; \ 667 | X##mo = state[18]; \ 668 | X##mu = state[19]; \ 669 | X##sa = state[20]; \ 670 | X##se = state[21]; \ 671 | X##si = state[22]; \ 672 | X##so = state[23]; \ 673 | X##su = state[24]; \ 674 | 675 | #define copyToState(state, X) \ 676 | state[ 0] = X##ba; \ 677 | state[ 1] = X##be; \ 678 | state[ 2] = X##bi; \ 679 | state[ 3] = X##bo; \ 680 | state[ 4] = X##bu; \ 681 | state[ 5] = X##ga; \ 682 | state[ 6] = X##ge; \ 683 | state[ 7] = X##gi; \ 684 | state[ 8] = X##go; \ 685 | state[ 9] = X##gu; \ 686 | state[10] = X##ka; \ 687 | state[11] = X##ke; \ 688 | state[12] = X##ki; \ 689 | state[13] = X##ko; \ 690 | state[14] = X##ku; \ 691 | state[15] = X##ma; \ 692 | state[16] = X##me; \ 693 | state[17] = X##mi; \ 694 | state[18] = X##mo; \ 695 | state[19] = X##mu; \ 696 | state[20] = X##sa; \ 697 | state[21] = X##se; \ 698 | state[22] = X##si; \ 699 | state[23] = X##so; \ 700 | state[24] = X##su; \ 701 | 702 | #define copyStateVariables(X, Y) \ 703 | X##ba = Y##ba; \ 704 | X##be = Y##be; \ 705 | X##bi = Y##bi; \ 706 | X##bo = Y##bo; \ 707 | X##bu = Y##bu; \ 708 | X##ga = Y##ga; \ 709 | X##ge = Y##ge; \ 710 | X##gi = Y##gi; \ 711 | X##go = Y##go; \ 712 | X##gu = Y##gu; \ 713 | X##ka = Y##ka; \ 714 | X##ke = Y##ke; \ 715 | X##ki = Y##ki; \ 716 | X##ko = Y##ko; \ 717 | X##ku = Y##ku; \ 718 | X##ma = Y##ma; \ 719 | X##me = Y##me; \ 720 | X##mi = Y##mi; \ 721 | X##mo = Y##mo; \ 722 | X##mu = Y##mu; \ 723 | X##sa = Y##sa; \ 724 | X##se = Y##se; \ 725 | X##si = Y##si; \ 726 | X##so = Y##so; \ 727 | X##su = Y##su; \ 728 | 729 | -------------------------------------------------------------------------------- /c_src/opt64/KeccakF-1600-simd128.macros: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by the designers, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | #define declareABCDE \ 15 | V6464 Abage, Abegi, Abigo, Abogu, Abuga; \ 16 | V6464 Akame, Akemi, Akimo, Akomu, Akuma; \ 17 | V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \ 18 | V64 Aba, Abe, Abi, Abo, Abu; \ 19 | V64 Aga, Age, Agi, Ago, Agu; \ 20 | V64 Aka, Ake, Aki, Ako, Aku; \ 21 | V64 Ama, Ame, Ami, Amo, Amu; \ 22 | V64 Asa, Ase, Asi, Aso, Asu; \ 23 | V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \ 24 | V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \ 25 | V64 Bba, Bbe, Bbi, Bbo, Bbu; \ 26 | V64 Bga, Bge, Bgi, Bgo, Bgu; \ 27 | V64 Bka, Bke, Bki, Bko, Bku; \ 28 | V64 Bma, Bme, Bmi, Bmo, Bmu; \ 29 | V64 Bsa, Bse, Bsi, Bso, Bsu; \ 30 | V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \ 31 | V64 Ca, Ce, Ci, Co, Cu; \ 32 | V64 Da, De, Di, Do, Du; \ 33 | V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \ 34 | V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \ 35 | V64 Eba, Ebe, Ebi, Ebo, Ebu; \ 36 | V64 Ega, Ege, Egi, Ego, Egu; \ 37 | V64 Eka, Eke, Eki, Eko, Eku; \ 38 | V64 Ema, Eme, Emi, Emo, Emu; \ 39 | V64 Esa, Ese, Esi, Eso, Esu; \ 40 | V128 Zero; 41 | 42 | #define prepareTheta 43 | 44 | #define computeD \ 45 | Cua = GET64LO(Cu, Cae); \ 46 | Dei = XOR128(Cae, ROL64in128(Cio, 1)); \ 47 | Dou = XOR128(Cio, ROL64in128(Cua, 1)); \ 48 | Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \ 49 | De = Dei; \ 50 | Di = COPY64HI2LO(Dei); \ 51 | Do = Dou; \ 52 | Du = COPY64HI2LO(Dou); 53 | 54 | // --- Theta Rho Pi Chi Iota Prepare-theta 55 | // --- 64-bit lanes mapped to 64-bit and 128-bit words 56 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 57 | computeD \ 58 | \ 59 | A##ba = LOAD64(A##bage.v64[0]); \ 60 | XOReq64(A##ba, Da); \ 61 | Bba = A##ba; \ 62 | XOReq64(A##gu, Du); \ 63 | Bge = ROL64(A##gu, 20); \ 64 | Bbage = GET64LO(Bba, Bge); \ 65 | A##ge = LOAD64(A##bage.v64[1]); \ 66 | XOReq64(A##ge, De); \ 67 | Bbe = ROL64(A##ge, 44); \ 68 | A##ka = LOAD64(A##kame.v64[0]); \ 69 | XOReq64(A##ka, Da); \ 70 | Bgi = ROL64(A##ka, 3); \ 71 | Bbegi = GET64LO(Bbe, Bgi); \ 72 | XOReq64(A##ki, Di); \ 73 | Bbi = ROL64(A##ki, 43); \ 74 | A##me = LOAD64(A##kame.v64[1]); \ 75 | XOReq64(A##me, De); \ 76 | Bgo = ROL64(A##me, 45); \ 77 | Bbigo = GET64LO(Bbi, Bgo); \ 78 | E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \ 79 | XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \ 80 | Cae = E##bage.v128; \ 81 | XOReq64(A##mo, Do); \ 82 | Bbo = ROL64(A##mo, 21); \ 83 | XOReq64(A##si, Di); \ 84 | Bgu = ROL64(A##si, 61); \ 85 | Bbogu = GET64LO(Bbo, Bgu); \ 86 | E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \ 87 | Cei = E##begi.v128; \ 88 | XOReq64(A##su, Du); \ 89 | Bbu = ROL64(A##su, 14); \ 90 | XOReq64(A##bo, Do); \ 91 | Bga = ROL64(A##bo, 28); \ 92 | Bbuga = GET64LO(Bbu, Bga); \ 93 | E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \ 94 | E##bi = E##bigo.v128; \ 95 | E##go = GET64HI(E##bigo.v128, E##bigo.v128); \ 96 | Cio = E##bigo.v128; \ 97 | E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \ 98 | E##bo = E##bogu.v128; \ 99 | E##gu = GET64HI(E##bogu.v128, E##bogu.v128); \ 100 | Cou = E##bogu.v128; \ 101 | E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \ 102 | E##bu = E##buga.v128; \ 103 | E##ga = GET64HI(E##buga.v128, E##buga.v128); \ 104 | Cua = E##buga.v128; \ 105 | \ 106 | A##be = LOAD64(A##begi.v64[0]); \ 107 | XOReq64(A##be, De); \ 108 | Bka = ROL64(A##be, 1); \ 109 | XOReq64(A##ga, Da); \ 110 | Bme = ROL64(A##ga, 36); \ 111 | Bkame = GET64LO(Bka, Bme); \ 112 | A##gi = LOAD64(A##begi.v64[1]); \ 113 | XOReq64(A##gi, Di); \ 114 | Bke = ROL64(A##gi, 6); \ 115 | A##ke = LOAD64(A##kemi.v64[0]); \ 116 | XOReq64(A##ke, De); \ 117 | Bmi = ROL64(A##ke, 10); \ 118 | Bkemi = GET64LO(Bke, Bmi); \ 119 | XOReq64(A##ko, Do); \ 120 | Bki = ROL64(A##ko, 25); \ 121 | A##mi = LOAD64(A##kemi.v64[1]); \ 122 | XOReq64(A##mi, Di); \ 123 | Bmo = ROL64(A##mi, 15); \ 124 | Bkimo = GET64LO(Bki, Bmo); \ 125 | E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \ 126 | XOReq128(Cae, E##kame.v128); \ 127 | XOReq64(A##mu, Du); \ 128 | Bko = ROL64(A##mu, 8); \ 129 | XOReq64(A##so, Do); \ 130 | Bmu = ROL64(A##so, 56); \ 131 | Bkomu = GET64LO(Bko, Bmu); \ 132 | E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \ 133 | XOReq128(Cei, E##kemi.v128); \ 134 | XOReq64(A##sa, Da); \ 135 | Bku = ROL64(A##sa, 18); \ 136 | XOReq64(A##bu, Du); \ 137 | Bma = ROL64(A##bu, 27); \ 138 | Bkuma = GET64LO(Bku, Bma); \ 139 | E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \ 140 | E##ki = E##kimo.v128; \ 141 | E##mo = GET64HI(E##kimo.v128, E##kimo.v128); \ 142 | XOReq128(Cio, E##kimo.v128); \ 143 | E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \ 144 | E##ko = E##komu.v128; \ 145 | E##mu = GET64HI(E##komu.v128, E##komu.v128); \ 146 | XOReq128(Cou, E##komu.v128); \ 147 | E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \ 148 | E##ku = E##kuma.v128; \ 149 | E##ma = GET64HI(E##kuma.v128, E##kuma.v128); \ 150 | XOReq128(Cua, E##kuma.v128); \ 151 | \ 152 | XOReq64(A##bi, Di); \ 153 | Bsa = ROL64(A##bi, 62); \ 154 | XOReq64(A##go, Do); \ 155 | Bse = ROL64(A##go, 55); \ 156 | XOReq64(A##ku, Du); \ 157 | Bsi = ROL64(A##ku, 39); \ 158 | E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \ 159 | Ca = E##sa; \ 160 | XOReq64(A##ma, Da); \ 161 | Bso = ROL64(A##ma, 41); \ 162 | E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \ 163 | Ce = E##se; \ 164 | XOReq128(Cae, GET64LO(Ca, Ce)); \ 165 | XOReq64(A##se, De); \ 166 | Bsu = ROL64(A##se, 2); \ 167 | E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \ 168 | Ci = E##si; \ 169 | E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \ 170 | Co = E##so; \ 171 | XOReq128(Cio, GET64LO(Ci, Co)); \ 172 | E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \ 173 | Cu = E##su; \ 174 | \ 175 | Zero = ZERO128(); \ 176 | XOReq128(Cae, GET64HI(Cua, Zero)); \ 177 | XOReq128(Cae, GET64LO(Zero, Cei)); \ 178 | XOReq128(Cio, GET64HI(Cei, Zero)); \ 179 | XOReq128(Cio, GET64LO(Zero, Cou)); \ 180 | XOReq128(Cua, GET64HI(Cou, Zero)); \ 181 | XOReq64(Cu, Cua); \ 182 | 183 | // --- Theta Rho Pi Chi Iota 184 | // --- 64-bit lanes mapped to 64-bit and 128-bit words 185 | #define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E) 186 | 187 | const UINT64 KeccakF1600RoundConstants[24] = { 188 | 0x0000000000000001ULL, 189 | 0x0000000000008082ULL, 190 | 0x800000000000808aULL, 191 | 0x8000000080008000ULL, 192 | 0x000000000000808bULL, 193 | 0x0000000080000001ULL, 194 | 0x8000000080008081ULL, 195 | 0x8000000000008009ULL, 196 | 0x000000000000008aULL, 197 | 0x0000000000000088ULL, 198 | 0x0000000080008009ULL, 199 | 0x000000008000000aULL, 200 | 0x000000008000808bULL, 201 | 0x800000000000008bULL, 202 | 0x8000000000008089ULL, 203 | 0x8000000000008003ULL, 204 | 0x8000000000008002ULL, 205 | 0x8000000000000080ULL, 206 | 0x000000000000800aULL, 207 | 0x800000008000000aULL, 208 | 0x8000000080008081ULL, 209 | 0x8000000000008080ULL, 210 | 0x0000000080000001ULL, 211 | 0x8000000080008008ULL }; 212 | 213 | #define copyFromStateAndXor576bits(X, state, input) \ 214 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 215 | X##ba = X##bae.v128; \ 216 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 217 | Cae = X##bae.v128; \ 218 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 219 | X##bi = X##bio.v128; \ 220 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 221 | Cio = X##bio.v128; \ 222 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 223 | Cu = X##bu; \ 224 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 225 | X##ga = X##gae.v128; \ 226 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 227 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 228 | XOReq128(Cae, X##gae.v128); \ 229 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 230 | X##gi = X##gio.v128; \ 231 | X##begi.v128 = GET64LO(X##be, X##gi); \ 232 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 233 | XOReq128(Cio, X##gio.v128); \ 234 | X##gu = LOAD64(state[ 9]); \ 235 | XOReq64(Cu, X##gu); \ 236 | X##kae.v128 = LOAD128(state[10]); \ 237 | X##ka = X##kae.v128; \ 238 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 239 | XOReq128(Cae, X##kae.v128); \ 240 | X##kio.v128 = LOAD128(state[12]); \ 241 | X##ki = X##kio.v128; \ 242 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 243 | XOReq128(Cio, X##kio.v128); \ 244 | X##ku = LOAD64(state[14]); \ 245 | XOReq64(Cu, X##ku); \ 246 | X##mae.v128 = LOAD128u(state[15]); \ 247 | X##ma = X##mae.v128; \ 248 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 249 | X##kame.v128 = GET64LO(X##ka, X##me); \ 250 | XOReq128(Cae, X##mae.v128); \ 251 | X##mio.v128 = LOAD128u(state[17]); \ 252 | X##mi = X##mio.v128; \ 253 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 254 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 255 | XOReq128(Cio, X##mio.v128); \ 256 | X##mu = LOAD64(state[19]); \ 257 | XOReq64(Cu, X##mu); \ 258 | X##sae.v128 = LOAD128(state[20]); \ 259 | X##sa = X##sae.v128; \ 260 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 261 | XOReq128(Cae, X##sae.v128); \ 262 | X##sio.v128 = LOAD128(state[22]); \ 263 | X##si = X##sio.v128; \ 264 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 265 | XOReq128(Cio, X##sio.v128); \ 266 | X##su = LOAD64(state[24]); \ 267 | XOReq64(Cu, X##su); \ 268 | 269 | #define copyFromStateAndXor832bits(X, state, input) \ 270 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 271 | X##ba = X##bae.v128; \ 272 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 273 | Cae = X##bae.v128; \ 274 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 275 | X##bi = X##bio.v128; \ 276 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 277 | Cio = X##bio.v128; \ 278 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 279 | Cu = X##bu; \ 280 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 281 | X##ga = X##gae.v128; \ 282 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 283 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 284 | XOReq128(Cae, X##gae.v128); \ 285 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 286 | X##gi = X##gio.v128; \ 287 | X##begi.v128 = GET64LO(X##be, X##gi); \ 288 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 289 | XOReq128(Cio, X##gio.v128); \ 290 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 291 | XOReq64(Cu, X##gu); \ 292 | X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ 293 | X##ka = X##kae.v128; \ 294 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 295 | XOReq128(Cae, X##kae.v128); \ 296 | X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \ 297 | X##ki = X##kio.v128; \ 298 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 299 | XOReq128(Cio, X##kio.v128); \ 300 | X##ku = LOAD64(state[14]); \ 301 | XOReq64(Cu, X##ku); \ 302 | X##mae.v128 = LOAD128u(state[15]); \ 303 | X##ma = X##mae.v128; \ 304 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 305 | X##kame.v128 = GET64LO(X##ka, X##me); \ 306 | XOReq128(Cae, X##mae.v128); \ 307 | X##mio.v128 = LOAD128u(state[17]); \ 308 | X##mi = X##mio.v128; \ 309 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 310 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 311 | XOReq128(Cio, X##mio.v128); \ 312 | X##mu = LOAD64(state[19]); \ 313 | XOReq64(Cu, X##mu); \ 314 | X##sae.v128 = LOAD128(state[20]); \ 315 | X##sa = X##sae.v128; \ 316 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 317 | XOReq128(Cae, X##sae.v128); \ 318 | X##sio.v128 = LOAD128(state[22]); \ 319 | X##si = X##sio.v128; \ 320 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 321 | XOReq128(Cio, X##sio.v128); \ 322 | X##su = LOAD64(state[24]); \ 323 | XOReq64(Cu, X##su); \ 324 | 325 | #define copyFromStateAndXor1024bits(X, state, input) \ 326 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 327 | X##ba = X##bae.v128; \ 328 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 329 | Cae = X##bae.v128; \ 330 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 331 | X##bi = X##bio.v128; \ 332 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 333 | Cio = X##bio.v128; \ 334 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 335 | Cu = X##bu; \ 336 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 337 | X##ga = X##gae.v128; \ 338 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 339 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 340 | XOReq128(Cae, X##gae.v128); \ 341 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 342 | X##gi = X##gio.v128; \ 343 | X##begi.v128 = GET64LO(X##be, X##gi); \ 344 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 345 | XOReq128(Cio, X##gio.v128); \ 346 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 347 | XOReq64(Cu, X##gu); \ 348 | X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ 349 | X##ka = X##kae.v128; \ 350 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 351 | XOReq128(Cae, X##kae.v128); \ 352 | X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ 353 | X##ki = X##kio.v128; \ 354 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 355 | XOReq128(Cio, X##kio.v128); \ 356 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 357 | XOReq64(Cu, X##ku); \ 358 | X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \ 359 | X##ma = X##mae.v128; \ 360 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 361 | X##kame.v128 = GET64LO(X##ka, X##me); \ 362 | XOReq128(Cae, X##mae.v128); \ 363 | X##mio.v128 = LOAD128u(state[17]); \ 364 | X##mi = X##mio.v128; \ 365 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 366 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 367 | XOReq128(Cio, X##mio.v128); \ 368 | X##mu = LOAD64(state[19]); \ 369 | XOReq64(Cu, X##mu); \ 370 | X##sae.v128 = LOAD128(state[20]); \ 371 | X##sa = X##sae.v128; \ 372 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 373 | XOReq128(Cae, X##sae.v128); \ 374 | X##sio.v128 = LOAD128(state[22]); \ 375 | X##si = X##sio.v128; \ 376 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 377 | XOReq128(Cio, X##sio.v128); \ 378 | X##su = LOAD64(state[24]); \ 379 | XOReq64(Cu, X##su); \ 380 | 381 | #define copyFromStateAndXor1088bits(X, state, input) \ 382 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 383 | X##ba = X##bae.v128; \ 384 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 385 | Cae = X##bae.v128; \ 386 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 387 | X##bi = X##bio.v128; \ 388 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 389 | Cio = X##bio.v128; \ 390 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 391 | Cu = X##bu; \ 392 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 393 | X##ga = X##gae.v128; \ 394 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 395 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 396 | XOReq128(Cae, X##gae.v128); \ 397 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 398 | X##gi = X##gio.v128; \ 399 | X##begi.v128 = GET64LO(X##be, X##gi); \ 400 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 401 | XOReq128(Cio, X##gio.v128); \ 402 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 403 | XOReq64(Cu, X##gu); \ 404 | X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ 405 | X##ka = X##kae.v128; \ 406 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 407 | XOReq128(Cae, X##kae.v128); \ 408 | X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ 409 | X##ki = X##kio.v128; \ 410 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 411 | XOReq128(Cio, X##kio.v128); \ 412 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 413 | XOReq64(Cu, X##ku); \ 414 | X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ 415 | X##ma = X##mae.v128; \ 416 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 417 | X##kame.v128 = GET64LO(X##ka, X##me); \ 418 | XOReq128(Cae, X##mae.v128); \ 419 | X##mio.v128 = LOAD128u(state[17]); \ 420 | X##mi = X##mio.v128; \ 421 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 422 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 423 | XOReq128(Cio, X##mio.v128); \ 424 | X##mu = LOAD64(state[19]); \ 425 | XOReq64(Cu, X##mu); \ 426 | X##sae.v128 = LOAD128(state[20]); \ 427 | X##sa = X##sae.v128; \ 428 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 429 | XOReq128(Cae, X##sae.v128); \ 430 | X##sio.v128 = LOAD128(state[22]); \ 431 | X##si = X##sio.v128; \ 432 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 433 | XOReq128(Cio, X##sio.v128); \ 434 | X##su = LOAD64(state[24]); \ 435 | XOReq64(Cu, X##su); \ 436 | 437 | #define copyFromStateAndXor1152bits(X, state, input) \ 438 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 439 | X##ba = X##bae.v128; \ 440 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 441 | Cae = X##bae.v128; \ 442 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 443 | X##bi = X##bio.v128; \ 444 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 445 | Cio = X##bio.v128; \ 446 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 447 | Cu = X##bu; \ 448 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 449 | X##ga = X##gae.v128; \ 450 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 451 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 452 | XOReq128(Cae, X##gae.v128); \ 453 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 454 | X##gi = X##gio.v128; \ 455 | X##begi.v128 = GET64LO(X##be, X##gi); \ 456 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 457 | XOReq128(Cio, X##gio.v128); \ 458 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 459 | XOReq64(Cu, X##gu); \ 460 | X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ 461 | X##ka = X##kae.v128; \ 462 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 463 | XOReq128(Cae, X##kae.v128); \ 464 | X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ 465 | X##ki = X##kio.v128; \ 466 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 467 | XOReq128(Cio, X##kio.v128); \ 468 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 469 | XOReq64(Cu, X##ku); \ 470 | X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ 471 | X##ma = X##mae.v128; \ 472 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 473 | X##kame.v128 = GET64LO(X##ka, X##me); \ 474 | XOReq128(Cae, X##mae.v128); \ 475 | X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \ 476 | X##mi = X##mio.v128; \ 477 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 478 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 479 | XOReq128(Cio, X##mio.v128); \ 480 | X##mu = LOAD64(state[19]); \ 481 | XOReq64(Cu, X##mu); \ 482 | X##sae.v128 = LOAD128(state[20]); \ 483 | X##sa = X##sae.v128; \ 484 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 485 | XOReq128(Cae, X##sae.v128); \ 486 | X##sio.v128 = LOAD128(state[22]); \ 487 | X##si = X##sio.v128; \ 488 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 489 | XOReq128(Cio, X##sio.v128); \ 490 | X##su = LOAD64(state[24]); \ 491 | XOReq64(Cu, X##su); \ 492 | 493 | #define copyFromStateAndXor1344bits(X, state, input) \ 494 | X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ 495 | X##ba = X##bae.v128; \ 496 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 497 | Cae = X##bae.v128; \ 498 | X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ 499 | X##bi = X##bio.v128; \ 500 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 501 | Cio = X##bio.v128; \ 502 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ 503 | Cu = X##bu; \ 504 | X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ 505 | X##ga = X##gae.v128; \ 506 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 507 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 508 | XOReq128(Cae, X##gae.v128); \ 509 | X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ 510 | X##gi = X##gio.v128; \ 511 | X##begi.v128 = GET64LO(X##be, X##gi); \ 512 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 513 | XOReq128(Cio, X##gio.v128); \ 514 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ 515 | XOReq64(Cu, X##gu); \ 516 | X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ 517 | X##ka = X##kae.v128; \ 518 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 519 | XOReq128(Cae, X##kae.v128); \ 520 | X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \ 521 | X##ki = X##kio.v128; \ 522 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 523 | XOReq128(Cio, X##kio.v128); \ 524 | X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \ 525 | XOReq64(Cu, X##ku); \ 526 | X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \ 527 | X##ma = X##mae.v128; \ 528 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 529 | X##kame.v128 = GET64LO(X##ka, X##me); \ 530 | XOReq128(Cae, X##mae.v128); \ 531 | X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \ 532 | X##mi = X##mio.v128; \ 533 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 534 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 535 | XOReq128(Cio, X##mio.v128); \ 536 | X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \ 537 | XOReq64(Cu, X##mu); \ 538 | X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \ 539 | X##sa = X##sae.v128; \ 540 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 541 | XOReq128(Cae, X##sae.v128); \ 542 | X##sio.v128 = LOAD128(state[22]); \ 543 | X##si = X##sio.v128; \ 544 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 545 | XOReq128(Cio, X##sio.v128); \ 546 | X##su = LOAD64(state[24]); \ 547 | XOReq64(Cu, X##su); \ 548 | 549 | #define copyFromState(X, state) \ 550 | X##bae.v128 = LOAD128(state[ 0]); \ 551 | X##ba = X##bae.v128; \ 552 | X##be = GET64HI(X##bae.v128, X##bae.v128); \ 553 | Cae = X##bae.v128; \ 554 | X##bio.v128 = LOAD128(state[ 2]); \ 555 | X##bi = X##bio.v128; \ 556 | X##bo = GET64HI(X##bio.v128, X##bio.v128); \ 557 | Cio = X##bio.v128; \ 558 | X##bu = LOAD64(state[ 4]); \ 559 | Cu = X##bu; \ 560 | X##gae.v128 = LOAD128u(state[ 5]); \ 561 | X##ga = X##gae.v128; \ 562 | X##ge = GET64HI(X##gae.v128, X##gae.v128); \ 563 | X##bage.v128 = GET64LO(X##ba, X##ge); \ 564 | XOReq128(Cae, X##gae.v128); \ 565 | X##gio.v128 = LOAD128u(state[ 7]); \ 566 | X##gi = X##gio.v128; \ 567 | X##begi.v128 = GET64LO(X##be, X##gi); \ 568 | X##go = GET64HI(X##gio.v128, X##gio.v128); \ 569 | XOReq128(Cio, X##gio.v128); \ 570 | X##gu = LOAD64(state[ 9]); \ 571 | XOReq64(Cu, X##gu); \ 572 | X##kae.v128 = LOAD128(state[10]); \ 573 | X##ka = X##kae.v128; \ 574 | X##ke = GET64HI(X##kae.v128, X##kae.v128); \ 575 | XOReq128(Cae, X##kae.v128); \ 576 | X##kio.v128 = LOAD128(state[12]); \ 577 | X##ki = X##kio.v128; \ 578 | X##ko = GET64HI(X##kio.v128, X##kio.v128); \ 579 | XOReq128(Cio, X##kio.v128); \ 580 | X##ku = LOAD64(state[14]); \ 581 | XOReq64(Cu, X##ku); \ 582 | X##mae.v128 = LOAD128u(state[15]); \ 583 | X##ma = X##mae.v128; \ 584 | X##me = GET64HI(X##mae.v128, X##mae.v128); \ 585 | X##kame.v128 = GET64LO(X##ka, X##me); \ 586 | XOReq128(Cae, X##mae.v128); \ 587 | X##mio.v128 = LOAD128u(state[17]); \ 588 | X##mi = X##mio.v128; \ 589 | X##kemi.v128 = GET64LO(X##ke, X##mi); \ 590 | X##mo = GET64HI(X##mio.v128, X##mio.v128); \ 591 | XOReq128(Cio, X##mio.v128); \ 592 | X##mu = LOAD64(state[19]); \ 593 | XOReq64(Cu, X##mu); \ 594 | X##sae.v128 = LOAD128(state[20]); \ 595 | X##sa = X##sae.v128; \ 596 | X##se = GET64HI(X##sae.v128, X##sae.v128); \ 597 | XOReq128(Cae, X##sae.v128); \ 598 | X##sio.v128 = LOAD128(state[22]); \ 599 | X##si = X##sio.v128; \ 600 | X##so = GET64HI(X##sio.v128, X##sio.v128); \ 601 | XOReq128(Cio, X##sio.v128); \ 602 | X##su = LOAD64(state[24]); \ 603 | XOReq64(Cu, X##su); \ 604 | 605 | #define copyToState(state, X) \ 606 | state[ 0] = A##bage.v64[0]; \ 607 | state[ 1] = A##begi.v64[0]; \ 608 | STORE64(state[ 2], X##bi); \ 609 | STORE64(state[ 3], X##bo); \ 610 | STORE64(state[ 4], X##bu); \ 611 | STORE64(state[ 5], X##ga); \ 612 | state[ 6] = A##bage.v64[1]; \ 613 | state[ 7] = A##begi.v64[1]; \ 614 | STORE64(state[ 8], X##go); \ 615 | STORE64(state[ 9], X##gu); \ 616 | state[10] = X##kame.v64[0]; \ 617 | state[11] = X##kemi.v64[0]; \ 618 | STORE64(state[12], X##ki); \ 619 | STORE64(state[13], X##ko); \ 620 | STORE64(state[14], X##ku); \ 621 | STORE64(state[15], X##ma); \ 622 | state[16] = X##kame.v64[1]; \ 623 | state[17] = X##kemi.v64[1]; \ 624 | STORE64(state[18], X##mo); \ 625 | STORE64(state[19], X##mu); \ 626 | STORE64(state[20], X##sa); \ 627 | STORE64(state[21], X##se); \ 628 | STORE64(state[22], X##si); \ 629 | STORE64(state[23], X##so); \ 630 | STORE64(state[24], X##su); \ 631 | 632 | #define copyStateVariables(X, Y) \ 633 | X##bage = Y##bage; \ 634 | X##begi = Y##begi; \ 635 | X##bi = Y##bi; \ 636 | X##bo = Y##bo; \ 637 | X##bu = Y##bu; \ 638 | X##ga = Y##ga; \ 639 | X##go = Y##go; \ 640 | X##gu = Y##gu; \ 641 | X##kame = Y##kame; \ 642 | X##kemi = Y##kemi; \ 643 | X##ki = Y##ki; \ 644 | X##ko = Y##ko; \ 645 | X##ku = Y##ku; \ 646 | X##ma = Y##ma; \ 647 | X##mo = Y##mo; \ 648 | X##mu = Y##mu; \ 649 | X##sa = Y##sa; \ 650 | X##se = Y##se; \ 651 | X##si = Y##si; \ 652 | X##so = Y##so; \ 653 | X##su = Y##su; \ 654 | 655 | -------------------------------------------------------------------------------- /c_src/opt32/KeccakF-1600-32-s1.macros: -------------------------------------------------------------------------------- 1 | /* 2 | Code automatically generated by KeccakTools! 3 | 4 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 5 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 6 | questions, please refer to our website: http://keccak.noekeon.org/ 7 | 8 | Implementation by the designers, 9 | hereby denoted as "the implementer". 10 | 11 | To the extent possible under law, the implementer has waived all copyright 12 | and related or neighboring rights to the source code in this file. 13 | http://creativecommons.org/publicdomain/zero/1.0/ 14 | */ 15 | 16 | #define declareABCDE \ 17 | UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ 18 | UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ 19 | UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ 20 | UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ 21 | UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ 22 | UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ 23 | UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ 24 | UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ 25 | UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ 26 | UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ 27 | UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \ 28 | UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \ 29 | UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \ 30 | UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \ 31 | UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \ 32 | UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \ 33 | UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \ 34 | UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \ 35 | UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \ 36 | UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \ 37 | UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \ 38 | UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \ 39 | UINT32 Da0, De0, Di0, Do0, Du0; \ 40 | UINT32 Da1, De1, Di1, Do1, Du1; \ 41 | UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ 42 | UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ 43 | UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ 44 | UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ 45 | UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ 46 | UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ 47 | UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ 48 | UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ 49 | UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ 50 | UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ 51 | 52 | #define prepareTheta \ 53 | Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \ 54 | Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \ 55 | Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \ 56 | Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \ 57 | Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \ 58 | Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \ 59 | Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \ 60 | Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \ 61 | Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \ 62 | Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \ 63 | 64 | #ifdef UseBebigokimisa 65 | // --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') 66 | // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words 67 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 68 | Da0 = Cu0^ROL32(Ce1, 1); \ 69 | Da1 = Cu1^Ce0; \ 70 | De0 = Ca0^ROL32(Ci1, 1); \ 71 | De1 = Ca1^Ci0; \ 72 | Di0 = Ce0^ROL32(Co1, 1); \ 73 | Di1 = Ce1^Co0; \ 74 | Do0 = Ci0^ROL32(Cu1, 1); \ 75 | Do1 = Ci1^Cu0; \ 76 | Du0 = Co0^ROL32(Ca1, 1); \ 77 | Du1 = Co1^Ca0; \ 78 | \ 79 | A##ba0 ^= Da0; \ 80 | Bba0 = A##ba0; \ 81 | A##ge0 ^= De0; \ 82 | Bbe0 = ROL32(A##ge0, 22); \ 83 | A##ki1 ^= Di1; \ 84 | Bbi0 = ROL32(A##ki1, 22); \ 85 | A##mo1 ^= Do1; \ 86 | Bbo0 = ROL32(A##mo1, 11); \ 87 | A##su0 ^= Du0; \ 88 | Bbu0 = ROL32(A##su0, 7); \ 89 | E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ 90 | E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ 91 | Ca0 = E##ba0; \ 92 | E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ 93 | Ce0 = E##be0; \ 94 | E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ 95 | Ci0 = E##bi0; \ 96 | E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ 97 | Co0 = E##bo0; \ 98 | E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ 99 | Cu0 = E##bu0; \ 100 | \ 101 | A##ba1 ^= Da1; \ 102 | Bba1 = A##ba1; \ 103 | A##ge1 ^= De1; \ 104 | Bbe1 = ROL32(A##ge1, 22); \ 105 | A##ki0 ^= Di0; \ 106 | Bbi1 = ROL32(A##ki0, 21); \ 107 | A##mo0 ^= Do0; \ 108 | Bbo1 = ROL32(A##mo0, 10); \ 109 | A##su1 ^= Du1; \ 110 | Bbu1 = ROL32(A##su1, 7); \ 111 | E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ 112 | E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ 113 | Ca1 = E##ba1; \ 114 | E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ 115 | Ce1 = E##be1; \ 116 | E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ 117 | Ci1 = E##bi1; \ 118 | E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ 119 | Co1 = E##bo1; \ 120 | E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ 121 | Cu1 = E##bu1; \ 122 | \ 123 | A##bo0 ^= Do0; \ 124 | Bga0 = ROL32(A##bo0, 14); \ 125 | A##gu0 ^= Du0; \ 126 | Bge0 = ROL32(A##gu0, 10); \ 127 | A##ka1 ^= Da1; \ 128 | Bgi0 = ROL32(A##ka1, 2); \ 129 | A##me1 ^= De1; \ 130 | Bgo0 = ROL32(A##me1, 23); \ 131 | A##si1 ^= Di1; \ 132 | Bgu0 = ROL32(A##si1, 31); \ 133 | E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ 134 | Ca0 ^= E##ga0; \ 135 | E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ 136 | Ce0 ^= E##ge0; \ 137 | E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ 138 | Ci0 ^= E##gi0; \ 139 | E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ 140 | Co0 ^= E##go0; \ 141 | E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ 142 | Cu0 ^= E##gu0; \ 143 | \ 144 | A##bo1 ^= Do1; \ 145 | Bga1 = ROL32(A##bo1, 14); \ 146 | A##gu1 ^= Du1; \ 147 | Bge1 = ROL32(A##gu1, 10); \ 148 | A##ka0 ^= Da0; \ 149 | Bgi1 = ROL32(A##ka0, 1); \ 150 | A##me0 ^= De0; \ 151 | Bgo1 = ROL32(A##me0, 22); \ 152 | A##si0 ^= Di0; \ 153 | Bgu1 = ROL32(A##si0, 30); \ 154 | E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ 155 | Ca1 ^= E##ga1; \ 156 | E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ 157 | Ce1 ^= E##ge1; \ 158 | E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ 159 | Ci1 ^= E##gi1; \ 160 | E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ 161 | Co1 ^= E##go1; \ 162 | E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ 163 | Cu1 ^= E##gu1; \ 164 | \ 165 | A##be1 ^= De1; \ 166 | Bka0 = ROL32(A##be1, 1); \ 167 | A##gi0 ^= Di0; \ 168 | Bke0 = ROL32(A##gi0, 3); \ 169 | A##ko1 ^= Do1; \ 170 | Bki0 = ROL32(A##ko1, 13); \ 171 | A##mu0 ^= Du0; \ 172 | Bko0 = ROL32(A##mu0, 4); \ 173 | A##sa0 ^= Da0; \ 174 | Bku0 = ROL32(A##sa0, 9); \ 175 | E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ 176 | Ca0 ^= E##ka0; \ 177 | E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ 178 | Ce0 ^= E##ke0; \ 179 | E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ 180 | Ci0 ^= E##ki0; \ 181 | E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ 182 | Co0 ^= E##ko0; \ 183 | E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ 184 | Cu0 ^= E##ku0; \ 185 | \ 186 | A##be0 ^= De0; \ 187 | Bka1 = A##be0; \ 188 | A##gi1 ^= Di1; \ 189 | Bke1 = ROL32(A##gi1, 3); \ 190 | A##ko0 ^= Do0; \ 191 | Bki1 = ROL32(A##ko0, 12); \ 192 | A##mu1 ^= Du1; \ 193 | Bko1 = ROL32(A##mu1, 4); \ 194 | A##sa1 ^= Da1; \ 195 | Bku1 = ROL32(A##sa1, 9); \ 196 | E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ 197 | Ca1 ^= E##ka1; \ 198 | E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ 199 | Ce1 ^= E##ke1; \ 200 | E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ 201 | Ci1 ^= E##ki1; \ 202 | E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ 203 | Co1 ^= E##ko1; \ 204 | E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ 205 | Cu1 ^= E##ku1; \ 206 | \ 207 | A##bu1 ^= Du1; \ 208 | Bma0 = ROL32(A##bu1, 14); \ 209 | A##ga0 ^= Da0; \ 210 | Bme0 = ROL32(A##ga0, 18); \ 211 | A##ke0 ^= De0; \ 212 | Bmi0 = ROL32(A##ke0, 5); \ 213 | A##mi1 ^= Di1; \ 214 | Bmo0 = ROL32(A##mi1, 8); \ 215 | A##so0 ^= Do0; \ 216 | Bmu0 = ROL32(A##so0, 28); \ 217 | E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ 218 | Ca0 ^= E##ma0; \ 219 | E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ 220 | Ce0 ^= E##me0; \ 221 | E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ 222 | Ci0 ^= E##mi0; \ 223 | E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ 224 | Co0 ^= E##mo0; \ 225 | E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ 226 | Cu0 ^= E##mu0; \ 227 | \ 228 | A##bu0 ^= Du0; \ 229 | Bma1 = ROL32(A##bu0, 13); \ 230 | A##ga1 ^= Da1; \ 231 | Bme1 = ROL32(A##ga1, 18); \ 232 | A##ke1 ^= De1; \ 233 | Bmi1 = ROL32(A##ke1, 5); \ 234 | A##mi0 ^= Di0; \ 235 | Bmo1 = ROL32(A##mi0, 7); \ 236 | A##so1 ^= Do1; \ 237 | Bmu1 = ROL32(A##so1, 28); \ 238 | E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ 239 | Ca1 ^= E##ma1; \ 240 | E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ 241 | Ce1 ^= E##me1; \ 242 | E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ 243 | Ci1 ^= E##mi1; \ 244 | E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ 245 | Co1 ^= E##mo1; \ 246 | E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ 247 | Cu1 ^= E##mu1; \ 248 | \ 249 | A##bi0 ^= Di0; \ 250 | Bsa0 = ROL32(A##bi0, 31); \ 251 | A##go1 ^= Do1; \ 252 | Bse0 = ROL32(A##go1, 28); \ 253 | A##ku1 ^= Du1; \ 254 | Bsi0 = ROL32(A##ku1, 20); \ 255 | A##ma1 ^= Da1; \ 256 | Bso0 = ROL32(A##ma1, 21); \ 257 | A##se0 ^= De0; \ 258 | Bsu0 = ROL32(A##se0, 1); \ 259 | E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ 260 | Ca0 ^= E##sa0; \ 261 | E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ 262 | Ce0 ^= E##se0; \ 263 | E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ 264 | Ci0 ^= E##si0; \ 265 | E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ 266 | Co0 ^= E##so0; \ 267 | E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ 268 | Cu0 ^= E##su0; \ 269 | \ 270 | A##bi1 ^= Di1; \ 271 | Bsa1 = ROL32(A##bi1, 31); \ 272 | A##go0 ^= Do0; \ 273 | Bse1 = ROL32(A##go0, 27); \ 274 | A##ku0 ^= Du0; \ 275 | Bsi1 = ROL32(A##ku0, 19); \ 276 | A##ma0 ^= Da0; \ 277 | Bso1 = ROL32(A##ma0, 20); \ 278 | A##se1 ^= De1; \ 279 | Bsu1 = ROL32(A##se1, 1); \ 280 | E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ 281 | Ca1 ^= E##sa1; \ 282 | E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ 283 | Ce1 ^= E##se1; \ 284 | E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ 285 | Ci1 ^= E##si1; \ 286 | E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ 287 | Co1 ^= E##so1; \ 288 | E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ 289 | Cu1 ^= E##su1; \ 290 | \ 291 | 292 | // --- Code for round (lane complementing pattern 'bebigokimisa') 293 | // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words 294 | #define thetaRhoPiChiIota(i, A, E) \ 295 | Da0 = Cu0^ROL32(Ce1, 1); \ 296 | Da1 = Cu1^Ce0; \ 297 | De0 = Ca0^ROL32(Ci1, 1); \ 298 | De1 = Ca1^Ci0; \ 299 | Di0 = Ce0^ROL32(Co1, 1); \ 300 | Di1 = Ce1^Co0; \ 301 | Do0 = Ci0^ROL32(Cu1, 1); \ 302 | Do1 = Ci1^Cu0; \ 303 | Du0 = Co0^ROL32(Ca1, 1); \ 304 | Du1 = Co1^Ca0; \ 305 | \ 306 | A##ba0 ^= Da0; \ 307 | Bba0 = A##ba0; \ 308 | A##ge0 ^= De0; \ 309 | Bbe0 = ROL32(A##ge0, 22); \ 310 | A##ki1 ^= Di1; \ 311 | Bbi0 = ROL32(A##ki1, 22); \ 312 | A##mo1 ^= Do1; \ 313 | Bbo0 = ROL32(A##mo1, 11); \ 314 | A##su0 ^= Du0; \ 315 | Bbu0 = ROL32(A##su0, 7); \ 316 | E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ 317 | E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ 318 | E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ 319 | E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ 320 | E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ 321 | E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ 322 | \ 323 | A##ba1 ^= Da1; \ 324 | Bba1 = A##ba1; \ 325 | A##ge1 ^= De1; \ 326 | Bbe1 = ROL32(A##ge1, 22); \ 327 | A##ki0 ^= Di0; \ 328 | Bbi1 = ROL32(A##ki0, 21); \ 329 | A##mo0 ^= Do0; \ 330 | Bbo1 = ROL32(A##mo0, 10); \ 331 | A##su1 ^= Du1; \ 332 | Bbu1 = ROL32(A##su1, 7); \ 333 | E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ 334 | E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ 335 | E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ 336 | E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ 337 | E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ 338 | E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ 339 | \ 340 | A##bo0 ^= Do0; \ 341 | Bga0 = ROL32(A##bo0, 14); \ 342 | A##gu0 ^= Du0; \ 343 | Bge0 = ROL32(A##gu0, 10); \ 344 | A##ka1 ^= Da1; \ 345 | Bgi0 = ROL32(A##ka1, 2); \ 346 | A##me1 ^= De1; \ 347 | Bgo0 = ROL32(A##me1, 23); \ 348 | A##si1 ^= Di1; \ 349 | Bgu0 = ROL32(A##si1, 31); \ 350 | E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ 351 | E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ 352 | E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ 353 | E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ 354 | E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ 355 | \ 356 | A##bo1 ^= Do1; \ 357 | Bga1 = ROL32(A##bo1, 14); \ 358 | A##gu1 ^= Du1; \ 359 | Bge1 = ROL32(A##gu1, 10); \ 360 | A##ka0 ^= Da0; \ 361 | Bgi1 = ROL32(A##ka0, 1); \ 362 | A##me0 ^= De0; \ 363 | Bgo1 = ROL32(A##me0, 22); \ 364 | A##si0 ^= Di0; \ 365 | Bgu1 = ROL32(A##si0, 30); \ 366 | E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ 367 | E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ 368 | E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ 369 | E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ 370 | E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ 371 | \ 372 | A##be1 ^= De1; \ 373 | Bka0 = ROL32(A##be1, 1); \ 374 | A##gi0 ^= Di0; \ 375 | Bke0 = ROL32(A##gi0, 3); \ 376 | A##ko1 ^= Do1; \ 377 | Bki0 = ROL32(A##ko1, 13); \ 378 | A##mu0 ^= Du0; \ 379 | Bko0 = ROL32(A##mu0, 4); \ 380 | A##sa0 ^= Da0; \ 381 | Bku0 = ROL32(A##sa0, 9); \ 382 | E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ 383 | E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ 384 | E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ 385 | E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ 386 | E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ 387 | \ 388 | A##be0 ^= De0; \ 389 | Bka1 = A##be0; \ 390 | A##gi1 ^= Di1; \ 391 | Bke1 = ROL32(A##gi1, 3); \ 392 | A##ko0 ^= Do0; \ 393 | Bki1 = ROL32(A##ko0, 12); \ 394 | A##mu1 ^= Du1; \ 395 | Bko1 = ROL32(A##mu1, 4); \ 396 | A##sa1 ^= Da1; \ 397 | Bku1 = ROL32(A##sa1, 9); \ 398 | E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ 399 | E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ 400 | E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ 401 | E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ 402 | E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ 403 | \ 404 | A##bu1 ^= Du1; \ 405 | Bma0 = ROL32(A##bu1, 14); \ 406 | A##ga0 ^= Da0; \ 407 | Bme0 = ROL32(A##ga0, 18); \ 408 | A##ke0 ^= De0; \ 409 | Bmi0 = ROL32(A##ke0, 5); \ 410 | A##mi1 ^= Di1; \ 411 | Bmo0 = ROL32(A##mi1, 8); \ 412 | A##so0 ^= Do0; \ 413 | Bmu0 = ROL32(A##so0, 28); \ 414 | E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ 415 | E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ 416 | E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ 417 | E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ 418 | E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ 419 | \ 420 | A##bu0 ^= Du0; \ 421 | Bma1 = ROL32(A##bu0, 13); \ 422 | A##ga1 ^= Da1; \ 423 | Bme1 = ROL32(A##ga1, 18); \ 424 | A##ke1 ^= De1; \ 425 | Bmi1 = ROL32(A##ke1, 5); \ 426 | A##mi0 ^= Di0; \ 427 | Bmo1 = ROL32(A##mi0, 7); \ 428 | A##so1 ^= Do1; \ 429 | Bmu1 = ROL32(A##so1, 28); \ 430 | E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ 431 | E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ 432 | E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ 433 | E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ 434 | E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ 435 | \ 436 | A##bi0 ^= Di0; \ 437 | Bsa0 = ROL32(A##bi0, 31); \ 438 | A##go1 ^= Do1; \ 439 | Bse0 = ROL32(A##go1, 28); \ 440 | A##ku1 ^= Du1; \ 441 | Bsi0 = ROL32(A##ku1, 20); \ 442 | A##ma1 ^= Da1; \ 443 | Bso0 = ROL32(A##ma1, 21); \ 444 | A##se0 ^= De0; \ 445 | Bsu0 = ROL32(A##se0, 1); \ 446 | E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ 447 | E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ 448 | E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ 449 | E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ 450 | E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ 451 | \ 452 | A##bi1 ^= Di1; \ 453 | Bsa1 = ROL32(A##bi1, 31); \ 454 | A##go0 ^= Do0; \ 455 | Bse1 = ROL32(A##go0, 27); \ 456 | A##ku0 ^= Du0; \ 457 | Bsi1 = ROL32(A##ku0, 19); \ 458 | A##ma0 ^= Da0; \ 459 | Bso1 = ROL32(A##ma0, 20); \ 460 | A##se1 ^= De1; \ 461 | Bsu1 = ROL32(A##se1, 1); \ 462 | E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ 463 | E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ 464 | E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ 465 | E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ 466 | E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ 467 | \ 468 | 469 | #else // UseBebigokimisa 470 | // --- Code for round, with prepare-theta 471 | // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words 472 | #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 473 | Da0 = Cu0^ROL32(Ce1, 1); \ 474 | Da1 = Cu1^Ce0; \ 475 | De0 = Ca0^ROL32(Ci1, 1); \ 476 | De1 = Ca1^Ci0; \ 477 | Di0 = Ce0^ROL32(Co1, 1); \ 478 | Di1 = Ce1^Co0; \ 479 | Do0 = Ci0^ROL32(Cu1, 1); \ 480 | Do1 = Ci1^Cu0; \ 481 | Du0 = Co0^ROL32(Ca1, 1); \ 482 | Du1 = Co1^Ca0; \ 483 | \ 484 | A##ba0 ^= Da0; \ 485 | Bba0 = A##ba0; \ 486 | A##ge0 ^= De0; \ 487 | Bbe0 = ROL32(A##ge0, 22); \ 488 | A##ki1 ^= Di1; \ 489 | Bbi0 = ROL32(A##ki1, 22); \ 490 | A##mo1 ^= Do1; \ 491 | Bbo0 = ROL32(A##mo1, 11); \ 492 | A##su0 ^= Du0; \ 493 | Bbu0 = ROL32(A##su0, 7); \ 494 | E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ 495 | E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ 496 | Ca0 = E##ba0; \ 497 | E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ 498 | Ce0 = E##be0; \ 499 | E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ 500 | Ci0 = E##bi0; \ 501 | E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ 502 | Co0 = E##bo0; \ 503 | E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ 504 | Cu0 = E##bu0; \ 505 | \ 506 | A##ba1 ^= Da1; \ 507 | Bba1 = A##ba1; \ 508 | A##ge1 ^= De1; \ 509 | Bbe1 = ROL32(A##ge1, 22); \ 510 | A##ki0 ^= Di0; \ 511 | Bbi1 = ROL32(A##ki0, 21); \ 512 | A##mo0 ^= Do0; \ 513 | Bbo1 = ROL32(A##mo0, 10); \ 514 | A##su1 ^= Du1; \ 515 | Bbu1 = ROL32(A##su1, 7); \ 516 | E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ 517 | E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ 518 | Ca1 = E##ba1; \ 519 | E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ 520 | Ce1 = E##be1; \ 521 | E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ 522 | Ci1 = E##bi1; \ 523 | E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ 524 | Co1 = E##bo1; \ 525 | E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ 526 | Cu1 = E##bu1; \ 527 | \ 528 | A##bo0 ^= Do0; \ 529 | Bga0 = ROL32(A##bo0, 14); \ 530 | A##gu0 ^= Du0; \ 531 | Bge0 = ROL32(A##gu0, 10); \ 532 | A##ka1 ^= Da1; \ 533 | Bgi0 = ROL32(A##ka1, 2); \ 534 | A##me1 ^= De1; \ 535 | Bgo0 = ROL32(A##me1, 23); \ 536 | A##si1 ^= Di1; \ 537 | Bgu0 = ROL32(A##si1, 31); \ 538 | E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ 539 | Ca0 ^= E##ga0; \ 540 | E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ 541 | Ce0 ^= E##ge0; \ 542 | E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ 543 | Ci0 ^= E##gi0; \ 544 | E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ 545 | Co0 ^= E##go0; \ 546 | E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ 547 | Cu0 ^= E##gu0; \ 548 | \ 549 | A##bo1 ^= Do1; \ 550 | Bga1 = ROL32(A##bo1, 14); \ 551 | A##gu1 ^= Du1; \ 552 | Bge1 = ROL32(A##gu1, 10); \ 553 | A##ka0 ^= Da0; \ 554 | Bgi1 = ROL32(A##ka0, 1); \ 555 | A##me0 ^= De0; \ 556 | Bgo1 = ROL32(A##me0, 22); \ 557 | A##si0 ^= Di0; \ 558 | Bgu1 = ROL32(A##si0, 30); \ 559 | E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ 560 | Ca1 ^= E##ga1; \ 561 | E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ 562 | Ce1 ^= E##ge1; \ 563 | E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ 564 | Ci1 ^= E##gi1; \ 565 | E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ 566 | Co1 ^= E##go1; \ 567 | E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ 568 | Cu1 ^= E##gu1; \ 569 | \ 570 | A##be1 ^= De1; \ 571 | Bka0 = ROL32(A##be1, 1); \ 572 | A##gi0 ^= Di0; \ 573 | Bke0 = ROL32(A##gi0, 3); \ 574 | A##ko1 ^= Do1; \ 575 | Bki0 = ROL32(A##ko1, 13); \ 576 | A##mu0 ^= Du0; \ 577 | Bko0 = ROL32(A##mu0, 4); \ 578 | A##sa0 ^= Da0; \ 579 | Bku0 = ROL32(A##sa0, 9); \ 580 | E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ 581 | Ca0 ^= E##ka0; \ 582 | E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ 583 | Ce0 ^= E##ke0; \ 584 | E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ 585 | Ci0 ^= E##ki0; \ 586 | E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ 587 | Co0 ^= E##ko0; \ 588 | E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ 589 | Cu0 ^= E##ku0; \ 590 | \ 591 | A##be0 ^= De0; \ 592 | Bka1 = A##be0; \ 593 | A##gi1 ^= Di1; \ 594 | Bke1 = ROL32(A##gi1, 3); \ 595 | A##ko0 ^= Do0; \ 596 | Bki1 = ROL32(A##ko0, 12); \ 597 | A##mu1 ^= Du1; \ 598 | Bko1 = ROL32(A##mu1, 4); \ 599 | A##sa1 ^= Da1; \ 600 | Bku1 = ROL32(A##sa1, 9); \ 601 | E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ 602 | Ca1 ^= E##ka1; \ 603 | E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ 604 | Ce1 ^= E##ke1; \ 605 | E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ 606 | Ci1 ^= E##ki1; \ 607 | E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ 608 | Co1 ^= E##ko1; \ 609 | E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ 610 | Cu1 ^= E##ku1; \ 611 | \ 612 | A##bu1 ^= Du1; \ 613 | Bma0 = ROL32(A##bu1, 14); \ 614 | A##ga0 ^= Da0; \ 615 | Bme0 = ROL32(A##ga0, 18); \ 616 | A##ke0 ^= De0; \ 617 | Bmi0 = ROL32(A##ke0, 5); \ 618 | A##mi1 ^= Di1; \ 619 | Bmo0 = ROL32(A##mi1, 8); \ 620 | A##so0 ^= Do0; \ 621 | Bmu0 = ROL32(A##so0, 28); \ 622 | E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ 623 | Ca0 ^= E##ma0; \ 624 | E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ 625 | Ce0 ^= E##me0; \ 626 | E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ 627 | Ci0 ^= E##mi0; \ 628 | E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ 629 | Co0 ^= E##mo0; \ 630 | E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ 631 | Cu0 ^= E##mu0; \ 632 | \ 633 | A##bu0 ^= Du0; \ 634 | Bma1 = ROL32(A##bu0, 13); \ 635 | A##ga1 ^= Da1; \ 636 | Bme1 = ROL32(A##ga1, 18); \ 637 | A##ke1 ^= De1; \ 638 | Bmi1 = ROL32(A##ke1, 5); \ 639 | A##mi0 ^= Di0; \ 640 | Bmo1 = ROL32(A##mi0, 7); \ 641 | A##so1 ^= Do1; \ 642 | Bmu1 = ROL32(A##so1, 28); \ 643 | E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ 644 | Ca1 ^= E##ma1; \ 645 | E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ 646 | Ce1 ^= E##me1; \ 647 | E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ 648 | Ci1 ^= E##mi1; \ 649 | E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ 650 | Co1 ^= E##mo1; \ 651 | E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ 652 | Cu1 ^= E##mu1; \ 653 | \ 654 | A##bi0 ^= Di0; \ 655 | Bsa0 = ROL32(A##bi0, 31); \ 656 | A##go1 ^= Do1; \ 657 | Bse0 = ROL32(A##go1, 28); \ 658 | A##ku1 ^= Du1; \ 659 | Bsi0 = ROL32(A##ku1, 20); \ 660 | A##ma1 ^= Da1; \ 661 | Bso0 = ROL32(A##ma1, 21); \ 662 | A##se0 ^= De0; \ 663 | Bsu0 = ROL32(A##se0, 1); \ 664 | E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ 665 | Ca0 ^= E##sa0; \ 666 | E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ 667 | Ce0 ^= E##se0; \ 668 | E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ 669 | Ci0 ^= E##si0; \ 670 | E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ 671 | Co0 ^= E##so0; \ 672 | E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ 673 | Cu0 ^= E##su0; \ 674 | \ 675 | A##bi1 ^= Di1; \ 676 | Bsa1 = ROL32(A##bi1, 31); \ 677 | A##go0 ^= Do0; \ 678 | Bse1 = ROL32(A##go0, 27); \ 679 | A##ku0 ^= Du0; \ 680 | Bsi1 = ROL32(A##ku0, 19); \ 681 | A##ma0 ^= Da0; \ 682 | Bso1 = ROL32(A##ma0, 20); \ 683 | A##se1 ^= De1; \ 684 | Bsu1 = ROL32(A##se1, 1); \ 685 | E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ 686 | Ca1 ^= E##sa1; \ 687 | E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ 688 | Ce1 ^= E##se1; \ 689 | E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ 690 | Ci1 ^= E##si1; \ 691 | E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ 692 | Co1 ^= E##so1; \ 693 | E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ 694 | Cu1 ^= E##su1; \ 695 | \ 696 | 697 | // --- Code for round 698 | // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words 699 | #define thetaRhoPiChiIota(i, A, E) \ 700 | Da0 = Cu0^ROL32(Ce1, 1); \ 701 | Da1 = Cu1^Ce0; \ 702 | De0 = Ca0^ROL32(Ci1, 1); \ 703 | De1 = Ca1^Ci0; \ 704 | Di0 = Ce0^ROL32(Co1, 1); \ 705 | Di1 = Ce1^Co0; \ 706 | Do0 = Ci0^ROL32(Cu1, 1); \ 707 | Do1 = Ci1^Cu0; \ 708 | Du0 = Co0^ROL32(Ca1, 1); \ 709 | Du1 = Co1^Ca0; \ 710 | \ 711 | A##ba0 ^= Da0; \ 712 | Bba0 = A##ba0; \ 713 | A##ge0 ^= De0; \ 714 | Bbe0 = ROL32(A##ge0, 22); \ 715 | A##ki1 ^= Di1; \ 716 | Bbi0 = ROL32(A##ki1, 22); \ 717 | A##mo1 ^= Do1; \ 718 | Bbo0 = ROL32(A##mo1, 11); \ 719 | A##su0 ^= Du0; \ 720 | Bbu0 = ROL32(A##su0, 7); \ 721 | E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ 722 | E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ 723 | E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ 724 | E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ 725 | E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ 726 | E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ 727 | \ 728 | A##ba1 ^= Da1; \ 729 | Bba1 = A##ba1; \ 730 | A##ge1 ^= De1; \ 731 | Bbe1 = ROL32(A##ge1, 22); \ 732 | A##ki0 ^= Di0; \ 733 | Bbi1 = ROL32(A##ki0, 21); \ 734 | A##mo0 ^= Do0; \ 735 | Bbo1 = ROL32(A##mo0, 10); \ 736 | A##su1 ^= Du1; \ 737 | Bbu1 = ROL32(A##su1, 7); \ 738 | E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ 739 | E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ 740 | E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ 741 | E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ 742 | E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ 743 | E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ 744 | \ 745 | A##bo0 ^= Do0; \ 746 | Bga0 = ROL32(A##bo0, 14); \ 747 | A##gu0 ^= Du0; \ 748 | Bge0 = ROL32(A##gu0, 10); \ 749 | A##ka1 ^= Da1; \ 750 | Bgi0 = ROL32(A##ka1, 2); \ 751 | A##me1 ^= De1; \ 752 | Bgo0 = ROL32(A##me1, 23); \ 753 | A##si1 ^= Di1; \ 754 | Bgu0 = ROL32(A##si1, 31); \ 755 | E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ 756 | E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ 757 | E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ 758 | E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ 759 | E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ 760 | \ 761 | A##bo1 ^= Do1; \ 762 | Bga1 = ROL32(A##bo1, 14); \ 763 | A##gu1 ^= Du1; \ 764 | Bge1 = ROL32(A##gu1, 10); \ 765 | A##ka0 ^= Da0; \ 766 | Bgi1 = ROL32(A##ka0, 1); \ 767 | A##me0 ^= De0; \ 768 | Bgo1 = ROL32(A##me0, 22); \ 769 | A##si0 ^= Di0; \ 770 | Bgu1 = ROL32(A##si0, 30); \ 771 | E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ 772 | E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ 773 | E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ 774 | E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ 775 | E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ 776 | \ 777 | A##be1 ^= De1; \ 778 | Bka0 = ROL32(A##be1, 1); \ 779 | A##gi0 ^= Di0; \ 780 | Bke0 = ROL32(A##gi0, 3); \ 781 | A##ko1 ^= Do1; \ 782 | Bki0 = ROL32(A##ko1, 13); \ 783 | A##mu0 ^= Du0; \ 784 | Bko0 = ROL32(A##mu0, 4); \ 785 | A##sa0 ^= Da0; \ 786 | Bku0 = ROL32(A##sa0, 9); \ 787 | E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ 788 | E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ 789 | E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ 790 | E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ 791 | E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ 792 | \ 793 | A##be0 ^= De0; \ 794 | Bka1 = A##be0; \ 795 | A##gi1 ^= Di1; \ 796 | Bke1 = ROL32(A##gi1, 3); \ 797 | A##ko0 ^= Do0; \ 798 | Bki1 = ROL32(A##ko0, 12); \ 799 | A##mu1 ^= Du1; \ 800 | Bko1 = ROL32(A##mu1, 4); \ 801 | A##sa1 ^= Da1; \ 802 | Bku1 = ROL32(A##sa1, 9); \ 803 | E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ 804 | E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ 805 | E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ 806 | E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ 807 | E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ 808 | \ 809 | A##bu1 ^= Du1; \ 810 | Bma0 = ROL32(A##bu1, 14); \ 811 | A##ga0 ^= Da0; \ 812 | Bme0 = ROL32(A##ga0, 18); \ 813 | A##ke0 ^= De0; \ 814 | Bmi0 = ROL32(A##ke0, 5); \ 815 | A##mi1 ^= Di1; \ 816 | Bmo0 = ROL32(A##mi1, 8); \ 817 | A##so0 ^= Do0; \ 818 | Bmu0 = ROL32(A##so0, 28); \ 819 | E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ 820 | E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ 821 | E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ 822 | E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ 823 | E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ 824 | \ 825 | A##bu0 ^= Du0; \ 826 | Bma1 = ROL32(A##bu0, 13); \ 827 | A##ga1 ^= Da1; \ 828 | Bme1 = ROL32(A##ga1, 18); \ 829 | A##ke1 ^= De1; \ 830 | Bmi1 = ROL32(A##ke1, 5); \ 831 | A##mi0 ^= Di0; \ 832 | Bmo1 = ROL32(A##mi0, 7); \ 833 | A##so1 ^= Do1; \ 834 | Bmu1 = ROL32(A##so1, 28); \ 835 | E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ 836 | E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ 837 | E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ 838 | E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ 839 | E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ 840 | \ 841 | A##bi0 ^= Di0; \ 842 | Bsa0 = ROL32(A##bi0, 31); \ 843 | A##go1 ^= Do1; \ 844 | Bse0 = ROL32(A##go1, 28); \ 845 | A##ku1 ^= Du1; \ 846 | Bsi0 = ROL32(A##ku1, 20); \ 847 | A##ma1 ^= Da1; \ 848 | Bso0 = ROL32(A##ma1, 21); \ 849 | A##se0 ^= De0; \ 850 | Bsu0 = ROL32(A##se0, 1); \ 851 | E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ 852 | E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ 853 | E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ 854 | E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ 855 | E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ 856 | \ 857 | A##bi1 ^= Di1; \ 858 | Bsa1 = ROL32(A##bi1, 31); \ 859 | A##go0 ^= Do0; \ 860 | Bse1 = ROL32(A##go0, 27); \ 861 | A##ku0 ^= Du0; \ 862 | Bsi1 = ROL32(A##ku0, 19); \ 863 | A##ma0 ^= Da0; \ 864 | Bso1 = ROL32(A##ma0, 20); \ 865 | A##se1 ^= De1; \ 866 | Bsu1 = ROL32(A##se1, 1); \ 867 | E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ 868 | E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ 869 | E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ 870 | E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ 871 | E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ 872 | \ 873 | 874 | #endif // UseBebigokimisa 875 | 876 | const UINT32 KeccakF1600RoundConstants_int2_0[24] = { 877 | 0x00000001UL, 878 | 0x00000000UL, 879 | 0x00000000UL, 880 | 0x00000000UL, 881 | 0x00000001UL, 882 | 0x00000001UL, 883 | 0x00000001UL, 884 | 0x00000001UL, 885 | 0x00000000UL, 886 | 0x00000000UL, 887 | 0x00000001UL, 888 | 0x00000000UL, 889 | 0x00000001UL, 890 | 0x00000001UL, 891 | 0x00000001UL, 892 | 0x00000001UL, 893 | 0x00000000UL, 894 | 0x00000000UL, 895 | 0x00000000UL, 896 | 0x00000000UL, 897 | 0x00000001UL, 898 | 0x00000000UL, 899 | 0x00000001UL, 900 | 0x00000000UL }; 901 | 902 | const UINT32 KeccakF1600RoundConstants_int2_1[24] = { 903 | 0x00000000UL, 904 | 0x00000089UL, 905 | 0x8000008bUL, 906 | 0x80008080UL, 907 | 0x0000008bUL, 908 | 0x00008000UL, 909 | 0x80008088UL, 910 | 0x80000082UL, 911 | 0x0000000bUL, 912 | 0x0000000aUL, 913 | 0x00008082UL, 914 | 0x00008003UL, 915 | 0x0000808bUL, 916 | 0x8000000bUL, 917 | 0x8000008aUL, 918 | 0x80000081UL, 919 | 0x80000081UL, 920 | 0x80000008UL, 921 | 0x00000083UL, 922 | 0x80008003UL, 923 | 0x80008088UL, 924 | 0x80000088UL, 925 | 0x00008000UL, 926 | 0x80008082UL }; 927 | 928 | #define copyFromStateAndXor1024bits(X, state, input) \ 929 | X##ba0 = state[ 0]^input[ 0]; \ 930 | X##ba1 = state[ 1]^input[ 1]; \ 931 | X##be0 = state[ 2]^input[ 2]; \ 932 | X##be1 = state[ 3]^input[ 3]; \ 933 | X##bi0 = state[ 4]^input[ 4]; \ 934 | X##bi1 = state[ 5]^input[ 5]; \ 935 | X##bo0 = state[ 6]^input[ 6]; \ 936 | X##bo1 = state[ 7]^input[ 7]; \ 937 | X##bu0 = state[ 8]^input[ 8]; \ 938 | X##bu1 = state[ 9]^input[ 9]; \ 939 | X##ga0 = state[10]^input[10]; \ 940 | X##ga1 = state[11]^input[11]; \ 941 | X##ge0 = state[12]^input[12]; \ 942 | X##ge1 = state[13]^input[13]; \ 943 | X##gi0 = state[14]^input[14]; \ 944 | X##gi1 = state[15]^input[15]; \ 945 | X##go0 = state[16]^input[16]; \ 946 | X##go1 = state[17]^input[17]; \ 947 | X##gu0 = state[18]^input[18]; \ 948 | X##gu1 = state[19]^input[19]; \ 949 | X##ka0 = state[20]^input[20]; \ 950 | X##ka1 = state[21]^input[21]; \ 951 | X##ke0 = state[22]^input[22]; \ 952 | X##ke1 = state[23]^input[23]; \ 953 | X##ki0 = state[24]^input[24]; \ 954 | X##ki1 = state[25]^input[25]; \ 955 | X##ko0 = state[26]^input[26]; \ 956 | X##ko1 = state[27]^input[27]; \ 957 | X##ku0 = state[28]^input[28]; \ 958 | X##ku1 = state[29]^input[29]; \ 959 | X##ma0 = state[30]^input[30]; \ 960 | X##ma1 = state[31]^input[31]; \ 961 | X##me0 = state[32]; \ 962 | X##me1 = state[33]; \ 963 | X##mi0 = state[34]; \ 964 | X##mi1 = state[35]; \ 965 | X##mo0 = state[36]; \ 966 | X##mo1 = state[37]; \ 967 | X##mu0 = state[38]; \ 968 | X##mu1 = state[39]; \ 969 | X##sa0 = state[40]; \ 970 | X##sa1 = state[41]; \ 971 | X##se0 = state[42]; \ 972 | X##se1 = state[43]; \ 973 | X##si0 = state[44]; \ 974 | X##si1 = state[45]; \ 975 | X##so0 = state[46]; \ 976 | X##so1 = state[47]; \ 977 | X##su0 = state[48]; \ 978 | X##su1 = state[49]; \ 979 | 980 | #define copyFromStateAndXor1088bits(X, state, input) \ 981 | X##ba0 = state[ 0]^input[ 0]; \ 982 | X##ba1 = state[ 1]^input[ 1]; \ 983 | X##be0 = state[ 2]^input[ 2]; \ 984 | X##be1 = state[ 3]^input[ 3]; \ 985 | X##bi0 = state[ 4]^input[ 4]; \ 986 | X##bi1 = state[ 5]^input[ 5]; \ 987 | X##bo0 = state[ 6]^input[ 6]; \ 988 | X##bo1 = state[ 7]^input[ 7]; \ 989 | X##bu0 = state[ 8]^input[ 8]; \ 990 | X##bu1 = state[ 9]^input[ 9]; \ 991 | X##ga0 = state[10]^input[10]; \ 992 | X##ga1 = state[11]^input[11]; \ 993 | X##ge0 = state[12]^input[12]; \ 994 | X##ge1 = state[13]^input[13]; \ 995 | X##gi0 = state[14]^input[14]; \ 996 | X##gi1 = state[15]^input[15]; \ 997 | X##go0 = state[16]^input[16]; \ 998 | X##go1 = state[17]^input[17]; \ 999 | X##gu0 = state[18]^input[18]; \ 1000 | X##gu1 = state[19]^input[19]; \ 1001 | X##ka0 = state[20]^input[20]; \ 1002 | X##ka1 = state[21]^input[21]; \ 1003 | X##ke0 = state[22]^input[22]; \ 1004 | X##ke1 = state[23]^input[23]; \ 1005 | X##ki0 = state[24]^input[24]; \ 1006 | X##ki1 = state[25]^input[25]; \ 1007 | X##ko0 = state[26]^input[26]; \ 1008 | X##ko1 = state[27]^input[27]; \ 1009 | X##ku0 = state[28]^input[28]; \ 1010 | X##ku1 = state[29]^input[29]; \ 1011 | X##ma0 = state[30]^input[30]; \ 1012 | X##ma1 = state[31]^input[31]; \ 1013 | X##me0 = state[32]^input[32]; \ 1014 | X##me1 = state[33]^input[33]; \ 1015 | X##mi0 = state[34]; \ 1016 | X##mi1 = state[35]; \ 1017 | X##mo0 = state[36]; \ 1018 | X##mo1 = state[37]; \ 1019 | X##mu0 = state[38]; \ 1020 | X##mu1 = state[39]; \ 1021 | X##sa0 = state[40]; \ 1022 | X##sa1 = state[41]; \ 1023 | X##se0 = state[42]; \ 1024 | X##se1 = state[43]; \ 1025 | X##si0 = state[44]; \ 1026 | X##si1 = state[45]; \ 1027 | X##so0 = state[46]; \ 1028 | X##so1 = state[47]; \ 1029 | X##su0 = state[48]; \ 1030 | X##su1 = state[49]; \ 1031 | 1032 | #define copyFromState(X, state) \ 1033 | X##ba0 = state[ 0]; \ 1034 | X##ba1 = state[ 1]; \ 1035 | X##be0 = state[ 2]; \ 1036 | X##be1 = state[ 3]; \ 1037 | X##bi0 = state[ 4]; \ 1038 | X##bi1 = state[ 5]; \ 1039 | X##bo0 = state[ 6]; \ 1040 | X##bo1 = state[ 7]; \ 1041 | X##bu0 = state[ 8]; \ 1042 | X##bu1 = state[ 9]; \ 1043 | X##ga0 = state[10]; \ 1044 | X##ga1 = state[11]; \ 1045 | X##ge0 = state[12]; \ 1046 | X##ge1 = state[13]; \ 1047 | X##gi0 = state[14]; \ 1048 | X##gi1 = state[15]; \ 1049 | X##go0 = state[16]; \ 1050 | X##go1 = state[17]; \ 1051 | X##gu0 = state[18]; \ 1052 | X##gu1 = state[19]; \ 1053 | X##ka0 = state[20]; \ 1054 | X##ka1 = state[21]; \ 1055 | X##ke0 = state[22]; \ 1056 | X##ke1 = state[23]; \ 1057 | X##ki0 = state[24]; \ 1058 | X##ki1 = state[25]; \ 1059 | X##ko0 = state[26]; \ 1060 | X##ko1 = state[27]; \ 1061 | X##ku0 = state[28]; \ 1062 | X##ku1 = state[29]; \ 1063 | X##ma0 = state[30]; \ 1064 | X##ma1 = state[31]; \ 1065 | X##me0 = state[32]; \ 1066 | X##me1 = state[33]; \ 1067 | X##mi0 = state[34]; \ 1068 | X##mi1 = state[35]; \ 1069 | X##mo0 = state[36]; \ 1070 | X##mo1 = state[37]; \ 1071 | X##mu0 = state[38]; \ 1072 | X##mu1 = state[39]; \ 1073 | X##sa0 = state[40]; \ 1074 | X##sa1 = state[41]; \ 1075 | X##se0 = state[42]; \ 1076 | X##se1 = state[43]; \ 1077 | X##si0 = state[44]; \ 1078 | X##si1 = state[45]; \ 1079 | X##so0 = state[46]; \ 1080 | X##so1 = state[47]; \ 1081 | X##su0 = state[48]; \ 1082 | X##su1 = state[49]; \ 1083 | 1084 | #define copyToState(state, X) \ 1085 | state[ 0] = X##ba0; \ 1086 | state[ 1] = X##ba1; \ 1087 | state[ 2] = X##be0; \ 1088 | state[ 3] = X##be1; \ 1089 | state[ 4] = X##bi0; \ 1090 | state[ 5] = X##bi1; \ 1091 | state[ 6] = X##bo0; \ 1092 | state[ 7] = X##bo1; \ 1093 | state[ 8] = X##bu0; \ 1094 | state[ 9] = X##bu1; \ 1095 | state[10] = X##ga0; \ 1096 | state[11] = X##ga1; \ 1097 | state[12] = X##ge0; \ 1098 | state[13] = X##ge1; \ 1099 | state[14] = X##gi0; \ 1100 | state[15] = X##gi1; \ 1101 | state[16] = X##go0; \ 1102 | state[17] = X##go1; \ 1103 | state[18] = X##gu0; \ 1104 | state[19] = X##gu1; \ 1105 | state[20] = X##ka0; \ 1106 | state[21] = X##ka1; \ 1107 | state[22] = X##ke0; \ 1108 | state[23] = X##ke1; \ 1109 | state[24] = X##ki0; \ 1110 | state[25] = X##ki1; \ 1111 | state[26] = X##ko0; \ 1112 | state[27] = X##ko1; \ 1113 | state[28] = X##ku0; \ 1114 | state[29] = X##ku1; \ 1115 | state[30] = X##ma0; \ 1116 | state[31] = X##ma1; \ 1117 | state[32] = X##me0; \ 1118 | state[33] = X##me1; \ 1119 | state[34] = X##mi0; \ 1120 | state[35] = X##mi1; \ 1121 | state[36] = X##mo0; \ 1122 | state[37] = X##mo1; \ 1123 | state[38] = X##mu0; \ 1124 | state[39] = X##mu1; \ 1125 | state[40] = X##sa0; \ 1126 | state[41] = X##sa1; \ 1127 | state[42] = X##se0; \ 1128 | state[43] = X##se1; \ 1129 | state[44] = X##si0; \ 1130 | state[45] = X##si1; \ 1131 | state[46] = X##so0; \ 1132 | state[47] = X##so1; \ 1133 | state[48] = X##su0; \ 1134 | state[49] = X##su1; \ 1135 | 1136 | #define copyStateVariables(X, Y) \ 1137 | X##ba0 = Y##ba0; \ 1138 | X##ba1 = Y##ba1; \ 1139 | X##be0 = Y##be0; \ 1140 | X##be1 = Y##be1; \ 1141 | X##bi0 = Y##bi0; \ 1142 | X##bi1 = Y##bi1; \ 1143 | X##bo0 = Y##bo0; \ 1144 | X##bo1 = Y##bo1; \ 1145 | X##bu0 = Y##bu0; \ 1146 | X##bu1 = Y##bu1; \ 1147 | X##ga0 = Y##ga0; \ 1148 | X##ga1 = Y##ga1; \ 1149 | X##ge0 = Y##ge0; \ 1150 | X##ge1 = Y##ge1; \ 1151 | X##gi0 = Y##gi0; \ 1152 | X##gi1 = Y##gi1; \ 1153 | X##go0 = Y##go0; \ 1154 | X##go1 = Y##go1; \ 1155 | X##gu0 = Y##gu0; \ 1156 | X##gu1 = Y##gu1; \ 1157 | X##ka0 = Y##ka0; \ 1158 | X##ka1 = Y##ka1; \ 1159 | X##ke0 = Y##ke0; \ 1160 | X##ke1 = Y##ke1; \ 1161 | X##ki0 = Y##ki0; \ 1162 | X##ki1 = Y##ki1; \ 1163 | X##ko0 = Y##ko0; \ 1164 | X##ko1 = Y##ko1; \ 1165 | X##ku0 = Y##ku0; \ 1166 | X##ku1 = Y##ku1; \ 1167 | X##ma0 = Y##ma0; \ 1168 | X##ma1 = Y##ma1; \ 1169 | X##me0 = Y##me0; \ 1170 | X##me1 = Y##me1; \ 1171 | X##mi0 = Y##mi0; \ 1172 | X##mi1 = Y##mi1; \ 1173 | X##mo0 = Y##mo0; \ 1174 | X##mo1 = Y##mo1; \ 1175 | X##mu0 = Y##mu0; \ 1176 | X##mu1 = Y##mu1; \ 1177 | X##sa0 = Y##sa0; \ 1178 | X##sa1 = Y##sa1; \ 1179 | X##se0 = Y##se0; \ 1180 | X##se1 = Y##se1; \ 1181 | X##si0 = Y##si0; \ 1182 | X##si1 = Y##si1; \ 1183 | X##so0 = Y##so0; \ 1184 | X##so1 = Y##so1; \ 1185 | X##su0 = Y##su0; \ 1186 | X##su1 = Y##su1; \ 1187 | 1188 | --------------------------------------------------------------------------------