├── .gitignore ├── Keccak-simple-settings.h ├── Keccak-simple.c ├── LICENSE ├── Makefile ├── README.md ├── consts.S ├── consts4x.S ├── cpucycles.c ├── cpucycles.h ├── crypto_hash.h ├── cvariable.h ├── fe25519.h ├── fe25519_add.c ├── fe25519_freeze.S ├── fe25519_getparity.c ├── fe25519_invert.c ├── fe25519_iseq_vartime.c ├── fe25519_mul.S ├── fe25519_neg.c ├── fe25519_nsquare.S ├── fe25519_pack.c ├── fe25519_pow2523.c ├── fe25519_setint.c ├── fe25519_square.S ├── fe25519_sub.c ├── fe25519_unpack.c ├── ge25519.data ├── ge25519.h ├── ge25519_add.c ├── ge25519_add_p1p1.S ├── ge25519_dbl_p1p1.S ├── ge25519_double.c ├── ge25519_lookup.S ├── ge25519_lookup_niels.S ├── ge25519_nielsadd2.S ├── ge25519_p1p1_to_p2.S ├── ge25519_p1p1_to_p3.S ├── ge25519_pack.c ├── ge25519_scalarmult.c ├── ge25519_scalarmult_base.c ├── ge25519_setneutral.c ├── ge25519_unpack.c ├── ge4x.c ├── ge4x.data ├── ge4x.h ├── ge4x_add_p1p1.S ├── ge4x_double_p1p1.S ├── ge4x_lookup.S ├── ge4x_lookup_niels.S ├── ge4x_niels_add_p1p1.S ├── ge4x_pack.c ├── ge4x_unpack_vartime.c ├── gfe4x.c ├── gfe4x.h ├── gfe4x_add.S ├── gfe4x_getparity.c ├── gfe4x_iseq_vartime.c ├── gfe4x_mul.S ├── gfe4x_nsquare.c ├── gfe4x_pow2523.c ├── gfe4x_square.S ├── gfe4x_sub.S ├── network.c ├── network.h ├── ot_config.h ├── ot_receiver.c ├── ot_receiver.h ├── ot_receiver_test.c ├── ot_sender.c ├── ot_sender.h ├── ot_sender_test.c ├── randombytes.c ├── randombytes.h ├── sc25519.h ├── sc25519_from32bytes.c ├── sc25519_random.c ├── sc25519_window4.c └── to_4x.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | ot_sender_test 31 | ot_receiver_test 32 | 33 | # Debug files 34 | *.dSYM/ 35 | -------------------------------------------------------------------------------- /Keccak-simple-settings.h: -------------------------------------------------------------------------------- 1 | #define cKeccakB 1600 2 | #define cKeccakR 1088 3 | -------------------------------------------------------------------------------- /Keccak-simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or 4 | questions, please refer to our website: http://keccak.noekeon.org/ 5 | 6 | Implementation by Ronny Van Keer, 7 | hereby denoted as "the implementer". 8 | 9 | To the extent possible under law, the implementer has waived all copyright 10 | and related or neighboring rights to the source code in this file. 11 | http://creativecommons.org/publicdomain/zero/1.0/ 12 | */ 13 | 14 | // WARNING: This implementation assumes a little-endian platform. Support for big-endianness is not yet implemented. 
15 | 16 | #include <string.h> 17 | #include "Keccak-simple-settings.h" 18 | #define cKeccakR_SizeInBytes (cKeccakR / 8) 19 | #include "crypto_hash.h" 20 | #ifndef crypto_hash_BYTES 21 | #ifdef cKeccakFixedOutputLengthInBytes 22 | #define crypto_hash_BYTES cKeccakFixedOutputLengthInBytes 23 | #else 24 | #define crypto_hash_BYTES cKeccakR_SizeInBytes 25 | #endif 26 | #endif 27 | #if (crypto_hash_BYTES > cKeccakR_SizeInBytes) 28 | #error "Full squeezing not yet implemented" 29 | #endif 30 | 31 | #if (cKeccakB == 1600) 32 | typedef unsigned long long UINT64; 33 | typedef UINT64 tKeccakLane; 34 | #define cKeccakNumberOfRounds 24 35 | #elif (cKeccakB == 800) 36 | typedef unsigned int UINT32; 37 | // WARNING: on 8-bit and 16-bit platforms, this should be replaced by: 38 | //typedef unsigned long UINT32; 39 | typedef UINT32 tKeccakLane; 40 | #define cKeccakNumberOfRounds 22 41 | #elif (cKeccakB == 400) 42 | typedef unsigned short UINT16; 43 | typedef UINT16 tKeccakLane; 44 | #define cKeccakNumberOfRounds 20 45 | #elif (cKeccakB == 200) 46 | typedef unsigned char UINT8; 47 | typedef UINT8 tKeccakLane; 48 | #define cKeccakNumberOfRounds 18 49 | #else 50 | #error "Unsupported Keccak-f width" 51 | #endif 52 | 53 | #define cKeccakLaneSizeInBits (sizeof(tKeccakLane) * 8) 54 | 55 | #define ROL(a, offset) ((((tKeccakLane)a) << ((offset) % cKeccakLaneSizeInBits)) ^ (((tKeccakLane)a) >> (cKeccakLaneSizeInBits-((offset) % cKeccakLaneSizeInBits)))) 56 | #if ((cKeccakB/25) == 8) 57 | #define ROL_mult8(a, offset) ((tKeccakLane)a) 58 | #else 59 | #define ROL_mult8(a, offset) ROL(a, offset) 60 | #endif 61 | void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount ); 62 | 63 | int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen ) 64 | { 65 | tKeccakLane state[5 * 5]; 66 | #if (crypto_hash_BYTES >= cKeccakR_SizeInBytes) 67 | #define temp out 68 | #else 69 | unsigned char temp[cKeccakR_SizeInBytes]; 70 | #endif 71 | 72 | memset( state, 0, sizeof(state) 
); 73 | 74 | for ( /* empty */; inlen >= cKeccakR_SizeInBytes; inlen -= cKeccakR_SizeInBytes, in += cKeccakR_SizeInBytes ) 75 | { 76 | KeccakF( state, (const tKeccakLane*)in, cKeccakR_SizeInBytes / sizeof(tKeccakLane) ); 77 | } 78 | 79 | // padding 80 | memcpy( temp, in, (size_t)inlen ); 81 | temp[inlen++] = 1; 82 | memset( temp+inlen, 0, cKeccakR_SizeInBytes - (size_t)inlen ); 83 | temp[cKeccakR_SizeInBytes-1] |= 0x80; 84 | KeccakF( state, (const tKeccakLane*)temp, cKeccakR_SizeInBytes / sizeof(tKeccakLane) ); 85 | memcpy( out, state, crypto_hash_BYTES ); 86 | #if (crypto_hash_BYTES >= cKeccakR_SizeInBytes) 87 | #undef temp 88 | #endif 89 | 90 | return ( 0 ); 91 | } 92 | 93 | 94 | const tKeccakLane KeccakF_RoundConstants[cKeccakNumberOfRounds] = 95 | { 96 | (tKeccakLane)0x0000000000000001ULL, 97 | (tKeccakLane)0x0000000000008082ULL, 98 | (tKeccakLane)0x800000000000808aULL, 99 | (tKeccakLane)0x8000000080008000ULL, 100 | (tKeccakLane)0x000000000000808bULL, 101 | (tKeccakLane)0x0000000080000001ULL, 102 | (tKeccakLane)0x8000000080008081ULL, 103 | (tKeccakLane)0x8000000000008009ULL, 104 | (tKeccakLane)0x000000000000008aULL, 105 | (tKeccakLane)0x0000000000000088ULL, 106 | (tKeccakLane)0x0000000080008009ULL, 107 | (tKeccakLane)0x000000008000000aULL, 108 | (tKeccakLane)0x000000008000808bULL, 109 | (tKeccakLane)0x800000000000008bULL, 110 | (tKeccakLane)0x8000000000008089ULL, 111 | (tKeccakLane)0x8000000000008003ULL, 112 | (tKeccakLane)0x8000000000008002ULL, 113 | (tKeccakLane)0x8000000000000080ULL 114 | #if (cKeccakB >= 400) 115 | , (tKeccakLane)0x000000000000800aULL, 116 | (tKeccakLane)0x800000008000000aULL 117 | #if (cKeccakB >= 800) 118 | , (tKeccakLane)0x8000000080008081ULL, 119 | (tKeccakLane)0x8000000000008080ULL 120 | #if (cKeccakB == 1600) 121 | , (tKeccakLane)0x0000000080000001ULL, 122 | (tKeccakLane)0x8000000080008008ULL 123 | #endif 124 | #endif 125 | #endif 126 | }; 127 | 128 | void KeccakF( tKeccakLane * state, const tKeccakLane *in, int laneCount ) 129 | { 
130 | 131 | { 132 | while ( --laneCount >= 0 ) 133 | { 134 | state[laneCount] ^= in[laneCount]; 135 | } 136 | } 137 | 138 | { 139 | tKeccakLane Aba, Abe, Abi, Abo, Abu; 140 | tKeccakLane Aga, Age, Agi, Ago, Agu; 141 | tKeccakLane Aka, Ake, Aki, Ako, Aku; 142 | tKeccakLane Ama, Ame, Ami, Amo, Amu; 143 | tKeccakLane Asa, Ase, Asi, Aso, Asu; 144 | tKeccakLane BCa, BCe, BCi, BCo, BCu; 145 | tKeccakLane Da, De, Di, Do, Du; 146 | tKeccakLane Eba, Ebe, Ebi, Ebo, Ebu; 147 | tKeccakLane Ega, Ege, Egi, Ego, Egu; 148 | tKeccakLane Eka, Eke, Eki, Eko, Eku; 149 | tKeccakLane Ema, Eme, Emi, Emo, Emu; 150 | tKeccakLane Esa, Ese, Esi, Eso, Esu; 151 | #define round laneCount 152 | 153 | //copyFromState(A, state) 154 | Aba = state[ 0]; 155 | Abe = state[ 1]; 156 | Abi = state[ 2]; 157 | Abo = state[ 3]; 158 | Abu = state[ 4]; 159 | Aga = state[ 5]; 160 | Age = state[ 6]; 161 | Agi = state[ 7]; 162 | Ago = state[ 8]; 163 | Agu = state[ 9]; 164 | Aka = state[10]; 165 | Ake = state[11]; 166 | Aki = state[12]; 167 | Ako = state[13]; 168 | Aku = state[14]; 169 | Ama = state[15]; 170 | Ame = state[16]; 171 | Ami = state[17]; 172 | Amo = state[18]; 173 | Amu = state[19]; 174 | Asa = state[20]; 175 | Ase = state[21]; 176 | Asi = state[22]; 177 | Aso = state[23]; 178 | Asu = state[24]; 179 | 180 | for( round = 0; round < cKeccakNumberOfRounds; round += 2 ) 181 | { 182 | // prepareTheta 183 | BCa = Aba^Aga^Aka^Ama^Asa; 184 | BCe = Abe^Age^Ake^Ame^Ase; 185 | BCi = Abi^Agi^Aki^Ami^Asi; 186 | BCo = Abo^Ago^Ako^Amo^Aso; 187 | BCu = Abu^Agu^Aku^Amu^Asu; 188 | 189 | //thetaRhoPiChiIotaPrepareTheta(round , A, E) 190 | Da = BCu^ROL(BCe, 1); 191 | De = BCa^ROL(BCi, 1); 192 | Di = BCe^ROL(BCo, 1); 193 | Do = BCi^ROL(BCu, 1); 194 | Du = BCo^ROL(BCa, 1); 195 | 196 | Aba ^= Da; 197 | BCa = Aba; 198 | Age ^= De; 199 | BCe = ROL(Age, 44); 200 | Aki ^= Di; 201 | BCi = ROL(Aki, 43); 202 | Amo ^= Do; 203 | BCo = ROL(Amo, 21); 204 | Asu ^= Du; 205 | BCu = ROL(Asu, 14); 206 | Eba = BCa ^((~BCe)& BCi ); 207 | Eba 
^= (tKeccakLane)KeccakF_RoundConstants[round]; 208 | Ebe = BCe ^((~BCi)& BCo ); 209 | Ebi = BCi ^((~BCo)& BCu ); 210 | Ebo = BCo ^((~BCu)& BCa ); 211 | Ebu = BCu ^((~BCa)& BCe ); 212 | 213 | Abo ^= Do; 214 | BCa = ROL(Abo, 28); 215 | Agu ^= Du; 216 | BCe = ROL(Agu, 20); 217 | Aka ^= Da; 218 | BCi = ROL(Aka, 3); 219 | Ame ^= De; 220 | BCo = ROL(Ame, 45); 221 | Asi ^= Di; 222 | BCu = ROL(Asi, 61); 223 | Ega = BCa ^((~BCe)& BCi ); 224 | Ege = BCe ^((~BCi)& BCo ); 225 | Egi = BCi ^((~BCo)& BCu ); 226 | Ego = BCo ^((~BCu)& BCa ); 227 | Egu = BCu ^((~BCa)& BCe ); 228 | 229 | Abe ^= De; 230 | BCa = ROL(Abe, 1); 231 | Agi ^= Di; 232 | BCe = ROL(Agi, 6); 233 | Ako ^= Do; 234 | BCi = ROL(Ako, 25); 235 | Amu ^= Du; 236 | BCo = ROL_mult8(Amu, 8); 237 | Asa ^= Da; 238 | BCu = ROL(Asa, 18); 239 | Eka = BCa ^((~BCe)& BCi ); 240 | Eke = BCe ^((~BCi)& BCo ); 241 | Eki = BCi ^((~BCo)& BCu ); 242 | Eko = BCo ^((~BCu)& BCa ); 243 | Eku = BCu ^((~BCa)& BCe ); 244 | 245 | Abu ^= Du; 246 | BCa = ROL(Abu, 27); 247 | Aga ^= Da; 248 | BCe = ROL(Aga, 36); 249 | Ake ^= De; 250 | BCi = ROL(Ake, 10); 251 | Ami ^= Di; 252 | BCo = ROL(Ami, 15); 253 | Aso ^= Do; 254 | BCu = ROL_mult8(Aso, 56); 255 | Ema = BCa ^((~BCe)& BCi ); 256 | Eme = BCe ^((~BCi)& BCo ); 257 | Emi = BCi ^((~BCo)& BCu ); 258 | Emo = BCo ^((~BCu)& BCa ); 259 | Emu = BCu ^((~BCa)& BCe ); 260 | 261 | Abi ^= Di; 262 | BCa = ROL(Abi, 62); 263 | Ago ^= Do; 264 | BCe = ROL(Ago, 55); 265 | Aku ^= Du; 266 | BCi = ROL(Aku, 39); 267 | Ama ^= Da; 268 | BCo = ROL(Ama, 41); 269 | Ase ^= De; 270 | BCu = ROL(Ase, 2); 271 | Esa = BCa ^((~BCe)& BCi ); 272 | Ese = BCe ^((~BCi)& BCo ); 273 | Esi = BCi ^((~BCo)& BCu ); 274 | Eso = BCo ^((~BCu)& BCa ); 275 | Esu = BCu ^((~BCa)& BCe ); 276 | 277 | // prepareTheta 278 | BCa = Eba^Ega^Eka^Ema^Esa; 279 | BCe = Ebe^Ege^Eke^Eme^Ese; 280 | BCi = Ebi^Egi^Eki^Emi^Esi; 281 | BCo = Ebo^Ego^Eko^Emo^Eso; 282 | BCu = Ebu^Egu^Eku^Emu^Esu; 283 | 284 | //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) 285 | Da = 
BCu^ROL(BCe, 1); 286 | De = BCa^ROL(BCi, 1); 287 | Di = BCe^ROL(BCo, 1); 288 | Do = BCi^ROL(BCu, 1); 289 | Du = BCo^ROL(BCa, 1); 290 | 291 | Eba ^= Da; 292 | BCa = Eba; 293 | Ege ^= De; 294 | BCe = ROL(Ege, 44); 295 | Eki ^= Di; 296 | BCi = ROL(Eki, 43); 297 | Emo ^= Do; 298 | BCo = ROL(Emo, 21); 299 | Esu ^= Du; 300 | BCu = ROL(Esu, 14); 301 | Aba = BCa ^((~BCe)& BCi ); 302 | Aba ^= (tKeccakLane)KeccakF_RoundConstants[round+1]; 303 | Abe = BCe ^((~BCi)& BCo ); 304 | Abi = BCi ^((~BCo)& BCu ); 305 | Abo = BCo ^((~BCu)& BCa ); 306 | Abu = BCu ^((~BCa)& BCe ); 307 | 308 | Ebo ^= Do; 309 | BCa = ROL(Ebo, 28); 310 | Egu ^= Du; 311 | BCe = ROL(Egu, 20); 312 | Eka ^= Da; 313 | BCi = ROL(Eka, 3); 314 | Eme ^= De; 315 | BCo = ROL(Eme, 45); 316 | Esi ^= Di; 317 | BCu = ROL(Esi, 61); 318 | Aga = BCa ^((~BCe)& BCi ); 319 | Age = BCe ^((~BCi)& BCo ); 320 | Agi = BCi ^((~BCo)& BCu ); 321 | Ago = BCo ^((~BCu)& BCa ); 322 | Agu = BCu ^((~BCa)& BCe ); 323 | 324 | Ebe ^= De; 325 | BCa = ROL(Ebe, 1); 326 | Egi ^= Di; 327 | BCe = ROL(Egi, 6); 328 | Eko ^= Do; 329 | BCi = ROL(Eko, 25); 330 | Emu ^= Du; 331 | BCo = ROL_mult8(Emu, 8); 332 | Esa ^= Da; 333 | BCu = ROL(Esa, 18); 334 | Aka = BCa ^((~BCe)& BCi ); 335 | Ake = BCe ^((~BCi)& BCo ); 336 | Aki = BCi ^((~BCo)& BCu ); 337 | Ako = BCo ^((~BCu)& BCa ); 338 | Aku = BCu ^((~BCa)& BCe ); 339 | 340 | Ebu ^= Du; 341 | BCa = ROL(Ebu, 27); 342 | Ega ^= Da; 343 | BCe = ROL(Ega, 36); 344 | Eke ^= De; 345 | BCi = ROL(Eke, 10); 346 | Emi ^= Di; 347 | BCo = ROL(Emi, 15); 348 | Eso ^= Do; 349 | BCu = ROL_mult8(Eso, 56); 350 | Ama = BCa ^((~BCe)& BCi ); 351 | Ame = BCe ^((~BCi)& BCo ); 352 | Ami = BCi ^((~BCo)& BCu ); 353 | Amo = BCo ^((~BCu)& BCa ); 354 | Amu = BCu ^((~BCa)& BCe ); 355 | 356 | Ebi ^= Di; 357 | BCa = ROL(Ebi, 62); 358 | Ego ^= Do; 359 | BCe = ROL(Ego, 55); 360 | Eku ^= Du; 361 | BCi = ROL(Eku, 39); 362 | Ema ^= Da; 363 | BCo = ROL(Ema, 41); 364 | Ese ^= De; 365 | BCu = ROL(Ese, 2); 366 | Asa = BCa ^((~BCe)& BCi ); 367 | Ase = BCe 
^((~BCi)& BCo ); 368 | Asi = BCi ^((~BCo)& BCu ); 369 | Aso = BCo ^((~BCu)& BCa ); 370 | Asu = BCu ^((~BCa)& BCe ); 371 | } 372 | 373 | //copyToState(state, A) 374 | state[ 0] = Aba; 375 | state[ 1] = Abe; 376 | state[ 2] = Abi; 377 | state[ 3] = Abo; 378 | state[ 4] = Abu; 379 | state[ 5] = Aga; 380 | state[ 6] = Age; 381 | state[ 7] = Agi; 382 | state[ 8] = Ago; 383 | state[ 9] = Agu; 384 | state[10] = Aka; 385 | state[11] = Ake; 386 | state[12] = Aki; 387 | state[13] = Ako; 388 | state[14] = Aku; 389 | state[15] = Ama; 390 | state[16] = Ame; 391 | state[17] = Ami; 392 | state[18] = Amo; 393 | state[19] = Amu; 394 | state[20] = Asa; 395 | state[21] = Ase; 396 | state[22] = Asi; 397 | state[23] = Aso; 398 | state[24] = Asu; 399 | 400 | #undef round 401 | } 402 | 403 | } 404 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <http://unlicense.org> 25 | 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = /usr/bin/gcc 2 | CFLAGS = -O3 -Wall -Wextra 3 | AS = $(CC) $(CFLAGS) -c 4 | 5 | OBJS+= Keccak-simple.o 6 | OBJS+= randombytes.o 7 | OBJS+= cpucycles.o 8 | 9 | OBJS+= network.o 10 | OBJS+= ot_sender.o 11 | OBJS+= ot_receiver.o 12 | 13 | OBJS+= sc25519_random.o 14 | OBJS+= sc25519_from32bytes.o 15 | OBJS+= sc25519_window4.o 16 | 17 | OBJS+= fe25519_add.o 18 | OBJS+= fe25519_freeze.o 19 | OBJS+= fe25519_getparity.o 20 | OBJS+= fe25519_invert.o 21 | OBJS+= fe25519_iseq_vartime.o 22 | OBJS+= fe25519_mul.o 23 | OBJS+= fe25519_neg.o 24 | OBJS+= fe25519_nsquare.o 25 | OBJS+= fe25519_pack.o 26 | OBJS+= fe25519_pow2523.o 27 | OBJS+= fe25519_setint.o 28 | OBJS+= fe25519_square.o 29 | OBJS+= fe25519_sub.o 30 | OBJS+= fe25519_unpack.o 31 | 32 | OBJS+= ge25519_pack.o 33 | OBJS+= ge25519_unpack.o 34 | OBJS+= ge25519_setneutral.o 35 | OBJS+= ge25519_dbl_p1p1.o 36 | OBJS+= ge25519_add_p1p1.o 37 | OBJS+= ge25519_nielsadd2.o 38 | OBJS+= ge25519_p1p1_to_p2.o 39 | OBJS+= ge25519_p1p1_to_p3.o 40 | OBJS+= ge25519_double.o 41 | OBJS+= ge25519_add.o 42 | OBJS+= ge25519_scalarmult_base.o 43 | OBJS+= ge25519_scalarmult.o 44 | OBJS+= ge25519_lookup.o 45 | OBJS+= ge25519_lookup_niels.o 46 | 47 | OBJS+= ge4x.o 48 | OBJS+= gfe4x.o 49 | OBJS+= gfe4x_add.o 50 | OBJS+= gfe4x_nsquare.o 51 | OBJS+= gfe4x_square.o 52 | OBJS+= gfe4x_getparity.o 53 | OBJS+= gfe4x_iseq_vartime.o 54 | OBJS+= gfe4x_mul.o 55 | OBJS+= gfe4x_pow2523.o 56 | OBJS+= gfe4x_sub.o 57 | 58 | OBJS+= ge4x_double_p1p1.o 59 | OBJS+= ge4x_add_p1p1.o 
60 | OBJS+= ge4x_niels_add_p1p1.o 61 | OBJS+= ge4x_unpack_vartime.o 62 | OBJS+= ge4x_pack.o 63 | OBJS+= ge4x_lookup_niels.o 64 | OBJS+= ge4x_lookup.o 65 | 66 | OBJS+= consts.o 67 | OBJS+= consts4x.o 68 | 69 | ###################################################### 70 | 71 | all: ot_sender_test ot_receiver_test libsimpleot 72 | 73 | libsimpleot: $(OBJS) 74 | $(AR) -crs libsimpleot.a $(OBJS) 75 | 76 | ot_sender_test: ot_sender_test.o $(OBJS) 77 | $(CC) $(CFLAGS) -o $@ $^ 78 | 79 | ot_receiver_test: ot_receiver_test.o $(OBJS) 80 | $(CC) $(CFLAGS) -o $@ $^ 81 | 82 | %.o: %.c 83 | $(CC) $(CFLAGS) -c $< 84 | 85 | %.o: %.S 86 | $(AS) $< 87 | 88 | ###################################################### 89 | 90 | .PHONY: clean 91 | 92 | clean: 93 | -rm -f ot_sender_test 94 | -rm -f ot_receiver_test 95 | -rm -f *.o 96 | -rm -f libsimpleot.a 97 | 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SimpleOT 2 | The Simplest Oblivious Transfer Protocol by Chou and Orlandi. http://users-cs.au.dk/orlandi/simpleOT/ 3 | Contains a few minor bug fixes and modifications for usability. 
4 | -------------------------------------------------------------------------------- /consts.S: -------------------------------------------------------------------------------- 1 | .data 2 | 3 | .globl CONST_REDMASK51 4 | .globl CONST_121666_213 5 | .globl CONST_2P0 6 | .globl CONST_2P1234 7 | .globl CONST_4P0 8 | .globl CONST_4P1234 9 | .globl CONST_MU0 10 | .globl CONST_MU1 11 | .globl CONST_MU2 12 | .globl CONST_MU3 13 | .globl CONST_MU4 14 | .globl CONST_ORDER0 15 | .globl CONST_ORDER1 16 | .globl CONST_ORDER2 17 | .globl CONST_ORDER3 18 | .globl CONST_EC2D0 19 | .globl CONST_EC2D1 20 | .globl CONST_EC2D2 21 | .globl CONST_EC2D3 22 | .globl CONST_EC2D4 23 | .globl CONST__38 24 | 25 | .globl _Fs00 26 | .globl _51_1 27 | 28 | .p2align 4 29 | 30 | CONST_REDMASK51: .quad 0x0007FFFFFFFFFFFF 31 | CONST_121666_213: .quad 996687872 32 | CONST_2P0: .quad 0xFFFFFFFFFFFDA 33 | CONST_2P1234: .quad 0xFFFFFFFFFFFFE 34 | CONST_4P0: .quad 0x1FFFFFFFFFFFB4 35 | CONST_4P1234: .quad 0x1FFFFFFFFFFFFC 36 | CONST_MU0: .quad 0xED9CE5A30A2C131B 37 | CONST_MU1: .quad 0x2106215D086329A7 38 | CONST_MU2: .quad 0xFFFFFFFFFFFFFFEB 39 | CONST_MU3: .quad 0xFFFFFFFFFFFFFFFF 40 | CONST_MU4: .quad 0x000000000000000F 41 | CONST_ORDER0: .quad 0x5812631A5CF5D3ED 42 | CONST_ORDER1: .quad 0x14DEF9DEA2F79CD6 43 | CONST_ORDER2: .quad 0x0000000000000000 44 | CONST_ORDER3: .quad 0x1000000000000000 45 | CONST_EC2D0: .quad 1859910466990425 46 | CONST_EC2D1: .quad 932731440258426 47 | CONST_EC2D2: .quad 1072319116312658 48 | CONST_EC2D3: .quad 1815898335770999 49 | CONST_EC2D4: .quad 633789495995903 50 | CONST__38: .quad 38 51 | 52 | _Fs00: .quad 0xFFFFFFFFFFFFFF00 53 | 54 | _51_1: .quad 2251799813685247 55 | 56 | -------------------------------------------------------------------------------- /consts4x.S: -------------------------------------------------------------------------------- 1 | .globl _zero 2 | .globl _mzero 3 | .globl _one 4 | .globl _mone 5 | .globl _two 6 | 7 | .globl _allone 8 | 9 | .globl 
_idx_8 10 | .globl _idx_7 11 | .globl _idx_6 12 | .globl _idx_5 13 | .globl _idx_4 14 | .globl _idx_3 15 | .globl _idx_2 16 | .globl _idx_1 17 | .globl _idx0 18 | .globl _idx1 19 | .globl _idx2 20 | .globl _idx3 21 | .globl _idx4 22 | .globl _idx5 23 | .globl _idx6 24 | .globl _idx7 25 | 26 | .globl _F0 27 | 28 | .p2align 5 29 | 30 | _zero: .double 0.0, 0.0, 0.0, 0.0 31 | _mzero: .double -0.0, -0.0, -0.0, -0.0 32 | _one: .double 1.0, 1.0, 1.0, 1.0 33 | _mone: .double -1.0, -1.0, -1.0, -1.0 34 | _two: .double 2.0, 2.0, 2.0, 2.0 35 | _mtwo: .double -2.0, -2.0, -2.0, -2.0 36 | 37 | _allone: .quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 38 | 39 | _idx_8: .quad 0xF8, 0xF8, 0xF8, 0xF8 40 | _idx_7: .quad 0xF9, 0xF9, 0xF9, 0xF9 41 | _idx_6: .quad 0xFA, 0xFA, 0xFA, 0xFA 42 | _idx_5: .quad 0xFB, 0xFB, 0xFB, 0xFB 43 | _idx_4: .quad 0xFC, 0xFC, 0xFC, 0xFC 44 | _idx_3: .quad 0xFD, 0xFD, 0xFD, 0xFD 45 | _idx_2: .quad 0xFE, 0xFE, 0xFE, 0xFE 46 | _idx_1: .quad 0xFF, 0xFF, 0xFF, 0xFF 47 | _idx0: .quad 0x00, 0x00, 0x00, 0x00 48 | _idx1: .quad 0x01, 0x01, 0x01, 0x01 49 | _idx2: .quad 0x02, 0x02, 0x02, 0x02 50 | _idx3: .quad 0x03, 0x03, 0x03, 0x03 51 | _idx4: .quad 0x04, 0x04, 0x04, 0x04 52 | _idx5: .quad 0x05, 0x05, 0x05, 0x05 53 | _idx6: .quad 0x06, 0x06, 0x06, 0x06 54 | _idx7: .quad 0x07, 0x07, 0x07, 0x07 55 | 56 | _F0: .quad 0xF0, 0xF0, 0xF0, 0xF0 57 | 58 | -------------------------------------------------------------------------------- /cpucycles.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | long long cpucycles_amd64cpuinfo(void) 5 | { 6 | unsigned long long result; 7 | asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" 8 | : "=a" (result) :: "%rdx"); 9 | return result; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /cpucycles.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | cpucycles amd64cpuinfo.h version 20100803 3 | D. J. Bernstein 4 | Public domain. 5 | */ 6 | 7 | #ifndef CPUCYCLES_amd64cpuinfo_h 8 | #define CPUCYCLES_amd64cpuinfo_h 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | extern long long cpucycles_amd64cpuinfo(void); 15 | extern long long cpucycles_amd64cpuinfo_persecond(void); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | 21 | #ifndef cpucycles_implementation 22 | #define cpucycles_implementation "amd64cpuinfo" 23 | #define cpucycles cpucycles_amd64cpuinfo 24 | #define cpucycles_persecond cpucycles_amd64cpuinfo_persecond 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /crypto_hash.h: -------------------------------------------------------------------------------- 1 | #ifndef CRYPTO_HASH_H 2 | #define CRYPTO_HASH_H 3 | 4 | #define crypto_hash_BYTES 32 5 | 6 | int crypto_hash( unsigned char *out, 7 | const unsigned char *in, 8 | unsigned long long inlen ); 9 | 10 | #endif //ifndef CRYPTO_HASH_H 11 | 12 | -------------------------------------------------------------------------------- /cvariable.h: -------------------------------------------------------------------------------- 1 | #ifdef __MACH__ 2 | #define cvariable(var) _##var 3 | #else 4 | #define cvariable(var) var 5 | #endif 6 | -------------------------------------------------------------------------------- /fe25519.h: -------------------------------------------------------------------------------- 1 | #ifndef FE25519_H 2 | #define FE25519_H 3 | 4 | typedef struct 5 | { 6 | unsigned long long v[5]; 7 | } 8 | fe25519; 9 | 10 | void fe25519_freeze(fe25519 *r); 11 | 12 | void fe25519_unpack(fe25519 *r, const unsigned char x[32]); 13 | 14 | void fe25519_pack(unsigned char r[32], const fe25519 *x); 15 | 16 | void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b); 17 | 18 | void 
fe25519_cswap(fe25519 *r, fe25519 *x, unsigned char b); 19 | 20 | void fe25519_setint(fe25519 *r, unsigned int v); 21 | 22 | void fe25519_copy(fe25519 *r, const fe25519 *x); 23 | 24 | void fe25519_neg(fe25519 *r, const fe25519 *x); 25 | 26 | unsigned char fe25519_getparity(const fe25519 *x); 27 | 28 | int fe25519_iszero_vartime(const fe25519 *x); 29 | 30 | int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y); 31 | 32 | void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y); 33 | 34 | void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y); 35 | 36 | void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y); 37 | 38 | void fe25519_mul121666(fe25519 *r, const fe25519 *x); 39 | 40 | void fe25519_square(fe25519 *r, const fe25519 *x); 41 | 42 | void fe25519_nsquare(fe25519 *r, unsigned long long n); 43 | 44 | void simpleot_fe25519_invert(fe25519 *r, const fe25519 *x); 45 | 46 | void fe25519_pow2523(fe25519 *r, const fe25519 *x); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /fe25519_add.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y) 4 | { 5 | r->v[0] = x->v[0] + y->v[0]; 6 | r->v[1] = x->v[1] + y->v[1]; 7 | r->v[2] = x->v[2] + y->v[2]; 8 | r->v[3] = x->v[3] + y->v[3]; 9 | r->v[4] = x->v[4] + y->v[4]; 10 | } 11 | -------------------------------------------------------------------------------- /fe25519_freeze.S: -------------------------------------------------------------------------------- 1 | 2 | # qhasm: int64 rp 3 | 4 | # qhasm: input rp 5 | 6 | # qhasm: int64 r0 7 | 8 | # qhasm: int64 r1 9 | 10 | # qhasm: int64 r2 11 | 12 | # qhasm: int64 r3 13 | 14 | # qhasm: int64 r4 15 | 16 | # qhasm: int64 t 17 | 18 | # qhasm: int64 loop 19 | 20 | # qhasm: int64 two51minus1 21 | 22 | # qhasm: int64 two51minus19 23 | 24 | # qhasm: int64 caller1 25 | 26 | # 
qhasm: int64 caller2 27 | 28 | # qhasm: int64 caller3 29 | 30 | # qhasm: int64 caller4 31 | 32 | # qhasm: int64 caller5 33 | 34 | # qhasm: int64 caller6 35 | 36 | # qhasm: int64 caller7 37 | 38 | # qhasm: caller caller1 39 | 40 | # qhasm: caller caller2 41 | 42 | # qhasm: caller caller3 43 | 44 | # qhasm: caller caller4 45 | 46 | # qhasm: caller caller5 47 | 48 | # qhasm: caller caller6 49 | 50 | # qhasm: caller caller7 51 | 52 | # qhasm: stack64 caller1_stack 53 | 54 | # qhasm: stack64 caller2_stack 55 | 56 | # qhasm: stack64 caller3_stack 57 | 58 | # qhasm: stack64 caller4_stack 59 | 60 | # qhasm: stack64 caller5_stack 61 | 62 | # qhasm: stack64 caller6_stack 63 | 64 | # qhasm: stack64 caller7_stack 65 | 66 | # qhasm: enter fe25519_freeze 67 | .text 68 | .p2align 5 69 | .globl _fe25519_freeze 70 | .globl fe25519_freeze 71 | _fe25519_freeze: 72 | fe25519_freeze: 73 | mov %rsp,%r11 74 | and $31,%r11 75 | add $64,%r11 76 | sub %r11,%rsp 77 | 78 | # qhasm: caller1_stack = caller1 79 | # asm 1: movq caller1_stack=stack64#1 80 | # asm 2: movq caller1_stack=0(%rsp) 81 | movq %r11,0(%rsp) 82 | 83 | # qhasm: caller2_stack = caller2 84 | # asm 1: movq caller2_stack=stack64#2 85 | # asm 2: movq caller2_stack=8(%rsp) 86 | movq %r12,8(%rsp) 87 | 88 | # qhasm: caller3_stack = caller3 89 | # asm 1: movq caller3_stack=stack64#3 90 | # asm 2: movq caller3_stack=16(%rsp) 91 | movq %r13,16(%rsp) 92 | 93 | # qhasm: caller4_stack = caller4 94 | # asm 1: movq caller4_stack=stack64#4 95 | # asm 2: movq caller4_stack=24(%rsp) 96 | movq %r14,24(%rsp) 97 | 98 | # qhasm: caller5_stack = caller5 99 | # asm 1: movq caller5_stack=stack64#5 100 | # asm 2: movq caller5_stack=32(%rsp) 101 | movq %r15,32(%rsp) 102 | 103 | # qhasm: caller6_stack = caller6 104 | # asm 1: movq caller6_stack=stack64#6 105 | # asm 2: movq caller6_stack=40(%rsp) 106 | movq %rbx,40(%rsp) 107 | 108 | # qhasm: caller7_stack = caller7 109 | # asm 1: movq caller7_stack=stack64#7 110 | # asm 2: movq caller7_stack=48(%rsp) 
111 | movq %rbp,48(%rsp) 112 | 113 | # qhasm: r0 = *(uint64 *) (rp + 0) 114 | # asm 1: movq 0(r0=int64#2 115 | # asm 2: movq 0(r0=%rsi 116 | movq 0(%rdi),%rsi 117 | 118 | # qhasm: r1 = *(uint64 *) (rp + 8) 119 | # asm 1: movq 8(r1=int64#3 120 | # asm 2: movq 8(r1=%rdx 121 | movq 8(%rdi),%rdx 122 | 123 | # qhasm: r2 = *(uint64 *) (rp + 16) 124 | # asm 1: movq 16(r2=int64#4 125 | # asm 2: movq 16(r2=%rcx 126 | movq 16(%rdi),%rcx 127 | 128 | # qhasm: r3 = *(uint64 *) (rp + 24) 129 | # asm 1: movq 24(r3=int64#5 130 | # asm 2: movq 24(r3=%r8 131 | movq 24(%rdi),%r8 132 | 133 | # qhasm: r4 = *(uint64 *) (rp + 32) 134 | # asm 1: movq 32(r4=int64#6 135 | # asm 2: movq 32(r4=%r9 136 | movq 32(%rdi),%r9 137 | 138 | # qhasm: two51minus1 = *(uint64 *) &CONST_REDMASK51 139 | # asm 1: movq CONST_REDMASK51,>two51minus1=int64#7 140 | # asm 2: movq CONST_REDMASK51,>two51minus1=%rax 141 | movq CONST_REDMASK51(%rip),%rax 142 | 143 | # qhasm: two51minus19 = two51minus1 144 | # asm 1: mov two51minus19=int64#8 145 | # asm 2: mov two51minus19=%r10 146 | mov %rax,%r10 147 | 148 | # qhasm: two51minus19 -= 18 149 | # asm 1: sub $18,loop=int64#9 155 | # asm 2: mov $3,>loop=%r11 156 | mov $3,%r11 157 | 158 | # qhasm: reduceloop: 159 | ._reduceloop: 160 | 161 | # qhasm: t = r0 162 | # asm 1: mov t=int64#10 163 | # asm 2: mov t=%r12 164 | mov %rsi,%r12 165 | 166 | # qhasm: (uint64) t >>= 51 167 | # asm 1: shr $51,t=int64#10 183 | # asm 2: mov t=%r12 184 | mov %rdx,%r12 185 | 186 | # qhasm: (uint64) t >>= 51 187 | # asm 1: shr $51,t=int64#10 203 | # asm 2: mov t=%r12 204 | mov %rcx,%r12 205 | 206 | # qhasm: (uint64) t >>= 51 207 | # asm 1: shr $51,t=int64#10 223 | # asm 2: mov t=%r12 224 | mov %r8,%r12 225 | 226 | # qhasm: (uint64) t >>= 51 227 | # asm 1: shr $51,t=int64#10 243 | # asm 2: mov t=%r12 244 | mov %r9,%r12 245 | 246 | # qhasm: (uint64) t >>= 51 247 | # asm 1: shr $51,t=int64#10 258 | # asm 2: imulq $19,t=%r12 259 | imulq $19,%r12,%r12 260 | 261 | # qhasm: r0 += t 262 | # asm 1: add ? 
loop -= 1 267 | # asm 1: sub $1, 273 | ja ._reduceloop 274 | 275 | # qhasm: t = 1 276 | # asm 1: mov $1,>t=int64#10 277 | # asm 2: mov $1,>t=%r12 278 | mov $1,%r12 279 | 280 | # qhasm: signedcaller1=int64#9 397 | # asm 2: movq caller1=%r11 398 | movq 0(%rsp),%r11 399 | 400 | # qhasm: caller2 = caller2_stack 401 | # asm 1: movq caller2=int64#10 402 | # asm 2: movq caller2=%r12 403 | movq 8(%rsp),%r12 404 | 405 | # qhasm: caller3 = caller3_stack 406 | # asm 1: movq caller3=int64#11 407 | # asm 2: movq caller3=%r13 408 | movq 16(%rsp),%r13 409 | 410 | # qhasm: caller4 = caller4_stack 411 | # asm 1: movq caller4=int64#12 412 | # asm 2: movq caller4=%r14 413 | movq 24(%rsp),%r14 414 | 415 | # qhasm: caller5 = caller5_stack 416 | # asm 1: movq caller5=int64#13 417 | # asm 2: movq caller5=%r15 418 | movq 32(%rsp),%r15 419 | 420 | # qhasm: caller6 = caller6_stack 421 | # asm 1: movq caller6=int64#14 422 | # asm 2: movq caller6=%rbx 423 | movq 40(%rsp),%rbx 424 | 425 | # qhasm: caller7 = caller7_stack 426 | # asm 1: movq caller7=int64#15 427 | # asm 2: movq caller7=%rbp 428 | movq 48(%rsp),%rbp 429 | 430 | # qhasm: leave 431 | add %r11,%rsp 432 | mov %rdi,%rax 433 | mov %rsi,%rdx 434 | ret 435 | -------------------------------------------------------------------------------- /fe25519_getparity.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | unsigned char fe25519_getparity(const fe25519 *x) 4 | { 5 | fe25519 t = *x; 6 | fe25519_freeze(&t); 7 | return (unsigned char)t.v[0] & 1; 8 | } 9 | -------------------------------------------------------------------------------- /fe25519_invert.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | void simpleot_fe25519_invert(fe25519 *r, const fe25519 *x) 4 | { 5 | fe25519 z2; 6 | fe25519 z9; 7 | fe25519 z11; 8 | fe25519 z2_5_0; 9 | fe25519 z2_10_0; 10 | fe25519 z2_20_0; 11 | fe25519 z2_50_0; 12 | fe25519 
z2_100_0; 13 | fe25519 t; 14 | /* Exponentiation chain: each step's comment gives the exponent of x held in the step's destination. z2_a_0 names hold x^(2^a - 2^0). fe25519_nsquare(&t,n) is used as n further in-place squarings of t (e.g. one square plus nsquare 4 takes 2^5 - 2^0 to 2^10 - 2^5). */ 15 | /* 2 */ fe25519_square(&z2,x); 16 | /* 4 */ fe25519_square(&t,&z2); 17 | /* 8 */ fe25519_square(&t,&t); 18 | /* 9 */ fe25519_mul(&z9,&t,x); 19 | /* 11 */ fe25519_mul(&z11,&z9,&z2); 20 | /* 22 */ fe25519_square(&t,&z11); 21 | /* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t,&z9); 22 | 23 | /* 2^6 - 2^1 */ fe25519_square(&t,&z2_5_0); 24 | /* 2^10 - 2^5 */ fe25519_nsquare(&t,4); 25 | /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t,&z2_5_0); 26 | 27 | /* 2^11 - 2^1 */ fe25519_square(&t,&z2_10_0); 28 | /* 2^20 - 2^10 */ fe25519_nsquare(&t,9); 29 | /* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t,&z2_10_0); 30 | 31 | /* 2^21 - 2^1 */ fe25519_square(&t,&z2_20_0); 32 | /* 2^40 - 2^20 */ fe25519_nsquare(&t,19); 33 | /* 2^40 - 2^0 */ fe25519_mul(&t,&t,&z2_20_0); 34 | 35 | /* 2^41 - 2^1 */ fe25519_square(&t,&t); 36 | /* 2^50 - 2^10 */ fe25519_nsquare(&t,9); 37 | /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t,&z2_10_0); 38 | 39 | /* 2^51 - 2^1 */ fe25519_square(&t,&z2_50_0); 40 | /* 2^100 - 2^50 */ fe25519_nsquare(&t,49); 41 | /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t,&z2_50_0); 42 | 43 | /* 2^101 - 2^1 */ fe25519_square(&t,&z2_100_0); 44 | /* 2^200 - 2^100 */ fe25519_nsquare(&t,99); 45 | /* 2^200 - 2^0 */ fe25519_mul(&t,&t,&z2_100_0); 46 | 47 | /* 2^201 - 2^1 */ fe25519_square(&t,&t); 48 | /* 2^250 - 2^50 */ fe25519_nsquare(&t,49); 49 | /* 2^250 - 2^0 */ fe25519_mul(&t,&t,&z2_50_0); 50 | 51 | /* 2^251 - 2^1 */ fe25519_square(&t,&t); 52 | /* 2^252 - 2^2 */ fe25519_square(&t,&t); 53 | /* 2^253 - 2^3 */ fe25519_square(&t,&t); 54 | 55 | /* 2^254 - 2^4 */ fe25519_square(&t,&t); 56 | 57 | /* 2^255 - 2^5 */ fe25519_square(&t,&t); 58 | /* 2^255 - 21 */ fe25519_mul(r,&t,&z11); 59 | } 60 | -------------------------------------------------------------------------------- /fe25519_iseq_vartime.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | /* Returns 1 if x and y hold the same value after fe25519_freeze(), 0 otherwise. Works on local copies, so neither input is modified. The five limbs are compared one by one with an early return at the first mismatch, so the running time depends on the operands (hence "_vartime"). */ 3 | int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y) 4 | { 5 |
fe25519 t1 = *x; 6 | fe25519 t2 = *y; 7 | fe25519_freeze(&t1); 8 | fe25519_freeze(&t2); 9 | if(t1.v[0] != t2.v[0]) return 0; 10 | if(t1.v[1] != t2.v[1]) return 0; 11 | if(t1.v[2] != t2.v[2]) return 0; 12 | if(t1.v[3] != t2.v[3]) return 0; 13 | if(t1.v[4] != t2.v[4]) return 0; 14 | return 1; 15 | } 16 | -------------------------------------------------------------------------------- /fe25519_mul.S: -------------------------------------------------------------------------------- 1 | 2 | # qhasm: int64 rp 3 | 4 | # qhasm: int64 xp 5 | 6 | # qhasm: int64 yp 7 | 8 | # qhasm: input rp 9 | 10 | # qhasm: input xp 11 | 12 | # qhasm: input yp 13 | 14 | # qhasm: int64 r0 15 | 16 | # qhasm: int64 r1 17 | 18 | # qhasm: int64 r2 19 | 20 | # qhasm: int64 r3 21 | 22 | # qhasm: int64 r4 23 | 24 | # qhasm: int64 c1 25 | 26 | # qhasm: int64 c2 27 | 28 | # qhasm: int64 c3 29 | 30 | # qhasm: int64 c4 31 | 32 | # qhasm: int64 c5 33 | 34 | # qhasm: int64 c6 35 | 36 | # qhasm: int64 c7 37 | 38 | # qhasm: caller c1 39 | 40 | # qhasm: caller c2 41 | 42 | # qhasm: caller c3 43 | 44 | # qhasm: caller c4 45 | 46 | # qhasm: caller c5 47 | 48 | # qhasm: caller c6 49 | 50 | # qhasm: caller c7 51 | 52 | # qhasm: stack64 c1_stack 53 | 54 | # qhasm: stack64 c2_stack 55 | 56 | # qhasm: stack64 c3_stack 57 | 58 | # qhasm: stack64 c4_stack 59 | 60 | # qhasm: stack64 c5_stack 61 | 62 | # qhasm: stack64 c6_stack 63 | 64 | # qhasm: stack64 c7_stack 65 | 66 | # qhasm: stack64 x119_stack 67 | 68 | # qhasm: stack64 x219_stack 69 | 70 | # qhasm: stack64 x319_stack 71 | 72 | # qhasm: stack64 x419_stack 73 | 74 | # qhasm: stack64 rp_stack 75 | 76 | # qhasm: int64 mulr01 77 | 78 | # qhasm: int64 mulr11 79 | 80 | # qhasm: int64 mulr21 81 | 82 | # qhasm: int64 mulr31 83 | 84 | # qhasm: int64 mulr41 85 | 86 | # qhasm: int64 mulrax 87 | 88 | # qhasm: int64 mulrdx 89 | 90 | # qhasm: int64 mult 91 | 92 | # qhasm: int64 mulredmask 93 | 94 | # qhasm: stack64 mulx219_stack 95 | 96 | # qhasm: stack64 mulx319_stack 97 | 98 
| # qhasm: stack64 mulx419_stack 99 | 100 | # qhasm: enter fe25519_mul 101 | .text 102 | .p2align 5 103 | .globl _fe25519_mul 104 | .globl fe25519_mul 105 | _fe25519_mul: 106 | fe25519_mul: 107 | mov %rsp,%r11 108 | and $31,%r11 109 | add $96,%r11 110 | sub %r11,%rsp 111 | 112 | # qhasm: c1_stack = c1 113 | # asm 1: movq c1_stack=stack64#1 114 | # asm 2: movq c1_stack=0(%rsp) 115 | movq %r11,0(%rsp) 116 | 117 | # qhasm: c2_stack = c2 118 | # asm 1: movq c2_stack=stack64#2 119 | # asm 2: movq c2_stack=8(%rsp) 120 | movq %r12,8(%rsp) 121 | 122 | # qhasm: c3_stack = c3 123 | # asm 1: movq c3_stack=stack64#3 124 | # asm 2: movq c3_stack=16(%rsp) 125 | movq %r13,16(%rsp) 126 | 127 | # qhasm: c4_stack = c4 128 | # asm 1: movq c4_stack=stack64#4 129 | # asm 2: movq c4_stack=24(%rsp) 130 | movq %r14,24(%rsp) 131 | 132 | # qhasm: c5_stack = c5 133 | # asm 1: movq c5_stack=stack64#5 134 | # asm 2: movq c5_stack=32(%rsp) 135 | movq %r15,32(%rsp) 136 | 137 | # qhasm: c6_stack = c6 138 | # asm 1: movq c6_stack=stack64#6 139 | # asm 2: movq c6_stack=40(%rsp) 140 | movq %rbx,40(%rsp) 141 | 142 | # qhasm: c7_stack = c7 143 | # asm 1: movq c7_stack=stack64#7 144 | # asm 2: movq c7_stack=48(%rsp) 145 | movq %rbp,48(%rsp) 146 | 147 | # qhasm: rp_stack = rp 148 | # asm 1: movq rp_stack=stack64#8 149 | # asm 2: movq rp_stack=56(%rsp) 150 | movq %rdi,56(%rsp) 151 | 152 | # qhasm: yp = yp 153 | # asm 1: mov yp=int64#4 154 | # asm 2: mov yp=%rcx 155 | mov %rdx,%rcx 156 | 157 | # qhasm: mulrax = *(uint64 *)(xp + 24) 158 | # asm 1: movq 24(mulrax=int64#3 159 | # asm 2: movq 24(mulrax=%rdx 160 | movq 24(%rsi),%rdx 161 | 162 | # qhasm: mulrax *= 19 163 | # asm 1: imulq $19,mulrax=int64#7 164 | # asm 2: imulq $19,mulrax=%rax 165 | imulq $19,%rdx,%rax 166 | 167 | # qhasm: mulx319_stack = mulrax 168 | # asm 1: movq mulx319_stack=stack64#9 169 | # asm 2: movq mulx319_stack=64(%rsp) 170 | movq %rax,64(%rsp) 171 | 172 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 16) 173 | # asm 1: 
mulq 16(r0=int64#5 179 | # asm 2: mov r0=%r8 180 | mov %rax,%r8 181 | 182 | # qhasm: mulr01 = mulrdx 183 | # asm 1: mov mulr01=int64#6 184 | # asm 2: mov mulr01=%r9 185 | mov %rdx,%r9 186 | 187 | # qhasm: mulrax = *(uint64 *)(xp + 32) 188 | # asm 1: movq 32(mulrax=int64#3 189 | # asm 2: movq 32(mulrax=%rdx 190 | movq 32(%rsi),%rdx 191 | 192 | # qhasm: mulrax *= 19 193 | # asm 1: imulq $19,mulrax=int64#7 194 | # asm 2: imulq $19,mulrax=%rax 195 | imulq $19,%rdx,%rax 196 | 197 | # qhasm: mulx419_stack = mulrax 198 | # asm 1: movq mulx419_stack=stack64#10 199 | # asm 2: movq mulx419_stack=72(%rsp) 200 | movq %rax,72(%rsp) 201 | 202 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 8) 203 | # asm 1: mulq 8(mulrax=int64#7 219 | # asm 2: movq 0(mulrax=%rax 220 | movq 0(%rsi),%rax 221 | 222 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 0) 223 | # asm 1: mulq 0(mulrax=int64#7 239 | # asm 2: movq 0(mulrax=%rax 240 | movq 0(%rsi),%rax 241 | 242 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 8) 243 | # asm 1: mulq 8(r1=int64#8 249 | # asm 2: mov r1=%r10 250 | mov %rax,%r10 251 | 252 | # qhasm: mulr11 = mulrdx 253 | # asm 1: mov mulr11=int64#9 254 | # asm 2: mov mulr11=%r11 255 | mov %rdx,%r11 256 | 257 | # qhasm: mulrax = *(uint64 *)(xp + 0) 258 | # asm 1: movq 0(mulrax=int64#7 259 | # asm 2: movq 0(mulrax=%rax 260 | movq 0(%rsi),%rax 261 | 262 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 16) 263 | # asm 1: mulq 16(r2=int64#10 269 | # asm 2: mov r2=%r12 270 | mov %rax,%r12 271 | 272 | # qhasm: mulr21 = mulrdx 273 | # asm 1: mov mulr21=int64#11 274 | # asm 2: mov mulr21=%r13 275 | mov %rdx,%r13 276 | 277 | # qhasm: mulrax = *(uint64 *)(xp + 0) 278 | # asm 1: movq 0(mulrax=int64#7 279 | # asm 2: movq 0(mulrax=%rax 280 | movq 0(%rsi),%rax 281 | 282 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 24) 283 | # asm 1: mulq 24(r3=int64#12 289 | # asm 2: mov r3=%r14 290 | mov %rax,%r14 291 | 292 | # qhasm: 
mulr31 = mulrdx 293 | # asm 1: mov mulr31=int64#13 294 | # asm 2: mov mulr31=%r15 295 | mov %rdx,%r15 296 | 297 | # qhasm: mulrax = *(uint64 *)(xp + 0) 298 | # asm 1: movq 0(mulrax=int64#7 299 | # asm 2: movq 0(mulrax=%rax 300 | movq 0(%rsi),%rax 301 | 302 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 32) 303 | # asm 1: mulq 32(r4=int64#14 309 | # asm 2: mov r4=%rbx 310 | mov %rax,%rbx 311 | 312 | # qhasm: mulr41 = mulrdx 313 | # asm 1: mov mulr41=int64#15 314 | # asm 2: mov mulr41=%rbp 315 | mov %rdx,%rbp 316 | 317 | # qhasm: mulrax = *(uint64 *)(xp + 8) 318 | # asm 1: movq 8(mulrax=int64#7 319 | # asm 2: movq 8(mulrax=%rax 320 | movq 8(%rsi),%rax 321 | 322 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 0) 323 | # asm 1: mulq 0(mulrax=int64#7 339 | # asm 2: movq 8(mulrax=%rax 340 | movq 8(%rsi),%rax 341 | 342 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 8) 343 | # asm 1: mulq 8(mulrax=int64#7 359 | # asm 2: movq 8(mulrax=%rax 360 | movq 8(%rsi),%rax 361 | 362 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 16) 363 | # asm 1: mulq 16(mulrax=int64#7 379 | # asm 2: movq 8(mulrax=%rax 380 | movq 8(%rsi),%rax 381 | 382 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 24) 383 | # asm 1: mulq 24(mulrax=int64#3 399 | # asm 2: movq 8(mulrax=%rdx 400 | movq 8(%rsi),%rdx 401 | 402 | # qhasm: mulrax *= 19 403 | # asm 1: imulq $19,mulrax=int64#7 404 | # asm 2: imulq $19,mulrax=%rax 405 | imulq $19,%rdx,%rax 406 | 407 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 32) 408 | # asm 1: mulq 32(mulrax=int64#7 424 | # asm 2: movq 16(mulrax=%rax 425 | movq 16(%rsi),%rax 426 | 427 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 0) 428 | # asm 1: mulq 0(mulrax=int64#7 444 | # asm 2: movq 16(mulrax=%rax 445 | movq 16(%rsi),%rax 446 | 447 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 8) 448 | # asm 1: mulq 8(mulrax=int64#7 464 | # asm 2: movq 16(mulrax=%rax 465 
| movq 16(%rsi),%rax 466 | 467 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 16) 468 | # asm 1: mulq 16(mulrax=int64#3 484 | # asm 2: movq 16(mulrax=%rdx 485 | movq 16(%rsi),%rdx 486 | 487 | # qhasm: mulrax *= 19 488 | # asm 1: imulq $19,mulrax=int64#7 489 | # asm 2: imulq $19,mulrax=%rax 490 | imulq $19,%rdx,%rax 491 | 492 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 24) 493 | # asm 1: mulq 24(mulrax=int64#3 509 | # asm 2: movq 16(mulrax=%rdx 510 | movq 16(%rsi),%rdx 511 | 512 | # qhasm: mulrax *= 19 513 | # asm 1: imulq $19,mulrax=int64#7 514 | # asm 2: imulq $19,mulrax=%rax 515 | imulq $19,%rdx,%rax 516 | 517 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 32) 518 | # asm 1: mulq 32(mulrax=int64#7 534 | # asm 2: movq 24(mulrax=%rax 535 | movq 24(%rsi),%rax 536 | 537 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 0) 538 | # asm 1: mulq 0(mulrax=int64#7 554 | # asm 2: movq 24(mulrax=%rax 555 | movq 24(%rsi),%rax 556 | 557 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 8) 558 | # asm 1: mulq 8(mulrax=int64#7 574 | # asm 2: movq mulrax=%rax 575 | movq 64(%rsp),%rax 576 | 577 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 24) 578 | # asm 1: mulq 24(mulrax=int64#7 594 | # asm 2: movq mulrax=%rax 595 | movq 64(%rsp),%rax 596 | 597 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 32) 598 | # asm 1: mulq 32(mulrax=int64#7 614 | # asm 2: movq 32(mulrax=%rax 615 | movq 32(%rsi),%rax 616 | 617 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 0) 618 | # asm 1: mulq 0(mulrax=int64#7 634 | # asm 2: movq mulrax=%rax 635 | movq 72(%rsp),%rax 636 | 637 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 16) 638 | # asm 1: mulq 16(mulrax=int64#7 654 | # asm 2: movq mulrax=%rax 655 | movq 72(%rsp),%rax 656 | 657 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 24) 658 | # asm 1: mulq 24(mulrax=int64#7 674 | # asm 2: movq mulrax=%rax 
675 | movq 72(%rsp),%rax 676 | 677 | # qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)(yp + 32) 678 | # asm 1: mulq 32(mulredmask=int64#2 694 | # asm 2: movq CONST_REDMASK51,>mulredmask=%rsi 695 | movq CONST_REDMASK51(%rip),%rsi 696 | 697 | # qhasm: mulr01 = (mulr01.r0) << 13 698 | # asm 1: shld $13,mulr41=int64#3 769 | # asm 2: imulq $19,mulr41=%rdx 770 | imulq $19,%rbp,%rdx 771 | 772 | # qhasm: r0 += mulr41 773 | # asm 1: add mult=int64#3 779 | # asm 2: mov mult=%rdx 780 | mov %r8,%rdx 781 | 782 | # qhasm: (uint64) mult >>= 51 783 | # asm 1: shr $51,r1=int64#4 794 | # asm 2: mov r1=%rcx 795 | mov %rdx,%rcx 796 | 797 | # qhasm: (uint64) mult >>= 51 798 | # asm 1: shr $51,r2=int64#6 814 | # asm 2: mov r2=%r9 815 | mov %rdx,%r9 816 | 817 | # qhasm: (uint64) mult >>= 51 818 | # asm 1: shr $51,r3=int64#7 834 | # asm 2: mov r3=%rax 835 | mov %rdx,%rax 836 | 837 | # qhasm: (uint64) mult >>= 51 838 | # asm 1: shr $51,r4=int64#8 854 | # asm 2: mov r4=%r10 855 | mov %rdx,%r10 856 | 857 | # qhasm: (uint64) mult >>= 51 858 | # asm 1: shr $51,mult=int64#3 869 | # asm 2: imulq $19,mult=%rdx 870 | imulq $19,%rdx,%rdx 871 | 872 | # qhasm: r0 += mult 873 | # asm 1: add c1=int64#9 909 | # asm 2: movq c1=%r11 910 | movq 0(%rsp),%r11 911 | 912 | # qhasm: c2 =c2_stack 913 | # asm 1: movq c2=int64#10 914 | # asm 2: movq c2=%r12 915 | movq 8(%rsp),%r12 916 | 917 | # qhasm: c3 =c3_stack 918 | # asm 1: movq c3=int64#11 919 | # asm 2: movq c3=%r13 920 | movq 16(%rsp),%r13 921 | 922 | # qhasm: c4 =c4_stack 923 | # asm 1: movq c4=int64#12 924 | # asm 2: movq c4=%r14 925 | movq 24(%rsp),%r14 926 | 927 | # qhasm: c5 =c5_stack 928 | # asm 1: movq c5=int64#13 929 | # asm 2: movq c5=%r15 930 | movq 32(%rsp),%r15 931 | 932 | # qhasm: c6 =c6_stack 933 | # asm 1: movq c6=int64#14 934 | # asm 2: movq c6=%rbx 935 | movq 40(%rsp),%rbx 936 | 937 | # qhasm: c7 =c7_stack 938 | # asm 1: movq c7=int64#15 939 | # asm 2: movq c7=%rbp 940 | movq 48(%rsp),%rbp 941 | 942 | # qhasm: leave 943 | add 
%r11,%rsp 944 | mov %rdi,%rax 945 | mov %rsi,%rdx 946 | ret 947 | -------------------------------------------------------------------------------- /fe25519_neg.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | /* Copies the five limbs v[0..4] of x into r unchanged; no reduction or other arithmetic is performed. The commented-out lines inside the body are leftover experiments and have no effect. */ 3 | void fe25519_copy(fe25519 *r, const fe25519 *x) 4 | { 5 | // fe25519 t; 6 | // /fe25519_setint(&t,0); 7 | // fe25519_sub(r,&t,x); 8 | int i; 9 | for (i = 0; i < 5; i++) 10 | r->v[i] = x->v[i]; 11 | 12 | // printf("???\n"); 13 | } 14 | /* Sets r = 0 - x: a temporary is set to zero with fe25519_setint() and x is subtracted from it with fe25519_sub(). NOTE(review): the result is presumably a representative of -x mod 2^255 - 19, not necessarily fully reduced - confirm against fe25519_sub. */ 15 | void fe25519_neg(fe25519 *r, const fe25519 *x) 16 | { 17 | fe25519 t; 18 | fe25519_setint(&t,0); 19 | fe25519_sub(r,&t,x); 20 | } 21 | 22 | -------------------------------------------------------------------------------- /fe25519_nsquare.S: -------------------------------------------------------------------------------- 1 | 2 | # qhasm: int64 rp 3 | 4 | # qhasm: int64 n 5 | 6 | # qhasm: input rp 7 | 8 | # qhasm: input n 9 | 10 | # qhasm: int64 r0 11 | 12 | # qhasm: int64 r1 13 | 14 | # qhasm: int64 r2 15 | 16 | # qhasm: int64 r3 17 | 18 | # qhasm: int64 r4 19 | 20 | # qhasm: int64 c1 21 | 22 | # qhasm: int64 c2 23 | 24 | # qhasm: int64 c3 25 | 26 | # qhasm: int64 c4 27 | 28 | # qhasm: int64 c5 29 | 30 | # qhasm: int64 c6 31 | 32 | # qhasm: int64 c7 33 | 34 | # qhasm: caller c1 35 | 36 | # qhasm: caller c2 37 | 38 | # qhasm: caller c3 39 | 40 | # qhasm: caller c4 41 | 42 | # qhasm: caller c5 43 | 44 | # qhasm: caller c6 45 | 46 | # qhasm: caller c7 47 | 48 | # qhasm: stack64 c1_stack 49 | 50 | # qhasm: stack64 c2_stack 51 | 52 | # qhasm: stack64 c3_stack 53 | 54 | # qhasm: stack64 c4_stack 55 | 56 | # qhasm: stack64 c5_stack 57 | 58 | # qhasm: stack64 c6_stack 59 | 60 | # qhasm: stack64 c7_stack 61 | 62 | # qhasm: stack64 x119_stack 63 | 64 | # qhasm: stack64 x219_stack 65 | 66 | # qhasm: stack64 x319_stack 67 | 68 | # qhasm: stack64 x419_stack 69 | 70 | # qhasm: int64 squarer01 71 | 72 | # qhasm: int64 squarer11 73 | 74 | # qhasm: int64 squarer21 75 | 76 | #
qhasm: int64 squarer31 77 | 78 | # qhasm: int64 squarer41 79 | 80 | # qhasm: int64 squarerax 81 | 82 | # qhasm: int64 squarerdx 83 | 84 | # qhasm: int64 squaret 85 | 86 | # qhasm: int64 squareredmask 87 | 88 | # qhasm: stack64 n_stack 89 | 90 | # qhasm: enter fe25519_nsquare 91 | .text 92 | .p2align 5 93 | .globl _fe25519_nsquare 94 | .globl fe25519_nsquare 95 | _fe25519_nsquare: 96 | fe25519_nsquare: 97 | mov %rsp,%r11 98 | and $31,%r11 99 | add $64,%r11 100 | sub %r11,%rsp 101 | 102 | # qhasm: c1_stack = c1 103 | # asm 1: movq c1_stack=stack64#1 104 | # asm 2: movq c1_stack=0(%rsp) 105 | movq %r11,0(%rsp) 106 | 107 | # qhasm: c2_stack = c2 108 | # asm 1: movq c2_stack=stack64#2 109 | # asm 2: movq c2_stack=8(%rsp) 110 | movq %r12,8(%rsp) 111 | 112 | # qhasm: c3_stack = c3 113 | # asm 1: movq c3_stack=stack64#3 114 | # asm 2: movq c3_stack=16(%rsp) 115 | movq %r13,16(%rsp) 116 | 117 | # qhasm: c4_stack = c4 118 | # asm 1: movq c4_stack=stack64#4 119 | # asm 2: movq c4_stack=24(%rsp) 120 | movq %r14,24(%rsp) 121 | 122 | # qhasm: c5_stack = c5 123 | # asm 1: movq c5_stack=stack64#5 124 | # asm 2: movq c5_stack=32(%rsp) 125 | movq %r15,32(%rsp) 126 | 127 | # qhasm: c6_stack = c6 128 | # asm 1: movq c6_stack=stack64#6 129 | # asm 2: movq c6_stack=40(%rsp) 130 | movq %rbx,40(%rsp) 131 | 132 | # qhasm: c7_stack = c7 133 | # asm 1: movq c7_stack=stack64#7 134 | # asm 2: movq c7_stack=48(%rsp) 135 | movq %rbp,48(%rsp) 136 | 137 | # qhasm: loop: 138 | ._loop: 139 | 140 | # qhasm: squarerax = *(uint64 *)(rp + 0) 141 | # asm 1: movq 0(squarerax=int64#7 142 | # asm 2: movq 0(squarerax=%rax 143 | movq 0(%rdi),%rax 144 | 145 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 0) 146 | # asm 1: mulq 0(r0=int64#4 152 | # asm 2: mov r0=%rcx 153 | mov %rax,%rcx 154 | 155 | # qhasm: squarer01 = squarerdx 156 | # asm 1: mov squarer01=int64#5 157 | # asm 2: mov squarer01=%r8 158 | mov %rdx,%r8 159 | 160 | # qhasm: squarerax = *(uint64 *)(rp + 0) 161 | # asm 1: movq 
0(squarerax=int64#7 162 | # asm 2: movq 0(squarerax=%rax 163 | movq 0(%rdi),%rax 164 | 165 | # qhasm: squarerax <<= 1 166 | # asm 1: shl $1,r1=int64#6 177 | # asm 2: mov r1=%r9 178 | mov %rax,%r9 179 | 180 | # qhasm: squarer11 = squarerdx 181 | # asm 1: mov squarer11=int64#8 182 | # asm 2: mov squarer11=%r10 183 | mov %rdx,%r10 184 | 185 | # qhasm: squarerax = *(uint64 *)(rp + 0) 186 | # asm 1: movq 0(squarerax=int64#7 187 | # asm 2: movq 0(squarerax=%rax 188 | movq 0(%rdi),%rax 189 | 190 | # qhasm: squarerax <<= 1 191 | # asm 1: shl $1,r2=int64#9 202 | # asm 2: mov r2=%r11 203 | mov %rax,%r11 204 | 205 | # qhasm: squarer21 = squarerdx 206 | # asm 1: mov squarer21=int64#10 207 | # asm 2: mov squarer21=%r12 208 | mov %rdx,%r12 209 | 210 | # qhasm: squarerax = *(uint64 *)(rp + 0) 211 | # asm 1: movq 0(squarerax=int64#7 212 | # asm 2: movq 0(squarerax=%rax 213 | movq 0(%rdi),%rax 214 | 215 | # qhasm: squarerax <<= 1 216 | # asm 1: shl $1,r3=int64#11 227 | # asm 2: mov r3=%r13 228 | mov %rax,%r13 229 | 230 | # qhasm: squarer31 = squarerdx 231 | # asm 1: mov squarer31=int64#12 232 | # asm 2: mov squarer31=%r14 233 | mov %rdx,%r14 234 | 235 | # qhasm: squarerax = *(uint64 *)(rp + 0) 236 | # asm 1: movq 0(squarerax=int64#7 237 | # asm 2: movq 0(squarerax=%rax 238 | movq 0(%rdi),%rax 239 | 240 | # qhasm: squarerax <<= 1 241 | # asm 1: shl $1,r4=int64#13 252 | # asm 2: mov r4=%r15 253 | mov %rax,%r15 254 | 255 | # qhasm: squarer41 = squarerdx 256 | # asm 1: mov squarer41=int64#14 257 | # asm 2: mov squarer41=%rbx 258 | mov %rdx,%rbx 259 | 260 | # qhasm: squarerax = *(uint64 *)(rp + 8) 261 | # asm 1: movq 8(squarerax=int64#7 262 | # asm 2: movq 8(squarerax=%rax 263 | movq 8(%rdi),%rax 264 | 265 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 8) 266 | # asm 1: mulq 8(squarerax=int64#7 282 | # asm 2: movq 8(squarerax=%rax 283 | movq 8(%rdi),%rax 284 | 285 | # qhasm: squarerax <<= 1 286 | # asm 1: shl $1,squarerax=int64#7 307 | # asm 2: movq 
8(squarerax=%rax 308 | movq 8(%rdi),%rax 309 | 310 | # qhasm: squarerax <<= 1 311 | # asm 1: shl $1,squarerax=int64#3 332 | # asm 2: movq 8(squarerax=%rdx 333 | movq 8(%rdi),%rdx 334 | 335 | # qhasm: squarerax *= 38 336 | # asm 1: imulq $38,squarerax=int64#7 337 | # asm 2: imulq $38,squarerax=%rax 338 | imulq $38,%rdx,%rax 339 | 340 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 32) 341 | # asm 1: mulq 32(squarerax=int64#7 357 | # asm 2: movq 16(squarerax=%rax 358 | movq 16(%rdi),%rax 359 | 360 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 16) 361 | # asm 1: mulq 16(squarerax=int64#3 377 | # asm 2: movq 16(squarerax=%rdx 378 | movq 16(%rdi),%rdx 379 | 380 | # qhasm: squarerax *= 38 381 | # asm 1: imulq $38,squarerax=int64#7 382 | # asm 2: imulq $38,squarerax=%rax 383 | imulq $38,%rdx,%rax 384 | 385 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 24) 386 | # asm 1: mulq 24(squarerax=int64#3 402 | # asm 2: movq 16(squarerax=%rdx 403 | movq 16(%rdi),%rdx 404 | 405 | # qhasm: squarerax *= 38 406 | # asm 1: imulq $38,squarerax=int64#7 407 | # asm 2: imulq $38,squarerax=%rax 408 | imulq $38,%rdx,%rax 409 | 410 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 32) 411 | # asm 1: mulq 32(squarerax=int64#3 427 | # asm 2: movq 24(squarerax=%rdx 428 | movq 24(%rdi),%rdx 429 | 430 | # qhasm: squarerax *= 19 431 | # asm 1: imulq $19,squarerax=int64#7 432 | # asm 2: imulq $19,squarerax=%rax 433 | imulq $19,%rdx,%rax 434 | 435 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 24) 436 | # asm 1: mulq 24(squarerax=int64#3 452 | # asm 2: movq 24(squarerax=%rdx 453 | movq 24(%rdi),%rdx 454 | 455 | # qhasm: squarerax *= 38 456 | # asm 1: imulq $38,squarerax=int64#7 457 | # asm 2: imulq $38,squarerax=%rax 458 | imulq $38,%rdx,%rax 459 | 460 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 32) 461 | # asm 1: mulq 32(squarerax=int64#3 477 | # asm 
2: movq 32(squarerax=%rdx 478 | movq 32(%rdi),%rdx 479 | 480 | # qhasm: squarerax *= 19 481 | # asm 1: imulq $19,squarerax=int64#7 482 | # asm 2: imulq $19,squarerax=%rax 483 | imulq $19,%rdx,%rax 484 | 485 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(rp + 32) 486 | # asm 1: mulq 32(squareredmask=int64#3 502 | # asm 2: movq CONST_REDMASK51,>squareredmask=%rdx 503 | movq CONST_REDMASK51(%rip),%rdx 504 | 505 | # qhasm: squarer01 = (squarer01.r0) << 13 506 | # asm 1: shld $13,squarer41=int64#5 577 | # asm 2: imulq $19,squarer41=%r8 578 | imulq $19,%rbx,%r8 579 | 580 | # qhasm: r0 += squarer41 581 | # asm 1: add squaret=int64#5 587 | # asm 2: mov squaret=%r8 588 | mov %rcx,%r8 589 | 590 | # qhasm: (uint64) squaret >>= 51 591 | # asm 1: shr $51,r1=int64#6 607 | # asm 2: mov r1=%r9 608 | mov %r8,%r9 609 | 610 | # qhasm: (uint64) squaret >>= 51 611 | # asm 1: shr $51,r2=int64#7 627 | # asm 2: mov r2=%rax 628 | mov %r8,%rax 629 | 630 | # qhasm: (uint64) squaret >>= 51 631 | # asm 1: shr $51,r3=int64#8 647 | # asm 2: mov r3=%r10 648 | mov %r8,%r10 649 | 650 | # qhasm: (uint64) squaret >>= 51 651 | # asm 1: shr $51,r4=int64#9 667 | # asm 2: mov r4=%r11 668 | mov %r8,%r11 669 | 670 | # qhasm: (uint64) squaret >>= 51 671 | # asm 1: shr $51,squaret=int64#5 677 | # asm 2: imulq $19,squaret=%r8 678 | imulq $19,%r8,%r8 679 | 680 | # qhasm: r0 += squaret 681 | # asm 1: add ? 
n -= 1 716 | # asm 1: sub $1, 722 | jg ._loop 723 | 724 | # qhasm: c1 =c1_stack 725 | # asm 1: movq c1=int64#9 726 | # asm 2: movq c1=%r11 727 | movq 0(%rsp),%r11 728 | 729 | # qhasm: c2 =c2_stack 730 | # asm 1: movq c2=int64#10 731 | # asm 2: movq c2=%r12 732 | movq 8(%rsp),%r12 733 | 734 | # qhasm: c3 =c3_stack 735 | # asm 1: movq c3=int64#11 736 | # asm 2: movq c3=%r13 737 | movq 16(%rsp),%r13 738 | 739 | # qhasm: c4 =c4_stack 740 | # asm 1: movq c4=int64#12 741 | # asm 2: movq c4=%r14 742 | movq 24(%rsp),%r14 743 | 744 | # qhasm: c5 =c5_stack 745 | # asm 1: movq c5=int64#13 746 | # asm 2: movq c5=%r15 747 | movq 32(%rsp),%r15 748 | 749 | # qhasm: c6 =c6_stack 750 | # asm 1: movq c6=int64#14 751 | # asm 2: movq c6=%rbx 752 | movq 40(%rsp),%rbx 753 | 754 | # qhasm: c7 =c7_stack 755 | # asm 1: movq c7=int64#15 756 | # asm 2: movq c7=%rbp 757 | movq 48(%rsp),%rbp 758 | 759 | # qhasm: leave 760 | add %r11,%rsp 761 | mov %rdi,%rax 762 | mov %rsi,%rdx 763 | ret 764 | -------------------------------------------------------------------------------- /fe25519_pack.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | /* Assumes input x being reduced below 2^255 */ 4 | void fe25519_pack(unsigned char r[32], const fe25519 *x) 5 | { 6 | fe25519 t; 7 | t = *x; 8 | fe25519_freeze(&t); 9 | r[0] = (unsigned char) ( t.v[0] & 0xff); 10 | r[1] = (unsigned char) ((t.v[0] >> 8) & 0xff); 11 | r[2] = (unsigned char) ((t.v[0] >> 16) & 0xff); 12 | r[3] = (unsigned char) ((t.v[0] >> 24) & 0xff); 13 | r[4] = (unsigned char) ((t.v[0] >> 32) & 0xff); 14 | r[5] = (unsigned char) ((t.v[0] >> 40) & 0xff); 15 | r[6] = (unsigned char) ((t.v[0] >> 48)); 16 | 17 | r[6] ^= (unsigned char) ((t.v[1] << 3) & 0xf8); 18 | r[7] = (unsigned char) ((t.v[1] >> 5) & 0xff); 19 | r[8] = (unsigned char) ((t.v[1] >> 13) & 0xff); 20 | r[9] = (unsigned char) ((t.v[1] >> 21) & 0xff); 21 | r[10] = (unsigned char) ((t.v[1] >> 29) & 0xff); 22 | r[11] = 
(unsigned char) ((t.v[1] >> 37) & 0xff); 23 | r[12] = (unsigned char) ((t.v[1] >> 45)); 24 | /* v[2] starts at output bit 102 (2*51): its low 2 bits share byte 12 with the top bits of v[1] */ 25 | r[12] ^= (unsigned char) ((t.v[2] << 6) & 0xc0); 26 | r[13] = (unsigned char) ((t.v[2] >> 2) & 0xff); 27 | r[14] = (unsigned char) ((t.v[2] >> 10) & 0xff); 28 | r[15] = (unsigned char) ((t.v[2] >> 18) & 0xff); 29 | r[16] = (unsigned char) ((t.v[2] >> 26) & 0xff); 30 | r[17] = (unsigned char) ((t.v[2] >> 34) & 0xff); 31 | r[18] = (unsigned char) ((t.v[2] >> 42) & 0xff); 32 | r[19] = (unsigned char) ((t.v[2] >> 50)); 33 | /* v[3] starts at output bit 153 (3*51): its low 7 bits share byte 19 with the top bit of v[2] */ 34 | r[19] ^= (unsigned char) ((t.v[3] << 1) & 0xfe); 35 | r[20] = (unsigned char) ((t.v[3] >> 7) & 0xff); 36 | r[21] = (unsigned char) ((t.v[3] >> 15) & 0xff); 37 | r[22] = (unsigned char) ((t.v[3] >> 23) & 0xff); 38 | r[23] = (unsigned char) ((t.v[3] >> 31) & 0xff); 39 | r[24] = (unsigned char) ((t.v[3] >> 39) & 0xff); 40 | r[25] = (unsigned char) ((t.v[3] >> 47)); 41 | /* v[4] starts at output bit 204 (4*51): its low 4 bits share byte 25 with the top bits of v[3] */ 42 | r[25] ^= (unsigned char) ((t.v[4] << 4) & 0xf0); 43 | r[26] = (unsigned char) ((t.v[4] >> 4) & 0xff); 44 | r[27] = (unsigned char) ((t.v[4] >> 12) & 0xff); 45 | r[28] = (unsigned char) ((t.v[4] >> 20) & 0xff); 46 | r[29] = (unsigned char) ((t.v[4] >> 28) & 0xff); 47 | r[30] = (unsigned char) ((t.v[4] >> 36) & 0xff); 48 | r[31] = (unsigned char) ((t.v[4] >> 44)); 49 | } 50 | -------------------------------------------------------------------------------- /fe25519_pow2523.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | /* Computes r = x^(2^252 - 3) with a fixed sequence of squarings and multiplications; the exponent reached after each step is annotated in the body. 2^252 - 3 = (p - 5) / 8 for p = 2^255 - 19. NOTE(review): this exponent is presumably used by the point-decoding square-root step - confirm against the callers. */ 3 | void fe25519_pow2523(fe25519 *r, const fe25519 *x) 4 | { 5 | fe25519 z2; 6 | fe25519 z9; 7 | fe25519 z11; 8 | fe25519 z2_5_0; 9 | fe25519 z2_10_0; 10 | fe25519 z2_20_0; 11 | fe25519 z2_50_0; 12 | fe25519 z2_100_0; 13 | fe25519 t; 14 | 15 | /* 2 */ fe25519_square(&z2,x); 16 | /* 4 */ fe25519_square(&t,&z2); 17 | /* 8 */ fe25519_square(&t,&t); 18 | /* 9 */ fe25519_mul(&z9,&t,x); 19 | /* 11 */ fe25519_mul(&z11,&z9,&z2); 20 | /* 22 */ fe25519_square(&t,&z11); 21 | /* 2^5 - 2^0 = 31 */
fe25519_mul(&z2_5_0,&t,&z9); 22 | 23 | /* 2^6 - 2^1 */ fe25519_square(&t,&z2_5_0); 24 | /* 2^10 - 2^5 */ fe25519_nsquare(&t,4); 25 | /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t,&z2_5_0); 26 | 27 | /* 2^11 - 2^1 */ fe25519_square(&t,&z2_10_0); 28 | /* 2^20 - 2^10 */ fe25519_nsquare(&t,9); 29 | /* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t,&z2_10_0); 30 | 31 | /* 2^21 - 2^1 */ fe25519_square(&t,&z2_20_0); 32 | /* 2^40 - 2^20 */ fe25519_nsquare(&t,19); 33 | /* 2^40 - 2^0 */ fe25519_mul(&t,&t,&z2_20_0); 34 | 35 | /* 2^41 - 2^1 */ fe25519_square(&t,&t); 36 | /* 2^50 - 2^10 */ fe25519_nsquare(&t,9); 37 | /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t,&z2_10_0); 38 | 39 | /* 2^51 - 2^1 */ fe25519_square(&t,&z2_50_0); 40 | /* 2^100 - 2^50 */ fe25519_nsquare(&t,49); 41 | /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t,&z2_50_0); 42 | 43 | /* 2^101 - 2^1 */ fe25519_square(&t,&z2_100_0); 44 | /* 2^200 - 2^100 */ fe25519_nsquare(&t,99); 45 | /* 2^200 - 2^0 */ fe25519_mul(&t,&t,&z2_100_0); 46 | 47 | /* 2^201 - 2^1 */ fe25519_square(&t,&t); 48 | /* 2^250 - 2^50 */ fe25519_nsquare(&t,49); 49 | /* 2^250 - 2^0 */ fe25519_mul(&t,&t,&z2_50_0); 50 | /* two more squarings and one multiplication by x give the final exponent 2^252 - 3 */ 51 | /* 2^251 - 2^1 */ fe25519_square(&t,&t); 52 | /* 2^252 - 2^2 */ fe25519_square(&t,&t); 53 | /* 2^252 - 3 */ fe25519_mul(r,&t,x); 54 | } 55 | -------------------------------------------------------------------------------- /fe25519_setint.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | /* Sets r to the small integer v: limb v[0] receives v and limbs v[1..4] are cleared. */ 3 | void fe25519_setint(fe25519 *r, unsigned int v) 4 | { 5 | r->v[0] = v; 6 | r->v[1] = 0; 7 | r->v[2] = 0; 8 | r->v[3] = 0; 9 | r->v[4] = 0; 10 | } 11 | -------------------------------------------------------------------------------- /fe25519_square.S: -------------------------------------------------------------------------------- 1 | 2 | # qhasm: int64 rp 3 | 4 | # qhasm: int64 xp 5 | 6 | # qhasm: input rp 7 | 8 | # qhasm: input xp 9 | 10 | # qhasm: int64 r0 11 | 12 | # qhasm: int64 r1 13 | 14 | # qhasm:
int64 r2 15 | 16 | # qhasm: int64 r3 17 | 18 | # qhasm: int64 r4 19 | 20 | # qhasm: int64 c1 21 | 22 | # qhasm: int64 c2 23 | 24 | # qhasm: int64 c3 25 | 26 | # qhasm: int64 c4 27 | 28 | # qhasm: int64 c5 29 | 30 | # qhasm: int64 c6 31 | 32 | # qhasm: int64 c7 33 | 34 | # qhasm: caller c1 35 | 36 | # qhasm: caller c2 37 | 38 | # qhasm: caller c3 39 | 40 | # qhasm: caller c4 41 | 42 | # qhasm: caller c5 43 | 44 | # qhasm: caller c6 45 | 46 | # qhasm: caller c7 47 | 48 | # qhasm: stack64 c1_stack 49 | 50 | # qhasm: stack64 c2_stack 51 | 52 | # qhasm: stack64 c3_stack 53 | 54 | # qhasm: stack64 c4_stack 55 | 56 | # qhasm: stack64 c5_stack 57 | 58 | # qhasm: stack64 c6_stack 59 | 60 | # qhasm: stack64 c7_stack 61 | 62 | # qhasm: stack64 x119_stack 63 | 64 | # qhasm: stack64 x219_stack 65 | 66 | # qhasm: stack64 x319_stack 67 | 68 | # qhasm: stack64 x419_stack 69 | 70 | # qhasm: int64 squarer01 71 | 72 | # qhasm: int64 squarer11 73 | 74 | # qhasm: int64 squarer21 75 | 76 | # qhasm: int64 squarer31 77 | 78 | # qhasm: int64 squarer41 79 | 80 | # qhasm: int64 squarerax 81 | 82 | # qhasm: int64 squarerdx 83 | 84 | # qhasm: int64 squaret 85 | 86 | # qhasm: int64 squareredmask 87 | 88 | # qhasm: enter fe25519_square 89 | .text 90 | .p2align 5 91 | .globl _fe25519_square 92 | .globl fe25519_square 93 | _fe25519_square: 94 | fe25519_square: 95 | mov %rsp,%r11 96 | and $31,%r11 97 | add $64,%r11 98 | sub %r11,%rsp 99 | 100 | # qhasm: c1_stack = c1 101 | # asm 1: movq c1_stack=stack64#1 102 | # asm 2: movq c1_stack=0(%rsp) 103 | movq %r11,0(%rsp) 104 | 105 | # qhasm: c2_stack = c2 106 | # asm 1: movq c2_stack=stack64#2 107 | # asm 2: movq c2_stack=8(%rsp) 108 | movq %r12,8(%rsp) 109 | 110 | # qhasm: c3_stack = c3 111 | # asm 1: movq c3_stack=stack64#3 112 | # asm 2: movq c3_stack=16(%rsp) 113 | movq %r13,16(%rsp) 114 | 115 | # qhasm: c4_stack = c4 116 | # asm 1: movq c4_stack=stack64#4 117 | # asm 2: movq c4_stack=24(%rsp) 118 | movq %r14,24(%rsp) 119 | 120 | # qhasm: c5_stack = 
c5 121 | # asm 1: movq c5_stack=stack64#5 122 | # asm 2: movq c5_stack=32(%rsp) 123 | movq %r15,32(%rsp) 124 | 125 | # qhasm: c6_stack = c6 126 | # asm 1: movq c6_stack=stack64#6 127 | # asm 2: movq c6_stack=40(%rsp) 128 | movq %rbx,40(%rsp) 129 | 130 | # qhasm: c7_stack = c7 131 | # asm 1: movq c7_stack=stack64#7 132 | # asm 2: movq c7_stack=48(%rsp) 133 | movq %rbp,48(%rsp) 134 | 135 | # qhasm: squarerax = *(uint64 *)(xp + 0) 136 | # asm 1: movq 0(squarerax=int64#7 137 | # asm 2: movq 0(squarerax=%rax 138 | movq 0(%rsi),%rax 139 | 140 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 0) 141 | # asm 1: mulq 0(r0=int64#4 147 | # asm 2: mov r0=%rcx 148 | mov %rax,%rcx 149 | 150 | # qhasm: squarer01 = squarerdx 151 | # asm 1: mov squarer01=int64#5 152 | # asm 2: mov squarer01=%r8 153 | mov %rdx,%r8 154 | 155 | # qhasm: squarerax = *(uint64 *)(xp + 0) 156 | # asm 1: movq 0(squarerax=int64#7 157 | # asm 2: movq 0(squarerax=%rax 158 | movq 0(%rsi),%rax 159 | 160 | # qhasm: squarerax <<= 1 161 | # asm 1: shl $1,r1=int64#6 172 | # asm 2: mov r1=%r9 173 | mov %rax,%r9 174 | 175 | # qhasm: squarer11 = squarerdx 176 | # asm 1: mov squarer11=int64#8 177 | # asm 2: mov squarer11=%r10 178 | mov %rdx,%r10 179 | 180 | # qhasm: squarerax = *(uint64 *)(xp + 0) 181 | # asm 1: movq 0(squarerax=int64#7 182 | # asm 2: movq 0(squarerax=%rax 183 | movq 0(%rsi),%rax 184 | 185 | # qhasm: squarerax <<= 1 186 | # asm 1: shl $1,r2=int64#9 197 | # asm 2: mov r2=%r11 198 | mov %rax,%r11 199 | 200 | # qhasm: squarer21 = squarerdx 201 | # asm 1: mov squarer21=int64#10 202 | # asm 2: mov squarer21=%r12 203 | mov %rdx,%r12 204 | 205 | # qhasm: squarerax = *(uint64 *)(xp + 0) 206 | # asm 1: movq 0(squarerax=int64#7 207 | # asm 2: movq 0(squarerax=%rax 208 | movq 0(%rsi),%rax 209 | 210 | # qhasm: squarerax <<= 1 211 | # asm 1: shl $1,r3=int64#11 222 | # asm 2: mov r3=%r13 223 | mov %rax,%r13 224 | 225 | # qhasm: squarer31 = squarerdx 226 | # asm 1: mov squarer31=int64#12 227 | # 
asm 2: mov squarer31=%r14 228 | mov %rdx,%r14 229 | 230 | # qhasm: squarerax = *(uint64 *)(xp + 0) 231 | # asm 1: movq 0(squarerax=int64#7 232 | # asm 2: movq 0(squarerax=%rax 233 | movq 0(%rsi),%rax 234 | 235 | # qhasm: squarerax <<= 1 236 | # asm 1: shl $1,r4=int64#13 247 | # asm 2: mov r4=%r15 248 | mov %rax,%r15 249 | 250 | # qhasm: squarer41 = squarerdx 251 | # asm 1: mov squarer41=int64#14 252 | # asm 2: mov squarer41=%rbx 253 | mov %rdx,%rbx 254 | 255 | # qhasm: squarerax = *(uint64 *)(xp + 8) 256 | # asm 1: movq 8(squarerax=int64#7 257 | # asm 2: movq 8(squarerax=%rax 258 | movq 8(%rsi),%rax 259 | 260 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 8) 261 | # asm 1: mulq 8(squarerax=int64#7 277 | # asm 2: movq 8(squarerax=%rax 278 | movq 8(%rsi),%rax 279 | 280 | # qhasm: squarerax <<= 1 281 | # asm 1: shl $1,squarerax=int64#7 302 | # asm 2: movq 8(squarerax=%rax 303 | movq 8(%rsi),%rax 304 | 305 | # qhasm: squarerax <<= 1 306 | # asm 1: shl $1,squarerax=int64#3 327 | # asm 2: movq 8(squarerax=%rdx 328 | movq 8(%rsi),%rdx 329 | 330 | # qhasm: squarerax *= 38 331 | # asm 1: imulq $38,squarerax=int64#7 332 | # asm 2: imulq $38,squarerax=%rax 333 | imulq $38,%rdx,%rax 334 | 335 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32) 336 | # asm 1: mulq 32(squarerax=int64#7 352 | # asm 2: movq 16(squarerax=%rax 353 | movq 16(%rsi),%rax 354 | 355 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 16) 356 | # asm 1: mulq 16(squarerax=int64#3 372 | # asm 2: movq 16(squarerax=%rdx 373 | movq 16(%rsi),%rdx 374 | 375 | # qhasm: squarerax *= 38 376 | # asm 1: imulq $38,squarerax=int64#7 377 | # asm 2: imulq $38,squarerax=%rax 378 | imulq $38,%rdx,%rax 379 | 380 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24) 381 | # asm 1: mulq 24(squarerax=int64#3 397 | # asm 2: movq 16(squarerax=%rdx 398 | movq 16(%rsi),%rdx 399 | 400 | # qhasm: squarerax *= 38 401 | # asm 1: imulq 
$38,squarerax=int64#7 402 | # asm 2: imulq $38,squarerax=%rax 403 | imulq $38,%rdx,%rax 404 | 405 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32) 406 | # asm 1: mulq 32(squarerax=int64#3 422 | # asm 2: movq 24(squarerax=%rdx 423 | movq 24(%rsi),%rdx 424 | 425 | # qhasm: squarerax *= 19 426 | # asm 1: imulq $19,squarerax=int64#7 427 | # asm 2: imulq $19,squarerax=%rax 428 | imulq $19,%rdx,%rax 429 | 430 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 24) 431 | # asm 1: mulq 24(squarerax=int64#3 447 | # asm 2: movq 24(squarerax=%rdx 448 | movq 24(%rsi),%rdx 449 | 450 | # qhasm: squarerax *= 38 451 | # asm 1: imulq $38,squarerax=int64#7 452 | # asm 2: imulq $38,squarerax=%rax 453 | imulq $38,%rdx,%rax 454 | 455 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32) 456 | # asm 1: mulq 32(squarerax=int64#3 472 | # asm 2: movq 32(squarerax=%rdx 473 | movq 32(%rsi),%rdx 474 | 475 | # qhasm: squarerax *= 19 476 | # asm 1: imulq $19,squarerax=int64#7 477 | # asm 2: imulq $19,squarerax=%rax 478 | imulq $19,%rdx,%rax 479 | 480 | # qhasm: (uint128) squarerdx squarerax = squarerax * *(uint64 *)(xp + 32) 481 | # asm 1: mulq 32(squareredmask=int64#2 497 | # asm 2: movq CONST_REDMASK51,>squareredmask=%rsi 498 | movq CONST_REDMASK51(%rip),%rsi 499 | 500 | # qhasm: squarer01 = (squarer01.r0) << 13 501 | # asm 1: shld $13,squarer41=int64#3 572 | # asm 2: imulq $19,squarer41=%rdx 573 | imulq $19,%rbx,%rdx 574 | 575 | # qhasm: r0 += squarer41 576 | # asm 1: add squaret=int64#3 582 | # asm 2: mov squaret=%rdx 583 | mov %rcx,%rdx 584 | 585 | # qhasm: (uint64) squaret >>= 51 586 | # asm 1: shr $51,r1=int64#5 602 | # asm 2: mov r1=%r8 603 | mov %rdx,%r8 604 | 605 | # qhasm: (uint64) squaret >>= 51 606 | # asm 1: shr $51,r2=int64#6 622 | # asm 2: mov r2=%r9 623 | mov %rdx,%r9 624 | 625 | # qhasm: (uint64) squaret >>= 51 626 | # asm 1: shr $51,r3=int64#7 642 | # asm 2: mov r3=%rax 643 | mov %rdx,%rax 644 | 645 | # 
qhasm: (uint64) squaret >>= 51 646 | # asm 1: shr $51,r4=int64#8 662 | # asm 2: mov r4=%r10 663 | mov %rdx,%r10 664 | 665 | # qhasm: (uint64) squaret >>= 51 666 | # asm 1: shr $51,squaret=int64#3 672 | # asm 2: imulq $19,squaret=%rdx 673 | imulq $19,%rdx,%rdx 674 | 675 | # qhasm: r0 += squaret 676 | # asm 1: add c1=int64#9 712 | # asm 2: movq c1=%r11 713 | movq 0(%rsp),%r11 714 | 715 | # qhasm: c2 =c2_stack 716 | # asm 1: movq c2=int64#10 717 | # asm 2: movq c2=%r12 718 | movq 8(%rsp),%r12 719 | 720 | # qhasm: c3 =c3_stack 721 | # asm 1: movq c3=int64#11 722 | # asm 2: movq c3=%r13 723 | movq 16(%rsp),%r13 724 | 725 | # qhasm: c4 =c4_stack 726 | # asm 1: movq c4=int64#12 727 | # asm 2: movq c4=%r14 728 | movq 24(%rsp),%r14 729 | 730 | # qhasm: c5 =c5_stack 731 | # asm 1: movq c5=int64#13 732 | # asm 2: movq c5=%r15 733 | movq 32(%rsp),%r15 734 | 735 | # qhasm: c6 =c6_stack 736 | # asm 1: movq c6=int64#14 737 | # asm 2: movq c6=%rbx 738 | movq 40(%rsp),%rbx 739 | 740 | # qhasm: c7 =c7_stack 741 | # asm 1: movq c7=int64#15 742 | # asm 2: movq c7=%rbp 743 | movq 48(%rsp),%rbp 744 | 745 | # qhasm: leave 746 | add %r11,%rsp 747 | mov %rdi,%rax 748 | mov %rsi,%rdx 749 | ret 750 | -------------------------------------------------------------------------------- /fe25519_sub.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y) 4 | { 5 | fe25519 yt = *y; 6 | /* Not required for reduced input */ 7 | 8 | unsigned long long t; 9 | t = yt.v[0] >> 51; 10 | yt.v[0] &= 2251799813685247; 11 | yt.v[1] += t; 12 | 13 | t = yt.v[1] >> 51; 14 | yt.v[1] &= 2251799813685247; 15 | yt.v[2] += t; 16 | 17 | t = yt.v[2] >> 51; 18 | yt.v[2] &= 2251799813685247; 19 | yt.v[3] += t; 20 | 21 | t = yt.v[3] >> 51; 22 | yt.v[3] &= 2251799813685247; 23 | yt.v[4] += t; 24 | 25 | t = yt.v[4] >> 51; 26 | yt.v[4] &= 2251799813685247; 27 | yt.v[0] += 19*t; // ??? 
28 | 29 | r->v[0] = x->v[0] + 0xFFFFFFFFFFFDA - yt.v[0]; // ??? 30 | r->v[1] = x->v[1] + 0xFFFFFFFFFFFFE - yt.v[1]; 31 | r->v[2] = x->v[2] + 0xFFFFFFFFFFFFE - yt.v[2]; 32 | r->v[3] = x->v[3] + 0xFFFFFFFFFFFFE - yt.v[3]; 33 | r->v[4] = x->v[4] + 0xFFFFFFFFFFFFE - yt.v[4]; 34 | } 35 | -------------------------------------------------------------------------------- /fe25519_unpack.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | 3 | void fe25519_unpack(fe25519 *r, const unsigned char x[32]) 4 | { 5 | r->v[0] = x[0]; 6 | r->v[0] += (unsigned long long)x[1] << 8; 7 | r->v[0] += (unsigned long long)x[2] << 16; 8 | r->v[0] += (unsigned long long)x[3] << 24; 9 | r->v[0] += (unsigned long long)x[4] << 32; 10 | r->v[0] += (unsigned long long)x[5] << 40; 11 | r->v[0] += ((unsigned long long)x[6] & 7) << 48; 12 | 13 | r->v[1] = x[6] >> 3; 14 | r->v[1] += (unsigned long long)x[7] << 5; 15 | r->v[1] += (unsigned long long)x[8] << 13; 16 | r->v[1] += (unsigned long long)x[9] << 21; 17 | r->v[1] += (unsigned long long)x[10] << 29; 18 | r->v[1] += (unsigned long long)x[11] << 37; 19 | r->v[1] += ((unsigned long long)x[12] & 63) << 45; 20 | 21 | r->v[2] = x[12] >> 6; 22 | r->v[2] += (unsigned long long)x[13] << 2; 23 | r->v[2] += (unsigned long long)x[14] << 10; 24 | r->v[2] += (unsigned long long)x[15] << 18; 25 | r->v[2] += (unsigned long long)x[16] << 26; 26 | r->v[2] += (unsigned long long)x[17] << 34; 27 | r->v[2] += (unsigned long long)x[18] << 42; 28 | r->v[2] += ((unsigned long long)x[19] & 1) << 50; 29 | 30 | r->v[3] = x[19] >> 1; 31 | r->v[3] += (unsigned long long)x[20] << 7; 32 | r->v[3] += (unsigned long long)x[21] << 15; 33 | r->v[3] += (unsigned long long)x[22] << 23; 34 | r->v[3] += (unsigned long long)x[23] << 31; 35 | r->v[3] += (unsigned long long)x[24] << 39; 36 | r->v[3] += ((unsigned long long)x[25] & 15) << 47; 37 | 38 | r->v[4] = x[25] >> 4; 39 | r->v[4] += (unsigned long long)x[26] << 4; 40 | 
r->v[4] += (unsigned long long)x[27] << 12; 41 | r->v[4] += (unsigned long long)x[28] << 20; 42 | r->v[4] += (unsigned long long)x[29] << 28; 43 | r->v[4] += (unsigned long long)x[30] << 36; 44 | r->v[4] += ((unsigned long long)x[31] & 127) << 44; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /ge25519.h: -------------------------------------------------------------------------------- 1 | #ifndef GE25519_H 2 | #define GE25519_H 3 | 4 | /* 5 | * Arithmetic on the twisted Edwards curve -x^2 + y^2 = 1 + dx^2y^2 6 | * with d = -(121665/121666) = 7 | * 37095705934669439343138083508754565189542113879843219016388785533085940283555 8 | * Base point: 9 | * (15112221349535400772501151409588531511454012693041857206046113283949847762202,46316835694926478169428394003475163141307993866256225615783033603165251855960); 10 | */ 11 | 12 | #include "fe25519.h" 13 | #include "sc25519.h" 14 | 15 | #define ge25519_p3 ge25519 16 | 17 | typedef struct 18 | { 19 | fe25519 x; 20 | fe25519 y; 21 | fe25519 z; 22 | fe25519 t; 23 | } ge25519; 24 | 25 | typedef struct 26 | { 27 | fe25519 x; 28 | fe25519 z; 29 | fe25519 y; 30 | fe25519 t; 31 | } ge25519_p1p1; 32 | 33 | typedef struct 34 | { 35 | fe25519 x; 36 | fe25519 y; 37 | fe25519 z; 38 | } ge25519_p2; 39 | 40 | typedef struct 41 | { 42 | fe25519 ysubx; 43 | fe25519 xaddy; 44 | fe25519 t2d; 45 | } ge25519_niels; 46 | 47 | typedef struct 48 | { 49 | fe25519 ysubx; 50 | fe25519 xaddy; 51 | fe25519 z; 52 | fe25519 t2d; 53 | } ge25519_pniels; 54 | 55 | extern void simpleot_ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p); 56 | extern void simpleot_ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p); 57 | extern void ge25519_p1p1_to_pniels(ge25519_pniels *r, const ge25519_p1p1 *p); 58 | extern void ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_p3 *q); 59 | extern void ge25519_dbl_p1p1(ge25519_p1p1 *r, const ge25519_p2 *p); 60 | extern void 
ge25519_nielsadd2(ge25519_p3 *r, const ge25519_niels *q); 61 | extern void ge25519_nielsadd_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_niels *q); 62 | extern void ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_pniels *q); 63 | 64 | extern const ge25519 ge25519_base; // 65 | 66 | void ge25519_cmov(ge25519 * r, ge25519 * s, unsigned char b); // 67 | 68 | void ge25519_neg(ge25519 * r, const ge25519 * s); // 69 | 70 | extern void ge25519_setneutral(ge25519 *r); // 71 | 72 | extern int ge25519_unpack_vartime(ge25519 *r, const unsigned char p[32]); 73 | 74 | extern void ge25519_pack(unsigned char r[32], const ge25519 *p); // 75 | 76 | extern int ge25519_isneutral_vartime(const ge25519 *p); // 77 | 78 | extern void simpleot_ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q); // 79 | 80 | extern void ge25519_subtract(ge25519 *r, const ge25519 *p, const ge25519 *q); // 81 | 82 | extern void ge25519_double(ge25519 *r, const ge25519 *p); // 83 | 84 | extern void simpleot_ge25519_scalarmult(ge25519 *q, ge25519 *r, const sc25519 *s); // 85 | extern void simpleot_ge25519_scalarmult_base(ge25519 *r, const sc25519 *s); // 86 | 87 | #endif 88 | 89 | -------------------------------------------------------------------------------- /ge25519_add.c: -------------------------------------------------------------------------------- 1 | #include "ge25519.h" 2 | 3 | void simpleot_ge25519_add(ge25519_p3 *r, const ge25519_p3 *p, const ge25519_p3 *q) 4 | { 5 | ge25519_p1p1 grp1p1; 6 | ge25519_add_p1p1(&grp1p1, p, q); 7 | simpleot_ge25519_p1p1_to_p3(r, &grp1p1); 8 | } 9 | -------------------------------------------------------------------------------- /ge25519_double.c: -------------------------------------------------------------------------------- 1 | #include "ge25519.h" 2 | 3 | void ge25519_double(ge25519_p3 *r, const ge25519_p3 *p) 4 | { 5 | ge25519_p1p1 grp1p1; 6 | ge25519_dbl_p1p1(&grp1p1, (ge25519_p2 *)p); 7 | 
simpleot_ge25519_p1p1_to_p3(r, &grp1p1); 8 | } 9 | -------------------------------------------------------------------------------- /ge25519_pack.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | #include "sc25519.h" 3 | #include "ge25519.h" 4 | 5 | void ge25519_pack(unsigned char r[32], const ge25519_p3 *p) 6 | { 7 | fe25519 tx, ty, zi; 8 | simpleot_fe25519_invert(&zi, &p->z); 9 | fe25519_mul(&tx, &p->x, &zi); 10 | fe25519_mul(&ty, &p->y, &zi); 11 | fe25519_pack(r, &ty); 12 | r[31] ^= fe25519_getparity(&tx) << 7; 13 | } 14 | -------------------------------------------------------------------------------- /ge25519_scalarmult.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "ge25519.h" 4 | 5 | static void ge25519_idoubles(ge25519 * a, int n) 6 | { 7 | int i; 8 | ge25519_p1p1 tp1p1; 9 | 10 | // 11 | 12 | for (i = 0; i < n-1; i++) 13 | { 14 | ge25519_dbl_p1p1(&tp1p1, (ge25519_p2 *)a); 15 | simpleot_ge25519_p1p1_to_p2((ge25519_p2 *)a, &tp1p1); 16 | } 17 | 18 | ge25519_dbl_p1p1(&tp1p1, (ge25519_p2 *)a); 19 | simpleot_ge25519_p1p1_to_p3(a, &tp1p1); 20 | } 21 | 22 | static void ge25519_maketable(ge25519 (*table)[8], const ge25519 * b, int dist) 23 | { 24 | const int n = 64/dist; 25 | 26 | int i; 27 | ge25519 p = *b; 28 | 29 | // 30 | 31 | for (i = 0; i < n; i++) 32 | { 33 | table[i][1-1] = p; 34 | ge25519_double(&table[i][2-1], &p); 35 | simpleot_ge25519_add(&table[i][3-1], &table[i][2-1], &p); 36 | ge25519_double(&table[i][4-1], &table[i][2-1]); 37 | simpleot_ge25519_add(&table[i][5-1], &table[i][4-1], &p); 38 | ge25519_double(&table[i][6-1], &table[i][3-1]); 39 | simpleot_ge25519_add(&table[i][7-1], &table[i][6-1], &p); 40 | ge25519_double(&table[i][8-1], &table[i][4-1]); 41 | 42 | if (i < n-1) 43 | { 44 | ge25519_double(&p, &table[i][8-1]); 45 | ge25519_idoubles(&p, 4*(dist-1)); 46 | } 47 | } 48 | } 49 | 50 | extern void 
ge25519_lookup_asm(ge25519 *, const ge25519 *, const char *); 51 | 52 | static void ge25519_scalarmult_table(ge25519 *r, ge25519 (*table)[8], const sc25519 *s, int dist) 53 | { 54 | int i, j; 55 | ge25519 t; 56 | ge25519_p1p1 t_p1p1; 57 | char w[64]; 58 | 59 | // 60 | 61 | sc25519_window4(w, s); 62 | 63 | // 64 | 65 | for (i = dist-1; i < 64; i += dist) 66 | { 67 | if (i == dist-1) 68 | ge25519_lookup_asm(r, table[i/dist], &w[i]); 69 | 70 | else 71 | { 72 | ge25519_lookup_asm(&t, table[i/dist], &w[i]); 73 | ge25519_add_p1p1(&t_p1p1, r, &t); 74 | 75 | if (i + dist < 64) 76 | simpleot_ge25519_p1p1_to_p3(r, &t_p1p1); 77 | else 78 | simpleot_ge25519_p1p1_to_p2((ge25519_p2 *)r, &t_p1p1); 79 | } 80 | 81 | } 82 | 83 | // 84 | 85 | for (i = dist-2; i >= 0; i--) 86 | { 87 | ge25519_idoubles(r, 4); 88 | 89 | for (j = i; j < 64; j += dist) 90 | { 91 | ge25519_lookup_asm(&t, table[j/dist], &w[j]); 92 | ge25519_add_p1p1(&t_p1p1, r, &t); 93 | 94 | if (j + dist < 64 || i == 0) 95 | simpleot_ge25519_p1p1_to_p3(r, &t_p1p1); 96 | else 97 | simpleot_ge25519_p1p1_to_p2((ge25519_p2 *)r, &t_p1p1); 98 | } 99 | } 100 | } 101 | 102 | void simpleot_ge25519_scalarmult(ge25519 * a, ge25519 * b, const sc25519 * s) 103 | { 104 | ge25519 table[1][8]; 105 | 106 | // 107 | 108 | ge25519_maketable(table, b, 64); 109 | ge25519_scalarmult_table(a, table, s, 64); 110 | } 111 | 112 | -------------------------------------------------------------------------------- /ge25519_scalarmult_base.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | #include "sc25519.h" 3 | #include "ge25519.h" 4 | 5 | 6 | /* Multiples of the base point in Niels' representation */ 7 | static const ge25519_niels ge25519_base_multiples_niels[] = { 8 | #include "ge25519.data" 9 | }; 10 | 11 | extern void ge25519_lookup_niels_asm(ge25519_niels *, const ge25519_niels *, const char *); 12 | 13 | void simpleot_ge25519_scalarmult_base(ge25519_p3 *r, const sc25519 *s) 14 | { 15 
| char b[64]; 16 | int i; 17 | ge25519_niels t; 18 | fe25519 d; 19 | 20 | sc25519_window4(b,s); 21 | 22 | ge25519_lookup_niels_asm((ge25519_niels *) r, ge25519_base_multiples_niels + 8*0, &b[0]); 23 | 24 | fe25519_sub(&d, &r->y, &r->x); 25 | fe25519_add(&r->y, &r->y, &r->x); 26 | r->x = d; 27 | r->t = r->z; 28 | fe25519_setint(&r->z,2); 29 | 30 | for(i = 1; i < 64; i++) 31 | { 32 | ge25519_lookup_niels_asm(&t, ge25519_base_multiples_niels + 8*i, &b[i]); 33 | ge25519_nielsadd2(r, &t); 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /ge25519_setneutral.c: -------------------------------------------------------------------------------- 1 | #include "ge25519.h" 2 | 3 | void ge25519_setneutral(ge25519 *r) 4 | { 5 | fe25519_setint(&r->x, 0); 6 | fe25519_setint(&r->y, 1); 7 | fe25519_setint(&r->z, 1); 8 | fe25519_setint(&r->t, 0); 9 | } 10 | 11 | -------------------------------------------------------------------------------- /ge25519_unpack.c: -------------------------------------------------------------------------------- 1 | #include "fe25519.h" 2 | #include "ge25519.h" 3 | 4 | /* d */ 5 | static const fe25519 ecd = {{929955233495203, 466365720129213, 1662059464998953, 2033849074728123, 1442794654840575}}; 6 | /* sqrt(-1) */ 7 | static const fe25519 sqrtm1 = {{1718705420411056, 234908883556509, 2233514472574048, 2117202627021982, 765476049583133}}; 8 | 9 | /* return 0 on success, -1 otherwise */ 10 | int ge25519_unpack_vartime(ge25519_p3 *r, const unsigned char p[32]) 11 | { 12 | fe25519 t, chk, num, den, den2, den4, den6; 13 | unsigned char par = p[31] >> 7; 14 | 15 | fe25519_setint(&r->z,1); 16 | fe25519_unpack(&r->y, p); 17 | fe25519_square(&num, &r->y); /* x = y^2 */ 18 | fe25519_mul(&den, &num, &ecd); /* den = dy^2 */ 19 | fe25519_sub(&num, &num, &r->z); /* x = y^2-1 */ 20 | fe25519_add(&den, &r->z, &den); /* den = dy^2+1 */ 21 | 22 | /* Computation of sqrt(num/den) 23 | 1.: computation of 
num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) 24 | */ 25 | fe25519_square(&den2, &den); 26 | fe25519_square(&den4, &den2); 27 | fe25519_mul(&den6, &den4, &den2); 28 | fe25519_mul(&t, &den6, &num); 29 | fe25519_mul(&t, &t, &den); 30 | 31 | fe25519_pow2523(&t, &t); 32 | /* 2. computation of r->x = t * num * den^3 33 | */ 34 | fe25519_mul(&t, &t, &num); 35 | fe25519_mul(&t, &t, &den); 36 | fe25519_mul(&t, &t, &den); 37 | fe25519_mul(&r->x, &t, &den); 38 | 39 | /* 3. Check whether sqrt computation gave correct result, multiply by sqrt(-1) if not: 40 | */ 41 | fe25519_square(&chk, &r->x); 42 | fe25519_mul(&chk, &chk, &den); 43 | if (!fe25519_iseq_vartime(&chk, &num)) 44 | fe25519_mul(&r->x, &r->x, &sqrtm1); 45 | 46 | /* 4. Now we have one of the two square roots, except if input was not a square 47 | */ 48 | fe25519_square(&chk, &r->x); 49 | fe25519_mul(&chk, &chk, &den); 50 | if (!fe25519_iseq_vartime(&chk, &num)) 51 | return -1; 52 | 53 | /* 5. Choose the desired square root according to parity: 54 | */ 55 | if(fe25519_getparity(&r->x) != par) 56 | fe25519_neg(&r->x, &r->x); 57 | 58 | fe25519_mul(&r->t, &r->x, &r->y); 59 | return 0; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /ge4x.c: -------------------------------------------------------------------------------- 1 | #include "ge4x.h" 2 | 3 | #include "crypto_hash.h" 4 | 5 | void ge4x_cmovs(ge4x *r, const ge4x *x, unsigned char * b) 6 | { 7 | gfe4x_cmov(&r->x, &(x->x), b); 8 | gfe4x_cmov(&r->y, &(x->y), b); 9 | gfe4x_cmov(&r->z, &(x->z), b); 10 | gfe4x_cmov(&r->t, &(x->t), b); 11 | } 12 | 13 | void ge4x_setneutral(ge4x * a) 14 | { 15 | gfe4x_setzero(&a->x); 16 | gfe4x_setone(&a->y); 17 | gfe4x_setone(&a->z); 18 | gfe4x_setzero(&a->t); 19 | } 20 | 21 | void ge4x_neg(ge4x * a, const ge4x * b) 22 | { 23 | gfe4x_neg(&a->x, &b->x); 24 | a->y = b->y; 25 | a->z = b->z; 26 | gfe4x_neg(&a->t, &b->t); 27 | } 28 | 29 | 
/////////////////////////////////////////////////////////// 30 | 31 | #define repeat4x(x) {x, x, x, x} 32 | 33 | const gfe4x Gk = 34 | {{ 35 | { repeat4x(3338585.0) } , 36 | { repeat4x(3934835965952.0) } , 37 | { repeat4x(16993937369696567296.0) } , 38 | { repeat4x(4464222746302153748381696.0) } , 39 | { repeat4x(93371163235585075216663357423616.0) } , 40 | { repeat4x(1163399014865459815517614333765877760.0) } , 41 | { repeat4x(441936960085431936284569284157504919873519616.0) } , 42 | { repeat4x(355047131404459050871642921761149483359549389799424.0) } , 43 | { repeat4x(626647004757192365988092839070681114614100044180388577280.0) } , 44 | { repeat4x(13159058716893486699394031679446200360393917757201178927420145664.0) } , 45 | { repeat4x(12842070454865951878207543570322902610654944894655310136406629955928064.0) } , 46 | { repeat4x(16295354408597167049195255459117446390458785936524946835293367493552880222208.0) } 47 | }}; 48 | 49 | extern void ge4x_niels_add_p1p1_asm(ge4x_p1p1 *, const ge4x *, const ge4x_niels *); 50 | 51 | extern void ge4x_add_p1p1_asm(ge4x_p1p1 *, const ge4x *, const ge4x *); 52 | 53 | void ge4x_p1p1_to_p2(ge4x_p2 * a, const ge4x_p1p1 * b) 54 | { 55 | gfe4x_mul(&a->x, &b->x, &b->t); 56 | gfe4x_mul(&a->y, &b->y, &b->z); 57 | gfe4x_mul(&a->z, &b->z, &b->t); 58 | } 59 | 60 | void ge4x_p1p1_to_p3(ge4x * a, const ge4x_p1p1 * b) 61 | { 62 | gfe4x_mul(&a->x, &b->x, &b->t); 63 | gfe4x_mul(&a->y, &b->y, &b->z); 64 | gfe4x_mul(&a->z, &b->z, &b->t); 65 | gfe4x_mul(&a->t, &b->x, &b->y); 66 | } 67 | 68 | void ge4x_add_niels(ge4x * c, const ge4x * a, const ge4x_niels * b) 69 | { 70 | ge4x_p1p1 tmp; 71 | ge4x_niels_add_p1p1_asm(&tmp, a, b); 72 | ge4x_p1p1_to_p3(c, &tmp); 73 | } 74 | 75 | void ge4x_add(ge4x * c, const ge4x * a, const ge4x * b) 76 | { 77 | ge4x_p1p1 tmp; 78 | ge4x_add_p1p1_asm(&tmp, a, b); 79 | ge4x_p1p1_to_p3(c, &tmp); 80 | } 81 | 82 | void ge4x_sub(ge4x * c, const ge4x * a, const ge4x * b) 83 | { 84 | ge4x t; 85 | 86 | ge4x_neg(&t, b); 87 | 
ge4x_add(c, a, &t); 88 | } 89 | 90 | extern void ge4x_double_p1p1_asm(ge4x_p1p1 *, const ge4x_p2 *); 91 | 92 | void ge4x_double(ge4x * a, const ge4x * b) 93 | { 94 | ge4x_p1p1 tmp; 95 | ge4x_double_p1p1_asm(&tmp, (ge4x_p2 *)b); 96 | ge4x_p1p1_to_p3(a, &tmp); 97 | } 98 | 99 | void ge4x_doubles(ge4x * a, ge4x * b, int n) 100 | { 101 | int i; 102 | ge4x_p1p1 tp1p1; 103 | 104 | if (n == 1) 105 | { 106 | ge4x_double_p1p1_asm(&tp1p1, (ge4x_p2 *)b); 107 | ge4x_p1p1_to_p3(a, &tp1p1); 108 | } 109 | 110 | if (n > 1) 111 | { 112 | ge4x_double_p1p1_asm(&tp1p1, (ge4x_p2 *)b); 113 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &tp1p1); 114 | 115 | for (i = 0; i < n-2; i++) 116 | { 117 | ge4x_double_p1p1_asm(&tp1p1, (ge4x_p2 *)a); 118 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &tp1p1); 119 | } 120 | 121 | ge4x_double_p1p1_asm(&tp1p1, (ge4x_p2 *)a); 122 | ge4x_p1p1_to_p3(a, &tp1p1); 123 | } 124 | } 125 | 126 | void ge4x_idoubles(ge4x * a, int n) 127 | { 128 | ge4x_doubles(a, a, n); 129 | } 130 | 131 | /////////////////////////////////////////////////////////// 132 | 133 | void ge4x_scalarmults_base(ge4x * a, const sc25519 * s) 134 | { 135 | int i; 136 | sc25519 ss[4]; 137 | 138 | for (i = 0; i < 4; i++) 139 | ss[i] = *s; 140 | 141 | ge4x_scalarsmults_base(a, ss); 142 | } 143 | 144 | void ge4x_scalarmults(ge4x * a, ge4x * b, const sc25519 * s) 145 | { 146 | int i; 147 | sc25519 ss[4]; 148 | 149 | for (i = 0; i < 4; i++) 150 | ss[i] = *s; 151 | 152 | ge4x_scalarsmults(a, b, ss); 153 | } 154 | 155 | void ge4x_maketable(ge4x (*table)[8], const ge4x * b, int dist) 156 | { 157 | const int n = 64/dist; 158 | 159 | int i; 160 | ge4x p = *b; 161 | 162 | for (i = 0; i < n; i++) 163 | { 164 | table[i][1-1] = p; 165 | ge4x_double(&table[i][2-1], &p); 166 | ge4x_add(&table[i][3-1], &table[i][2-1], &p); 167 | ge4x_double(&table[i][4-1], &table[i][2-1]); 168 | ge4x_add(&table[i][5-1], &table[i][4-1], &p); 169 | ge4x_double(&table[i][6-1], &table[i][3-1]); 170 | ge4x_add(&table[i][7-1], &table[i][6-1], &p); 171 | 
ge4x_double(&table[i][8-1], &table[i][4-1]); 172 | 173 | if (i < n-1) 174 | { 175 | ge4x_double(&p, &table[i][8-1]); 176 | ge4x_idoubles(&p, 4*(dist-1)); 177 | } 178 | } 179 | } 180 | 181 | extern void ge4x_lookup_niels_asm(ge4x_niels *, const double (*)[3][12], const char *); 182 | 183 | extern void ge4x_lookup_asm(ge4x *, const ge4x *, const char *); 184 | 185 | static void convert(ge4x * dest, ge4x_niels * src) 186 | { 187 | gfe4x_sub(&dest->x, &src->y, &src->x); 188 | gfe4x_add(&dest->y, &src->y, &src->x); 189 | gfe4x_settwo(&dest->z); 190 | dest->t = src->z; 191 | } 192 | 193 | 194 | static const double ge4x_base_multiples_niels[32][8][3][12] = 195 | { 196 | #include "ge4x.data" 197 | }; 198 | 199 | void ge4x_scalarsmults_base(ge4x * a, const sc25519 * s) 200 | { 201 | const int dist=2; 202 | int i, j, pos; 203 | char idx[4], w[4][64]; 204 | 205 | ge4x_niels tmp[ dist ]; 206 | ge4x t[ dist ]; 207 | ge4x_p1p1 t_p1p1; 208 | 209 | // 210 | 211 | for (pos = 0; pos < 4; pos++) 212 | sc25519_window4(w[pos], &s[pos]); 213 | 214 | // 215 | 216 | for (i = 0; i < dist; i++) 217 | { 218 | for (pos = 0; pos < 4; pos++) 219 | idx[pos] = w[pos][i]; 220 | 221 | ge4x_lookup_niels_asm(&tmp[i], ge4x_base_multiples_niels[0], idx); 222 | 223 | if (i == dist-1) convert( a, &tmp[i]); 224 | else convert(&t[i], &tmp[i]); 225 | } 226 | 227 | for (j = dist; j < 64; j += dist) 228 | { 229 | for (i = 0; i < dist; i++) 230 | { 231 | for (pos = 0; pos < 4; pos++) 232 | idx[pos] = w[pos][i+j]; 233 | 234 | ge4x_lookup_niels_asm(&tmp[i], ge4x_base_multiples_niels[j/dist], idx); 235 | } 236 | 237 | for (i = 0; i < dist-1; i++) 238 | ge4x_add_niels(&t[i], &t[i], &tmp[i]); 239 | 240 | if (j+dist < 64) 241 | { 242 | ge4x_add_niels(a, a, &tmp[ dist-1 ]); 243 | } 244 | else 245 | { 246 | ge4x_niels_add_p1p1_asm(&t_p1p1, a, &tmp[ dist-1 ]); 247 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &t_p1p1); 248 | } 249 | } 250 | 251 | // 252 | 253 | ge4x_idoubles(a, 4); 254 | 255 | for (i = dist-2; i >= 1; i--) 256 | { 
257 | ge4x_add_p1p1_asm(&t_p1p1, a, &t[i]); 258 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &t_p1p1); 259 | ge4x_idoubles(a, 4); 260 | } 261 | 262 | ge4x_add(a, a, &t[0]); 263 | } 264 | 265 | void ge4x_scalarsmults(ge4x * a, ge4x * b, const sc25519 * s) 266 | { 267 | ge4x table[1][8]; 268 | 269 | // 270 | 271 | ge4x_maketable(table, b, 64); 272 | ge4x_scalarsmults_table(a, table, s, 64); 273 | } 274 | 275 | void ge4x_scalarsmults_table(ge4x * a, ge4x (*table)[8], const sc25519 * s, int dist) 276 | { 277 | int i, j, pos; 278 | ge4x t; 279 | ge4x_p1p1 t_p1p1; 280 | char idx[4], w[4][64]; 281 | 282 | // 283 | 284 | for (pos = 0; pos < 4; pos++) 285 | sc25519_window4(w[pos], &s[pos]); 286 | 287 | // 288 | 289 | for (i = dist-1; i < 64; i += dist) 290 | { 291 | for (pos = 0; pos < 4; pos++) 292 | idx[pos] = w[pos][i]; 293 | 294 | if (i == dist-1) 295 | ge4x_lookup_asm(a, table[i/dist], idx); 296 | 297 | else 298 | { 299 | ge4x_lookup_asm(&t, table[i/dist], idx); 300 | ge4x_add_p1p1_asm(&t_p1p1, a, &t); 301 | 302 | if (i + dist < 64) 303 | ge4x_p1p1_to_p3(a, &t_p1p1); 304 | else 305 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &t_p1p1); 306 | } 307 | } 308 | 309 | // 310 | 311 | for (i = dist-2; i >= 0; i--) 312 | { 313 | ge4x_idoubles(a, 4); 314 | 315 | /// 316 | 317 | for (j = i; j < 64; j += dist) 318 | { 319 | for (pos = 0; pos < 4; pos++) 320 | idx[pos] = w[pos][j]; 321 | 322 | ge4x_lookup_asm(&t, table[j/dist], idx); 323 | ge4x_add_p1p1_asm(&t_p1p1, a, &t); 324 | 325 | if (j + dist < 64 || i == 0) 326 | ge4x_p1p1_to_p3(a, &t_p1p1); 327 | else 328 | ge4x_p1p1_to_p2((ge4x_p2 *)a, &t_p1p1); 329 | } 330 | } 331 | } 332 | 333 | void ge4x_hash(unsigned char * k, 334 | unsigned char * sp, 335 | unsigned char * q, 336 | ge4x * p) 337 | { 338 | int i, j; 339 | 340 | unsigned char r[128]; 341 | unsigned char in[96]; 342 | 343 | // 344 | 345 | ge4x_pack(r, p); 346 | 347 | for (j = 0; j < 32; j++) in[j] = sp[j]; 348 | 349 | for (i = 0; i < 4; i++) 350 | { 351 | for (j = 0; j < 32; j++) in[j + 32] = 
q[i*32 + j]; 352 | for (j = 0; j < 32; j++) in[j + 64] = r[i*32 + j]; 353 | 354 | crypto_hash(k + i*32, in, sizeof(in)); 355 | } 356 | } 357 | 358 | -------------------------------------------------------------------------------- /ge4x.h: -------------------------------------------------------------------------------- 1 | #ifndef GE4X_H 2 | #define GE4X_H 3 | 4 | #include "gfe4x.h" 5 | #include "sc25519.h" 6 | 7 | typedef struct{ 8 | 9 | gfe4x x; 10 | gfe4x y; 11 | gfe4x z; 12 | gfe4x t; 13 | 14 | } ge4x; 15 | 16 | typedef struct{ 17 | 18 | gfe4x x; 19 | gfe4x y; 20 | gfe4x z; 21 | gfe4x t; 22 | 23 | } ge4x_p1p1; 24 | 25 | typedef struct{ 26 | 27 | gfe4x x; 28 | gfe4x y; 29 | gfe4x z; 30 | 31 | } ge4x_p2; 32 | 33 | typedef struct{ 34 | 35 | gfe4x x; 36 | gfe4x y; 37 | gfe4x z; 38 | 39 | } ge4x_niels; 40 | 41 | void ge4x_cmovs(ge4x *r, const ge4x *x, unsigned char * b); 42 | 43 | void ge4x_setneutral(ge4x * a); 44 | void ge4x_neg(ge4x * a, const ge4x * b); 45 | void ge4x_add(ge4x * a, const ge4x * b, const ge4x * c); 46 | void ge4x_sub(ge4x * c, const ge4x * a, const ge4x * b); 47 | void ge4x_double(ge4x * a, const ge4x * b); 48 | 49 | void ge4x_maketable(ge4x (*table)[8], const ge4x * b, int dist); 50 | 51 | void ge4x_scalarmults_base(ge4x * a, const sc25519 * s); 52 | void ge4x_scalarmults(ge4x * a, ge4x * b, const sc25519 * s); 53 | 54 | void ge4x_scalarsmults_base(ge4x * a, const sc25519 * s); 55 | void ge4x_scalarsmults_naive(ge4x * a, ge4x * b, const sc25519 * s); 56 | void ge4x_scalarsmults(ge4x * a, ge4x * b, const sc25519 * s); 57 | void ge4x_scalarsmults_table(ge4x * a, ge4x (*table)[8], const sc25519 * s, int dist); 58 | 59 | void ge4x_hash(unsigned char *, unsigned char *, unsigned char *, ge4x *); 60 | 61 | int ge4x_unpack_vartime(ge4x * r, unsigned char p[128]); 62 | void ge4x_pack(unsigned char r[128], const ge4x *p); 63 | 64 | #endif //ifndef GE4X_H 65 | 66 | -------------------------------------------------------------------------------- 
/ge4x_pack.c: -------------------------------------------------------------------------------- 1 | #include "ge4x.h" 2 | 3 | void ge4x_pack(unsigned char r[128], const ge4x *p) 4 | { 5 | gfe4x tx, ty, zi; 6 | gfe4x_invert(&zi, &p->z); 7 | gfe4x_mul(&tx, &p->x, &zi); 8 | gfe4x_mul(&ty, &p->y, &zi); 9 | gfe4x_pack(r, &ty); 10 | 11 | unsigned char res[4]; 12 | gfe4x_getparity(res, &tx); 13 | 14 | r[31] ^= res[0] << 7; 15 | r[63] ^= res[1] << 7; 16 | r[95] ^= res[2] << 7; 17 | r[127] ^= res[3] << 7; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /ge4x_unpack_vartime.c: -------------------------------------------------------------------------------- 1 | #include "ge4x.h" 2 | 3 | #define repeat4x(x) {x, x, x, x} 4 | 5 | static const gfe4x ecd = 6 | {{ 7 | { repeat4x(1669283.0) } , 8 | { repeat4x(6365464494080.0) } , 9 | { repeat4x(8496964286801772544.0) } , 10 | { repeat4x(2232111373151076874190848.0) } , 11 | { repeat4x(46685581617792537608331678711808.0) } , 12 | { repeat4x(581699507432729907758807166882938880.0) } , 13 | { repeat4x(577780403219205938406856134441126244032446464.0) } , 14 | { repeat4x(177523208890306348945851196309082379305990599213056.0) } , 15 | { repeat4x(3451874370071936564911941131139173765358227744322211545088.0) }, 16 | { repeat4x(6579526219895875656356633921828388576363750827422867231692816384.0) }, 17 | { repeat4x(6421035227432975939103771785161451305327472447327655068203314977964032.0) }, 18 | { repeat4x(37095699513627632380490373981730700158546889134672614427511079748754722521088.0) } 19 | }}; 20 | 21 | static const gfe4x sqrtm1 = 22 | {{ 23 | { repeat4x(958640.0) } , 24 | { repeat4x(3467280121856.0) } , 25 | { repeat4x(14190305864170078208.0) } , 26 | { repeat4x(19212800461602561875509248.0) } , 27 | { repeat4x(528948567410906390584241422336.0) } , 28 | { repeat4x(62822086469243367117680649821264805888.0) } , 29 | { repeat4x(620906088990931074573886915313991388311322624.0) } , 30 | { 
repeat4x(223962994315572871555725705183030052053612408864768.0) } , 31 | { repeat4x(1061732066906148624681392299747573581275857698087826882560.0) }, 32 | { repeat4x(769792371319145595473002035915072170938712359931846872489000960.0) }, 33 | { repeat4x(3459271828655849329356536893846285443202986369943387069203793412358144.0) }, 34 | { repeat4x(19681157917434907507524698511986411320718631367220517708432557327070548459520.0) } 35 | }}; 36 | 37 | int ge4x_unpack_vartime(ge4x * r, unsigned char p[128]) 38 | { 39 | int i; 40 | 41 | gfe4x t, chk, num, den, den2, den4, den6; 42 | unsigned char par[4]; 43 | unsigned char eq[4]; 44 | 45 | par[0] = p[31] >> 7; 46 | par[1] = p[63] >> 7; 47 | par[2] = p[95] >> 7; 48 | par[3] = p[127] >> 7; 49 | 50 | gfe4x_setone(&r->z); // ??? 51 | gfe4x_unpack(&r->y, p); // ??? 52 | gfe4x_square(&num, &r->y); /* x = y^2 */ 53 | gfe4x_mul(&den, &num, &ecd); /* den = dy^2 */ 54 | gfe4x_sub(&num, &num, &r->z); /* x = y^2-1 */ 55 | gfe4x_add(&den, &r->z, &den); /* den = dy^2+1 */ 56 | 57 | /* Computation of sqrt(num/den) 58 | 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) 59 | */ 60 | gfe4x_square(&den2, &den); 61 | gfe4x_square(&den4, &den2); 62 | gfe4x_mul(&den6, &den4, &den2); 63 | gfe4x_mul(&t, &den6, &num); 64 | gfe4x_mul(&t, &t, &den); 65 | 66 | gfe4x_pow2523(&t, &t); 67 | /* 2. computation of r->x = t * num * den^3 68 | */ 69 | gfe4x_mul(&t, &t, &num); 70 | gfe4x_mul(&t, &t, &den); 71 | gfe4x_mul(&t, &t, &den); 72 | gfe4x_mul(&r->x, &t, &den); 73 | 74 | /* 3. Check whether sqrt computation gave correct result, multiply by sqrt(-1) if not: 75 | */ 76 | gfe4x_square(&chk, &r->x); 77 | gfe4x_mul(&chk, &chk, &den); 78 | 79 | // if (!gfe4x_iseq_vartime(&chk, &num)) 80 | // gfe4x_mul(&r->x, &r->x, &sqrtm1); 81 | 82 | gfe4x_setone(&t); 83 | gfe4x_iseq_vartime(eq, &chk, &num); 84 | gfe4x_cmov_vartime(&t, &sqrtm1, eq); 85 | gfe4x_mul(&r->x, &r->x, &t); 86 | 87 | /* 4. 
Now we have one of the two square roots, except if input was not a square 88 | */ 89 | gfe4x_square(&chk, &r->x); 90 | gfe4x_mul(&chk, &chk, &den); 91 | gfe4x_iseq_vartime(eq, &chk, &num); 92 | 93 | if (eq[0] || eq[1] || eq[2] || eq[3]) return -1; 94 | 95 | /* 5. Choose the desired square root according to parity: 96 | */ 97 | 98 | gfe4x_getparity(eq, &r->x); 99 | 100 | for (i = 0; i < 4; i++) 101 | { 102 | if (eq[i] != par[i]) 103 | gfe4x_neg_single(&r->x, &r->x, i); 104 | } 105 | 106 | gfe4x_mul(&r->t, &r->x, &r->y); 107 | 108 | return 0; 109 | } 110 | 111 | -------------------------------------------------------------------------------- /gfe4x.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "gfe4x.h" 3 | 4 | /* 5 | extern void limb_add(limb *r, const limb *x, const limb *y); 6 | static void limb_add(limb *r, const limb *x, const limb *y) 7 | { 8 | r->v[0] = x->v[0] + y->v[0]; 9 | r->v[1] = x->v[1] + y->v[1]; 10 | r->v[2] = x->v[2] + y->v[2]; 11 | r->v[3] = x->v[3] + y->v[3]; 12 | } 13 | 14 | static void limb_sub(limb *r, const limb *x, const limb *y) 15 | { 16 | r->v[0] = x->v[0] - y->v[0]; 17 | r->v[1] = x->v[1] - y->v[1]; 18 | r->v[2] = x->v[2] - y->v[2]; 19 | r->v[3] = x->v[3] - y->v[3]; 20 | } 21 | 22 | extern void limb_mul(limb *r, const limb *x, const limb *y); 23 | static void limb_mul(limb *r, const limb *x, const limb *y) 24 | { 25 | r->v[0] = x->v[0] * y->v[0]; 26 | r->v[1] = x->v[1] * y->v[1]; 27 | r->v[2] = x->v[2] * y->v[2]; 28 | r->v[3] = x->v[3] * y->v[3]; 29 | } 30 | 31 | extern void limb_muladd(limb *r, const limb *x, const limb *y); 32 | 33 | void limb_muladd(limb *r, const limb *x, const limb *y) 34 | { 35 | r->v[0] += x->v[0] * y->v[0]; 36 | r->v[1] += x->v[1] * y->v[1]; 37 | r->v[2] += x->v[2] * y->v[2]; 38 | r->v[3] += x->v[3] * y->v[3]; 39 | } 40 | */ 41 | 42 | unsigned long long llutod = 0x3ff0000000000000ULL; 43 | 44 | 45 | static const double _2_22 = 4194304.; 46 | static 
const double _2_23 = 8388608.; 47 | static const double _2_43 = 8796093022208.; 48 | static const double _2_44 = 17592186044416.; 49 | static const double _2_64 = 18446744073709551616.; 50 | static const double _2_65 = 36893488147419103232.; 51 | static const double _2_85 = 38685626227668133590597632.; 52 | static const double _2_86 = 77371252455336267181195264.; 53 | static const double _2_107 = 162259276829213363391578010288128.; 54 | static const double _2_108 = 324518553658426726783156020576256.; 55 | static const double _2_128 = 340282366920938463463374607431768211456.; 56 | static const double _2_129 = 680564733841876926926749214863536422912.; 57 | static const double _2_149 = 713623846352979940529142984724747568191373312.; 58 | static const double _2_150 = 1427247692705959881058285969449495136382746624.; 59 | static const double _2_170 = 1496577676626844588240573268701473812127674924007424.; 60 | static const double _2_171 = 2993155353253689176481146537402947624255349848014848.; 61 | static const double _2_192 = 6277101735386680763835789423207666416102355444464034512896.; 62 | static const double _2_193 = 12554203470773361527671578846415332832204710888928069025792.; 63 | static const double _2_213 = 13164036458569648337239753460458804039861886925068638906788872192.; 64 | static const double _2_214 = 26328072917139296674479506920917608079723773850137277813577744384.; 65 | static const double _2_234 = 27606985387162255149739023449108101809804435888681546220650096895197184.; 66 | static const double _2_235 = 55213970774324510299478046898216203619608871777363092441300193790394368.; 67 | static const double _2_255 = 57896044618658097711785492504343953926634992332820282019728792003956564819968.; 68 | 69 | static const double _2_22inv = .0000002384185791015625; 70 | static const double _2_43inv = .0000000000001136868377216160297393798828125; 71 | static const double _2_64inv = .0000000000000000000542101086242752217003726400434970855712890625; 72 | static const 
double _2_85inv = .0000000000000000000000000258493941422821148397315216271863391739316284656524658203125; 73 | static const double _2_107inv = .00000000000000000000000000000000616297582203915472977912941627176741932192527428924222476780414581298828125; 74 | static const double _2_128inv = .00000000000000000000000000000000000000293873587705571876992184134305561419454666389193021880377187926569604314863681793212890625; 75 | static const double _2_149inv = .00000000000000000000000000000000000000000000140129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125; 76 | static const double _2_170inv = .00000000000000000000000000000000000000000000000000066819117752304891153513411678787046970379922002626217449048437304009966024678258966762456338983611203730106353759765625; 77 | static const double _2_192inv = .000000000000000000000000000000000000000000000000000000000159309191113245227702888039776771180559110455519261878607388585338616290151305816094308987472018268594098344692611135542392730712890625; 78 | static const double _2_213inv = .000000000000000000000000000000000000000000000000000000000000000075964541966078389979785938156495657233767726668959559730238239926632065845158489272264951454171308800744221063905303736873975140042603015899658203125; 79 | static const double _2_234inv = .000000000000000000000000000000000000000000000000000000000000000000000036222716315306849470036477163551167122730124792556552758330459559742005274371380458958125807843832397815809757187511318623530931493093021344975568354129791259765625; 80 | 81 | void gfe4x_unpack_single(gfe4x *r, const unsigned char * x, int i) 82 | { 83 | x += i*32; 84 | 85 | r->v[0].v[i] = x[0]; 86 | r->v[0].v[i] += x[1] << 8; 87 | r->v[0].v[i] += (x[2] & 0x3f) << 16; /* 22 bits */ 88 | r->v[0].v[i] = r->v[0].v[i] - _2_22 + 19; 89 | r->v[1].v[i] = x[2] >> 6; 90 | r->v[1].v[i] += x[3] << 2; 91 | r->v[1].v[i] += x[4] << 10; 92 | r->v[1].v[i] += (x[5] & 0x07) << 18; /* 21 bits */ 93 
| r->v[1].v[i] *= _2_22; 94 | r->v[1].v[i] = r->v[1].v[i] + _2_22 - _2_43; 95 | r->v[2].v[i] = x[5] >> 3; 96 | r->v[2].v[i] += x[6] << 5; 97 | r->v[2].v[i] += x[7] << 13; /* 21 bits */ 98 | r->v[2].v[i] *= _2_43; 99 | r->v[2].v[i] = r->v[2].v[i] + _2_43 - _2_64; 100 | r->v[3].v[i] = x[8]; 101 | r->v[3].v[i] += x[9] << 8; 102 | r->v[3].v[i] += (x[10] & 0x1f) << 16; /* 21 bits */ 103 | r->v[3].v[i] *= _2_64; 104 | r->v[3].v[i] = r->v[3].v[i] + _2_64 - _2_85; 105 | r->v[4].v[i] = x[10] >> 5; 106 | r->v[4].v[i] += x[11] << 3; 107 | r->v[4].v[i] += x[12] << 11; 108 | r->v[4].v[i] += (x[13] & 0x07) << 19; /* 22 bits */ 109 | r->v[4].v[i] *= _2_85; 110 | r->v[4].v[i] = r->v[4].v[i] + _2_85 - _2_107; 111 | r->v[5].v[i] = x[13] >> 3; 112 | r->v[5].v[i] += x[14] << 5; 113 | r->v[5].v[i] += x[15] << 13; /* 21 bits */ 114 | r->v[5].v[i] *= _2_107; 115 | r->v[5].v[i] = r->v[5].v[i] + _2_107 - _2_128; 116 | r->v[6].v[i] = x[16]; 117 | r->v[6].v[i] += x[17] << 8; 118 | r->v[6].v[i] += (x[18] & 0x1f) << 16; /* 21 bits */ 119 | r->v[6].v[i] *= _2_128; 120 | r->v[6].v[i] = r->v[6].v[i] + _2_128 - _2_149; 121 | r->v[7].v[i] = x[18] >> 5; 122 | r->v[7].v[i] += x[19] << 3; 123 | r->v[7].v[i] += x[20] << 11; 124 | r->v[7].v[i] += (x[21] & 0x03) << 19; /* 21 bits */ 125 | r->v[7].v[i] *= _2_149; 126 | r->v[7].v[i] = r->v[7].v[i] + _2_149 - _2_170; 127 | r->v[8].v[i] = x[21] >> 2; 128 | r->v[8].v[i] += x[22] << 6; 129 | r->v[8].v[i] += x[23] << 14; /* 22 bits */ 130 | r->v[8].v[i] *= _2_170; 131 | r->v[8].v[i] = r->v[8].v[i] + _2_170 - _2_192; 132 | r->v[9].v[i] = x[24]; 133 | r->v[9].v[i] += x[25] << 8; 134 | r->v[9].v[i] += (x[26] & 0x1f) << 16; /* 21 bits */ 135 | r->v[9].v[i] *= _2_192; 136 | r->v[9].v[i] = r->v[9].v[i] + _2_192 - _2_213; 137 | r->v[10].v[i] = x[26] >> 5; 138 | r->v[10].v[i] += x[27] << 3; 139 | r->v[10].v[i] += x[28] << 11; 140 | r->v[10].v[i] += (x[29] & 0x03) << 19; /* 21 bits */ 141 | r->v[10].v[i] *= _2_213; 142 | r->v[10].v[i] = r->v[10].v[i] + _2_213 - _2_234; 
143 | r->v[11].v[i] = x[29] >> 2; 144 | r->v[11].v[i] += x[30] << 6; 145 | r->v[11].v[i] += (x[31] & 0x7f) << 14; /* 21 bits */ 146 | r->v[11].v[i] *= _2_234; 147 | r->v[11].v[i] = r->v[11].v[i] + _2_234 - _2_255; 148 | } 149 | 150 | void gfe4x_unpack(gfe4x *r, const unsigned char x[128]) 151 | { 152 | gfe4x_unpack_single(r, x, 0); 153 | gfe4x_unpack_single(r, x, 1); 154 | gfe4x_unpack_single(r, x, 2); 155 | gfe4x_unpack_single(r, x, 3); 156 | } 157 | 158 | void gfe4x_pack(unsigned char r[128], const gfe4x *x) 159 | { 160 | double u[12]; 161 | int i,j; 162 | unsigned long long t; 163 | unsigned char minusr[32]; 164 | for(i=0;i<4;i++) 165 | { 166 | u[0] = x->v[0].v[i] + _2_23 - 19.; 167 | u[1] = x->v[1].v[i] + _2_44 - _2_23; 168 | u[1] *= _2_22inv; 169 | u[2] = x->v[2].v[i] + _2_65 - _2_44; 170 | u[2] *= _2_43inv; 171 | u[3] = x->v[3].v[i] + _2_86 - _2_65; 172 | u[3] *= _2_64inv; 173 | u[4] = x->v[4].v[i] + _2_108 - _2_86; 174 | u[4] *= _2_85inv; 175 | u[5] = x->v[5].v[i] + _2_129 - _2_108; 176 | u[5] *= _2_107inv; 177 | u[6] = x->v[6].v[i] + _2_150 - _2_129; 178 | u[6] *= _2_128inv; 179 | u[7] = x->v[7].v[i] + _2_171 - _2_150; 180 | u[7] *= _2_149inv; 181 | u[8] = x->v[8].v[i] + _2_193 - _2_171; 182 | u[8] *= _2_170inv; 183 | u[9] = x->v[9].v[i] + _2_214 - _2_193; 184 | u[9] *= _2_192inv; 185 | u[10] = x->v[10].v[i] + _2_235 - _2_214; 186 | u[10] *= _2_213inv; 187 | u[11] = x->v[11].v[i] + _2_255 - _2_235; 188 | u[11] *= _2_234inv; 189 | 190 | t = u[0]; 191 | r[0] = t & 0xff; 192 | r[1] = (t >> 8) & 0xff; 193 | t = ((unsigned long long)u[1] << 6) + (t >> 16); 194 | r[2] = t & 0xff; 195 | r[3] = (t >> 8) & 0xff; 196 | r[4] = (t >> 16) & 0xff; 197 | t = ((unsigned long long)u[2] << 3) + (t >> 24); 198 | r[5] = t & 0xff; 199 | r[6] = (t >> 8) & 0xff; 200 | r[7] = (t >> 16) & 0xff; 201 | t = (unsigned long long)u[3] + (t >> 24); 202 | r[8] = t & 0xff; 203 | r[9] = (t >> 8) & 0xff; 204 | t = ((unsigned long long)u[4] << 5) + (t >> 16); 205 | r[10] = t & 0xff; 206 | 
r[11] = (t >> 8) & 0xff; 207 | r[12] = (t >> 16) & 0xff; 208 | t = ((unsigned long long)u[5] << 3) + (t >> 24); 209 | r[13] = t & 0xff; 210 | r[14] = (t >> 8) & 0xff; 211 | r[15] = (t >> 16) & 0xff; 212 | t = (unsigned long long)u[6] + (t >> 24); 213 | r[16] = t & 0xff; 214 | r[17] = (t >> 8) & 0xff; 215 | t = ((unsigned long long)u[7] << 5) + (t >> 16); 216 | r[18] = t & 0xff; 217 | r[19] = (t >> 8) & 0xff; 218 | r[20] = (t >> 16) & 0xff; 219 | t = ((unsigned long long)u[8] << 2) + (t >> 24); 220 | r[21] = t & 0xff; 221 | r[22] = (t >> 8) & 0xff; 222 | r[23] = (t >> 16) & 0xff; 223 | t = (unsigned long long)u[9] + (t >> 24); 224 | r[24] = t & 0xff; 225 | r[25] = (t >> 8) & 0xff; 226 | t = ((unsigned long long)u[10] << 5) + (t >> 16); 227 | r[26] = t & 0xff; 228 | r[27] = (t >> 8) & 0xff; 229 | r[28] = (t >> 16) & 0xff; 230 | t = ((unsigned long long)u[11] << 2) + (t >> 24); 231 | r[29] = t & 0xff; 232 | r[30] = (t >> 8) & 0xff; 233 | r[31] = (t >> 16) & 0xff; 234 | 235 | //freeze by adding 19: 236 | t = (unsigned long long)r[0] + 19; 237 | for(j=0;j<31;j++) 238 | { 239 | minusr[j] = t & 0xff; 240 | t = (unsigned long long)r[j+1] + (t >> 8); 241 | } 242 | minusr[31] = t & 0x7f; 243 | t >>= 7; 244 | for(j=0;j<32;j++) 245 | r[j] = t * minusr[j] + (1-t) * r[j]; 246 | 247 | r += 32; 248 | } 249 | } 250 | 251 | void gfe4x_neg_single(gfe4x *r, const gfe4x *x, int pos) 252 | { 253 | int i; 254 | 255 | for (i = 0; i < 12; i++) 256 | r->v[i].v[pos] = -(x->v[i].v[pos]); 257 | } 258 | 259 | void gfe4x_neg(gfe4x *r, const gfe4x *x) 260 | { 261 | gfe4x_neg_single(r, x, 0); 262 | gfe4x_neg_single(r, x, 1); 263 | gfe4x_neg_single(r, x, 2); 264 | gfe4x_neg_single(r, x, 3); 265 | } 266 | 267 | /* 268 | void gfe4x_add(gfe4x *r, const gfe4x *x, const gfe4x *y) 269 | { 270 | int i; 271 | for(i=0;i<12;i++) 272 | limb_add(r->v+i, x->v+i, y->v+i); 273 | } 274 | 275 | void gfe4x_sub(gfe4x *r, const gfe4x *x, const gfe4x *y) 276 | { 277 | int i; 278 | for(i=0;i<12;i++) 279 | 
limb_sub(r->v+i, x->v+i, y->v+i); 280 | } 281 | */ 282 | 283 | void gfe4x_setzero(gfe4x *r) 284 | { 285 | int i; 286 | for(i=0;i<12;i++) 287 | { 288 | r->v[i].v[0] = 0.; 289 | r->v[i].v[1] = 0.; 290 | r->v[i].v[2] = 0.; 291 | r->v[i].v[3] = 0.; 292 | } 293 | } 294 | 295 | void gfe4x_setone(gfe4x *r) 296 | { 297 | int i; 298 | r->v[0].v[0] = 1.; 299 | r->v[0].v[1] = 1.; 300 | r->v[0].v[2] = 1.; 301 | r->v[0].v[3] = 1.; 302 | for(i=1;i<12;i++) 303 | { 304 | r->v[i].v[0] = 0.; 305 | r->v[i].v[1] = 0.; 306 | r->v[i].v[2] = 0.; 307 | r->v[i].v[3] = 0.; 308 | } 309 | } 310 | 311 | void gfe4x_settwo(gfe4x *r) 312 | { 313 | int i; 314 | r->v[0].v[0] = 2.; 315 | r->v[0].v[1] = 2.; 316 | r->v[0].v[2] = 2.; 317 | r->v[0].v[3] = 2.; 318 | for(i=1;i<12;i++) 319 | { 320 | r->v[i].v[0] = 0.; 321 | r->v[i].v[1] = 0.; 322 | r->v[i].v[2] = 0.; 323 | r->v[i].v[3] = 0.; 324 | } 325 | } 326 | 327 | /* b[i] is either 0 or 1 */ 328 | static void gfe4x_cmov_single(gfe4x *r, const gfe4x *x, unsigned char b, int pos) 329 | { 330 | int i; 331 | 332 | for (i = 0; i < 12; i++) 333 | r->v[i].v[pos] = (double)b * x->v[i].v[pos] + (double)(1-b) * r->v[i].v[pos]; 334 | } 335 | 336 | void gfe4x_cmov(gfe4x * r, const gfe4x * x, unsigned char * b) 337 | { 338 | gfe4x_cmov_single(r, x, b[0], 0); 339 | gfe4x_cmov_single(r, x, b[1], 1); 340 | gfe4x_cmov_single(r, x, b[2], 2); 341 | gfe4x_cmov_single(r, x, b[3], 3); 342 | } 343 | 344 | void gfe4x_cmov_vartime(gfe4x * r, const gfe4x * x, unsigned char * b) 345 | { 346 | int pos, j; 347 | 348 | for (pos = 0; pos < 4; pos++) 349 | { 350 | if (b[pos]) 351 | { 352 | for (j = 0; j < 12; j++) 353 | r->v[j].v[pos] = x->v[j].v[pos]; 354 | } 355 | } 356 | } 357 | 358 | //static const unsigned long long d25519 = 0x3043000000000000; /* 19* 2^-255 */ 359 | 360 | const limb scale19 = {{ 361 | 
0.000000000000000000000000000000000000000000000000000000000000000000000000000328174405093588895764681370786415183702408013848578692630900731866406488520172228202917285131946952609300797169953687216685813759979613974358814143528206841438077390193939208984375, 362 | 0.000000000000000000000000000000000000000000000000000000000000000000000000000328174405093588895764681370786415183702408013848578692630900731866406488520172228202917285131946952609300797169953687216685813759979613974358814143528206841438077390193939208984375, 363 | 0.000000000000000000000000000000000000000000000000000000000000000000000000000328174405093588895764681370786415183702408013848578692630900731866406488520172228202917285131946952609300797169953687216685813759979613974358814143528206841438077390193939208984375, 364 | 0.000000000000000000000000000000000000000000000000000000000000000000000000000328174405093588895764681370786415183702408013848578692630900731866406488520172228202917285131946952609300797169953687216685813759979613974358814143528206841438077390193939208984375}}; 365 | 366 | const limb two4x = {{2., 2., 2., 2.}}; 367 | const limb _4x121666 = {{121666., 121666., 121666., 121666.}}; 368 | 369 | const limb alpha22 = {{ 370 | 28334198897217871282176.0, 371 | 28334198897217871282176.0, 372 | 28334198897217871282176.0, 373 | 28334198897217871282176.0}}; 374 | const limb alpha43 = {{ 375 | 59421121885698253195157962752.0, 376 | 59421121885698253195157962752.0, 377 | 59421121885698253195157962752.0, 378 | 59421121885698253195157962752.0}}; 379 | const limb alpha64 = {{ 380 | 124615124604835863084731911901282304.0, 381 | 124615124604835863084731911901282304.0, 382 | 124615124604835863084731911901282304.0, 383 | 124615124604835863084731911901282304.0}}; 384 | const limb alpha85 = {{ 385 | 261336857795280739939871698507597986398208.0, 386 | 261336857795280739939871698507597986398208.0, 387 | 261336857795280739939871698507597986398208.0, 388 | 261336857795280739939871698507597986398208.0}}; 389 | 
const limb alpha107 = {{ 390 | 1096126227998177188652763624537212264741949407232.0, 391 | 1096126227998177188652763624537212264741949407232.0, 392 | 1096126227998177188652763624537212264741949407232.0, 393 | 1096126227998177188652763624537212264741949407232.0}}; 394 | const limb alpha128 = {{ 395 | 2298743311298833287537520540725463775428108683275403264.0, 396 | 2298743311298833287537520540725463775428108683275403264.0, 397 | 2298743311298833287537520540725463775428108683275403264.0, 398 | 2298743311298833287537520540725463775428108683275403264.0}}; 399 | const limb alpha149 = {{ 400 | 4820814132776970826625886277023487807566608981348378505904128.0, 401 | 4820814132776970826625886277023487807566608981348378505904128.0, 402 | 4820814132776970826625886277023487807566608981348378505904128.0, 403 | 4820814132776970826625886277023487807566608981348378505904128.0}}; 404 | const limb alpha170 = {{ 405 | 10109980000181489923000130657632361502613929158452714680413853843456.0, 406 | 10109980000181489923000130657632361502613929158452714680413853843456.0, 407 | 10109980000181489923000130657632361502613929158452714680413853843456.0, 408 | 10109980000181489923000130657632361502613929158452714680413853843456.0}}; 409 | const limb alpha192 = {{ 410 | 42404329554681223909999140017830044379859613525014854994918548831022874624.0, 411 | 42404329554681223909999140017830044379859613525014854994918548831022874624.0, 412 | 42404329554681223909999140017830044379859613525014854994918548831022874624.0, 413 | 42404329554681223909999140017830044379859613525014854994918548831022874624.0}}; 414 | const limb alpha213 = {{ 415 | 88928324534258838085302516486672313231311348223211953182303424518077283563470848.0, 416 | 88928324534258838085302516486672313231311348223211953182303424518077283563470848.0, 417 | 88928324534258838085302516486672313231311348223211953182303424518077283563470848.0, 418 | 88928324534258838085302516486672313231311348223211953182303424518077283563470848.0}}; 419 | const limb 
alpha234 = {{ 420 | 186496213653669990808268343055057815037671056549005394040173991334934811379700015824896.0, 421 | 186496213653669990808268343055057815037671056549005394040173991334934811379700015824896.0, 422 | 186496213653669990808268343055057815037671056549005394040173991334934811379700015824896.0, 423 | 186496213653669990808268343055057815037671056549005394040173991334934811379700015824896.0}}; 424 | const limb alpha255 = {{ 425 | 391110907456221328563541572174600606921881931583859760122138966276041209554560647587212296192.0, 426 | 391110907456221328563541572174600606921881931583859760122138966276041209554560647587212296192.0, 427 | 391110907456221328563541572174600606921881931583859760122138966276041209554560647587212296192.0, 428 | 391110907456221328563541572174600606921881931583859760122138966276041209554560647587212296192.0}}; 429 | 430 | /* For carrying see http://cr.yp.to/highspeed/fall2006.html */ 431 | /* 432 | void gfe4x_mul(gfe4x *rr, const gfe4x *x, const gfe4x *y) 433 | { 434 | limb y19[12]; 435 | limb t; 436 | 437 | gfe4x r; 438 | 439 | limb_mul(y19+ 1,y->v+ 1, &scale19); 440 | limb_mul(y19+ 2,y->v+ 2, &scale19); 441 | limb_mul(y19+ 3,y->v+ 3, &scale19); 442 | limb_mul(y19+ 4,y->v+ 4, &scale19); 443 | limb_mul(y19+ 5,y->v+ 5, &scale19); 444 | limb_mul(y19+ 6,y->v+ 6, &scale19); 445 | limb_mul(y19+ 7,y->v+ 7, &scale19); 446 | limb_mul(y19+ 8,y->v+ 8, &scale19); 447 | limb_mul(y19+ 9,y->v+ 9, &scale19); 448 | limb_mul(y19+10,y->v+10, &scale19); 449 | limb_mul(y19+11,y->v+11, &scale19); 450 | 451 | limb_mul(r.v+ 0, x->v+ 0, y->v+ 0); 452 | limb_mul(r.v+ 1, x->v+ 0, y->v+ 1); 453 | limb_mul(r.v+ 2, x->v+ 0, y->v+ 2); 454 | limb_mul(r.v+ 3, x->v+ 0, y->v+ 3); 455 | limb_mul(r.v+ 4, x->v+ 0, y->v+ 4); 456 | limb_mul(r.v+ 5, x->v+ 0, y->v+ 5); 457 | limb_mul(r.v+ 6, x->v+ 0, y->v+ 6); 458 | limb_mul(r.v+ 7, x->v+ 0, y->v+ 7); 459 | limb_mul(r.v+ 8, x->v+ 0, y->v+ 8); 460 | limb_mul(r.v+ 9, x->v+ 0, y->v+ 9); 461 | limb_mul(r.v+10, x->v+ 0, y->v+10); 462 
| limb_mul(r.v+11, x->v+ 0, y->v+11); 463 | 464 | limb_muladd(r.v+ 1, x->v+ 1, y->v+ 0); 465 | limb_muladd(r.v+ 2, x->v+ 1, y->v+ 1); 466 | limb_muladd(r.v+ 3, x->v+ 1, y->v+ 2); 467 | limb_muladd(r.v+ 4, x->v+ 1, y->v+ 3); 468 | limb_muladd(r.v+ 5, x->v+ 1, y->v+ 4); 469 | limb_muladd(r.v+ 6, x->v+ 1, y->v+ 5); 470 | limb_muladd(r.v+ 7, x->v+ 1, y->v+ 6); 471 | limb_muladd(r.v+ 8, x->v+ 1, y->v+ 7); 472 | limb_muladd(r.v+ 9, x->v+ 1, y->v+ 8); 473 | limb_muladd(r.v+10, x->v+ 1, y->v+ 9); 474 | limb_muladd(r.v+11, x->v+ 1, y->v+10); 475 | limb_muladd(r.v+ 0, x->v+ 1, y19+11); 476 | 477 | limb_muladd(r.v+ 2, x->v+ 2, y->v+ 0); 478 | limb_muladd(r.v+ 3, x->v+ 2, y->v+ 1); 479 | limb_muladd(r.v+ 4, x->v+ 2, y->v+ 2); 480 | limb_muladd(r.v+ 5, x->v+ 2, y->v+ 3); 481 | limb_muladd(r.v+ 6, x->v+ 2, y->v+ 4); 482 | limb_muladd(r.v+ 7, x->v+ 2, y->v+ 5); 483 | limb_muladd(r.v+ 8, x->v+ 2, y->v+ 6); 484 | limb_muladd(r.v+ 9, x->v+ 2, y->v+ 7); 485 | limb_muladd(r.v+10, x->v+ 2, y->v+ 8); 486 | limb_muladd(r.v+11, x->v+ 2, y->v+ 9); 487 | limb_muladd(r.v+ 0, x->v+ 2, y19+10); 488 | limb_muladd(r.v+ 1, x->v+ 2, y19+11); 489 | 490 | limb_muladd(r.v+ 3, x->v+ 3, y->v+ 0); 491 | limb_muladd(r.v+ 4, x->v+ 3, y->v+ 1); 492 | limb_muladd(r.v+ 5, x->v+ 3, y->v+ 2); 493 | limb_muladd(r.v+ 6, x->v+ 3, y->v+ 3); 494 | limb_muladd(r.v+ 7, x->v+ 3, y->v+ 4); 495 | limb_muladd(r.v+ 8, x->v+ 3, y->v+ 5); 496 | limb_muladd(r.v+ 9, x->v+ 3, y->v+ 6); 497 | limb_muladd(r.v+10, x->v+ 3, y->v+ 7); 498 | limb_muladd(r.v+11, x->v+ 3, y->v+ 8); 499 | limb_muladd(r.v+ 0, x->v+ 3, y19+ 9); 500 | limb_muladd(r.v+ 1, x->v+ 3, y19+10); 501 | limb_muladd(r.v+ 2, x->v+ 3, y19+11); 502 | 503 | limb_muladd(r.v+ 4, x->v+ 4, y->v+ 0); 504 | limb_muladd(r.v+ 5, x->v+ 4, y->v+ 1); 505 | limb_muladd(r.v+ 6, x->v+ 4, y->v+ 2); 506 | limb_muladd(r.v+ 7, x->v+ 4, y->v+ 3); 507 | limb_muladd(r.v+ 8, x->v+ 4, y->v+ 4); 508 | limb_muladd(r.v+ 9, x->v+ 4, y->v+ 5); 509 | limb_muladd(r.v+10, x->v+ 4, y->v+ 6); 510 | 
limb_muladd(r.v+11, x->v+ 4, y->v+ 7); 511 | limb_muladd(r.v+ 0, x->v+ 4, y19+ 8); 512 | limb_muladd(r.v+ 1, x->v+ 4, y19+ 9); 513 | limb_muladd(r.v+ 2, x->v+ 4, y19+10); 514 | limb_muladd(r.v+ 3, x->v+ 4, y19+11); 515 | 516 | limb_muladd(r.v+ 5, x->v+ 5, y->v+ 0); 517 | limb_muladd(r.v+ 6, x->v+ 5, y->v+ 1); 518 | limb_muladd(r.v+ 7, x->v+ 5, y->v+ 2); 519 | limb_muladd(r.v+ 8, x->v+ 5, y->v+ 3); 520 | limb_muladd(r.v+ 9, x->v+ 5, y->v+ 4); 521 | limb_muladd(r.v+10, x->v+ 5, y->v+ 5); 522 | limb_muladd(r.v+11, x->v+ 5, y->v+ 6); 523 | limb_muladd(r.v+ 0, x->v+ 5, y19+ 7); 524 | limb_muladd(r.v+ 1, x->v+ 5, y19+ 8); 525 | limb_muladd(r.v+ 2, x->v+ 5, y19+ 9); 526 | limb_muladd(r.v+ 3, x->v+ 5, y19+10); 527 | limb_muladd(r.v+ 4, x->v+ 5, y19+11); 528 | 529 | limb_muladd(r.v+ 6, x->v+ 6, y->v+ 0); 530 | limb_muladd(r.v+ 7, x->v+ 6, y->v+ 1); 531 | limb_muladd(r.v+ 8, x->v+ 6, y->v+ 2); 532 | limb_muladd(r.v+ 9, x->v+ 6, y->v+ 3); 533 | limb_muladd(r.v+10, x->v+ 6, y->v+ 4); 534 | limb_muladd(r.v+11, x->v+ 6, y->v+ 5); 535 | limb_muladd(r.v+ 0, x->v+ 6, y19+ 6); 536 | limb_muladd(r.v+ 1, x->v+ 6, y19+ 7); 537 | limb_muladd(r.v+ 2, x->v+ 6, y19+ 8); 538 | limb_muladd(r.v+ 3, x->v+ 6, y19+ 9); 539 | limb_muladd(r.v+ 4, x->v+ 6, y19+10); 540 | limb_muladd(r.v+ 5, x->v+ 6, y19+11); 541 | 542 | limb_muladd(r.v+ 7, x->v+ 7, y->v+ 0); 543 | limb_muladd(r.v+ 8, x->v+ 7, y->v+ 1); 544 | limb_muladd(r.v+ 9, x->v+ 7, y->v+ 2); 545 | limb_muladd(r.v+10, x->v+ 7, y->v+ 3); 546 | limb_muladd(r.v+11, x->v+ 7, y->v+ 4); 547 | limb_muladd(r.v+ 0, x->v+ 7, y19+ 5); 548 | limb_muladd(r.v+ 1, x->v+ 7, y19+ 6); 549 | limb_muladd(r.v+ 2, x->v+ 7, y19+ 7); 550 | limb_muladd(r.v+ 3, x->v+ 7, y19+ 8); 551 | limb_muladd(r.v+ 4, x->v+ 7, y19+ 9); 552 | limb_muladd(r.v+ 5, x->v+ 7, y19+10); 553 | limb_muladd(r.v+ 6, x->v+ 7, y19+11); 554 | 555 | limb_muladd(r.v+ 8, x->v+ 8, y->v+ 0); 556 | limb_muladd(r.v+ 9, x->v+ 8, y->v+ 1); 557 | limb_muladd(r.v+10, x->v+ 8, y->v+ 2); 558 | 
limb_muladd(r.v+11, x->v+ 8, y->v+ 3); 559 | limb_muladd(r.v+ 0, x->v+ 8, y19+ 4); 560 | limb_muladd(r.v+ 1, x->v+ 8, y19+ 5); 561 | limb_muladd(r.v+ 2, x->v+ 8, y19+ 6); 562 | limb_muladd(r.v+ 3, x->v+ 8, y19+ 7); 563 | limb_muladd(r.v+ 4, x->v+ 8, y19+ 8); 564 | limb_muladd(r.v+ 5, x->v+ 8, y19+ 9); 565 | limb_muladd(r.v+ 6, x->v+ 8, y19+10); 566 | limb_muladd(r.v+ 7, x->v+ 8, y19+11); 567 | 568 | limb_muladd(r.v+ 9, x->v+ 9, y->v+ 0); 569 | limb_muladd(r.v+10, x->v+ 9, y->v+ 1); 570 | limb_muladd(r.v+11, x->v+ 9, y->v+ 2); 571 | limb_muladd(r.v+ 0, x->v+ 9, y19+ 3); 572 | limb_muladd(r.v+ 1, x->v+ 9, y19+ 4); 573 | limb_muladd(r.v+ 2, x->v+ 9, y19+ 5); 574 | limb_muladd(r.v+ 3, x->v+ 9, y19+ 6); 575 | limb_muladd(r.v+ 4, x->v+ 9, y19+ 7); 576 | limb_muladd(r.v+ 5, x->v+ 9, y19+ 8); 577 | limb_muladd(r.v+ 6, x->v+ 9, y19+ 9); 578 | limb_muladd(r.v+ 7, x->v+ 9, y19+10); 579 | limb_muladd(r.v+ 8, x->v+ 9, y19+11); 580 | 581 | limb_muladd(r.v+10, x->v+10, y->v+ 0); 582 | limb_muladd(r.v+11, x->v+10, y->v+ 1); 583 | limb_muladd(r.v+ 0, x->v+10, y19+ 2); 584 | limb_muladd(r.v+ 1, x->v+10, y19+ 3); 585 | limb_muladd(r.v+ 2, x->v+10, y19+ 4); 586 | limb_muladd(r.v+ 3, x->v+10, y19+ 5); 587 | limb_muladd(r.v+ 4, x->v+10, y19+ 6); 588 | limb_muladd(r.v+ 5, x->v+10, y19+ 7); 589 | limb_muladd(r.v+ 6, x->v+10, y19+ 8); 590 | limb_muladd(r.v+ 7, x->v+10, y19+ 9); 591 | limb_muladd(r.v+ 8, x->v+10, y19+10); 592 | limb_muladd(r.v+ 9, x->v+10, y19+11); 593 | 594 | limb_muladd(r.v+11, x->v+11, y->v+ 0); 595 | limb_muladd(r.v+ 0, x->v+11, y19+ 1); 596 | limb_muladd(r.v+ 1, x->v+11, y19+ 2); 597 | limb_muladd(r.v+ 2, x->v+11, y19+ 3); 598 | limb_muladd(r.v+ 3, x->v+11, y19+ 4); 599 | limb_muladd(r.v+ 4, x->v+11, y19+ 5); 600 | limb_muladd(r.v+ 5, x->v+11, y19+ 6); 601 | limb_muladd(r.v+ 6, x->v+11, y19+ 7); 602 | limb_muladd(r.v+ 7, x->v+11, y19+ 8); 603 | limb_muladd(r.v+ 8, x->v+11, y19+ 9); 604 | limb_muladd(r.v+ 9, x->v+11, y19+10); 605 | limb_muladd(r.v+10, x->v+11, y19+11); 
606 | 607 | limb_add(&t, r.v+0, &alpha22); 608 | limb_sub(&t, &t, &alpha22); 609 | limb_sub(r.v+0, r.v+0, &t); 610 | limb_add(r.v+1, r.v+1, &t); 611 | 612 | limb_add(&t, r.v+1, &alpha43); 613 | limb_sub(&t, &t, &alpha43); 614 | limb_sub(r.v+1, r.v+1, &t); 615 | limb_add(r.v+2, r.v+2, &t); 616 | 617 | limb_add(&t, r.v+2, &alpha64); 618 | limb_sub(&t, &t, &alpha64); 619 | limb_sub(r.v+2, r.v+2, &t); 620 | limb_add(r.v+3, r.v+3, &t); 621 | 622 | limb_add(&t, r.v+3, &alpha85); 623 | limb_sub(&t, &t, &alpha85); 624 | limb_sub(r.v+3, r.v+3, &t); 625 | limb_add(r.v+4, r.v+4, &t); 626 | 627 | limb_add(&t, r.v+4, &alpha107); 628 | limb_sub(&t, &t, &alpha107); 629 | limb_sub(r.v+4, r.v+4, &t); 630 | limb_add(r.v+5, r.v+5, &t); 631 | 632 | limb_add(&t, r.v+5, &alpha128); 633 | limb_sub(&t, &t, &alpha128); 634 | limb_sub(r.v+5, r.v+5, &t); 635 | limb_add(r.v+6, r.v+6, &t); 636 | 637 | limb_add(&t, r.v+6, &alpha149); 638 | limb_sub(&t, &t, &alpha149); 639 | limb_sub(r.v+6, r.v+6, &t); 640 | limb_add(r.v+7, r.v+7, &t); 641 | 642 | limb_add(&t, r.v+7, &alpha170); 643 | limb_sub(&t, &t, &alpha170); 644 | limb_sub(r.v+7, r.v+7, &t); 645 | limb_add(r.v+8, r.v+8, &t); 646 | 647 | limb_add(&t, r.v+8, &alpha192); 648 | limb_sub(&t, &t, &alpha192); 649 | limb_sub(r.v+8, r.v+8, &t); 650 | limb_add(r.v+9, r.v+9, &t); 651 | 652 | limb_add(&t, r.v+9, &alpha213); 653 | limb_sub(&t, &t, &alpha213); 654 | limb_sub(r.v+9, r.v+9, &t); 655 | limb_add(r.v+10, r.v+10, &t); 656 | 657 | limb_add(&t, r.v+10, &alpha234); 658 | limb_sub(&t, &t, &alpha234); 659 | limb_sub(r.v+10, r.v+10, &t); 660 | limb_add(r.v+11, r.v+11, &t); 661 | 662 | limb_add(&t, r.v+11, &alpha255); 663 | limb_sub(&t, &t, &alpha255); 664 | limb_sub(r.v+11, r.v+11, &t); 665 | limb_mul(&t, &t, &scale19); 666 | limb_add(r.v+0, r.v+0, &t); 667 | 668 | limb_add(&t, r.v+0, &alpha22); 669 | limb_sub(&t, &t, &alpha22); 670 | limb_sub(r.v+0, r.v+0, &t); 671 | limb_add(r.v+1, r.v+1, &t); 672 | 673 | *rr = r; 674 | } 675 | 676 | void 
gfe4x_square(gfe4x *r, const gfe4x *x) 677 | { 678 | gfe4x_mul(r, x, x); 679 | } 680 | */ 681 | 682 | void gfe4x_invert(gfe4x *r, const gfe4x *x) 683 | { 684 | gfe4x z2; 685 | gfe4x z9; 686 | gfe4x z11; 687 | gfe4x z2_5_0; 688 | gfe4x z2_10_0; 689 | gfe4x z2_20_0; 690 | gfe4x z2_50_0; 691 | gfe4x z2_100_0; 692 | gfe4x t0; 693 | gfe4x t1; 694 | int i; 695 | 696 | /* 2 */ gfe4x_square(&z2,x); 697 | /* 4 */ gfe4x_square(&t1,&z2); 698 | /* 8 */ gfe4x_square(&t0,&t1); 699 | /* 9 */ gfe4x_mul(&z9,&t0,x); 700 | /* 11 */ gfe4x_mul(&z11,&z9,&z2); 701 | /* 22 */ gfe4x_square(&t0,&z11); 702 | /* 2^5 - 2^0 = 31 */ gfe4x_mul(&z2_5_0,&t0,&z9); 703 | 704 | /* 2^6 - 2^1 */ gfe4x_square(&t0,&z2_5_0); 705 | /* 2^7 - 2^2 */ gfe4x_square(&t1,&t0); 706 | /* 2^8 - 2^3 */ gfe4x_square(&t0,&t1); 707 | /* 2^9 - 2^4 */ gfe4x_square(&t1,&t0); 708 | /* 2^10 - 2^5 */ gfe4x_square(&t0,&t1); 709 | /* 2^10 - 2^0 */ gfe4x_mul(&z2_10_0,&t0,&z2_5_0); 710 | 711 | /* 2^11 - 2^1 */ gfe4x_square(&t0,&z2_10_0); 712 | /* 2^12 - 2^2 */ gfe4x_square(&t1,&t0); 713 | /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { gfe4x_square(&t0,&t1); gfe4x_square(&t1,&t0); } 714 | /* 2^20 - 2^0 */ gfe4x_mul(&z2_20_0,&t1,&z2_10_0); 715 | 716 | /* 2^21 - 2^1 */ gfe4x_square(&t0,&z2_20_0); 717 | /* 2^22 - 2^2 */ gfe4x_square(&t1,&t0); 718 | /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { gfe4x_square(&t0,&t1); gfe4x_square(&t1,&t0); } 719 | /* 2^40 - 2^0 */ gfe4x_mul(&t0,&t1,&z2_20_0); 720 | 721 | /* 2^41 - 2^1 */ gfe4x_square(&t1,&t0); 722 | /* 2^42 - 2^2 */ gfe4x_square(&t0,&t1); 723 | /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { gfe4x_square(&t1,&t0); gfe4x_square(&t0,&t1); } 724 | /* 2^50 - 2^0 */ gfe4x_mul(&z2_50_0,&t0,&z2_10_0); 725 | 726 | /* 2^51 - 2^1 */ gfe4x_square(&t0,&z2_50_0); 727 | /* 2^52 - 2^2 */ gfe4x_square(&t1,&t0); 728 | /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { gfe4x_square(&t0,&t1); gfe4x_square(&t1,&t0); } 729 | /* 2^100 - 2^0 */ gfe4x_mul(&z2_100_0,&t1,&z2_50_0); 730 | 731 | /* 2^101 - 2^1 */ 
gfe4x_square(&t1,&z2_100_0); 732 | /* 2^102 - 2^2 */ gfe4x_square(&t0,&t1); 733 | /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { gfe4x_square(&t1,&t0); gfe4x_square(&t0,&t1); } 734 | /* 2^200 - 2^0 */ gfe4x_mul(&t1,&t0,&z2_100_0); 735 | 736 | /* 2^201 - 2^1 */ gfe4x_square(&t0,&t1); 737 | /* 2^202 - 2^2 */ gfe4x_square(&t1,&t0); 738 | /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { gfe4x_square(&t0,&t1); gfe4x_square(&t1,&t0); } 739 | /* 2^250 - 2^0 */ gfe4x_mul(&t0,&t1,&z2_50_0); 740 | 741 | /* 2^251 - 2^1 */ gfe4x_square(&t1,&t0); 742 | /* 2^252 - 2^2 */ gfe4x_square(&t0,&t1); 743 | /* 2^253 - 2^3 */ gfe4x_square(&t1,&t0); 744 | /* 2^254 - 2^4 */ gfe4x_square(&t0,&t1); 745 | /* 2^255 - 2^5 */ gfe4x_square(&t1,&t0); 746 | /* 2^255 - 21 */ gfe4x_mul(r,&t1,&z11); 747 | } 748 | 749 | void gfe4x_print(const gfe4x *x, int pos) 750 | { 751 | int i; 752 | printf("("); 753 | for(i=0;i<11;i++) 754 | printf("%lf +", x->v[i].v[pos]); 755 | printf("%lf)", x->v[11].v[pos]); 756 | } 757 | 758 | -------------------------------------------------------------------------------- /gfe4x.h: -------------------------------------------------------------------------------- 1 | #ifndef GFE4X_H 2 | #define GFE4X_H 3 | 4 | typedef struct{ 5 | double v[4]; 6 | } __attribute__ ((aligned (32))) limb; 7 | 8 | typedef struct{ 9 | limb v[12]; 10 | } gfe4x; 11 | 12 | void gfe4x_pack(unsigned char r[128], const gfe4x *x); 13 | 14 | void gfe4x_unpack(gfe4x *, const unsigned char *); 15 | void gfe4x_unpack_single(gfe4x *, const unsigned char *, int); 16 | 17 | void gfe4x_neg(gfe4x *r, const gfe4x *x); 18 | void gfe4x_neg_single(gfe4x *r, const gfe4x *x, int pos); 19 | 20 | void gfe4x_add(gfe4x *r, const gfe4x *x, const gfe4x *y); 21 | void gfe4x_sub(gfe4x *r, const gfe4x *x, const gfe4x *y); 22 | 23 | void gfe4x_setzero(gfe4x *r); 24 | void gfe4x_setone(gfe4x *r); 25 | void gfe4x_settwo(gfe4x *r); 26 | 27 | void gfe4x_cmov(gfe4x *r, const gfe4x *x, unsigned char * b); 28 | void 
gfe4x_cmov_vartime(gfe4x *r, const gfe4x *x, unsigned char * b); 29 | 30 | void gfe4x_mul(gfe4x *r, const gfe4x *x, const gfe4x *y); 31 | void gfe4x_square(gfe4x *r, const gfe4x *x); 32 | void gfe4x_nsquare(gfe4x *r, const int n); 33 | 34 | void gfe4x_invert(gfe4x *r, const gfe4x *x); 35 | 36 | void gfe4x_pow2523(gfe4x *r, const gfe4x *x); 37 | 38 | void gfe4x_iseq_vartime(unsigned char *r, const gfe4x *x, const gfe4x *y); 39 | 40 | void gfe4x_getparity(unsigned char * res, const gfe4x * a); 41 | 42 | void gfe4x_print(const gfe4x *x, int pos); 43 | 44 | #endif 45 | 46 | -------------------------------------------------------------------------------- /gfe4x_add.S: -------------------------------------------------------------------------------- 1 | 2 | # qhasm: int64 input_0 3 | 4 | # qhasm: int64 input_1 5 | 6 | # qhasm: int64 input_2 7 | 8 | # qhasm: int64 input_3 9 | 10 | # qhasm: int64 input_4 11 | 12 | # qhasm: int64 input_5 13 | 14 | # qhasm: stack64 input_6 15 | 16 | # qhasm: stack64 input_7 17 | 18 | # qhasm: int64 caller_r11 19 | 20 | # qhasm: int64 caller_r12 21 | 22 | # qhasm: int64 caller_r13 23 | 24 | # qhasm: int64 caller_r14 25 | 26 | # qhasm: int64 caller_r15 27 | 28 | # qhasm: int64 caller_rbx 29 | 30 | # qhasm: int64 caller_rbp 31 | 32 | # qhasm: reg256 x0 33 | 34 | # qhasm: reg256 x1 35 | 36 | # qhasm: reg256 x2 37 | 38 | # qhasm: reg256 x3 39 | 40 | # qhasm: reg256 x4 41 | 42 | # qhasm: reg256 x5 43 | 44 | # qhasm: reg256 x6 45 | 46 | # qhasm: reg256 x7 47 | 48 | # qhasm: reg256 x8 49 | 50 | # qhasm: reg256 x9 51 | 52 | # qhasm: reg256 x10 53 | 54 | # qhasm: reg256 x11 55 | 56 | # qhasm: enter gfe4x_add 57 | .p2align 5 58 | .global _gfe4x_add 59 | .global gfe4x_add 60 | _gfe4x_add: 61 | gfe4x_add: 62 | mov %rsp,%r11 63 | and $31,%r11 64 | add $0,%r11 65 | sub %r11,%rsp 66 | 67 | # qhasm: x0 aligned= mem256[input_1 + 0] 68 | # asm 1: vmovupd 0(x0=reg256#1 69 | # asm 2: vmovupd 0(x0=%ymm0 70 | vmovupd 0(%rsi),%ymm0 71 | 72 | # qhasm: 4x x0 approx+= 
mem256[input_2 + 0] 73 | # asm 1: vaddpd 0(x0=reg256#1 74 | # asm 2: vaddpd 0(x0=%ymm0 75 | vaddpd 0(%rdx),%ymm0,%ymm0 76 | 77 | # qhasm: mem256[input_0 + 0] aligned= x0 78 | # asm 1: vmovupd x1=reg256#1 84 | # asm 2: vmovupd 32(x1=%ymm0 85 | vmovupd 32(%rsi),%ymm0 86 | 87 | # qhasm: 4x x1 approx+= mem256[input_2 + 32] 88 | # asm 1: vaddpd 32(x1=reg256#1 89 | # asm 2: vaddpd 32(x1=%ymm0 90 | vaddpd 32(%rdx),%ymm0,%ymm0 91 | 92 | # qhasm: mem256[input_0 + 32] aligned= x1 93 | # asm 1: vmovupd x2=reg256#1 99 | # asm 2: vmovupd 64(x2=%ymm0 100 | vmovupd 64(%rsi),%ymm0 101 | 102 | # qhasm: 4x x2 approx+= mem256[input_2 + 64] 103 | # asm 1: vaddpd 64(x2=reg256#1 104 | # asm 2: vaddpd 64(x2=%ymm0 105 | vaddpd 64(%rdx),%ymm0,%ymm0 106 | 107 | # qhasm: mem256[input_0 + 64] aligned= x2 108 | # asm 1: vmovupd x3=reg256#1 114 | # asm 2: vmovupd 96(x3=%ymm0 115 | vmovupd 96(%rsi),%ymm0 116 | 117 | # qhasm: 4x x3 approx+= mem256[input_2 + 96] 118 | # asm 1: vaddpd 96(x3=reg256#1 119 | # asm 2: vaddpd 96(x3=%ymm0 120 | vaddpd 96(%rdx),%ymm0,%ymm0 121 | 122 | # qhasm: mem256[input_0 + 96] aligned= x3 123 | # asm 1: vmovupd x4=reg256#1 129 | # asm 2: vmovupd 128(x4=%ymm0 130 | vmovupd 128(%rsi),%ymm0 131 | 132 | # qhasm: 4x x4 approx+= mem256[input_2 + 128] 133 | # asm 1: vaddpd 128(x4=reg256#1 134 | # asm 2: vaddpd 128(x4=%ymm0 135 | vaddpd 128(%rdx),%ymm0,%ymm0 136 | 137 | # qhasm: mem256[input_0 + 128] aligned= x4 138 | # asm 1: vmovupd x5=reg256#1 144 | # asm 2: vmovupd 160(x5=%ymm0 145 | vmovupd 160(%rsi),%ymm0 146 | 147 | # qhasm: 4x x5 approx+= mem256[input_2 + 160] 148 | # asm 1: vaddpd 160(x5=reg256#1 149 | # asm 2: vaddpd 160(x5=%ymm0 150 | vaddpd 160(%rdx),%ymm0,%ymm0 151 | 152 | # qhasm: mem256[input_0 + 160] aligned= x5 153 | # asm 1: vmovupd x6=reg256#1 159 | # asm 2: vmovupd 192(x6=%ymm0 160 | vmovupd 192(%rsi),%ymm0 161 | 162 | # qhasm: 4x x6 approx+= mem256[input_2 + 192] 163 | # asm 1: vaddpd 192(x6=reg256#1 164 | # asm 2: vaddpd 192(x6=%ymm0 165 | vaddpd 
192(%rdx),%ymm0,%ymm0 166 | 167 | # qhasm: mem256[input_0 + 192] aligned= x6 168 | # asm 1: vmovupd x7=reg256#1 174 | # asm 2: vmovupd 224(x7=%ymm0 175 | vmovupd 224(%rsi),%ymm0 176 | 177 | # qhasm: 4x x7 approx+= mem256[input_2 + 224] 178 | # asm 1: vaddpd 224(x7=reg256#1 179 | # asm 2: vaddpd 224(x7=%ymm0 180 | vaddpd 224(%rdx),%ymm0,%ymm0 181 | 182 | # qhasm: mem256[input_0 + 224] aligned= x7 183 | # asm 1: vmovupd x8=reg256#1 189 | # asm 2: vmovupd 256(x8=%ymm0 190 | vmovupd 256(%rsi),%ymm0 191 | 192 | # qhasm: 4x x8 approx+= mem256[input_2 + 256] 193 | # asm 1: vaddpd 256(x8=reg256#1 194 | # asm 2: vaddpd 256(x8=%ymm0 195 | vaddpd 256(%rdx),%ymm0,%ymm0 196 | 197 | # qhasm: mem256[input_0 + 256] aligned= x8 198 | # asm 1: vmovupd x9=reg256#1 204 | # asm 2: vmovupd 288(x9=%ymm0 205 | vmovupd 288(%rsi),%ymm0 206 | 207 | # qhasm: 4x x9 approx+= mem256[input_2 + 288] 208 | # asm 1: vaddpd 288(x9=reg256#1 209 | # asm 2: vaddpd 288(x9=%ymm0 210 | vaddpd 288(%rdx),%ymm0,%ymm0 211 | 212 | # qhasm: mem256[input_0 + 288] aligned= x9 213 | # asm 1: vmovupd x10=reg256#1 219 | # asm 2: vmovupd 320(x10=%ymm0 220 | vmovupd 320(%rsi),%ymm0 221 | 222 | # qhasm: 4x x10 approx+= mem256[input_2 + 320] 223 | # asm 1: vaddpd 320(x10=reg256#1 224 | # asm 2: vaddpd 320(x10=%ymm0 225 | vaddpd 320(%rdx),%ymm0,%ymm0 226 | 227 | # qhasm: mem256[input_0 + 320] aligned= x10 228 | # asm 1: vmovupd x11=reg256#1 234 | # asm 2: vmovupd 352(x11=%ymm0 235 | vmovupd 352(%rsi),%ymm0 236 | 237 | # qhasm: 4x x11 approx+= mem256[input_2 + 352] 238 | # asm 1: vaddpd 352(x11=reg256#1 239 | # asm 2: vaddpd 352(x11=%ymm0 240 | vaddpd 352(%rdx),%ymm0,%ymm0 241 | 242 | # qhasm: mem256[input_0 + 352] aligned= x11 243 | # asm 1: vmovupd x0=reg256#1 69 | # asm 2: vmovupd 0(x0=%ymm0 70 | vmovupd 0(%rsi),%ymm0 71 | 72 | # qhasm: 4x x0 approx-= mem256[input_2 + 0] 73 | # asm 1: vsubpd 0(x0=reg256#1 74 | # asm 2: vsubpd 0(x0=%ymm0 75 | vsubpd 0(%rdx),%ymm0,%ymm0 76 | 77 | # qhasm: mem256[input_0 + 0] = x0 78 | # 
asm 1: vmovupd x1=reg256#1 84 | # asm 2: vmovupd 32(x1=%ymm0 85 | vmovupd 32(%rsi),%ymm0 86 | 87 | # qhasm: 4x x1 approx-= mem256[input_2 + 32] 88 | # asm 1: vsubpd 32(x1=reg256#1 89 | # asm 2: vsubpd 32(x1=%ymm0 90 | vsubpd 32(%rdx),%ymm0,%ymm0 91 | 92 | # qhasm: mem256[input_0 + 32] = x1 93 | # asm 1: vmovupd x2=reg256#1 99 | # asm 2: vmovupd 64(x2=%ymm0 100 | vmovupd 64(%rsi),%ymm0 101 | 102 | # qhasm: 4x x2 approx-= mem256[input_2 + 64] 103 | # asm 1: vsubpd 64(x2=reg256#1 104 | # asm 2: vsubpd 64(x2=%ymm0 105 | vsubpd 64(%rdx),%ymm0,%ymm0 106 | 107 | # qhasm: mem256[input_0 + 64] = x2 108 | # asm 1: vmovupd x3=reg256#1 114 | # asm 2: vmovupd 96(x3=%ymm0 115 | vmovupd 96(%rsi),%ymm0 116 | 117 | # qhasm: 4x x3 approx-= mem256[input_2 + 96] 118 | # asm 1: vsubpd 96(x3=reg256#1 119 | # asm 2: vsubpd 96(x3=%ymm0 120 | vsubpd 96(%rdx),%ymm0,%ymm0 121 | 122 | # qhasm: mem256[input_0 + 96] = x3 123 | # asm 1: vmovupd x4=reg256#1 129 | # asm 2: vmovupd 128(x4=%ymm0 130 | vmovupd 128(%rsi),%ymm0 131 | 132 | # qhasm: 4x x4 approx-= mem256[input_2 + 128] 133 | # asm 1: vsubpd 128(x4=reg256#1 134 | # asm 2: vsubpd 128(x4=%ymm0 135 | vsubpd 128(%rdx),%ymm0,%ymm0 136 | 137 | # qhasm: mem256[input_0 + 128] = x4 138 | # asm 1: vmovupd x5=reg256#1 144 | # asm 2: vmovupd 160(x5=%ymm0 145 | vmovupd 160(%rsi),%ymm0 146 | 147 | # qhasm: 4x x5 approx-= mem256[input_2 + 160] 148 | # asm 1: vsubpd 160(x5=reg256#1 149 | # asm 2: vsubpd 160(x5=%ymm0 150 | vsubpd 160(%rdx),%ymm0,%ymm0 151 | 152 | # qhasm: mem256[input_0 + 160] = x5 153 | # asm 1: vmovupd x6=reg256#1 159 | # asm 2: vmovupd 192(x6=%ymm0 160 | vmovupd 192(%rsi),%ymm0 161 | 162 | # qhasm: 4x x6 approx-= mem256[input_2 + 192] 163 | # asm 1: vsubpd 192(x6=reg256#1 164 | # asm 2: vsubpd 192(x6=%ymm0 165 | vsubpd 192(%rdx),%ymm0,%ymm0 166 | 167 | # qhasm: mem256[input_0 + 192] = x6 168 | # asm 1: vmovupd x7=reg256#1 174 | # asm 2: vmovupd 224(x7=%ymm0 175 | vmovupd 224(%rsi),%ymm0 176 | 177 | # qhasm: 4x x7 approx-= 
mem256[input_2 + 224] 178 | # asm 1: vsubpd 224(x7=reg256#1 179 | # asm 2: vsubpd 224(x7=%ymm0 180 | vsubpd 224(%rdx),%ymm0,%ymm0 181 | 182 | # qhasm: mem256[input_0 + 224] = x7 183 | # asm 1: vmovupd x8=reg256#1 189 | # asm 2: vmovupd 256(x8=%ymm0 190 | vmovupd 256(%rsi),%ymm0 191 | 192 | # qhasm: 4x x8 approx-= mem256[input_2 + 256] 193 | # asm 1: vsubpd 256(x8=reg256#1 194 | # asm 2: vsubpd 256(x8=%ymm0 195 | vsubpd 256(%rdx),%ymm0,%ymm0 196 | 197 | # qhasm: mem256[input_0 + 256] = x8 198 | # asm 1: vmovupd x9=reg256#1 204 | # asm 2: vmovupd 288(x9=%ymm0 205 | vmovupd 288(%rsi),%ymm0 206 | 207 | # qhasm: 4x x9 approx-= mem256[input_2 + 288] 208 | # asm 1: vsubpd 288(x9=reg256#1 209 | # asm 2: vsubpd 288(x9=%ymm0 210 | vsubpd 288(%rdx),%ymm0,%ymm0 211 | 212 | # qhasm: mem256[input_0 + 288] = x9 213 | # asm 1: vmovupd x10=reg256#1 219 | # asm 2: vmovupd 320(x10=%ymm0 220 | vmovupd 320(%rsi),%ymm0 221 | 222 | # qhasm: 4x x10 approx-= mem256[input_2 + 320] 223 | # asm 1: vsubpd 320(x10=reg256#1 224 | # asm 2: vsubpd 320(x10=%ymm0 225 | vsubpd 320(%rdx),%ymm0,%ymm0 226 | 227 | # qhasm: mem256[input_0 + 320] = x10 228 | # asm 1: vmovupd x11=reg256#1 234 | # asm 2: vmovupd 352(x11=%ymm0 235 | vmovupd 352(%rsi),%ymm0 236 | 237 | # qhasm: 4x x11 approx-= mem256[input_2 + 352] 238 | # asm 1: vsubpd 352(x11=reg256#1 239 | # asm 2: vsubpd 352(x11=%ymm0 240 | vsubpd 352(%rdx),%ymm0,%ymm0 241 | 242 | # qhasm: mem256[input_0 + 352] = x11 243 | # asm 1: vmovupd h_addr, 88 | server->h_length ); 89 | 90 | serv_addr.sin_port = htons(portno); 91 | 92 | /* Now connect to the server */ 93 | 94 | while( connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0 ) ; 95 | 96 | *sock = sockfd; 97 | } 98 | 99 | void writing(int sockfd, void * buffer, const unsigned len) 100 | { 101 | unsigned delivered = 0; 102 | int n; 103 | char * ptr = (char *) buffer; 104 | 105 | /* Send message to the server */ 106 | 107 | while (delivered < len) 108 | { 109 | n = write(sockfd, ptr, len - 
/*
 * reading - receive exactly `len` bytes from `sockfd` into `buffer`.
 *
 * read() may return fewer bytes than requested, so it is called repeatedly
 * until the whole buffer has been filled.
 *
 * sockfd: descriptor to read from.
 * buffer: destination; must have room for at least `len` bytes.
 * len:    number of bytes to receive; 0 returns immediately.
 *
 * Does not return on failure: a read() error, or end-of-stream before `len`
 * bytes have arrived, prints a diagnostic to stderr and calls exit(-1).
 */
void reading(int sockfd, void * buffer, const unsigned len)
{
	unsigned received = 0;
	char * ptr = (char *) buffer;

	/* Receive message from the peer */

	while (received < len)
	{
		ssize_t n = read(sockfd, ptr, len - received);

		if (n < 0)
		{
			perror("ERROR reading from socket"); exit(-1);
		}

		/*
		 * read() reports end-of-file (peer closed the connection) as 0.
		 * Treating that as progress would never advance `received` and
		 * the loop would spin forever, so fail loudly instead.
		 */
		if (n == 0)
		{
			fprintf(stderr, "ERROR reading from socket: connection closed by peer\n"); exit(-1);
		}

		received += (unsigned) n;
		ptr += n;
	}
}
-------------------------------------------------------------------------------- /ot_receiver.c: -------------------------------------------------------------------------------- 1 | #include "ot_receiver.h" 2 | 3 | #include 4 | 5 | #include "ge25519.h" 6 | #include "ge4x.h" 7 | #include "to_4x.h" 8 | 9 | void receiver_maketable(SIMPLEOT_RECEIVER * r) 10 | { 11 | ge4x_maketable(r->table, &r->S, DIST); 12 | } 13 | 14 | void receiver_procS(SIMPLEOT_RECEIVER * r) 15 | { 16 | bool success = receiver_procS_check(r); 17 | if (!success) 18 | { 19 | fprintf(stderr, "Error: point decompression failed\n"); 20 | exit(-1); 21 | } 22 | } 23 | 24 | bool receiver_procS_check(SIMPLEOT_RECEIVER * r) 25 | { 26 | int i; 27 | 28 | ge25519 S; 29 | 30 | if (ge25519_unpack_vartime(&S, r->S_pack) != 0) 31 | { 32 | return false; 33 | } 34 | 35 | for (i = 0; i < 3; i++) ge25519_double(&S, &S); // 8S 36 | 37 | ge25519_pack(r->S_pack, &S); // E_1(S) 38 | ge_to_4x(&r->S, &S); 39 | 40 | return true; 41 | } 42 | 43 | void receiver_rsgen(SIMPLEOT_RECEIVER * r, 44 | unsigned char * Rs_pack, 45 | unsigned char * cs) 46 | { 47 | int i; 48 | 49 | ge4x P; 50 | 51 | // 52 | 53 | for (i = 0; i < 4; i++) sc25519_random(&r->x[i], 1); 54 | ge4x_scalarsmults_base(&r->xB, r->x); // 8x^iB 55 | 56 | ge4x_sub(&P, &r->S, &r->xB); // 8S - 8x^iB 57 | ge4x_cmovs(&r->xB, &P, cs); 58 | 59 | ge4x_pack(Rs_pack, &r->xB); // E^1(R^i) 60 | 61 | } 62 | 63 | void receiver_keygen(SIMPLEOT_RECEIVER * r, 64 | unsigned char (*keys)[HASHBYTES]) 65 | { 66 | int i; 67 | 68 | unsigned char Rs_pack[ 4 * PACKBYTES ]; 69 | ge4x P; 70 | 71 | // 72 | 73 | for (i = 0; i < 3; i++) ge4x_double(&r->xB, &r->xB); 74 | ge4x_pack(Rs_pack, &r->xB); // E_2(R^i) 75 | 76 | ge4x_scalarsmults_table(&P, r->table, r->x, DIST); // 64x^iS 77 | 78 | ge4x_hash(keys[0], r->S_pack, Rs_pack, &P); // E_2(x^iS) 79 | } 80 | 81 | -------------------------------------------------------------------------------- /ot_receiver.h: 
-------------------------------------------------------------------------------- 1 | #ifndef OT_RECEIVER_H 2 | #define OT_RECEIVER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "sc25519.h" 12 | #include "ge4x.h" 13 | #include "ot_config.h" 14 | 15 | struct ot_receiver 16 | { 17 | unsigned char S_pack[ PACKBYTES ]; 18 | ge4x S; 19 | ge4x table[ 64/DIST ][8]; 20 | 21 | // temporary 22 | 23 | ge4x xB; 24 | sc25519 x[4]; 25 | }; 26 | 27 | typedef struct ot_receiver SIMPLEOT_RECEIVER; 28 | 29 | void receiver_maketable(SIMPLEOT_RECEIVER *); 30 | void receiver_procS(SIMPLEOT_RECEIVER *); 31 | bool receiver_procS_check(SIMPLEOT_RECEIVER *); 32 | void receiver_rsgen(SIMPLEOT_RECEIVER *, unsigned char *, unsigned char *); 33 | void receiver_keygen(SIMPLEOT_RECEIVER *, unsigned char (*)[HASHBYTES]); 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif //ifndef OT_RECEIVER_H 40 | 41 | -------------------------------------------------------------------------------- /ot_receiver_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ot_receiver.h" 7 | 8 | #include "ot_config.h" 9 | #include "randombytes.h" 10 | #include "network.h" 11 | #include "cpucycles.h" 12 | 13 | void ot_receiver_test(SIMPLEOT_RECEIVER * receiver, int sockfd) 14 | { 15 | int i, j, k; 16 | 17 | unsigned char Rs_pack[ 4 * PACKBYTES ]; 18 | unsigned char keys[ 4 ][ HASHBYTES ]; 19 | unsigned char cs[ 4 ]; 20 | 21 | // 22 | 23 | reading(sockfd, receiver->S_pack, sizeof(receiver->S_pack)); 24 | receiver_procS(receiver); 25 | 26 | // 27 | 28 | receiver_maketable(receiver); 29 | 30 | for (i = 0; i < NOTS; i += 4) 31 | { 32 | simpleot_randombytes(cs, sizeof(cs)); 33 | 34 | for (j = 0; j < 4; j++) 35 | { 36 | cs[j] &= 1; 37 | 38 | if (VERBOSE) printf("%4d-th choose bit = %d\n", i+j, cs[j]); 39 | } 40 | 41 | receiver_rsgen(receiver, Rs_pack, 
cs); 42 | 43 | writing(sockfd, Rs_pack, sizeof(Rs_pack)); 44 | 45 | receiver_keygen(receiver, keys); 46 | 47 | // 48 | 49 | if (VERBOSE) 50 | { 51 | for (j = 0; j < 4; j++) 52 | { 53 | printf("%4d-th reciever key:", i+j); 54 | 55 | for (k = 0; k < HASHBYTES; k++) printf("%.2X", keys[j][k]); 56 | printf("\n"); 57 | } 58 | } 59 | } 60 | } 61 | 62 | 63 | int main(int argc, char * argv[]) 64 | { 65 | int sockfd; 66 | int sndbuf = BUFSIZE; 67 | int flag = 1; 68 | 69 | long long t = 0; 70 | 71 | SIMPLEOT_RECEIVER receiver; 72 | 73 | // 74 | 75 | if (argc != 3) 76 | { 77 | fprintf(stderr,"usage %s hostname port\n", argv[0]); exit(-1); 78 | } 79 | 80 | // 81 | 82 | client_connect(&sockfd, argv[1], atoi(argv[2])); 83 | 84 | if( setsockopt(sockfd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(int)) != 0 ) { perror("ERROR setsockopt"); exit(-1); } 85 | if( setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(int)) != 0 ) { perror("ERROR setsockopt"); exit(-1); } 86 | 87 | t -= cpucycles_amd64cpuinfo(); 88 | 89 | ot_receiver_test(&receiver, sockfd); 90 | 91 | t += cpucycles_amd64cpuinfo(); 92 | 93 | // 94 | 95 | if (!VERBOSE) printf("[n=%d] Elapsed time: %lld cycles\n", NOTS, t); 96 | 97 | shutdown (sockfd, 2); 98 | 99 | // 100 | 101 | return 0; 102 | } 103 | 104 | -------------------------------------------------------------------------------- /ot_sender.c: -------------------------------------------------------------------------------- 1 | #include "ot_sender.h" 2 | 3 | #include 4 | 5 | #include "ge25519.h" 6 | #include "ge4x.h" 7 | #include "to_4x.h" 8 | 9 | void sender_genS(SIMPLEOT_SENDER * s, unsigned char * S_pack) 10 | { 11 | int i; 12 | 13 | ge25519 S, yS; 14 | 15 | // 16 | 17 | sc25519_random(&s->y, 0); 18 | 19 | simpleot_ge25519_scalarmult_base(&S, &s->y); // S 20 | 21 | ge25519_pack(S_pack, &S); // E^0(S) 22 | 23 | for (i = 0; i < 3; i++) ge25519_double(&S, &S); // 8S 24 | 25 | ge25519_pack(s->S_pack, &S); // E_1(S) 26 | 27 | simpleot_ge25519_scalarmult(&yS, &S, 
&s->y); 28 | for (i = 0; i < 3; i++) ge25519_double(&yS, &yS); // 64T 29 | ge_to_4x(&s->yS, &yS); 30 | } 31 | 32 | void sender_keygen(SIMPLEOT_SENDER * s, 33 | unsigned char * Rs_pack, 34 | unsigned char (*keys)[4][HASHBYTES]) 35 | { 36 | bool success = sender_keygen_check(s, Rs_pack, keys); 37 | if (!success) 38 | { 39 | fprintf(stderr, "Error: point decompression failed\n"); 40 | exit(-1); 41 | } 42 | } 43 | 44 | bool sender_keygen_check(SIMPLEOT_SENDER * s, 45 | unsigned char * Rs_pack, 46 | unsigned char (*keys)[4][HASHBYTES]) 47 | { 48 | int i; 49 | 50 | ge4x P0; 51 | ge4x P1; 52 | ge4x Rs; 53 | 54 | // 55 | 56 | if (ge4x_unpack_vartime(&Rs, Rs_pack) != 0) 57 | { 58 | return false; 59 | } 60 | 61 | for (i = 0; i < 3; i++) ge4x_double(&Rs, &Rs); // 64R^i 62 | 63 | ge4x_pack(Rs_pack, &Rs); // E_2(R^i) 64 | 65 | ge4x_scalarmults(&P0, &Rs, &s->y); // 64yR^i 66 | ge4x_hash(keys[0][0], s->S_pack, Rs_pack, &P0); // E_2(yR^i) 67 | 68 | ge4x_sub(&P1, &s->yS, &P0); // 64(T-yR^i) 69 | ge4x_hash(keys[1][0], s->S_pack, Rs_pack, &P1); // E_2(T - yR^i) 70 | 71 | return true; 72 | } 73 | -------------------------------------------------------------------------------- /ot_sender.h: -------------------------------------------------------------------------------- 1 | #ifndef OT_SENDER_H 2 | #define OT_SENDER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "ge4x.h" 12 | #include "sc25519.h" 13 | #include "ot_config.h" 14 | 15 | struct ot_sender 16 | { 17 | unsigned char S_pack[ PACKBYTES ]; 18 | sc25519 y; 19 | ge4x yS; 20 | }; 21 | 22 | typedef struct ot_sender SIMPLEOT_SENDER; 23 | 24 | void sender_genS(SIMPLEOT_SENDER *, unsigned char *); 25 | void sender_keygen(SIMPLEOT_SENDER *, unsigned char *, unsigned char (*)[4][HASHBYTES]); 26 | bool sender_keygen_check(SIMPLEOT_SENDER *, unsigned char *, unsigned char (*)[4][HASHBYTES]); 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif //ifndef OT_SENDER_H 32 | 33 | 
-------------------------------------------------------------------------------- /ot_sender_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ot_sender.h" 6 | 7 | #include "ot_config.h" 8 | #include "network.h" 9 | #include "cpucycles.h" 10 | 11 | void ot_sender_test(SIMPLEOT_SENDER * sender, int newsockfd) 12 | { 13 | int i, j, k; 14 | unsigned char S_pack[ PACKBYTES ]; 15 | unsigned char Rs_pack[4 * PACKBYTES]; 16 | unsigned char keys[ 2 ][ 4 ][ HASHBYTES ]; 17 | 18 | // 19 | 20 | sender_genS(sender, S_pack); 21 | writing(newsockfd, S_pack, sizeof(S_pack)); 22 | 23 | // 24 | 25 | for (i = 0; i < NOTS; i += 4) 26 | { 27 | reading(newsockfd, Rs_pack, sizeof(Rs_pack)); 28 | 29 | sender_keygen(sender, Rs_pack, keys); 30 | 31 | // 32 | 33 | if (VERBOSE) 34 | { 35 | for (j = 0; j < 4; j++) 36 | { 37 | printf("%4d-th sender keys:", i+j); 38 | 39 | for (k = 0; k < HASHBYTES; k++) printf("%.2X", keys[0][j][k]); 40 | printf(" "); 41 | for (k = 0; k < HASHBYTES; k++) printf("%.2X", keys[1][j][k]); 42 | printf("\n"); 43 | } 44 | 45 | printf("\n"); 46 | } 47 | } 48 | } 49 | 50 | int main(int argc, char * argv[]) 51 | { 52 | int sockfd; 53 | int newsockfd; 54 | int rcvbuf = BUFSIZE; 55 | int reuseaddr = 1; 56 | 57 | long long t = 0; 58 | 59 | SIMPLEOT_SENDER sender; 60 | 61 | // 62 | 63 | if (argc != 2) 64 | { 65 | fprintf(stderr, "usage %s port\n", argv[0]); exit(-1); 66 | } 67 | 68 | // 69 | 70 | sockfd = server_listen(atoi(argv[1])); 71 | newsockfd = server_accept(sockfd); 72 | 73 | if (setsockopt(newsockfd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) != 0) { perror("ERROR setsockopt"); exit(-1); } 74 | if (setsockopt(newsockfd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(reuseaddr)) != 0) { perror("ERROR setsockopt"); exit(-1); } 75 | 76 | t -= cpucycles_amd64cpuinfo(); 77 | 78 | ot_sender_test(&sender, newsockfd); 79 | 80 | t += cpucycles_amd64cpuinfo(); 81 | 82 | // 83 | 
/* Descriptor for /dev/urandom; opened on first use and kept open. */
static int fd = -1;

/*
 * Fill x[0 .. xlen-1] with bytes read from /dev/urandom.
 *
 * x:    destination buffer.
 * xlen: number of bytes wanted; 0 writes nothing (the device is still
 *       opened on the first call).
 *
 * Never reports failure: if the device cannot be opened, or a read yields
 * nothing, the call sleeps for one second and tries again.
 */
void simpleot_randombytes(unsigned char *x,unsigned long long xlen)
{
  int got;

  while (fd == -1) {
    fd = open("/dev/urandom",O_RDONLY);
    if (fd == -1) sleep(1);
  }

  while (xlen > 0) {
    /* ask for at most 1 MiB per read() */
    got = (xlen < 1048576) ? (int) xlen : 1048576;

    got = read(fd,x,got);
    if (got < 1) {
      sleep(1);
      continue;
    }

    x += got;
    xlen -= got;
  }
}
5 | */ 6 | 7 | #ifndef randombytes_devurandom_H 8 | #define randombytes_devurandom_H 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | extern void simpleot_randombytes(unsigned char *,unsigned long long); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #ifndef randombytes_implementation 21 | #define randombytes_implementation "devurandom" 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /sc25519.h: -------------------------------------------------------------------------------- 1 | #ifndef SC25519_H 2 | #define SC25519_H 3 | 4 | typedef struct 5 | { 6 | unsigned long long v[4]; 7 | } 8 | sc25519; 9 | 10 | void sc25519_random(sc25519 *, int); 11 | void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]); 12 | void sc25519_window4(char r[64], const sc25519 *s); // 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /sc25519_from32bytes.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "sc25519.h" 4 | 5 | /*Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 6 | * = 7237005577332262213973186563042994240857116359379907606001950938285454250989 7 | */ 8 | 9 | /* Contains order, 2*order, 4*order, 8*order, each represented in 4 consecutive unsigned long long */ 10 | static const unsigned long long order[16] = {0x5812631A5CF5D3EDULL, 0x14DEF9DEA2F79CD6ULL, 11 | 0x0000000000000000ULL, 0x1000000000000000ULL, 12 | 0xB024C634B9EBA7DAULL, 0x29BDF3BD45EF39ACULL, 13 | 0x0000000000000000ULL, 0x2000000000000000ULL, 14 | 0x60498C6973D74FB4ULL, 0x537BE77A8BDE7359ULL, 15 | 0x0000000000000000ULL, 0x4000000000000000ULL, 16 | 0xC09318D2E7AE9F68ULL, 0xA6F7CEF517BCE6B2ULL, 17 | 0x0000000000000000ULL, 0x8000000000000000ULL}; 18 | 19 | static unsigned long long smaller(unsigned long long a,unsigned long long b) 20 | { 21 | unsigned long 
long atop = a >> 32; 22 | unsigned long long abot = a & 4294967295; 23 | unsigned long long btop = b >> 32; 24 | unsigned long long bbot = b & 4294967295; 25 | unsigned long long atopbelowbtop = (atop - btop) >> 63; 26 | unsigned long long atopeqbtop = ((atop ^ btop) - 1) >> 63; 27 | unsigned long long abotbelowbbot = (abot - bbot) >> 63; 28 | return atopbelowbtop | (atopeqbtop & abotbelowbbot); 29 | } 30 | 31 | void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]) 32 | { 33 | unsigned char e[32]; 34 | 35 | unsigned long long t[4]; 36 | unsigned long long b; 37 | unsigned long long mask; 38 | int i, j; 39 | 40 | for (i = 0;i < 32;++i) e[i] = x[i]; 41 | e[0] &= 248; 42 | e[31] &= 127; 43 | e[31] |= 64; 44 | 45 | /* assuming little-endian */ 46 | 47 | memcpy(&r->v[0], e + 0, 8); 48 | memcpy(&r->v[1], e + 8, 8); 49 | memcpy(&r->v[2], e + 16, 8); 50 | memcpy(&r->v[3], e + 24, 8); 51 | 52 | for(j=3;j>=0;j--) 53 | { 54 | b=0; 55 | for(i=0;i<4;i++) 56 | { 57 | b += order[4*j+i]; /* no overflow for this particular order */ 58 | t[i] = r->v[i] - b; 59 | b = smaller(r->v[i],b); 60 | } 61 | mask = b - 1; 62 | for(i=0;i<4;i++) 63 | r->v[i] ^= mask & (r->v[i] ^ t[i]); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /sc25519_random.c: -------------------------------------------------------------------------------- 1 | #include "sc25519.h" 2 | #include "randombytes.h" 3 | 4 | void sc25519_random(sc25519 *r, int c) 5 | { 6 | unsigned char x[32]; 7 | 8 | simpleot_randombytes(x, 32); 9 | 10 | if (c == 0) 11 | { 12 | x[31] &= 15; 13 | } 14 | else 15 | { 16 | x[0] &= 248; 17 | x[31] &= 127; 18 | } 19 | 20 | sc25519_from32bytes(r, x); 21 | } 22 | 23 | -------------------------------------------------------------------------------- /sc25519_window4.c: -------------------------------------------------------------------------------- 1 | #include "sc25519.h" 2 | 3 | static void sc25519_window4_unsigned(char r[64], const sc25519 
*s) 4 | { 5 | int i; 6 | 7 | for(i=0;i<16;i++) r[i+0 ] = (s->v[0] >> (4*i)) & 15; 8 | for(i=0;i<16;i++) r[i+16] = (s->v[1] >> (4*i)) & 15; 9 | for(i=0;i<16;i++) r[i+32] = (s->v[2] >> (4*i)) & 15; 10 | for(i=0;i<16;i++) r[i+48] = (s->v[3] >> (4*i)) & 15; 11 | } 12 | 13 | void sc25519_window4(char r[64], const sc25519 *s) 14 | { 15 | char carry; 16 | int i; 17 | 18 | sc25519_window4_unsigned(r, s); 19 | 20 | /* Making it signed */ 21 | carry = 0; 22 | for(i=0;i<63;i++) 23 | { 24 | r[i] += carry; 25 | r[i+1] += r[i] >> 4; 26 | r[i] &= 15; 27 | carry = r[i] >> 3; 28 | r[i] -= carry << 4; 29 | } 30 | r[63] += carry; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /to_4x.h: -------------------------------------------------------------------------------- 1 | static void fe_to_4x(gfe4x * a, fe25519 * b) 2 | { 3 | int i; 4 | unsigned char buf[32]; 5 | 6 | fe25519_pack(buf, b); 7 | 8 | gfe4x_unpack_single(a, buf, 0); 9 | 10 | for (i = 0; i < 12; i++) 11 | { 12 | a->v[i].v[1] = a->v[i].v[0]; 13 | a->v[i].v[2] = a->v[i].v[0]; 14 | a->v[i].v[3] = a->v[i].v[0]; 15 | } 16 | } 17 | 18 | static void ge_to_4x(ge4x * a, ge25519 * b) 19 | { 20 | fe_to_4x(&a->x, &b->x); 21 | fe_to_4x(&a->y, &b->y); 22 | fe_to_4x(&a->z, &b->z); 23 | fe_to_4x(&a->t, &b->t); 24 | } 25 | 26 | --------------------------------------------------------------------------------