├── LICENSE ├── README.md └── src ├── Makefile ├── Makefile.win32 ├── api.c ├── api_128_256.c ├── api_257_512.c ├── api_769_1024.c ├── bat.h ├── blake2.h ├── blake2b.c ├── blake2s.c ├── codec.c ├── fft.c ├── fnr.c ├── inner.h ├── kem128.c ├── kem257.c ├── kem769.c ├── keygen.c ├── modgen.c ├── modqp.c ├── prng.c ├── speed_bat.c └── test_bat.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Thomas Pornin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BAT 2 | BAT KEM/Signature Reference Implementation 3 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | CC = clang 4 | 5 | CFLAGS = -Wall -Wextra -Wshadow -Wundef -O3 6 | # To allow clang to optimize for the local machine, add: 7 | # -march=native 8 | # (this will activate automatic vectorization by clang, if the local machine 9 | # can do it; but the binary may fail to run on older CPUs) 10 | # 11 | # To use the AVX2-specific code, add: 12 | # -DBAT_AVX2 13 | # (this will use all the code with explicit AVX2 intrinsic functions; the 14 | # binary won't run on a machine without AVX2) 15 | # 16 | # You can use -march=native and -DBAT_AVX2 at the same time, for the 17 | # highest optimization: 18 | # CFLAGS = -Wall -Wextra -Wshadow -Wundef -O3 -march=native -DBAT_AVX2 19 | 20 | LD = clang 21 | LDFLAGS = 22 | LIBS = 23 | 24 | OBJ = api_128_256.o api_257_512.o api_769_1024.o codec.o fft.o fnr.o kem128.o kem257.o kem769.o keygen.o modqp.o prng.o blake2b.o blake2s.o 25 | 26 | all: test_bat speed_bat 27 | 28 | clean: 29 | -rm -f $(OBJ) test_bat test_bat.o speed_bat speed_bat.o 30 | 31 | test_bat: $(OBJ) test_bat.o 32 | $(LD) $(LDFLAGS) -o test_bat test_bat.o $(OBJ) $(LIBS) 33 | 34 | speed_bat: speed_bat.o $(OBJ) 35 | $(LD) $(LDFLAGS) -o speed_bat speed_bat.o $(OBJ) $(LIBS) 36 | 37 | api_128_256.o: api_128_256.c api.c bat.h inner.h blake2.h 38 | $(CC) $(CFLAGS) -c -o api_128_256.o api_128_256.c 39 | 40 | api_257_512.o: api_257_512.c api.c bat.h inner.h blake2.h 41 | $(CC) $(CFLAGS) -c -o api_257_512.o api_257_512.c 42 | 43 | api_769_1024.o: api_769_1024.c api.c bat.h inner.h blake2.h 44 | $(CC) $(CFLAGS) -c -o api_769_1024.o api_769_1024.c 45 | 46 | 
codec.o: codec.c inner.h blake2.h 47 | $(CC) $(CFLAGS) -c -o codec.o codec.c 48 | 49 | fft.o: fft.c inner.h blake2.h 50 | $(CC) $(CFLAGS) -c -o fft.o fft.c 51 | 52 | fnr.o: fnr.c inner.h blake2.h 53 | $(CC) $(CFLAGS) -c -o fnr.o fnr.c 54 | 55 | kem128.o: kem128.c inner.h blake2.h 56 | $(CC) $(CFLAGS) -c -o kem128.o kem128.c 57 | 58 | kem257.o: kem257.c modgen.c inner.h blake2.h 59 | $(CC) $(CFLAGS) -c -o kem257.o kem257.c 60 | 61 | kem769.o: kem769.c modgen.c inner.h blake2.h 62 | $(CC) $(CFLAGS) -c -o kem769.o kem769.c 63 | 64 | keygen.o: keygen.c inner.h blake2.h 65 | $(CC) $(CFLAGS) -c -o keygen.o keygen.c 66 | 67 | modqp.o: modqp.c modgen.c inner.h blake2.h 68 | $(CC) $(CFLAGS) -c -o modqp.o modqp.c 69 | 70 | prng.o: prng.c inner.h blake2.h 71 | $(CC) $(CFLAGS) -c -o prng.o prng.c 72 | 73 | blake2b.o: blake2b.c inner.h blake2.h 74 | $(CC) $(CFLAGS) -c -o blake2b.o blake2b.c 75 | 76 | blake2s.o: blake2s.c inner.h blake2.h 77 | $(CC) $(CFLAGS) -c -o blake2s.o blake2s.c 78 | 79 | speed_bat.o: speed_bat.c bat.h inner.h blake2.h 80 | $(CC) $(CFLAGS) -c -o speed_bat.o speed_bat.c 81 | 82 | test_bat.o: test_bat.c bat.h inner.h blake2.h 83 | $(CC) $(CFLAGS) -c -o test_bat.o test_bat.c 84 | -------------------------------------------------------------------------------- /src/Makefile.win32: -------------------------------------------------------------------------------- 1 | # Makefile specific for MSVC. 2 | # Use in a Visual C command window, with: 3 | # nmake -f Makefile.win32 4 | 5 | CC = cl 6 | 7 | CFLAGS = /nologo /W4 /O2 8 | # To use the AVX2-specific code, add: 9 | # /arch:AVX2 /DBAT_AVX2 10 | # to the options above. 
11 | 12 | LD = cl 13 | LDFLAGS = /nologo 14 | LIBS = 15 | 16 | OBJ = api_128_256.obj api_257_512.obj api_769_1024.obj codec.obj fft.obj fnr.obj kem128.obj kem257.obj kem769.obj keygen.obj modqp.obj prng.obj blake2b.obj blake2s.obj 17 | 18 | all: test_bat.exe speed_bat.exe 19 | 20 | clean: 21 | -del /Q $(OBJ) test_bat.exe test_bat.obj speed_bat.exe speed_bat.obj 22 | 23 | test_bat.exe: $(OBJ) test_bat.obj 24 | $(LD) $(LDFLAGS) /Fe:test_bat.exe test_bat.obj $(OBJ) $(LIBS) 25 | 26 | speed_bat.exe: $(OBJ) speed_bat.obj 27 | $(LD) $(LDFLAGS) /Fe:speed_bat.exe speed_bat.obj $(OBJ) $(LIBS) 28 | 29 | api_128_256.obj: api_128_256.c api.c bat.h inner.h blake2.h 30 | $(CC) $(CFLAGS) /c /Fo:api_128_256.obj api_128_256.c 31 | 32 | api_257_512.obj: api_257_512.c api.c bat.h inner.h blake2.h 33 | $(CC) $(CFLAGS) /c /Fo:api_257_512.obj api_257_512.c 34 | 35 | api_769_1024.obj: api_769_1024.c api.c bat.h inner.h blake2.h 36 | $(CC) $(CFLAGS) /c /Fo:api_769_1024.obj api_769_1024.c 37 | 38 | codec.obj: codec.c inner.h blake2.h 39 | $(CC) $(CFLAGS) /c /Fo:codec.obj codec.c 40 | 41 | fft.obj: fft.c inner.h blake2.h 42 | $(CC) $(CFLAGS) /c /Fo:fft.obj fft.c 43 | 44 | fnr.obj: fnr.c inner.h blake2.h 45 | $(CC) $(CFLAGS) /c /Fo:fnr.obj fnr.c 46 | 47 | kem128.obj: kem128.c inner.h blake2.h 48 | $(CC) $(CFLAGS) /c /Fo:kem128.obj kem128.c 49 | 50 | kem257.obj: kem257.c modgen.c inner.h blake2.h 51 | $(CC) $(CFLAGS) /c /Fo:kem257.obj kem257.c 52 | 53 | kem769.obj: kem769.c modgen.c inner.h blake2.h 54 | $(CC) $(CFLAGS) /c /Fo:kem769.obj kem769.c 55 | 56 | keygen.obj: keygen.c inner.h blake2.h 57 | $(CC) $(CFLAGS) /c /Fo:keygen.obj keygen.c 58 | 59 | modqp.obj: modqp.c modgen.c inner.h blake2.h 60 | $(CC) $(CFLAGS) /c /Fo:modqp.obj modqp.c 61 | 62 | prng.obj: prng.c inner.h blake2.h 63 | $(CC) $(CFLAGS) /c /Fo:prng.obj prng.c 64 | 65 | blake2b.obj: blake2b.c inner.h blake2.h 66 | $(CC) $(CFLAGS) /c /Fo:blake2b.obj blake2b.c 67 | 68 | blake2s.obj: blake2s.c inner.h blake2.h 69 | $(CC) $(CFLAGS) 
/c /Fo:blake2s.obj blake2s.c 70 | 71 | speed_bat.obj: speed_bat.c bat.h inner.h blake2.h 72 | $(CC) $(CFLAGS) /c /Fo:speed_bat.obj speed_bat.c 73 | 74 | test_bat.obj: test_bat.c bat.h inner.h blake2.h 75 | $(CC) $(CFLAGS) /c /Fo:test_bat.obj test_bat.c 76 | -------------------------------------------------------------------------------- /src/api.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is not meant to be compiled independently, but to be 3 | * included (with #include) by another C file. The caller shall 4 | * first define the Q, N, LOGN and LVLBYTES macros to relevant values (decimal 5 | * literal constants only). 6 | */ 7 | 8 | #if !defined Q || !defined N || !defined LOGN || !defined LVLBYTES 9 | #error This module must not be compiled separately. 10 | #endif 11 | 12 | #include "bat.h" 13 | #include "inner.h" 14 | 15 | #define XCAT(x, y) XCAT_(x, y) 16 | #define XCAT_(x, y) x ## y 17 | #define XSTR(x) XSTR_(x) 18 | #define XSTR_(x) #x 19 | 20 | #define Zn(name) XCAT(XCAT(XCAT(bat_, Q), XCAT(_, N)), XCAT(_, name)) 21 | #define ZN(name) XCAT(XCAT(XCAT(BAT_, Q), XCAT(_, N)), XCAT(_, name)) 22 | 23 | /* 24 | * Degrees up to 512 use BLAKE2s; degree 1024 uses BLAKE2b. 25 | */ 26 | #if LOGN <= 9 27 | #define HASH blake2s 28 | #define HASH_context blake2s_context 29 | #define HASH_init blake2s_init 30 | #define HASH_update blake2s_update 31 | #define HASH_final blake2s_final 32 | #define EXPAND blake2s_expand 33 | #else 34 | #define HASH blake2b 35 | #define HASH_context blake2b_context 36 | #define HASH_init blake2b_init 37 | #define HASH_update blake2b_update 38 | #define HASH_final blake2b_final 39 | #define EXPAND blake2b_expand 40 | #endif 41 | 42 | /* 43 | * Ensure good alignment of the provided pointer (8-byte alignment in 44 | * general, 32-byte alignment if the AVX2 implementation is used). 45 | * Returned value is the aligned pointer. 
If, after alignment, the size 46 | * is not at least equal to min_tmp_len, then NULL is returned. 47 | */ 48 | static void * 49 | tmp_align(void *tmp, size_t tmp_len, size_t min_tmp_len) 50 | { 51 | unsigned off; 52 | 53 | if (tmp == NULL) { 54 | return NULL; 55 | } 56 | #if BAT_AVX2 57 | off = (32u - (unsigned)(uintptr_t)tmp) & 31u; 58 | #else 59 | off = (8u - (unsigned)(uintptr_t)tmp) & 7u; 60 | #endif 61 | if (tmp_len < off || (tmp_len - off) < min_tmp_len) { 62 | return NULL; 63 | } 64 | return (void *)((uintptr_t)tmp + off); 65 | } 66 | 67 | /* 68 | * Recompute the additional secret seed (rr) from the private key seed. 69 | */ 70 | static void 71 | make_rr(Zn(private_key) *sk) 72 | { 73 | EXPAND(sk->rr, sizeof sk->rr, sk->seed, sizeof sk->seed, 74 | (uint32_t)Q | ((uint32_t)LOGN << 16) | 0x72000000); 75 | } 76 | 77 | /* 78 | * Compute the hash function Hash_m(), used over the plaintext polynomial 79 | * 's' to generate the encryption seed. Output size matches the security 80 | * level. 81 | */ 82 | static void 83 | hash_m(void *dst, const void *sbuf, size_t sbuf_len) 84 | { 85 | /* 86 | * We use a raw hash here because in practice sbuf_len exactly 87 | * matches the block length of the BLAKE2 function and we want 88 | * to stick to a single invocation of the primitive. 89 | * 90 | * Note that the output size used here is at most 16 (with BLAKE2s, 91 | * for degree N <= 512) or 32 (with BLAKE2b, for degree N = 1024), 92 | * i.e. strictly less than the natural hash output size. The output 93 | * size is part of the personalization block of BLAKE2, so this 94 | * already ensures domain separation from the BLAKE2 invocations 95 | * in the expand() calls in other functions used in this file. 96 | */ 97 | HASH(dst, LVLBYTES, NULL, 0, sbuf, sbuf_len); 98 | } 99 | 100 | /* 101 | * Compute the combination of Hash_s() and Sample_s(): the provided input 102 | * is nominally hashed into a seed, which is extended into enough bytes 103 | * with a KDF. 
The seed is used for nothing else. Moreover, the input is 104 | * guaranteed to be small (at most 32 bytes), so we can just use the 105 | * hash expand function. 106 | */ 107 | static void 108 | hash_and_sample_s(void *sbuf, size_t sbuf_len, const void *m, size_t m_len) 109 | { 110 | EXPAND(sbuf, sbuf_len, m, m_len, 111 | (uint32_t)Q | ((uint32_t)LOGN << 16) | 0x73000000); 112 | } 113 | 114 | /* 115 | * Make an alternate seed for key derivation, to be used on decapsulation 116 | * failure. This function is called F() in the BAT specification. 117 | */ 118 | static void 119 | make_kdf_seed_bad(void *m, size_t m_len, 120 | const Zn(private_key) *sk, const Zn(ciphertext) *ct) 121 | { 122 | HASH_context hc; 123 | uint8_t tmp[8]; 124 | 125 | enc64le(tmp, (uint32_t)Q | ((uint32_t)LOGN << 16) | 0x62000000); 126 | HASH_init(&hc, m_len); 127 | HASH_update(&hc, tmp, sizeof tmp); 128 | HASH_update(&hc, sk->rr, sizeof sk->rr); 129 | HASH_update(&hc, ct->c, sizeof ct->c); 130 | HASH_update(&hc, ct->c2, sizeof ct->c2); 131 | HASH_final(&hc, m); 132 | } 133 | 134 | /* 135 | * Make the secret value from the plaintext s. 136 | * 'good' should be 1 for normal secret derivation, or 0 when doing 137 | * fake derivation after decapsulation failure. 
138 | */ 139 | static void 140 | make_secret(void *secret, size_t secret_len, 141 | const void *m, size_t m_len, uint32_t good) 142 | { 143 | EXPAND(secret, secret_len, m, m_len, 144 | (uint32_t)Q | ((uint32_t)LOGN << 16) | ((good + 0x66) << 24)); 145 | } 146 | 147 | /* see bat.h */ 148 | int 149 | Zn(keygen)(Zn(private_key) *sk, void *tmp, size_t tmp_len) 150 | { 151 | prng_context rng; 152 | uint8_t rng_seed[32]; 153 | 154 | tmp = tmp_align(tmp, tmp_len, ZN(TMP_KEYGEN) - 31); 155 | if (tmp == NULL) { 156 | return BAT_ERR_NOSPACE; 157 | } 158 | if (!bat_get_seed(rng_seed, sizeof rng_seed)) { 159 | return BAT_ERR_RANDOM; 160 | } 161 | prng_init(&rng, rng_seed, sizeof rng_seed, 0); 162 | for (;;) { 163 | prng_get_bytes(&rng, sk->seed, sizeof sk->seed); 164 | if (!bat_keygen_make_fg(sk->f, sk->g, 165 | (uint16_t *)sk->h, Q, LOGN, 166 | sk->seed, sizeof sk->seed, tmp)) 167 | { 168 | continue; 169 | } 170 | if (!bat_keygen_solve_FG(sk->F, sk->G, sk->f, sk->g, 171 | Q, LOGN, tmp)) 172 | { 173 | continue; 174 | } 175 | if (!bat_keygen_compute_w(sk->w, 176 | sk->f, sk->g, sk->F, sk->G, Q, LOGN, tmp)) 177 | { 178 | continue; 179 | } 180 | make_rr(sk); 181 | return 0; 182 | } 183 | } 184 | 185 | /* see bat.h */ 186 | void 187 | Zn(get_public_key)(Zn(public_key) *pk, const Zn(private_key) *sk) 188 | { 189 | memmove(pk->h, sk->h, sizeof sk->h); 190 | } 191 | 192 | static size_t 193 | get_privkey_length(const Zn(private_key) *sk, int short_format) 194 | { 195 | if (short_format) { 196 | return 1 + sizeof(sk->seed) + bat_trim_i8_encode( 197 | NULL, 0, NULL, LOGN, bat_max_FG_bits[LOGN]); 198 | } else { 199 | return 1 + sizeof(sk->seed) + sizeof(sk->rr) 200 | + bat_trim_i8_encode(NULL, 0, 201 | sk->f, LOGN, bat_max_fg_bits[LOGN]) 202 | + bat_trim_i8_encode(NULL, 0, 203 | sk->g, LOGN, bat_max_fg_bits[LOGN]) 204 | + bat_trim_i8_encode(NULL, 0, 205 | sk->F, LOGN, bat_max_FG_bits[LOGN]) 206 | + bat_trim_i8_encode(NULL, 0, 207 | sk->G, LOGN, bat_max_FG_bits[LOGN]) 208 | + 
bat_trim_i32_encode(NULL, 0, 209 | sk->w, LOGN, bat_max_w_bits[LOGN]) 210 | + XCAT(bat_encode_, Q)(NULL, 0, sk->h, LOGN); 211 | } 212 | } 213 | 214 | /* see bat.h */ 215 | size_t 216 | Zn(encode_private_key)(void *out, size_t max_out_len, 217 | const Zn(private_key) *sk, int short_format) 218 | { 219 | uint8_t *buf; 220 | size_t len, off, out_len; 221 | 222 | out_len = get_privkey_length(sk, short_format); 223 | if (out == NULL) { 224 | return out_len; 225 | } 226 | if (max_out_len < out_len) { 227 | return 0; 228 | } 229 | buf = out; 230 | if (short_format) { 231 | buf[0] = ZN(TAG_PRIVKEY_SHORT); 232 | memmove(&buf[1], sk->seed, sizeof sk->seed); 233 | off = 1 + sizeof sk->seed; 234 | len = bat_trim_i8_encode(buf + off, out_len - off, 235 | sk->F, LOGN, bat_max_FG_bits[LOGN]); 236 | if (len == 0) { 237 | /* This should never happen in practice. */ 238 | return 0; 239 | } 240 | off += len; 241 | return off; 242 | } else { 243 | buf[0] = ZN(TAG_PRIVKEY_LONG); 244 | memmove(&buf[1], sk->seed, sizeof sk->seed); 245 | off = 1 + sizeof sk->seed; 246 | memmove(&buf[off], sk->rr, sizeof sk->rr); 247 | off += sizeof sk->rr; 248 | len = bat_trim_i8_encode(buf + off, out_len - off, 249 | sk->f, LOGN, bat_max_fg_bits[LOGN]); 250 | if (len == 0) { 251 | /* This should never happen in practice. */ 252 | return 0; 253 | } 254 | off += len; 255 | len = bat_trim_i8_encode(buf + off, out_len - off, 256 | sk->g, LOGN, bat_max_fg_bits[LOGN]); 257 | if (len == 0) { 258 | /* This should never happen in practice. */ 259 | return 0; 260 | } 261 | off += len; 262 | len = bat_trim_i8_encode(buf + off, out_len - off, 263 | sk->F, LOGN, bat_max_FG_bits[LOGN]); 264 | if (len == 0) { 265 | /* This should never happen in practice. */ 266 | return 0; 267 | } 268 | off += len; 269 | len = bat_trim_i8_encode(buf + off, out_len - off, 270 | sk->G, LOGN, bat_max_FG_bits[LOGN]); 271 | if (len == 0) { 272 | /* This should never happen in practice. 
*/ 273 | return 0; 274 | } 275 | off += len; 276 | len = bat_trim_i32_encode(buf + off, out_len - off, 277 | sk->w, LOGN, bat_max_w_bits[LOGN]); 278 | if (len == 0) { 279 | /* This should never happen in practice. */ 280 | return 0; 281 | } 282 | off += len; 283 | len = XCAT(bat_encode_, Q)(buf + off, out_len - off, 284 | sk->h, LOGN); 285 | if (len == 0) { 286 | /* This should never happen in practice. */ 287 | return 0; 288 | } 289 | off += len; 290 | return off; 291 | } 292 | } 293 | 294 | /* see bat.h */ 295 | size_t 296 | Zn(decode_private_key)(Zn(private_key) *sk, const void *in, size_t max_in_len, 297 | void *tmp, size_t tmp_len) 298 | { 299 | const uint8_t *buf; 300 | size_t off, len; 301 | 302 | if (in == NULL || max_in_len == 0) { 303 | return 0; 304 | } 305 | buf = in; 306 | switch (buf[0]) { 307 | case ZN(TAG_PRIVKEY_SHORT): 308 | if (max_in_len < get_privkey_length(sk, 1)) { 309 | return 0; 310 | } 311 | memmove(sk->seed, buf + 1, sizeof sk->seed); 312 | off = 1 + sizeof sk->seed; 313 | len = bat_trim_i8_decode(sk->F, LOGN, bat_max_FG_bits[LOGN], 314 | buf + off, max_in_len - off); 315 | if (len == 0) { 316 | return 0; 317 | } 318 | off += len; 319 | tmp = tmp_align(tmp, tmp_len, ZN(TMP_DECODE_PRIV) - 31); 320 | if (tmp == NULL) { 321 | return 0; 322 | } 323 | if (!bat_keygen_make_fg(sk->f, sk->g, 324 | (uint16_t *)sk->h, Q, LOGN, 325 | sk->seed, sizeof sk->seed, tmp)) 326 | { 327 | return 0; 328 | } 329 | if (!bat_keygen_rebuild_G(sk->G, sk->f, sk->g, sk->F, 330 | Q, LOGN, tmp)) 331 | { 332 | return 0; 333 | } 334 | if (!bat_keygen_compute_w(sk->w, 335 | sk->f, sk->g, sk->F, sk->G, Q, LOGN, tmp)) 336 | { 337 | return 0; 338 | } 339 | make_rr(sk); 340 | return off; 341 | case ZN(TAG_PRIVKEY_LONG): 342 | if (max_in_len < get_privkey_length(sk, 0)) { 343 | return 0; 344 | } 345 | memmove(sk->seed, buf + 1, sizeof sk->seed); 346 | off = 1 + sizeof sk->seed; 347 | memmove(sk->rr, buf + off, sizeof sk->rr); 348 | off += sizeof sk->rr; 349 | len = 
bat_trim_i8_decode(sk->f, LOGN, bat_max_fg_bits[LOGN], 350 | buf + off, max_in_len - off); 351 | if (len == 0) { 352 | return 0; 353 | } 354 | off += len; 355 | len = bat_trim_i8_decode(sk->g, LOGN, bat_max_fg_bits[LOGN], 356 | buf + off, max_in_len - off); 357 | if (len == 0) { 358 | return 0; 359 | } 360 | off += len; 361 | len = bat_trim_i8_decode(sk->F, LOGN, bat_max_FG_bits[LOGN], 362 | buf + off, max_in_len - off); 363 | if (len == 0) { 364 | return 0; 365 | } 366 | off += len; 367 | len = bat_trim_i8_decode(sk->G, LOGN, bat_max_FG_bits[LOGN], 368 | buf + off, max_in_len - off); 369 | if (len == 0) { 370 | return 0; 371 | } 372 | off += len; 373 | len = bat_trim_i32_decode(sk->w, LOGN, bat_max_w_bits[LOGN], 374 | buf + off, max_in_len - off); 375 | if (len == 0) { 376 | return 0; 377 | } 378 | off += len; 379 | len = XCAT(bat_decode_, Q)(sk->h, LOGN, 380 | buf + off, max_in_len - off); 381 | if (len == 0) { 382 | return 0; 383 | } 384 | off += len; 385 | return off; 386 | default: 387 | return 0; 388 | } 389 | } 390 | 391 | /* see bat.h */ 392 | size_t 393 | Zn(encode_public_key)(void *out, size_t max_out_len, const Zn(public_key) *pk) 394 | { 395 | uint8_t *buf; 396 | size_t out_len, len; 397 | 398 | out_len = 1 + XCAT(bat_encode_, Q)(NULL, 0, pk->h, LOGN); 399 | if (out == NULL) { 400 | return out_len; 401 | } 402 | if (max_out_len < out_len) { 403 | return 0; 404 | } 405 | buf = out; 406 | buf[0] = ZN(TAG_PUBKEY); 407 | len = XCAT(bat_encode_, Q)(buf + 1, max_out_len - 1, pk->h, LOGN); 408 | if (len == 0) { 409 | return 0; 410 | } 411 | return 1 + len; 412 | } 413 | 414 | /* see bat.h */ 415 | size_t 416 | Zn(decode_public_key)(Zn(public_key) *pk, const void *in, size_t max_in_len) 417 | { 418 | const uint8_t *buf; 419 | size_t len; 420 | 421 | if (max_in_len == 0) { 422 | return 0; 423 | } 424 | buf = in; 425 | if (buf[0] != ZN(TAG_PUBKEY)) { 426 | return 0; 427 | } 428 | len = XCAT(bat_decode_, Q)(pk->h, LOGN, buf + 1, max_in_len - 1); 429 | if (len == 
0) { 430 | return 0; 431 | } 432 | return 1 + len; 433 | } 434 | 435 | /* see bat.h */ 436 | size_t 437 | Zn(encode_ciphertext)(void *out, size_t max_out_len, const Zn(ciphertext) *ct) 438 | { 439 | uint8_t *buf; 440 | size_t out_len, len, off; 441 | 442 | out_len = 1 + XCAT(bat_encode_ciphertext_, Q)(NULL, 0, ct->c, LOGN) 443 | + sizeof ct->c2; 444 | if (out == NULL) { 445 | return out_len; 446 | } 447 | if (max_out_len < out_len) { 448 | return 0; 449 | } 450 | buf = out; 451 | buf[0] = ZN(TAG_CIPHERTEXT); 452 | off = 1; 453 | len = XCAT(bat_encode_ciphertext_, Q)( 454 | buf + off, max_out_len - off, ct->c, LOGN); 455 | if (len == 0) { 456 | return 0; 457 | } 458 | off += len; 459 | memcpy(buf + off, ct->c2, sizeof ct->c2); 460 | off += sizeof ct->c2; 461 | return off; 462 | } 463 | 464 | /* see bat.h */ 465 | size_t 466 | Zn(decode_ciphertext)(Zn(ciphertext) *ct, const void *in, size_t max_in_len) 467 | { 468 | const uint8_t *buf; 469 | size_t off, len; 470 | 471 | if (max_in_len < 1) { 472 | return 0; 473 | } 474 | buf = in; 475 | if (buf[0] != ZN(TAG_CIPHERTEXT)) { 476 | return 0; 477 | } 478 | off = 1; 479 | len = XCAT(bat_decode_ciphertext_, Q)( 480 | ct->c, LOGN, buf + off, max_in_len - off); 481 | if (len == 0) { 482 | return 0; 483 | } 484 | off += len; 485 | if (max_in_len - off < sizeof ct->c2) { 486 | return 0; 487 | } 488 | memcpy(ct->c2, buf + off, sizeof ct->c2); 489 | off += sizeof ct->c2; 490 | return off; 491 | } 492 | 493 | /* see bat.h */ 494 | int 495 | Zn(encapsulate)(void *secret, size_t secret_len, 496 | Zn(ciphertext) *ct, const Zn(public_key) *pk, 497 | void *tmp, size_t tmp_len) 498 | { 499 | tmp = tmp_align(tmp, tmp_len, ZN(TMP_ENCAPS) - 31); 500 | if (tmp == NULL) { 501 | return BAT_ERR_NOSPACE; 502 | } 503 | 504 | /* 505 | * Encapsulation may theoretically fail if the resulting 506 | * vector norm is higher than a specific bound. However, this 507 | * is very rare (it cannot happen at all for q = 257). 
Thus, 508 | * we expect not to have to loop. Correspondingly, it is more 509 | * efficient to use the random seed from the OS directly. 510 | */ 511 | for (;;) { 512 | uint8_t m[LVLBYTES], sbuf[SBUF_LEN(LOGN)]; 513 | size_t u; 514 | 515 | /* 516 | * Get a random message m from the OS. 517 | */ 518 | if (!bat_get_seed(m, sizeof m)) { 519 | return BAT_ERR_RANDOM; 520 | } 521 | 522 | /* 523 | * Hash m to sample s. 524 | */ 525 | hash_and_sample_s(sbuf, sizeof sbuf, m, sizeof m); 526 | #if N < 8 527 | /* For very reduced toy versions, we don't even have a 528 | full byte, and we must clear the unused bits. */ 529 | sbuf[0] &= (1u << N) - 1u; 530 | #endif 531 | 532 | /* 533 | * Compute c1. This may fail (rarely!) only for q = 769. 534 | */ 535 | if (!XCAT(bat_encrypt_, Q)(ct->c, sbuf, pk->h, LOGN, tmp)) { 536 | continue; 537 | } 538 | 539 | /* 540 | * Make c2 = Hash_m(s) XOR m. 541 | */ 542 | hash_m(ct->c2, sbuf, sizeof sbuf); 543 | for (u = 0; u < sizeof m; u ++) { 544 | ct->c2[u] ^= m[u]; 545 | } 546 | 547 | /* 548 | * Produce the shared secret (output of a successful key 549 | * exchange). 550 | */ 551 | make_secret(secret, secret_len, m, sizeof m, 1); 552 | 553 | return 0; 554 | } 555 | } 556 | 557 | /* see bat.h */ 558 | int 559 | Zn(encapsulate_explicit_seed)(void *secret, size_t secret_len, 560 | Zn(ciphertext) *ct, const Zn(public_key) *pk, 561 | const void *m, void *tmp, size_t tmp_len) 562 | { 563 | uint8_t m2[LVLBYTES]; 564 | 565 | tmp = tmp_align(tmp, tmp_len, ZN(TMP_ENCAPS) - 31); 566 | if (tmp == NULL) { 567 | return BAT_ERR_NOSPACE; 568 | } 569 | 570 | for (;;) { 571 | uint8_t sbuf[SBUF_LEN(LOGN)]; 572 | size_t u; 573 | 574 | /* 575 | * If no seed is provided, then generate one randomly. 576 | */ 577 | if (m == NULL) { 578 | if (!bat_get_seed(m2, sizeof m2)) { 579 | return BAT_ERR_RANDOM; 580 | } 581 | m = m2; 582 | } 583 | 584 | /* 585 | * Hash m to sample s. 
586 | */ 587 | hash_and_sample_s(sbuf, sizeof sbuf, m, LVLBYTES); 588 | #if N < 8 589 | /* For very reduced toy versions, we don't even have a 590 | full byte, and we must clear the unused bits. */ 591 | sbuf[0] &= (1u << N) - 1u; 592 | #endif 593 | 594 | /* 595 | * Compute c1. This may fail (very rarely!) only for q = 769; 596 | * we just hash the current seed. Since this occurrence is 597 | * very rare in practice, this process does not induce any 598 | * non-negligible bias. 599 | */ 600 | if (!XCAT(bat_encrypt_, Q)(ct->c, sbuf, pk->h, LOGN, tmp)) { 601 | blake2s(m2, LVLBYTES, NULL, 0, m, LVLBYTES); 602 | m = m2; 603 | continue; 604 | } 605 | 606 | /* 607 | * Make c2 = Hash_m(s) XOR m. 608 | */ 609 | hash_m(ct->c2, sbuf, sizeof sbuf); 610 | for (u = 0; u < LVLBYTES; u ++) { 611 | ct->c2[u] ^= ((const uint8_t *)m)[u]; 612 | } 613 | 614 | /* 615 | * Produce the shared secret (output of a successful key 616 | * exchange). 617 | */ 618 | make_secret(secret, secret_len, m, LVLBYTES, 1); 619 | 620 | return 0; 621 | } 622 | } 623 | 624 | /* see bat.h */ 625 | int 626 | Zn(decapsulate)(void *secret, size_t secret_len, 627 | const Zn(ciphertext) *ct, const Zn(private_key) *sk, 628 | void *tmp, size_t tmp_len) 629 | { 630 | uint8_t sbuf[SBUF_LEN(LOGN)], m[LVLBYTES], m_alt[LVLBYTES]; 631 | uint8_t sbuf_alt[SBUF_LEN(LOGN)]; 632 | int8_t *c_alt; 633 | size_t u; 634 | uint32_t d; 635 | 636 | tmp = tmp_align(tmp, tmp_len, ZN(TMP_DECAPS) - 31); 637 | if (tmp == NULL) { 638 | return BAT_ERR_NOSPACE; 639 | } 640 | 641 | /* 642 | * Inner decryption never fails (at least, it never reports 643 | * a failure). 644 | */ 645 | XCAT(bat_decrypt_, Q)(sbuf, ct->c, 646 | sk->f, sk->g, sk->F, sk->G, sk->w, LOGN, tmp); 647 | 648 | /* 649 | * From sbuf, we derive the mask that allows recovery of m 650 | * out of the second ciphertext half (c2). 
651 | */ 652 | hash_m(m, sbuf, sizeof sbuf); 653 | for (u = 0; u < sizeof m; u ++) { 654 | m[u] ^= ct->c2[u]; 655 | } 656 | 657 | /* 658 | * Decryption is valid if and only if we can reencrypt the 659 | * obtained message m and get the exact same polynomial s 660 | * and ciphertext c1. 661 | */ 662 | hash_and_sample_s(sbuf_alt, sizeof sbuf_alt, m, sizeof m); 663 | #if N < 8 664 | sbuf_alt[0] &= (1u << N) - 1u; 665 | #endif 666 | c_alt = tmp; 667 | tmp = tmp_align((void *)(c_alt + N), ZN(TMP_DECAPS) - N, 668 | ZN(TMP_ENCAPS) - 31); 669 | if (tmp == NULL) { 670 | /* This should never happen in practice. */ 671 | return BAT_ERR_NOSPACE; 672 | } 673 | d = XCAT(bat_encrypt_, Q)(c_alt, sbuf_alt, sk->h, LOGN, tmp); 674 | d --; 675 | for (u = 0; u < sizeof sbuf; u ++) { 676 | d |= sbuf[u] ^ sbuf_alt[u]; 677 | } 678 | for (u = 0; u < sizeof ct->c; u ++) { 679 | d |= (uint32_t)(ct->c[u] - c_alt[u]); 680 | } 681 | 682 | /* 683 | * If encapsulation worked AND yielded the same ciphertext as 684 | * received, then d == 0 at this point, and we want to produce 685 | * the secret key as a hash of m. Otherwise, d != 0, and we 686 | * must produce the secret as a hash of the received ciphertext 687 | * and the secret value r (stored in sk->rr). We MUST NOT leak 688 | * which was the case, and therefore we must always compute 689 | * both hashes and perform constant-time conditional replacement. 
690 | */ 691 | 692 | make_kdf_seed_bad(m_alt, sizeof m, sk, ct); 693 | d = -((uint32_t)(d | -d) >> 31); 694 | for (u = 0; u < sizeof m; u ++) { 695 | m[u] ^= d & (m[u] ^ m_alt[u]); 696 | } 697 | make_secret(secret, secret_len, m, sizeof m, d + 1); 698 | return 0; 699 | } 700 | -------------------------------------------------------------------------------- /src/api_128_256.c: -------------------------------------------------------------------------------- 1 | #define Q 128 2 | #define N 256 3 | #define LOGN 8 4 | #define LVLBYTES 10 5 | 6 | #include "api.c" 7 | -------------------------------------------------------------------------------- /src/api_257_512.c: -------------------------------------------------------------------------------- 1 | #define Q 257 2 | #define N 512 3 | #define LOGN 9 4 | #define LVLBYTES 16 5 | 6 | #include "api.c" 7 | -------------------------------------------------------------------------------- /src/api_769_1024.c: -------------------------------------------------------------------------------- 1 | #define Q 769 2 | #define N 1024 3 | #define LOGN 10 4 | #define LVLBYTES 32 5 | 6 | #include "api.c" 7 | -------------------------------------------------------------------------------- /src/bat.h: -------------------------------------------------------------------------------- 1 | #ifndef BAT_H__ 2 | #define BAT_H__ 3 | 4 | #include 5 | #include 6 | 7 | /* 8 | * For modulus qqq and degree nnn, the following types and functions are 9 | * defined: 10 | * 11 | * bat_qqq_nnn_private_key 12 | * 13 | * Private key structure; contains all private key elements, including 14 | * a copy of the public key. 15 | * 16 | * bat_qqq_nnn_public_key 17 | * 18 | * Public key structure. Contains only the public key. 19 | * 20 | * bat_qqq_nnn_ciphertext 21 | * 22 | * Ciphertext structure. Contains the ciphertext polynomial and 23 | * the FO tag. 
24 | * 25 | * int bat_qqq_nnn_keygen( 26 | * bat_qqq_nnn_private_key *sk, void *tmp, size_t tmp_len); 27 | * 28 | * Generate a new key pair. Returned value is 0 on success, a negative 29 | * value on error. Buffer tmp (tmp_len bytes) should be large enough 30 | * (see the BAT_qqq_nnn_TMP_KEYGEN macro). 31 | * 32 | * void bat_qqq_nnn_get_public_key( 33 | * bat_qqq_nnn_public_key *pk, const bat_qqq_nnn_private_key *sk); 34 | * 35 | * Get a copy of the public key from the private key. 36 | * 37 | * size_t bat_qqq_nnn_encode_private_key( 38 | * void *out, size_t max_out_len, 39 | * const bat_qqq_nnn_private_key *sk, int short_format); 40 | * 41 | * Encode the private key into bytes. If short_format is zero, then 42 | * the "long format" is used (encoding contains the generation seed, 43 | * the additional secret seed rr, f, g, F, G, w, and the public key 44 | * h). If short_format is non-zero, then the "short format" is used 45 | * (encoding contains only F and the seed). The short format is much 46 | * smaller, but requires more CPU and temporary RAM when decoding. 47 | * 48 | * If out is NULL, then max_out_len is ignored, and the function 49 | * returns the size (in bytes) that the encoded private key would have. 50 | * Otherwise, if the encoded private key would be longer than 51 | * max_out_len, then the function returns 0 and encodes nothing. 52 | * Otherwise, the encoded private key is written into out, and its 53 | * size (in bytes) is returned. 54 | * 55 | * size_t bat_qqq_nnn_decode_private_key( 56 | * bat_qqq_nnn_private_key *sk, 57 | * const void *in, size_t max_in_len, 58 | * void *tmp, size_t tmp_len); 59 | * 60 | * Decode the private key from bytes. If the incoming bytes are 61 | * invalid, or relate to a different set of parameters, or max_in_len 62 | * is shorter than the private key size (i.e. it was truncated), 63 | * then this function returns 0. 
Otherwise, it returns the actual 64 | * size (in bytes) of the encoded private key (which is not greater 65 | * than max_in_len, but may be lower than max_in_len). 66 | * 67 | * If the encoded key uses the long format, then tmp and tmp_len 68 | * are ignored. If the encoded key uses the short format, then 69 | * tmp (of size tmp_len bytes) is used for temporary storage; in 70 | * that case, if the buffer is too short, then the function fails 71 | * and returns 0. The BAT_qqq_nnn_TMP_DECODE_PRIV macro evaluates to 72 | * the required minimal size. 73 | * 74 | * size_t bat_qqq_nnn_encode_public_key( 75 | * void *out, size_t max_out_len, 76 | * const bat_qqq_nnn_public_key *pk); 77 | * 78 | * Encode the public key into bytes. 79 | * 80 | * If out is NULL, then max_out_len is ignored, and the function 81 | * returns the size (in bytes) that the encoded public key would have. 82 | * Otherwise, if the encoded public key would be longer than 83 | * max_out_len, then the function returns 0 and encodes nothing. 84 | * Otherwise, the encoded public key is written into out, and its 85 | * size (in bytes) is returned. 86 | * 87 | * size_t bat_qqq_nnn_decode_public_key( 88 | * bat_qqq_nnn_public_key *pk, 89 | * const void *in, size_t max_in_len); 90 | * 91 | * Decode the public key from bytes. If the incoming bytes are 92 | * invalid, or relate to a different set of parameters, or max_in_len 93 | * is shorter than the public key size (i.e. it was truncated), 94 | * then this function returns 0. Otherwise, it returns the actual 95 | * size (in bytes) of the encoded public key (which is not greater 96 | * than max_in_len, but may be lower than max_in_len). 97 | * 98 | * size_t bat_qqq_nnn_encode_ciphertext( 99 | * void *out, size_t max_out_len, 100 | * const bat_qqq_nnn_ciphertext *ct); 101 | * 102 | * Encode the ciphertext into bytes. 
103 | * 104 | * If out is NULL, then max_out_len is ignored, and the function 105 | * returns the size (in bytes) that the encoded ciphertext would have. 106 | * Otherwise, if the encoded ciphertext would be longer than 107 | * max_out_len, then the function returns 0 and encodes nothing. 108 | * Otherwise, the encoded ciphertext is written into out, and its 109 | * size (in bytes) is returned. 110 | * 111 | * size_t bat_qqq_nnn_decode_ciphertext( 112 | * bat_qqq_nnn_ciphertext *ct, 113 | * const void *in, size_t max_in_len); 114 | * 115 | * Decode the ciphertext from bytes. If the incoming bytes are 116 | * invalid, or relate to a different set of parameters, or max_in_len 117 | * is shorter than the ciphertext size (i.e. it was truncated), 118 | * then this function returns 0. Otherwise, it returns the actual 119 | * size (in bytes) of the encoded ciphertext (which is not greater 120 | * than max_in_len, but may be lower than max_in_len). 121 | * 122 | * int bat_qqq_nnn_encapsulate( 123 | * void *secret, size_t secret_len, 124 | * bat_qqq_nnn_ciphertext *ct, 125 | * const bat_qqq_nnn_public_key *pk, 126 | * void *tmp, size_t tmp_len); 127 | * 128 | * Perform a key encapsulation with the provided public key. The 129 | * resulting shared secret is written into secret[], while the 130 | * ciphertext is written into *ct. The shared secret length is 131 | * arbitrary (it internally comes from a BLAKE2-based KDF) but 132 | * of course the sender and receiver should agree on the length to 133 | * use, depending on what the secret is for. 134 | * 135 | * On success, 0 is returned; on error, a negative error code is 136 | * returned and the secret value is not produced. If the provided 137 | * temporary buffer (tmp, of size tmp_len bytes) is too small, then 138 | * BAT_ERR_NOSPACE is returned (see BAT_qqq_nnn_TMP_ENCAPS).
139 | * 140 | * int bat_qqq_nnn_encapsulate_explicit_seed( 141 | * void *secret, size_t secret_len, 142 | * bat_qqq_nnn_ciphertext *ct, 143 | * const bat_qqq_nnn_public_key *pk, 144 | * const void *m, void *tmp, size_t tmp_len); 145 | * 146 | * This is a variant of bat_qqq_nnn_encapsulate(), in which the 147 | * random seed (m[] value) is provided explicitly. This function 148 | * is meant mostly for benchmarks and reproducible test vectors, 149 | * to avoid the overhead and unpredictability of the OS-provided 150 | * random generator; in general, bat_qqq_nnn_encapsulate() SHOULD 151 | * be used instead. If m is NULL, then the OS RNG is used to create 152 | * the seed. When m is not NULL, then it MUST be generated as a 153 | * uniform unpredictable sequence of bytes of the right length for 154 | * the target BAT version (10, 16 or 32 bytes, for BAT-128-256, 155 | * BAT-257-512 and BAT-769-1024, respectively). 156 | * 157 | * int bat_qqq_nnn_decapsulate( 158 | * void *secret, size_t secret_len, 159 | * const bat_qqq_nnn_ciphertext *ct, 160 | * const bat_qqq_nnn_private_key *sk, 161 | * void *tmp, size_t tmp_len); 162 | * 163 | * Perform a key decapsulation with the provided ciphertext and 164 | * private key. The resulting shared secret is written into 165 | * secret[]. The shared secret length is arbitrary (it internally 166 | * comes from a BLAKE2-based KDF) but of course the sender and 167 | * receiver should agree on the length to use, depending on what 168 | * the secret is for. 169 | * 170 | * On success, 0 is returned; on error, a negative error code is 171 | * returned and the secret value is not produced. Such errors are 172 | * reported only for local technical reasons unrelated to the 173 | * received ciphertext; e.g. BAT_ERR_NOSPACE is returned if the 174 | * tmp[] buffer (of size tmp_len bytes) is too small.
By 175 | * construction of the algorithm, invalid ciphertext values lead to 176 | * a recovered shared secret which is deterministic from the 177 | * ciphertext and private key, but unpredictable by third parties; 178 | * in such cases, this function reports a success (0). 179 | */ 180 | 181 | #define BAT_MK(q, n, lvl_bytes, htype) \ 182 | typedef struct { \ 183 | int8_t f[n]; \ 184 | int8_t g[n]; \ 185 | int8_t F[n]; \ 186 | int8_t G[n]; \ 187 | int32_t w[n]; \ 188 | htype h[n]; \ 189 | uint8_t rr[32]; \ 190 | uint8_t seed[32]; \ 191 | } bat_ ## q ## _ ## n ## _private_key; \ 192 | typedef struct { \ 193 | htype h[n]; \ 194 | } bat_ ## q ## _ ## n ## _public_key; \ 195 | typedef struct { \ 196 | int8_t c[n]; \ 197 | uint8_t c2[lvl_bytes]; \ 198 | } bat_ ## q ## _ ## n ## _ciphertext; \ 199 | int bat_ ## q ## _ ## n ## _keygen(bat_ ## q ## _ ## n ## _private_key *sk, \ 200 | void *tmp, size_t tmp_len); \ 201 | void bat_ ## q ## _ ## n ## _get_public_key( \ 202 | bat_ ## q ## _ ## n ## _public_key *pk, \ 203 | const bat_ ## q ## _ ## n ## _private_key *sk); \ 204 | size_t bat_ ## q ## _ ## n ## _encode_private_key( \ 205 | void *out, size_t max_out_len, \ 206 | const bat_ ## q ## _ ## n ## _private_key *sk, int short_format); \ 207 | size_t bat_ ## q ## _ ## n ## _decode_private_key( \ 208 | bat_ ## q ## _ ## n ## _private_key *sk, \ 209 | const void *in, size_t max_in_len, \ 210 | void *tmp, size_t tmp_len); \ 211 | size_t bat_ ## q ## _ ## n ## _encode_public_key( \ 212 | void *out, size_t max_out_len, \ 213 | const bat_ ## q ## _ ## n ## _public_key *pk); \ 214 | size_t bat_ ## q ## _ ## n ## _decode_public_key( \ 215 | bat_ ## q ## _ ## n ## _public_key *pk, \ 216 | const void *in, size_t max_in_len); \ 217 | size_t bat_ ## q ## _ ## n ## _encode_ciphertext( \ 218 | void *out, size_t max_out_len, \ 219 | const bat_ ## q ## _ ## n ## _ciphertext *ct); \ 220 | size_t bat_ ## q ## _ ## n ## _decode_ciphertext( \ 221 | bat_ ## q ## _ ## n ## _ciphertext *ct, \ 222 | 
const void *in, size_t max_in_len); \ 223 | int bat_ ## q ## _ ## n ## _encapsulate( \ 224 | void *secret, size_t secret_len, \ 225 | bat_ ## q ## _ ## n ## _ciphertext *ct, \ 226 | const bat_ ## q ## _ ## n ## _public_key *pk, \ 227 | void *tmp, size_t tmp_len); \ 228 | int bat_ ## q ## _ ## n ## _encapsulate_explicit_seed( \ 229 | void *secret, size_t secret_len, \ 230 | bat_ ## q ## _ ## n ## _ciphertext *ct, \ 231 | const bat_ ## q ## _ ## n ## _public_key *pk, \ 232 | const void *m, void *tmp, size_t tmp_len); \ 233 | int bat_ ## q ## _ ## n ## _decapsulate( \ 234 | void *secret, size_t secret_len, \ 235 | const bat_ ## q ## _ ## n ## _ciphertext *ct, \ 236 | const bat_ ## q ## _ ## n ## _private_key *sk, \ 237 | void *tmp, size_t tmp_len); 238 | 239 | BAT_MK(128, 256, 10, uint8_t) 240 | BAT_MK(257, 512, 16, uint16_t) 241 | BAT_MK(769, 1024, 32, uint16_t) 242 | 243 | #undef BAT_MK 244 | 245 | /* 246 | * Macros for temporary buffer sizes. 247 | * 248 | * Each length is in bytes and accounts for an extra 31 bytes for internal 249 | * alignment adjustment. 250 | */ 251 | #define BAT_128_256_TMP_KEYGEN 6175 252 | #define BAT_128_256_TMP_DECODE_PRIV 6175 253 | #define BAT_128_256_TMP_ENCAPS 799 254 | #define BAT_128_256_TMP_DECAPS 2079 255 | 256 | #define BAT_257_512_TMP_KEYGEN 12319 257 | #define BAT_257_512_TMP_DECODE_PRIV 12319 258 | #define BAT_257_512_TMP_ENCAPS 2079 259 | #define BAT_257_512_TMP_DECAPS 4127 260 | 261 | #define BAT_769_1024_TMP_KEYGEN 24607 262 | #define BAT_769_1024_TMP_DECODE_PRIV 24607 263 | #define BAT_769_1024_TMP_ENCAPS 4127 264 | #define BAT_769_1024_TMP_DECAPS 8223 265 | 266 | /* 267 | * Error codes. 268 | */ 269 | 270 | /* Decapsulation failed. */ 271 | #define BAT_ERR_DECAPS_FAILED -1 272 | 273 | /* Provided object (key or ciphertext) uses a different set of parameters 274 | (modulus and/or degree) than expected by the called function. 
*/ 275 | #define BAT_ERR_WRONG_PARAMS -2 276 | 277 | /* Provided object (key or ciphertext) is invalidly encoded. */ 278 | #define BAT_ERR_BAD_ENCODING -3 279 | 280 | /* Provided temporary space has insufficient length for the requested 281 | operation. */ 282 | #define BAT_ERR_NOSPACE -4 283 | 284 | /* Random seeding from operating system failed. */ 285 | #define BAT_ERR_RANDOM -5 286 | 287 | /* 288 | * Tag bytes. Each encoded public key, private key or ciphertext starts 289 | * with a tag byte that identifies the object type and parameters. 290 | * General format is (most-to-least significant order): 291 | * 292 | * t t q q n n n n 293 | * 294 | * with: 295 | * 296 | * - tt = 00 for a private key (long format), 01 for a private key (short 297 | * format), 10 for a public key, 11 for a ciphertext. 298 | * - qq = 00 for q = 128, 01 for q = 257, 10 for q = 769. 299 | * - nnnn = log2(n) where n is the degree (power of 2, up to 1024). 300 | */ 301 | #define BAT_128_256_TAG_PRIVKEY_LONG 0x08 302 | #define BAT_128_256_TAG_PRIVKEY_SHORT 0x48 303 | #define BAT_128_256_TAG_PUBKEY 0x88 304 | #define BAT_128_256_TAG_CIPHERTEXT 0xC8 305 | 306 | #define BAT_257_512_TAG_PRIVKEY_LONG 0x19 307 | #define BAT_257_512_TAG_PRIVKEY_SHORT 0x59 308 | #define BAT_257_512_TAG_PUBKEY 0x99 309 | #define BAT_257_512_TAG_CIPHERTEXT 0xD9 310 | 311 | #define BAT_769_1024_TAG_PRIVKEY_LONG 0x2A 312 | #define BAT_769_1024_TAG_PRIVKEY_SHORT 0x6A 313 | #define BAT_769_1024_TAG_PUBKEY 0xAA 314 | #define BAT_769_1024_TAG_CIPHERTEXT 0xEA 315 | 316 | #endif 317 | -------------------------------------------------------------------------------- /src/blake2.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAKE2_H__ 2 | #define BLAKE2_H__ 3 | 4 | #include <stddef.h> /* size_t */ 5 | #include <stdint.h> /* uint8_t, uint32_t, uint64_t */ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /* Streaming BLAKE2s context: 64-byte block buffer, eight 32-bit state words, running counter and requested output length. */ typedef struct { 12 | uint8_t buf[64]; 13 | uint32_t h[8]; 14 | uint64_t ctr; 15 | size_t out_len; 16 | } blake2s_context; 17 | 18
| void blake2s_init(blake2s_context *bc, size_t out_len); 19 | 20 | void blake2s_init_key(blake2s_context *bc, size_t out_len, 21 | const void *key, size_t key_len); 22 | 23 | void blake2s_update(blake2s_context *bc, const void *data, size_t len); 24 | 25 | void blake2s_final(blake2s_context *bc, void *dst); 26 | 27 | void blake2s(void *dst, size_t dst_len, const void *key, size_t key_len, 28 | const void *src, size_t src_len); 29 | 30 | /* 31 | * Use BLAKE2s as a PRNG: for a given seed, compute the concatenation: 32 | * H(label || 0 || seed) || H(label || 1 || seed) || ... 33 | * with: 34 | * H = BLAKE2s with a 32-byte output 35 | * seed = provided seed (length MUST be at most 48 bytes) 36 | * label = provided value (64-bit, little-endian) 37 | * 0, 1,... = block counter (64-bit, little-endian) 38 | * The concatenation output is truncated to dst_len and written in dst[]. 39 | * The seed and dst buffers may overlap arbitrarily. 40 | */ 41 | void blake2s_expand(void *dst, size_t dst_len, 42 | const void *seed, size_t seed_len, uint64_t label); 43 | 44 | typedef struct { 45 | uint8_t buf[128]; 46 | uint64_t h[8]; 47 | uint64_t ctr; 48 | size_t out_len; 49 | } blake2b_context; 50 | 51 | void blake2b_init(blake2b_context *bc, size_t out_len); 52 | 53 | void blake2b_init_key(blake2b_context *bc, size_t out_len, 54 | const void *key, size_t key_len); 55 | 56 | void blake2b_update(blake2b_context *bc, const void *data, size_t len); 57 | 58 | void blake2b_final(blake2b_context *bc, void *dst); 59 | 60 | void blake2b(void *dst, size_t dst_len, const void *key, size_t key_len, 61 | const void *src, size_t src_len); 62 | 63 | /* 64 | * Use BLAKE2b as a PRNG: for a given seed, compute the concatenation: 65 | * H(label || 0 || seed) || H(label || 1 || seed) || ... 66 | * with: 67 | * H = BLAKE2b with a 64-byte output 68 | * seed = provided seed (length MUST be at most 112 bytes) 69 | * label = provided value (64-bit, little-endian) 70 | * 0, 1,... 
= block counter (64-bit, little-endian) 71 | * The concatenation output is truncated to dst_len and written in dst[]. 72 | * The seed and dst buffers may overlap arbitrarily. 73 | */ 74 | void blake2b_expand(void *dst, size_t dst_len, 75 | const void *seed, size_t seed_len, uint64_t label); 76 | 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /src/blake2b.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BLAKE2b implementation (internal functions for BAT). 3 | */ 4 | 5 | /* ====================================================================== */ 6 | 7 | #include <stdint.h> 8 | #include <string.h> 9 | 10 | #include "blake2.h" 11 | 12 | #include "inner.h" 13 | #define BLAKE2_AVX2 BAT_AVX2 14 | #define BLAKE2_LE BAT_LE 15 | #define BLAKE2_UNALIGNED BAT_UNALIGNED 16 | 17 | static const uint64_t IV[] = { 18 | 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 19 | 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1, 20 | 0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 21 | 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179 22 | }; 23 | /* Compression function: mixes one 128-byte block (data) into the eight state words h[]. t is the byte counter value for this block; f is non-zero for the final block. */ 24 | static void 25 | process_block(uint64_t *h, const uint8_t *data, uint64_t t, int f) 26 | { 27 | uint64_t v[16], m[16]; 28 | int i; 29 | 30 | memcpy(v, h, 8 * sizeof(uint64_t)); 31 | memcpy(v + 8, IV, sizeof IV); 32 | v[12] ^= t; /* only a 64-bit counter is supported: v[13] is left as IV[5] */ 33 | if (f) { 34 | v[14] = ~v[14]; /* final-block flag */ 35 | } 36 | 37 | #if BLAKE2_LE 38 | memcpy(m, data, sizeof m); 39 | #else 40 | for (i = 0; i < 16; i ++) { 41 | m[i] = dec64le(data + (i << 3)); 42 | } 43 | #endif 44 | 45 | #define ROR(x, n) (((x) << (64 - (n))) | ((x) >> (n))) 46 | 47 | #define G(a, b, c, d, x, y) do { \ 48 | v[a] += v[b] + (x); \ 49 | v[d] = ROR(v[d] ^ v[a], 32); \ 50 | v[c] += v[d]; \ 51 | v[b] = ROR(v[b] ^ v[c], 24); \ 52 | v[a] += v[b] + (y); \ 53 | v[d] = ROR(v[d] ^ v[a], 16); \ 54 | v[c] += v[d]; \ 55 | v[b] = ROR(v[b] ^ v[c], 63); \ 56 | } while (0) 57 | 58 | #define ROUND(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF) \ 59 | do
{ \ 60 | G(0, 4, 8, 12, m[s0], m[s1]); \ 61 | G(1, 5, 9, 13, m[s2], m[s3]); \ 62 | G(2, 6, 10, 14, m[s4], m[s5]); \ 63 | G(3, 7, 11, 15, m[s6], m[s7]); \ 64 | G(0, 5, 10, 15, m[s8], m[s9]); \ 65 | G(1, 6, 11, 12, m[sA], m[sB]); \ 66 | G(2, 7, 8, 13, m[sC], m[sD]); \ 67 | G(3, 4, 9, 14, m[sE], m[sF]); \ 68 | } while (0) 69 | /* Twelve rounds; the last two reuse the message word orders of the first two. */ 70 | ROUND( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 71 | ROUND(14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); 72 | ROUND(11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); 73 | ROUND( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); 74 | ROUND( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); 75 | ROUND( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); 76 | ROUND(12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); 77 | ROUND(13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); 78 | ROUND( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5); 79 | ROUND(10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0); 80 | ROUND( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 81 | ROUND(14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); 82 | 83 | #undef ROR 84 | #undef G 85 | #undef ROUND 86 | /* Fold both halves of the working vector back into the state. */ 87 | for (i = 0; i < 8; i ++) { 88 | h[i] ^= v[i] ^ v[i + 8]; 89 | } 90 | } 91 | 92 | /* 93 | * State rules: 94 | * 95 | * buf buffered data 96 | * h current state 97 | * ctr number of bytes injected so far 98 | * 99 | * Initially, ctr == 0 and h contains the XOR of IV and parameter block; 100 | * buf[] is empty. For any ctr > 0, buf[] is non-empty; it might contain 101 | * a full block worth of data (processing of the block is delayed until 102 | * we know whether this is the final block or not). 103 | * 104 | * If a key is injected, then it counts as a first full block.
105 | */ 106 | 107 | /* see blake2.h; sets up an unkeyed context that will output out_len bytes */ 108 | void 109 | blake2b_init(blake2b_context *bc, size_t out_len) 110 | { 111 | memcpy(bc->h, IV, sizeof bc->h); 112 | bc->h[0] ^= 0x01010000 ^ (uint64_t)out_len; /* parameter word 0x0101kknn (RFC 7693): nn = output length, kk = 0 here */ 113 | bc->ctr = 0; 114 | bc->out_len = out_len; 115 | } 116 | 117 | /* see blake2.h; same as blake2b_init(), plus an optional key (key_len == 0 means unkeyed) */ 118 | void 119 | blake2b_init_key(blake2b_context *bc, size_t out_len, 120 | const void *key, size_t key_len) 121 | { 122 | blake2b_init(bc, out_len); 123 | if (key_len > 0) { 124 | bc->h[0] ^= (uint64_t)key_len << 8; /* kk byte of the parameter word */ 125 | memcpy(bc->buf, key, key_len); /* NOTE(review): no bound check; key_len must not exceed sizeof bc->buf */ 126 | memset(bc->buf + key_len, 0, (sizeof bc->buf) - key_len); 127 | bc->ctr = sizeof bc->buf; /* the zero-padded key counts as one full buffered block */ 128 | } 129 | } 130 | 131 | /* see blake2.h; injects len bytes, always keeping the most recent block buffered */ 132 | void 133 | blake2b_update(blake2b_context *bc, const void *data, size_t len) 134 | { 135 | uint64_t ctr; 136 | size_t p; 137 | 138 | /* Special case: if no input data, return immediately. */ 139 | if (len == 0) { 140 | return; 141 | } 142 | 143 | ctr = bc->ctr; 144 | 145 | /* First complete the current block, if not already full. */ 146 | p = (size_t)ctr & ((sizeof bc->buf) - 1); /* offset inside buf[]; 0 means either empty (ctr == 0) or full */ 147 | if (ctr == 0 || p != 0) { 148 | /* buffer is not full */ 149 | size_t clen; 150 | 151 | clen = sizeof bc->buf - p; 152 | if (clen >= len) { 153 | memcpy(bc->buf + p, data, len); 154 | bc->ctr = ctr + len; 155 | return; 156 | } 157 | memcpy(bc->buf + p, data, clen); 158 | ctr += clen; 159 | data = (const uint8_t *)data + clen; 160 | len -= clen; 161 | } 162 | 163 | /* Process the buffered block. It cannot be the final one, since len > 0 bytes remain. */ 164 | process_block(bc->h, bc->buf, ctr, 0); 165 | 166 | /* Process all subsequent full blocks, except the last. */ 167 | while (len > sizeof bc->buf) { 168 | ctr += sizeof bc->buf; 169 | process_block(bc->h, data, ctr, 0); 170 | data = (const uint8_t *)data + sizeof bc->buf; 171 | len -= sizeof bc->buf; 172 | } 173 | 174 | /* Copy the last block (possibly partial) into the buffer.
*/ 175 | memcpy(bc->buf, data, len); 176 | bc->ctr = ctr + len; 177 | } 178 | 179 | /* see blake2.h; pads and processes the buffered block as the final one, then writes bc->out_len bytes to dst */ 180 | void 181 | blake2b_final(blake2b_context *bc, void *dst) 182 | { 183 | #if !BLAKE2_LE 184 | int i; 185 | uint8_t tmp[64]; 186 | #endif 187 | size_t p; 188 | 189 | /* Pad the current block with zeros, if not full. If the 190 | buffer is empty (no key, no data) then fill it with zeros 191 | as well. */ 192 | p = (size_t)bc->ctr & ((sizeof bc->buf) - 1); 193 | if (bc->ctr == 0 || p != 0) { 194 | memset(bc->buf + p, 0, (sizeof bc->buf) - p); 195 | } 196 | 197 | process_block(bc->h, bc->buf, bc->ctr, 1); /* last argument 1: final block */ 198 | #if BLAKE2_LE 199 | memcpy(dst, bc->h, bc->out_len); 200 | #else 201 | for (i = 0; i < 8; i ++) { 202 | enc64le(tmp + (i << 3), bc->h[i]); 203 | } 204 | memcpy(dst, tmp, bc->out_len); 205 | #endif 206 | } 207 | 208 | /* see blake2.h; one-shot wrapper: init (optionally keyed), update with src, finalize into dst */ 209 | void 210 | blake2b(void *dst, size_t dst_len, const void *key, size_t key_len, 211 | const void *src, size_t src_len) 212 | { 213 | blake2b_context bc; 214 | 215 | blake2b_init_key(&bc, dst_len, key, key_len); 216 | blake2b_update(&bc, src, src_len); 217 | blake2b_final(&bc, dst); 218 | } 219 | 220 | /* see blake2.h; each output chunk is an unkeyed 64-byte hash of a single block label || counter || seed */ 221 | void 222 | blake2b_expand(void *dst, size_t dst_len, 223 | const void *seed, size_t seed_len, uint64_t label) 224 | { 225 | uint64_t h[8]; 226 | uint8_t buf[128]; 227 | size_t in_len; 228 | uint64_t num; 229 | 230 | in_len = 16 + seed_len; /* 8-byte label + 8-byte counter + seed */ 231 | enc64le(buf, label); 232 | memset(buf + 8, 0, 8); 233 | memcpy(buf + 16, seed, seed_len); 234 | memset(buf + in_len, 0, (sizeof buf) - in_len); 235 | num = 0; 236 | while (dst_len > 0) { 237 | size_t clen; 238 | #if !BLAKE2_LE 239 | uint8_t tmp[64]; 240 | int i; 241 | #endif 242 | 243 | memcpy(h, IV, sizeof h); 244 | h[0] ^= 0x01010000 ^ (sizeof h); /* same parameter word as blake2b_init() with out_len = 64 */ 245 | enc64le(buf + 8, num ++); 246 | process_block(h, buf, in_len, 1); /* the whole input is one final block */ 247 | clen = dst_len < (sizeof h) ?
dst_len : (sizeof h); 248 | #if BLAKE2_LE 249 | memcpy(dst, h, clen); 250 | #else 251 | for (i = 0; i < 8; i ++) { 252 | enc64le(tmp + (i << 3), h[i]); 253 | } 254 | memcpy(dst, tmp, clen); 255 | #endif 256 | dst_len -= clen; 257 | dst = (uint8_t *)dst + clen; 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/blake2s.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BLAKE2s implementation (internal functions for BAT). 3 | */ 4 | 5 | /* ====================================================================== */ 6 | 7 | #include <stdint.h> 8 | #include <string.h> 9 | 10 | #include "blake2.h" 11 | 12 | #include "inner.h" 13 | #define BLAKE2_AVX2 BAT_AVX2 14 | #define BLAKE2_LE BAT_LE 15 | #define BLAKE2_UNALIGNED BAT_UNALIGNED 16 | 17 | ALIGNED_AVX2 18 | static const uint32_t IV[] = { 19 | 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 20 | 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 21 | }; 22 | 23 | #if BLAKE2_AVX2 24 | 25 | TARGET_AVX2 26 | static void 27 | process_block(uint32_t *h, const uint8_t *data, uint64_t t, int f) 28 | { 29 | __m128i xh0, xh1, xv0, xv1, xv2, xv3; 30 | __m128i xm0, xm1, xm2, xm3, xn0, xn1, xn2, xn3; 31 | __m128i xt0, xt1, xt2, xt3, xt4, xt5, xt6, xt7, xt8, xt9; 32 | __m128i xror8, xror16; 33 | 34 | xror8 = _mm_setr_epi8( 35 | 1, 2, 3, 0, 5, 6, 7, 4, 36 | 9, 10, 11, 8, 13, 14, 15, 12); 37 | xror16 = _mm_setr_epi8( 38 | 2, 3, 0, 1, 6, 7, 4, 5, 39 | 10, 11, 8, 9, 14, 15, 12, 13); 40 | 41 | /* Initialize state.
*/ 42 | xh0 = _mm_loadu_si128((const void *)(h + 0)); 43 | xh1 = _mm_loadu_si128((const void *)(h + 4)); 44 | xv0 = xh0; 45 | xv1 = xh1; 46 | xv2 = _mm_loadu_si128((const void *)(IV + 0)); 47 | xv3 = _mm_loadu_si128((const void *)(IV + 4)); 48 | xv3 = _mm_xor_si128(xv3, _mm_setr_epi32( 49 | (int32_t)(uint32_t)t, (int32_t)(uint32_t)(t >> 32), 50 | -f, 0)); 51 | 52 | /* Load data and move it into the proper order for the first round: 53 | xm0: 0 2 4 6 54 | xm1: 1 3 5 7 55 | xm2: 8 10 12 14 56 | xm3: 9 11 13 15 */ 57 | xm0 = _mm_loadu_si128((const void *)(data + 0)); 58 | xm1 = _mm_loadu_si128((const void *)(data + 16)); 59 | xm2 = _mm_loadu_si128((const void *)(data + 32)); 60 | xm3 = _mm_loadu_si128((const void *)(data + 48)); 61 | 62 | xn0 = _mm_shuffle_epi32(xm0, 0xD8); 63 | xn1 = _mm_shuffle_epi32(xm1, 0xD8); 64 | xm0 = _mm_unpacklo_epi64(xn0, xn1); 65 | xm1 = _mm_unpackhi_epi64(xn0, xn1); 66 | 67 | xn2 = _mm_shuffle_epi32(xm2, 0xD8); 68 | xn3 = _mm_shuffle_epi32(xm3, 0xD8); 69 | xm2 = _mm_unpacklo_epi64(xn2, xn3); 70 | xm3 = _mm_unpackhi_epi64(xn2, xn3); 71 | 72 | #define G4(xx, xy) do { \ 73 | __m128i xtg; \ 74 | xv0 = _mm_add_epi32(xv0, _mm_add_epi32(xv1, xx)); \ 75 | xv3 = _mm_shuffle_epi8(_mm_xor_si128(xv0, xv3), xror16); \ 76 | xv2 = _mm_add_epi32(xv2, xv3); \ 77 | xtg = _mm_xor_si128(xv1, xv2); \ 78 | xv1 = _mm_or_si128( \ 79 | _mm_srli_epi32(xtg, 12), _mm_slli_epi32(xtg, 20)); \ 80 | xv0 = _mm_add_epi32(xv0, _mm_add_epi32(xv1, xy)); \ 81 | xv3 = _mm_shuffle_epi8(_mm_xor_si128(xv0, xv3), xror8); \ 82 | xv2 = _mm_add_epi32(xv2, xv3); \ 83 | xtg = _mm_xor_si128(xv1, xv2); \ 84 | xv1 = _mm_or_si128( \ 85 | _mm_srli_epi32(xtg, 7), _mm_slli_epi32(xtg, 25)); \ 86 | } while (0) 87 | 88 | #define ROUND(i0, i1, i2, i3) do { \ 89 | G4(i0, i1); \ 90 | xv1 = _mm_shuffle_epi32(xv1, 0x39); \ 91 | xv2 = _mm_shuffle_epi32(xv2, 0x4E); \ 92 | xv3 = _mm_shuffle_epi32(xv3, 0x93); \ 93 | G4(i2, i3); \ 94 | xv1 = _mm_shuffle_epi32(xv1, 0x93); \ 95 | xv2 = _mm_shuffle_epi32(xv2, 
0x4E); \ 96 | xv3 = _mm_shuffle_epi32(xv3, 0x39); \ 97 | } while (0) 98 | 99 | /* round 0 */ 100 | ROUND(xm0, xm1, xm2, xm3); 101 | 102 | /* round 1 */ 103 | xt0 = _mm_shuffle_epi32(xm0, 0x00); 104 | xt1 = _mm_shuffle_epi32(xm0, 0xC8); 105 | xt2 = _mm_shuffle_epi32(xm1, 0x70); 106 | xt3 = _mm_shuffle_epi32(xm1, 0x80); 107 | xt4 = _mm_shuffle_epi32(xm2, 0x01); 108 | xt5 = _mm_shuffle_epi32(xm2, 0x02); 109 | xt6 = _mm_shuffle_epi32(xm2, 0x03); 110 | xt7 = _mm_shuffle_epi32(xm3, 0x80); 111 | xt8 = _mm_shuffle_epi32(xm3, 0x10); 112 | xt9 = _mm_shuffle_epi32(xm3, 0x30); 113 | xn0 = _mm_blend_epi32( 114 | _mm_blend_epi32(xt6, xt1, 0x02), 115 | xt7, 0x0C); 116 | xn1 = _mm_blend_epi32( 117 | _mm_blend_epi32(xt4, xt9, 0x04), 118 | xt1, 0x08); 119 | xn2 = _mm_blend_epi32( 120 | _mm_blend_epi32(xt3, xt0, 0x02), 121 | xt8, 0x04); 122 | xn3 = _mm_blend_epi32( 123 | _mm_blend_epi32(xt5, xm0, 0x02), 124 | xt2, 0x0C); 125 | ROUND(xn0, xn1, xn2, xn3); 126 | 127 | /* round 2 */ 128 | xt0 = _mm_shuffle_epi32(xn0, 0x40); 129 | xt1 = _mm_shuffle_epi32(xn0, 0x80); 130 | xt2 = _mm_shuffle_epi32(xn1, 0x80); 131 | xt3 = _mm_shuffle_epi32(xn1, 0x0D); 132 | xt4 = _mm_shuffle_epi32(xn2, 0x04); 133 | xt5 = _mm_shuffle_epi32(xn2, 0x32); 134 | xt6 = _mm_shuffle_epi32(xn3, 0x10); 135 | xt7 = _mm_shuffle_epi32(xn3, 0x2C); 136 | xm0 = _mm_blend_epi32( 137 | _mm_blend_epi32(xt5, xt6, 0x02), 138 | xt2, 0x08); 139 | xm1 = _mm_blend_epi32( 140 | _mm_blend_epi32(xt3, xt4, 0x02), 141 | _mm_blend_epi32(xt6, xn0, 0x08), 0x0C); 142 | xm2 = _mm_blend_epi32( 143 | _mm_blend_epi32(xt2, xt7, 0x06), 144 | xt1, 0x08); 145 | xm3 = _mm_blend_epi32( 146 | _mm_blend_epi32(xt0, xt3, 0x02), 147 | xt4, 0x04); 148 | ROUND(xm0, xm1, xm2, xm3); 149 | 150 | /* round 3 */ 151 | xt0 = _mm_shuffle_epi32(xm0, 0x10); 152 | xt1 = _mm_shuffle_epi32(xm0, 0xC8); 153 | xt2 = _mm_shuffle_epi32(xm1, 0x10); 154 | xt3 = _mm_shuffle_epi32(xm1, 0x32); 155 | xt4 = _mm_shuffle_epi32(xm2, 0x03); 156 | xt5 = _mm_shuffle_epi32(xm2, 0x06); 157 | 
xt6 = _mm_shuffle_epi32(xm3, 0x39); 158 | xn0 = _mm_blend_epi32( 159 | _mm_blend_epi32(xt5, xt3, 0x04), 160 | xt0, 0x08); 161 | xn1 = _mm_blend_epi32( 162 | _mm_blend_epi32(xt4, xt6, 0x0A), 163 | xt0, 0x04); 164 | xn2 = _mm_blend_epi32( 165 | _mm_blend_epi32(xt3, xt1, 0x0A), 166 | xt6, 0x04); 167 | xn3 = _mm_blend_epi32( 168 | _mm_blend_epi32(xt6, xt4, 0x02), 169 | xt2, 0x0C); 170 | ROUND(xn0, xn1, xn2, xn3); 171 | 172 | /* round 4 */ 173 | xt0 = _mm_shuffle_epi32(xn0, 0x80); 174 | xt1 = _mm_shuffle_epi32(xn0, 0x4C); 175 | xt2 = _mm_shuffle_epi32(xn1, 0x09); 176 | xt3 = _mm_shuffle_epi32(xn1, 0x03); 177 | xt4 = _mm_shuffle_epi32(xn2, 0x04); 178 | xt5 = _mm_shuffle_epi32(xn3, 0x40); 179 | xt6 = _mm_shuffle_epi32(xn3, 0x32); 180 | xm0 = _mm_blend_epi32( 181 | _mm_blend_epi32(xn1, xt4, 0x06), 182 | xt5, 0x08); 183 | xm1 = _mm_blend_epi32( 184 | _mm_blend_epi32(xt6, xt0, 0x02), 185 | xn2, 0x0C); 186 | xm2 = _mm_blend_epi32( 187 | _mm_blend_epi32(xt3, xt1, 0x0A), 188 | xt5, 0x04); 189 | xm3 = _mm_blend_epi32( 190 | _mm_blend_epi32(xt2, xt6, 0x04), 191 | xt0, 0x08); 192 | ROUND(xm0, xm1, xm2, xm3); 193 | 194 | /* round 5 */ 195 | xt0 = _mm_shuffle_epi32(xm0, 0x04); 196 | xt1 = _mm_shuffle_epi32(xm0, 0x0E); 197 | xt2 = _mm_shuffle_epi32(xm1, 0x04); 198 | xt3 = _mm_shuffle_epi32(xm1, 0x32); 199 | xt4 = _mm_shuffle_epi32(xm2, 0x08); 200 | xt5 = _mm_shuffle_epi32(xm2, 0xD0); 201 | xt6 = _mm_shuffle_epi32(xm3, 0x01); 202 | xt7 = _mm_shuffle_epi32(xm3, 0x83); 203 | xn0 = _mm_blend_epi32( 204 | _mm_blend_epi32(xt1, xt4, 0x02), 205 | _mm_blend_epi32(xt2, xt7, 0x08), 0x0C); 206 | xn1 = _mm_blend_epi32( 207 | _mm_blend_epi32(xt6, xt1, 0x02), 208 | xt5, 0x0C); 209 | xn2 = _mm_blend_epi32( 210 | _mm_blend_epi32(xt3, xt2, 0x02), 211 | xt6, 0x08); 212 | xn3 = _mm_blend_epi32( 213 | _mm_blend_epi32(xt7, xt0, 0x0A), 214 | xt4, 0x04); 215 | ROUND(xn0, xn1, xn2, xn3); 216 | 217 | /* round 6 */ 218 | xt0 = _mm_shuffle_epi32(xn0, 0xC6); 219 | xt1 = _mm_shuffle_epi32(xn1, 0x40); 220 | xt2 = 
_mm_shuffle_epi32(xn1, 0x8C); 221 | xt3 = _mm_shuffle_epi32(xn2, 0x09); 222 | xt4 = _mm_shuffle_epi32(xn2, 0x0C); 223 | xt5 = _mm_shuffle_epi32(xn3, 0x01); 224 | xt6 = _mm_shuffle_epi32(xn3, 0x30); 225 | xm0 = _mm_blend_epi32( 226 | _mm_blend_epi32(xt1, xt4, 0x0A), 227 | xn3, 0x04); 228 | xm1 = _mm_blend_epi32( 229 | _mm_blend_epi32(xt5, xt3, 0x02), 230 | xt1, 0x08); 231 | xm2 = _mm_blend_epi32(xt0, xt6, 0x04); 232 | xm3 = _mm_blend_epi32( 233 | _mm_blend_epi32(xt3, xt2, 0x0A), 234 | xt0, 0x04); 235 | ROUND(xm0, xm1, xm2, xm3); 236 | 237 | /* round 7 */ 238 | xt0 = _mm_shuffle_epi32(xm0, 0x0C); 239 | xt1 = _mm_shuffle_epi32(xm0, 0x18); 240 | xt2 = _mm_shuffle_epi32(xm1, 0xC2); 241 | xt3 = _mm_shuffle_epi32(xm2, 0x10); 242 | xt4 = _mm_shuffle_epi32(xm2, 0xB0); 243 | xt5 = _mm_shuffle_epi32(xm3, 0x40); 244 | xt6 = _mm_shuffle_epi32(xm3, 0x83); 245 | xn0 = _mm_blend_epi32( 246 | _mm_blend_epi32(xt2, xt5, 0x0A), 247 | xt0, 0x04); 248 | xn1 = _mm_blend_epi32( 249 | _mm_blend_epi32(xt6, xt1, 0x06), 250 | xt4, 0x08); 251 | xn2 = _mm_blend_epi32( 252 | _mm_blend_epi32(xm1, xt4, 0x04), 253 | xt6, 0x08); 254 | xn3 = _mm_blend_epi32( 255 | _mm_blend_epi32(xt3, xt0, 0x02), 256 | xt2, 0x08); 257 | ROUND(xn0, xn1, xn2, xn3); 258 | 259 | /* round 8 */ 260 | xt0 = _mm_shuffle_epi32(xn0, 0x02); 261 | xt1 = _mm_shuffle_epi32(xn0, 0x34); 262 | xt2 = _mm_shuffle_epi32(xn1, 0x0C); 263 | xt3 = _mm_shuffle_epi32(xn2, 0x03); 264 | xt4 = _mm_shuffle_epi32(xn2, 0x81); 265 | xt5 = _mm_shuffle_epi32(xn3, 0x02); 266 | xt6 = _mm_shuffle_epi32(xn3, 0xD0); 267 | xm0 = _mm_blend_epi32( 268 | _mm_blend_epi32(xt5, xn1, 0x02), 269 | xt2, 0x04); 270 | xm1 = _mm_blend_epi32( 271 | _mm_blend_epi32(xt4, xt2, 0x02), 272 | xt1, 0x04); 273 | xm2 = _mm_blend_epi32( 274 | _mm_blend_epi32(xt0, xn1, 0x04), 275 | xt6, 0x08); 276 | xm3 = _mm_blend_epi32( 277 | _mm_blend_epi32(xt3, xt1, 0x02), 278 | xt6, 0x04); 279 | ROUND(xm0, xm1, xm2, xm3); 280 | 281 | /* round 9 */ 282 | xt0 = _mm_shuffle_epi32(xm0, 0xC6); 283 
| xt1 = _mm_shuffle_epi32(xm1, 0x2C); 284 | xt2 = _mm_shuffle_epi32(xm2, 0x40); 285 | xt3 = _mm_shuffle_epi32(xm2, 0x83); 286 | xt4 = _mm_shuffle_epi32(xm3, 0xD8); 287 | xn0 = _mm_blend_epi32( 288 | _mm_blend_epi32(xt3, xt1, 0x02), 289 | xt4, 0x04); 290 | xn1 = _mm_blend_epi32(xt4, xt0, 0x04); 291 | xn2 = _mm_blend_epi32( 292 | _mm_blend_epi32(xm1, xt1, 0x04), 293 | xt2, 0x08); 294 | xn3 = _mm_blend_epi32(xt0, xt2, 0x04); 295 | ROUND(xn0, xn1, xn2, xn3); 296 | 297 | #undef G4 298 | #undef ROUND 299 | 300 | xh0 = _mm_xor_si128(xh0, _mm_xor_si128(xv0, xv2)); 301 | xh1 = _mm_xor_si128(xh1, _mm_xor_si128(xv1, xv3)); 302 | _mm_storeu_si128((void *)(h + 0), xh0); 303 | _mm_storeu_si128((void *)(h + 4), xh1); 304 | } 305 | 306 | /* 307 | * Optimized AVX2 implementation for blake2s_expand(). 308 | * 309 | * Input buffer data_x2[] has size 128 bytes; it is filled with two 310 | * interlaced instances of the label, initial block counter and seed in 311 | * their proper positions, and zero elsewhere. 312 | * 313 | * In data_x2[], the counter fields are supposed to be already set for the 314 | * two first blocks. 315 | * 316 | * 'data_len' is the message length (16 + length of seed, not the length 317 | * of the duplicated buffer data_x2). 318 | * 319 | * This function produces dst_len bytes. dst_len MUST be non-zero, 320 | * and a multiple of 64 bytes. 321 | * 322 | * The function internally increments the block counters over 32 bits only, 323 | * without carry propagation. The caller is responsible for calling this 324 | * function with initial counter and dst_len values that ensure that no 325 | * carry propagation is missed. 326 | */ 327 | TARGET_AVX2 328 | static void 329 | expand_inner_x2(uint8_t *dst, size_t dst_len, 330 | const uint8_t *data_x2, size_t data_len) 331 | { 332 | /* Initial value, duplicated for AVX2 parallelism. 
*/ 333 | ALIGNED_AVX2 334 | static const uint32_t IV_x2[] = { 335 | 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 336 | 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 337 | 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, 338 | 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 339 | }; 340 | 341 | /* Initial state, duplicated for AVX2 parallelism, with a 342 | personalization block for output 32 bytes. */ 343 | ALIGNED_AVX2 344 | static const uint32_t hinit_out32_x2[] = { 345 | 0x6A09E667 ^ 0x01010020, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 346 | 0x6A09E667 ^ 0x01010020, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 347 | 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, 348 | 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 349 | }; 350 | 351 | __m256i xh0, xh1, xv0, xv1, xv2, xv3; 352 | __m256i xm0, xm1, xm2, xm3, xn0, xn1, xn2, xn3; 353 | __m256i xt0, xt1, xt2, xt3, xt4, xt5, xt6, xt7, xt8, xt9; 354 | __m256i xror8, xror16, xca2; 355 | 356 | xror8 = _mm256_setr_epi8( 357 | 1, 2, 3, 0, 5, 6, 7, 4, 358 | 9, 10, 11, 8, 13, 14, 15, 12, 359 | 1, 2, 3, 0, 5, 6, 7, 4, 360 | 9, 10, 11, 8, 13, 14, 15, 12); 361 | xror16 = _mm256_setr_epi8( 362 | 2, 3, 0, 1, 6, 7, 4, 5, 363 | 10, 11, 8, 9, 14, 15, 12, 13, 364 | 2, 3, 0, 1, 6, 7, 4, 5, 365 | 10, 11, 8, 9, 14, 15, 12, 13); 366 | xca2 = _mm256_setr_epi32(0, 2, 0, 0, 0, 2, 0, 0); 367 | 368 | /* Initialize state. 
*/ 369 | xh0 = _mm256_loadu_si256((const void *)(hinit_out32_x2 + 0)); 370 | xh1 = _mm256_loadu_si256((const void *)(hinit_out32_x2 + 8)); 371 | 372 | /* Load data and move it into the proper order for the first round: 373 | xm0: 0 2 4 6 374 | xm1: 1 3 5 7 375 | xm2: 8 10 12 14 376 | xm3: 9 11 13 15 */ 377 | xm0 = _mm256_loadu_si256((const void *)(data_x2 + 0)); 378 | xm1 = _mm256_loadu_si256((const void *)(data_x2 + 32)); 379 | xm2 = _mm256_loadu_si256((const void *)(data_x2 + 64)); 380 | xm3 = _mm256_loadu_si256((const void *)(data_x2 + 96)); 381 | 382 | xn0 = _mm256_shuffle_epi32(xm0, 0xD8); 383 | xn1 = _mm256_shuffle_epi32(xm1, 0xD8); 384 | xm0 = _mm256_unpacklo_epi64(xn0, xn1); 385 | xm1 = _mm256_unpackhi_epi64(xn0, xn1); 386 | 387 | xn2 = _mm256_shuffle_epi32(xm2, 0xD8); 388 | xn3 = _mm256_shuffle_epi32(xm3, 0xD8); 389 | xm2 = _mm256_unpacklo_epi64(xn2, xn3); 390 | xm3 = _mm256_unpackhi_epi64(xn2, xn3); 391 | 392 | for (;;) { 393 | /* Each loop iteration computes two BLAKE2s in parallel, 394 | in the low and high lanes, respectively. */ 395 | 396 | /* Initialize round state. 
*/ 397 | xv0 = xh0; 398 | xv1 = xh1; 399 | xv2 = _mm256_loadu_si256((const void *)(IV_x2 + 0)); 400 | xv3 = _mm256_loadu_si256((const void *)(IV_x2 + 8)); 401 | xv3 = _mm256_xor_si256(xv3, 402 | _mm256_setr_epi64x( 403 | data_len, 0xFFFFFFFF, 404 | data_len, 0xFFFFFFFF)); 405 | 406 | #define G4(xx, xy) do { \ 407 | __m256i xtg; \ 408 | xv0 = _mm256_add_epi32(xv0, _mm256_add_epi32(xv1, xx)); \ 409 | xv3 = _mm256_shuffle_epi8(_mm256_xor_si256(xv0, xv3), xror16); \ 410 | xv2 = _mm256_add_epi32(xv2, xv3); \ 411 | xtg = _mm256_xor_si256(xv1, xv2); \ 412 | xv1 = _mm256_or_si256( \ 413 | _mm256_srli_epi32(xtg, 12), \ 414 | _mm256_slli_epi32(xtg, 20)); \ 415 | xv0 = _mm256_add_epi32(xv0, _mm256_add_epi32(xv1, xy)); \ 416 | xv3 = _mm256_shuffle_epi8(_mm256_xor_si256(xv0, xv3), xror8); \ 417 | xv2 = _mm256_add_epi32(xv2, xv3); \ 418 | xtg = _mm256_xor_si256(xv1, xv2); \ 419 | xv1 = _mm256_or_si256( \ 420 | _mm256_srli_epi32(xtg, 7), \ 421 | _mm256_slli_epi32(xtg, 25)); \ 422 | } while (0) 423 | 424 | #define ROUND(i0, i1, i2, i3) do { \ 425 | G4(i0, i1); \ 426 | xv1 = _mm256_shuffle_epi32(xv1, 0x39); \ 427 | xv2 = _mm256_shuffle_epi32(xv2, 0x4E); \ 428 | xv3 = _mm256_shuffle_epi32(xv3, 0x93); \ 429 | G4(i2, i3); \ 430 | xv1 = _mm256_shuffle_epi32(xv1, 0x93); \ 431 | xv2 = _mm256_shuffle_epi32(xv2, 0x4E); \ 432 | xv3 = _mm256_shuffle_epi32(xv3, 0x39); \ 433 | } while (0) 434 | 435 | /* round 0 */ 436 | ROUND(xm0, xm1, xm2, xm3); 437 | 438 | /* round 1 */ 439 | xt0 = _mm256_shuffle_epi32(xm0, 0x00); 440 | xt1 = _mm256_shuffle_epi32(xm0, 0xC8); 441 | xt2 = _mm256_shuffle_epi32(xm1, 0x70); 442 | xt3 = _mm256_shuffle_epi32(xm1, 0x80); 443 | xt4 = _mm256_shuffle_epi32(xm2, 0x01); 444 | xt5 = _mm256_shuffle_epi32(xm2, 0x02); 445 | xt6 = _mm256_shuffle_epi32(xm2, 0x03); 446 | xt7 = _mm256_shuffle_epi32(xm3, 0x80); 447 | xt8 = _mm256_shuffle_epi32(xm3, 0x10); 448 | xt9 = _mm256_shuffle_epi32(xm3, 0x30); 449 | xn0 = _mm256_blend_epi32( 450 | _mm256_blend_epi32(xt6, xt1, 0x22), 451 | 
xt7, 0xCC); 452 | xn1 = _mm256_blend_epi32( 453 | _mm256_blend_epi32(xt4, xt9, 0x44), 454 | xt1, 0x88); 455 | xn2 = _mm256_blend_epi32( 456 | _mm256_blend_epi32(xt3, xt0, 0x22), 457 | xt8, 0x44); 458 | xn3 = _mm256_blend_epi32( 459 | _mm256_blend_epi32(xt5, xm0, 0x22), 460 | xt2, 0xCC); 461 | ROUND(xn0, xn1, xn2, xn3); 462 | 463 | /* round 2 */ 464 | xt0 = _mm256_shuffle_epi32(xn0, 0x40); 465 | xt1 = _mm256_shuffle_epi32(xn0, 0x80); 466 | xt2 = _mm256_shuffle_epi32(xn1, 0x80); 467 | xt3 = _mm256_shuffle_epi32(xn1, 0x0D); 468 | xt4 = _mm256_shuffle_epi32(xn2, 0x04); 469 | xt5 = _mm256_shuffle_epi32(xn2, 0x32); 470 | xt6 = _mm256_shuffle_epi32(xn3, 0x10); 471 | xt7 = _mm256_shuffle_epi32(xn3, 0x2C); 472 | xm0 = _mm256_blend_epi32( 473 | _mm256_blend_epi32(xt5, xt6, 0x22), 474 | xt2, 0x88); 475 | xm1 = _mm256_blend_epi32( 476 | _mm256_blend_epi32(xt3, xt4, 0x22), 477 | _mm256_blend_epi32(xt6, xn0, 0x88), 0xCC); 478 | xm2 = _mm256_blend_epi32( 479 | _mm256_blend_epi32(xt2, xt7, 0x66), 480 | xt1, 0x88); 481 | xm3 = _mm256_blend_epi32( 482 | _mm256_blend_epi32(xt0, xt3, 0x22), 483 | xt4, 0x44); 484 | ROUND(xm0, xm1, xm2, xm3); 485 | 486 | /* round 3 */ 487 | xt0 = _mm256_shuffle_epi32(xm0, 0x10); 488 | xt1 = _mm256_shuffle_epi32(xm0, 0xC8); 489 | xt2 = _mm256_shuffle_epi32(xm1, 0x10); 490 | xt3 = _mm256_shuffle_epi32(xm1, 0x32); 491 | xt4 = _mm256_shuffle_epi32(xm2, 0x03); 492 | xt5 = _mm256_shuffle_epi32(xm2, 0x06); 493 | xt6 = _mm256_shuffle_epi32(xm3, 0x39); 494 | xn0 = _mm256_blend_epi32( 495 | _mm256_blend_epi32(xt5, xt3, 0x44), 496 | xt0, 0x88); 497 | xn1 = _mm256_blend_epi32( 498 | _mm256_blend_epi32(xt4, xt6, 0xAA), 499 | xt0, 0x44); 500 | xn2 = _mm256_blend_epi32( 501 | _mm256_blend_epi32(xt3, xt1, 0xAA), 502 | xt6, 0x44); 503 | xn3 = _mm256_blend_epi32( 504 | _mm256_blend_epi32(xt6, xt4, 0x22), 505 | xt2, 0xCC); 506 | ROUND(xn0, xn1, xn2, xn3); 507 | 508 | /* round 4 */ 509 | xt0 = _mm256_shuffle_epi32(xn0, 0x80); 510 | xt1 = _mm256_shuffle_epi32(xn0, 0x4C); 
511 | xt2 = _mm256_shuffle_epi32(xn1, 0x09); 512 | xt3 = _mm256_shuffle_epi32(xn1, 0x03); 513 | xt4 = _mm256_shuffle_epi32(xn2, 0x04); 514 | xt5 = _mm256_shuffle_epi32(xn3, 0x40); 515 | xt6 = _mm256_shuffle_epi32(xn3, 0x32); 516 | xm0 = _mm256_blend_epi32( 517 | _mm256_blend_epi32(xn1, xt4, 0x66), 518 | xt5, 0x88); 519 | xm1 = _mm256_blend_epi32( 520 | _mm256_blend_epi32(xt6, xt0, 0x22), 521 | xn2, 0xCC); 522 | xm2 = _mm256_blend_epi32( 523 | _mm256_blend_epi32(xt3, xt1, 0xAA), 524 | xt5, 0x44); 525 | xm3 = _mm256_blend_epi32( 526 | _mm256_blend_epi32(xt2, xt6, 0x44), 527 | xt0, 0x88); 528 | ROUND(xm0, xm1, xm2, xm3); 529 | 530 | /* round 5 */ 531 | xt0 = _mm256_shuffle_epi32(xm0, 0x04); 532 | xt1 = _mm256_shuffle_epi32(xm0, 0x0E); 533 | xt2 = _mm256_shuffle_epi32(xm1, 0x04); 534 | xt3 = _mm256_shuffle_epi32(xm1, 0x32); 535 | xt4 = _mm256_shuffle_epi32(xm2, 0x08); 536 | xt5 = _mm256_shuffle_epi32(xm2, 0xD0); 537 | xt6 = _mm256_shuffle_epi32(xm3, 0x01); 538 | xt7 = _mm256_shuffle_epi32(xm3, 0x83); 539 | xn0 = _mm256_blend_epi32( 540 | _mm256_blend_epi32(xt1, xt4, 0x22), 541 | _mm256_blend_epi32(xt2, xt7, 0x88), 0xCC); 542 | xn1 = _mm256_blend_epi32( 543 | _mm256_blend_epi32(xt6, xt1, 0x22), 544 | xt5, 0xCC); 545 | xn2 = _mm256_blend_epi32( 546 | _mm256_blend_epi32(xt3, xt2, 0x22), 547 | xt6, 0x88); 548 | xn3 = _mm256_blend_epi32( 549 | _mm256_blend_epi32(xt7, xt0, 0xAA), 550 | xt4, 0x44); 551 | ROUND(xn0, xn1, xn2, xn3); 552 | 553 | /* round 6 */ 554 | xt0 = _mm256_shuffle_epi32(xn0, 0xC6); 555 | xt1 = _mm256_shuffle_epi32(xn1, 0x40); 556 | xt2 = _mm256_shuffle_epi32(xn1, 0x8C); 557 | xt3 = _mm256_shuffle_epi32(xn2, 0x09); 558 | xt4 = _mm256_shuffle_epi32(xn2, 0x0C); 559 | xt5 = _mm256_shuffle_epi32(xn3, 0x01); 560 | xt6 = _mm256_shuffle_epi32(xn3, 0x30); 561 | xm0 = _mm256_blend_epi32( 562 | _mm256_blend_epi32(xt1, xt4, 0xAA), 563 | xn3, 0x44); 564 | xm1 = _mm256_blend_epi32( 565 | _mm256_blend_epi32(xt5, xt3, 0x22), 566 | xt1, 0x88); 567 | xm2 = 
_mm256_blend_epi32(xt0, xt6, 0x44); 568 | xm3 = _mm256_blend_epi32( 569 | _mm256_blend_epi32(xt3, xt2, 0xAA), 570 | xt0, 0x44); 571 | ROUND(xm0, xm1, xm2, xm3); 572 | 573 | /* round 7 */ 574 | xt0 = _mm256_shuffle_epi32(xm0, 0x0C); 575 | xt1 = _mm256_shuffle_epi32(xm0, 0x18); 576 | xt2 = _mm256_shuffle_epi32(xm1, 0xC2); 577 | xt3 = _mm256_shuffle_epi32(xm2, 0x10); 578 | xt4 = _mm256_shuffle_epi32(xm2, 0xB0); 579 | xt5 = _mm256_shuffle_epi32(xm3, 0x40); 580 | xt6 = _mm256_shuffle_epi32(xm3, 0x83); 581 | xn0 = _mm256_blend_epi32( 582 | _mm256_blend_epi32(xt2, xt5, 0xAA), 583 | xt0, 0x44); 584 | xn1 = _mm256_blend_epi32( 585 | _mm256_blend_epi32(xt6, xt1, 0x66), 586 | xt4, 0x88); 587 | xn2 = _mm256_blend_epi32( 588 | _mm256_blend_epi32(xm1, xt4, 0x44), 589 | xt6, 0x88); 590 | xn3 = _mm256_blend_epi32( 591 | _mm256_blend_epi32(xt3, xt0, 0x22), 592 | xt2, 0x88); 593 | ROUND(xn0, xn1, xn2, xn3); 594 | 595 | /* round 8 */ 596 | xt0 = _mm256_shuffle_epi32(xn0, 0x02); 597 | xt1 = _mm256_shuffle_epi32(xn0, 0x34); 598 | xt2 = _mm256_shuffle_epi32(xn1, 0x0C); 599 | xt3 = _mm256_shuffle_epi32(xn2, 0x03); 600 | xt4 = _mm256_shuffle_epi32(xn2, 0x81); 601 | xt5 = _mm256_shuffle_epi32(xn3, 0x02); 602 | xt6 = _mm256_shuffle_epi32(xn3, 0xD0); 603 | xm0 = _mm256_blend_epi32( 604 | _mm256_blend_epi32(xt5, xn1, 0x22), 605 | xt2, 0x44); 606 | xm1 = _mm256_blend_epi32( 607 | _mm256_blend_epi32(xt4, xt2, 0x22), 608 | xt1, 0x44); 609 | xm2 = _mm256_blend_epi32( 610 | _mm256_blend_epi32(xt0, xn1, 0x44), 611 | xt6, 0x88); 612 | xm3 = _mm256_blend_epi32( 613 | _mm256_blend_epi32(xt3, xt1, 0x22), 614 | xt6, 0x44); 615 | ROUND(xm0, xm1, xm2, xm3); 616 | 617 | /* round 9 */ 618 | xt0 = _mm256_shuffle_epi32(xm0, 0xC6); 619 | xt1 = _mm256_shuffle_epi32(xm1, 0x2C); 620 | xt2 = _mm256_shuffle_epi32(xm2, 0x40); 621 | xt3 = _mm256_shuffle_epi32(xm2, 0x83); 622 | xt4 = _mm256_shuffle_epi32(xm3, 0xD8); 623 | xn0 = _mm256_blend_epi32( 624 | _mm256_blend_epi32(xt3, xt1, 0x22), 625 | xt4, 0x44); 626 | xn1 = 
_mm256_blend_epi32(xt4, xt0, 0x44); 627 | xn2 = _mm256_blend_epi32( 628 | _mm256_blend_epi32(xm1, xt1, 0x44), 629 | xt2, 0x88); 630 | xn3 = _mm256_blend_epi32(xt0, xt2, 0x44); 631 | ROUND(xn0, xn1, xn2, xn3); 632 | #undef G4 633 | #undef ROUND 634 | 635 | /* Finalize computation and store output. The output must 636 | be deinterlaced since output blocks are supposed to 637 | be consecutive . */ 638 | xt0 = _mm256_xor_si256(xh0, _mm256_xor_si256(xv0, xv2)); 639 | xt1 = _mm256_xor_si256(xh1, _mm256_xor_si256(xv1, xv3)); 640 | xt2 = _mm256_permute2x128_si256(xt0, xt1, 0x20); 641 | xt3 = _mm256_permute2x128_si256(xt0, xt1, 0x31); 642 | _mm256_storeu_si256((void *)(dst + 0), xt2); 643 | _mm256_storeu_si256((void *)(dst + 32), xt3); 644 | 645 | dst += 64; 646 | dst_len -= 64; 647 | if (dst_len == 0) { 648 | break; 649 | } 650 | 651 | /* Put back message words in initial order */ 652 | xt0 = _mm256_shuffle_epi32(xn0, 0x01); 653 | xt1 = _mm256_shuffle_epi32(xn0, 0x83); 654 | xt2 = _mm256_shuffle_epi32(xn1, 0x10); 655 | xt3 = _mm256_shuffle_epi32(xn1, 0xB0); 656 | xt4 = _mm256_shuffle_epi32(xn2, 0x39); 657 | xt5 = _mm256_shuffle_epi32(xn3, 0x63); 658 | xm0 = _mm256_blend_epi32( 659 | _mm256_blend_epi32(xt5, xt2, 0x66), 660 | xt3, 0x88); 661 | xm1 = _mm256_blend_epi32( 662 | _mm256_blend_epi32(xt1, xt4, 0x22), 663 | xt3, 0x44); 664 | xm2 = _mm256_blend_epi32(xt0, xt5, 0xCC); 665 | xm3 = _mm256_blend_epi32(xt4, xt5, 0x22); 666 | 667 | /* Increment block counter in the message. 668 | Nominally, the counter is 64 bits, but we only 669 | increment the low 32 bits; the caller is responsible 670 | for setting the high half and calling us with values 671 | that won't overflow. 
*/ 672 | xm0 = _mm256_add_epi32(xm0, xca2); 673 | } 674 | } 675 | 676 | #else 677 | 678 | static void 679 | process_block(uint32_t *h, const uint8_t *data, uint64_t t, int f) 680 | { 681 | uint32_t v[16], m[16]; 682 | int i; 683 | 684 | memcpy(v, h, 8 * sizeof(uint32_t)); 685 | memcpy(v + 8, IV, sizeof IV); 686 | v[12] ^= (uint32_t)t; 687 | v[13] ^= (uint32_t)(t >> 32); 688 | if (f) { 689 | v[14] = ~v[14]; 690 | } 691 | 692 | #if BLAKE2_LE 693 | memcpy(m, data, sizeof m); 694 | #else 695 | for (i = 0; i < 16; i ++) { 696 | m[i] = dec32le(data + (i << 2)); 697 | } 698 | #endif 699 | 700 | #define ROR(x, n) (((x) << (32 - (n))) | ((x) >> (n))) 701 | 702 | #define G(a, b, c, d, x, y) do { \ 703 | v[a] += v[b] + (x); \ 704 | v[d] = ROR(v[d] ^ v[a], 16); \ 705 | v[c] += v[d]; \ 706 | v[b] = ROR(v[b] ^ v[c], 12); \ 707 | v[a] += v[b] + (y); \ 708 | v[d] = ROR(v[d] ^ v[a], 8); \ 709 | v[c] += v[d]; \ 710 | v[b] = ROR(v[b] ^ v[c], 7); \ 711 | } while (0) 712 | 713 | #define ROUND(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF) \ 714 | do { \ 715 | G(0, 4, 8, 12, m[s0], m[s1]); \ 716 | G(1, 5, 9, 13, m[s2], m[s3]); \ 717 | G(2, 6, 10, 14, m[s4], m[s5]); \ 718 | G(3, 7, 11, 15, m[s6], m[s7]); \ 719 | G(0, 5, 10, 15, m[s8], m[s9]); \ 720 | G(1, 6, 11, 12, m[sA], m[sB]); \ 721 | G(2, 7, 8, 13, m[sC], m[sD]); \ 722 | G(3, 4, 9, 14, m[sE], m[sF]); \ 723 | } while (0) 724 | 725 | ROUND( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 726 | ROUND(14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3); 727 | ROUND(11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4); 728 | ROUND( 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8); 729 | ROUND( 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13); 730 | ROUND( 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9); 731 | ROUND(12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11); 732 | ROUND(13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10); 733 | ROUND( 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 
10, 5); 734 | ROUND(10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0); 735 | 736 | #undef ROR 737 | #undef G 738 | #undef ROUND 739 | 740 | for (i = 0; i < 8; i ++) { 741 | h[i] ^= v[i] ^ v[i + 8]; 742 | } 743 | } 744 | 745 | #endif 746 | 747 | /* 748 | * State rules: 749 | * 750 | * buf buffered data 751 | * h current state 752 | * ctr number of bytes injected so far 753 | * 754 | * Initially, ctr == 0 and h contains the XOR of IV and parameter block; 755 | * buf[] is empty. For any ctr > 0, buf[] is non-empty; it might contain 756 | * a full block worth of data (processing of the block is delayed until 757 | * we know whether this is the final block or not). 758 | * 759 | * If a key is injected, then it counts as a first full block. 760 | */ 761 | 762 | /* see blake2.h */ 763 | void 764 | blake2s_init(blake2s_context *bc, size_t out_len) 765 | { 766 | memcpy(bc->h, IV, sizeof bc->h); 767 | bc->h[0] ^= 0x01010000 ^ (uint32_t)out_len; 768 | bc->ctr = 0; 769 | bc->out_len = out_len; 770 | } 771 | 772 | /* see blake2.h */ 773 | void 774 | blake2s_init_key(blake2s_context *bc, size_t out_len, 775 | const void *key, size_t key_len) 776 | { 777 | blake2s_init(bc, out_len); 778 | if (key_len > 0) { 779 | bc->h[0] ^= (uint32_t)key_len << 8; 780 | memcpy(bc->buf, key, key_len); 781 | memset(bc->buf + key_len, 0, (sizeof bc->buf) - key_len); 782 | bc->ctr = sizeof bc->buf; 783 | } 784 | } 785 | 786 | /* see blake2.h */ 787 | void 788 | blake2s_update(blake2s_context *bc, const void *data, size_t len) 789 | { 790 | uint64_t ctr; 791 | size_t p; 792 | 793 | /* Special case: if no input data, return immediately. */ 794 | if (len == 0) { 795 | return; 796 | } 797 | 798 | ctr = bc->ctr; 799 | 800 | /* First complete the current block, if not already full. 
*/ 801 | p = (size_t)ctr & ((sizeof bc->buf) - 1); 802 | if (ctr == 0 || p != 0) { 803 | /* buffer is not full */ 804 | size_t clen; 805 | 806 | clen = sizeof bc->buf - p; 807 | if (clen >= len) { 808 | memcpy(bc->buf + p, data, len); 809 | bc->ctr = ctr + len; 810 | return; 811 | } 812 | memcpy(bc->buf + p, data, clen); 813 | ctr += clen; 814 | data = (const uint8_t *)data + clen; 815 | len -= clen; 816 | } 817 | 818 | /* Process the buffered block. */ 819 | process_block(bc->h, bc->buf, ctr, 0); 820 | 821 | /* Process all subsequent full blocks, except the last. */ 822 | while (len > sizeof bc->buf) { 823 | ctr += sizeof bc->buf; 824 | process_block(bc->h, data, ctr, 0); 825 | data = (const uint8_t *)data + sizeof bc->buf; 826 | len -= sizeof bc->buf; 827 | } 828 | 829 | /* Copy the last block (possibly partial) into the buffer. */ 830 | memcpy(bc->buf, data, len); 831 | bc->ctr = ctr + len; 832 | } 833 | 834 | /* see blake2.h */ 835 | void 836 | blake2s_final(blake2s_context *bc, void *dst) 837 | { 838 | #if !BLAKE2_LE 839 | int i; 840 | uint8_t tmp[32]; 841 | #endif 842 | size_t p; 843 | 844 | /* Pad the current block with zeros, if not full. If the 845 | buffer is empty (no key, no data) then fill it with zeros 846 | as well. 
*/ 847 | p = (size_t)bc->ctr & ((sizeof bc->buf) - 1); 848 | if (bc->ctr == 0 || p != 0) { 849 | memset(bc->buf + p, 0, (sizeof bc->buf) - p); 850 | } 851 | 852 | process_block(bc->h, bc->buf, bc->ctr, 1); 853 | #if BLAKE2_LE 854 | memcpy(dst, bc->h, bc->out_len); 855 | #else 856 | for (i = 0; i < 8; i ++) { 857 | enc32le(tmp + (i << 2), bc->h[i]); 858 | } 859 | memcpy(dst, tmp, bc->out_len); 860 | #endif 861 | } 862 | 863 | /* see blake2.h */ 864 | void 865 | blake2s(void *dst, size_t dst_len, const void *key, size_t key_len, 866 | const void *src, size_t src_len) 867 | { 868 | blake2s_context bc; 869 | 870 | blake2s_init_key(&bc, dst_len, key, key_len); 871 | blake2s_update(&bc, src, src_len); 872 | blake2s_final(&bc, dst); 873 | } 874 | 875 | /* see blake2.h */ 876 | void 877 | blake2s_expand(void *dst, size_t dst_len, 878 | const void *seed, size_t seed_len, uint64_t label) 879 | { 880 | uint32_t h[8]; 881 | uint8_t buf[64]; 882 | size_t in_len; 883 | uint64_t num; 884 | 885 | in_len = 16 + seed_len; 886 | enc64le(buf, label); 887 | memset(buf + 8, 0, 8); 888 | memcpy(buf + 16, seed, seed_len); 889 | memset(buf + in_len, 0, (sizeof buf) - in_len); 890 | num = 0; 891 | #if BLAKE2_AVX2 892 | if (dst_len >= 64) { 893 | uint8_t buf_x2[128]; 894 | 895 | memcpy(buf_x2 + 0, buf + 0, 16); 896 | memcpy(buf_x2 + 16, buf + 0, 16); 897 | memcpy(buf_x2 + 32, buf + 16, 16); 898 | memcpy(buf_x2 + 48, buf + 16, 16); 899 | memcpy(buf_x2 + 64, buf + 32, 16); 900 | memcpy(buf_x2 + 80, buf + 32, 16); 901 | memcpy(buf_x2 + 96, buf + 48, 16); 902 | memcpy(buf_x2 + 112, buf + 48, 16); 903 | buf_x2[24] = 1; 904 | while (dst_len >= 64) { 905 | /* We compute all full pairs of blocks, but 906 | only at most 2^31 pairs at a time, since 907 | expand_inner_x2() cannot propagate block 908 | counter carries. 
*/ 909 | uint64_t tnum; 910 | size_t tlen; 911 | 912 | tnum = (uint64_t)dst_len >> 6; 913 | if (tnum >= 0x80000000) { 914 | tnum = 0x80000000; 915 | } 916 | enc32le(buf_x2 + 12, (uint32_t)(num >> 32)); 917 | enc32le(buf_x2 + 28, (uint32_t)(num >> 32)); 918 | tlen = (size_t)tnum << 6; 919 | expand_inner_x2(dst, tlen, buf_x2, in_len); 920 | dst = (uint8_t *)dst + tlen; 921 | dst_len -= tlen; 922 | num += tnum << 1; 923 | } 924 | } 925 | #endif 926 | while (dst_len > 0) { 927 | size_t clen; 928 | #if !BLAKE2_LE 929 | uint8_t tmp[32]; 930 | int i; 931 | #endif 932 | 933 | memcpy(h, IV, sizeof h); 934 | h[0] ^= 0x01010000 ^ (sizeof h); 935 | enc64le(buf + 8, num ++); 936 | process_block(h, buf, in_len, 1); 937 | clen = dst_len < (sizeof h) ? dst_len : (sizeof h); 938 | #if BLAKE2_LE 939 | memcpy(dst, h, clen); 940 | #else 941 | for (i = 0; i < 8; i ++) { 942 | enc32le(tmp + (i << 2), h[i]); 943 | } 944 | memcpy(dst, tmp, clen); 945 | #endif 946 | dst_len -= clen; 947 | dst = (uint8_t *)dst + clen; 948 | } 949 | } 950 | -------------------------------------------------------------------------------- /src/codec.c: -------------------------------------------------------------------------------- 1 | #include "inner.h" 2 | 3 | /* see inner.h */ 4 | const uint8_t bat_max_fg_bits[] = { 5 | 0, /* unused */ 6 | 6, 7 | 6, 8 | 6, 9 | 6, 10 | 6, 11 | 5, 12 | 5, 13 | 4, 14 | 4, 15 | 4 16 | }; 17 | 18 | /* see inner.h */ 19 | const uint8_t bat_max_FG_bits[] = { 20 | 0, /* unused */ 21 | 6, 22 | 6, 23 | 6, 24 | 6, 25 | 6, 26 | 6, 27 | 6, 28 | 6, 29 | 6, 30 | 6 31 | }; 32 | 33 | /* see inner.h */ 34 | const uint8_t bat_max_w_bits[] = { 35 | 0, /* unused */ 36 | 17, 37 | 17, 38 | 17, 39 | 17, 40 | 17, 41 | 17, 42 | 17, 43 | 17, 44 | 17, 45 | 17 46 | }; 47 | 48 | /* see inner.h */ 49 | size_t 50 | bat_trim_i32_encode( 51 | void *out, size_t max_out_len, 52 | const int32_t *x, unsigned logn, unsigned bits) 53 | { 54 | size_t n, u, out_len; 55 | uint8_t *buf; 56 | uint32_t acc, mask; 57 | 
unsigned acc_len; 58 | 59 | n = (size_t)1 << logn; 60 | out_len = ((n * bits) + 7) >> 3; 61 | if (out == NULL) { 62 | return out_len; 63 | } 64 | if (out_len > max_out_len) { 65 | return 0; 66 | } 67 | buf = out; 68 | acc = 0; 69 | acc_len = 0; 70 | mask = ((uint32_t)1 << bits) - 1; 71 | for (u = 0; u < n; u ++) { 72 | acc = (acc << bits) | ((uint32_t)x[u] & mask); 73 | acc_len += bits; 74 | while (acc_len >= 8) { 75 | acc_len -= 8; 76 | *buf ++ = (uint8_t)(acc >> acc_len); 77 | } 78 | } 79 | if (acc_len > 0) { 80 | *buf ++ = (uint8_t)(acc << (8 - acc_len)); 81 | } 82 | return out_len; 83 | } 84 | 85 | /* see inner.h */ 86 | size_t 87 | bat_trim_i32_decode( 88 | int32_t *x, unsigned logn, unsigned bits, 89 | const void *in, size_t max_in_len) 90 | { 91 | size_t n, in_len; 92 | const uint8_t *buf; 93 | size_t u; 94 | uint32_t acc, mask1, mask2; 95 | unsigned acc_len; 96 | uint32_t r; 97 | 98 | n = (size_t)1 << logn; 99 | in_len = ((n * bits) + 7) >> 3; 100 | if (in_len > max_in_len) { 101 | return 0; 102 | } 103 | buf = in; 104 | u = 0; 105 | acc = 0; 106 | acc_len = 0; 107 | mask1 = ((uint32_t)1 << bits) - 1; 108 | mask2 = (uint32_t)1 << (bits - 1); 109 | r = (uint32_t)-1; 110 | while (u < n) { 111 | acc = (acc << 8) | *buf ++; 112 | acc_len += 8; 113 | while (acc_len >= bits && u < n) { 114 | uint32_t w, q; 115 | 116 | acc_len -= bits; 117 | w = (acc >> acc_len) & mask1; 118 | w |= -(w & mask2); 119 | x[u ++] = *(int32_t *)&w; 120 | 121 | /* Value w == -mask2 is forbidden. */ 122 | q = w + mask2; 123 | r &= q | -q; 124 | } 125 | } 126 | 127 | /* Extra bits in the last byte must be zero. 
*/ 128 | acc &= (((uint32_t)1 << acc_len) - 1); 129 | r &= ~(acc | -acc); 130 | 131 | return in_len & -(size_t)(r >> 31); 132 | } 133 | 134 | /* see inner.h */ 135 | size_t 136 | bat_trim_i8_encode( 137 | void *out, size_t max_out_len, 138 | const int8_t *x, unsigned logn, unsigned bits) 139 | { 140 | size_t n, u, out_len; 141 | uint8_t *buf; 142 | uint32_t acc, mask; 143 | unsigned acc_len; 144 | 145 | n = (size_t)1 << logn; 146 | out_len = ((n * bits) + 7) >> 3; 147 | if (out == NULL) { 148 | return out_len; 149 | } 150 | if (out_len > max_out_len) { 151 | return 0; 152 | } 153 | buf = out; 154 | acc = 0; 155 | acc_len = 0; 156 | mask = ((uint32_t)1 << bits) - 1; 157 | for (u = 0; u < n; u ++) { 158 | acc = (acc << bits) | ((uint8_t)x[u] & mask); 159 | acc_len += bits; 160 | while (acc_len >= 8) { 161 | acc_len -= 8; 162 | *buf ++ = (uint8_t)(acc >> acc_len); 163 | } 164 | } 165 | if (acc_len > 0) { 166 | *buf ++ = (uint8_t)(acc << (8 - acc_len)); 167 | } 168 | return out_len; 169 | } 170 | 171 | /* see inner.h */ 172 | size_t 173 | bat_trim_i8_decode( 174 | int8_t *x, unsigned logn, unsigned bits, 175 | const void *in, size_t max_in_len) 176 | { 177 | size_t n, in_len; 178 | const uint8_t *buf; 179 | size_t u; 180 | uint32_t acc, mask1, mask2; 181 | unsigned acc_len; 182 | uint32_t r; 183 | 184 | n = (size_t)1 << logn; 185 | in_len = ((n * bits) + 7) >> 3; 186 | if (in_len > max_in_len) { 187 | return 0; 188 | } 189 | buf = in; 190 | u = 0; 191 | acc = 0; 192 | acc_len = 0; 193 | mask1 = ((uint32_t)1 << bits) - 1; 194 | mask2 = (uint32_t)1 << (bits - 1); 195 | r = (uint32_t)-1; 196 | while (u < n) { 197 | acc = (acc << 8) | *buf ++; 198 | acc_len += 8; 199 | while (acc_len >= bits && u < n) { 200 | uint32_t w, q; 201 | 202 | acc_len -= bits; 203 | w = (acc >> acc_len) & mask1; 204 | w |= -(w & mask2); 205 | x[u ++] = (int8_t)*(int32_t *)&w; 206 | 207 | /* Value w == -mask2 is forbidden. 
*/ 208 | q = w + mask2; 209 | r &= q | -q; 210 | } 211 | } 212 | 213 | /* Extra bits in the last byte must be zero. */ 214 | acc &= (((uint32_t)1 << acc_len) - 1); 215 | r &= ~(acc | -acc); 216 | 217 | return in_len & -(size_t)(r >> 31); 218 | } 219 | -------------------------------------------------------------------------------- /src/fnr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Fixed-point division. 3 | */ 4 | 5 | #include "inner.h" 6 | 7 | /* see inner.h */ 8 | uint64_t 9 | bat_fnr_div(uint64_t x, uint64_t y) 10 | { 11 | uint64_t sx, sy, q, b, num; 12 | int i; 13 | 14 | /* 15 | * Get absolute values and signs. From now on, we can suppose 16 | * that x and y fit on 63 bits (we ignore edge conditions). 17 | */ 18 | sx = x >> 63; 19 | x = (x ^ -sx) + sx; 20 | sy = y >> 63; 21 | y = (y ^ -sy) + sy; 22 | 23 | /* 24 | * Do a bit by bit division, assuming that the quotient fits. 25 | * The numerator starts at x*2^31, and is shifted one bit a time. 26 | */ 27 | q = 0; 28 | num = x >> 31; 29 | for (i = 63; i >= 0; i --) { 30 | b = 1 - ((num - y) >> 63); 31 | q |= b << i; 32 | num -= y & -b; 33 | num <<= 1; 34 | if (i >= 33) { 35 | num |= (x >> (i - 33)) & 1; 36 | } 37 | } 38 | 39 | /* 40 | * Rounding: if the remainder is at least y/2 (scaled), we add 41 | * 2^(-32) to the quotient. 42 | */ 43 | b = 1 - ((num - y) >> 63); 44 | q += b; 45 | 46 | /* 47 | * Sign management: if the original x and y had different signs, 48 | * then we must negate the quotient. 49 | */ 50 | sx ^= sy; 51 | q = (q ^ -sx) + sx; 52 | 53 | return q; 54 | } 55 | -------------------------------------------------------------------------------- /src/inner.h: -------------------------------------------------------------------------------- 1 | #ifndef BAT_INNER_H__ 2 | #define BAT_INNER_H__ 3 | 4 | /* 5 | * Internal functions for BAT. 
6 | */ 7 | 8 | /* ====================================================================== */ 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "blake2.h" 15 | 16 | #if defined BAT_AVX2 && BAT_AVX2 17 | /* 18 | * This implementation uses AVX2 intrinsics. 19 | */ 20 | #include 21 | #ifndef BAT_LE 22 | #define BAT_LE 1 23 | #endif 24 | #ifndef BAT_UNALIGNED 25 | #define BAT_UNALIGNED 1 26 | #endif 27 | #if defined __GNUC__ 28 | #define TARGET_AVX2 __attribute__((target("avx2"))) 29 | #define ALIGNED_AVX2 __attribute__((aligned(32))) 30 | #elif defined _MSC_VER && _MSC_VER 31 | #pragma warning( disable : 4752 ) 32 | #endif 33 | #endif 34 | 35 | #ifndef TARGET_AVX2 36 | #define TARGET_AVX2 37 | #endif 38 | #ifndef ALIGNED_AVX2 39 | #define ALIGNED_AVX2 40 | #endif 41 | 42 | /* 43 | * Disable warning on applying unary minus on an unsigned type. 44 | */ 45 | #if defined _MSC_VER && _MSC_VER 46 | #pragma warning( disable : 4146 ) 47 | #pragma warning( disable : 4244 ) 48 | #pragma warning( disable : 4267 ) 49 | #pragma warning( disable : 4334 ) 50 | #endif 51 | 52 | /* 53 | * Auto-detect 64-bit architectures. 54 | */ 55 | #ifndef BAT_64 56 | #if defined __x86_64__ || defined _M_X64 \ 57 | || defined __ia64 || defined __itanium__ || defined _M_IA64 \ 58 | || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ \ 59 | || defined __64BIT__ || defined _LP64 || defined __LP64__ \ 60 | || defined __sparc64__ \ 61 | || defined __aarch64__ || defined _M_ARM64 \ 62 | || defined __mips64 63 | #define BAT_64 1 64 | #else 65 | #define BAT_64 0 66 | #endif 67 | #endif 68 | 69 | /* 70 | * Auto-detect endianness and support of unaligned accesses. 
71 | */ 72 | #if defined __i386__ || defined _M_IX86 \ 73 | || defined __x86_64__ || defined _M_X64 \ 74 | || (defined _ARCH_PWR8 \ 75 | && (defined __LITTLE_ENDIAN || defined __LITTLE_ENDIAN__)) 76 | 77 | #ifndef BAT_LE 78 | #define BAT_LE 1 79 | #endif 80 | #ifndef BAT_UNALIGNED 81 | #define BAT_UNALIGNED 1 82 | #endif 83 | 84 | #elif (defined __LITTLE_ENDIAN && __LITTLE_ENDIAN__) \ 85 | || (defined __BYTE_ORDER__ && defined __ORDER_LITTLE_ENDIAN__ \ 86 | && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 87 | 88 | #ifndef BAT_LE 89 | #define BAT_LE 1 90 | #endif 91 | #ifndef BAT_UNALIGNED 92 | #define BAT_UNALIGNED 0 93 | #endif 94 | 95 | #else 96 | 97 | #ifndef BAT_LE 98 | #define BAT_LE 0 99 | #endif 100 | #ifndef BAT_UNALIGNED 101 | #define BAT_UNALIGNED 0 102 | #endif 103 | 104 | #endif 105 | 106 | /* 107 | * For seed generation: 108 | * 109 | * - On Linux (glibc-2.25+), FreeBSD 12+ and OpenBSD, use getentropy(). 110 | * - On other Unix-like systems, use /dev/urandom (also a fallback for 111 | * failed getentropy() calls). 112 | * - On Windows, use CryptGenRandom(). 
113 | */ 114 | 115 | #ifndef BAT_RAND_GETENTROPY 116 | #if (defined __linux && defined __GLIBC__ \ 117 | && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \ 118 | || (defined __FreeBSD__ && __FreeBSD__ >= 12) \ 119 | || defined __OpenBSD__ 120 | #define BAT_RAND_GETENTROPY 1 121 | #else 122 | #define BAT_RAND_GETENTROPY 0 123 | #endif 124 | #endif 125 | 126 | #ifndef BAT_RAND_URANDOM 127 | #if defined _AIX \ 128 | || defined __ANDROID__ \ 129 | || defined __FreeBSD__ \ 130 | || defined __NetBSD__ \ 131 | || defined __OpenBSD__ \ 132 | || defined __DragonFly__ \ 133 | || defined __linux__ \ 134 | || (defined __sun && (defined __SVR4 || defined __svr4__)) \ 135 | || (defined __APPLE__ && defined __MACH__) 136 | #define BAT_RAND_URANDOM 1 137 | #else 138 | #define BAT_RAND_URANDOM 0 139 | #endif 140 | #endif 141 | 142 | #ifndef BAT_RAND_WIN32 143 | #if defined _WIN32 || defined _WIN64 144 | #define BAT_RAND_WIN32 1 145 | #else 146 | #define BAT_RAND_WIN32 0 147 | #endif 148 | #endif 149 | 150 | /* 151 | * Ensure all macros are defined, to avoid warnings with -Wundef. 152 | */ 153 | #ifndef BAT_AVX2 154 | #define BAT_AVX2 0 155 | #endif 156 | 157 | /* 158 | * MSVC 2015 does not know the C99 keyword 'restrict'. 159 | */ 160 | #if defined _MSC_VER && _MSC_VER 161 | #ifndef restrict 162 | #define restrict __restrict 163 | #endif 164 | #endif 165 | 166 | /* ====================================================================== */ 167 | /* 168 | * Fixed-point numbers. 169 | * 170 | * For FFT and other computations with approximations, we use a fixed-point 171 | * format over 64 bits; the top 32 bits are the integral part, and the low 172 | * 32 bits are the fractional part. 173 | */ 174 | 175 | /* 176 | * We wrap the type into a struct in order to detect any attempt at using 177 | * arithmetic operators on values directly. Since all functions are inline, 178 | * the compiler will be able to remove the wrapper, which will then have 179 | * no runtime cost.
180 | */ 181 | typedef struct { 182 | uint64_t v; 183 | } fnr; 184 | 185 | static inline fnr 186 | fnr_of(int32_t j) 187 | { 188 | fnr x; 189 | 190 | x.v = (uint64_t)j << 32; 191 | return x; 192 | } 193 | 194 | static inline fnr 195 | fnr_of_scaled32(uint64_t t) 196 | { 197 | fnr x; 198 | 199 | x.v = t; 200 | return x; 201 | } 202 | 203 | static inline fnr 204 | fnr_add(fnr x, fnr y) 205 | { 206 | x.v += y.v; 207 | return x; 208 | } 209 | 210 | static inline fnr 211 | fnr_sub(fnr x, fnr y) 212 | { 213 | x.v -= y.v; 214 | return x; 215 | } 216 | 217 | static inline fnr 218 | fnr_double(fnr x) 219 | { 220 | x.v <<= 1; 221 | return x; 222 | } 223 | 224 | static inline fnr 225 | fnr_neg(fnr x) 226 | { 227 | x.v = (uint64_t)0 - x.v; 228 | return x; 229 | } 230 | 231 | static inline fnr 232 | fnr_abs(fnr x) 233 | { 234 | x.v -= (x.v << 1) & -(uint64_t)(x.v >> 63); 235 | return x; 236 | } 237 | 238 | static inline fnr 239 | fnr_mul(fnr x, fnr y) 240 | { 241 | #if defined __GNUC__ && defined __x86_64__ 242 | __int128 z; 243 | 244 | z = (__int128)*(int64_t *)&x.v * (__int128)*(int64_t *)&y.v; 245 | x.v = (uint64_t)(z >> 32); 246 | return x; 247 | #else 248 | int32_t xh, yh; 249 | uint32_t xl, yl; 250 | uint64_t z0, z1, z2, z3; 251 | 252 | xl = (uint32_t)x.v; 253 | yl = (uint32_t)y.v; 254 | xh = (int32_t)(*(int64_t *)&x.v >> 32); 255 | yh = (int32_t)(*(int64_t *)&y.v >> 32); 256 | z0 = ((uint64_t)xl * (uint64_t)yl + 0x80000000ul) >> 32; 257 | z1 = (uint64_t)((int64_t)xl * (int64_t)yh); 258 | z2 = (uint64_t)((int64_t)yl * (int64_t)xh); 259 | z3 = (uint64_t)((int64_t)xh * (int64_t)yh) << 32; 260 | x.v = z0 + z1 + z2 + z3; 261 | return x; 262 | #endif 263 | } 264 | 265 | static inline fnr 266 | fnr_sqr(fnr x) 267 | { 268 | #if defined __GNUC__ && defined __x86_64__ 269 | int64_t t; 270 | __int128 z; 271 | 272 | t = *(int64_t *)&x.v; 273 | z = (__int128)t * (__int128)t; 274 | x.v = (uint64_t)(z >> 32); 275 | return x; 276 | #else 277 | int32_t xh; 278 | uint32_t xl; 279 | 
uint64_t z0, z1, z3; 280 | 281 | xl = (uint32_t)x.v; 282 | xh = (int32_t)(*(int64_t *)&x.v >> 32); 283 | z0 = ((uint64_t)xl * (uint64_t)xl + 0x80000000ul) >> 32; 284 | z1 = (uint64_t)((int64_t)xl * (int64_t)xh); 285 | z3 = (uint64_t)((int64_t)xh * (int64_t)xh) << 32; 286 | x.v = z0 + (z1 << 1) + z3; 287 | return x; 288 | #endif 289 | } 290 | 291 | static inline int32_t 292 | fnr_round(fnr x) 293 | { 294 | x.v += 0x80000000ul; 295 | return (int32_t)(*(int64_t *)&x.v >> 32); 296 | } 297 | 298 | static inline fnr 299 | fnr_div_2e(fnr x, unsigned n) 300 | { 301 | int64_t v; 302 | 303 | v = *(int64_t *)&x.v; 304 | x.v = (uint64_t)((v + (((int64_t)1 << n) >> 1)) >> n); 305 | return x; 306 | } 307 | 308 | static inline fnr 309 | fnr_mul_2e(fnr x, unsigned n) 310 | { 311 | x.v <<= n; 312 | return x; 313 | } 314 | 315 | uint64_t bat_fnr_div(uint64_t x, uint64_t y); 316 | 317 | static inline fnr 318 | fnr_inv(fnr x) 319 | { 320 | x.v = bat_fnr_div((uint64_t)1 << 32, x.v); 321 | return x; 322 | } 323 | 324 | static inline fnr 325 | fnr_div(fnr x, fnr y) 326 | { 327 | x.v = bat_fnr_div(x.v, y.v); 328 | return x; 329 | } 330 | 331 | static inline int 332 | fnr_lt(fnr x, fnr y) 333 | { 334 | return *(int64_t *)&x.v < *(int64_t *)&y.v; 335 | } 336 | 337 | static const fnr fnr_zero = { 0 }; 338 | static const fnr fnr_sqrt2 = { 6074001000ull }; 339 | 340 | /* ====================================================================== */ 341 | /* 342 | * Apply FFT on a vector. 343 | */ 344 | void bat_FFT(fnr *f, unsigned logn); 345 | 346 | /* 347 | * Apply inverse FFT on a vector. 348 | */ 349 | void bat_iFFT(fnr *f, unsigned logn); 350 | 351 | /* 352 | * Add polynomial b to polynomial a (works in FFT and non-FFT). The two 353 | * polynomial arrays must be distinct. 354 | */ 355 | void bat_poly_add(fnr *restrict a, const fnr *restrict b, unsigned logn); 356 | 357 | /* 358 | * Subtract polynomial b from polynomial a (works in FFT and non-FFT). 
The two 359 | * polynomial arrays must be distinct. 360 | */ 361 | void bat_poly_sub(fnr *restrict a, const fnr *restrict b, unsigned logn); 362 | 363 | /* 364 | * Negate polynomial a (works in FFT and non-FFT). 365 | */ 366 | void bat_poly_neg(fnr *a, unsigned logn); 367 | 368 | /* 369 | * Multiply polynomial a by constant c. 370 | */ 371 | void bat_poly_mulconst(fnr *a, fnr c, unsigned logn); 372 | 373 | /* 374 | * Multiply polynomial a by polynomial b (FFT representation only). The two 375 | * polynomial arrays must be distinct. 376 | */ 377 | void bat_poly_mul_fft(fnr *restrict a, const fnr *restrict b, unsigned logn); 378 | 379 | /* 380 | * Compute the adjoint of a polynomial in FFT representation. 381 | */ 382 | void bat_poly_adj_fft(fnr *a, unsigned logn); 383 | 384 | /* 385 | * Scale a polynomial down by a factor 2^e. 386 | */ 387 | void bat_poly_div_2e(fnr *a, unsigned e, unsigned logn); 388 | 389 | /* 390 | * Multiply polynomial a by polynomial b (FFT representation only). The two 391 | * polynomial arrays must be distinct. The polynomial b must be auto-adjoint, 392 | * i.e. all its coefficients in FFT representation are real numbers (the 393 | * polynomial has half-length; the imaginary values of the coefficients, 394 | * assumed to be zero and located in the second half, are not accessed). 395 | */ 396 | void bat_poly_mul_autoadj_fft(fnr *restrict a, 397 | const fnr *restrict b, unsigned logn); 398 | 399 | /* 400 | * Divide polynomial a by polynomial b (FFT representation only). The two 401 | * polynomial arrays must be distinct. The polynomial b must be auto-adjoint, 402 | * i.e. all its coefficients in FFT representation are real numbers (the 403 | * polynomial has half-length; the imaginary values of the coefficients, 404 | * assumed to be zero and located in the second half, are not accessed). 
405 | */ 406 | void bat_poly_div_autoadj_fft(fnr *restrict a, 407 | const fnr *restrict b, unsigned logn); 408 | 409 | /* 410 | * Compute (2^e)/(a*adj(a)+b*adj(b)) into d[]. Polynomials are in FFT 411 | * representation. d[] is a half-size polynomial because the imaginary 412 | * parts of all its FFT coefficients are zero (they are not set by this 413 | * function). Parameter e can be 0. 414 | */ 415 | void bat_poly_invnorm_fft(fnr *restrict d, 416 | const fnr *restrict a, const fnr *restrict b, 417 | unsigned e, unsigned logn); 418 | 419 | /* ====================================================================== */ 420 | 421 | /* 422 | * Max size in bits for elements of (f,g), indexed by log(N). Size includes 423 | * the sign bit. 424 | */ 425 | extern const uint8_t bat_max_fg_bits[]; 426 | 427 | /* 428 | * Max size in bits for elements of (F,G), indexed by log(N). Size includes 429 | * the sign bit. 430 | */ 431 | extern const uint8_t bat_max_FG_bits[]; 432 | 433 | /* 434 | * Max size in bits for elements of w, indexed by log(N). Size includes 435 | * the sign bit. 436 | */ 437 | extern const uint8_t bat_max_w_bits[]; 438 | 439 | /* ====================================================================== */ 440 | 441 | /* 442 | * Key pair generation, first step: given a seed, candidate polynomials 443 | * f and g are generated. The following properties are checked: 444 | * - All coefficients of f and g are within the expected bounds. 445 | * - Res(f, x^n+1) == 1 mod 2. 446 | * - Res(g, x^n+1) == 1 mod 2. 447 | * - The (f,g) vector has an acceptable norm, both in normal and in 448 | * orthogonalized representations. 449 | * - f is invertible modulo x^n+1 modulo q. 450 | * If any of these properties is not met, then a failure is reported 451 | * (returned value is 0) and the contents of f[] and g[] are indeterminate. 452 | * Otherwise, success (1) is returned. 453 | * 454 | * If h != NULL, then the public key h = g/f mod x^n+1 mod q is returned 455 | * in that array.
Note that h is always internally computed, regardless 456 | * of whether h == NULL or not. 457 | * 458 | * Size of tmp[]: 6*n elements (24*n bytes). 459 | * tmp[] MUST be 64-bit aligned. 460 | * 461 | * The seed length MUST NOT exceed 48 bytes. 462 | */ 463 | int bat_keygen_make_fg(int8_t *f, int8_t *g, uint16_t *h, 464 | uint32_t q, unsigned logn, 465 | const void *seed, size_t seed_len, uint32_t *tmp); 466 | 467 | /* 468 | * Given polynomials f and g, solve the NTRU equation for F and G. This 469 | * may fail if there is no solution, or if some intermediate value exceeds 470 | * an internal heuristic threshold. Returned value is 1 on success, 0 471 | * on failure. On failure, contents of F and G are indeterminate. 472 | * 473 | * Size of tmp[]: 6*n elements (24*n bytes). 474 | * tmp[] MUST be 64-bit aligned. 475 | */ 476 | int bat_keygen_solve_FG(int8_t *F, int8_t *G, 477 | const int8_t *f, const int8_t *g, 478 | uint32_t q, unsigned logn, uint32_t *tmp); 479 | 480 | /* 481 | * Given polynomials f, g and F, rebuild the polynomial G that completes 482 | * the NTRU equation g*F - f*G = q. Returned value is 1 on success, 0 on 483 | * failure. A failure is reported if the rebuilt solution has 484 | * coefficients outside of the expected maximum range, or f is not 485 | * invertible modulo x^n+1 modulo q. This function does NOT fully verify 486 | * that f, g, F, G is a solution to the NTRU equation. 487 | * 488 | * Size of tmp[]: n elements (4*n bytes). 489 | */ 490 | int bat_keygen_rebuild_G(int8_t *G, 491 | const int8_t *f, const int8_t *g, const int8_t *F, 492 | uint32_t q, unsigned logn, uint32_t *tmp); 493 | 494 | /* 495 | * Verify that the given f, g, F, G fulfill the NTRU equation g*F - f*G = q. 496 | * Returned value is 1 on success, 0 on error. 497 | * 498 | * This function may be called when decoding a private key of unsure 499 | * provenance. It is implicitly called by bat_keygen_solve_FG(). 500 | * 501 | * Size of tmp[]: 4*n elements (16*n bytes). 
502 | */ 503 | int bat_keygen_verify_FG( 504 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 505 | uint32_t q, unsigned logn, uint32_t *tmp); 506 | 507 | /* 508 | * Compute the w vector. Returned value is 1 on success, 0 on error. An 509 | * error is reported if the w vector has coefficients that do not fit 510 | * in signed 16-bit integer, or if the norm of (gamma*F_d, G_d) exceeds 511 | * the prescribed limit. 512 | * 513 | * Size of tmp[]: 6*n elements (24*n bytes). 514 | * tmp[] MUST be 64-bit aligned. 515 | */ 516 | int bat_keygen_compute_w(int32_t *w, 517 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 518 | uint32_t q, unsigned logn, uint32_t *tmp); 519 | 520 | /* 521 | * Compute the public key h = g/f. Returned value is 1 on success, 0 on 522 | * error. An error is reported if f is not invertible modulo X^n+1. 523 | * This function is for q = 128 and 1 <= logn <= 8. 524 | * CAUTION: for q = 128, public key is in an array of uint8_t, not uint16_t. 525 | * 526 | * Size of tmp[]: 3*n/4 elements (3*n bytes). 527 | */ 528 | int bat_make_public_128(uint8_t *h, const int8_t *f, const int8_t *g, 529 | unsigned logn, uint32_t *tmp); 530 | 531 | /* 532 | * Compute the public key h = g/f. Returned value is 1 on success, 0 on 533 | * error. An error is reported if f is not invertible modulo X^n+1. 534 | * This function is for q = 257 and 1 <= logn <= 9. 535 | * 536 | * Size of tmp[]: n elements (4*n bytes). 537 | */ 538 | int bat_make_public_257(uint16_t *h, const int8_t *f, const int8_t *g, 539 | unsigned logn, uint32_t *tmp); 540 | 541 | /* 542 | * Compute the public key h = g/f. Returned value is 1 on success, 0 on 543 | * error. An error is reported if f is not invertible modulo X^n+1. 544 | * This function is for q = 769 and 1 <= logn <= 10. 545 | * 546 | * Size of tmp[]: n elements (4*n bytes). 
547 | */ 548 | int bat_make_public_769(uint16_t *h, const int8_t *f, const int8_t *g, 549 | unsigned logn, uint32_t *tmp); 550 | 551 | /* 552 | * Given f, g and F, rebuild G, for the case q = 128. This function 553 | * reports a failure if (q,logn) are not supported parameters, if f is 554 | * not invertible modulo x^n+1 and modulo q, or if the rebuilt value G 555 | * has coefficients that exceed the expected maximum size. 556 | * 557 | * This function does NOT check that the returned G matches the NTRU 558 | * equation. 559 | * 560 | * Size of tmp[]: 3*n/4 elements (3*n bytes). 561 | */ 562 | int bat_rebuild_G_128(int8_t *G, 563 | const int8_t *f, const int8_t *g, const int8_t *F, 564 | unsigned logn, uint32_t *tmp); 565 | 566 | /* 567 | * Given f, g and F, rebuild G, for the case q = 257. This function 568 | * reports a failure if (q,logn) are not supported parameters, if f is 569 | * not invertible modulo x^n+1 and modulo q, or if the rebuilt value G 570 | * has coefficients that exceed the expected maximum size. 571 | * 572 | * This function does NOT check that the returned G matches the NTRU 573 | * equation. 574 | * 575 | * Size of tmp[]: n elements (4*n bytes). 576 | */ 577 | int bat_rebuild_G_257(int8_t *G, 578 | const int8_t *f, const int8_t *g, const int8_t *F, 579 | unsigned logn, uint32_t *tmp); 580 | 581 | /* 582 | * Given f, g and F, rebuild G, for the case q = 769. This function 583 | * reports a failure if (q,logn) are not supported parameters, if f is 584 | * not invertible modulo x^n+1 and modulo q, or if the rebuilt value G 585 | * has coefficients that exceed the expected maximum size. 586 | * 587 | * This function does NOT check that the returned G matches the NTRU 588 | * equation. 589 | * 590 | * Size of tmp[]: n elements (4*n bytes). 
591 | */ 592 | int bat_rebuild_G_769(int8_t *G, 593 | const int8_t *f, const int8_t *g, const int8_t *F, 594 | unsigned logn, uint32_t *tmp); 595 | 596 | /* ====================================================================== */ 597 | 598 | /* 599 | * Get the length of sbuf, for a given degree n, with n = 2^logn. 600 | * The logn parameter must be between 1 and 10, inclusive. Returned length 601 | * is in bytes, between 1 and 128, inclusive. 602 | */ 603 | #define SBUF_LEN(logn) (((1 << (logn)) + 7) >> 3) 604 | 605 | /* 606 | * Encrypt: given public key (in h) and secret polynomial s (in sbuf[]), 607 | * produce ciphertext c1 (in c). 608 | * 609 | * This function is for q = 128, with logn = 1 to 8. Ciphertext elements 610 | * are in the -31..+32 range. The function cannot fail, hence it always 611 | * returns 1. 612 | * CAUTION: for q = 128, public key is in an array of uint8_t, not uint16_t. 613 | * 614 | * Size of tmp[]: 3*n/4 elements (3*n bytes) 615 | */ 616 | uint32_t bat_encrypt_128(int8_t *c, const uint8_t *sbuf, 617 | const uint8_t *h, unsigned logn, uint32_t *tmp); 618 | 619 | /* 620 | * Encrypt: given public key (in h) and secret polynomial s (in sbuf[]), 621 | * produce ciphertext c1 (in c). 622 | * 623 | * This function is for q = 257, with logn = 1 to 9. Ciphertext elements 624 | * are in the -64..+64 range. The function cannot fail, hence it always 625 | * returns 1. 626 | * 627 | * Size of tmp[]: n elements (4*n bytes). 628 | */ 629 | uint32_t bat_encrypt_257(int8_t *c, const uint8_t *sbuf, 630 | const uint16_t *h, unsigned logn, uint32_t *tmp); 631 | 632 | /* 633 | * Encrypt: given public key (in h) and secret polynomial s (in sbuf[]), 634 | * produce ciphertext c1 (in c). 635 | * 636 | * This function is for q = 769, with logn = 1 to 10. Ciphertext elements 637 | * are in the -96..+96 range. 
638 | * 639 | * The function may fail, if the norm of the result is too high, in which 640 | * case the caller should start again with a new seed (this is uncommon). 641 | * On failure, this function returns 0; on success, it returns 1. 642 | * 643 | * Size of tmp[]: 3*n/4 elements (3*n bytes). 644 | */ 645 | uint32_t bat_encrypt_769(int8_t *c, const uint8_t *sbuf, 646 | const uint16_t *h, unsigned logn, uint32_t *tmp); 647 | 648 | /* 649 | * Decrypt: given private key (f,g,F,G,w) and ciphertext c1, extract 650 | * secret s. The polynomial s has length n bits (with n = 2^logn); it 651 | * is returned in sbuf[] (ceil(n/8) bytes; for toy versions with logn < 652 | * 3, the upper bits of the incomplete byte are set to zero). 653 | * 654 | * This function is for q = 128. Ciphertext elements are in the -31..+32 655 | * range. 656 | * 657 | * Size of tmp[]: 2*n elements (8*n bytes). 658 | * 659 | * This function never fails; for proper security, the caller must obtain 660 | * the message m (using the second ciphertext element c2) and check that 661 | * encryption of m would indeed yield exactly ciphertext c1. 662 | */ 663 | void bat_decrypt_128(uint8_t *sbuf, const int8_t *c, 664 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 665 | const int32_t *w, unsigned logn, uint32_t *tmp); 666 | 667 | /* 668 | * Decrypt: given private key (f,g,F,G,w) and ciphertext c1, extract 669 | * secret s. The polynomial s has length n bits (with n = 2^logn); it 670 | * is returned in sbuf[] (ceil(n/8) bytes; for toy versions with logn < 671 | * 3, the upper bits of the incomplete byte are set to zero). 672 | * 673 | * This function is for q = 257. Ciphertext elements are in the -64..+64 674 | * range. 675 | * 676 | * Size of tmp[]: 2*n elements (8*n bytes). 
677 | * 678 | * This function never fails; for proper security, the caller must obtain 679 | * the message m (using the second ciphertext element c2) and check that 680 | * encryption of m would indeed yield exactly ciphertext c1. 681 | */ 682 | void bat_decrypt_257(uint8_t *sbuf, const int8_t *c, 683 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 684 | const int32_t *w, unsigned logn, uint32_t *tmp); 685 | 686 | /* 687 | * Decrypt: given private key (f,g,F,G,w) and ciphertext c1, extract 688 | * secret s. The polynomial s has length n bits (with n = 2^logn); it 689 | * is returned in sbuf[] (ceil(n/8) bytes; for toy versions with logn < 690 | * 3, the upper bits of the incomplete byte are set to zero). 691 | * 692 | * This function is for q = 769. Ciphertext elements are in the -96..+96 693 | * range. 694 | * 695 | * Size of tmp[]: 2*n elements (8*n bytes). 696 | * 697 | * This function never fails; for proper security, the caller must obtain 698 | * the message m (using the second ciphertext element c2) and check that 699 | * encryption of m would indeed yield exactly ciphertext c1. 700 | */ 701 | void bat_decrypt_769(uint8_t *sbuf, const int8_t *c, 702 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 703 | const int32_t *w, unsigned logn, uint32_t *tmp); 704 | 705 | /* 706 | * Second phase of decapsulation, performed modulo 769. 707 | * Given c', c'', f, F and w, this function computes: 708 | * Fd = q'*F - f*w 709 | * q*q'*Q*s' = Fd*c' - f*c'' 710 | * 711 | * On input, cp[] and cs[] must contain c' and c'', respectively, in 712 | * Montgomery representation modulo 769. On output, polynomial q*q'*Q*s' 713 | * is returned in cp[], in Montgomery representation modulo 769 (since 714 | * coefficients of s' can have only a few specific values, this is enough 715 | * to recover s'). cs[] is consumed. tmp[] must have room for 4*n bytes 716 | * (n 32-bit elements). 717 | * 718 | * Size of tmp[]: n elements (4*n bytes). 
 */
void bat_finish_decapsulate_769(uint16_t *cp, uint16_t *cs,
	const int8_t *f, const int8_t *F, const int32_t *w, unsigned logn,
	uint32_t *tmp);

/*
 * Second phase of decapsulation, performed modulo 257.
 * Given c', c'', f, F and w, this function computes:
 *    Fd = q'*F - f*w
 *    q*q'*Q*s' = Fd*c' - f*c''
 *
 * On input, cp[] and cs[] must contain c' and c'', respectively, in
 * Montgomery representation modulo 257. On output, polynomial q*q'*Q*s'
 * is returned in cp[], in Montgomery representation modulo 257 (since
 * coefficients of s' can have only a few specific values, this is enough
 * to recover s'). cs[] is consumed. tmp[] must have room for 4*n bytes
 * (n 32-bit elements).
 *
 * Size of tmp[]: n elements (4*n bytes).
 */
void bat_finish_decapsulate_257(uint16_t *cp, uint16_t *cs,
	const int8_t *f, const int8_t *F, const int32_t *w, unsigned logn,
	uint32_t *tmp);

/*
 * Explicit reduction and conversion to Montgomery representation modulo
 * 257. This works for inputs x in range 0..4278190336.
 *
 * The returned value is in the 1..257 range: the upper 16 bits of the
 * first product are at most 65535, and (65535 * 257) >> 16 = 256.
 */
static inline uint32_t
m257_tomonty(uint32_t x)
{
	/* 16711935 * 257 = 2^32 - 1, i.e. 16711935 = -1/257 mod 2^32. */
	x *= 16711935;
	/* Keep the upper half, multiply by the modulus. */
	x = (x >> 16) * 257;
	/* Upper half again, plus one. */
	return (x >> 16) + 1;
}

/*
 * Explicit reduction and conversion to Montgomery representation modulo
 * 769. This works for inputs x in range 0..4244636416.
 *
 * Two reduction passes are applied. The returned value is in the
 * 1..769 range ((65535 * 769) >> 16 = 768).
 */
static inline uint32_t
m769_tomonty(uint32_t x)
{
	/* 452395775 * 769 = 81 * 2^32 - 1, i.e. 452395775 = -1/769 mod 2^32. */
	x *= 452395775;
	x = (x >> 16) * 769;
	x = (x >> 16) + 1;
	/*
	 * Second pass, with a different multiplier.
	 * NOTE(review): presumably this constant also folds in the
	 * Montgomery conversion factor; confirm against the code that
	 * generated it.
	 */
	x *= 2016233021;
	x = (x >> 16) * 769;
	return (x >> 16) + 1;
}

/* ====================================================================== */
/*
 * Computations on polynomials modulo q' = 64513.
773 | */ 774 | 775 | /* 776 | * Compute d = -a*b mod X^n+1 mod q' 777 | * Coefficients of source values are plain integers (for value b, they must 778 | * be in the -503109..+503109 range). Coefficients of output values are 779 | * normalized in -32256..+32256. 780 | * 781 | * Array d[] may overlap, partially or totally, with a[]; however, it 782 | * MUST NOT overlap with b[]. 783 | * 784 | * Size of tmp[]: n/2 elements (2*n bytes). 785 | */ 786 | void bat_polyqp_mulneg(int16_t *d, const int16_t *a, const int32_t *b, 787 | unsigned logn, uint32_t *tmp); 788 | 789 | /* ====================================================================== */ 790 | /* 791 | * Encoding/decoding functions. 792 | */ 793 | 794 | #if BAT_LE && BAT_UNALIGNED 795 | 796 | static inline unsigned 797 | dec16le(const void *src) 798 | { 799 | return *(const uint16_t *)src; 800 | } 801 | 802 | static inline void 803 | enc16le(void *dst, unsigned x) 804 | { 805 | *(uint16_t *)dst = x; 806 | } 807 | 808 | static inline uint32_t 809 | dec32le(const void *src) 810 | { 811 | return *(const uint32_t *)src; 812 | } 813 | 814 | static inline void 815 | enc32le(void *dst, uint32_t x) 816 | { 817 | *(uint32_t *)dst = x; 818 | } 819 | 820 | static inline uint64_t 821 | dec64le(const void *src) 822 | { 823 | return *(const uint64_t *)src; 824 | } 825 | 826 | static inline void 827 | enc64le(void *dst, uint64_t x) 828 | { 829 | *(uint64_t *)dst = x; 830 | } 831 | 832 | #else 833 | 834 | static inline unsigned 835 | dec16le(const void *src) 836 | { 837 | const uint8_t *buf; 838 | 839 | buf = src; 840 | return (unsigned)buf[0] 841 | | ((unsigned)buf[1] << 8); 842 | } 843 | 844 | static inline void 845 | enc16le(void *dst, unsigned x) 846 | { 847 | uint8_t *buf; 848 | 849 | buf = dst; 850 | buf[0] = (uint8_t)x; 851 | buf[1] = (uint8_t)(x >> 8); 852 | } 853 | 854 | static inline uint32_t 855 | dec32le(const void *src) 856 | { 857 | const uint8_t *buf; 858 | 859 | buf = src; 860 | return (uint32_t)buf[0] 861 | | 
((uint32_t)buf[1] << 8) 862 | | ((uint32_t)buf[2] << 16) 863 | | ((uint32_t)buf[3] << 24); 864 | } 865 | 866 | static inline void 867 | enc32le(void *dst, uint32_t x) 868 | { 869 | uint8_t *buf; 870 | 871 | buf = dst; 872 | buf[0] = (uint8_t)x; 873 | buf[1] = (uint8_t)(x >> 8); 874 | buf[2] = (uint8_t)(x >> 16); 875 | buf[3] = (uint8_t)(x >> 24); 876 | } 877 | 878 | static inline uint64_t 879 | dec64le(const void *src) 880 | { 881 | const uint8_t *buf; 882 | 883 | buf = src; 884 | return (uint64_t)buf[0] 885 | | ((uint64_t)buf[1] << 8) 886 | | ((uint64_t)buf[2] << 16) 887 | | ((uint64_t)buf[3] << 24) 888 | | ((uint64_t)buf[4] << 32) 889 | | ((uint64_t)buf[5] << 40) 890 | | ((uint64_t)buf[6] << 48) 891 | | ((uint64_t)buf[7] << 56); 892 | } 893 | 894 | static inline void 895 | enc64le(void *dst, uint64_t x) 896 | { 897 | uint8_t *buf; 898 | 899 | buf = dst; 900 | buf[0] = (uint64_t)x; 901 | buf[1] = (uint64_t)(x >> 8); 902 | buf[2] = (uint64_t)(x >> 16); 903 | buf[3] = (uint64_t)(x >> 24); 904 | buf[4] = (uint64_t)(x >> 32); 905 | buf[5] = (uint64_t)(x >> 40); 906 | buf[6] = (uint64_t)(x >> 48); 907 | buf[7] = (uint64_t)(x >> 56); 908 | } 909 | 910 | #endif 911 | 912 | static inline uint32_t 913 | dec24le(const void *src) 914 | { 915 | const uint8_t *buf; 916 | 917 | buf = src; 918 | return (uint32_t)buf[0] 919 | | ((uint32_t)buf[1] << 8) 920 | | ((uint32_t)buf[2] << 16); 921 | } 922 | 923 | static inline void 924 | enc24le(void *dst, uint32_t x) 925 | { 926 | uint8_t *buf; 927 | 928 | buf = dst; 929 | buf[0] = (uint8_t)x; 930 | buf[1] = (uint8_t)(x >> 8); 931 | buf[2] = (uint8_t)(x >> 16); 932 | } 933 | 934 | /* 935 | * bat_trim_i32_encode() and bat_trim_i32_decode() encode and decode 936 | * polynomials with signed coefficients (int32_t), using the specified 937 | * number of bits for each coefficient. The number of bits includes the 938 | * sign bit. 
Each coefficient x must be such that |x| < 2^(bits-1) (the 939 | * value -2^(bits-1), though conceptually encodable with two's 940 | * complement representation, is forbidden). 941 | * 942 | * bat_trim_i8_encode() and bat_trim_i8_decode() do the same work for 943 | * polynomials whose coefficients are held in slots of type int8_t. 944 | * 945 | * Encoding API: 946 | * 947 | * Output buffer (out[]) has max length max_out_len (in bytes). If 948 | * that length is not large enough, then no encoding occurs and the 949 | * function returns 0; otherwise, the function returns the number of 950 | * bytes which have been written into out[]. If out == NULL, then 951 | * max_out_len is ignored, and no output is produced, but the function 952 | * returns how many bytes it would produce. 953 | * 954 | * Encoding functions assume that the input is valid (all values in 955 | * the encodable range). 956 | * 957 | * Decoding API: 958 | * 959 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 960 | * the input length is not enough for the expected polynomial, then 961 | * no decoding occurs and the function returns 0. Otherwise, the values 962 | * are decoded and the number of processed input bytes is returned. 963 | * 964 | * If the input is invalid in some way (a decoded coefficient has 965 | * value -2^(bits-1), or some of the ignored bits in the last byte 966 | * are non-zero), then the function fails and returns 0; the contents 967 | * of the output array are then indeterminate. 968 | * 969 | * Both encoding and decoding are constant-time with regards to the 970 | * values and bits. 
971 | */ 972 | 973 | size_t bat_trim_i32_encode(void *out, size_t max_out_len, 974 | const int32_t *x, unsigned logn, unsigned bits); 975 | size_t bat_trim_i32_decode(int32_t *x, unsigned logn, unsigned bits, 976 | const void *in, size_t max_in_len); 977 | size_t bat_trim_i8_encode(void *out, size_t max_out_len, 978 | const int8_t *x, unsigned logn, unsigned bits); 979 | size_t bat_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, 980 | const void *in, size_t max_in_len); 981 | 982 | /* 983 | * Encode a polynomial with coefficients modulo 128. This is used for 984 | * public keys with q = 128. 985 | * 986 | * If out == NULL, then max_out_len is ignored and the function returns 987 | * the size of the output it could produce (in bytes). 988 | * If out != NULL, then max_out_len is compared with the expected output 989 | * size. If max_out_len is lower, then no output is produced, and the 990 | * function returns 0; otherwise, the output is produced and its length 991 | * (in bytes) is returned. 992 | */ 993 | size_t bat_encode_128(void *out, size_t max_out_len, 994 | const uint8_t *x, unsigned logn); 995 | 996 | /* 997 | * Decode a polynomial with coefficients modulo 128. This is used for 998 | * public keys with q = 128. 999 | * 1000 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 1001 | * the input length is not enough for the expected polynomial, then 1002 | * no decoding occurs and the function returns 0. Otherwise, the values 1003 | * are decoded and the number of processed input bytes is returned. 1004 | * 1005 | * If the input is invalid in some way (a decoded coefficient is out of 1006 | * the expected range, or some ignored bit is non-zero), then the 1007 | * function fails and returns 0; the contents of the output array are 1008 | * then indeterminate. 1009 | * 1010 | * Decoding is constant-time as long as no failure occurs.
1011 | */ 1012 | size_t bat_decode_128(uint8_t *x, unsigned logn, 1013 | const void *in, size_t max_in_len); 1014 | 1015 | /* 1016 | * Encode a ciphertext polynomial, for q = 128; coefficients are in -31..+32. 1017 | * 1018 | * If out == NULL, then max_out_len is ignored and the function returns 1019 | * the size of the output it could produce (in bytes). 1020 | * If out != NULL, then max_out_len is compared with the expected output 1021 | * size. If max_out_len is lower, then no output is produced, and the 1022 | * function returns 0; otherwise, the output is produced and its length 1023 | * (in bytes) is returned. 1024 | */ 1025 | size_t bat_encode_ciphertext_128(void *out, size_t max_out_len, 1026 | const int8_t *c, unsigned logn); 1027 | /* 1028 | * Decode a ciphertext polynomial, for q = 128; coefficients are in -31..+32. 1029 | * 1030 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 1031 | * the input length is not enough for the expected polynomial, then 1032 | * no decoding occurs and the function returns 0. Otherwise, the values 1033 | * are decoded and the number of processed input bytes is returned. 1034 | * 1035 | * If the input is invalid in some way (a decoded coefficient is out of 1036 | * the expected range, or some ignored bit is non-zero), then the 1037 | * function fails and returns 0; the contents of the output array are 1038 | * then indeterminate. 1039 | * 1040 | * Decoding is constant-time with regard to the coefficient values. 1041 | */ 1042 | size_t bat_decode_ciphertext_128(int8_t *c, unsigned logn, 1043 | const void *in, size_t max_in_len); 1044 | 1045 | /* 1046 | * Encode a polynomial with coefficients modulo 257. This is used for 1047 | * public keys with q = 257. 1048 | * 1049 | * If out == NULL, then max_out_len is ignored and the function returns 1050 | * the size of the output it could produce (in bytes). 1051 | * If out != NULL, then max_out_len is compared with the expected output 1052 | * size.
If max_out_len is lower, then no output is produced, and the 1053 | * function returns 0; otherwise, the output is produced and its length 1054 | * (in bytes) is returned. 1055 | */ 1056 | size_t bat_encode_257(void *out, size_t max_out_len, 1057 | const uint16_t *x, unsigned logn); 1058 | 1059 | /* 1060 | * Decode a polynomial with coefficients modulo 257. This is used for 1061 | * public keys with q = 257. 1062 | * 1063 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 1064 | * the input length is not enough for the expected polynomial, then 1065 | * no decoding occurs and the function returns 0. Otherwise, the values 1066 | * are decoded and the number of processed input bytes is returned. 1067 | * 1068 | * If the input is invalid in some way (a decoded coefficient is out of 1069 | * the expected range, or some ignored bit is non-zero), then the 1070 | * function fails and returns 0; the contents of the output array are 1071 | * then indeterminate. 1072 | * 1073 | * Decoding is constant-time as long as no failure occurs. 1074 | */ 1075 | size_t bat_decode_257(uint16_t *x, unsigned logn, 1076 | const void *in, size_t max_in_len); 1077 | 1078 | /* 1079 | * Encode a ciphertext polynomial, for q = 257; coefficients are in -64..+64. 1080 | * 1081 | * If out == NULL, then max_out_len is ignored and the function returns 1082 | * the size of the output it could produce (in bytes). 1083 | * If out != NULL, then max_out_len is compared with the expected output 1084 | * size. If max_out_len is lower, then no output is produced, and the 1085 | * function returns 0; otherwise, the output is produced and its length 1086 | * (in bytes) is returned. 1087 | */ 1088 | size_t bat_encode_ciphertext_257(void *out, size_t max_out_len, 1089 | const int8_t *c, unsigned logn); 1090 | /* 1091 | * Decode a ciphertext polynomial, for q = 257; coefficients are in -64..+64. 1092 | * 1093 | * Input buffer (in[]) has maximum length max_in_len (in bytes).
If 1094 | * the input length is not enough for the expected polynomial, then 1095 | * no decoding occurs and the function returns 0. Otherwise, the values 1096 | * are decoded and the number of processed input bytes is returned. 1097 | * 1098 | * If the input is invalid in some way (a decoded coefficient is out of 1099 | * the expected range, or some ignored bit is non-zero), then the 1100 | * function fails and returns 0; the contents of the output array are 1101 | * then indeterminate. 1102 | * 1103 | * Decoding is constant-time with regard to the coefficient values. 1104 | */ 1105 | size_t bat_decode_ciphertext_257(int8_t *c, unsigned logn, 1106 | const void *in, size_t max_in_len); 1107 | 1108 | /* 1109 | * Encode a polynomial with coefficients modulo 769. This is used for 1110 | * public keys with q = 769. 1111 | * 1112 | * If out == NULL, then max_out_len is ignored and the function returns 1113 | * the size of the output it could produce (in bytes). 1114 | * If out != NULL, then max_out_len is compared with the expected output 1115 | * size. If max_out_len is lower, then no output is produced, and the 1116 | * function returns 0; otherwise, the output is produced and its length 1117 | * (in bytes) is returned. 1118 | */ 1119 | size_t bat_encode_769(void *out, size_t max_out_len, 1120 | const uint16_t *x, unsigned logn); 1121 | 1122 | /* 1123 | * Decode a polynomial with coefficients modulo 769. This is used for 1124 | * public keys with q = 769. 1125 | * 1126 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 1127 | * the input length is not enough for the expected polynomial, then 1128 | * no decoding occurs and the function returns 0. Otherwise, the values 1129 | * are decoded and the number of processed input bytes is returned.
1130 | * 1131 | * If the input is invalid in some way (a decoded coefficient is out of 1132 | * the expected range, or some ignored bit is non-zero), then the 1133 | * function fails and returns 0; the contents of the output array are 1134 | * then indeterminate. 1135 | * 1136 | * Decoding is constant-time with regard to the coefficient values. 1137 | */ 1138 | size_t bat_decode_769(uint16_t *x, unsigned logn, 1139 | const void *in, size_t max_in_len); 1140 | 1141 | /* 1142 | * Encode a ciphertext polynomial, for q = 769; coefficients are in -96..+96. 1143 | * 1144 | * If out == NULL, then max_out_len is ignored and the function returns 1145 | * the size of the output it could produce (in bytes). 1146 | * If out != NULL, then max_out_len is compared with the expected output 1147 | * size. If max_out_len is lower, then no output is produced, and the 1148 | * function returns 0; otherwise, the output is produced and its length 1149 | * (in bytes) is returned. 1150 | */ 1151 | size_t bat_encode_ciphertext_769(void *out, size_t max_out_len, 1152 | const int8_t *c, unsigned logn); 1153 | /* 1154 | * Decode a ciphertext polynomial, for q = 769; coefficients are in -96..+96. 1155 | * 1156 | * Input buffer (in[]) has maximum length max_in_len (in bytes). If 1157 | * the input length is not enough for the expected polynomial, then 1158 | * no decoding occurs and the function returns 0. Otherwise, the values 1159 | * are decoded and the number of processed input bytes is returned. 1160 | * 1161 | * If the input is invalid in some way (a decoded coefficient is out of 1162 | * the expected range, or some ignored bit is non-zero), then the 1163 | * function fails and returns 0; the contents of the output array are 1164 | * then indeterminate. 1165 | * 1166 | * Decoding is constant-time with regard to the coefficient values.
1167 | */ 1168 | size_t bat_decode_ciphertext_769(int8_t *c, unsigned logn, 1169 | const void *in, size_t max_in_len); 1170 | 1171 | /* ====================================================================== */ 1172 | 1173 | /* 1174 | * Obtain a random seed from the system RNG. Maximum allowed seed length 1175 | * is 2048 bits (256 bytes). 1176 | * 1177 | * Returned value is 1 on success, 0 on error. 1178 | */ 1179 | int bat_get_seed(void *seed, size_t len); 1180 | 1181 | /* 1182 | * Custom PRNG that outputs 64-bit integers. It is based on BLAKE2s. 1183 | */ 1184 | typedef struct { 1185 | uint8_t buf[128]; 1186 | uint8_t key[32]; 1187 | uint64_t ctr; 1188 | size_t ptr; 1189 | } prng_context; 1190 | 1191 | /* 1192 | * Initialize the PRNG from the provided seed and an extra 64-bit integer. 1193 | * The seed length MUST NOT exceed 48 bytes. 1194 | */ 1195 | static inline void 1196 | prng_init(prng_context *p, const void *seed, size_t seed_len, uint64_t label) 1197 | { 1198 | blake2s_expand(p->key, sizeof p->key, seed, seed_len, label); 1199 | p->ctr = 0; 1200 | p->ptr = sizeof p->buf; 1201 | } 1202 | 1203 | /* 1204 | * Get a 64-bit integer out of a PRNG. 1205 | */ 1206 | static inline uint64_t 1207 | prng_get_u64(prng_context *p) 1208 | { 1209 | uint64_t x; 1210 | 1211 | if (p->ptr == sizeof p->buf) { 1212 | blake2s_expand(p->buf, sizeof p->buf, 1213 | p->key, sizeof p->key, p->ctr ++); 1214 | p->ptr = 0; 1215 | } 1216 | x = dec64le(p->buf + p->ptr); 1217 | p->ptr += 8; 1218 | return x; 1219 | } 1220 | 1221 | /* 1222 | * Get arbitrary bytes out of a PRNG. 
1223 | */ 1224 | static inline void 1225 | prng_get_bytes(prng_context *p, void *dst, size_t len) 1226 | { 1227 | blake2s_expand(dst, len, p->key, sizeof p->key, p->ctr ++); 1228 | } 1229 | 1230 | /* ====================================================================== */ 1231 | 1232 | #endif 1233 | -------------------------------------------------------------------------------- /src/kem128.c: -------------------------------------------------------------------------------- 1 | #include "inner.h" 2 | 3 | /* 4 | * We use computations modulo 256 (usually implicitly through the use 5 | * of uint8_t as a storage type). When a value is needed modulo 128, 6 | * we apply a mask explicitly. 7 | * 8 | * We can use int8_t* and uint8_t* interchangeably, since the C standard 9 | * guarantees two's-complement and compatibility of formats for 10 | * exact-width types. 11 | * 12 | * Note that invertibility modulo 256 is equivalent to invertibility 13 | * modulo 128, since this boils down to the parity of the value at the 14 | * deepest recursion level (see mq_poly_inv_inner() for details). In BAT, 15 | * the keygen makes f with odd parity only (this is required for the 16 | * NTRU solving algorithm), thus f is always invertible modulo 128 (and 17 | * 256). 18 | * 19 | * Functions use a logarithm "stride" for access: successive elements of 20 | * polynomial a[] are a[0], a[1 << ls], a[2 << ls], ... 
21 | */ 22 | 23 | static void 24 | mq_poly_add_inner(uint8_t *d, 25 | const uint8_t *a, const uint8_t *b, int ls, unsigned logn) 26 | { 27 | size_t u, n; 28 | 29 | n = (size_t)1 << logn; 30 | for (u = 0; u < n; u ++) { 31 | d[u << ls] = a[u << ls] + b[u << ls]; 32 | } 33 | } 34 | 35 | static void 36 | mq_poly_sub_inner(uint8_t *d, 37 | const uint8_t *a, const uint8_t *b, int ls, unsigned logn) 38 | { 39 | size_t u, n; 40 | 41 | n = (size_t)1 << logn; 42 | for (u = 0; u < n; u ++) { 43 | d[u << ls] = a[u << ls] - b[u << ls]; 44 | } 45 | } 46 | 47 | static void 48 | mq_poly_neg_inner(uint8_t *d, const uint8_t *a, int ls, unsigned logn) 49 | { 50 | size_t u, n; 51 | 52 | n = (size_t)1 << logn; 53 | for (u = 0; u < n; u ++) { 54 | d[u << ls] = -a[u << ls]; 55 | } 56 | } 57 | 58 | /* 59 | * d <- a + X*b 60 | */ 61 | static void 62 | mq_poly_add_mulX_inner(uint8_t *d, 63 | const uint8_t *a, const uint8_t *b, int ls, unsigned logn) 64 | { 65 | /* 66 | * We must take care to perform the loop in a way that does not 67 | * break when d == b. 68 | */ 69 | size_t u, n; 70 | int t; 71 | 72 | n = (size_t)1 << logn; 73 | t = -b[(n - 1) << ls]; 74 | for (u = 0; u < n; u ++) { 75 | int tn; 76 | 77 | tn = b[u << ls]; 78 | d[u << ls] = a[u << ls] + t; 79 | t = tn; 80 | } 81 | } 82 | 83 | /* 84 | * d <- a - X*b 85 | */ 86 | static void 87 | mq_poly_sub_mulX_inner(uint8_t *d, 88 | const uint8_t *a, const uint8_t *b, int ls, unsigned logn) 89 | { 90 | /* 91 | * We must take care to perform the loop in a way that does not 92 | * break when d == b. 
93 | */ 94 | size_t u, n; 95 | int t; 96 | 97 | n = (size_t)1 << logn; 98 | t = -b[(n - 1) << ls]; 99 | for (u = 0; u < n; u ++) { 100 | int tn; 101 | 102 | tn = b[u << ls]; 103 | d[u << ls] = a[u << ls] - t; 104 | t = tn; 105 | } 106 | } 107 | 108 | /* 109 | * For multiplications, we use Karatsuba, with even/odd splits: 110 | * 111 | * a = a_e(X^2) + X*a_o(X^2) 112 | * b = b_e(X^2) + X*b_o(X^2) 113 | * a*b = (a_e*b_e + X*a_o*b_o)(X^2) + X*(a_e*b_o + a_o*b_e)(X^2) 114 | * (a_e*b_o + a_o*b_e) = (a_e + a_o)*(b_e + b_o) - a_e*b_e - a_o*b_o 115 | * 116 | * Size of tmp[]: 2*n bytes (with n = top-level degree). 117 | */ 118 | static void 119 | mq_poly_mul_inner(uint8_t *d, const uint8_t *a, const uint8_t *b, 120 | int ls, unsigned logn, uint8_t *tmp) 121 | { 122 | uint8_t *t1, *t2; 123 | 124 | switch (logn) { 125 | unsigned a0, a1, a2, a3; 126 | unsigned b0, b1, b2, b3; 127 | 128 | case 1: 129 | a0 = a[0 << ls]; 130 | a1 = a[1 << ls]; 131 | b0 = b[0 << ls]; 132 | b1 = b[1 << ls]; 133 | d[0 << ls] = a0 * b0 - a1 * b1; 134 | d[1 << ls] = a0 * b1 + a1 * b0; 135 | return; 136 | case 2: 137 | a0 = a[0 << ls]; 138 | a1 = a[1 << ls]; 139 | a2 = a[2 << ls]; 140 | a3 = a[3 << ls]; 141 | b0 = b[0 << ls]; 142 | b1 = b[1 << ls]; 143 | b2 = b[2 << ls]; 144 | b3 = b[3 << ls]; 145 | d[0 << ls] = a0 * b0 - a1 * b3 - a2 * b2 - a3 * b1; 146 | d[1 << ls] = a0 * b1 + a1 * b0 - a2 * b3 - a3 * b2; 147 | d[2 << ls] = a0 * b2 + a1 * b1 + a2 * b0 - a3 * b3; 148 | d[3 << ls] = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; 149 | return; 150 | default: 151 | break; 152 | } 153 | 154 | /* 155 | * a_e is a[], starting at 0, with stride ls + 1 156 | * a_o is a[], starting at 1 << ls, with stride ls + 1 157 | * 158 | * We need two temporaries t1 and t2, and we will use only the 159 | * elements with stride ls + 1 in them. Thus, in the received 160 | * tmp[], we use odd-indexed elements for our temporaries, 161 | * leaving the even-indexed elements free for the deeper 162 | * recursion levels. 
163 | */ 164 | t1 = tmp + (1 << ls); 165 | t2 = t1 + ((size_t)1 << (logn + ls)); 166 | 167 | /* 168 | * t1 <- a_e + a_o 169 | * t2 <- b_e + b_o 170 | */ 171 | mq_poly_add_inner(t1, a, a + (1 << ls), ls + 1, logn - 1); 172 | mq_poly_add_inner(t2, b, b + (1 << ls), ls + 1, logn - 1); 173 | 174 | /* 175 | * t1 <- (a_e + a_o)*(b_e + b_o) 176 | */ 177 | mq_poly_mul_inner(t1, t1, t2, ls + 1, logn - 1, tmp); 178 | 179 | /* 180 | * t2 <- a_o * b_o 181 | * d_e <- a_e * b_e 182 | * We don't need a[] and b[] afterwards, which is why we can 183 | * write into d_e (which may overlap either or both). 184 | */ 185 | mq_poly_mul_inner(t2, 186 | a + (1 << ls), b + (1 << ls), ls + 1, logn - 1, tmp); 187 | mq_poly_mul_inner(d, 188 | a, b, ls + 1, logn - 1, tmp); 189 | 190 | /* 191 | * d_o <- t1 - t2 - d_e = a_e*b_o + a_o*b_e 192 | */ 193 | mq_poly_sub_inner(t1, t1, t2, ls + 1, logn - 1); 194 | mq_poly_sub_inner(d + (1 << ls), t1, d, ls + 1, logn - 1); 195 | 196 | /* 197 | * d_e <- d_e + X*t2 = a_e*b_e + X*a_o*b_o 198 | */ 199 | mq_poly_add_mulX_inner(d, d, t2, ls + 1, logn - 1); 200 | } 201 | 202 | /* 203 | * TODO: make a specialized squaring function, which could be faster 204 | * than the plain multiplication routine. 205 | */ 206 | static void 207 | mq_poly_sqr_inner(uint8_t *d, const uint8_t *a, 208 | int ls, unsigned logn, uint8_t *tmp) 209 | { 210 | mq_poly_mul_inner(d, a, a, ls, logn, tmp); 211 | } 212 | 213 | /* 214 | * Polynomial inversion: we split a into even and odd coefficients: 215 | * a = a_e(X^2) + X*a_o(X^2) 216 | * with a_e and a_o being half-degree. We define: 217 | * adj(a) = a_e(X^2) - X*a_o(X^2) 218 | * Then: 219 | * a*adj(a) = a_e^2(X^2) - X^2*a_o^2(X^2) 220 | * = (a_e^2 - X*a_o^2)(X^2) 221 | * which is a half-degree polynomial. 222 | * 223 | * Thus: 224 | * 1/a = adj(a)*(1 / (a*adj(a))) 225 | * so we reduced the problem of inverting a of degree n into inverting 226 | * a*adj(a) of degree n/2. 
We just apply the process recursively 227 | * until we reach degree 1. 228 | * 229 | * Note that 1/(a*adj(a)) is really half-degree, so the multiplication 230 | * by adj(a) can be done with two half-degree multiplications. 231 | * 232 | * Size of tmp[]: 2*n bytes (with n = top-level degree). 233 | * 234 | * Return value: 1 on success, 0 on error. On error (a[] is not invertible), 235 | * contents of d[] are unpredictable. 236 | */ 237 | static int 238 | mq_poly_inv_inner(uint8_t *d, const uint8_t *a, int ls, 239 | unsigned logn, uint8_t *tmp) 240 | { 241 | uint8_t *t1, *t2; 242 | int r; 243 | 244 | if (logn == 1) { 245 | unsigned a0, a1, x, y; 246 | 247 | a0 = a[0 << ls]; 248 | a1 = a[1 << ls]; 249 | x = a0 * a0 + a1 * a1; 250 | 251 | /* 252 | * x is invertible modulo 256 if and only if it is odd. 253 | */ 254 | r = (int)(x & 1); 255 | 256 | /* 257 | * If x*y = 1 + u*2^k, then: 258 | * x*(y*(2-x*y)) = (1 + u*2^k)*(1 - u*2^k) 259 | * = 1 - (u^2)*2^(2*k) 260 | * = 1 mod 2^(2*k) 261 | * Inverse of s modulo 4 is itself: 262 | * 1*1 = 1 mod 4 263 | * 3*3 = 1 mod 4 264 | * Thus, we apply the rule above twice on x, to get an 265 | * inverse modulo 4*4 = 16, and then modulo 16*16 = 256. 
266 | */ 267 | y = x; 268 | y *= 2 - (x * y); 269 | y *= 2 - (x * y); 270 | 271 | /* 272 | * 1/(a0 + X*a1) = (a0 - X*a1) / (a0^2 - X^2*a1^2) 273 | * = (a0 - X*a1) / (a0^2 + a1^2) 274 | * (since we work modulo X^2+1) 275 | */ 276 | d[0 << ls] = a0 * y; 277 | d[1 << ls] = -a1 * y; 278 | return r; 279 | } 280 | 281 | t1 = tmp + (1 << ls); 282 | t2 = t1 + ((size_t)1 << (logn + ls)); 283 | 284 | /* 285 | * t1 <- a*adj(a) 286 | */ 287 | mq_poly_sqr_inner(t1, a, ls + 1, logn - 1, tmp); 288 | mq_poly_sqr_inner(t2, a + (1 << ls), ls + 1, logn - 1, tmp); 289 | mq_poly_sub_mulX_inner(t1, t1, t2, ls + 1, logn - 1); 290 | 291 | /* 292 | * t1 <- 1/a*adj(a) 293 | */ 294 | r = mq_poly_inv_inner(t1, t1, ls + 1, logn - 1, tmp); 295 | 296 | /* 297 | * d_e <- t1*a_e 298 | * d_o <- -t1*a_o 299 | */ 300 | mq_poly_mul_inner(d, a, t1, ls + 1, logn - 1, tmp); 301 | mq_poly_mul_inner(d + (1 << ls), 302 | a + (1 << ls), t1, ls + 1, logn - 1, tmp); 303 | mq_poly_neg_inner(d + (1 << ls), d + (1 << ls), ls + 1, logn - 1); 304 | 305 | return r; 306 | } 307 | 308 | /* 309 | * Wrappers for the case of ls = 1 (minimal stride). 310 | */ 311 | 312 | static inline void 313 | mq_poly_add(uint8_t *d, const uint8_t *a, const uint8_t *b, unsigned logn) 314 | { 315 | mq_poly_add_inner(d, a, b, 0, logn); 316 | } 317 | 318 | static inline void 319 | mq_poly_sub(uint8_t *d, const uint8_t *a, const uint8_t *b, unsigned logn) 320 | { 321 | mq_poly_sub_inner(d, a, b, 0, logn); 322 | } 323 | 324 | static inline void 325 | mq_poly_mul(uint8_t *d, const uint8_t *a, const uint8_t *b, unsigned logn, 326 | uint8_t *tmp) 327 | { 328 | mq_poly_mul_inner(d, a, b, 0, logn, tmp); 329 | } 330 | 331 | static inline int 332 | mq_poly_inv(uint8_t *d, const uint8_t *a, unsigned logn, uint8_t *tmp) 333 | { 334 | return mq_poly_inv_inner(d, a, 0, logn, tmp); 335 | } 336 | 337 | /* 338 | * Multiply polynomial a[] by 1+X+X^2+X^3+...+X^(n-1). 339 | * 340 | * d[0] = a[0] - a[1] - a[2] - a[3] - ... 
- a[n - 1] 341 | * d[1] = a[0] + a[1] - a[2] - a[3] - ... - a[n - 1] 342 | * d[2] = a[0] + a[1] + a[2] - a[3] - ... - a[n - 1] 343 | * ... 344 | * d[n-1] = a[0] + a[1] + a[2] + a[3] + ... + a[n - 1] 345 | * 346 | * Thus, d[n - 1] is the sum of all a[i], and: 347 | * d[i] = d[i + 1] - 2*a[i + 1] 348 | * Equivalently: 349 | * d[i] = d[i - 1] + 2*a[i] 350 | * for all i >= 1. 351 | * 352 | * This allows efficient computation, in O(n) operations and with no 353 | * need for extra storage. 354 | */ 355 | static void 356 | mq_poly_mul_ones(uint8_t *d, const uint8_t *a, unsigned logn) 357 | { 358 | size_t u, n; 359 | unsigned t; 360 | 361 | n = (size_t)1 << logn; 362 | t = a[0]; 363 | for (u = 1; u < n; u ++) { 364 | t -= a[u]; 365 | } 366 | d[0] = t; 367 | for (u = 1; u < n; u ++) { 368 | t += a[u] << 1; 369 | d[u] = t; 370 | } 371 | } 372 | 373 | /* 374 | * Multiply a polynomial by a constant. 375 | */ 376 | static void 377 | mq_poly_mulconst(uint8_t *d, const uint8_t *a, unsigned c, unsigned logn) 378 | { 379 | size_t u, n; 380 | 381 | n = (size_t)1 << logn; 382 | for (u = 0; u < n; u ++) { 383 | d[u] = a[u] * c; 384 | } 385 | } 386 | 387 | /* see inner.h */ 388 | int 389 | bat_make_public_128(uint8_t *h, const int8_t *f, const int8_t *g, 390 | unsigned logn, uint32_t *tmp) 391 | { 392 | size_t u, n; 393 | uint8_t *t1, *t2; 394 | int r; 395 | 396 | n = (size_t)1 << logn; 397 | t1 = (uint8_t *)tmp; 398 | t2 = t1 + n; 399 | 400 | /* 401 | * t1 <- 1/f 402 | */ 403 | r = mq_poly_inv(t1, (const uint8_t *)f, logn, t2); 404 | 405 | /* 406 | * h <- t1*g = g/f 407 | */ 408 | mq_poly_mul(h, t1, (const uint8_t *)g, logn, t2); 409 | 410 | /* 411 | * Reduce coefficients modulo 128. 
412 | */ 413 | for (u = 0; u < n; u ++) { 414 | h[u] &= 0x7F; 415 | } 416 | return r; 417 | } 418 | 419 | /* see inner.h */ 420 | uint32_t 421 | bat_encrypt_128(int8_t *c, const uint8_t *sbuf, 422 | const uint8_t *h, unsigned logn, uint32_t *tmp) 423 | { 424 | size_t u, n; 425 | uint8_t *t1, *t2; 426 | 427 | n = (size_t)1 << logn; 428 | t1 = (uint8_t *)tmp; 429 | t2 = t1 + n; 430 | 431 | /* 432 | * Expand sbuf[] into polynomial s (in t1). 433 | */ 434 | for (u = 0; u < n; u ++) { 435 | t1[u] = (sbuf[u >> 3] >> ((unsigned)u & 7)) & 1; 436 | } 437 | 438 | /* 439 | * t1 <- h*s 440 | */ 441 | mq_poly_mul(t1, h, t1, logn, t2); 442 | 443 | /* 444 | * c = round((h*s) / 2) 445 | * Coefficients of h*s must be reduced modulo 128, into -63..+64. 446 | * Rounding is toward +inf, thus the result is in -31..+32. 447 | */ 448 | for (u = 0; u < n; u ++) { 449 | c[u] = (((t1[u] + 63) & 0x7F) >> 1) - 31; 450 | } 451 | 452 | /* 453 | * Since coefficients of e' (centered error vector) are in 454 | * {-1/2,+1/2}, and those of s' are also in {-1/2,+1/2}, the 455 | * norm of vector (gamma*s',e') is always equal to 456 | * sqrt((gamma^2 + 1)*(n/4)) = sqrt(n/2), thus always acceptable. 457 | */ 458 | return 1; 459 | } 460 | 461 | /* see inner.h */ 462 | void 463 | bat_decrypt_128(uint8_t *sbuf, const int8_t *c, 464 | const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, 465 | const int32_t *w, unsigned logn, uint32_t *tmp) 466 | { 467 | /* 468 | * q = 128, Q = 2, q' = 3329, k = 2. 469 | * 470 | * Decapsulation algorithm: 471 | * 472 | * c <- k*c 473 | * c' <- (Q*f*c - f*ones - g*ones) mod q*Q 474 | * c'' <- (q'*Q*F*c - q'*F*ones - q'*G*ones - c'*w) mod q*q'*Q 475 | * e' = (-Gd*c' + g*c'') / (q*q'*Q) 476 | * s' = (Fd*c' - f*c'') / (q*q'*Q) 477 | * e = e' + (1/2)*ones 478 | * s = s' + (1/2)*ones 479 | * 480 | * We don't need to recompute e, only s. q*Q = 256, which is 481 | * natively supported by the code in this file. 
482 | */ 483 | size_t u, n; 484 | uint8_t *t1, *t2, *t3, *t4; 485 | uint16_t *tw1, *tw3, *tw4; 486 | 487 | n = (size_t)1 << logn; 488 | t1 = (uint8_t *)tmp; 489 | t2 = t1 + n; 490 | t3 = t2 + n; 491 | t4 = t3 + n; 492 | 493 | tw1 = (uint16_t *)t1; 494 | tw3 = (uint16_t *)t3; 495 | tw4 = tw3 + n; 496 | 497 | /* 498 | * c <- k*c (implicit) 499 | * t2 <- Q*c 500 | */ 501 | for (u = 0; u < n; u ++) { 502 | t2[u] = (uint8_t)(c[u] << 2); 503 | } 504 | 505 | /* 506 | * t1 <- c' = (Q*f*c - f*ones - g*ones) mod q*Q 507 | */ 508 | mq_poly_mul(t1, t2, (const uint8_t *)f, logn, t3); 509 | mq_poly_add(t3, (const uint8_t *)f, (const uint8_t *)g, logn); 510 | mq_poly_mul_ones(t3, t3, logn); 511 | mq_poly_sub(t1, t1, t3, logn); 512 | 513 | /* 514 | * t2 <- (q'*Q*F*c - q'*F*ones - q'*G*ones - c'*w) mod q*Q 515 | */ 516 | mq_poly_mul(t2, t2, (const uint8_t *)F, logn, t4); 517 | mq_poly_add(t3, (const uint8_t *)F, (const uint8_t *)G, logn); 518 | mq_poly_mul_ones(t3, t3, logn); 519 | mq_poly_sub(t2, t2, t3, logn); 520 | mq_poly_mulconst(t2, t2, 64513 & 0xFF, logn); 521 | for (u = 0; u < n; u ++) { 522 | t3[u] = (uint8_t)w[u]; 523 | } 524 | mq_poly_mul(t3, t1, t3, logn, t4); 525 | mq_poly_sub(t2, t2, t3, logn); 526 | 527 | /* 528 | * tw3 <- -c'*w mod q' 529 | * This involves rebulding c' mod q' in tw3 first. 530 | */ 531 | for (u = 0; u < n; u ++) { 532 | *(int16_t *)&tw3[u] = (int)((t1[u] + 127) & 0xFF) - 127; 533 | } 534 | bat_polyqp_mulneg((int16_t *)tw3, (int16_t *)tw3, w, 535 | logn, (uint32_t *)tw4); 536 | 537 | /* 538 | * At that point, we have: 539 | * t1: c' mod q*Q (0..255) 540 | * t2: c'' mod q*Q (0..255) 541 | * tw3: c'' mod q' (-32256..+32256) 542 | * 543 | * We now want to assemble c'' mod 257 in tw3. We do so by 544 | * applying the CRT between t2 and tw3. 545 | */ 546 | for (u = 0; u < n; u ++) { 547 | uint16_t z; 548 | uint32_t x0, x1, x; 549 | int32_t y; 550 | 551 | /* 552 | * We ensure that we get a positive value by adding 553 | * 64513 to the coefficient from c''. 
554 | */ 555 | x0 = t2[u]; 556 | z = tw3[u]; 557 | x1 = (uint32_t)(*(int16_t *)&z + 64513); 558 | 559 | /* 560 | * CRT reconstruction: If: 561 | * x = x0 mod q*Q 562 | * x = x1 mod q' 563 | * then: 564 | * x = ((1/q') * (x0 - x1) mod q*Q) * q' + x1 565 | * We have q*Q = 256; since 64513 = 252*256 + 1, the value 566 | * 1/q' mod q*Q is trivial. 567 | */ 568 | x = x1 + ((x0 - x1) & 0xFF) * 64513; 569 | 570 | /* 571 | * x is in 0..16515327; we should normalize it to 572 | * -8257663..+8257664. 573 | */ 574 | y = (int32_t)x - (int32_t)(16515328 575 | & -((uint32_t)(8257664 - x) >> 31)); 576 | 577 | /* 578 | * For reduction modulo 257, we ensure a positive value by 579 | * adding 32131*257 = 8257667. 580 | */ 581 | tw3[u] = m257_tomonty((uint32_t)(y + 8257667)); 582 | } 583 | 584 | /* 585 | * We assemble c' mod 257 in tw4 (then moved to tw1). 586 | */ 587 | for (u = 0; u < n; u ++) { 588 | /* 589 | * For x in 0..255, normalization to -127..+128 is done 590 | * by computing ((x + 127) % 256) - 127. But we then want 591 | * to add 257 to get a positive value for reduction modulo 592 | * 257. 593 | */ 594 | tw4[u] = m257_tomonty(((t1[u] + 127) & 0xFF) + 130); 595 | } 596 | memcpy(tw1, tw4, n * sizeof *tw4); 597 | 598 | /* 599 | * We have c' mod 257 in tw1, and c'' mod 257 in tw3. We 600 | * use the mod 257 code to obtain q*q'*Q*s'. 601 | */ 602 | bat_finish_decapsulate_257(tw1, tw3, f, F, w, logn, (uint32_t *)tw4); 603 | 604 | /* 605 | * If the ciphertext is correct and the decapsulation worked well, 606 | * then s' has coefficients in {-1/2,+1/2} and the coefficients of 607 | * s are obtained by adding 1/2. We have the coefficients of 608 | * q*q'*Q*s' in tw1[], in Montgomery representation modulo 257: 609 | * 610 | * s' s tw1[] 611 | * -1/2 0 3 (-q*q'*Q/2 = 3 mod 257) 612 | * +1/2 1 254 (+q*q'*Q/2 = 254 mod 257) 613 | * 614 | * Thus, we just need to look at the least significant bit of each 615 | * value in tw1[] to get the coefficients of s. 
616 | */ 617 | memset(sbuf, 0, (n + 7) >> 3); 618 | for (u = 0; u < n; u ++) { 619 | sbuf[u >> 3] |= (1 - (tw1[u] & 1)) << (u & 7); 620 | } 621 | } 622 | 623 | /* see inner.h */ 624 | int 625 | bat_rebuild_G_128(int8_t *G, 626 | const int8_t *f, const int8_t *g, const int8_t *F, 627 | unsigned logn, uint32_t *tmp) 628 | { 629 | size_t u, n; 630 | uint8_t *t1, *t2, *t3; 631 | int lim; 632 | 633 | n = (size_t)1 << logn; 634 | t1 = (uint8_t *)tmp; 635 | t2 = t1 + n; 636 | t3 = t2 + n; 637 | 638 | /* 639 | * We have g*F - f*G = q; therefore: G = (g*F - q) / f 640 | * 641 | * We compute modulo 256; note that if f is invertible modulo 642 | * 128, it will be invertible modulo 256, and vice versa. 643 | */ 644 | if (!mq_poly_inv(t1, (const uint8_t *)f, logn, t3)) { 645 | return 0; 646 | } 647 | mq_poly_mul(t2, (const uint8_t *)g, (const uint8_t *)F, logn, t3); 648 | t2[0] -= 128; 649 | mq_poly_mul(t1, t1, t2, logn, t3); 650 | 651 | /* 652 | * Normalize coefficients of G around 0, and check that they 653 | * are within the expected bounds. 654 | */ 655 | lim = (1 << (bat_max_FG_bits[logn] - 1)) - 1; 656 | for (u = 0; u < n; u ++) { 657 | int x; 658 | 659 | x = ((t1[u] + 127) & 0xFF) - 127; 660 | if (x < -lim || x > +lim) { 661 | return 0; 662 | } 663 | G[u] = (int8_t)x; 664 | } 665 | return 1; 666 | } 667 | 668 | /* 669 | * Values modulo q are in 0..127, which naturally encodes over exactly 670 | * 7 bits. 
671 | */ 672 | 673 | /* see inner.h */ 674 | size_t 675 | bat_encode_128(void *out, size_t max_out_len, 676 | const uint8_t *x, unsigned logn) 677 | { 678 | size_t u, v, n, out_len; 679 | uint8_t *buf; 680 | 681 | n = (size_t)1 << logn; 682 | out_len = ((7 * n) + 7) >> 3; 683 | if (out == NULL) { 684 | return out_len; 685 | } 686 | if (max_out_len < out_len) { 687 | return 0; 688 | } 689 | buf = out; 690 | if (n == 2) { 691 | uint32_t w; 692 | 693 | w = (uint32_t)x[0] 694 | | ((uint32_t)x[1] << 7); 695 | enc16le(buf, w); 696 | return 2; 697 | } else if (n == 4) { 698 | uint32_t w; 699 | 700 | w = (uint32_t)x[0] 701 | | ((uint32_t)x[1] << 7) 702 | | ((uint32_t)x[2] << 14) 703 | | ((uint32_t)x[3] << 21); 704 | enc32le(buf, w); 705 | return 4; 706 | } else { 707 | v = 0; 708 | for (u = 0; (u + 8) <= n; u += 8) { 709 | uint32_t w0, w1; 710 | 711 | w0 = (uint32_t)x[u] 712 | | ((uint32_t)x[u + 1] << 7) 713 | | ((uint32_t)x[u + 2] << 14) 714 | | ((uint32_t)x[u + 3] << 21); 715 | w1 = (uint32_t)x[u + 4] 716 | | ((uint32_t)x[u + 5] << 7) 717 | | ((uint32_t)x[u + 6] << 14) 718 | | ((uint32_t)x[u + 7] << 21); 719 | enc32le(buf + v, w0 | (w1 << 28)); 720 | enc24le(buf + v + 4, w1 >> 4); 721 | v += 7; 722 | } 723 | return v; 724 | } 725 | } 726 | 727 | /* see inner.h */ 728 | size_t 729 | bat_decode_128(uint8_t *x, unsigned logn, 730 | const void *in, size_t max_in_len) 731 | { 732 | size_t u, v, n, in_len; 733 | const uint8_t *buf; 734 | uint32_t r; 735 | 736 | n = (size_t)1 << logn; 737 | in_len = ((7 * n) + 7) >> 3; 738 | if (max_in_len < in_len) { 739 | return 0; 740 | } 741 | buf = in; 742 | if (n == 2) { 743 | uint32_t w; 744 | 745 | w = dec16le(buf); 746 | x[0] = w & 0x7F; 747 | x[1] = (w >> 7) & 0x7F; 748 | r = (uint32_t)((w >> 14) - 1) >> 31; 749 | v = 2; 750 | } else if (n == 4) { 751 | uint32_t w; 752 | 753 | w = dec32le(buf); 754 | x[0] = w & 0x7F; 755 | x[1] = (w >> 7) & 0x7F; 756 | x[2] = (w >> 14) & 0x7F; 757 | x[3] = (w >> 21) & 0x7F; 758 | r = (uint32_t)((w >> 
28) - 1) >> 31; 759 | v = 4; 760 | } else { 761 | v = 0; 762 | for (u = 0; (u + 8) <= n; u += 8) { 763 | uint32_t w0, w1; 764 | 765 | w0 = dec32le(buf + v); 766 | w1 = dec24le(buf + v + 4); 767 | v += 7; 768 | x[u + 0] = w0 & 0x7F; 769 | x[u + 1] = (w0 >> 7) & 0x7F; 770 | x[u + 2] = (w0 >> 14) & 0x7F; 771 | x[u + 3] = (w0 >> 21) & 0x7F; 772 | x[u + 4] = ((w0 >> 28) | (w1 << 4)) & 0x7F; 773 | x[u + 5] = (w1 >> 3) & 0x7F; 774 | x[u + 6] = (w1 >> 10) & 0x7F; 775 | x[u + 7] = (w1 >> 17) & 0x7F; 776 | } 777 | r = 1; 778 | } 779 | return v & -(size_t)r; 780 | } 781 | 782 | /* 783 | * Ciphertext values are in -31..+32 range; for value v, we encode v+31 784 | * over 6 bits (v+31 is in 0..63). 785 | */ 786 | 787 | /* see inner.h */ 788 | size_t 789 | bat_encode_ciphertext_128(void *out, size_t max_out_len, 790 | const int8_t *c, unsigned logn) 791 | { 792 | size_t u, v, n, out_len; 793 | uint8_t *buf; 794 | 795 | n = (size_t)1 << logn; 796 | out_len = ((6 * n) + 7) >> 3; 797 | if (out == NULL) { 798 | return out_len; 799 | } 800 | if (max_out_len < out_len) { 801 | return 0; 802 | } 803 | buf = out; 804 | v = 0; 805 | for (u = 0; (u + 4) <= n; u += 4) { 806 | uint32_t w; 807 | 808 | w = (uint32_t)(c[u] + 31) 809 | | ((uint32_t)(c[u + 1] + 31) << 6) 810 | | ((uint32_t)(c[u + 2] + 31) << 12) 811 | | ((uint32_t)(c[u + 3] + 31) << 18); 812 | enc24le(buf + v, w); 813 | v += 3; 814 | } 815 | if (u < n) { 816 | uint32_t w; 817 | 818 | w = (uint32_t)(c[u] + 31) 819 | | ((uint32_t)(c[u + 1] + 31) << 6); 820 | enc16le(buf + v, w); 821 | v += 2; 822 | } 823 | return v; 824 | } 825 | 826 | /* see inner.h */ 827 | size_t 828 | bat_decode_ciphertext_128(int8_t *c, unsigned logn, 829 | const void *in, size_t max_in_len) 830 | { 831 | size_t u, v, n, in_len; 832 | const uint8_t *buf; 833 | uint32_t r; 834 | 835 | n = (size_t)1 << logn; 836 | in_len = ((6 * n) + 7) >> 3; 837 | if (max_in_len < in_len) { 838 | return 0; 839 | } 840 | buf = in; 841 | v = 0; 842 | r = 1; 843 | for (u = 0; (u + 
4) <= n; u += 4) { 844 | uint32_t w; 845 | 846 | w = dec24le(buf + v); 847 | v += 3; 848 | c[u + 0] = (int)(w & 0x3F) - 31; 849 | c[u + 1] = (int)((w >> 6) & 0x3F) - 31; 850 | c[u + 2] = (int)((w >> 12) & 0x3F) - 31; 851 | c[u + 3] = (int)((w >> 18) & 0x3F) - 31; 852 | } 853 | if (u < n) { 854 | uint32_t w; 855 | 856 | w = dec16le(buf + v); 857 | v += 2; 858 | c[u + 0] = (int)(w & 0x3F) - 31; 859 | c[u + 1] = (int)((w >> 6) & 0x3F) - 31; 860 | r &= (uint32_t)((w >> 12) - 1) >> 31; 861 | } 862 | return v & -(size_t)r; 863 | } 864 | -------------------------------------------------------------------------------- /src/modqp.c: -------------------------------------------------------------------------------- 1 | #include "inner.h" 2 | 3 | #define Q 64513 4 | #include "modgen.c" 5 | 6 | /* see inner.h */ 7 | void 8 | bat_polyqp_mulneg(int16_t *d, const int16_t *a, const int32_t *b, 9 | unsigned logn, uint32_t *tmp) 10 | { 11 | size_t u, n; 12 | uint16_t *t1, *t2; 13 | 14 | n = (size_t)1 << logn; 15 | 16 | /* 17 | * In order to save memory, we use the destination array for 18 | * intermediate computations as well. Since d may partially 19 | * overlap with a, we first do a memmove(). 20 | */ 21 | if (d != a) { 22 | memmove(d, a, n * sizeof *a); 23 | } 24 | t1 = (uint16_t *)d; 25 | t2 = (uint16_t *)tmp; 26 | for (u = 0; u < n; u ++) { 27 | t1[u] = mq_set(*(int16_t *)&t1[u]); 28 | t2[u] = mq_set(-b[u]); 29 | } 30 | NTT(t1, t1, logn); 31 | NTT(t2, t2, logn); 32 | mq_poly_mul_ntt(t1, t1, t2, logn); 33 | iNTT(t1, t1, logn); 34 | for (u = 0; u < n; u ++) { 35 | *(int16_t *)&t1[u] = mq_snorm(t1[u]); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/prng.c: -------------------------------------------------------------------------------- 1 | /* 2 | * PRNG and interface to the system RNG. 3 | */ 4 | 5 | #include 6 | 7 | #include "inner.h" 8 | 9 | /* 10 | * Include relevant system header files. 
For Win32, this will also need 11 | * linking with advapi32.dll, which we trigger with an appropriate #pragma. 12 | */ 13 | #if BAT_RAND_GETENTROPY 14 | #include 15 | #endif 16 | #if BAT_RAND_URANDOM 17 | #include 18 | #if !BAT_RAND_GETENTROPY 19 | #include 20 | #endif 21 | #include 22 | #include 23 | #endif 24 | #if BAT_RAND_WIN32 25 | #include 26 | #define SystemFunction036 NTAPI SystemFunction036 27 | #include 28 | #undef SystemFunction036 29 | #pragma comment(lib, "advapi32") 30 | #endif 31 | 32 | /* see inner.h */ 33 | int 34 | bat_get_seed(void *seed, size_t len) 35 | { 36 | (void)seed; 37 | if (len == 0) { 38 | return 1; 39 | } 40 | #if BAT_RAND_GETENTROPY 41 | if (getentropy(seed, len) == 0) { 42 | return 1; 43 | } 44 | #endif 45 | #if BAT_RAND_URANDOM 46 | /* 47 | * We could try to optimize this code with some caching of the 48 | * file descriptor, but this raises extra difficulties (this is 49 | * hard to make thread-safe without dabbling with a mutex). It 50 | * is simpler to assume that any Unix-like platform for which it 51 | * is worth optimizing performance will also have a recent 52 | * enough OS to use getentropy() (possibly as a wrapper around 53 | * getrandom()). 54 | */ 55 | { 56 | int f; 57 | 58 | f = open("/dev/urandom", O_RDONLY | O_CLOEXEC); 59 | if (f >= 0) { 60 | while (len > 0) { 61 | ssize_t rlen; 62 | 63 | rlen = read(f, seed, len); 64 | if (rlen < 0) { 65 | if (errno == EINTR) { 66 | continue; 67 | } 68 | break; 69 | } 70 | seed = (uint8_t *)seed + rlen; 71 | len -= (size_t)rlen; 72 | } 73 | close(f); 74 | if (len == 0) { 75 | return 1; 76 | } 77 | } 78 | } 79 | #endif 80 | #if BAT_RAND_WIN32 81 | /* 82 | * Nominally, a "Win32" implementation should use CryptoAPI 83 | * (CryptAcquireContext(), then CryptGenRandom()) but this is 84 | * quite inefficient and error prone. Since Windows XP and 85 | * Windows Server 2003, the RtlGenRandom() function (from 86 | * advapi32.dll) offers a much direct road to the OS RNG. 
87 | */ 88 | if (RtlGenRandom(seed, len)) { 89 | return 1; 90 | } 91 | #endif 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /src/speed_bat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Speed benchmark code for BAT implementation. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "bat.h" 11 | #include "inner.h" 12 | 13 | #ifndef DO_BENCH86 14 | #if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64 15 | #define DO_BENCH86 1 16 | #else 17 | #define DO_BENCH86 0 18 | #endif 19 | #endif 20 | 21 | #if DO_BENCH86 22 | #include 23 | 24 | static inline uint64_t 25 | core_cycles(void) 26 | { 27 | #if defined __GNUC__ && !defined __clang__ 28 | uint32_t hi, lo; 29 | 30 | _mm_lfence(); 31 | __asm__ __volatile__ ("rdtsc" : "=d" (hi), "=a" (lo) : : ); 32 | return ((uint64_t)hi << 32) | (uint64_t)lo; 33 | #else 34 | _mm_lfence(); 35 | return __rdtsc(); 36 | #endif 37 | } 38 | 39 | #endif 40 | 41 | static void * 42 | xmalloc(size_t len) 43 | { 44 | void *buf; 45 | 46 | if (len == 0) { 47 | return NULL; 48 | } 49 | buf = malloc(len); 50 | if (buf == NULL) { 51 | fprintf(stderr, "memory allocation error\n"); 52 | exit(EXIT_FAILURE); 53 | } 54 | return buf; 55 | } 56 | 57 | static void 58 | xfree(void *buf) 59 | { 60 | if (buf != NULL) { 61 | free(buf); 62 | } 63 | } 64 | 65 | /* 66 | * Benchmark function takes an opaque context and an iteration count; 67 | * it returns 0 on success, a negative error code on error. 68 | */ 69 | typedef int (*bench_fun)(void *ctx, unsigned long num); 70 | 71 | /* 72 | * Returned value is the time per iteration in nanoseconds. 73 | * WARNING: ON x86, VALUES ARE RETURNED IN CLOCK CYCLES, NOT NANOSECONDS; 74 | * THRESHOLD IS IN BILLIONS OF CYCLES. 75 | * 76 | * If the benchmark function reports an error, 0.0 is returned. 
77 | */ 78 | static double 79 | do_bench(bench_fun bf, void *ctx, double threshold) 80 | { 81 | unsigned long num; 82 | int r; 83 | 84 | /* 85 | * Alsways do a few blank runs to "train" the caches and branch 86 | * prediction. 87 | */ 88 | r = bf(ctx, 5); 89 | if (r != 0) { 90 | fprintf(stderr, "ERR: %d\n", r); 91 | return 0.0; 92 | } 93 | 94 | num = 1; 95 | for (;;) { 96 | #if DO_BENCH86 97 | uint64_t begin, end; 98 | #else 99 | clock_t begin, end; 100 | #endif 101 | double tt; 102 | 103 | #if DO_BENCH86 104 | begin = core_cycles(); 105 | #else 106 | begin = clock(); 107 | #endif 108 | r = bf(ctx, num); 109 | #if DO_BENCH86 110 | end = core_cycles(); 111 | #else 112 | end = clock(); 113 | #endif 114 | if (r != 0) { 115 | fprintf(stderr, "ERR: %d\n", r); 116 | return 0.0; 117 | } 118 | #if DO_BENCH86 119 | tt = (double)(end - begin) / (double)1000000000.0; 120 | #else 121 | tt = (double)(end - begin) / (double)CLOCKS_PER_SEC; 122 | #endif 123 | if (tt >= threshold) { 124 | return tt * 1000000000.0 / (double)num; 125 | } 126 | 127 | /* 128 | * If the function ran for less than 0.1 seconds then 129 | * we simply double the iteration number; otherwise, we 130 | * use the run time to try to get a "correct" number of 131 | * iterations quickly. 
132 | */ 133 | if (tt < 0.1) { 134 | num <<= 1; 135 | } else { 136 | unsigned long num2; 137 | 138 | num2 = (unsigned long)((double)num 139 | * (threshold * 1.1) / tt); 140 | if (num2 <= num) { 141 | num2 = num + 1; 142 | } 143 | num = num2; 144 | } 145 | } 146 | } 147 | 148 | #define XCAT(x, y) XCAT_(x, y) 149 | #define XCAT_(x, y) x ## y 150 | #define Zn(q, n, name) XCAT(XCAT(XCAT(bat_, q), XCAT(_, n)), XCAT(_, name)) 151 | #define Bn(q, n, name) XCAT(XCAT(XCAT(bench_, q), XCAT(_, n)), XCAT(_, name)) 152 | 153 | #define MK_BENCH_FUNS(q, n) \ 154 | \ 155 | typedef struct { \ 156 | Zn(q, n, private_key) sk; \ 157 | Zn(q, n, public_key) pk; \ 158 | Zn(q, n, ciphertext) ct; \ 159 | uint8_t *enc_sk; \ 160 | size_t enc_sk_len; \ 161 | uint8_t *enc_pk; \ 162 | size_t enc_pk_len; \ 163 | uint8_t *enc_ct; \ 164 | size_t enc_ct_len; \ 165 | uint8_t *tmp; \ 166 | size_t tmp_len; \ 167 | uint8_t secret[32]; \ 168 | unsigned logn; \ 169 | uint8_t *sbuf; \ 170 | uint8_t randm[32]; \ 171 | } Bn(q, n, context); \ 172 | \ 173 | static int \ 174 | Bn(q, n, keygen)(void *ctx, unsigned long num) \ 175 | { \ 176 | Bn(q, n, context) *bc; \ 177 | \ 178 | bc = ctx; \ 179 | while (num -- > 0) { \ 180 | if (Zn(q, n, keygen)(&bc->sk, bc->tmp, bc->tmp_len) != 0) { \ 181 | return -1; \ 182 | } \ 183 | } \ 184 | return 0; \ 185 | } \ 186 | \ 187 | static int \ 188 | Bn(q, n, encode_private_key_short)(void *ctx, unsigned long num) \ 189 | { \ 190 | Bn(q, n, context) *bc; \ 191 | \ 192 | bc = ctx; \ 193 | while (num -- > 0) { \ 194 | if (Zn(q, n, encode_private_key)( \ 195 | bc->enc_sk, bc->enc_sk_len, &bc->sk, 1) == 0) \ 196 | { \ 197 | return -1; \ 198 | } \ 199 | } \ 200 | return 0; \ 201 | } \ 202 | \ 203 | static int \ 204 | Bn(q, n, encode_private_key_long)(void *ctx, unsigned long num) \ 205 | { \ 206 | Bn(q, n, context) *bc; \ 207 | \ 208 | bc = ctx; \ 209 | while (num -- > 0) { \ 210 | if (Zn(q, n, encode_private_key)( \ 211 | bc->enc_sk, bc->enc_sk_len, &bc->sk, 0) == 0) \ 212 | { \ 
213 | return -1; \ 214 | } \ 215 | } \ 216 | return 0; \ 217 | } \ 218 | \ 219 | static int \ 220 | Bn(q, n, decode_private_key)(void *ctx, unsigned long num) \ 221 | { \ 222 | Bn(q, n, context) *bc; \ 223 | \ 224 | bc = ctx; \ 225 | while (num -- > 0) { \ 226 | if (Zn(q, n, decode_private_key)( \ 227 | &bc->sk, bc->enc_sk, bc->enc_sk_len, \ 228 | bc->tmp, bc->tmp_len) == 0) \ 229 | { \ 230 | return -1; \ 231 | } \ 232 | } \ 233 | return 0; \ 234 | } \ 235 | \ 236 | static int \ 237 | Bn(q, n, encode_public_key)(void *ctx, unsigned long num) \ 238 | { \ 239 | Bn(q, n, context) *bc; \ 240 | \ 241 | bc = ctx; \ 242 | while (num -- > 0) { \ 243 | if (Zn(q, n, encode_public_key)( \ 244 | bc->enc_pk, bc->enc_pk_len, &bc->pk) == 0) \ 245 | { \ 246 | return -1; \ 247 | } \ 248 | } \ 249 | return 0; \ 250 | } \ 251 | \ 252 | static int \ 253 | Bn(q, n, decode_public_key)(void *ctx, unsigned long num) \ 254 | { \ 255 | Bn(q, n, context) *bc; \ 256 | \ 257 | bc = ctx; \ 258 | while (num -- > 0) { \ 259 | if (Zn(q, n, decode_public_key)( \ 260 | &bc->pk, bc->enc_pk, bc->enc_pk_len) == 0) \ 261 | { \ 262 | return -1; \ 263 | } \ 264 | } \ 265 | return 0; \ 266 | } \ 267 | \ 268 | static int \ 269 | Bn(q, n, encode_ciphertext)(void *ctx, unsigned long num) \ 270 | { \ 271 | Bn(q, n, context) *bc; \ 272 | \ 273 | bc = ctx; \ 274 | while (num -- > 0) { \ 275 | if (Zn(q, n, encode_ciphertext)( \ 276 | bc->enc_ct, bc->enc_ct_len, &bc->ct) == 0) \ 277 | { \ 278 | return -1; \ 279 | } \ 280 | } \ 281 | return 0; \ 282 | } \ 283 | \ 284 | static int \ 285 | Bn(q, n, decode_ciphertext)(void *ctx, unsigned long num) \ 286 | { \ 287 | Bn(q, n, context) *bc; \ 288 | \ 289 | bc = ctx; \ 290 | while (num -- > 0) { \ 291 | if (Zn(q, n, decode_ciphertext)( \ 292 | &bc->ct, bc->enc_ct, bc->enc_ct_len) == 0) \ 293 | { \ 294 | return -1; \ 295 | } \ 296 | } \ 297 | return 0; \ 298 | } \ 299 | \ 300 | static int \ 301 | Bn(q, n, encapsulate)(void *ctx, unsigned long num) \ 302 | { \ 303 | Bn(q, 
n, context) *bc; \ 304 | \ 305 | bc = ctx; \ 306 | while (num -- > 0) { \ 307 | if (Zn(q, n, encapsulate_explicit_seed)( \ 308 | bc->secret, sizeof bc->secret, \ 309 | &bc->ct, &bc->pk, bc->randm, \ 310 | bc->tmp, bc->tmp_len) != 0) \ 311 | { \ 312 | return -1; \ 313 | } \ 314 | } \ 315 | return 0; \ 316 | } \ 317 | \ 318 | static int \ 319 | Bn(q, n, decapsulate)(void *ctx, unsigned long num) \ 320 | { \ 321 | Bn(q, n, context) *bc; \ 322 | \ 323 | bc = ctx; \ 324 | while (num -- > 0) { \ 325 | if (Zn(q, n, decapsulate)( \ 326 | bc->secret, sizeof bc->secret, \ 327 | &bc->ct, &bc->sk, bc->tmp, bc->tmp_len) != 0) \ 328 | { \ 329 | return -1; \ 330 | } \ 331 | } \ 332 | return 0; \ 333 | } \ 334 | \ 335 | static int \ 336 | Bn(q, n, encapsulate_nofo)(void *ctx, unsigned long num) \ 337 | { \ 338 | Bn(q, n, context) *bc; \ 339 | \ 340 | bc = ctx; \ 341 | while (num -- > 0) { \ 342 | if (!XCAT(bat_encrypt_, q)(bc->ct.c, bc->sbuf, \ 343 | bc->pk.h, bc->logn, (uint32_t *)bc->tmp)) \ 344 | { \ 345 | return -1; \ 346 | } \ 347 | } \ 348 | return 0; \ 349 | } \ 350 | \ 351 | static int \ 352 | Bn(q, n, decapsulate_nofo)(void *ctx, unsigned long num) \ 353 | { \ 354 | Bn(q, n, context) *bc; \ 355 | \ 356 | bc = ctx; \ 357 | while (num -- > 0) { \ 358 | XCAT(bat_decrypt_, q)(bc->sbuf, bc->ct.c, \ 359 | bc->sk.f, bc->sk.g, bc->sk.F, bc->sk.G, \ 360 | bc->sk.w, bc->logn, (uint32_t *)bc->tmp); \ 361 | XCAT(bat_encrypt_, q)(bc->ct.c, bc->sbuf, \ 362 | bc->pk.h, bc->logn, (uint32_t *)bc->tmp); \ 363 | } \ 364 | return 0; \ 365 | } \ 366 | \ 367 | static void \ 368 | Bn(q, n, all)(double threshold) \ 369 | { \ 370 | Bn(q, n, context) bc; \ 371 | \ 372 | printf("q=%3u, n=%4u:", (unsigned)q, (unsigned)n); \ 373 | fflush(stdout); \ 374 | \ 375 | bc.enc_sk_len = Zn(q, n, encode_private_key(0, 0, 0, 0)); \ 376 | bc.enc_pk_len = Zn(q, n, encode_public_key(0, 0, 0)); \ 377 | bc.enc_ct_len = Zn(q, n, encode_public_key(0, 0, 0)); \ 378 | bc.enc_sk = xmalloc(bc.enc_sk_len); \ 379 | 
bc.enc_pk = xmalloc(bc.enc_pk_len); \ 380 | bc.enc_ct = xmalloc(bc.enc_ct_len); \ 381 | bc.tmp_len = 24 * n + 31; \ 382 | bc.tmp = xmalloc(bc.tmp_len); \ 383 | for (bc.logn = 1; (1u << bc.logn) < n; bc.logn ++); \ 384 | bc.sbuf = xmalloc(SBUF_LEN(bc.logn)); \ 385 | if (!bat_get_seed(bc.sbuf, SBUF_LEN(bc.logn))) { \ 386 | fprintf(stderr, "ERR: bat_get_seed() failed\n"); \ 387 | exit(EXIT_FAILURE); \ 388 | } \ 389 | if (!bat_get_seed(bc.randm, sizeof bc.randm)) { \ 390 | fprintf(stderr, "ERR: bat_get_seed() failed\n"); \ 391 | exit(EXIT_FAILURE); \ 392 | } \ 393 | \ 394 | PRINT_BENCHS(q, n); \ 395 | \ 396 | xfree(bc.enc_sk); \ 397 | xfree(bc.enc_pk); \ 398 | xfree(bc.enc_ct); \ 399 | xfree(bc.tmp); \ 400 | xfree(bc.sbuf); \ 401 | } 402 | 403 | #if DO_BENCH86 404 | #define PRINT_BENCHS(q, n) \ 405 | do { \ 406 | printf(" %7.0fk", \ 407 | do_bench(&Bn(q, n, keygen), \ 408 | &bc, threshold) / 1000.0); \ 409 | fflush(stdout); \ 410 | printf(" %8.0f", \ 411 | do_bench(&Bn(q, n, encode_private_key_short), \ 412 | &bc, threshold)); \ 413 | fflush(stdout); \ 414 | printf(" %8.0f", \ 415 | do_bench(&Bn(q, n, decode_private_key), \ 416 | &bc, threshold)); \ 417 | fflush(stdout); \ 418 | printf(" %8.0f", \ 419 | do_bench(&Bn(q, n, encode_private_key_long), \ 420 | &bc, threshold)); \ 421 | fflush(stdout); \ 422 | printf(" %8.0f", \ 423 | do_bench(&Bn(q, n, decode_private_key), \ 424 | &bc, threshold)); \ 425 | fflush(stdout); \ 426 | Zn(q, n, get_public_key)(&bc.pk, &bc.sk); \ 427 | printf(" %8.0f", \ 428 | do_bench(&Bn(q, n, encode_public_key), \ 429 | &bc, threshold)); \ 430 | fflush(stdout); \ 431 | printf(" %8.0f", \ 432 | do_bench(&Bn(q, n, decode_public_key), \ 433 | &bc, threshold)); \ 434 | fflush(stdout); \ 435 | printf(" %8.0f", \ 436 | do_bench(&Bn(q, n, encapsulate_nofo), \ 437 | &bc, threshold)); \ 438 | fflush(stdout); \ 439 | printf(" %8.0f", \ 440 | do_bench(&Bn(q, n, encapsulate), \ 441 | &bc, threshold)); \ 442 | fflush(stdout); \ 443 | printf(" %8.0f", \ 444 
| do_bench(&Bn(q, n, decapsulate_nofo), \ 445 | &bc, threshold)); \ 446 | fflush(stdout); \ 447 | printf(" %8.0f", \ 448 | do_bench(&Bn(q, n, decapsulate), \ 449 | &bc, threshold)); \ 450 | fflush(stdout); \ 451 | printf(" %8.0f", \ 452 | do_bench(&Bn(q, n, encode_ciphertext), \ 453 | &bc, threshold)); \ 454 | fflush(stdout); \ 455 | printf(" %8.0f", \ 456 | do_bench(&Bn(q, n, decode_ciphertext), \ 457 | &bc, threshold)); \ 458 | printf("\n"); \ 459 | fflush(stdout); \ 460 | } while (0) 461 | #else 462 | #define PRINT_BENCHS(q, n) \ 463 | do { \ 464 | printf(" %8.2f", \ 465 | do_bench(&Bn(q, n, keygen), \ 466 | &bc, threshold) / 1000000.0); \ 467 | fflush(stdout); \ 468 | printf(" %8.2f", \ 469 | do_bench(&Bn(q, n, encode_private_key_short), \ 470 | &bc, threshold) / 1000.0); \ 471 | fflush(stdout); \ 472 | printf(" %8.2f", \ 473 | do_bench(&Bn(q, n, decode_private_key), \ 474 | &bc, threshold) / 1000.0); \ 475 | fflush(stdout); \ 476 | printf(" %8.2f", \ 477 | do_bench(&Bn(q, n, encode_private_key_long), \ 478 | &bc, threshold) / 1000.0); \ 479 | fflush(stdout); \ 480 | printf(" %8.2f", \ 481 | do_bench(&Bn(q, n, decode_private_key), \ 482 | &bc, threshold) / 1000.0); \ 483 | fflush(stdout); \ 484 | Zn(q, n, get_public_key)(&bc.pk, &bc.sk); \ 485 | printf(" %8.2f", \ 486 | do_bench(&Bn(q, n, encode_public_key), \ 487 | &bc, threshold) / 1000.0); \ 488 | fflush(stdout); \ 489 | printf(" %8.2f", \ 490 | do_bench(&Bn(q, n, decode_public_key), \ 491 | &bc, threshold) / 1000.0); \ 492 | fflush(stdout); \ 493 | printf(" %8.2f", \ 494 | do_bench(&Bn(q, n, encapsulate_nofo), \ 495 | &bc, threshold) / 1000.0); \ 496 | fflush(stdout); \ 497 | printf(" %8.2f", \ 498 | do_bench(&Bn(q, n, encapsulate), \ 499 | &bc, threshold) / 1000.0); \ 500 | fflush(stdout); \ 501 | printf(" %8.2f", \ 502 | do_bench(&Bn(q, n, decapsulate_nofo), \ 503 | &bc, threshold) / 1000.0); \ 504 | fflush(stdout); \ 505 | printf(" %8.2f", \ 506 | do_bench(&Bn(q, n, decapsulate), \ 507 | &bc, threshold) 
/ 1000.0); \ 508 | fflush(stdout); \ 509 | printf(" %8.2f", \ 510 | do_bench(&Bn(q, n, encode_ciphertext), \ 511 | &bc, threshold) / 1000.0); \ 512 | fflush(stdout); \ 513 | printf(" %8.2f", \ 514 | do_bench(&Bn(q, n, decode_ciphertext), \ 515 | &bc, threshold) / 1000.0); \ 516 | printf("\n"); \ 517 | fflush(stdout); \ 518 | } while (0) 519 | #endif 520 | 521 | MK_BENCH_FUNS(128, 256) 522 | MK_BENCH_FUNS(257, 512) 523 | MK_BENCH_FUNS(769, 1024) 524 | 525 | int 526 | main(int argc, char *argv[]) 527 | { 528 | double threshold; 529 | 530 | if (argc < 2) { 531 | threshold = 2.0; 532 | } else if (argc == 2) { 533 | threshold = atof(argv[1]); 534 | } else { 535 | threshold = -1.0; 536 | } 537 | if (threshold <= 0.0 || threshold > 60.0) { 538 | fprintf(stderr, 539 | "usage: speed [ threshold ]\n" 540 | "'threshold' is the minimum time for a bench run, in seconds (must be\n" 541 | "positive and less than 60).\n"); 542 | exit(EXIT_FAILURE); 543 | } 544 | #if DO_BENCH86 545 | printf("time threshold = %.4f Gcyc\n", threshold); 546 | #else 547 | printf("time threshold = %.4f s\n", threshold); 548 | #endif 549 | 550 | printf("esk / dsk = encode / decode private key (s = short format, l = long format)\n"); 551 | printf("epk / dpk = encode / decode public key\n"); 552 | printf("ect / dct = encode / decode ciphertext\n"); 553 | printf("ecp = encapsulate, dcp = decapsulate (nofo = without Fujisaki-Okamoto)\n"); 554 | #if DO_BENCH86 555 | printf("x86 PLATFORM, USING TSC; VALUES IN CLOCK CYCLES\n"); 556 | #else 557 | printf("keygen in milliseconds, all other times in microseconds\n"); 558 | #endif 559 | printf(" " 560 | " keygen" 561 | " esk-s" 562 | " dsk-s" 563 | " esk-l" 564 | " dsk-l" 565 | " epk" 566 | " dpk" 567 | " ecp-nofo" 568 | " ecp-fo" 569 | " dcp-nofo" 570 | " dcp-fo" 571 | " ect" 572 | " dct" 573 | "\n"); 574 | bench_128_256_all(threshold); 575 | bench_257_512_all(threshold); 576 | bench_769_1024_all(threshold); 577 | return 0; 578 | } 579 | 
-------------------------------------------------------------------------------- /src/test_bat.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "bat.h" 7 | #include "inner.h" 8 | 9 | static void 10 | check_equals(const void *a1, const void *a2, size_t len, const char *msg) 11 | { 12 | const uint8_t *b1, *b2; 13 | size_t u; 14 | 15 | if (memcmp(a1, a2, len) == 0) { 16 | return; 17 | } 18 | fprintf(stderr, "ERR: %s\n", msg); 19 | b1 = a1; 20 | b2 = a2; 21 | fprintf(stderr, "a1 = "); 22 | for (u = 0; u < len; u ++) { 23 | fprintf(stderr, "%02x", b1[u]); 24 | } 25 | fprintf(stderr, "\n"); 26 | fprintf(stderr, "a2 = "); 27 | for (u = 0; u < len; u ++) { 28 | fprintf(stderr, "%02x", b2[u]); 29 | } 30 | fprintf(stderr, "\n"); 31 | exit(EXIT_FAILURE); 32 | } 33 | 34 | static void 35 | selftest_seq(uint8_t *out, size_t len, uint32_t seed) 36 | { 37 | size_t i; 38 | uint32_t t, a, b; 39 | 40 | a = 0xDEAD4BAD * seed; 41 | b = 1; 42 | 43 | for (i = 0; i < len; i++) { 44 | t = a + b; 45 | a = b; 46 | b = t; 47 | out[i] = (t >> 24) & 0xFF; 48 | } 49 | } 50 | 51 | static void 52 | test_BLAKE2s_self() 53 | { 54 | /* 55 | * This code is from RFC 7693 (appendix E). 56 | */ 57 | 58 | // Grand hash of hash results. 59 | static const uint8_t blake2s_res[32] = { 60 | 0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD, 61 | 0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC, 62 | 0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87, 63 | 0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE 64 | }; 65 | // Parameter sets. 
66 | static const size_t b2s_md_len[4] = { 16, 20, 28, 32 }; 67 | static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 }; 68 | 69 | size_t i, j, outlen, inlen; 70 | uint8_t in[1024], md[32], key[32]; 71 | blake2s_context ctx; 72 | 73 | printf("Test BLAKE2s selftest: "); 74 | fflush(stdout); 75 | 76 | blake2s_init(&ctx, 32); 77 | 78 | for (i = 0; i < 4; i ++) { 79 | outlen = b2s_md_len[i]; 80 | for (j = 0; j < 6; j++) { 81 | inlen = b2s_in_len[j]; 82 | 83 | selftest_seq(in, inlen, inlen); 84 | blake2s(md, outlen, NULL, 0, in, inlen); 85 | blake2s_update(&ctx, md, outlen); 86 | 87 | selftest_seq(key, outlen, outlen); 88 | blake2s(md, outlen, key, outlen, in, inlen); 89 | blake2s_update(&ctx, md, outlen); 90 | } 91 | printf("."); 92 | fflush(stdout); 93 | } 94 | 95 | blake2s_final(&ctx, md); 96 | check_equals(md, blake2s_res, sizeof blake2s_res, "KAT"); 97 | 98 | printf(" done.\n"); 99 | fflush(stdout); 100 | } 101 | 102 | static void 103 | test_BLAKE2s_expand(void) 104 | { 105 | size_t u; 106 | 107 | printf("Test BLAKE2s expand: "); 108 | fflush(stdout); 109 | 110 | /* Test vector generated with python3 hashlib.blake2s() 111 | implementation. 
*/ 112 | static const uint8_t seed[] = { 113 | 0x4B, 0xFC, 0xB2, 0x19, 0x96, 0xAC, 0xE1, 0xE2, 114 | 0xA1, 0xD5, 0x38, 0xC5, 0x4D, 0x10, 0x99, 0xBF, 115 | 0x53, 0x20, 0x82, 0x62 116 | }; 117 | 118 | uint64_t label = 0x4A1BE6AC1347378C; 119 | 120 | static const uint8_t ref[] = { 121 | 0x78, 0x3D, 0xAA, 0x23, 0xB5, 0x2A, 0xDE, 0x32, 122 | 0x8C, 0x44, 0xB5, 0xBF, 0x68, 0xB3, 0x8E, 0xA3, 123 | 0x47, 0x49, 0xDB, 0x98, 0x96, 0xB4, 0xD8, 0x84, 124 | 0xA0, 0xEB, 0xB0, 0x0B, 0x84, 0x91, 0x66, 0xBD, 125 | 0x49, 0x56, 0x50, 0xEC, 0x3E, 0x89, 0x46, 0xF3, 126 | 0x45, 0x26, 0xBF, 0xEA, 0x28, 0x63, 0xE3, 0x83, 127 | 0x31, 0x64, 0xFE, 0x30, 0xE2, 0x89, 0x71, 0xFC, 128 | 0x34, 0x0C, 0x13, 0x05, 0xBA, 0x0D, 0x51, 0x39, 129 | 0x63, 0xD7, 0x41, 0x41, 0xCB, 0x4D, 0x74, 0xE8, 130 | 0x3F, 0x62, 0x74, 0xA2, 0xE4, 0x12, 0xB3, 0x25, 131 | 0x48, 0xC9, 0x3E, 0x57, 0xD3, 0x9E, 0xDD, 0xD7, 132 | 0x7B, 0x35, 0xC4, 0xE8, 0x54, 0x2E, 0x78, 0x44, 133 | 0xEF, 0xF0, 0x98, 0xCF, 0x82, 0x6B, 0xD0, 0x92, 134 | 0x2E, 0xF6, 0x9E, 0xFA, 0xB3, 0x38, 0x83, 0x3B, 135 | 0x96, 0x0C, 0xCF, 0xEA, 0xA8, 0x5E, 0xBE, 0x14, 136 | 0x64, 0xD6, 0x35, 0xE3, 0xA8, 0x60, 0x40, 0xE5, 137 | 0xF5, 0xEB, 0xDC, 0x55, 0xC8, 0x74, 0xEB, 0x21, 138 | 0x19, 0x43, 0x98, 0x46, 0xCD, 0xBE, 0x22, 0x0A, 139 | 0x0A, 0xF9, 0x07, 0xEA, 0xF0, 0xDC, 0x26, 0x80, 140 | 0x43, 0x42, 0xA6, 0xEE, 0x4F, 0x73, 0xA3, 0x0E, 141 | 0xB4, 0xDB, 0x10, 0x2D, 0x48, 0x8A, 0x43, 0xA9, 142 | 0xC0, 0x8B, 0x31, 0x4F, 0x2B, 0x52, 0xBA, 0xE2, 143 | 0x33, 0xBC, 0x32, 0xEA, 0xB7, 0xBB, 0x64, 0x2D, 144 | 0x31, 0xDA, 0x42, 0x24, 0x7B, 0x7D, 0x34, 0x61, 145 | 0xE3, 0x90, 0x2B, 0xA4, 0x93, 0x4A, 0x9D, 0x60, 146 | 0x4C, 0x48, 0xA4, 0x9E, 0x27, 0x04, 0x7C, 0xE6, 147 | 0x53, 0x12, 0x53, 0xD2, 0x8B, 0xC9, 0xCD, 0x4D, 148 | 0x74, 0xF4, 0x96, 0x5D, 0x02, 0x37, 0xB4, 0x2D, 149 | 0xBC, 0xAB, 0xDA, 0xEC, 0x4C, 0xE3, 0xF0, 0x57, 150 | 0x12, 0x7F, 0xB9, 0xFD, 0xB7, 0x3A, 0xDE, 0x37, 151 | 0xEF, 0x1B, 0x84, 0x5B, 0xFE, 0x1D, 0xEB, 0xC4, 152 | 0x0C, 0xF9, 0xC7, 0xA7, 0xE0, 0xB6, 
0xC7, 0xAB 153 | }; 154 | 155 | for (u = 0; u < sizeof ref; u ++) { 156 | uint8_t out[1 + (sizeof ref)]; 157 | 158 | out[u] = 0xFF; 159 | blake2s_expand(out, u, seed, sizeof seed, label); 160 | if (out[u] != 0xFF) { 161 | fprintf(stderr, "Output buffer overflow"); 162 | exit(EXIT_FAILURE); 163 | } 164 | check_equals(out, ref, u, "KAT"); 165 | printf("."); 166 | fflush(stdout); 167 | } 168 | 169 | printf(" done.\n"); 170 | fflush(stdout); 171 | } 172 | 173 | static void 174 | test_BLAKE2b_self() 175 | { 176 | /* 177 | * This code is from RFC 7693 (appendix E). 178 | */ 179 | 180 | // Grand hash of hash results. 181 | static const uint8_t blake2b_res[32] = { 182 | 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, 183 | 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56, 184 | 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73, 185 | 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75 186 | }; 187 | // Parameter sets. 188 | static const size_t b2b_md_len[4] = { 20, 32, 48, 64 }; 189 | static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 }; 190 | 191 | size_t i, j, outlen, inlen; 192 | uint8_t in[1024], md[64], key[64]; 193 | blake2b_context ctx; 194 | 195 | printf("Test BLAKE2b selftest: "); 196 | fflush(stdout); 197 | 198 | blake2b_init(&ctx, 32); 199 | 200 | for (i = 0; i < 4; i ++) { 201 | outlen = b2b_md_len[i]; 202 | for (j = 0; j < 6; j++) { 203 | inlen = b2b_in_len[j]; 204 | 205 | selftest_seq(in, inlen, inlen); 206 | blake2b(md, outlen, NULL, 0, in, inlen); 207 | blake2b_update(&ctx, md, outlen); 208 | 209 | selftest_seq(key, outlen, outlen); 210 | blake2b(md, outlen, key, outlen, in, inlen); 211 | blake2b_update(&ctx, md, outlen); 212 | } 213 | printf("."); 214 | fflush(stdout); 215 | } 216 | 217 | blake2b_final(&ctx, md); 218 | check_equals(md, blake2b_res, sizeof blake2b_res, "KAT"); 219 | 220 | printf(" done.\n"); 221 | fflush(stdout); 222 | } 223 | 224 | static void 225 | test_BLAKE2b_expand(void) 226 | { 227 | size_t u; 228 | 229 | printf("Test BLAKE2b 
expand: "); 230 | fflush(stdout); 231 | 232 | /* Test vector generated with python3 hashlib.blake2b() 233 | implementation. */ 234 | static const uint8_t seed[] = { 235 | 0x4B, 0xFC, 0xB2, 0x19, 0x96, 0xAC, 0xE1, 0xE2, 236 | 0xA1, 0xD5, 0x38, 0xC5, 0x4D, 0x10, 0x99, 0xBF, 237 | 0x53, 0x20, 0x82, 0x62 238 | }; 239 | 240 | uint64_t label = 0x4A1BE6AC1347378C; 241 | 242 | static const uint8_t ref[] = { 243 | 0xF7, 0x50, 0xF1, 0x35, 0x88, 0x0B, 0x7F, 0xBD, 244 | 0x1E, 0x01, 0x54, 0x42, 0x21, 0x6C, 0xAC, 0xCA, 245 | 0x6A, 0x19, 0xF4, 0xFE, 0x76, 0xB1, 0x69, 0xF8, 246 | 0x2B, 0xA1, 0x99, 0x14, 0x13, 0xF5, 0xB1, 0x87, 247 | 0xD9, 0xF8, 0xA0, 0x49, 0x47, 0xF6, 0x94, 0x26, 248 | 0x4E, 0x91, 0xF0, 0x63, 0x36, 0x56, 0x56, 0x9C, 249 | 0x3D, 0xF2, 0xD9, 0x8D, 0x7D, 0x6D, 0x07, 0xF6, 250 | 0x64, 0xB1, 0x25, 0x14, 0xB0, 0x80, 0xF6, 0x08, 251 | 0x59, 0x70, 0xB0, 0xE2, 0x18, 0x2A, 0x0C, 0x9B, 252 | 0xA6, 0x51, 0xE2, 0x73, 0xE8, 0xBF, 0x0A, 0x2F, 253 | 0x3E, 0xD1, 0x65, 0x34, 0x95, 0x5F, 0xF1, 0x0C, 254 | 0xB3, 0x0A, 0x45, 0xF5, 0x90, 0x71, 0x71, 0x72, 255 | 0xCA, 0x5D, 0x58, 0x46, 0xF1, 0xDA, 0xC7, 0xE4, 256 | 0xD4, 0x5B, 0xAE, 0x92, 0xBD, 0x6B, 0x0B, 0xA6, 257 | 0xBF, 0xDD, 0x90, 0x24, 0x8B, 0x8B, 0xF7, 0x02, 258 | 0x4F, 0xDB, 0x99, 0xA8, 0x42, 0x2D, 0x58, 0x51, 259 | 0x55, 0xD5, 0xD4, 0xEA, 0x08, 0x94, 0x19, 0x99, 260 | 0x5B, 0x25, 0xEB, 0x24, 0x48, 0x56, 0xDE, 0xEA, 261 | 0xA7, 0x66, 0x02, 0xD8, 0x40, 0x2B, 0x3B, 0xCC, 262 | 0x2B, 0x98, 0xA1, 0x9B, 0xEE, 0x59, 0xD2, 0x42, 263 | 0x60, 0xF2, 0x80, 0x95, 0x4D, 0x3E, 0x93, 0xD9, 264 | 0x17, 0x2B, 0xAF, 0x11, 0xD4, 0xE1, 0x40, 0x60, 265 | 0x5F, 0xC9, 0x2D, 0x1D, 0xFA, 0x7F, 0x21, 0xAB, 266 | 0x0C, 0xA2, 0xFE, 0x90, 0xD9, 0x23, 0x65, 0x52, 267 | 0xA7, 0xE5, 0x33, 0xB6, 0xC3, 0xEA, 0xE4, 0xC0, 268 | 0x91, 0xBA, 0x1C, 0xB5, 0x4B, 0x81, 0xAC, 0xBF, 269 | 0xC3, 0x55, 0x82, 0xE7, 0xF9, 0x56, 0x0B, 0xD1, 270 | 0x9F, 0x74, 0x18, 0xEB, 0x49, 0xEE, 0x55, 0x48, 271 | 0xE6, 0x6F, 0xE6, 0x01, 0x69, 0x6A, 0x7C, 0x59, 272 | 0x8D, 0xD0, 0x45, 0x1C, 
0x14, 0x28, 0x44, 0x74, 273 | 0x24, 0x95, 0xE0, 0xEB, 0x0A, 0x21, 0x82, 0x8D, 274 | 0x99, 0x35, 0xC5, 0x1C, 0x68, 0x98, 0x51, 0x3A, 275 | 0xF9, 0x7F, 0x09, 0xE7, 0xA8, 0xAB, 0x20, 0x80, 276 | 0xCD, 0x2D, 0x46, 0x25, 0xCB, 0x7A, 0xC6, 0xC5, 277 | 0xDC, 0xF5, 0xAC, 0x76, 0x00, 0xA0, 0xC0, 0xDA, 278 | 0x29, 0x41, 0x5C, 0x2A, 0x0D, 0x0A, 0xE4, 0x18, 279 | 0x73, 0x35, 0xD2, 0x8B, 0x46, 0xAA, 0x04, 0x8E, 280 | 0x32, 0xB4, 0xA3, 0x79, 0x95, 0x0A, 0x9F, 0x4C, 281 | 0x9F, 0x0D, 0xED, 0x67, 0xA8, 0x97, 0xEB, 0xB0, 282 | 0xCA, 0xD9, 0xF1, 0xBB, 0x88, 0x7F, 0x14, 0xD0, 283 | 0xD0, 0xCD, 0x7F, 0xEC, 0xAC, 0xDB, 0x7C, 0x81, 284 | 0x3F, 0x19, 0x6C, 0x56, 0x16, 0x26, 0x4A, 0xA7, 285 | 0xD8, 0x75, 0xC0, 0x91, 0xDA, 0x8A, 0x35, 0xDB, 286 | 0x75, 0x34, 0x9F, 0x60, 0x57, 0x0A, 0xFD, 0xBD, 287 | 0xBA, 0x43, 0x64, 0xB6, 0xF9, 0x63, 0x8C, 0x39, 288 | 0x0C, 0xFF, 0x07, 0x09, 0xBB, 0xD8, 0x85, 0x19, 289 | 0x0C, 0x2B, 0xDF, 0xF1, 0x97, 0xD7, 0xC2, 0x38, 290 | 0x15, 0x89, 0x7A, 0x54, 0x6E, 0x6E, 0x30, 0xFC, 291 | 0xA8, 0xD0, 0xCD, 0xC0, 0x82, 0x37, 0x0B, 0x6A, 292 | 0x21, 0x24, 0x48, 0x85, 0x9F, 0xB3, 0xEA, 0x1B, 293 | 0x12, 0xAF, 0x17, 0xD3, 0x20, 0x31, 0xE3, 0x35, 294 | 0xB8, 0x78, 0xF7, 0x7B, 0x2C, 0x07, 0xAD, 0xEF, 295 | 0x26, 0xEF, 0xCB, 0xC3, 0x59, 0x01, 0x9F, 0x73, 296 | 0x5C, 0x88, 0xB3, 0x61, 0x6D, 0x77, 0x52, 0x30, 297 | 0x04, 0x71, 0x28, 0xB8, 0x94, 0xF3, 0xA0, 0x30, 298 | 0x05, 0xCD, 0x51, 0x2F, 0x90, 0x8B, 0xF1, 0x1F, 299 | 0x52, 0xBC, 0x2B, 0x20, 0xD2, 0x52, 0xAE, 0x41, 300 | 0x70, 0x56, 0x07, 0x84, 0x90, 0xAF, 0x3B, 0xE6, 301 | 0xAD, 0x25, 0x11, 0x07, 0x36, 0x86, 0xFC, 0xD5, 302 | 0xA5, 0x4A, 0xE7, 0x09, 0xBF, 0x02, 0x10, 0x82, 303 | 0x52, 0xDB, 0x01, 0x77, 0x77, 0x2A, 0xAA, 0x3A, 304 | 0xFD, 0x0F, 0x9E, 0x6E, 0x86, 0x0B, 0x6F, 0x77, 305 | 0x7A, 0x5B, 0x1A, 0xD0, 0x9F, 0xFB, 0x49, 0x4B, 306 | 0x79, 0x8D, 0x5C, 0x59, 0x9D, 0x5A, 0x0D, 0x51 307 | }; 308 | 309 | for (u = 0; u < sizeof ref; u ++) { 310 | uint8_t out[1 + (sizeof ref)]; 311 | 312 | out[u] = 0xFF; 313 | 
blake2b_expand(out, u, seed, sizeof seed, label); 314 | if (out[u] != 0xFF) { 315 | fprintf(stderr, "Output buffer overflow"); 316 | exit(EXIT_FAILURE); 317 | } 318 | check_equals(out, ref, u, "KAT"); 319 | printf("."); 320 | fflush(stdout); 321 | } 322 | 323 | printf(" done.\n"); 324 | fflush(stdout); 325 | } 326 | 327 | /* 328 | * Initialize a PRNG with a given seed and extra label. 329 | */ 330 | static void 331 | rand_init(prng_context *rng, const char *seed, uint64_t x) 332 | { 333 | prng_init(rng, seed, strlen(seed), x); 334 | } 335 | 336 | /* 337 | * Generate a random polynomial with integer coefficients (coefficients 338 | * are signed and selected uniformly over num_bits). 339 | */ 340 | static void 341 | rand_poly_32(prng_context *rng, int32_t *f, unsigned logn, unsigned num_bits) 342 | { 343 | size_t u, n; 344 | 345 | n = (size_t)1 << logn; 346 | for (u = 0; u < n; u ++) { 347 | uint32_t x; 348 | 349 | x = (uint32_t)prng_get_u64(rng); 350 | f[u] = *(int32_t *)&x >> (32 - num_bits); 351 | } 352 | } 353 | 354 | static void 355 | poly_add(int32_t *d, const int32_t *a, const int32_t *b, unsigned logn) 356 | { 357 | size_t u, n; 358 | 359 | n = (size_t)1 << logn; 360 | for (u = 0; u < n; u ++) { 361 | d[u] = a[u] + b[u]; 362 | } 363 | } 364 | 365 | static void 366 | poly_sub(int32_t *d, const int32_t *a, const int32_t *b, unsigned logn) 367 | { 368 | size_t u, n; 369 | 370 | n = (size_t)1 << logn; 371 | for (u = 0; u < n; u ++) { 372 | d[u] = a[u] - b[u]; 373 | } 374 | } 375 | 376 | static void 377 | poly_neg(int32_t *d, const int32_t *a, unsigned logn) 378 | { 379 | size_t u, n; 380 | 381 | n = (size_t)1 << logn; 382 | for (u = 0; u < n; u ++) { 383 | d[u] = -a[u]; 384 | } 385 | } 386 | 387 | static void 388 | poly_mul(int32_t *d, const int32_t *a, const int32_t *b, unsigned logn) 389 | { 390 | int32_t t[1024]; 391 | size_t u, v, n; 392 | 393 | n = (size_t)1 << logn; 394 | memset(t, 0, sizeof t); 395 | for (u = 0; u < n; u ++) { 396 | for (v = 0; v < n; v 
++) { 397 | int32_t m; 398 | 399 | m = a[u] * b[v]; 400 | if ((u + v) < n) { 401 | t[u + v] += m; 402 | } else { 403 | t[u + v - n] -= m; 404 | } 405 | } 406 | } 407 | memcpy(d, t, n * sizeof *d); 408 | } 409 | 410 | static inline void 411 | print_fnr(fnr x) 412 | { 413 | fprintf(stderr, "%ld(%08lX)", 414 | (long)(*(int64_t *)&x.v >> 32), 415 | (unsigned long)(uint32_t)x.v); 416 | } 417 | 418 | static void 419 | print_poly_i32(const char *name, const int32_t *a, unsigned logn) 420 | { 421 | size_t u, n; 422 | 423 | n = (size_t)1 << logn; 424 | fprintf(stderr, "%s =", name); 425 | for (u = 0; u < n; u ++) { 426 | fprintf(stderr, " %ld", (long)a[u]); 427 | } 428 | fprintf(stderr, "\n"); 429 | } 430 | 431 | static void 432 | print_poly_fnr(const char *name, const fnr *f, unsigned logn) 433 | { 434 | size_t u, n; 435 | 436 | n = (size_t)1 << logn; 437 | fprintf(stderr, "%s =", name); 438 | for (u = 0; u < n; u ++) { 439 | fprintf(stderr, " "); 440 | print_fnr(f[u]); 441 | } 442 | fprintf(stderr, "\n"); 443 | } 444 | 445 | static void 446 | check_poly_eq_round(const char *banner, 447 | const int32_t *a, const fnr *f, unsigned logn) 448 | { 449 | size_t u, n; 450 | 451 | n = (size_t)1 << logn; 452 | for (u = 0; u < n; u ++) { 453 | if (fnr_round(f[u]) != a[u]) { 454 | break; 455 | } 456 | } 457 | if (u == n) { 458 | return; 459 | } 460 | fprintf(stderr, "ERR: %s (not equal on %zu)\n", banner, u); 461 | print_poly_i32("a", a, logn); 462 | print_poly_fnr("f", f, logn); 463 | fprintf(stderr, "a[%zu] = %ld\n", u, (long)a[u]); 464 | fprintf(stderr, "f[%zu] = ", u); 465 | print_fnr(f[u]); 466 | fprintf(stderr, "\n"); 467 | exit(EXIT_FAILURE); 468 | } 469 | 470 | static void 471 | test_FFT(void) 472 | { 473 | unsigned logn; 474 | 475 | printf("Test FFT: "); 476 | fflush(stdout); 477 | 478 | for (logn = 1; logn <= 10; logn ++) { 479 | prng_context rng; 480 | int32_t a[1024], b[1024], c[1024]; 481 | fnr fa[1024], fb[1024], fc[1024]; 482 | unsigned num_bits; 483 | size_t u, n; 484 
| int i; 485 | 486 | rand_init(&rng, "test_FFT", logn); 487 | n = (size_t)1 << logn; 488 | 489 | /* 490 | * If source coefficients are over k+1 bits (including 491 | * sign bit), then product coefficients are at most 492 | * 1+2*k+logn bits, and FFT coefficients will fit in 493 | * 1+2*k+2*logn bits. We need this value to be at most 32. 494 | */ 495 | num_bits = (32 - 1 - 2 * logn) >> 1; 496 | 497 | for (i = 0; i < 100; i ++) { 498 | rand_poly_32(&rng, a, logn, 32 - logn); 499 | for (u = 0; u < n; u ++) { 500 | fa[u] = fnr_of(a[u]); 501 | } 502 | bat_FFT(fa, logn); 503 | bat_iFFT(fa, logn); 504 | check_poly_eq_round("FFT", a, fa, logn); 505 | 506 | rand_poly_32(&rng, a, logn, num_bits); 507 | rand_poly_32(&rng, b, logn, num_bits); 508 | for (u = 0; u < n; u ++) { 509 | fa[u] = fnr_of(a[u]); 510 | fb[u] = fnr_of(b[u]); 511 | } 512 | bat_FFT(fa, logn); 513 | bat_FFT(fb, logn); 514 | memcpy(fc, fa, n * sizeof(fnr)); 515 | bat_poly_add(fc, fb, logn); 516 | bat_iFFT(fc, logn); 517 | poly_add(c, a, b, logn); 518 | check_poly_eq_round("add", c, fc, logn); 519 | 520 | memcpy(fc, fa, n * sizeof(fnr)); 521 | bat_poly_sub(fc, fb, logn); 522 | bat_iFFT(fc, logn); 523 | poly_sub(c, a, b, logn); 524 | check_poly_eq_round("sub", c, fc, logn); 525 | 526 | memcpy(fc, fa, n * sizeof(fnr)); 527 | bat_poly_neg(fc, logn); 528 | bat_iFFT(fc, logn); 529 | poly_neg(c, a, logn); 530 | check_poly_eq_round("neg", c, fc, logn); 531 | 532 | memcpy(fc, fa, n * sizeof(fnr)); 533 | bat_poly_mul_fft(fc, fb, logn); 534 | bat_iFFT(fc, logn); 535 | poly_mul(c, a, b, logn); 536 | check_poly_eq_round("mul", c, fc, logn); 537 | } 538 | 539 | printf("."); 540 | fflush(stdout); 541 | } 542 | 543 | printf(" done.\n"); 544 | fflush(stdout); 545 | } 546 | 547 | static void 548 | prep_tmp(void *tmp, size_t tmp_len, int i) 549 | { 550 | memset(tmp, i & 0xFF, tmp_len); 551 | } 552 | 553 | static size_t 554 | get_tmp_used(const void *tmp, size_t tmp_len, int i) 555 | { 556 | const uint8_t *buf; 557 | size_t u; 
558 | 559 | buf = tmp; 560 | i &= 0xFF; 561 | for (u = tmp_len; u > 0; u --) { 562 | if (buf[u - 1] != i) { 563 | return u; 564 | } 565 | } 566 | return 0; 567 | } 568 | 569 | static void 570 | check_tmp_used(const char *name, 571 | const void *tmp, size_t tmp_len, int i, size_t max_len) 572 | { 573 | size_t used_len; 574 | 575 | used_len = get_tmp_used(tmp, tmp_len, i); 576 | if (used_len > max_len) { 577 | fprintf(stderr, "ERR: %s: tmp usage exceeded allowance" 578 | " (%lu vs %lu bytes)\n", 579 | name, 580 | (unsigned long)used_len, (unsigned long)max_len); 581 | exit(EXIT_FAILURE); 582 | } 583 | } 584 | 585 | static void 586 | test_kem_inner_spec(uint32_t q, unsigned logn) 587 | { 588 | int i; 589 | union { 590 | uint8_t b[24 * 1024 + 8]; 591 | uint32_t w[6 * 1024 + 2]; 592 | uint64_t d; 593 | } tmp; 594 | int8_t f[1024], g[1024], F[1024], G[1024], G2[1024], c[1024]; 595 | uint16_t h[1024]; 596 | int32_t w[1024]; 597 | int enc_fail; 598 | 599 | printf("[%u-%u]", (unsigned)q, 1u << logn); 600 | fflush(stdout); 601 | 602 | enc_fail = 0; 603 | 604 | for (i = 0; i < 100; i ++) { 605 | prng_context rng; 606 | uint8_t kg_seed[32]; 607 | int j; 608 | 609 | rand_init(&rng, "kem_inner", 610 | ((uint64_t)(q << 4 | logn) << 32) | (uint64_t)i); 611 | 612 | /* 613 | * Generate a new key pair. 
614 | */ 615 | for (;;) { 616 | int r; 617 | 618 | prep_tmp(tmp.w, sizeof tmp.w, i); 619 | prng_get_bytes(&rng, kg_seed, sizeof kg_seed); 620 | r = bat_keygen_make_fg(f, g, h, q, logn, 621 | kg_seed, sizeof kg_seed, tmp.w); 622 | check_tmp_used("bat_keygen_make_fg", 623 | tmp.w, sizeof tmp.w, i, 24u << logn); 624 | if (!r) { 625 | continue; 626 | } 627 | 628 | prep_tmp(tmp.w, sizeof tmp.w, i); 629 | r = bat_keygen_solve_FG(F, G, f, g, q, logn, tmp.w); 630 | check_tmp_used("bat_keygen_solve_FG", 631 | tmp.w, sizeof tmp.w, i, 24u << logn); 632 | if (!r) { 633 | continue; 634 | } 635 | 636 | prep_tmp(tmp.w, sizeof tmp.w, i); 637 | r = bat_keygen_compute_w(w, f, g, F, G, q, logn, tmp.w); 638 | check_tmp_used("bat_keygen_compute_w", 639 | tmp.w, sizeof tmp.w, i, 24u << logn); 640 | if (!r) { 641 | continue; 642 | } 643 | 644 | break; 645 | } 646 | 647 | /* 648 | * Verify the key pair is behaving properly. 649 | */ 650 | prep_tmp(tmp.w, sizeof tmp.w, i); 651 | if (!bat_keygen_verify_FG(f, g, F, G, q, logn, tmp.w)) { 652 | fprintf(stderr, "bat_keygen_verify_FG() failed\n"); 653 | exit(EXIT_FAILURE); 654 | } 655 | check_tmp_used("bat_keygen_verify_FG", 656 | tmp.w, sizeof tmp.w, i, 16u << logn); 657 | 658 | prep_tmp(tmp.w, sizeof tmp.w, i); 659 | if (!bat_keygen_rebuild_G(G2, f, g, F, q, logn, tmp.w)) { 660 | fprintf(stderr, "bat_keygen_rebuild_G() failed\n"); 661 | exit(EXIT_FAILURE); 662 | } 663 | check_tmp_used("bat_keygen_rebuild_G", 664 | tmp.w, sizeof tmp.w, i, 4u << logn); 665 | check_equals(G, G2, 1u << logn, "rebuild G"); 666 | 667 | /* 668 | * Do some encapsulation / decapsulation. 
669 | */ 670 | for (j = 0; j < 100; j ++) { 671 | uint8_t sbuf[128], sbuf2[128]; 672 | int r; 673 | 674 | rand_init(&rng, "kem_inner_encaps", 675 | ((uint64_t)(q << 4 | logn) << 32) 676 | | (uint64_t)i | ((uint64_t)j << 16)); 677 | 678 | for (;;) { 679 | prng_get_bytes(&rng, sbuf, SBUF_LEN(logn)); 680 | if (logn < 3) { 681 | sbuf[0] &= (1u << (1u << logn)) - 1u; 682 | } 683 | 684 | prep_tmp(tmp.w, sizeof tmp.w, i); 685 | switch (q) { 686 | case 128: 687 | r = bat_encrypt_128( 688 | c, sbuf, (const uint8_t *)h, 689 | logn, tmp.w); 690 | break; 691 | case 257: 692 | r = bat_encrypt_257( 693 | c, sbuf, h, logn, tmp.w); 694 | break; 695 | case 769: 696 | r = bat_encrypt_769( 697 | c, sbuf, h, logn, tmp.w); 698 | break; 699 | default: 700 | fprintf(stderr, 701 | "Unknown q: %u\n", (unsigned)q); 702 | exit(EXIT_FAILURE); 703 | } 704 | check_tmp_used("bat_encrypt", 705 | tmp.w, sizeof tmp.w, i, 706 | (q == 128 ? 3u : 4u) << logn); 707 | if (!r) { 708 | /* 709 | * This may happen with q = 769, but 710 | * not with q = 128 or q = 257. 
711 | */ 712 | if (q == 769) { 713 | enc_fail ++; 714 | continue; 715 | } 716 | fprintf(stderr, 717 | "bat_encrypt() failed\n"); 718 | exit(EXIT_FAILURE); 719 | } 720 | break; 721 | } 722 | 723 | prep_tmp(tmp.w, sizeof tmp.w, i); 724 | switch (q) { 725 | case 128: 726 | bat_decrypt_128(sbuf2, 727 | c, f, g, F, G, w, logn, tmp.w); 728 | break; 729 | case 257: 730 | bat_decrypt_257(sbuf2, 731 | c, f, g, F, G, w, logn, tmp.w); 732 | break; 733 | case 769: 734 | bat_decrypt_769(sbuf2, 735 | c, f, g, F, G, w, logn, tmp.w); 736 | break; 737 | default: 738 | fprintf(stderr, "Unknown q: %u\n", (unsigned)q); 739 | exit(EXIT_FAILURE); 740 | } 741 | check_tmp_used("bat_decrypt", 742 | tmp.w, sizeof tmp.w, i, 8u << logn); 743 | 744 | check_equals(sbuf, sbuf2, SBUF_LEN(logn), 745 | "KEM enc/dec"); 746 | } 747 | 748 | printf("."); 749 | fflush(stdout); 750 | } 751 | 752 | printf("(%d)", enc_fail); 753 | fflush(stdout); 754 | } 755 | 756 | static void 757 | test_kem_inner(void) 758 | { 759 | unsigned logn; 760 | 761 | printf("Test KEM (inner):\n "); 762 | fflush(stdout); 763 | for (logn = 1; logn <= 8; logn ++) { 764 | test_kem_inner_spec(128, logn); 765 | } 766 | printf("\n "); 767 | fflush(stdout); 768 | for (logn = 1; logn <= 9; logn ++) { 769 | test_kem_inner_spec(257, logn); 770 | } 771 | printf("\n "); 772 | fflush(stdout); 773 | for (logn = 1; logn <= 10; logn ++) { 774 | test_kem_inner_spec(769, logn); 775 | } 776 | printf("\n"); 777 | } 778 | 779 | #define CC(x) do { \ 780 | int cc_err = (x); \ 781 | if (cc_err != 0) { \ 782 | fprintf(stderr, "%s failed with error %d\n", \ 783 | #x, cc_err); \ 784 | exit(EXIT_FAILURE); \ 785 | } \ 786 | } while (0) 787 | 788 | static void 789 | test_kem_128_256(void) 790 | { 791 | int i; 792 | bat_128_256_private_key sk, sk2; 793 | bat_128_256_public_key pk, pk2; 794 | bat_128_256_ciphertext ct, ct2; 795 | uint8_t tmp[BAT_128_256_TMP_KEYGEN], buf[33 + 8 * 256]; 796 | size_t len, len2; 797 | 798 | printf("Test KEM-128-256: "); 799 | 
fflush(stdout); 800 | 801 | for (i = 0; i < 100; i ++) { 802 | int j; 803 | 804 | CC(bat_128_256_keygen(&sk, tmp, sizeof tmp)); 805 | 806 | len = bat_128_256_encode_private_key(NULL, 0, &sk, 0); 807 | if (len > sizeof buf) { 808 | fprintf(stderr, "oversized private key encoding\n"); 809 | exit(EXIT_FAILURE); 810 | } 811 | len2 = bat_128_256_encode_private_key(buf, sizeof buf, &sk, 0); 812 | if (len2 != len) { 813 | fprintf(stderr, "private key encoding size mismatch\n"); 814 | exit(EXIT_FAILURE); 815 | } 816 | 817 | memset(&sk2, 0, sizeof sk2); 818 | len2 = bat_128_256_decode_private_key( 819 | &sk2, buf, sizeof buf, NULL, 0); 820 | if (len2 != len) { 821 | fprintf(stderr, "private key decoding size mismatch" 822 | "(%zu / %zu)\n", len, len2); 823 | exit(EXIT_FAILURE); 824 | } 825 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 826 | check_equals(sk.f, sk2.f, sizeof sk.f, "sk f"); 827 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 828 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 829 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 830 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 831 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 832 | 833 | len = bat_128_256_encode_private_key(NULL, 0, &sk, 1); 834 | if (len > sizeof buf) { 835 | fprintf(stderr, "oversized private key encoding" 836 | " (short form)\n"); 837 | exit(EXIT_FAILURE); 838 | } 839 | len2 = bat_128_256_encode_private_key(buf, sizeof buf, &sk, 1); 840 | if (len2 != len) { 841 | fprintf(stderr, "private key encoding size mismatch" 842 | " (short form) (%zu vs %zu)\n", len, len2); 843 | exit(EXIT_FAILURE); 844 | } 845 | 846 | memset(&sk2, 0, sizeof sk2); 847 | len2 = bat_128_256_decode_private_key( 848 | &sk2, buf, sizeof buf, tmp, sizeof tmp); 849 | if (len2 != len) { 850 | fprintf(stderr, "private key decoding size mismatch" 851 | " (short form)\n"); 852 | exit(EXIT_FAILURE); 853 | } 854 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 855 | check_equals(sk.f, 
sk2.f, sizeof sk.f, "sk f"); 856 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 857 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 858 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 859 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 860 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 861 | 862 | bat_128_256_get_public_key(&pk, &sk); 863 | 864 | len = bat_128_256_encode_public_key(NULL, 0, &pk); 865 | if (len > sizeof buf) { 866 | fprintf(stderr, "oversized public key encoding\n"); 867 | exit(EXIT_FAILURE); 868 | } 869 | len2 = bat_128_256_encode_public_key(buf, sizeof buf, &pk); 870 | if (len2 != len) { 871 | fprintf(stderr, "public key encoding size mismatch" 872 | " (%zu vs %zu)\n", len, len2); 873 | exit(EXIT_FAILURE); 874 | } 875 | 876 | memset(&pk2, 0, sizeof pk2); 877 | len2 = bat_128_256_decode_public_key(&pk2, buf, sizeof buf); 878 | if (len2 != len) { 879 | fprintf(stderr, "public key decoding size mismatch" 880 | " (%zu vs %zu)\n", len, len2); 881 | exit(EXIT_FAILURE); 882 | } 883 | check_equals(pk.h, pk2.h, sizeof pk.h, "pk h"); 884 | 885 | for (j = 0; j < 100; j ++) { 886 | uint8_t secret[48], secret2[48]; 887 | 888 | CC(bat_128_256_encapsulate(secret, sizeof secret, 889 | &ct, &pk, tmp, sizeof tmp)); 890 | 891 | len = bat_128_256_encode_ciphertext(NULL, 0, &ct); 892 | if (len > sizeof buf) { 893 | fprintf(stderr, 894 | "oversized ciphertext encoding\n"); 895 | exit(EXIT_FAILURE); 896 | } 897 | len2 = bat_128_256_encode_ciphertext( 898 | buf, sizeof buf, &ct); 899 | if (len2 != len) { 900 | fprintf(stderr, 901 | "ciphertext encoding size mismatch" 902 | " (%zu vs %zu)\n", len, len2); 903 | exit(EXIT_FAILURE); 904 | } 905 | 906 | memset(&ct2, 0, sizeof ct2); 907 | len2 = bat_128_256_decode_ciphertext( 908 | &ct2, buf, sizeof buf); 909 | if (len2 != len) { 910 | fprintf(stderr, 911 | "ciphertext decoding size mismatch" 912 | " (%zu vs %zu)\n", len, len2); 913 | exit(EXIT_FAILURE); 914 | } 915 | check_equals(ct.c, ct2.c, sizeof ct.c, "ct c"); 
916 | 917 | CC(bat_128_256_decapsulate(secret2, sizeof secret2, 918 | &ct, &sk, tmp, sizeof tmp)); 919 | check_equals(secret, secret2, sizeof secret, "secret"); 920 | } 921 | 922 | printf("."); 923 | fflush(stdout); 924 | } 925 | 926 | printf(" done.\n"); 927 | fflush(stdout); 928 | } 929 | 930 | static void 931 | test_kem_257_512(void) 932 | { 933 | int i; 934 | bat_257_512_private_key sk, sk2; 935 | bat_257_512_public_key pk, pk2; 936 | bat_257_512_ciphertext ct, ct2; 937 | uint8_t tmp[BAT_257_512_TMP_KEYGEN], buf[33 + 8 * 512]; 938 | size_t len, len2; 939 | 940 | printf("Test KEM-257-512: "); 941 | fflush(stdout); 942 | 943 | for (i = 0; i < 100; i ++) { 944 | int j; 945 | 946 | CC(bat_257_512_keygen(&sk, tmp, sizeof tmp)); 947 | 948 | len = bat_257_512_encode_private_key(NULL, 0, &sk, 0); 949 | if (len > sizeof buf) { 950 | fprintf(stderr, "oversized private key encoding\n"); 951 | exit(EXIT_FAILURE); 952 | } 953 | len2 = bat_257_512_encode_private_key(buf, sizeof buf, &sk, 0); 954 | if (len2 != len) { 955 | fprintf(stderr, "private key encoding size mismatch\n"); 956 | exit(EXIT_FAILURE); 957 | } 958 | 959 | memset(&sk2, 0, sizeof sk2); 960 | len2 = bat_257_512_decode_private_key( 961 | &sk2, buf, sizeof buf, NULL, 0); 962 | if (len2 != len) { 963 | fprintf(stderr, "private key decoding size mismatch" 964 | "(%zu / %zu)\n", len, len2); 965 | exit(EXIT_FAILURE); 966 | } 967 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 968 | check_equals(sk.f, sk2.f, sizeof sk.f, "sk f"); 969 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 970 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 971 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 972 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 973 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 974 | 975 | len = bat_257_512_encode_private_key(NULL, 0, &sk, 1); 976 | if (len > sizeof buf) { 977 | fprintf(stderr, "oversized private key encoding" 978 | " (short form)\n"); 979 | exit(EXIT_FAILURE); 980 | } 
981 | len2 = bat_257_512_encode_private_key(buf, sizeof buf, &sk, 1); 982 | if (len2 != len) { 983 | fprintf(stderr, "private key encoding size mismatch" 984 | " (short form) (%zu vs %zu)\n", len, len2); 985 | exit(EXIT_FAILURE); 986 | } 987 | 988 | memset(&sk2, 0, sizeof sk2); 989 | len2 = bat_257_512_decode_private_key( 990 | &sk2, buf, sizeof buf, tmp, sizeof tmp); 991 | if (len2 != len) { 992 | fprintf(stderr, "private key decoding size mismatch" 993 | " (short form)\n"); 994 | exit(EXIT_FAILURE); 995 | } 996 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 997 | check_equals(sk.f, sk2.f, sizeof sk.f, "sk f"); 998 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 999 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 1000 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 1001 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 1002 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 1003 | 1004 | bat_257_512_get_public_key(&pk, &sk); 1005 | 1006 | len = bat_257_512_encode_public_key(NULL, 0, &pk); 1007 | if (len > sizeof buf) { 1008 | fprintf(stderr, "oversized public key encoding\n"); 1009 | exit(EXIT_FAILURE); 1010 | } 1011 | len2 = bat_257_512_encode_public_key(buf, sizeof buf, &pk); 1012 | if (len2 != len) { 1013 | fprintf(stderr, "public key encoding size mismatch" 1014 | " (%zu vs %zu)\n", len, len2); 1015 | exit(EXIT_FAILURE); 1016 | } 1017 | 1018 | memset(&pk2, 0, sizeof pk2); 1019 | len2 = bat_257_512_decode_public_key(&pk2, buf, sizeof buf); 1020 | if (len2 != len) { 1021 | fprintf(stderr, "public key decoding size mismatch" 1022 | " (%zu vs %zu)\n", len, len2); 1023 | exit(EXIT_FAILURE); 1024 | } 1025 | check_equals(pk.h, pk2.h, sizeof pk.h, "pk h"); 1026 | 1027 | for (j = 0; j < 100; j ++) { 1028 | uint8_t secret[48], secret2[48]; 1029 | 1030 | CC(bat_257_512_encapsulate(secret, sizeof secret, 1031 | &ct, &pk, tmp, sizeof tmp)); 1032 | 1033 | len = bat_257_512_encode_ciphertext(NULL, 0, &ct); 1034 | if (len > sizeof buf) { 1035 | 
fprintf(stderr, 1036 | "oversized ciphertext encoding\n"); 1037 | exit(EXIT_FAILURE); 1038 | } 1039 | len2 = bat_257_512_encode_ciphertext( 1040 | buf, sizeof buf, &ct); 1041 | if (len2 != len) { 1042 | fprintf(stderr, 1043 | "ciphertext encoding size mismatch" 1044 | " (%zu vs %zu)\n", len, len2); 1045 | exit(EXIT_FAILURE); 1046 | } 1047 | 1048 | memset(&ct2, 0, sizeof ct2); 1049 | len2 = bat_257_512_decode_ciphertext( 1050 | &ct2, buf, sizeof buf); 1051 | if (len2 != len) { 1052 | fprintf(stderr, 1053 | "ciphertext decoding size mismatch" 1054 | " (%zu vs %zu)\n", len, len2); 1055 | exit(EXIT_FAILURE); 1056 | } 1057 | check_equals(ct.c, ct2.c, sizeof ct.c, "ct c"); 1058 | 1059 | CC(bat_257_512_decapsulate(secret2, sizeof secret2, 1060 | &ct, &sk, tmp, sizeof tmp)); 1061 | check_equals(secret, secret2, sizeof secret, "secret"); 1062 | } 1063 | 1064 | printf("."); 1065 | fflush(stdout); 1066 | } 1067 | 1068 | printf(" done.\n"); 1069 | fflush(stdout); 1070 | } 1071 | 1072 | static void 1073 | test_kem_769_1024(void) 1074 | { 1075 | int i; 1076 | bat_769_1024_private_key sk, sk2; 1077 | bat_769_1024_public_key pk, pk2; 1078 | bat_769_1024_ciphertext ct, ct2; 1079 | uint8_t tmp[BAT_769_1024_TMP_KEYGEN], buf[33 + 8 * 1024]; 1080 | size_t len, len2; 1081 | 1082 | printf("Test KEM-769-1024: "); 1083 | fflush(stdout); 1084 | 1085 | for (i = 0; i < 100; i ++) { 1086 | int j; 1087 | 1088 | CC(bat_769_1024_keygen(&sk, tmp, sizeof tmp)); 1089 | 1090 | len = bat_769_1024_encode_private_key(NULL, 0, &sk, 0); 1091 | if (len > sizeof buf) { 1092 | fprintf(stderr, "oversized private key encoding\n"); 1093 | exit(EXIT_FAILURE); 1094 | } 1095 | len2 = bat_769_1024_encode_private_key(buf, sizeof buf, &sk, 0); 1096 | if (len2 != len) { 1097 | fprintf(stderr, "private key encoding size mismatch\n"); 1098 | exit(EXIT_FAILURE); 1099 | } 1100 | 1101 | memset(&sk2, 0, sizeof sk2); 1102 | len2 = bat_769_1024_decode_private_key( 1103 | &sk2, buf, sizeof buf, NULL, 0); 1104 | if (len2 != 
len) { 1105 | fprintf(stderr, "private key decoding size mismatch" 1106 | "(%zu / %zu)\n", len, len2); 1107 | exit(EXIT_FAILURE); 1108 | } 1109 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 1110 | check_equals(sk.f, sk2.f, sizeof sk.f, "sk f"); 1111 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 1112 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 1113 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 1114 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 1115 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 1116 | 1117 | len = bat_769_1024_encode_private_key(NULL, 0, &sk, 1); 1118 | if (len > sizeof buf) { 1119 | fprintf(stderr, "oversized private key encoding" 1120 | " (short form)\n"); 1121 | exit(EXIT_FAILURE); 1122 | } 1123 | len2 = bat_769_1024_encode_private_key(buf, sizeof buf, &sk, 1); 1124 | if (len2 != len) { 1125 | fprintf(stderr, "private key encoding size mismatch" 1126 | " (short form) (%zu vs %zu)\n", len, len2); 1127 | exit(EXIT_FAILURE); 1128 | } 1129 | 1130 | memset(&sk2, 0, sizeof sk2); 1131 | len2 = bat_769_1024_decode_private_key( 1132 | &sk2, buf, sizeof buf, tmp, sizeof tmp); 1133 | if (len2 != len) { 1134 | fprintf(stderr, "private key decoding size mismatch" 1135 | " (short form)\n"); 1136 | exit(EXIT_FAILURE); 1137 | } 1138 | check_equals(sk.seed, sk2.seed, sizeof sk.seed, "sk seed"); 1139 | check_equals(sk.f, sk2.f, sizeof sk.f, "sk f"); 1140 | check_equals(sk.g, sk2.g, sizeof sk.g, "sk g"); 1141 | check_equals(sk.F, sk2.F, sizeof sk.F, "sk F"); 1142 | check_equals(sk.G, sk2.G, sizeof sk.G, "sk G"); 1143 | check_equals(sk.w, sk2.w, sizeof sk.w, "sk w"); 1144 | check_equals(sk.h, sk2.h, sizeof sk.h, "sk h"); 1145 | 1146 | bat_769_1024_get_public_key(&pk, &sk); 1147 | 1148 | len = bat_769_1024_encode_public_key(NULL, 0, &pk); 1149 | if (len > sizeof buf) { 1150 | fprintf(stderr, "oversized public key encoding\n"); 1151 | exit(EXIT_FAILURE); 1152 | } 1153 | len2 = bat_769_1024_encode_public_key(buf, sizeof buf, &pk); 
1154 | if (len2 != len) { 1155 | fprintf(stderr, "public key encoding size mismatch" 1156 | " (%zu vs %zu)\n", len, len2); 1157 | exit(EXIT_FAILURE); 1158 | } 1159 | 1160 | memset(&pk2, 0, sizeof pk2); 1161 | len2 = bat_769_1024_decode_public_key(&pk2, buf, sizeof buf); 1162 | if (len2 != len) { 1163 | fprintf(stderr, "public key decoding size mismatch" 1164 | " (%zu vs %zu)\n", len, len2); 1165 | exit(EXIT_FAILURE); 1166 | } 1167 | check_equals(pk.h, pk2.h, sizeof pk.h, "pk h"); 1168 | 1169 | for (j = 0; j < 100; j ++) { 1170 | uint8_t secret[48], secret2[48]; 1171 | 1172 | CC(bat_769_1024_encapsulate(secret, sizeof secret, 1173 | &ct, &pk, tmp, sizeof tmp)); 1174 | 1175 | len = bat_769_1024_encode_ciphertext(NULL, 0, &ct); 1176 | if (len > sizeof buf) { 1177 | fprintf(stderr, 1178 | "oversized ciphertext encoding\n"); 1179 | exit(EXIT_FAILURE); 1180 | } 1181 | len2 = bat_769_1024_encode_ciphertext( 1182 | buf, sizeof buf, &ct); 1183 | if (len2 != len) { 1184 | fprintf(stderr, 1185 | "ciphertext encoding size mismatch" 1186 | " (%zu vs %zu)\n", len, len2); 1187 | exit(EXIT_FAILURE); 1188 | } 1189 | 1190 | memset(&ct2, 0, sizeof ct2); 1191 | len2 = bat_769_1024_decode_ciphertext( 1192 | &ct2, buf, sizeof buf); 1193 | if (len2 != len) { 1194 | fprintf(stderr, 1195 | "ciphertext decoding size mismatch" 1196 | " (%zu vs %zu)\n", len, len2); 1197 | exit(EXIT_FAILURE); 1198 | } 1199 | check_equals(ct.c, ct2.c, sizeof ct.c, "ct c"); 1200 | 1201 | CC(bat_769_1024_decapsulate(secret2, sizeof secret2, 1202 | &ct, &sk, tmp, sizeof tmp)); 1203 | check_equals(secret, secret2, sizeof secret, "secret"); 1204 | } 1205 | 1206 | printf("."); 1207 | fflush(stdout); 1208 | } 1209 | 1210 | printf(" done.\n"); 1211 | fflush(stdout); 1212 | } 1213 | 1214 | #if 0 1215 | /* 1216 | * Sample code to generate key pairs and print them out in text format 1217 | * for external analysis. Each output line contains f, g, Fd and Gd. 
1218 | */ 1219 | 1220 | /* defined in keygen.c */ 1221 | void bat_make_Fd(int32_t *Fd, const int8_t *f, const int8_t *F, 1222 | const int32_t *w, unsigned qp, unsigned logn, uint32_t *tmp); 1223 | 1224 | static void 1225 | make_keys(uint32_t q, unsigned logn, int num) 1226 | { 1227 | int i; 1228 | union { 1229 | uint8_t b[24 * 1024 + 8]; 1230 | uint32_t w[6 * 1024 + 2]; 1231 | uint64_t d; 1232 | } tmp; 1233 | int8_t f[1024], g[1024], F[1024], G[1024]; 1234 | uint16_t h[1024]; 1235 | int32_t w[1024], Fd[1024], Gd[1024]; 1236 | 1237 | for (i = 0; i < num; i ++) { 1238 | prng_context rng; 1239 | uint8_t kg_seed[32]; 1240 | size_t u, n; 1241 | 1242 | rand_init(&rng, "make_keys", 1243 | ((uint64_t)(q << 4 | logn) << 32) | (uint64_t)i); 1244 | 1245 | /* 1246 | * Generate a new key pair. 1247 | */ 1248 | for (;;) { 1249 | int r; 1250 | 1251 | prng_get_bytes(&rng, kg_seed, sizeof kg_seed); 1252 | r = bat_keygen_make_fg(f, g, h, q, logn, 1253 | kg_seed, sizeof kg_seed, tmp.w); 1254 | if (!r) { 1255 | continue; 1256 | } 1257 | 1258 | r = bat_keygen_solve_FG(F, G, f, g, q, logn, tmp.w); 1259 | if (!r) { 1260 | continue; 1261 | } 1262 | 1263 | r = bat_keygen_compute_w(w, f, g, F, G, q, logn, tmp.w); 1264 | if (!r) { 1265 | continue; 1266 | } 1267 | 1268 | break; 1269 | } 1270 | bat_make_Fd(Fd, f, F, w, 64513, logn, tmp.w); 1271 | bat_make_Fd(Gd, g, G, w, 64513, logn, tmp.w); 1272 | 1273 | fprintf(stderr, "."); 1274 | fflush(stderr); 1275 | 1276 | n = 1u << logn; 1277 | printf("[["); 1278 | for (u = 0; u < n; u ++) { 1279 | if (u != 0) { 1280 | printf(", "); 1281 | } 1282 | printf("%d", f[u]); 1283 | } 1284 | printf("], ["); 1285 | for (u = 0; u < n; u ++) { 1286 | if (u != 0) { 1287 | printf(", "); 1288 | } 1289 | printf("%d", g[u]); 1290 | } 1291 | printf("], ["); 1292 | for (u = 0; u < n; u ++) { 1293 | if (u != 0) { 1294 | printf(", "); 1295 | } 1296 | printf("%d", Fd[u]); 1297 | } 1298 | printf("], ["); 1299 | for (u = 0; u < n; u ++) { 1300 | if (u != 0) { 1301 | 
printf(", "); 1302 | } 1303 | printf("%d", Gd[u]); 1304 | } 1305 | printf("]]\n"); 1306 | fflush(stdout); 1307 | } 1308 | fprintf(stderr, "\n"); 1309 | fflush(stderr); 1310 | } 1311 | #endif 1312 | 1313 | #if 0 1314 | /* 1315 | * Sample code to generate key pairs and export them in a custom binary 1316 | * format for external analysis. Polynomials f, g, F and G use one byte 1317 | * per coefficient; w uses 4 bytes per coefficient (little-endian). Keys 1318 | * are written one after the other in the specified file. 1319 | */ 1320 | 1321 | static void 1322 | make_keys_bin(const char *fname, uint32_t q, unsigned logn, int num) 1323 | { 1324 | int i; 1325 | union { 1326 | uint8_t b[24 * 1024 + 8]; 1327 | uint32_t w[6 * 1024 + 2]; 1328 | uint64_t d; 1329 | } tmp; 1330 | int8_t f[1024], g[1024], F[1024], G[1024]; 1331 | uint16_t h[1024]; 1332 | int32_t w[1024]; 1333 | FILE *kf; 1334 | 1335 | kf = fopen(fname, "wb"); 1336 | if (kf == NULL) { 1337 | perror("fopen"); 1338 | exit(EXIT_FAILURE); 1339 | } 1340 | for (i = 0; i < num; i ++) { 1341 | prng_context rng; 1342 | uint8_t kg_seed[32]; 1343 | size_t u, n; 1344 | 1345 | rand_init(&rng, "make_keys_bin", 1346 | ((uint64_t)(q << 4 | logn) << 32) | (uint64_t)i); 1347 | 1348 | /* 1349 | * Generate a new key pair. 
1350 | */ 1351 | for (;;) { 1352 | int r; 1353 | 1354 | prng_get_bytes(&rng, kg_seed, sizeof kg_seed); 1355 | r = bat_keygen_make_fg(f, g, h, q, logn, 1356 | kg_seed, sizeof kg_seed, tmp.w); 1357 | if (!r) { 1358 | continue; 1359 | } 1360 | 1361 | r = bat_keygen_solve_FG(F, G, f, g, q, logn, tmp.w); 1362 | if (!r) { 1363 | continue; 1364 | } 1365 | 1366 | r = bat_keygen_compute_w(w, f, g, F, G, q, logn, tmp.w); 1367 | if (!r) { 1368 | continue; 1369 | } 1370 | 1371 | break; 1372 | } 1373 | 1374 | n = 1u << logn; 1375 | fwrite(f, 1, n, kf); 1376 | fwrite(g, 1, n, kf); 1377 | fwrite(F, 1, n, kf); 1378 | fwrite(G, 1, n, kf); 1379 | for (u = 0; u < n; u ++) { 1380 | uint8_t tbuf[4]; 1381 | 1382 | enc32le(tbuf, w[u]); 1383 | fwrite(tbuf, 1, 4, kf); 1384 | } 1385 | 1386 | if ((i + 1) % 100 == 0) { 1387 | fprintf(stderr, "."); 1388 | fflush(stderr); 1389 | } 1390 | } 1391 | fprintf(stderr, "\n"); 1392 | fflush(stderr); 1393 | 1394 | fclose(kf); 1395 | } 1396 | #endif 1397 | 1398 | int 1399 | main(void) 1400 | { 1401 | test_BLAKE2s_self(); 1402 | test_BLAKE2s_expand(); 1403 | test_BLAKE2b_self(); 1404 | test_BLAKE2b_expand(); 1405 | 1406 | test_FFT(); 1407 | test_kem_inner(); 1408 | test_kem_128_256(); 1409 | test_kem_257_512(); 1410 | test_kem_769_1024(); 1411 | 1412 | printf("Sizes: pub ct priv (short/long)\n"); 1413 | printf("BAT-128-256: %4zu %4zu %3zu / %4zu\n", 1414 | bat_128_256_encode_public_key(0, 0, 0), 1415 | bat_128_256_encode_ciphertext(0, 0, 0), 1416 | bat_128_256_encode_private_key(0, 0, 0, 1), 1417 | bat_128_256_encode_private_key(0, 0, 0, 0)); 1418 | printf("BAT-257-512: %4zu %4zu %3zu / %4zu\n", 1419 | bat_257_512_encode_public_key(0, 0, 0), 1420 | bat_257_512_encode_ciphertext(0, 0, 0), 1421 | bat_257_512_encode_private_key(0, 0, 0, 1), 1422 | bat_257_512_encode_private_key(0, 0, 0, 0)); 1423 | printf("BAT-769-1024: %4zu %4zu %3zu / %4zu\n", 1424 | bat_769_1024_encode_public_key(0, 0, 0), 1425 | bat_769_1024_encode_ciphertext(0, 0, 0), 1426 | 
bat_769_1024_encode_private_key(0, 0, 0, 1), 1427 | bat_769_1024_encode_private_key(0, 0, 0, 0)); 1428 | 1429 | return 0; 1430 | } 1431 | --------------------------------------------------------------------------------