├── avx2 ├── kem.c ├── kem.h ├── indcpa.h ├── verify.h ├── fips202.c ├── randombytes.c ├── randombytes.h ├── symmetric-shake.c ├── test │ ├── cpucycles.c │ ├── cpucycles.h │ ├── speed_print.c │ ├── speed_print.h │ ├── test_kyber.c │ ├── test_speed.c │ └── test_vectors.c ├── keccak4x │ ├── KeccakP-SIMD256-config.h │ ├── KeccakP-align.h │ ├── KeccakP-1600-times4-SnP.h │ ├── KeccakP-brg_endian.h │ └── KeccakP-1600-unrolling.macros ├── .gitignore ├── reduce.h ├── cbd.h ├── align.h ├── rejsample.h ├── shuffle.inc ├── fq.inc ├── ntt.h ├── consts.h ├── polyvec.h ├── symmetric.h ├── fq.S ├── params.h ├── fips202.h ├── fips202x4.h ├── verify.c ├── basemul.S ├── poly.h ├── api.h ├── ntt.S ├── Makefile ├── cbd.c ├── shuffle.S ├── invntt.S ├── consts.c ├── fips202x4.c └── polyvec.c ├── .gitattributes ├── .gitignore ├── AUTHORS ├── ref ├── randombytes.h ├── test │ ├── speed_print.h │ ├── cpucycles.c │ ├── cpucycles.h │ ├── speed_print.c │ ├── test_vectors.c │ ├── test_kyber.c │ └── test_speed.c ├── .gitignore ├── cbd.h ├── reduce.h ├── verify.h ├── ntt.h ├── indcpa.h ├── kem.h ├── polyvec.h ├── symmetric.h ├── reduce.c ├── nistkat │ ├── rng.h │ ├── rng.c │ └── PQCgenKAT_kem.c ├── params.h ├── poly.h ├── randombytes.c ├── fips202.h ├── verify.c ├── symmetric-shake.c ├── api.h ├── cbd.c ├── Makefile ├── ntt.c ├── kem.c ├── polyvec.c └── poly.c ├── SHA256SUMS ├── LICENSE ├── runlcov.sh ├── runtests.sh ├── .travis.yml ├── Common_META.yml ├── Kyber512_META.yml ├── Kyber768_META.yml ├── Kyber1024_META.yml └── README.md /avx2/kem.c: -------------------------------------------------------------------------------- 1 | ../ref/kem.c -------------------------------------------------------------------------------- /avx2/kem.h: -------------------------------------------------------------------------------- 1 | ../ref/kem.h -------------------------------------------------------------------------------- /avx2/indcpa.h: 
-------------------------------------------------------------------------------- 1 | ../ref/indcpa.h -------------------------------------------------------------------------------- /avx2/verify.h: -------------------------------------------------------------------------------- 1 | ../ref/verify.h -------------------------------------------------------------------------------- /avx2/fips202.c: -------------------------------------------------------------------------------- 1 | ../ref/fips202.c -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | SHA256SUMS eol=lf 2 | -------------------------------------------------------------------------------- /avx2/randombytes.c: -------------------------------------------------------------------------------- 1 | ../ref/randombytes.c -------------------------------------------------------------------------------- /avx2/randombytes.h: -------------------------------------------------------------------------------- 1 | ../ref/randombytes.h -------------------------------------------------------------------------------- /avx2/symmetric-shake.c: -------------------------------------------------------------------------------- 1 | ../ref/symmetric-shake.c -------------------------------------------------------------------------------- /avx2/test/cpucycles.c: -------------------------------------------------------------------------------- 1 | ../../ref/test/cpucycles.c -------------------------------------------------------------------------------- /avx2/test/cpucycles.h: -------------------------------------------------------------------------------- 1 | ../../ref/test/cpucycles.h -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tvecs1024 2 | tvecs512 3 | tvecs768 4 | 
-------------------------------------------------------------------------------- /avx2/test/speed_print.c: -------------------------------------------------------------------------------- 1 | ../../ref/test/speed_print.c -------------------------------------------------------------------------------- /avx2/test/speed_print.h: -------------------------------------------------------------------------------- 1 | ../../ref/test/speed_print.h -------------------------------------------------------------------------------- /avx2/test/test_kyber.c: -------------------------------------------------------------------------------- 1 | ../../ref/test/test_kyber.c -------------------------------------------------------------------------------- /avx2/test/test_speed.c: -------------------------------------------------------------------------------- 1 | ../../ref/test/test_speed.c -------------------------------------------------------------------------------- /avx2/test/test_vectors.c: -------------------------------------------------------------------------------- 1 | ../../ref/test/test_vectors.c -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Joppe Bos, 2 | Léo Ducas, 3 | Eike Kiltz, 4 | Tancrède Lepoint, 5 | Vadim Lyubashevsky, 6 | John Schanck, 7 | Peter Schwabe, 8 | Gregor Seiler, 9 | Damien Stehlé 10 | -------------------------------------------------------------------------------- /avx2/keccak4x/KeccakP-SIMD256-config.h: -------------------------------------------------------------------------------- 1 | #define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled" 2 | #define KeccakP1600times4_fullUnrolling 3 | #define KeccakP1600times4_useAVX2 4 | -------------------------------------------------------------------------------- /ref/randombytes.h: -------------------------------------------------------------------------------- 1 
| #ifndef RANDOMBYTES_H 2 | #define RANDOMBYTES_H 3 | 4 | #include 5 | #include 6 | 7 | void randombytes(uint8_t *out, size_t outlen); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /ref/test/speed_print.h: -------------------------------------------------------------------------------- 1 | #ifndef PRINT_SPEED_H 2 | #define PRINT_SPEED_H 3 | 4 | #include 5 | #include 6 | 7 | void print_results(const char *s, uint64_t *t, size_t tlen); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /SHA256SUMS: -------------------------------------------------------------------------------- 1 | 4d34994299a8f8dcb36c550951a00f6e16918d6d5b6f280ee2aa12a7bf8375a0 tvecs512 2 | b59ac4d2b429b1f8c3b8a5542fb638179da2fd8b1212891d2f976e70e219fed1 tvecs768 3 | 3f577090c7cb7a345ce0417a2a2353153a9f1b8d79f8d927cb6a7b4ec17fd2a1 tvecs1024 4 | -------------------------------------------------------------------------------- /avx2/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.o 3 | test/test_kyber1024 4 | test/test_kyber512 5 | test/test_kyber768 6 | test/test_speed1024 7 | test/test_speed512 8 | test/test_speed768 9 | test/test_vectors1024 10 | test/test_vectors512 11 | test/test_vectors768 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); 2 | or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). 3 | 4 | For Keccak and AES we are using public-domain 5 | code from sources and by authors listed in 6 | comments on top of the respective files. 
7 | -------------------------------------------------------------------------------- /avx2/reduce.h: -------------------------------------------------------------------------------- 1 | #ifndef REDUCE_H 2 | #define REDUCE_H 3 | 4 | #include "params.h" 5 | #include 6 | 7 | #define reduce_avx KYBER_NAMESPACE(reduce_avx) 8 | void reduce_avx(__m256i *r, const __m256i *qdata); 9 | #define tomont_avx KYBER_NAMESPACE(tomont_avx) 10 | void tomont_avx(__m256i *r, const __m256i *qdata); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /ref/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.o 3 | test/test_kyber1024 4 | test/test_kyber512 5 | test/test_kyber768 6 | test/test_speed1024 7 | test/test_speed512 8 | test/test_speed768 9 | test/test_vectors1024 10 | test/test_vectors512 11 | test/test_vectors768 12 | nistkat/PQCgenKAT_kem512 13 | nistkat/PQCgenKAT_kem768 14 | nistkat/PQCgenKAT_kem1024 15 | nistkat/*.req 16 | nistkat/*.rsp 17 | -------------------------------------------------------------------------------- /ref/test/cpucycles.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cpucycles.h" 3 | 4 | uint64_t cpucycles_overhead(void) { 5 | uint64_t t0, t1, overhead = -1LL; 6 | unsigned int i; 7 | 8 | for(i=0;i<100000;i++) { 9 | t0 = cpucycles(); 10 | __asm__ volatile (""); 11 | t1 = cpucycles(); 12 | if(t1 - t0 < overhead) 13 | overhead = t1 - t0; 14 | } 15 | 16 | return overhead; 17 | } 18 | -------------------------------------------------------------------------------- /ref/cbd.h: -------------------------------------------------------------------------------- 1 | #ifndef CBD_H 2 | #define CBD_H 3 | 4 | #include 5 | #include "params.h" 6 | #include "poly.h" 7 | 8 | #define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) 9 | void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]); 10 | 11 
| #define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) 12 | void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /ref/reduce.h: -------------------------------------------------------------------------------- 1 | #ifndef REDUCE_H 2 | #define REDUCE_H 3 | 4 | #include 5 | #include "params.h" 6 | 7 | #define MONT -1044 // 2^16 mod q 8 | #define QINV -3327 // q^-1 mod 2^16 9 | 10 | #define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce) 11 | int16_t montgomery_reduce(int32_t a); 12 | 13 | #define barrett_reduce KYBER_NAMESPACE(barrett_reduce) 14 | int16_t barrett_reduce(int16_t a); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /avx2/cbd.h: -------------------------------------------------------------------------------- 1 | #ifndef CBD_H 2 | #define CBD_H 3 | 4 | #include 5 | #include 6 | #include "params.h" 7 | #include "poly.h" 8 | 9 | #define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1) 10 | void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]); 11 | 12 | #define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2) 13 | void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /avx2/align.h: -------------------------------------------------------------------------------- 1 | #ifndef ALIGN_H 2 | #define ALIGN_H 3 | 4 | #include 5 | #include 6 | 7 | #define ALIGNED_UINT8(N) \ 8 | union { \ 9 | uint8_t coeffs[N]; \ 10 | __m256i vec[(N+31)/32]; \ 11 | } 12 | 13 | #define ALIGNED_INT16(N) \ 14 | union { \ 15 | int16_t coeffs[N]; \ 16 | __m256i vec[(N+15)/16]; \ 17 | } 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /runlcov.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/sh -e 2 | 3 | cd ref 4 | 5 | for alg in 512 768 1024; do 6 | make -B test/test_kyber$alg CFLAGS="-O0 -g --coverage" 7 | ./test/test_kyber$alg 8 | lcov -c -d . -o kyber$alg.lcov 9 | lcov -z -d . 10 | rm test/test_kyber$alg 11 | done 12 | # merge the three per-parameter-set traces into kyber.lcov 13 | lcov -o kyber.lcov \ 14 | -a kyber512.lcov \ 15 | -a kyber768.lcov \ 16 | -a kyber1024.lcov 17 | # remove the test driver itself from the merged trace 18 | lcov -r kyber.lcov -o kyber.lcov \ 19 | '*/test/test_kyber.c' 20 | 21 | exit 0 22 | -------------------------------------------------------------------------------- /avx2/rejsample.h: -------------------------------------------------------------------------------- 1 | #ifndef REJSAMPLE_H 2 | #define REJSAMPLE_H 3 | 4 | #include 5 | #include "params.h" 6 | #include "symmetric.h" 7 | 8 | #define REJ_UNIFORM_AVX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) 9 | #define REJ_UNIFORM_AVX_BUFLEN (REJ_UNIFORM_AVX_NBLOCKS*XOF_BLOCKBYTES) 10 | 11 | #define rej_uniform_avx KYBER_NAMESPACE(rej_uniform_avx) 12 | unsigned int rej_uniform_avx(int16_t *r, const uint8_t *buf); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /ref/verify.h: -------------------------------------------------------------------------------- 1 | #ifndef VERIFY_H 2 | #define VERIFY_H 3 | 4 | #include 5 | #include 6 | #include "params.h" 7 | 8 | #define verify KYBER_NAMESPACE(verify) 9 | int verify(const uint8_t *a, const uint8_t *b, size_t len); 10 | 11 | #define cmov KYBER_NAMESPACE(cmov) 12 | void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); 13 | 14 | #define cmov_int16 KYBER_NAMESPACE(cmov_int16) 15 | void cmov_int16(int16_t *r, int16_t v, uint16_t b); 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /ref/ntt.h: -------------------------------------------------------------------------------- 1 | #ifndef NTT_H 2 | #define NTT_H 3 | 4 | #include 5 | #include "params.h" 6 | 7 | #define zetas
KYBER_NAMESPACE(zetas) 8 | extern const int16_t zetas[128]; 9 | 10 | #define ntt KYBER_NAMESPACE(ntt) 11 | void ntt(int16_t poly[256]); 12 | 13 | #define invntt KYBER_NAMESPACE(invntt) 14 | void invntt(int16_t poly[256]); 15 | 16 | #define basemul KYBER_NAMESPACE(basemul) 17 | void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /avx2/shuffle.inc: -------------------------------------------------------------------------------- 1 | .macro shuffle8 r0,r1,r2,r3 2 | vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 3 | vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 4 | .endm 5 | 6 | .macro shuffle4 r0,r1,r2,r3 7 | vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 8 | vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 9 | .endm 10 | 11 | .macro shuffle2 r0,r1,r2,r3 12 | #vpsllq $32,%ymm\r1,%ymm\r2 13 | vmovsldup %ymm\r1,%ymm\r2 14 | vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 15 | vpsrlq $32,%ymm\r0,%ymm\r0 16 | #vmovshdup %ymm\r0,%ymm\r0 17 | vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 18 | .endm 19 | 20 | .macro shuffle1 r0,r1,r2,r3 21 | vpslld $16,%ymm\r1,%ymm\r2 22 | vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 23 | vpsrld $16,%ymm\r0,%ymm\r0 24 | vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 25 | .endm 26 | -------------------------------------------------------------------------------- /ref/test/cpucycles.h: -------------------------------------------------------------------------------- 1 | #ifndef CPUCYCLES_H 2 | #define CPUCYCLES_H 3 | 4 | #include 5 | 6 | #ifdef USE_RDPMC /* Needs echo 2 > /sys/devices/cpu/rdpmc */ 7 | 8 | static inline uint64_t cpucycles(void) { 9 | const uint32_t ecx = (1U << 30) + 1; 10 | uint64_t result; 11 | 12 | __asm__ volatile ("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax" 13 | : "=a" (result) : "c" (ecx) : "rdx"); 14 | 15 | return result; 16 | } 17 | 18 | #else 19 | 20 | static inline uint64_t cpucycles(void) { 21 | uint64_t result; 22 | 23 | __asm__ volatile ("rdtsc; shlq 
$32,%%rdx; orq %%rdx,%%rax" 24 | : "=a" (result) : : "%rdx"); 25 | 26 | return result; 27 | } 28 | 29 | #endif 30 | 31 | uint64_t cpucycles_overhead(void); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /avx2/fq.inc: -------------------------------------------------------------------------------- 1 | .macro red16 r,rs=0,x=12 2 | vpmulhw %ymm1,%ymm\r,%ymm\x 3 | .if \rs 4 | vpmulhrsw %ymm\rs,%ymm\x,%ymm\x 5 | .else 6 | vpsraw $10,%ymm\x,%ymm\x 7 | .endif 8 | vpmullw %ymm0,%ymm\x,%ymm\x 9 | vpsubw %ymm\x,%ymm\r,%ymm\r 10 | .endm 11 | 12 | .macro csubq r,x=12 13 | vpsubw %ymm0,%ymm\r,%ymm\r 14 | vpsraw $15,%ymm\r,%ymm\x 15 | vpand %ymm0,%ymm\x,%ymm\x 16 | vpaddw %ymm\x,%ymm\r,%ymm\r 17 | .endm 18 | 19 | .macro caddq r,x=12 20 | vpsraw $15,%ymm\r,%ymm\x 21 | vpand %ymm0,%ymm\x,%ymm\x 22 | vpaddw %ymm\x,%ymm\r,%ymm\r 23 | .endm 24 | 25 | .macro fqmulprecomp al,ah,b,x=12 26 | vpmullw %ymm\al,%ymm\b,%ymm\x 27 | vpmulhw %ymm\ah,%ymm\b,%ymm\b 28 | vpmulhw %ymm0,%ymm\x,%ymm\x 29 | vpsubw %ymm\x,%ymm\b,%ymm\b 30 | .endm 31 | -------------------------------------------------------------------------------- /runtests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | nproc="${nproc:-2}" # parallel make jobs; defaults to 2, override via the environment 3 | 4 | ARCH="${ARCH:-amd64}" 5 | ARCH="${TRAVIS_CPU_ARCH:-$ARCH}" 6 | 7 | if [ "$ARCH" = "amd64" ] && [ "$TRAVIS_OS_NAME" != "osx" ]; then 8 | DIRS="ref avx2" 9 | else 10 | DIRS="ref" 11 | fi 12 | 13 | if [ "$ARCH" = "amd64" ] || [ "$ARCH" = "arm64" ]; then 14 | export CC=/usr/bin/gcc 15 | # export CFLAGS="-fsanitize=undefined,address ${CFLAGS}" 16 | fi 17 | # build each selected implementation, run its self-tests, and check the test vectors against SHA256SUMS 18 | for dir in $DIRS; do 19 | make -j"$nproc" -C $dir clean 20 | make -j"$nproc" -C $dir 21 | for alg in 512 768 1024; do 22 | valgrind --vex-guest-max-insns=25 ./$dir/test/test_kyber$alg 23 | echo test_kyber$alg 24 | ./$dir/test/test_kyber$alg 25 | ./$dir/test/test_vectors$alg > tvecs$alg 26 | done 27 | shasum -a256 -c SHA256SUMS 28 | done
29 | 30 | exit 0 31 | -------------------------------------------------------------------------------- /avx2/ntt.h: -------------------------------------------------------------------------------- 1 | #ifndef NTT_H 2 | #define NTT_H 3 | 4 | #include 5 | #include 6 | 7 | #define ntt_avx KYBER_NAMESPACE(ntt_avx) 8 | void ntt_avx(__m256i *r, const __m256i *qdata); 9 | #define invntt_avx KYBER_NAMESPACE(invntt_avx) 10 | void invntt_avx(__m256i *r, const __m256i *qdata); 11 | 12 | #define nttpack_avx KYBER_NAMESPACE(nttpack_avx) 13 | void nttpack_avx(__m256i *r, const __m256i *qdata); 14 | #define nttunpack_avx KYBER_NAMESPACE(nttunpack_avx) 15 | void nttunpack_avx(__m256i *r, const __m256i *qdata); 16 | 17 | #define basemul_avx KYBER_NAMESPACE(basemul_avx) 18 | void basemul_avx(__m256i *r, 19 | const __m256i *a, 20 | const __m256i *b, 21 | const __m256i *qdata); 22 | 23 | #define ntttobytes_avx KYBER_NAMESPACE(ntttobytes_avx) 24 | void ntttobytes_avx(uint8_t *r, const __m256i *a, const __m256i *qdata); 25 | #define nttfrombytes_avx KYBER_NAMESPACE(nttfrombytes_avx) 26 | void nttfrombytes_avx(__m256i *r, const uint8_t *a, const __m256i *qdata); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /ref/indcpa.h: -------------------------------------------------------------------------------- 1 | #ifndef INDCPA_H 2 | #define INDCPA_H 3 | 4 | #include 5 | #include "params.h" 6 | #include "polyvec.h" 7 | 8 | #define gen_matrix KYBER_NAMESPACE(gen_matrix) 9 | void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed); 10 | 11 | #define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand) 12 | void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], 13 | uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES], 14 | const uint8_t coins[KYBER_SYMBYTES]); 15 | 16 | #define indcpa_enc KYBER_NAMESPACE(indcpa_enc) 17 | void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES], 18 | const uint8_t m[KYBER_INDCPA_MSGBYTES], 
19 | const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES], 20 | const uint8_t coins[KYBER_SYMBYTES]); 21 | 22 | #define indcpa_dec KYBER_NAMESPACE(indcpa_dec) 23 | void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES], 24 | const uint8_t c[KYBER_INDCPA_BYTES], 25 | const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | os: linux 3 | dist: bionic 4 | compiler: 5 | - gcc 6 | - clang 7 | arch: 8 | - amd64 9 | - arm64 10 | - ppc64le 11 | - s390x 12 | script: ./runtests.sh 13 | 14 | jobs: 15 | include: 16 | - os: osx 17 | compiler: clang 18 | env: 19 | - CFLAGS="-I/usr/local/opt/openssl@1.1/include/" 20 | - NISTFLAGS="-I/usr/local/opt/openssl@1.1/include/" 21 | - LDFLAGS="-L/usr/local/opt/openssl@1.1/lib/" 22 | 23 | - stage: coverage 24 | os: linux 25 | compiler: gcc 26 | before_install: 27 | - sudo apt-get update 28 | - sudo apt-get install -y lcov 29 | - gem install coveralls-lcov 30 | script: ./runlcov.sh 31 | after_success: 32 | - coveralls-lcov ref/kyber.lcov 33 | 34 | allow_failures: 35 | - os: windows 36 | compiler: msvc19 37 | before_install: 38 | - choco install openssl 39 | script: 40 | - mkdir build 41 | - cd build 42 | - cmake -A x64 .. 43 | - cmake --build . 44 | - ctest --output-on-failure 45 | 46 | -------------------------------------------------------------------------------- /avx2/keccak4x/KeccakP-align.h: -------------------------------------------------------------------------------- 1 | /* 2 | Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, 3 | Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby 4 | denoted as "the implementer". 
5 | 6 | For more information, feedback or questions, please refer to our websites: 7 | http://keccak.noekeon.org/ 8 | http://keyak.noekeon.org/ 9 | http://ketje.noekeon.org/ 10 | 11 | To the extent possible under law, the implementer has waived all copyright 12 | and related or neighboring rights to the source code in this file. 13 | http://creativecommons.org/publicdomain/zero/1.0/ 14 | */ 15 | 16 | #ifndef _keccakp_align_h_ 17 | #define _keccakp_align_h_ 18 | 19 | /* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ 20 | #ifdef ALIGN 21 | #undef ALIGN 22 | #endif 23 | 24 | #if defined(__GNUC__) 25 | #define ALIGN(x) __attribute__ ((aligned(x))) 26 | #elif defined(_MSC_VER) 27 | #define ALIGN(x) __declspec(align(x)) 28 | #elif defined(__ARMCC_VERSION) 29 | #define ALIGN(x) __align(x) 30 | #else 31 | #define ALIGN(x) 32 | #endif 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /Common_META.yml: -------------------------------------------------------------------------------- 1 | commons: 2 | - name: common_ref 3 | folder_name: ref 4 | sources: fips202.c fips202.h 5 | - name: common_aes 6 | folder_name: avx2 7 | sources: aes256ctr.c aes256ctr.h 8 | supported_platforms: 9 | - architecture: x86_64 10 | operating_systems: 11 | - Darwin 12 | - Linux 13 | required_flags: 14 | - sse2 15 | - ssse3 16 | - name: common_avx2 17 | folder_name: avx2 18 | sources: fips202.c fips202.h fips202x4.c fips202x4.h 19 | supported_platforms: 20 | - architecture: x86_64 21 | operating_systems: 22 | - Darwin 23 | - Linux 24 | required_flags: 25 | - avx2 26 | - name: common_keccak4x_avx2 27 | folder_name: avx2 28 | sources: fips202x4.h keccak4x/KeccakP-1600-times4-SIMD256.c keccak4x/KeccakP-1600-times4-SnP.h keccak4x/KeccakP-1600-unrolling.macros keccak4x/KeccakP-SIMD256-config.h keccak4x/KeccakP-align.h keccak4x/KeccakP-brg_endian.h 29 | supported_platforms: 30 | - architecture: x86_64 
31 | operating_systems: 32 | - Darwin 33 | - Linux 34 | required_flags: 35 | - avx2 36 | -------------------------------------------------------------------------------- /avx2/consts.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTS_H 2 | #define CONSTS_H 3 | 4 | #include "params.h" 5 | 6 | #define _16XQ 0 7 | #define _16XQINV 16 8 | #define _16XV 32 9 | #define _16XFLO 48 10 | #define _16XFHI 64 11 | #define _16XMONTSQLO 80 12 | #define _16XMONTSQHI 96 13 | #define _16XMASK 112 14 | #define _REVIDXB 128 15 | #define _REVIDXD 144 16 | #define _ZETAS_EXP 160 17 | #define _16XSHIFT 624 18 | 19 | /* The C ABI on MacOS exports all symbols with a leading 20 | * underscore. This means that any symbols we refer to from 21 | * C files (functions) can't be found, and all symbols we 22 | * refer to from ASM also can't be found. 23 | * 24 | * This define helps us get around this 25 | */ 26 | #ifdef __ASSEMBLER__ 27 | #if defined(__WIN32__) || defined(__APPLE__) 28 | #define decorate(s) _##s 29 | #define cdecl2(s) decorate(s) 30 | #define cdecl(s) cdecl2(KYBER_NAMESPACE(##s)) 31 | #else 32 | #define cdecl(s) KYBER_NAMESPACE(##s) 33 | #endif 34 | #endif 35 | 36 | #ifndef __ASSEMBLER__ 37 | #include "align.h" 38 | typedef ALIGNED_INT16(640) qdata_t; 39 | #define qdata KYBER_NAMESPACE(qdata) 40 | extern const qdata_t qdata; 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /ref/kem.h: -------------------------------------------------------------------------------- 1 | #ifndef KEM_H 2 | #define KEM_H 3 | 4 | #include 5 | #include "params.h" 6 | 7 | #define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES 8 | #define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES 9 | #define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES 10 | #define CRYPTO_BYTES KYBER_SSBYTES 11 | 12 | #if (KYBER_K == 2) 13 | #define CRYPTO_ALGNAME "Kyber512" 14 | #elif (KYBER_K == 3) 15 | #define 
CRYPTO_ALGNAME "Kyber768" 16 | #elif (KYBER_K == 4) 17 | #define CRYPTO_ALGNAME "Kyber1024" 18 | #endif 19 | 20 | #define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) 21 | int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 22 | 23 | #define crypto_kem_keypair KYBER_NAMESPACE(keypair) 24 | int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); 25 | 26 | #define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand) 27 | int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 28 | 29 | #define crypto_kem_enc KYBER_NAMESPACE(enc) 30 | int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 31 | 32 | #define crypto_kem_dec KYBER_NAMESPACE(dec) 33 | int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /ref/test/speed_print.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "cpucycles.h" 6 | #include "speed_print.h" 7 | /* qsort comparator: orders two uint64_t values ascending (-1, 0 or 1) */ 8 | static int cmp_uint64(const void *a, const void *b) { 9 | if(*(const uint64_t *)a < *(const uint64_t *)b) return -1; 10 | if(*(const uint64_t *)a > *(const uint64_t *)b) return 1; 11 | return 0; 12 | } 13 | /* Sorts l in place and returns its median; llen must be nonzero. */ 14 | static uint64_t median(uint64_t *l, size_t llen) { 15 | qsort(l,llen,sizeof(uint64_t),cmp_uint64); 16 | 17 | if(llen%2) return l[llen/2]; 18 | else return l[llen/2-1]+(l[llen/2]-l[llen/2-1])/2; /* midpoint of the two central values, written so the sum cannot wrap */ 19 | } 20 | 21 | static uint64_t average(uint64_t *t, size_t tlen) { 22 | size_t i; 23 | uint64_t acc=0; 24 | 25 | for(i=0;i 5 | #include "params.h" 6 | #include "poly.h" 7 | 8 | typedef struct{ 9 | poly vec[KYBER_K]; 10 | } polyvec; 11 | 12 | #define polyvec_compress KYBER_NAMESPACE(polyvec_compress) 13 | void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a); 14 | #define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) 15 | void polyvec_decompress(polyvec *r,
const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]); 16 | 17 | #define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) 18 | void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); 19 | #define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) 20 | void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); 21 | 22 | #define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) 23 | void polyvec_ntt(polyvec *r); 24 | #define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) 25 | void polyvec_invntt_tomont(polyvec *r); 26 | 27 | #define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) 28 | void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); 29 | 30 | #define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) 31 | void polyvec_reduce(polyvec *r); 32 | 33 | #define polyvec_add KYBER_NAMESPACE(polyvec_add) 34 | void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /avx2/polyvec.h: -------------------------------------------------------------------------------- 1 | #ifndef POLYVEC_H 2 | #define POLYVEC_H 3 | 4 | #include 5 | #include "params.h" 6 | #include "poly.h" 7 | 8 | typedef struct{ 9 | poly vec[KYBER_K]; 10 | } polyvec; 11 | 12 | #define polyvec_compress KYBER_NAMESPACE(polyvec_compress) 13 | void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); 14 | #define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress) 15 | void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]); 16 | 17 | #define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes) 18 | void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a); 19 | #define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes) 20 | void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]); 21 | 22 | #define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt) 
23 | void polyvec_ntt(polyvec *r); 24 | #define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont) 25 | void polyvec_invntt_tomont(polyvec *r); 26 | 27 | #define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery) 28 | void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); 29 | 30 | #define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce) 31 | void polyvec_reduce(polyvec *r); 32 | 33 | #define polyvec_add KYBER_NAMESPACE(polyvec_add) 34 | void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /ref/symmetric.h: -------------------------------------------------------------------------------- 1 | #ifndef SYMMETRIC_H 2 | #define SYMMETRIC_H 3 | 4 | #include 5 | #include 6 | #include "params.h" 7 | 8 | #include "fips202.h" 9 | 10 | typedef keccak_state xof_state; 11 | 12 | #define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) 13 | void kyber_shake128_absorb(keccak_state *s, 14 | const uint8_t seed[KYBER_SYMBYTES], 15 | uint8_t x, 16 | uint8_t y); 17 | 18 | #define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) 19 | void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); 20 | 21 | #define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) 22 | void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); 23 | 24 | #define XOF_BLOCKBYTES SHAKE128_RATE 25 | 26 | #define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) 27 | #define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) 28 | #define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) 29 | #define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) 30 | #define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, 
NONCE) 31 | #define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) 32 | 33 | #endif /* SYMMETRIC_H */ 34 | -------------------------------------------------------------------------------- /ref/reduce.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "params.h" 3 | #include "reduce.h" 4 | 5 | /************************************************* 6 | * Name: montgomery_reduce 7 | * 8 | * Description: Montgomery reduction; given a 32-bit integer a, computes 9 | * 16-bit integer congruent to a * R^-1 mod q, where R=2^16 10 | * 11 | * Arguments: - int32_t a: input integer to be reduced; 12 | * has to be in {-q2^15,...,q2^15-1} 13 | * 14 | * Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. 15 | **************************************************/ 16 | int16_t montgomery_reduce(int32_t a) 17 | { 18 | int16_t t; 19 | 20 | t = (int16_t)a*QINV; 21 | t = (a - (int32_t)t*KYBER_Q) >> 16; 22 | return t; 23 | } 24 | 25 | /************************************************* 26 | * Name: barrett_reduce 27 | * 28 | * Description: Barrett reduction; given a 16-bit integer a, computes 29 | * centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} 30 | * 31 | * Arguments: - int16_t a: input integer to be reduced 32 | * 33 | * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. 
34 | **************************************************/ 35 | int16_t barrett_reduce(int16_t a) { 36 | int16_t t; 37 | const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; 38 | 39 | t = ((int32_t)v*a + (1<<25)) >> 26; 40 | t *= KYBER_Q; 41 | return a - t; 42 | } 43 | -------------------------------------------------------------------------------- /avx2/symmetric.h: -------------------------------------------------------------------------------- 1 | #ifndef SYMMETRIC_H 2 | #define SYMMETRIC_H 3 | 4 | #include 5 | #include 6 | #include "params.h" 7 | 8 | #include "fips202.h" 9 | #include "fips202x4.h" 10 | 11 | typedef keccak_state xof_state; 12 | 13 | #define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb) 14 | void kyber_shake128_absorb(keccak_state *s, 15 | const uint8_t seed[KYBER_SYMBYTES], 16 | uint8_t x, 17 | uint8_t y); 18 | 19 | #define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf) 20 | void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); 21 | 22 | #define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf) 23 | void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]); 24 | 25 | #define XOF_BLOCKBYTES SHAKE128_RATE 26 | 27 | #define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) 28 | #define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) 29 | #define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) 30 | #define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) 31 | #define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) 32 | #define rkprf(OUT, KEY, INPUT) kyber_shake256_rkprf(OUT, KEY, INPUT) 33 | 34 | #endif /* SYMMETRIC_H */ 35 | -------------------------------------------------------------------------------- /ref/nistkat/rng.h: -------------------------------------------------------------------------------- 1 
| // 2 | // rng.h 3 | // 4 | // Created by Bassham, Lawrence E (Fed) on 8/29/17. 5 | // Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. 6 | // 7 | 8 | #ifndef rng_h 9 | #define rng_h 10 | 11 | #include 12 | 13 | #define RNG_SUCCESS 0 14 | #define RNG_BAD_MAXLEN -1 15 | #define RNG_BAD_OUTBUF -2 16 | #define RNG_BAD_REQ_LEN -3 17 | 18 | typedef struct { 19 | unsigned char buffer[16]; 20 | int buffer_pos; 21 | unsigned long length_remaining; 22 | unsigned char key[32]; 23 | unsigned char ctr[16]; 24 | } AES_XOF_struct; 25 | 26 | typedef struct { 27 | unsigned char Key[32]; 28 | unsigned char V[16]; 29 | int reseed_counter; 30 | } AES256_CTR_DRBG_struct; 31 | 32 | 33 | void 34 | AES256_CTR_DRBG_Update(unsigned char *provided_data, 35 | unsigned char *Key, 36 | unsigned char *V); 37 | 38 | int 39 | seedexpander_init(AES_XOF_struct *ctx, 40 | unsigned char *seed, 41 | unsigned char *diversifier, 42 | unsigned long maxlen); 43 | 44 | int 45 | seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen); 46 | 47 | void 48 | randombytes_init(unsigned char *entropy_input, 49 | unsigned char *personalization_string, 50 | int security_strength); 51 | 52 | int 53 | randombytes(unsigned char *x, unsigned long long xlen); 54 | 55 | #endif /* rng_h */ 56 | -------------------------------------------------------------------------------- /avx2/fq.S: -------------------------------------------------------------------------------- 1 | #include "consts.h" 2 | .include "fq.inc" 3 | 4 | .text 5 | reduce128_avx: 6 | #load 7 | vmovdqa (%rdi),%ymm2 8 | vmovdqa 32(%rdi),%ymm3 9 | vmovdqa 64(%rdi),%ymm4 10 | vmovdqa 96(%rdi),%ymm5 11 | vmovdqa 128(%rdi),%ymm6 12 | vmovdqa 160(%rdi),%ymm7 13 | vmovdqa 192(%rdi),%ymm8 14 | vmovdqa 224(%rdi),%ymm9 15 | 16 | red16 2 17 | red16 3 18 | red16 4 19 | red16 5 20 | red16 6 21 | red16 7 22 | red16 8 23 | red16 9 24 | 25 | #store 26 | vmovdqa %ymm2,(%rdi) 27 | vmovdqa %ymm3,32(%rdi) 28 | vmovdqa %ymm4,64(%rdi) 29 | vmovdqa 
%ymm5,96(%rdi) 30 | vmovdqa %ymm6,128(%rdi) 31 | vmovdqa %ymm7,160(%rdi) 32 | vmovdqa %ymm8,192(%rdi) 33 | vmovdqa %ymm9,224(%rdi) 34 | 35 | ret 36 | 37 | .global cdecl(reduce_avx) 38 | cdecl(reduce_avx): 39 | #consts 40 | vmovdqa _16XQ*2(%rsi),%ymm0 41 | vmovdqa _16XV*2(%rsi),%ymm1 42 | call reduce128_avx 43 | add $256,%rdi 44 | call reduce128_avx 45 | ret 46 | 47 | tomont128_avx: 48 | #load 49 | vmovdqa (%rdi),%ymm3 50 | vmovdqa 32(%rdi),%ymm4 51 | vmovdqa 64(%rdi),%ymm5 52 | vmovdqa 96(%rdi),%ymm6 53 | vmovdqa 128(%rdi),%ymm7 54 | vmovdqa 160(%rdi),%ymm8 55 | vmovdqa 192(%rdi),%ymm9 56 | vmovdqa 224(%rdi),%ymm10 57 | 58 | fqmulprecomp 1,2,3,11 59 | fqmulprecomp 1,2,4,12 60 | fqmulprecomp 1,2,5,13 61 | fqmulprecomp 1,2,6,14 62 | fqmulprecomp 1,2,7,15 63 | fqmulprecomp 1,2,8,11 64 | fqmulprecomp 1,2,9,12 65 | fqmulprecomp 1,2,10,13 66 | 67 | #store 68 | vmovdqa %ymm3,(%rdi) 69 | vmovdqa %ymm4,32(%rdi) 70 | vmovdqa %ymm5,64(%rdi) 71 | vmovdqa %ymm6,96(%rdi) 72 | vmovdqa %ymm7,128(%rdi) 73 | vmovdqa %ymm8,160(%rdi) 74 | vmovdqa %ymm9,192(%rdi) 75 | vmovdqa %ymm10,224(%rdi) 76 | 77 | ret 78 | 79 | .global cdecl(tomont_avx) 80 | cdecl(tomont_avx): 81 | #consts 82 | vmovdqa _16XQ*2(%rsi),%ymm0 83 | vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 84 | vmovdqa _16XMONTSQHI*2(%rsi),%ymm2 85 | call tomont128_avx 86 | add $256,%rdi 87 | call tomont128_avx 88 | ret 89 | -------------------------------------------------------------------------------- /ref/params.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMS_H 2 | #define PARAMS_H 3 | 4 | #ifndef KYBER_K 5 | #define KYBER_K 3 /* Change this for different security strengths */ 6 | #endif 7 | 8 | 9 | /* Don't change parameters below this line */ 10 | #if (KYBER_K == 2) 11 | #define KYBER_NAMESPACE(s) pqcrystals_kyber512_ref_##s 12 | #elif (KYBER_K == 3) 13 | #define KYBER_NAMESPACE(s) pqcrystals_kyber768_ref_##s 14 | #elif (KYBER_K == 4) 15 | #define KYBER_NAMESPACE(s) 
pqcrystals_kyber1024_ref_##s 16 | #else 17 | #error "KYBER_K must be in {2,3,4}" 18 | #endif 19 | 20 | #define KYBER_N 256 21 | #define KYBER_Q 3329 22 | 23 | #define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ 24 | #define KYBER_SSBYTES 32 /* size in bytes of shared key */ 25 | 26 | #define KYBER_POLYBYTES 384 27 | #define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) 28 | 29 | #if KYBER_K == 2 30 | #define KYBER_ETA1 3 31 | #define KYBER_POLYCOMPRESSEDBYTES 128 32 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) 33 | #elif KYBER_K == 3 34 | #define KYBER_ETA1 2 35 | #define KYBER_POLYCOMPRESSEDBYTES 128 36 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) 37 | #elif KYBER_K == 4 38 | #define KYBER_ETA1 2 39 | #define KYBER_POLYCOMPRESSEDBYTES 160 40 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) 41 | #endif 42 | 43 | #define KYBER_ETA2 2 44 | 45 | #define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) 46 | #define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) 47 | #define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) 48 | #define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) 49 | 50 | #define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) 51 | /* 32 bytes of additional space to save H(pk) */ 52 | #define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) 53 | #define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /ref/poly.h: -------------------------------------------------------------------------------- 1 | #ifndef POLY_H 2 | #define POLY_H 3 | 4 | #include 5 | #include "params.h" 6 | 7 | /* 8 | * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial 9 | * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] 10 | */ 11 | typedef struct{ 12 | int16_t coeffs[KYBER_N]; 13 | } poly; 14 | 15 | #define poly_compress KYBER_NAMESPACE(poly_compress) 16 | void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); 17 | #define poly_decompress KYBER_NAMESPACE(poly_decompress) 18 | void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); 19 | 20 | #define poly_tobytes KYBER_NAMESPACE(poly_tobytes) 21 | void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); 22 | #define poly_frombytes KYBER_NAMESPACE(poly_frombytes) 23 | void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); 24 | 25 | #define poly_frommsg KYBER_NAMESPACE(poly_frommsg) 26 | void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); 27 | #define poly_tomsg KYBER_NAMESPACE(poly_tomsg) 28 | void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); 29 | 30 | #define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) 31 | void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); 32 | 33 | #define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) 34 | void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); 35 | 36 | #define poly_ntt KYBER_NAMESPACE(poly_ntt) 37 | void poly_ntt(poly *r); 38 | #define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) 39 | void poly_invntt_tomont(poly *r); 40 | #define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) 41 | void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); 42 | #define poly_tomont KYBER_NAMESPACE(poly_tomont) 43 | void poly_tomont(poly *r); 44 | 45 | #define poly_reduce KYBER_NAMESPACE(poly_reduce) 46 | void poly_reduce(poly *r); 47 | 48 | #define poly_add KYBER_NAMESPACE(poly_add) 49 | void poly_add(poly *r, const poly *a, const poly *b); 50 | #define poly_sub KYBER_NAMESPACE(poly_sub) 51 | void poly_sub(poly *r, const poly *a, const poly *b); 52 | 53 | #endif 54 | 
-------------------------------------------------------------------------------- /Kyber512_META.yml: -------------------------------------------------------------------------------- 1 | name: Kyber512 2 | type: kem 3 | claimed-nist-level: 1 4 | claimed-security: IND-CCA2 5 | length-public-key: 800 6 | length-ciphertext: 768 7 | length-secret-key: 1632 8 | length-shared-secret: 32 9 | nistkat-sha256: bb0481d3325d828817900b709d23917cefbc10026fc857f098979451f67bb0ca 10 | testvectors-sha256: 6730bb552c22d9d2176ffb5568e48eb30952cf1f065073ec5f9724f6a3c6ea85 11 | principal-submitters: 12 | - Peter Schwabe 13 | auxiliary-submitters: 14 | - Roberto Avanzi 15 | - Joppe Bos 16 | - Léo Ducas 17 | - Eike Kiltz 18 | - Tancrède Lepoint 19 | - Vadim Lyubashevsky 20 | - John M. Schanck 21 | - Gregor Seiler 22 | - Damien Stehlé 23 | implementations: 24 | - name: ref 25 | version: https://github.com/pq-crystals/kyber/commit/74cad307858b61e434490c75f812cb9b9ef7279b 26 | folder_name: ref 27 | compile_opts: -DKYBER_K=2 28 | signature_keypair: pqcrystals_kyber512_ref_keypair 29 | signature_enc: pqcrystals_kyber512_ref_enc 30 | signature_dec: pqcrystals_kyber512_ref_dec 31 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c 32 | common_dep: common_ref 33 | - name: avx2 34 | version: https://github.com/pq-crystals/kyber/commit/36414d64fc1890ed58d1ca8b1e0cab23635d1ac2 35 | compile_opts: -DKYBER_K=2 36 | signature_keypair: pqcrystals_kyber512_avx2_keypair 37 | signature_enc: pqcrystals_kyber512_avx2_enc 38 | signature_dec: pqcrystals_kyber512_avx2_dec 39 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h 
symmetric-shake.c 40 | common_dep: common_avx2 common_keccak4x_avx2 41 | supported_platforms: 42 | - architecture: x86_64 43 | operating_systems: 44 | - Linux 45 | - Darwin 46 | required_flags: 47 | - avx2 48 | - bmi2 49 | - popcnt 50 | -------------------------------------------------------------------------------- /Kyber768_META.yml: -------------------------------------------------------------------------------- 1 | name: Kyber768 2 | type: kem 3 | claimed-nist-level: 3 4 | claimed-security: IND-CCA2 5 | length-public-key: 1184 6 | length-ciphertext: 1088 7 | length-secret-key: 2400 8 | length-shared-secret: 32 9 | nistkat-sha256: 89e82a5bf2d4ddb2c6444e10409e6d9ca65dafbca67d1a0db2c9b54920a29172 10 | testvectors-sha256: 667c8ca2ca93729c0df6ff24588460bad1bbdbfb64ece0fe8563852a7ff348c6 11 | principal-submitters: 12 | - Peter Schwabe 13 | auxiliary-submitters: 14 | - Roberto Avanzi 15 | - Joppe Bos 16 | - Léo Ducas 17 | - Eike Kiltz 18 | - Tancrède Lepoint 19 | - Vadim Lyubashevsky 20 | - John M. 
Schanck 21 | - Gregor Seiler 22 | - Damien Stehlé 23 | implementations: 24 | - name: ref 25 | version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff 26 | folder_name: ref 27 | compile_opts: -DKYBER_K=3 28 | signature_keypair: pqcrystals_kyber768_ref_keypair 29 | signature_enc: pqcrystals_kyber768_ref_enc 30 | signature_dec: pqcrystals_kyber768_ref_dec 31 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c 32 | common_dep: common_ref 33 | - name: avx2 34 | version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff 35 | compile_opts: -DKYBER_K=3 36 | signature_keypair: pqcrystals_kyber768_avx2_keypair 37 | signature_enc: pqcrystals_kyber768_avx2_enc 38 | signature_dec: pqcrystals_kyber768_avx2_dec 39 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c 40 | common_dep: common_avx2 common_keccak4x_avx2 41 | supported_platforms: 42 | - architecture: x86_64 43 | operating_systems: 44 | - Linux 45 | - Darwin 46 | required_flags: 47 | - avx2 48 | - bmi2 49 | - popcnt 50 | -------------------------------------------------------------------------------- /Kyber1024_META.yml: -------------------------------------------------------------------------------- 1 | name: Kyber1024 2 | type: kem 3 | claimed-nist-level: 5 4 | claimed-security: IND-CCA2 5 | length-public-key: 1568 6 | length-ciphertext: 1568 7 | length-secret-key: 3168 8 | length-shared-secret: 32 9 | nistkat-sha256: 5afcf2a568ad32d49b55105b032af1850f03f3888ff9e2a72f4059c58e968f60 10 | testvectors-sha256: 
ff1a854b9b6761a70c65ccae85246fe0596a949e72eae0866a8a2a2d4ea54b10 11 | principal-submitters: 12 | - Peter Schwabe 13 | auxiliary-submitters: 14 | - Roberto Avanzi 15 | - Joppe Bos 16 | - Léo Ducas 17 | - Eike Kiltz 18 | - Tancrède Lepoint 19 | - Vadim Lyubashevsky 20 | - John M. Schanck 21 | - Gregor Seiler 22 | - Damien Stehlé 23 | implementations: 24 | - name: ref 25 | version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff 26 | folder_name: ref 27 | compile_opts: -DKYBER_K=4 28 | signature_keypair: pqcrystals_kyber1024_ref_keypair 29 | signature_enc: pqcrystals_kyber1024_ref_enc 30 | signature_dec: pqcrystals_kyber1024_ref_dec 31 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c reduce.c ntt.c cbd.c verify.c kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h ntt.h cbd.h verify.h symmetric.h fips202.h symmetric-shake.c 32 | common_dep: common_ref 33 | - name: avx2 34 | version: https://github.com/pq-crystals/kyber/commit/28413dfbf523fdde181246451c2bd77199c0f7ff 35 | compile_opts: -DKYBER_K=4 36 | signature_keypair: pqcrystals_kyber1024_avx2_keypair 37 | signature_enc: pqcrystals_kyber1024_avx2_enc 38 | signature_dec: pqcrystals_kyber1024_avx2_dec 39 | sources: ../LICENSE kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S basemul.S consts.c rejsample.c cbd.c verify.c align.h kem.h params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h fips202.h fips202x4.h symmetric-shake.c 40 | common_dep: common_avx2 common_keccak4x_avx2 41 | supported_platforms: 42 | - architecture: x86_64 43 | operating_systems: 44 | - Linux 45 | - Darwin 46 | required_flags: 47 | - avx2 48 | - bmi2 49 | - popcnt 50 | -------------------------------------------------------------------------------- /ref/randombytes.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "randombytes.h" 5 | 6 | 
#ifdef _WIN32
#include <windows.h>
#include <wincrypt.h>
#else
#include <fcntl.h>
#include <errno.h>
#ifdef __linux__
/* NOTE(review): _GNU_SOURCE is defined after other system headers have
 * already been included, which is normally too late for it to take
 * effect -- confirm whether it should move to the top of the file or
 * into the build flags. */
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#elif __NetBSD__
#include <sys/random.h>
#else
#include <unistd.h>
#endif
#endif

/*************************************************
* Name:        randombytes
*
* Description: Fill a buffer with random bytes obtained from the operating
*              system. One implementation is selected at compile time:
*              CryptGenRandom (Windows), the getrandom system call (Linux
*              with SYS_getrandom), getrandom() (NetBSD), or reading
*              /dev/urandom (all other platforms). Calls interrupted by a
*              signal (EINTR) are retried; any other failure calls abort().
*
* Arguments:   - uint8_t *out: pointer to output buffer
*              - size_t outlen: number of bytes to write to out
**************************************************/
#ifdef _WIN32
void randombytes(uint8_t *out, size_t outlen) {
  HCRYPTPROV ctx;
  size_t len;

  if(!CryptAcquireContext(&ctx, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
    abort();

  while(outlen > 0) {
    /* request at most 1 MiB per call */
    len = (outlen > 1048576) ? 1048576 : outlen;
    if(!CryptGenRandom(ctx, len, (BYTE *)out))
      abort();

    out += len;
    outlen -= len;
  }

  if(!CryptReleaseContext(ctx, 0))
    abort();
}
#elif defined(__linux__) && defined(SYS_getrandom)
void randombytes(uint8_t *out, size_t outlen) {
  ssize_t ret;

  while(outlen > 0) {
    ret = syscall(SYS_getrandom, out, outlen, 0);
    if(ret == -1 && errno == EINTR)
      continue;
    else if(ret == -1)
      abort();

    /* a call may deliver fewer bytes than requested; keep going */
    out += ret;
    outlen -= ret;
  }
}
#elif defined(__NetBSD__)
void randombytes(uint8_t *out, size_t outlen) {
  ssize_t ret;

  while(outlen > 0) {
    ret = getrandom(out, outlen, 0);
    if(ret == -1 && errno == EINTR)
      continue;
    else if(ret == -1)
      abort();

    out += ret;
    outlen -= ret;
  }
}
#else
void randombytes(uint8_t *out, size_t outlen) {
  /* descriptor is opened on first use and kept for later calls */
  static int fd = -1;
  ssize_t ret;

  while(fd == -1) {
    fd = open("/dev/urandom", O_RDONLY);
    if(fd == -1 && errno == EINTR)
      continue;
    else if(fd == -1)
      abort();
  }

  while(outlen > 0) {
    ret = read(fd, out, outlen);
    if(ret == -1 && errno == EINTR)
      continue;
    else if(ret <= 0)
      /* error, or end-of-file (ret == 0): without this check a zero-byte
       * read would make no progress and the loop would never end */
      abort();

    out += ret;
    outlen -= ret;
  }
}
#endif
-------------------------------------------------------------------------------- 1 | #ifndef PARAMS_H 2 | #define PARAMS_H 3 | 4 | #ifndef KYBER_K 5 | #define KYBER_K 3 /* Change this for different security strengths */ 6 | #endif 7 | 8 | //#define KYBER_90S /* Uncomment this if you want the 90S variant */ 9 | 10 | /* Don't change parameters below this line */ 11 | #if (KYBER_K == 2) 12 | #ifdef KYBER_90S 13 | #define KYBER_NAMESPACE(s) pqcrystals_kyber512_90s_avx2_##s 14 | #else 15 | #define KYBER_NAMESPACE(s) pqcrystals_kyber512_avx2_##s 16 | #endif 17 | #elif (KYBER_K == 3) 18 | #ifdef KYBER_90S 19 | #define KYBER_NAMESPACE(s) pqcrystals_kyber768_90s_avx2_##s 20 | #else 21 | #define KYBER_NAMESPACE(s) pqcrystals_kyber768_avx2_##s 22 | #endif 23 | #elif (KYBER_K == 4) 24 | #ifdef KYBER_90S 25 | #define KYBER_NAMESPACE(s) pqcrystals_kyber1024_90s_avx2_##s 26 | #else 27 | #define KYBER_NAMESPACE(s) pqcrystals_kyber1024_avx2_##s 28 | #endif 29 | #else 30 | #error "KYBER_K must be in {2,3,4}" 31 | #endif 32 | 33 | #define KYBER_N 256 34 | #define KYBER_Q 3329 35 | 36 | #define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ 37 | #define KYBER_SSBYTES 32 /* size in bytes of shared key */ 38 | 39 | #define KYBER_POLYBYTES 384 40 | #define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) 41 | 42 | #if KYBER_K == 2 43 | #define KYBER_ETA1 3 44 | #define KYBER_POLYCOMPRESSEDBYTES 128 45 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) 46 | #elif KYBER_K == 3 47 | #define KYBER_ETA1 2 48 | #define KYBER_POLYCOMPRESSEDBYTES 128 49 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) 50 | #elif KYBER_K == 4 51 | #define KYBER_ETA1 2 52 | #define KYBER_POLYCOMPRESSEDBYTES 160 53 | #define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352) 54 | #endif 55 | 56 | #define KYBER_ETA2 2 57 | 58 | #define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES) 59 | #define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) 60 | #define KYBER_INDCPA_SECRETKEYBYTES 
(KYBER_POLYVECBYTES) 61 | #define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) 62 | 63 | #define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) 64 | /* 32 bytes of additional space to save H(pk) */ 65 | #define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) 66 | #define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES) 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /ref/test/test_vectors.c: -------------------------------------------------------------------------------- 1 | /* Deterministic randombytes by Daniel J. Bernstein */ 2 | /* taken from SUPERCOP (https://bench.cr.yp.to) */ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "../kem.h" 8 | #include "../randombytes.h" 9 | #include "../fips202.h" 10 | 11 | #define NTESTS 10000 12 | 13 | 14 | /* Initital state after absorbing empty string 15 | * Permute before squeeze is achieved by setting pos to SHAKE128_RATE */ 16 | static keccak_state rngstate = {{0x1F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (1ULL << 63), 0, 0, 0, 0}, SHAKE128_RATE}; 17 | 18 | void randombytes(uint8_t *x,size_t xlen) 19 | { 20 | shake128_squeeze(x, xlen, &rngstate); 21 | } 22 | 23 | int main(void) 24 | { 25 | unsigned int i,j; 26 | uint8_t pk[CRYPTO_PUBLICKEYBYTES]; 27 | uint8_t sk[CRYPTO_SECRETKEYBYTES]; 28 | uint8_t ct[CRYPTO_CIPHERTEXTBYTES]; 29 | uint8_t key_a[CRYPTO_BYTES]; 30 | uint8_t key_b[CRYPTO_BYTES]; 31 | 32 | for(i=0;i 5 | #include 6 | 7 | #define SHAKE128_RATE 168 8 | #define SHAKE256_RATE 136 9 | #define SHA3_256_RATE 136 10 | #define SHA3_512_RATE 72 11 | 12 | #define FIPS202_NAMESPACE(s) pqcrystals_kyber_fips202_ref_##s 13 | 14 | typedef struct { 15 | uint64_t s[25]; 16 | unsigned int pos; 17 | } keccak_state; 18 | 19 | #define shake128_init FIPS202_NAMESPACE(shake128_init) 20 | void shake128_init(keccak_state *state); 21 | #define shake128_absorb 
FIPS202_NAMESPACE(shake128_absorb) 22 | void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen); 23 | #define shake128_finalize FIPS202_NAMESPACE(shake128_finalize) 24 | void shake128_finalize(keccak_state *state); 25 | #define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze) 26 | void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state); 27 | #define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once) 28 | void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); 29 | #define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks) 30 | void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); 31 | 32 | #define shake256_init FIPS202_NAMESPACE(shake256_init) 33 | void shake256_init(keccak_state *state); 34 | #define shake256_absorb FIPS202_NAMESPACE(shake256_absorb) 35 | void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen); 36 | #define shake256_finalize FIPS202_NAMESPACE(shake256_finalize) 37 | void shake256_finalize(keccak_state *state); 38 | #define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze) 39 | void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); 40 | #define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once) 41 | void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); 42 | #define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks) 43 | void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); 44 | 45 | #define shake128 FIPS202_NAMESPACE(shake128) 46 | void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); 47 | #define shake256 FIPS202_NAMESPACE(shake256) 48 | void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); 49 | #define sha3_256 FIPS202_NAMESPACE(sha3_256) 50 | void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); 51 | #define sha3_512 FIPS202_NAMESPACE(sha3_512) 52 | void 
sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /avx2/fips202.h: -------------------------------------------------------------------------------- 1 | #ifndef FIPS202_H 2 | #define FIPS202_H 3 | 4 | #include 5 | #include 6 | 7 | #define SHAKE128_RATE 168 8 | #define SHAKE256_RATE 136 9 | #define SHA3_256_RATE 136 10 | #define SHA3_512_RATE 72 11 | 12 | #define FIPS202_NAMESPACE(s) pqcrystals_kyber_fips202_avx2_##s 13 | 14 | typedef struct { 15 | uint64_t s[25]; 16 | unsigned int pos; 17 | } keccak_state; 18 | 19 | #define shake128_init FIPS202_NAMESPACE(shake128_init) 20 | void shake128_init(keccak_state *state); 21 | #define shake128_absorb FIPS202_NAMESPACE(shake128_absorb) 22 | void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen); 23 | #define shake128_finalize FIPS202_NAMESPACE(shake128_finalize) 24 | void shake128_finalize(keccak_state *state); 25 | #define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze) 26 | void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state); 27 | #define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once) 28 | void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); 29 | #define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks) 30 | void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); 31 | 32 | #define shake256_init FIPS202_NAMESPACE(shake256_init) 33 | void shake256_init(keccak_state *state); 34 | #define shake256_absorb FIPS202_NAMESPACE(shake256_absorb) 35 | void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen); 36 | #define shake256_finalize FIPS202_NAMESPACE(shake256_finalize) 37 | void shake256_finalize(keccak_state *state); 38 | #define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze) 39 | void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); 40 | #define 
shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once) 41 | void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen); 42 | #define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks) 43 | void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); 44 | 45 | #define shake128 FIPS202_NAMESPACE(shake128) 46 | void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); 47 | #define shake256 FIPS202_NAMESPACE(shake256) 48 | void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); 49 | #define sha3_256 FIPS202_NAMESPACE(sha3_256) 50 | void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); 51 | #define sha3_512 FIPS202_NAMESPACE(sha3_512) 52 | void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /ref/verify.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "verify.h" 4 | 5 | /************************************************* 6 | * Name: verify 7 | * 8 | * Description: Compare two arrays for equality in constant time. 9 | * 10 | * Arguments: const uint8_t *a: pointer to first byte array 11 | * const uint8_t *b: pointer to second byte array 12 | * size_t len: length of the byte arrays 13 | * 14 | * Returns 0 if the byte arrays are equal, 1 otherwise 15 | **************************************************/ 16 | int verify(const uint8_t *a, const uint8_t *b, size_t len) 17 | { 18 | size_t i; 19 | uint8_t r = 0; 20 | 21 | for(i=0;i> 63; 25 | } 26 | 27 | /************************************************* 28 | * Name: cmov 29 | * 30 | * Description: Copy len bytes from x to r if b is 1; 31 | * don't modify r if b is 0. Requires b to be in {0,1}; 32 | * assumes two's complement representation of negative integers. 33 | * Runs in constant time. 
34 | * 35 | * Arguments: uint8_t *r: pointer to output byte array 36 | * const uint8_t *x: pointer to input byte array 37 | * size_t len: Amount of bytes to be copied 38 | * uint8_t b: Condition bit; has to be in {0,1} 39 | **************************************************/ 40 | void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) 41 | { 42 | size_t i; 43 | 44 | #if defined(__GNUC__) || defined(__clang__) 45 | // Prevent the compiler from 46 | // 1) inferring that b is 0/1-valued, and 47 | // 2) handling the two cases with a branch. 48 | // This is not necessary when verify.c and kem.c are separate translation 49 | // units, but we expect that downstream consumers will copy this code and/or 50 | // change how it is built. 51 | __asm__("" : "+r"(b) : /* no inputs */); 52 | #endif 53 | 54 | b = -b; 55 | for(i=0;i 5 | #include 6 | #include 7 | 8 | #define FIPS202X4_NAMESPACE(s) pqcrystals_kyber_fips202x4_avx2_##s 9 | 10 | typedef struct { 11 | __m256i s[25]; 12 | } keccakx4_state; 13 | 14 | #define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once) 15 | void shake128x4_absorb_once(keccakx4_state *state, 16 | const uint8_t *in0, 17 | const uint8_t *in1, 18 | const uint8_t *in2, 19 | const uint8_t *in3, 20 | size_t inlen); 21 | 22 | #define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks) 23 | void shake128x4_squeezeblocks(uint8_t *out0, 24 | uint8_t *out1, 25 | uint8_t *out2, 26 | uint8_t *out3, 27 | size_t nblocks, 28 | keccakx4_state *state); 29 | 30 | #define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once) 31 | void shake256x4_absorb_once(keccakx4_state *state, 32 | const uint8_t *in0, 33 | const uint8_t *in1, 34 | const uint8_t *in2, 35 | const uint8_t *in3, 36 | size_t inlen); 37 | 38 | #define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks) 39 | void shake256x4_squeezeblocks(uint8_t *out0, 40 | uint8_t *out1, 41 | uint8_t *out2, 42 | uint8_t *out3, 43 | size_t nblocks, 44 | 
keccakx4_state *state); 45 | 46 | #define shake128x4 FIPS202X4_NAMESPACE(shake128x4) 47 | void shake128x4(uint8_t *out0, 48 | uint8_t *out1, 49 | uint8_t *out2, 50 | uint8_t *out3, 51 | size_t outlen, 52 | const uint8_t *in0, 53 | const uint8_t *in1, 54 | const uint8_t *in2, 55 | const uint8_t *in3, 56 | size_t inlen); 57 | 58 | #define shake256x4 FIPS202X4_NAMESPACE(shake256x4) 59 | void shake256x4(uint8_t *out0, 60 | uint8_t *out1, 61 | uint8_t *out2, 62 | uint8_t *out3, 63 | size_t outlen, 64 | const uint8_t *in0, 65 | const uint8_t *in1, 66 | const uint8_t *in2, 67 | const uint8_t *in3, 68 | size_t inlen); 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /avx2/verify.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "verify.h" 5 | 6 | /************************************************* 7 | * Name: verify 8 | * 9 | * Description: Compare two arrays for equality in constant time. 10 | * 11 | * Arguments: const uint8_t *a: pointer to first byte array 12 | * const uint8_t *b: pointer to second byte array 13 | * size_t len: length of the byte arrays 14 | * 15 | * Returns 0 if the byte arrays are equal, 1 otherwise 16 | **************************************************/ 17 | int verify(const uint8_t *a, const uint8_t *b, size_t len) 18 | { 19 | size_t i; 20 | uint64_t r; 21 | __m256i f, g, h; 22 | 23 | h = _mm256_setzero_si256(); 24 | for(i=0;i> 63; 39 | return r; 40 | } 41 | 42 | /************************************************* 43 | * Name: cmov 44 | * 45 | * Description: Copy len bytes from x to r if b is 1; 46 | * don't modify r if b is 0. Requires b to be in {0,1}; 47 | * assumes two's complement representation of negative integers. 48 | * Runs in constant time. 
49 | * 50 | * Arguments: uint8_t *r: pointer to output byte array 51 | * const uint8_t *x: pointer to input byte array 52 | * size_t len: Amount of bytes to be copied 53 | * uint8_t b: Condition bit; has to be in {0,1} 54 | **************************************************/ 55 | void cmov(uint8_t * restrict r, const uint8_t *x, size_t len, uint8_t b) 56 | { 57 | size_t i; 58 | __m256i xvec, rvec, bvec; 59 | 60 | #if defined(__GNUC__) || defined(__clang__) 61 | // Prevent the compiler from 62 | // 1) inferring that b is 0/1-valued, and 63 | // 2) handling the two cases with a branch. 64 | // This is not necessary when verify.c and kem.c are separate translation 65 | // units, but we expect that downstream consumers will copy this code and/or 66 | // change how it is built. 67 | __asm__("" : "+r"(b) : /* no inputs */); 68 | #endif 69 | 70 | bvec = _mm256_set1_epi64x(-(uint64_t)b); 71 | for(i=0;i 2 | #include 3 | #include 4 | #include "params.h" 5 | #include "symmetric.h" 6 | #include "fips202.h" 7 | 8 | /************************************************* 9 | * Name: kyber_shake128_absorb 10 | * 11 | * Description: Absorb step of the SHAKE128 specialized for the Kyber context. 
12 | * 13 | * Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state 14 | * - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state 15 | * - uint8_t x: additional byte of input 16 | * - uint8_t y: additional byte of input 17 | **************************************************/ 18 | void kyber_shake128_absorb(keccak_state *state, 19 | const uint8_t seed[KYBER_SYMBYTES], 20 | uint8_t x, 21 | uint8_t y) 22 | { 23 | uint8_t extseed[KYBER_SYMBYTES+2]; 24 | 25 | memcpy(extseed, seed, KYBER_SYMBYTES); 26 | extseed[KYBER_SYMBYTES+0] = x; 27 | extseed[KYBER_SYMBYTES+1] = y; 28 | 29 | shake128_absorb_once(state, extseed, sizeof(extseed)); 30 | } 31 | 32 | /************************************************* 33 | * Name: kyber_shake256_prf 34 | * 35 | * Description: Usage of SHAKE256 as a PRF, concatenates secret and public input 36 | * and then generates outlen bytes of SHAKE256 output 37 | * 38 | * Arguments: - uint8_t *out: pointer to output 39 | * - size_t outlen: number of requested output bytes 40 | * - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) 41 | * - uint8_t nonce: single-byte nonce (public PRF input) 42 | **************************************************/ 43 | void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce) 44 | { 45 | uint8_t extkey[KYBER_SYMBYTES+1]; 46 | 47 | memcpy(extkey, key, KYBER_SYMBYTES); 48 | extkey[KYBER_SYMBYTES] = nonce; 49 | 50 | shake256(out, outlen, extkey, sizeof(extkey)); 51 | } 52 | 53 | /************************************************* 54 | * Name: kyber_shake256_rkprf 55 | * 56 | * Description: Usage of SHAKE256 as a PRF, absorbs the key followed by the input 57 | * and then generates KYBER_SSBYTES bytes of SHAKE256 output 58 | * 59 | * Arguments: - uint8_t *out: pointer to output (of length KYBER_SSBYTES) 60 | * - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES) 61 | * - const uint8_t *input: pointer to the input 62 | * 
(of length KYBER_CIPHERTEXTBYTES) 63 | **************************************************/ 64 | void kyber_shake256_rkprf(uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t input[KYBER_CIPHERTEXTBYTES]) 65 | { 66 | keccak_state s; 67 | 68 | shake256_init(&s); 69 | shake256_absorb(&s, key, KYBER_SYMBYTES); 70 | shake256_absorb(&s, input, KYBER_CIPHERTEXTBYTES); 71 | shake256_finalize(&s); 72 | shake256_squeeze(out, KYBER_SSBYTES, &s); 73 | } 74 | -------------------------------------------------------------------------------- /avx2/basemul.S: -------------------------------------------------------------------------------- 1 | #include "consts.h" 2 | 3 | .macro schoolbook off 4 | vmovdqa _16XQINV*2(%rcx),%ymm0 5 | vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 6 | vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 7 | vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 8 | vmovdqa (64*\off+48)*2(%rsi),%ymm4 # b1 9 | 10 | vpmullw %ymm0,%ymm1,%ymm9 # a0.lo 11 | vpmullw %ymm0,%ymm2,%ymm10 # b0.lo 12 | vpmullw %ymm0,%ymm3,%ymm11 # a1.lo 13 | vpmullw %ymm0,%ymm4,%ymm12 # b1.lo 14 | 15 | vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 16 | vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 17 | 18 | vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi 19 | vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi 20 | vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi 21 | vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi 22 | 23 | vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 24 | vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 25 | 26 | vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi 27 | vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi 28 | vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi 29 | vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi 30 | 31 | vmovdqa %ymm13,(%rsp) 32 | 33 | vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo 34 | vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo 35 | vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo 36 | vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo 37 | 38 | vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo 39 | vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo 40 | vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo 41 | vpmullw 
%ymm8,%ymm12,%ymm12 # b1d1.lo 42 | 43 | vmovdqa _16XQ*2(%rcx),%ymm8 44 | vpmulhw %ymm8,%ymm13,%ymm13 45 | vpmulhw %ymm8,%ymm9,%ymm9 46 | vpmulhw %ymm8,%ymm5,%ymm5 47 | vpmulhw %ymm8,%ymm10,%ymm10 48 | vpmulhw %ymm8,%ymm6,%ymm6 49 | vpmulhw %ymm8,%ymm11,%ymm11 50 | vpmulhw %ymm8,%ymm7,%ymm7 51 | vpmulhw %ymm8,%ymm12,%ymm12 52 | 53 | vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 54 | vpsubw %ymm9,%ymm1,%ymm9 # a0d0 55 | vpsubw %ymm5,%ymm14,%ymm5 # b0c0 56 | vpsubw %ymm10,%ymm2,%ymm10 # b0d0 57 | 58 | vpsubw %ymm6,%ymm15,%ymm6 # a1c1 59 | vpsubw %ymm11,%ymm3,%ymm11 # a1d1 60 | vpsubw %ymm7,%ymm0,%ymm7 # b1c1 61 | vpsubw %ymm12,%ymm4,%ymm12 # b1d1 62 | 63 | vmovdqa (%r9),%ymm0 64 | vmovdqa 32(%r9),%ymm1 65 | vpmullw %ymm0,%ymm10,%ymm2 66 | vpmullw %ymm0,%ymm12,%ymm3 67 | vpmulhw %ymm1,%ymm10,%ymm10 68 | vpmulhw %ymm1,%ymm12,%ymm12 69 | vpmulhw %ymm8,%ymm2,%ymm2 70 | vpmulhw %ymm8,%ymm3,%ymm3 71 | vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 72 | vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 73 | 74 | vpaddw %ymm5,%ymm9,%ymm9 75 | vpaddw %ymm7,%ymm11,%ymm11 76 | vpsubw %ymm13,%ymm10,%ymm13 77 | vpsubw %ymm12,%ymm6,%ymm6 78 | 79 | vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) 80 | vmovdqa %ymm9,(64*\off+16)*2(%rdi) 81 | vmovdqa %ymm6,(64*\off+32)*2(%rdi) 82 | vmovdqa %ymm11,(64*\off+48)*2(%rdi) 83 | .endm 84 | 85 | .text 86 | .global cdecl(basemul_avx) 87 | cdecl(basemul_avx): 88 | mov %rsp,%r8 89 | and $-32,%rsp 90 | sub $32,%rsp 91 | 92 | lea (_ZETAS_EXP+176)*2(%rcx),%r9 93 | schoolbook 0 94 | 95 | add $32*2,%r9 96 | schoolbook 1 97 | 98 | add $192*2,%r9 99 | schoolbook 2 100 | 101 | add $32*2,%r9 102 | schoolbook 3 103 | 104 | mov %r8,%rsp 105 | ret 106 | -------------------------------------------------------------------------------- /ref/test/test_kyber.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../kem.h" 5 | #include "../randombytes.h" 6 | 7 | #define NTESTS 1000 8 | 9 | static int test_keys(void) 10 | { 11 | 
uint8_t pk[CRYPTO_PUBLICKEYBYTES]; 12 | uint8_t sk[CRYPTO_SECRETKEYBYTES]; 13 | uint8_t ct[CRYPTO_CIPHERTEXTBYTES]; 14 | uint8_t key_a[CRYPTO_BYTES]; 15 | uint8_t key_b[CRYPTO_BYTES]; 16 | 17 | //Alice generates a public key 18 | crypto_kem_keypair(pk, sk); 19 | 20 | //Bob derives a secret key and creates a response 21 | crypto_kem_enc(ct, key_b, pk); 22 | 23 | //Alice uses Bobs response to get her shared key 24 | crypto_kem_dec(key_a, ct, sk); 25 | 26 | if(memcmp(key_a, key_b, CRYPTO_BYTES)) { 27 | printf("ERROR keys\n"); 28 | return 1; 29 | } 30 | 31 | return 0; 32 | } 33 | 34 | static int test_invalid_sk_a(void) 35 | { 36 | uint8_t pk[CRYPTO_PUBLICKEYBYTES]; 37 | uint8_t sk[CRYPTO_SECRETKEYBYTES]; 38 | uint8_t ct[CRYPTO_CIPHERTEXTBYTES]; 39 | uint8_t key_a[CRYPTO_BYTES]; 40 | uint8_t key_b[CRYPTO_BYTES]; 41 | 42 | //Alice generates a public key 43 | crypto_kem_keypair(pk, sk); 44 | 45 | //Bob derives a secret key and creates a response 46 | crypto_kem_enc(ct, key_b, pk); 47 | 48 | //Replace secret key with random values 49 | randombytes(sk, CRYPTO_SECRETKEYBYTES); 50 | 51 | //Alice uses Bobs response to get her shared key 52 | crypto_kem_dec(key_a, ct, sk); 53 | 54 | if(!memcmp(key_a, key_b, CRYPTO_BYTES)) { 55 | printf("ERROR invalid sk\n"); 56 | return 1; 57 | } 58 | 59 | return 0; 60 | } 61 | 62 | static int test_invalid_ciphertext(void) 63 | { 64 | uint8_t pk[CRYPTO_PUBLICKEYBYTES]; 65 | uint8_t sk[CRYPTO_SECRETKEYBYTES]; 66 | uint8_t ct[CRYPTO_CIPHERTEXTBYTES]; 67 | uint8_t key_a[CRYPTO_BYTES]; 68 | uint8_t key_b[CRYPTO_BYTES]; 69 | uint8_t b; 70 | size_t pos; 71 | 72 | do { 73 | randombytes(&b, sizeof(uint8_t)); 74 | } while(!b); 75 | randombytes((uint8_t *)&pos, sizeof(size_t)); 76 | 77 | //Alice generates a public key 78 | crypto_kem_keypair(pk, sk); 79 | 80 | //Bob derives a secret key and creates a response 81 | crypto_kem_enc(ct, key_b, pk); 82 | 83 | //Change some byte in the ciphertext (i.e., encapsulated key) 84 | ct[pos % CRYPTO_CIPHERTEXTBYTES] 
^= b; 85 | 86 | //Alice uses Bobs response to get her shared key 87 | crypto_kem_dec(key_a, ct, sk); 88 | 89 | if(!memcmp(key_a, key_b, CRYPTO_BYTES)) { 90 | printf("ERROR invalid ciphertext\n"); 91 | return 1; 92 | } 93 | 94 | return 0; 95 | } 96 | 97 | int main(void) 98 | { 99 | unsigned int i; 100 | int r; 101 | 102 | for(i=0;i 5 | #include "align.h" 6 | #include "params.h" 7 | 8 | typedef ALIGNED_INT16(KYBER_N) poly; 9 | 10 | #define poly_compress KYBER_NAMESPACE(poly_compress) 11 | void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a); 12 | #define poly_decompress KYBER_NAMESPACE(poly_decompress) 13 | void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]); 14 | 15 | #define poly_tobytes KYBER_NAMESPACE(poly_tobytes) 16 | void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a); 17 | #define poly_frombytes KYBER_NAMESPACE(poly_frombytes) 18 | void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]); 19 | 20 | #define poly_frommsg KYBER_NAMESPACE(poly_frommsg) 21 | void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]); 22 | #define poly_tomsg KYBER_NAMESPACE(poly_tomsg) 23 | void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r); 24 | 25 | #define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1) 26 | void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); 27 | 28 | #define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2) 29 | void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce); 30 | 31 | #ifndef KYBER_90S 32 | #define poly_getnoise_eta1_4x KYBER_NAMESPACE(poly_getnoise_eta2_4x) 33 | void poly_getnoise_eta1_4x(poly *r0, 34 | poly *r1, 35 | poly *r2, 36 | poly *r3, 37 | const uint8_t seed[32], 38 | uint8_t nonce0, 39 | uint8_t nonce1, 40 | uint8_t nonce2, 41 | uint8_t nonce3); 42 | 43 | #if KYBER_K == 2 44 | #define poly_getnoise_eta1122_4x KYBER_NAMESPACE(poly_getnoise_eta1122_4x) 45 | void 
poly_getnoise_eta1122_4x(poly *r0, 46 | poly *r1, 47 | poly *r2, 48 | poly *r3, 49 | const uint8_t seed[32], 50 | uint8_t nonce0, 51 | uint8_t nonce1, 52 | uint8_t nonce2, 53 | uint8_t nonce3); 54 | #endif 55 | #endif 56 | 57 | 58 | #define poly_ntt KYBER_NAMESPACE(poly_ntt) 59 | void poly_ntt(poly *r); 60 | #define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont) 61 | void poly_invntt_tomont(poly *r); 62 | #define poly_nttunpack KYBER_NAMESPACE(poly_nttunpack) 63 | void poly_nttunpack(poly *r); 64 | #define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery) 65 | void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); 66 | #define poly_tomont KYBER_NAMESPACE(poly_tomont) 67 | void poly_tomont(poly *r); 68 | 69 | #define poly_reduce KYBER_NAMESPACE(poly_reduce) 70 | void poly_reduce(poly *r); 71 | 72 | #define poly_add KYBER_NAMESPACE(poly_add) 73 | void poly_add(poly *r, const poly *a, const poly *b); 74 | #define poly_sub KYBER_NAMESPACE(poly_sub) 75 | void poly_sub(poly *r, const poly *a, const poly *b); 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /ref/api.h: -------------------------------------------------------------------------------- 1 | #ifndef API_H 2 | #define API_H 3 | 4 | #include 5 | 6 | #define pqcrystals_kyber512_SECRETKEYBYTES 1632 7 | #define pqcrystals_kyber512_PUBLICKEYBYTES 800 8 | #define pqcrystals_kyber512_CIPHERTEXTBYTES 768 9 | #define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 10 | #define pqcrystals_kyber512_ENCCOINBYTES 32 11 | #define pqcrystals_kyber512_BYTES 32 12 | 13 | #define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES 14 | #define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES 15 | #define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES 16 | #define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES 17 | #define 
pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES 18 | #define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES 19 | 20 | int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 21 | int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk); 22 | int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 23 | int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 24 | int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 25 | 26 | #define pqcrystals_kyber768_SECRETKEYBYTES 2400 27 | #define pqcrystals_kyber768_PUBLICKEYBYTES 1184 28 | #define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 29 | #define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 30 | #define pqcrystals_kyber768_ENCCOINBYTES 32 31 | #define pqcrystals_kyber768_BYTES 32 32 | 33 | #define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES 34 | #define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES 35 | #define pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES 36 | #define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES 37 | #define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES 38 | #define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES 39 | 40 | int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 41 | int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk); 42 | int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 43 | int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 44 | int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 45 | 46 | #define pqcrystals_kyber1024_SECRETKEYBYTES 3168 47 | #define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 48 | #define 
pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 49 | #define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 50 | #define pqcrystals_kyber1024_ENCCOINBYTES 32 51 | #define pqcrystals_kyber1024_BYTES 32 52 | 53 | #define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES 54 | #define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES 55 | #define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES 56 | #define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES 57 | #define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES 58 | #define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES 59 | 60 | int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 61 | int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk); 62 | int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 63 | int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 64 | int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /avx2/api.h: -------------------------------------------------------------------------------- 1 | #ifndef API_H 2 | #define API_H 3 | 4 | #include 5 | 6 | #define pqcrystals_kyber512_SECRETKEYBYTES 1632 7 | #define pqcrystals_kyber512_PUBLICKEYBYTES 800 8 | #define pqcrystals_kyber512_CIPHERTEXTBYTES 768 9 | #define pqcrystals_kyber512_KEYPAIRCOINBYTES 64 10 | #define pqcrystals_kyber512_ENCCOINBYTES 32 11 | #define pqcrystals_kyber512_BYTES 32 12 | 13 | #define pqcrystals_kyber512_avx2_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES 14 | #define pqcrystals_kyber512_avx2_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES 15 | #define pqcrystals_kyber512_avx2_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES 16 | 
#define pqcrystals_kyber512_avx2_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES 17 | #define pqcrystals_kyber512_avx2_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES 18 | #define pqcrystals_kyber512_avx2_BYTES pqcrystals_kyber512_BYTES 19 | 20 | int pqcrystals_kyber512_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 21 | int pqcrystals_kyber512_avx2_keypair(uint8_t *pk, uint8_t *sk); 22 | int pqcrystals_kyber512_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 23 | int pqcrystals_kyber512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 24 | int pqcrystals_kyber512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 25 | 26 | #define pqcrystals_kyber768_SECRETKEYBYTES 2400 27 | #define pqcrystals_kyber768_PUBLICKEYBYTES 1184 28 | #define pqcrystals_kyber768_CIPHERTEXTBYTES 1088 29 | #define pqcrystals_kyber768_KEYPAIRCOINBYTES 64 30 | #define pqcrystals_kyber768_ENCCOINBYTES 32 31 | #define pqcrystals_kyber768_BYTES 32 32 | 33 | #define pqcrystals_kyber768_avx2_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES 34 | #define pqcrystals_kyber768_avx2_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES 35 | #define pqcrystals_kyber768_avx2_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES 36 | #define pqcrystals_kyber768_avx2_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES 37 | #define pqcrystals_kyber768_avx2_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES 38 | #define pqcrystals_kyber768_avx2_BYTES pqcrystals_kyber768_BYTES 39 | 40 | int pqcrystals_kyber768_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 41 | int pqcrystals_kyber768_avx2_keypair(uint8_t *pk, uint8_t *sk); 42 | int pqcrystals_kyber768_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 43 | int pqcrystals_kyber768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 44 | int pqcrystals_kyber768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 45 | 46 | 
#define pqcrystals_kyber1024_SECRETKEYBYTES 3168 47 | #define pqcrystals_kyber1024_PUBLICKEYBYTES 1568 48 | #define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568 49 | #define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64 50 | #define pqcrystals_kyber1024_ENCCOINBYTES 32 51 | #define pqcrystals_kyber1024_BYTES 32 52 | 53 | #define pqcrystals_kyber1024_avx2_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES 54 | #define pqcrystals_kyber1024_avx2_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES 55 | #define pqcrystals_kyber1024_avx2_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES 56 | #define pqcrystals_kyber1024_avx2_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES 57 | #define pqcrystals_kyber1024_avx2_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES 58 | #define pqcrystals_kyber1024_avx2_BYTES pqcrystals_kyber1024_BYTES 59 | 60 | int pqcrystals_kyber1024_avx2_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); 61 | int pqcrystals_kyber1024_avx2_keypair(uint8_t *pk, uint8_t *sk); 62 | int pqcrystals_kyber1024_avx2_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); 63 | int pqcrystals_kyber1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); 64 | int pqcrystals_kyber1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /ref/cbd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "params.h" 3 | #include "cbd.h" 4 | 5 | /************************************************* 6 | * Name: load32_littleendian 7 | * 8 | * Description: load 4 bytes into a 32-bit integer 9 | * in little-endian order 10 | * 11 | * Arguments: - const uint8_t *x: pointer to input byte array 12 | * 13 | * Returns 32-bit unsigned integer loaded from x 14 | **************************************************/ 15 | static uint32_t load32_littleendian(const uint8_t x[4]) 16 | { 17 | 
uint32_t r; 18 | r = (uint32_t)x[0]; 19 | r |= (uint32_t)x[1] << 8; 20 | r |= (uint32_t)x[2] << 16; 21 | r |= (uint32_t)x[3] << 24; 22 | return r; 23 | } 24 | 25 | /************************************************* 26 | * Name: load24_littleendian 27 | * 28 | * Description: load 3 bytes into a 32-bit integer 29 | * in little-endian order. 30 | * This function is only needed for Kyber-512 31 | * 32 | * Arguments: - const uint8_t *x: pointer to input byte array 33 | * 34 | * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) 35 | **************************************************/ 36 | #if KYBER_ETA1 == 3 37 | static uint32_t load24_littleendian(const uint8_t x[3]) 38 | { 39 | uint32_t r; 40 | r = (uint32_t)x[0]; 41 | r |= (uint32_t)x[1] << 8; 42 | r |= (uint32_t)x[2] << 16; 43 | return r; 44 | } 45 | #endif 46 | 47 | 48 | /************************************************* 49 | * Name: cbd2 50 | * 51 | * Description: Given an array of uniformly random bytes, compute 52 | * polynomial with coefficients distributed according to 53 | * a centered binomial distribution with parameter eta=2 54 | * 55 | * Arguments: - poly *r: pointer to output polynomial 56 | * - const uint8_t *buf: pointer to input byte array 57 | **************************************************/ 58 | static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) 59 | { 60 | unsigned int i,j; 61 | uint32_t t,d; 62 | int16_t a,b; 63 | 64 | for(i=0;i>1) & 0x55555555; 68 | 69 | for(j=0;j<8;j++) { 70 | a = (d >> (4*j+0)) & 0x3; 71 | b = (d >> (4*j+2)) & 0x3; 72 | r->coeffs[8*i+j] = a - b; 73 | } 74 | } 75 | } 76 | 77 | /************************************************* 78 | * Name: cbd3 79 | * 80 | * Description: Given an array of uniformly random bytes, compute 81 | * polynomial with coefficients distributed according to 82 | * a centered binomial distribution with parameter eta=3. 
83 | * This function is only needed for Kyber-512 84 | * 85 | * Arguments: - poly *r: pointer to output polynomial 86 | * - const uint8_t *buf: pointer to input byte array 87 | **************************************************/ 88 | #if KYBER_ETA1 == 3 89 | static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) 90 | { 91 | unsigned int i,j; 92 | uint32_t t,d; 93 | int16_t a,b; 94 | 95 | for(i=0;i>1) & 0x00249249; 99 | d += (t>>2) & 0x00249249; 100 | 101 | for(j=0;j<4;j++) { 102 | a = (d >> (6*j+0)) & 0x7; 103 | b = (d >> (6*j+3)) & 0x7; 104 | r->coeffs[4*i+j] = a - b; 105 | } 106 | } 107 | } 108 | #endif 109 | 110 | void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1*KYBER_N/4]) 111 | { 112 | #if KYBER_ETA1 == 2 113 | cbd2(r, buf); 114 | #elif KYBER_ETA1 == 3 115 | cbd3(r, buf); 116 | #else 117 | #error "This implementation requires eta1 in {2,3}" 118 | #endif 119 | } 120 | 121 | void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]) 122 | { 123 | #if KYBER_ETA2 == 2 124 | cbd2(r, buf); 125 | #else 126 | #error "This implementation requires eta2 = 2" 127 | #endif 128 | } 129 | -------------------------------------------------------------------------------- /ref/test/test_speed.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../kem.h" 6 | #include "../params.h" 7 | #include "../indcpa.h" 8 | #include "../polyvec.h" 9 | #include "../poly.h" 10 | #include "../randombytes.h" 11 | #include "cpucycles.h" 12 | #include "speed_print.h" 13 | 14 | #define NTESTS 1000 15 | 16 | uint64_t t[NTESTS]; 17 | uint8_t seed[KYBER_SYMBYTES] = {0}; 18 | 19 | int main(void) 20 | { 21 | unsigned int i; 22 | uint8_t pk[CRYPTO_PUBLICKEYBYTES]; 23 | uint8_t sk[CRYPTO_SECRETKEYBYTES]; 24 | uint8_t ct[CRYPTO_CIPHERTEXTBYTES]; 25 | uint8_t key[CRYPTO_BYTES]; 26 | uint8_t coins32[KYBER_SYMBYTES]; 27 | uint8_t coins64[2*KYBER_SYMBYTES]; 28 | polyvec matrix[KYBER_K]; 29 | 
poly ap; 30 | 31 | randombytes(coins32, KYBER_SYMBYTES); 32 | randombytes(coins64, 2*KYBER_SYMBYTES); 33 | 34 | for(i=0;i 32 | 33 | #define KeccakP1600times4_StaticInitialize() 34 | #define KeccakP1600times4_InitializeAll FIPS202X4_NAMESPACE(KeccakP1600times4_InitializeAll) 35 | void KeccakP1600times4_InitializeAll(void *states); 36 | #define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \ 37 | ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte) 38 | #define KeccakP1600times4_AddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_AddBytes) 39 | void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); 40 | #define KeccakP1600times4_AddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_AddLanesAll) 41 | void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); 42 | #define KeccakP1600times4_OverwriteBytes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteBytes) 43 | void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); 44 | #define KeccakP1600times4_OverwriteLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteLanesAll) 45 | void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); 46 | #define KeccakP1600times4_OverwriteWithZeroes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteWithZeroes) 47 | void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount); 48 | #define KeccakP1600times4_PermuteAll_12rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_12rounds) 49 | void KeccakP1600times4_PermuteAll_12rounds(void *states); 50 | #define KeccakP1600times4_PermuteAll_24rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds) 51 | void 
KeccakP1600times4_PermuteAll_24rounds(void *states); 52 | #define KeccakP1600times4_ExtractBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractBytes) 53 | void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length); 54 | #define KeccakP1600times4_ExtractLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractLanesAll) 55 | void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset); 56 | #define KeccakP1600times4_ExtractAndAddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddBytes) 57 | void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); 58 | #define KeccakP1600times4_ExtractAndAddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddLanesAll) 59 | void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset); 60 | #define KeccakF1600times4_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakF1600times4_FastLoop_Absorb) 61 | size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); 62 | #define KeccakP1600times4_12rounds_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakP1600times4_12rounds_FastLoop_Absorb) 63 | size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kyber 2 | 3 | [![Build 
Status](https://travis-ci.org/pq-crystals/kyber.svg?branch=master)](https://travis-ci.org/pq-crystals/kyber) 4 | [![Coverage Status](https://coveralls.io/repos/github/pq-crystals/kyber/badge.svg?branch=master)](https://coveralls.io/github/pq-crystals/kyber?branch=master) 5 | 6 | This repository contains the official reference implementation of the [Kyber](https://www.pq-crystals.org/kyber/) key encapsulation mechanism, 7 | and an optimized implementation for x86 CPUs supporting the AVX2 instruction set. 8 | Kyber has been selected for standardization in [round 3](https://csrc.nist.gov/Projects/post-quantum-cryptography/round-3-submissions) 9 | of the [NIST PQC](https://csrc.nist.gov/projects/post-quantum-cryptography) standardization project. 10 | 11 | 12 | ## Build instructions 13 | 14 | The implementations contain several test and benchmarking programs and a Makefile to facilitate compilation. 15 | 16 | ### Prerequisites 17 | 18 | Some of the test programs require [OpenSSL](https://openssl.org). 19 | If the OpenSSL header files and/or shared libraries do not lie in one of the standard locations on your system, 20 | it is necessary to specify their location via compiler and linker flags in the environment variables `CFLAGS`, `NISTFLAGS`, and `LDFLAGS`. 21 | 22 | For example, on macOS you can install OpenSSL via [Homebrew](https://brew.sh) by running 23 | ```sh 24 | brew install openssl 25 | ``` 26 | Then, run 27 | ```sh 28 | export CFLAGS="-I/usr/local/opt/openssl@1.1/include" 29 | export NISTFLAGS="-I/usr/local/opt/openssl@1.1/include" 30 | export LDFLAGS="-L/usr/local/opt/openssl@1.1/lib" 31 | ``` 32 | before compilation to add the OpenSSL header and library locations to the respective search paths. 
33 | 34 | ### Building all binaries 35 | 36 | To compile the test and benchmarking programs on Linux or macOS, go to the `ref/` or `avx2/` directory and run 37 | ```sh 38 | make 39 | ``` 40 | This produces the executables 41 | ```sh 42 | test/test_kyber$ALG 43 | test/test_vectors$ALG 44 | test/test_speed$ALG 45 | ``` 46 | where `$ALG` ranges over the parameter sets 512, 768, 1024. 47 | 48 | * `test_kyber$ALG` runs 1000 iterations of generating keys, encapsulating a random key and correctly decapsulating it again. 49 | Also, the program tests that the keys cannot correctly be decapsulated using a random secret key 50 | or a ciphertext where a single random byte was randomly distorted in order to test for trivial failures of the CCA security. 51 | The program will abort with an error message and return 1 if there was an error. 52 | Otherwise it will output the key and ciphertext sizes and return 0. 53 | * `test_vectors$ALG` generates 10000 sets of test vectors containing keys, ciphertexts and shared secrets 54 | whose byte-strings are output in hexadecimal. It also generates test vectors for decapsulation of invalid 55 | (pseudorandom) ciphertexts. 56 | The required random bytes are deterministic and come from SHAKE128 on empty input. 57 | * `test_speed$ALG` reports the median and average cycle counts of 1000 executions of various internal functions 58 | and the API functions for key generation, encapsulation and decapsulation. 59 | By default the Time Stamp Counter is used. 60 | If instead you want to obtain the actual cycle counts from the Performance Monitoring Counters, export `CFLAGS="-DUSE_RDPMC"` before compilation. 61 | 62 | Please note that the reference implementation in `ref/` is not optimized for any platform, and, since it prioritises clean code, 63 | is significantly slower than a trivially optimized but still platform-independent implementation. 64 | Hence benchmarking the reference code does not provide particularly meaningful results. 
65 | 66 | 70 | 71 | ## Shared libraries 72 | 73 | All implementations can be compiled into shared libraries by running 74 | ```sh 75 | make shared 76 | ``` 77 | For example, in the directory `ref/` of the reference implementation, this produces the libraries 78 | ```sh 79 | libpqcrystals_kyber$ALG_ref.so 80 | ``` 81 | for all parameter sets `$ALG`, and the required symmetric crypto libraries 82 | ``` 83 | libpqcrystals_aes256ctr_ref.so 84 | libpqcrystals_fips202_ref.so 85 | ``` 86 | All global symbols in the libraries lie in the namespaces `pqcrystals_kyber$ALG_ref`, `libpqcrystals_aes256ctr_ref` and `libpqcrystals_fips202_ref`. Hence it is possible to link a program against all libraries simultaneously and obtain access to all implementations for all parameter sets. The corresponding API header file is `ref/api.h`, which contains prototypes for all API functions and preprocessor defines for the key and ciphertext lengths. 87 | 88 | -------------------------------------------------------------------------------- /avx2/ntt.S: -------------------------------------------------------------------------------- 1 | #include "consts.h" 2 | .include "shuffle.inc" 3 | 4 | .macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 5 | vpmullw %ymm\zl0,%ymm\rh0,%ymm12 6 | vpmullw %ymm\zl0,%ymm\rh1,%ymm13 7 | 8 | vpmullw %ymm\zl1,%ymm\rh2,%ymm14 9 | vpmullw %ymm\zl1,%ymm\rh3,%ymm15 10 | 11 | vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 12 | vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 13 | 14 | vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 15 | vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 16 | .endm 17 | 18 | .macro reduce 19 | vpmulhw %ymm0,%ymm12,%ymm12 20 | vpmulhw %ymm0,%ymm13,%ymm13 21 | 22 | vpmulhw %ymm0,%ymm14,%ymm14 23 | vpmulhw %ymm0,%ymm15,%ymm15 24 | .endm 25 | 26 | .macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 27 | vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln 28 | vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 29 | vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 30 | 31 | vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 32 | vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 33 
| vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 34 | 35 | vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 36 | vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 37 | 38 | vpsubw %ymm12,%ymm\rln,%ymm\rln 39 | vpaddw %ymm12,%ymm\rh0,%ymm\rh0 40 | vpsubw %ymm13,%ymm\rl0,%ymm\rl0 41 | 42 | vpaddw %ymm13,%ymm\rh1,%ymm\rh1 43 | vpsubw %ymm14,%ymm\rl1,%ymm\rl1 44 | vpaddw %ymm14,%ymm\rh2,%ymm\rh2 45 | 46 | vpsubw %ymm15,%ymm\rl2,%ymm\rl2 47 | vpaddw %ymm15,%ymm\rh3,%ymm\rh3 48 | .endm 49 | 50 | .macro level0 off 51 | vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 52 | vmovdqa (64*\off+128)*2(%rdi),%ymm8 53 | vmovdqa (64*\off+144)*2(%rdi),%ymm9 54 | vmovdqa (64*\off+160)*2(%rdi),%ymm10 55 | vmovdqa (64*\off+176)*2(%rdi),%ymm11 56 | vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm2 57 | 58 | mul 8,9,10,11 59 | 60 | vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 61 | vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 62 | vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 63 | vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 64 | 65 | reduce 66 | update 3,4,5,6,7,8,9,10,11 67 | 68 | vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) 69 | vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) 70 | vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) 71 | vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) 72 | vmovdqa %ymm8,(64*\off+128)*2(%rdi) 73 | vmovdqa %ymm9,(64*\off+144)*2(%rdi) 74 | vmovdqa %ymm10,(64*\off+160)*2(%rdi) 75 | vmovdqa %ymm11,(64*\off+176)*2(%rdi) 76 | .endm 77 | 78 | .macro levels1t6 off 79 | /* level 1 */ 80 | vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 81 | vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 82 | vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 83 | vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 84 | vmovdqa (128*\off+112)*2(%rdi),%ymm11 85 | vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 86 | 87 | mul 8,9,10,11 88 | 89 | vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 90 | vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 91 | vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 92 | vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 93 | 94 | reduce 95 | update 3,4,5,6,7,8,9,10,11 96 | 97 | /* level 2 */ 98 | shuffle8 5,10,7,10 99 | shuffle8 6,11,5,11 100 | 101 | vmovdqa 
(_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 102 | vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 103 | 104 | mul 7,10,5,11 105 | 106 | shuffle8 3,8,6,8 107 | shuffle8 4,9,3,9 108 | 109 | reduce 110 | update 4,6,8,3,9,7,10,5,11 111 | 112 | /* level 3 */ 113 | shuffle4 8,5,9,5 114 | shuffle4 3,11,8,11 115 | 116 | vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 117 | vmovdqa (_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 118 | 119 | mul 9,5,8,11 120 | 121 | shuffle4 4,7,3,7 122 | shuffle4 6,10,4,10 123 | 124 | reduce 125 | update 6,3,7,4,10,9,5,8,11 126 | 127 | /* level 4 */ 128 | shuffle2 7,8,10,8 129 | shuffle2 4,11,7,11 130 | 131 | vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 132 | vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 133 | 134 | mul 10,8,7,11 135 | 136 | shuffle2 6,9,4,9 137 | shuffle2 3,5,6,5 138 | 139 | reduce 140 | update 3,4,9,6,5,10,8,7,11 141 | 142 | /* level 5 */ 143 | shuffle1 9,7,5,7 144 | shuffle1 6,11,9,11 145 | 146 | vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 147 | vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 148 | 149 | mul 5,7,9,11 150 | 151 | shuffle1 3,10,6,10 152 | shuffle1 4,8,3,8 153 | 154 | reduce 155 | update 4,6,10,3,8,5,7,9,11 156 | 157 | /* level 6 */ 158 | vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 159 | vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 160 | vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 161 | vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 162 | 163 | mul 10,3,9,11,14,15,8,2 164 | 165 | reduce 166 | update 8,4,6,5,7,10,3,9,11 167 | 168 | vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) 169 | vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) 170 | vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) 171 | vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) 172 | vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) 173 | vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) 174 | vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) 175 | vmovdqa %ymm11,(128*\off+112)*2(%rdi) 176 | .endm 177 | 178 | .text 179 | .global cdecl(ntt_avx) 180 | cdecl(ntt_avx): 181 | vmovdqa _16XQ*2(%rsi),%ymm0 182 | 183 | level0 0 
184 | level0 1 185 | 186 | levels1t6 0 187 | levels1t6 1 188 | 189 | ret 190 | -------------------------------------------------------------------------------- /ref/Makefile: -------------------------------------------------------------------------------- 1 | CC ?= /usr/bin/cc 2 | CFLAGS += -Wall -Wextra -Wpedantic -Wmissing-prototypes -Wredundant-decls \ 3 | -Wshadow -Wpointer-arith -O3 -fomit-frame-pointer -z noexecstack 4 | NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer 5 | RM = /bin/rm 6 | 7 | SOURCES = kem.c indcpa.c polyvec.c poly.c ntt.c cbd.c reduce.c verify.c 8 | SOURCESKECCAK = $(SOURCES) fips202.c symmetric-shake.c 9 | HEADERS = params.h kem.h indcpa.h polyvec.h poly.h ntt.h cbd.h reduce.h verify.h symmetric.h 10 | HEADERSKECCAK = $(HEADERS) fips202.h 11 | 12 | .PHONY: all test speed shared nistkat clean 13 | 14 | all: test speed shared nistkat 15 | 16 | test: \ 17 | test/test_kyber512 \ 18 | test/test_kyber768 \ 19 | test/test_kyber1024 \ 20 | test/test_vectors512 \ 21 | test/test_vectors768 \ 22 | test/test_vectors1024 \ 23 | 24 | speed: \ 25 | test/test_speed512 \ 26 | test/test_speed768 \ 27 | test/test_speed1024 \ 28 | 29 | shared: \ 30 | lib/libpqcrystals_kyber512_ref.so \ 31 | lib/libpqcrystals_kyber768_ref.so \ 32 | lib/libpqcrystals_kyber1024_ref.so \ 33 | lib/libpqcrystals_fips202_ref.so \ 34 | 35 | nistkat: \ 36 | nistkat/PQCgenKAT_kem512 \ 37 | nistkat/PQCgenKAT_kem768 \ 38 | nistkat/PQCgenKAT_kem1024 \ 39 | 40 | 41 | lib/libpqcrystals_fips202_ref.so: fips202.c fips202.h 42 | mkdir -p lib 43 | $(CC) -shared -fPIC $(CFLAGS) fips202.c -o $@ 44 | 45 | lib/libpqcrystals_kyber512_ref.so: $(SOURCES) $(HEADERS) symmetric-shake.c 46 | mkdir -p lib 47 | $(CC) -shared -fPIC $(CFLAGS) -DKYBER_K=2 $(SOURCES) symmetric-shake.c -o $@ 48 | 49 | lib/libpqcrystals_kyber768_ref.so: $(SOURCES) $(HEADERS) symmetric-shake.c 50 | mkdir -p lib 51 | $(CC) -shared -fPIC $(CFLAGS) -DKYBER_K=3 $(SOURCES) symmetric-shake.c -o $@ 52 | 53 | 
lib/libpqcrystals_kyber1024_ref.so: $(SOURCES) $(HEADERS) symmetric-shake.c 54 | mkdir -p lib 55 | $(CC) -shared -fPIC $(CFLAGS) -DKYBER_K=4 $(SOURCES) symmetric-shake.c -o $@ 56 | 57 | test/test_kyber512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 58 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 59 | 60 | test/test_kyber768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 61 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 62 | 63 | test/test_kyber1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 64 | $(CC) $(CFLAGS) -DKYBER_K=4 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 65 | 66 | test/test_vectors512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 67 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) test/test_vectors.c -o $@ 68 | 69 | test/test_vectors768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 70 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) test/test_vectors.c -o $@ 71 | 72 | test/test_vectors1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 73 | $(CC) $(CFLAGS) -DKYBER_K=4 $(SOURCESKECCAK) test/test_vectors.c -o $@ 74 | 75 | test/test_speed512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 76 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c test/test_speed.c -o $@ 77 | 78 | test/test_speed768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 79 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c test/test_speed.c -o $@ 80 | 81 | test/test_speed1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 82 | $(CC) $(CFLAGS) 
-DKYBER_K=4 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c test/test_speed.c -o $@ 83 | 84 | nistkat/PQCgenKAT_kem512: $(SOURCESKECCAK) $(HEADERSKECCAK) nistkat/PQCgenKAT_kem.c nistkat/rng.c nistkat/rng.h 85 | $(CC) $(NISTFLAGS) -DKYBER_K=2 -o $@ $(SOURCESKECCAK) nistkat/rng.c nistkat/PQCgenKAT_kem.c $(LDFLAGS) -lcrypto 86 | 87 | nistkat/PQCgenKAT_kem768: $(SOURCESKECCAK) $(HEADERSKECCAK) nistkat/PQCgenKAT_kem.c nistkat/rng.c nistkat/rng.h 88 | $(CC) $(NISTFLAGS) -DKYBER_K=3 -o $@ $(SOURCESKECCAK) nistkat/rng.c nistkat/PQCgenKAT_kem.c $(LDFLAGS) -lcrypto 89 | 90 | nistkat/PQCgenKAT_kem1024: $(SOURCESKECCAK) $(HEADERSKECCAK) nistkat/PQCgenKAT_kem.c nistkat/rng.c nistkat/rng.h 91 | $(CC) $(NISTFLAGS) -DKYBER_K=4 -o $@ $(SOURCESKECCAK) nistkat/rng.c nistkat/PQCgenKAT_kem.c $(LDFLAGS) -lcrypto 92 | 93 | clean: 94 | -$(RM) -f *.gcno *.gcda *.lcov *.o *.so 95 | -$(RM) -f test/test_kyber512 96 | -$(RM) -f test/test_kyber768 97 | -$(RM) -f test/test_kyber1024 98 | -$(RM) -f test/test_vectors512 99 | -$(RM) -f test/test_vectors768 100 | -$(RM) -f test/test_vectors1024 101 | -$(RM) -f test/test_speed512 102 | -$(RM) -f test/test_speed768 103 | -$(RM) -f test/test_speed1024 104 | -$(RM) -f nistkat/PQCgenKAT_kem512 105 | -$(RM) -f nistkat/PQCgenKAT_kem768 106 | -$(RM) -f nistkat/PQCgenKAT_kem1024 107 | -$(RM) -f nistkat/*.req 108 | -$(RM) -f nistkat/*.rsp 109 | -$(RM) -rf lib/ 110 | 111 | -------------------------------------------------------------------------------- /avx2/Makefile: -------------------------------------------------------------------------------- 1 | CC ?= /usr/bin/cc 2 | CFLAGS += -Wall -Wextra -Wpedantic -Wmissing-prototypes -Wredundant-decls \ 3 | -Wshadow -Wpointer-arith -mavx2 -mbmi2 -mpopcnt \ 4 | -march=native -mtune=native -O3 -fomit-frame-pointer -z noexecstack 5 | NISTFLAGS += -Wno-unused-result -mavx2 -mbmi2 -mpopcnt \ 6 | -march=native -mtune=native -O3 -fomit-frame-pointer 7 | RM = /bin/rm 8 | 9 | SOURCES = kem.c indcpa.c 
polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S \ 10 | basemul.S consts.c rejsample.c cbd.c verify.c 11 | SOURCESKECCAK = $(SOURCES) fips202.c fips202x4.c symmetric-shake.c \ 12 | keccak4x/KeccakP-1600-times4-SIMD256.o 13 | HEADERS = params.h align.h kem.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc \ 14 | ntt.h consts.h rejsample.h cbd.h verify.h symmetric.h randombytes.h 15 | HEADERSKECCAK = $(HEADERS) fips202.h fips202x4.h 16 | 17 | .PHONY: all shared clean 18 | 19 | all: \ 20 | test/test_kyber512 \ 21 | test/test_kyber768 \ 22 | test/test_kyber1024 \ 23 | test/test_vectors512 \ 24 | test/test_vectors768 \ 25 | test/test_vectors1024 \ 26 | speed 27 | 28 | speed: \ 29 | test/test_speed512 \ 30 | test/test_speed768 \ 31 | test/test_speed1024 \ 32 | 33 | shared: \ 34 | libpqcrystals_kyber512_avx2.so \ 35 | libpqcrystals_kyber768_avx2.so \ 36 | libpqcrystals_kyber1024_avx2.so \ 37 | libpqcrystals_fips202_ref.so \ 38 | libpqcrystals_fips202x4_avx2.so \ 39 | 40 | keccak4x/KeccakP-1600-times4-SIMD256.o: \ 41 | keccak4x/KeccakP-1600-times4-SIMD256.c \ 42 | keccak4x/KeccakP-1600-times4-SnP.h \ 43 | keccak4x/KeccakP-1600-unrolling.macros \ 44 | keccak4x/KeccakP-SIMD256-config.h \ 45 | keccak4x/KeccakP-align.h \ 46 | keccak4x/KeccakP-brg_endian.h 47 | $(CC) $(CFLAGS) -c $< -o $@ 48 | 49 | libpqcrystals_fips202_ref.so: fips202.c fips202.h 50 | $(CC) -shared -fPIC $(CFLAGS) -o $@ $< 51 | 52 | libpqcrystals_fips202x4_avx2.so: fips202x4.c fips202x4.h \ 53 | keccak4x/KeccakP-1600-times4-SIMD256.c \ 54 | keccak4x/KeccakP-1600-times4-SnP.h \ 55 | keccak4x/KeccakP-1600-unrolling.macros \ 56 | keccak4x/KeccakP-SIMD256-config.h \ 57 | keccak4x/KeccakP-align.h \ 58 | keccak4x/KeccakP-brg_endian.h 59 | $(CC) -shared -fPIC $(CFLAGS) -o $@ $< keccak4x/KeccakP-1600-times4-SIMD256.c 60 | 61 | libpqcrystals_kyber512_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c 62 | $(CC) -shared -fpic $(CFLAGS) -DKYBER_K=2 $(SOURCES) \ 63 | symmetric-shake.c -o libpqcrystals_kyber512_avx2.so 
64 | 65 | libpqcrystals_kyber768_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c 66 | $(CC) -shared -fpic $(CFLAGS) -DKYBER_K=3 $(SOURCES) \ 67 | symmetric-shake.c -o libpqcrystals_kyber768_avx2.so 68 | 69 | libpqcrystals_kyber1024_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c 70 | $(CC) -shared -fpic $(CFLAGS) -DKYBER_K=4 $(SOURCES) \ 71 | symmetric-shake.c -o libpqcrystals_kyber1024_avx2.so 72 | 73 | test/test_kyber512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 74 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 75 | 76 | test/test_kyber768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 77 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 78 | 79 | test/test_kyber1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_kyber.c randombytes.c 80 | $(CC) $(CFLAGS) -DKYBER_K=4 $(SOURCESKECCAK) randombytes.c test/test_kyber.c -o $@ 81 | 82 | test/test_vectors512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 83 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) test/test_vectors.c -o $@ 84 | 85 | test/test_vectors768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 86 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) test/test_vectors.c -o $@ 87 | 88 | test/test_vectors1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/test_vectors.c 89 | $(CC) $(CFLAGS) -DKYBER_K=4 $(SOURCESKECCAK) test/test_vectors.c -o $@ 90 | 91 | test/test_speed512: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 92 | $(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c test/test_speed.c -o $@ 93 | 94 | test/test_speed768: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 95 | $(CC) $(CFLAGS) -DKYBER_K=3 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c 
test/test_speed.c -o $@ 96 | 97 | test/test_speed1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test/cpucycles.h test/cpucycles.c test/speed_print.h test/speed_print.c test/test_speed.c randombytes.c 98 | $(CC) $(CFLAGS) -DKYBER_K=4 $(SOURCESKECCAK) randombytes.c test/cpucycles.c test/speed_print.c test/test_speed.c -o $@ 99 | 100 | 101 | clean: 102 | -$(RM) -rf *.o *.a *.so 103 | -$(RM) -rf test/test_kyber512 104 | -$(RM) -rf test/test_kyber768 105 | -$(RM) -rf test/test_kyber1024 106 | -$(RM) -rf test/test_vectors512 107 | -$(RM) -rf test/test_vectors768 108 | -$(RM) -rf test/test_vectors1024 109 | -$(RM) -rf test/test_speed512 110 | -$(RM) -rf test/test_speed768 111 | -$(RM) -rf test/test_speed1024 112 | -$(RM) -rf keccak4x/KeccakP-1600-times4-SIMD256.o 113 | -------------------------------------------------------------------------------- /avx2/cbd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "params.h" 4 | #include "cbd.h" 5 | 6 | /************************************************* 7 | * Name: cbd2 8 | * 9 | * Description: Given an array of uniformly random bytes, compute 10 | * polynomial with coefficients distributed according to 11 | * a centered binomial distribution with parameter eta=2 12 | * 13 | * Arguments: - poly *r: pointer to output polynomial 14 | * - const __m256i *buf: pointer to aligned input byte array 15 | **************************************************/ 16 | static void cbd2(poly * restrict r, const __m256i buf[2*KYBER_N/128]) 17 | { 18 | unsigned int i; 19 | __m256i f0, f1, f2, f3; 20 | const __m256i mask55 = _mm256_set1_epi32(0x55555555); 21 | const __m256i mask33 = _mm256_set1_epi32(0x33333333); 22 | const __m256i mask03 = _mm256_set1_epi32(0x03030303); 23 | const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); 24 | 25 | for(i = 0; i < KYBER_N/64; i++) { 26 | f0 = _mm256_load_si256(&buf[i]); 27 | 28 | f1 = _mm256_srli_epi16(f0, 1); 29 | f0 = _mm256_and_si256(mask55, 
f0); 30 | f1 = _mm256_and_si256(mask55, f1); 31 | f0 = _mm256_add_epi8(f0, f1); 32 | 33 | f1 = _mm256_srli_epi16(f0, 2); 34 | f0 = _mm256_and_si256(mask33, f0); 35 | f1 = _mm256_and_si256(mask33, f1); 36 | f0 = _mm256_add_epi8(f0, mask33); 37 | f0 = _mm256_sub_epi8(f0, f1); 38 | 39 | f1 = _mm256_srli_epi16(f0, 4); 40 | f0 = _mm256_and_si256(mask0F, f0); 41 | f1 = _mm256_and_si256(mask0F, f1); 42 | f0 = _mm256_sub_epi8(f0, mask03); 43 | f1 = _mm256_sub_epi8(f1, mask03); 44 | 45 | f2 = _mm256_unpacklo_epi8(f0, f1); 46 | f3 = _mm256_unpackhi_epi8(f0, f1); 47 | 48 | f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); 49 | f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); 50 | f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); 51 | f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); 52 | 53 | _mm256_store_si256(&r->vec[4*i+0], f0); 54 | _mm256_store_si256(&r->vec[4*i+1], f2); 55 | _mm256_store_si256(&r->vec[4*i+2], f1); 56 | _mm256_store_si256(&r->vec[4*i+3], f3); 57 | } 58 | } 59 | 60 | #if KYBER_ETA1 == 3 61 | /************************************************* 62 | * Name: cbd3 63 | * 64 | * Description: Given an array of uniformly random bytes, compute 65 | * polynomial with coefficients distributed according to 66 | * a centered binomial distribution with parameter eta=3 67 | * This function is only needed for Kyber-512 68 | * 69 | * Arguments: - poly *r: pointer to output polynomial 70 | * - const uint8_t *buf: pointer to input byte array of 3*KYBER_N/4+8 bytes 71 | **************************************************/ 72 | static void cbd3(poly * restrict r, const uint8_t buf[3*KYBER_N/4+8]) 73 | { 74 | unsigned int i; 75 | __m256i f0, f1, f2, f3; 76 | const __m256i mask249 = _mm256_set1_epi32(0x249249); 77 | const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); 78 | const __m256i mask07 = _mm256_set1_epi32(7); 79 | const __m256i mask70 = _mm256_set1_epi32(7 << 16); 80 | const __m256i mask3 = _mm256_set1_epi16(3); 81 | const __m256i shufbidx = 
_mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, 82 | -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); 83 | 84 | for(i = 0; i < KYBER_N/32; i++) { 85 | f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); 86 | f0 = _mm256_permute4x64_epi64(f0,0x94); 87 | f0 = _mm256_shuffle_epi8(f0,shufbidx); 88 | 89 | f1 = _mm256_srli_epi32(f0,1); 90 | f2 = _mm256_srli_epi32(f0,2); 91 | f0 = _mm256_and_si256(mask249,f0); 92 | f1 = _mm256_and_si256(mask249,f1); 93 | f2 = _mm256_and_si256(mask249,f2); 94 | f0 = _mm256_add_epi32(f0,f1); 95 | f0 = _mm256_add_epi32(f0,f2); 96 | 97 | f1 = _mm256_srli_epi32(f0,3); 98 | f0 = _mm256_add_epi32(f0,mask6DB); 99 | f0 = _mm256_sub_epi32(f0,f1); 100 | 101 | f1 = _mm256_slli_epi32(f0,10); 102 | f2 = _mm256_srli_epi32(f0,12); 103 | f3 = _mm256_srli_epi32(f0, 2); 104 | f0 = _mm256_and_si256(f0,mask07); 105 | f1 = _mm256_and_si256(f1,mask70); 106 | f2 = _mm256_and_si256(f2,mask07); 107 | f3 = _mm256_and_si256(f3,mask70); 108 | f0 = _mm256_add_epi16(f0,f1); 109 | f1 = _mm256_add_epi16(f2,f3); 110 | f0 = _mm256_sub_epi16(f0,mask3); 111 | f1 = _mm256_sub_epi16(f1,mask3); 112 | 113 | f2 = _mm256_unpacklo_epi32(f0,f1); 114 | f3 = _mm256_unpackhi_epi32(f0,f1); 115 | 116 | f0 = _mm256_permute2x128_si256(f2,f3,0x20); 117 | f1 = _mm256_permute2x128_si256(f2,f3,0x31); 118 | 119 | _mm256_store_si256(&r->vec[2*i+0], f0); 120 | _mm256_store_si256(&r->vec[2*i+1], f1); 121 | } 122 | } 123 | #endif 124 | 125 | /* buf 32 bytes longer for cbd3 */ 126 | void poly_cbd_eta1(poly *r, const __m256i buf[KYBER_ETA1*KYBER_N/128+1]) 127 | { 128 | #if KYBER_ETA1 == 2 129 | cbd2(r, buf); 130 | #elif KYBER_ETA1 == 3 131 | cbd3(r, (uint8_t *)buf); 132 | #else 133 | #error "This implementation requires eta1 in {2,3}" 134 | #endif 135 | } 136 | 137 | void poly_cbd_eta2(poly *r, const __m256i buf[KYBER_ETA2*KYBER_N/128]) 138 | { 139 | #if KYBER_ETA2 == 2 140 | cbd2(r, buf); 141 | #else 142 | #error "This implementation requires eta2 = 2" 143 | #endif 144 | } 145 | 
-------------------------------------------------------------------------------- /ref/ntt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "params.h" 3 | #include "ntt.h" 4 | #include "reduce.h" 5 | 6 | /* Code to generate zetas and zetas_inv used in the number-theoretic transform: 7 | 8 | #define KYBER_ROOT_OF_UNITY 17 9 | 10 | static const uint8_t tree[128] = { 11 | 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, 12 | 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, 13 | 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, 14 | 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, 15 | 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, 16 | 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, 17 | 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, 18 | 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 19 | }; 20 | 21 | void init_ntt() { 22 | unsigned int i; 23 | int16_t tmp[128]; 24 | 25 | tmp[0] = MONT; 26 | for(i=1;i<128;i++) 27 | tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); 28 | 29 | for(i=0;i<128;i++) { 30 | zetas[i] = tmp[tree[i]]; 31 | if(zetas[i] > KYBER_Q/2) 32 | zetas[i] -= KYBER_Q; 33 | if(zetas[i] < -KYBER_Q/2) 34 | zetas[i] += KYBER_Q; 35 | } 36 | } 37 | */ 38 | 39 | const int16_t zetas[128] = { 40 | -1044, -758, -359, -1517, 1493, 1422, 287, 202, 41 | -171, 622, 1577, 182, 962, -1202, -1474, 1468, 42 | 573, -1325, 264, 383, -829, 1458, -1602, -130, 43 | -681, 1017, 732, 608, -1542, 411, -205, -1571, 44 | 1223, 652, -552, 1015, -1293, 1491, -282, -1544, 45 | 516, -8, -320, -666, -1618, -1162, 126, 1469, 46 | -853, -90, -271, 830, 107, -1421, -247, -951, 47 | -398, 961, -1508, -725, 448, -1065, 677, -1275, 48 | -1103, 430, 555, 843, -1251, 871, 1550, 105, 49 | 422, 587, 177, -235, -291, -460, 1574, 1653, 50 | -246, 778, 1159, -147, -777, 1483, -602, 1119, 51 
| -1590, 644, -872, 349, 418, 329, -156, -75, 52 | 817, 1097, 603, 610, 1322, -1285, -1465, 384, 53 | -1215, -136, 1218, -1335, -874, 220, -1187, -1659, 54 | -1185, -1530, -1278, 794, -1510, -854, -870, 478, 55 | -108, -308, 996, 991, 958, -1460, 1522, 1628 56 | }; 57 | 58 | /************************************************* 59 | * Name: fqmul 60 | * 61 | * Description: Multiplication followed by Montgomery reduction 62 | * 63 | * Arguments: - int16_t a: first factor 64 | * - int16_t b: second factor 65 | * 66 | * Returns 16-bit integer congruent to a*b*R^{-1} mod q 67 | **************************************************/ 68 | static int16_t fqmul(int16_t a, int16_t b) { 69 | return montgomery_reduce((int32_t)a*b); 70 | } 71 | 72 | /************************************************* 73 | * Name: ntt 74 | * 75 | * Description: In-place number-theoretic transform (NTT) in Rq. 76 | * Input is in standard order, output is in bitreversed order 77 | * 78 | * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq 79 | **************************************************/ 80 | void ntt(int16_t r[256]) { 81 | unsigned int len, start, j, k; 82 | int16_t t, zeta; 83 | 84 | k = 1; 85 | for(len = 128; len >= 2; len >>= 1) { 86 | for(start = 0; start < 256; start = j + len) { 87 | zeta = zetas[k++]; 88 | for(j = start; j < start + len; j++) { 89 | t = fqmul(zeta, r[j + len]); 90 | r[j + len] = r[j] - t; 91 | r[j] = r[j] + t; 92 | } 93 | } 94 | } 95 | } 96 | 97 | /************************************************* 98 | * Name: invntt 99 | * 100 | * Description: In-place inverse number-theoretic transform in Rq and 101 | * multiplication by Montgomery factor 2^16. 
102 | * Input is in bitreversed order, output is in standard order 103 | * 104 | * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq 105 | **************************************************/ 106 | void invntt(int16_t r[256]) { 107 | unsigned int start, len, j, k; 108 | int16_t t, zeta; 109 | const int16_t f = 1441; // mont^2/128 110 | 111 | k = 127; 112 | for(len = 2; len <= 128; len <<= 1) { 113 | for(start = 0; start < 256; start = j + len) { 114 | zeta = zetas[k--]; 115 | for(j = start; j < start + len; j++) { 116 | t = r[j]; 117 | r[j] = barrett_reduce(t + r[j + len]); 118 | r[j + len] = r[j + len] - t; 119 | r[j + len] = fqmul(zeta, r[j + len]); 120 | } 121 | } 122 | } 123 | 124 | for(j = 0; j < 256; j++) 125 | r[j] = fqmul(r[j], f); 126 | } 127 | 128 | /************************************************* 129 | * Name: basemul 130 | * 131 | * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) 132 | * used for multiplication of elements in Rq in NTT domain 133 | * 134 | * Arguments: - int16_t r[2]: pointer to the output polynomial 135 | * - const int16_t a[2]: pointer to the first factor 136 | * - const int16_t b[2]: pointer to the second factor 137 | * - int16_t zeta: integer defining the reduction polynomial 138 | **************************************************/ 139 | void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) 140 | { 141 | r[0] = fqmul(a[1], b[1]); 142 | r[0] = fqmul(r[0], zeta); 143 | r[0] += fqmul(a[0], b[0]); 144 | r[1] = fqmul(a[0], b[1]); 145 | r[1] += fqmul(a[1], b[0]); 146 | } 147 | -------------------------------------------------------------------------------- /avx2/shuffle.S: -------------------------------------------------------------------------------- 1 | #include "consts.h" 2 | .include "fq.inc" 3 | .include "shuffle.inc" 4 | 5 | /* 6 | nttpack_avx: 7 | #load 8 | vmovdqa (%rdi),%ymm4 9 | vmovdqa 32(%rdi),%ymm5 10 | vmovdqa 64(%rdi),%ymm6 11 | vmovdqa 96(%rdi),%ymm7 
12 | vmovdqa 128(%rdi),%ymm8 13 | vmovdqa 160(%rdi),%ymm9 14 | vmovdqa 192(%rdi),%ymm10 15 | vmovdqa 224(%rdi),%ymm11 16 | 17 | shuffle1 4,5,3,5 18 | shuffle1 6,7,4,7 19 | shuffle1 8,9,6,9 20 | shuffle1 10,11,8,11 21 | 22 | shuffle2 3,4,10,4 23 | shuffle2 6,8,3,8 24 | shuffle2 5,7,6,7 25 | shuffle2 9,11,5,11 26 | 27 | shuffle4 10,3,9,3 28 | shuffle4 6,5,10,5 29 | shuffle4 4,8,6,8 30 | shuffle4 7,11,4,11 31 | 32 | shuffle8 9,10,7,10 33 | shuffle8 6,4,9,4 34 | shuffle8 3,5,6,5 35 | shuffle8 8,11,3,11 36 | 37 | #store 38 | vmovdqa %ymm7,(%rdi) 39 | vmovdqa %ymm9,32(%rdi) 40 | vmovdqa %ymm6,64(%rdi) 41 | vmovdqa %ymm3,96(%rdi) 42 | vmovdqa %ymm10,128(%rdi) 43 | vmovdqa %ymm4,160(%rdi) 44 | vmovdqa %ymm5,192(%rdi) 45 | vmovdqa %ymm11,224(%rdi) 46 | 47 | ret 48 | */ 49 | 50 | .text 51 | nttunpack128_avx: 52 | #load 53 | vmovdqa (%rdi),%ymm4 54 | vmovdqa 32(%rdi),%ymm5 55 | vmovdqa 64(%rdi),%ymm6 56 | vmovdqa 96(%rdi),%ymm7 57 | vmovdqa 128(%rdi),%ymm8 58 | vmovdqa 160(%rdi),%ymm9 59 | vmovdqa 192(%rdi),%ymm10 60 | vmovdqa 224(%rdi),%ymm11 61 | 62 | shuffle8 4,8,3,8 63 | shuffle8 5,9,4,9 64 | shuffle8 6,10,5,10 65 | shuffle8 7,11,6,11 66 | 67 | shuffle4 3,5,7,5 68 | shuffle4 8,10,3,10 69 | shuffle4 4,6,8,6 70 | shuffle4 9,11,4,11 71 | 72 | shuffle2 7,8,9,8 73 | shuffle2 5,6,7,6 74 | shuffle2 3,4,5,4 75 | shuffle2 10,11,3,11 76 | 77 | shuffle1 9,5,10,5 78 | shuffle1 8,4,9,4 79 | shuffle1 7,3,8,3 80 | shuffle1 6,11,7,11 81 | 82 | #store 83 | vmovdqa %ymm10,(%rdi) 84 | vmovdqa %ymm5,32(%rdi) 85 | vmovdqa %ymm9,64(%rdi) 86 | vmovdqa %ymm4,96(%rdi) 87 | vmovdqa %ymm8,128(%rdi) 88 | vmovdqa %ymm3,160(%rdi) 89 | vmovdqa %ymm7,192(%rdi) 90 | vmovdqa %ymm11,224(%rdi) 91 | 92 | ret 93 | 94 | .global cdecl(nttunpack_avx) 95 | cdecl(nttunpack_avx): 96 | call nttunpack128_avx 97 | add $256,%rdi 98 | call nttunpack128_avx 99 | ret 100 | 101 | ntttobytes128_avx: 102 | #load 103 | vmovdqa (%rsi),%ymm5 104 | vmovdqa 32(%rsi),%ymm6 105 | vmovdqa 64(%rsi),%ymm7 106 | vmovdqa 96(%rsi),%ymm8 
107 | vmovdqa 128(%rsi),%ymm9 108 | vmovdqa 160(%rsi),%ymm10 109 | vmovdqa 192(%rsi),%ymm11 110 | vmovdqa 224(%rsi),%ymm12 111 | 112 | #csubq 113 | csubq 5,13 114 | csubq 6,13 115 | csubq 7,13 116 | csubq 8,13 117 | csubq 9,13 118 | csubq 10,13 119 | csubq 11,13 120 | csubq 12,13 121 | 122 | #bitpack 123 | vpsllw $12,%ymm6,%ymm4 124 | vpor %ymm4,%ymm5,%ymm4 125 | 126 | vpsrlw $4,%ymm6,%ymm5 127 | vpsllw $8,%ymm7,%ymm6 128 | vpor %ymm5,%ymm6,%ymm5 129 | 130 | vpsrlw $8,%ymm7,%ymm6 131 | vpsllw $4,%ymm8,%ymm7 132 | vpor %ymm6,%ymm7,%ymm6 133 | 134 | vpsllw $12,%ymm10,%ymm7 135 | vpor %ymm7,%ymm9,%ymm7 136 | 137 | vpsrlw $4,%ymm10,%ymm8 138 | vpsllw $8,%ymm11,%ymm9 139 | vpor %ymm8,%ymm9,%ymm8 140 | 141 | vpsrlw $8,%ymm11,%ymm9 142 | vpsllw $4,%ymm12,%ymm10 143 | vpor %ymm9,%ymm10,%ymm9 144 | 145 | shuffle1 4,5,3,5 146 | shuffle1 6,7,4,7 147 | shuffle1 8,9,6,9 148 | 149 | shuffle2 3,4,8,4 150 | shuffle2 6,5,3,5 151 | shuffle2 7,9,6,9 152 | 153 | shuffle4 8,3,7,3 154 | shuffle4 6,4,8,4 155 | shuffle4 5,9,6,9 156 | 157 | shuffle8 7,8,5,8 158 | shuffle8 6,3,7,3 159 | shuffle8 4,9,6,9 160 | 161 | #store 162 | vmovdqu %ymm5,(%rdi) 163 | vmovdqu %ymm7,32(%rdi) 164 | vmovdqu %ymm6,64(%rdi) 165 | vmovdqu %ymm8,96(%rdi) 166 | vmovdqu %ymm3,128(%rdi) 167 | vmovdqu %ymm9,160(%rdi) 168 | 169 | ret 170 | 171 | .global cdecl(ntttobytes_avx) 172 | cdecl(ntttobytes_avx): 173 | #consts 174 | vmovdqa _16XQ*2(%rdx),%ymm0 175 | call ntttobytes128_avx 176 | add $256,%rsi 177 | add $192,%rdi 178 | call ntttobytes128_avx 179 | ret 180 | 181 | nttfrombytes128_avx: 182 | #load 183 | vmovdqu (%rsi),%ymm4 184 | vmovdqu 32(%rsi),%ymm5 185 | vmovdqu 64(%rsi),%ymm6 186 | vmovdqu 96(%rsi),%ymm7 187 | vmovdqu 128(%rsi),%ymm8 188 | vmovdqu 160(%rsi),%ymm9 189 | 190 | shuffle8 4,7,3,7 191 | shuffle8 5,8,4,8 192 | shuffle8 6,9,5,9 193 | 194 | shuffle4 3,8,6,8 195 | shuffle4 7,5,3,5 196 | shuffle4 4,9,7,9 197 | 198 | shuffle2 6,5,4,5 199 | shuffle2 8,7,6,7 200 | shuffle2 3,9,8,9 201 | 202 | shuffle1 
4,7,10,7 203 | shuffle1 5,8,4,8 204 | shuffle1 6,9,5,9 205 | 206 | #bitunpack 207 | vpsrlw $12,%ymm10,%ymm11 208 | vpsllw $4,%ymm7,%ymm12 209 | vpor %ymm11,%ymm12,%ymm11 210 | vpand %ymm0,%ymm10,%ymm10 211 | vpand %ymm0,%ymm11,%ymm11 212 | 213 | vpsrlw $8,%ymm7,%ymm12 214 | vpsllw $8,%ymm4,%ymm13 215 | vpor %ymm12,%ymm13,%ymm12 216 | vpand %ymm0,%ymm12,%ymm12 217 | 218 | vpsrlw $4,%ymm4,%ymm13 219 | vpand %ymm0,%ymm13,%ymm13 220 | 221 | vpsrlw $12,%ymm8,%ymm14 222 | vpsllw $4,%ymm5,%ymm15 223 | vpor %ymm14,%ymm15,%ymm14 224 | vpand %ymm0,%ymm8,%ymm8 225 | vpand %ymm0,%ymm14,%ymm14 226 | 227 | vpsrlw $8,%ymm5,%ymm15 228 | vpsllw $8,%ymm9,%ymm1 229 | vpor %ymm15,%ymm1,%ymm15 230 | vpand %ymm0,%ymm15,%ymm15 231 | 232 | vpsrlw $4,%ymm9,%ymm1 233 | vpand %ymm0,%ymm1,%ymm1 234 | 235 | #store 236 | vmovdqa %ymm10,(%rdi) 237 | vmovdqa %ymm11,32(%rdi) 238 | vmovdqa %ymm12,64(%rdi) 239 | vmovdqa %ymm13,96(%rdi) 240 | vmovdqa %ymm8,128(%rdi) 241 | vmovdqa %ymm14,160(%rdi) 242 | vmovdqa %ymm15,192(%rdi) 243 | vmovdqa %ymm1,224(%rdi) 244 | 245 | ret 246 | 247 | .global cdecl(nttfrombytes_avx) 248 | cdecl(nttfrombytes_avx): 249 | #consts 250 | vmovdqa _16XMASK*2(%rdx),%ymm0 251 | call nttfrombytes128_avx 252 | add $256,%rdi 253 | add $192,%rsi 254 | call nttfrombytes128_avx 255 | ret 256 | -------------------------------------------------------------------------------- /avx2/invntt.S: -------------------------------------------------------------------------------- 1 | #include "consts.h" 2 | .include "shuffle.inc" 3 | .include "fq.inc" 4 | 5 | .macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 6 | vpsubw %ymm\rl0,%ymm\rh0,%ymm12 7 | vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 8 | vpsubw %ymm\rl1,%ymm\rh1,%ymm13 9 | 10 | vpmullw %ymm\zl0,%ymm12,%ymm\rh0 11 | vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 12 | vpsubw %ymm\rl2,%ymm\rh2,%ymm14 13 | 14 | vpmullw %ymm\zl0,%ymm13,%ymm\rh1 15 | vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 16 | vpsubw %ymm\rl3,%ymm\rh3,%ymm15 17 | 18 | vpmullw 
%ymm\zl1,%ymm14,%ymm\rh2 19 | vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 20 | vpmullw %ymm\zl1,%ymm15,%ymm\rh3 21 | 22 | vpmulhw %ymm\zh0,%ymm12,%ymm12 23 | vpmulhw %ymm\zh0,%ymm13,%ymm13 24 | 25 | vpmulhw %ymm\zh1,%ymm14,%ymm14 26 | vpmulhw %ymm\zh1,%ymm15,%ymm15 27 | 28 | vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 29 | 30 | vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 31 | 32 | vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 33 | vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 34 | 35 | # 36 | 37 | # 38 | 39 | vpsubw %ymm\rh0,%ymm12,%ymm\rh0 40 | 41 | vpsubw %ymm\rh1,%ymm13,%ymm\rh1 42 | 43 | vpsubw %ymm\rh2,%ymm14,%ymm\rh2 44 | vpsubw %ymm\rh3,%ymm15,%ymm\rh3 45 | .endm 46 | 47 | .macro intt_levels0t5 off 48 | /* level 0 */ 49 | vmovdqa _16XFLO*2(%rsi),%ymm2 50 | vmovdqa _16XFHI*2(%rsi),%ymm3 51 | 52 | vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 53 | vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 54 | vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 55 | vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 56 | 57 | fqmulprecomp 2,3,4 58 | fqmulprecomp 2,3,6 59 | fqmulprecomp 2,3,5 60 | fqmulprecomp 2,3,7 61 | 62 | vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 63 | vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 64 | vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 65 | vmovdqa (128*\off+112)*2(%rdi),%ymm11 66 | 67 | fqmulprecomp 2,3,8 68 | fqmulprecomp 2,3,10 69 | fqmulprecomp 2,3,9 70 | fqmulprecomp 2,3,11 71 | 72 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 73 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 74 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 75 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 76 | vmovdqa _REVIDXB*2(%rsi),%ymm12 77 | vpshufb %ymm12,%ymm15,%ymm15 78 | vpshufb %ymm12,%ymm1,%ymm1 79 | vpshufb %ymm12,%ymm2,%ymm2 80 | vpshufb %ymm12,%ymm3,%ymm3 81 | 82 | butterfly 4,5,8,9,6,7,10,11,15,1,2,3 83 | 84 | /* level 1 */ 85 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 86 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 87 | vmovdqa _REVIDXB*2(%rsi),%ymm1 88 | vpshufb %ymm1,%ymm2,%ymm2 89 | vpshufb 
%ymm1,%ymm3,%ymm3 90 | 91 | butterfly 4,5,6,7,8,9,10,11,2,2,3,3 92 | 93 | shuffle1 4,5,3,5 94 | shuffle1 6,7,4,7 95 | shuffle1 8,9,6,9 96 | shuffle1 10,11,8,11 97 | 98 | /* level 2 */ 99 | vmovdqa _REVIDXD*2(%rsi),%ymm12 100 | vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 101 | vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 102 | 103 | butterfly 3,4,6,8,5,7,9,11,2,2,10,10 104 | 105 | vmovdqa _16XV*2(%rsi),%ymm1 106 | red16 3 107 | 108 | shuffle2 3,4,10,4 109 | shuffle2 6,8,3,8 110 | shuffle2 5,7,6,7 111 | shuffle2 9,11,5,11 112 | 113 | /* level 3 */ 114 | vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 115 | vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 116 | 117 | butterfly 10,3,6,5,4,8,7,11,2,2,9,9 118 | 119 | shuffle4 10,3,9,3 120 | shuffle4 6,5,10,5 121 | shuffle4 4,8,6,8 122 | shuffle4 7,11,4,11 123 | 124 | /* level 4 */ 125 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 126 | vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 127 | 128 | butterfly 9,10,6,4,3,5,8,11,2,2,7,7 129 | 130 | red16 9 131 | 132 | shuffle8 9,10,7,10 133 | shuffle8 6,4,9,4 134 | shuffle8 3,5,6,5 135 | shuffle8 8,11,3,11 136 | 137 | /* level 5 */ 138 | vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 139 | vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 140 | 141 | butterfly 7,9,6,3,10,4,5,11,2,2,8,8 142 | 143 | vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) 144 | vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) 145 | vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) 146 | vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) 147 | vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) 148 | vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) 149 | vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) 150 | vmovdqa %ymm11,(128*\off+112)*2(%rdi) 151 | .endm 152 | 153 | .macro intt_level6 off 154 | /* level 6 */ 155 | vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 156 | vmovdqa (64*\off+128)*2(%rdi),%ymm8 157 | vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 158 | vmovdqa (64*\off+144)*2(%rdi),%ymm9 159 | vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 160 
| 161 | vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 162 | vmovdqa (64*\off+160)*2(%rdi),%ymm10 163 | vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 164 | vmovdqa (64*\off+176)*2(%rdi),%ymm11 165 | vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 166 | 167 | butterfly 4,5,6,7,8,9,10,11 168 | 169 | .if \off == 0 170 | red16 4 171 | .endif 172 | 173 | vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) 174 | vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) 175 | vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) 176 | vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) 177 | vmovdqa %ymm8,(64*\off+128)*2(%rdi) 178 | vmovdqa %ymm9,(64*\off+144)*2(%rdi) 179 | vmovdqa %ymm10,(64*\off+160)*2(%rdi) 180 | vmovdqa %ymm11,(64*\off+176)*2(%rdi) 181 | .endm 182 | 183 | .text 184 | .global cdecl(invntt_avx) 185 | cdecl(invntt_avx): 186 | vmovdqa _16XQ*2(%rsi),%ymm0 187 | 188 | intt_levels0t5 0 189 | intt_levels0t5 1 190 | 191 | intt_level6 0 192 | intt_level6 1 193 | ret 194 | -------------------------------------------------------------------------------- /avx2/consts.c: -------------------------------------------------------------------------------- 1 | #include "align.h" 2 | #include "params.h" 3 | #include "consts.h" 4 | 5 | #define Q KYBER_Q 6 | #define MONT -1044 // 2^16 mod q 7 | #define QINV -3327 // q^-1 mod 2^16 8 | #define V 20159 // floor(2^26/q + 0.5) 9 | #define FHI 1441 // mont^2/128 10 | #define FLO -10079 // qinv*FHI 11 | #define MONTSQHI 1353 // mont^2 12 | #define MONTSQLO 20553 // qinv*MONTSQHI 13 | #define MASK 4095 14 | #define SHIFT 32 15 | 16 | const qdata_t qdata = {{ 17 | #define _16XQ 0 18 | Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, 19 | 20 | #define _16XQINV 16 21 | QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, 22 | QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, 23 | 24 | #define _16XV 32 25 | V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, 26 | 27 | #define _16XFLO 48 28 | FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, 29 | FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, 30 | 31 | #define _16XFHI 64 32 | FHI, FHI, FHI, FHI, FHI, FHI, FHI, 
FHI, 33 | FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, 34 | 35 | #define _16XMONTSQLO 80 36 | MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, 37 | MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, 38 | MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, 39 | MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, 40 | 41 | #define _16XMONTSQHI 96 42 | MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, 43 | MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, 44 | MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, 45 | MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, 46 | 47 | #define _16XMASK 112 48 | MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, 49 | MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, 50 | 51 | #define _REVIDXB 128 52 | 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, 53 | 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, 54 | 55 | #define _REVIDXD 144 56 | 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, 57 | 58 | #define _ZETAS_EXP 160 59 | 31498, 31498, 31498, 31498, -758, -758, -758, -758, 60 | 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, 61 | 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, 62 | 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, 63 | -359, -359, -359, -359, -359, -359, -359, -359, 64 | -359, -359, -359, -359, -359, -359, -359, -359, 65 | 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, 66 | -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, 67 | 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, 68 | 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, 69 | -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, 70 | -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, 71 | -171, -171, -171, -171, 622, 622, 622, 622, 72 | 1577, 1577, 1577, 1577, 182, 182, 182, 182, 73 | -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, 74 | 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, 75 | 573, 573, -1325, -1325, 264, 264, 383, 383, 76 | -829, -829, 1458, 1458, -1602, -1602, -130, -130, 77 | -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, 78 | -12796, 26616, 16064, -12442, 9134, 
-650, -25986, 27837, 79 | 1223, 652, -552, 1015, -1293, 1491, -282, -1544, 80 | 516, -8, -320, -666, -1618, -1162, 126, 1469, 81 | -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, 82 | 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, 83 | -1103, 555, -1251, 1550, 422, 177, -291, 1574, 84 | -246, 1159, -777, -602, -1590, -872, 418, -156, 85 | 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, 86 | -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, 87 | 430, 843, 871, 105, 587, -235, -460, 1653, 88 | 778, -147, 1483, 1119, 644, 349, 329, -75, 89 | 787, 787, 787, 787, 787, 787, 787, 787, 90 | 787, 787, 787, 787, 787, 787, 787, 787, 91 | -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, 92 | -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, 93 | 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, 94 | -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, 95 | 287, 287, 287, 287, 287, 287, 287, 287, 96 | 202, 202, 202, 202, 202, 202, 202, 202, 97 | 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, 98 | -11202, -11202, -11202, -11202, 31164, 31164, 31164, 31164, 99 | 962, 962, 962, 962, -1202, -1202, -1202, -1202, 100 | -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, 101 | -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, 102 | 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, 103 | -681, -681, 1017, 1017, 732, 732, 608, 608, 104 | -1542, -1542, 411, 411, -205, -205, -1571, -1571, 105 | 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, 106 | 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, 107 | -853, -90, -271, 830, 107, -1421, -247, -951, 108 | -398, 961, -1508, -725, 448, -1065, 677, -1275, 109 | -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, 110 | 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, 111 | 817, 603, 1322, -1465, -1215, 1218, -874, -1187, 112 | -1185, -1278, -1510, -870, -108, 996, 958, 1522, 113 | 20297, 2146, 15355, -32384, -6280, -14903, 
-11044, 14469, 114 | -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, 115 | 1097, 610, -1285, 384, -136, -1335, 220, -1659, 116 | -1530, 794, -854, 478, -308, 991, -1460, 1628, 117 | 118 | #define _16XSHIFT 624 119 | SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, 120 | SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT 121 | }}; 122 | -------------------------------------------------------------------------------- /avx2/keccak4x/KeccakP-brg_endian.h: -------------------------------------------------------------------------------- 1 | /* 2 | --------------------------------------------------------------------------- 3 | Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. 4 | 5 | LICENSE TERMS 6 | 7 | The redistribution and use of this software (with or without changes) 8 | is allowed without the payment of fees or royalties provided that: 9 | 10 | 1. source code distributions include the above copyright notice, this 11 | list of conditions and the following disclaimer; 12 | 13 | 2. binary distributions include the above copyright notice, this list 14 | of conditions and the following disclaimer in their documentation; 15 | 16 | 3. the name of the copyright holder is not used to endorse products 17 | built using this software without specific written permission. 18 | 19 | DISCLAIMER 20 | 21 | This software is provided 'as is' with no explicit or implied warranties 22 | in respect of its properties, including, but not limited to, correctness 23 | and/or fitness for purpose. 
24 | --------------------------------------------------------------------------- 25 | Issue Date: 20/12/2007 26 | Changes for ARM 9/9/2010 27 | */ 28 | 29 | #ifndef _KECCAKP_BRG_ENDIAN_H 30 | #define _KECCAKP_BRG_ENDIAN_H 31 | 32 | #define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ 33 | #define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ 34 | 35 | #if 0 36 | /* Include files where endian defines and byteswap functions may reside */ 37 | #if defined( __sun ) 38 | # include 39 | #elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) 40 | # include 41 | #elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ 42 | defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) 43 | # include 44 | #elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) 45 | # if !defined( __MINGW32__ ) && !defined( _AIX ) 46 | # include 47 | # if !defined( __BEOS__ ) 48 | # include 49 | # endif 50 | # endif 51 | #endif 52 | #endif 53 | 54 | /* Now attempt to set the define for platform byte order using any */ 55 | /* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ 56 | /* seem to encompass most endian symbol definitions */ 57 | 58 | #if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) 59 | # if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN 60 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 61 | # elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN 62 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 63 | # endif 64 | #elif defined( BIG_ENDIAN ) 65 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 66 | #elif defined( LITTLE_ENDIAN ) 67 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 68 | #endif 69 | 70 | #if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) 71 | # if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN 72 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 73 | # elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN 74 | # define 
PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 75 | # endif 76 | #elif defined( _BIG_ENDIAN ) 77 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 78 | #elif defined( _LITTLE_ENDIAN ) 79 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 80 | #endif 81 | 82 | #if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) 83 | # if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN 84 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 85 | # elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN 86 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 87 | # endif 88 | #elif defined( __BIG_ENDIAN ) 89 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 90 | #elif defined( __LITTLE_ENDIAN ) 91 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 92 | #endif 93 | 94 | #if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) 95 | # if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ 96 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 97 | # elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ 98 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 99 | # endif 100 | #elif defined( __BIG_ENDIAN__ ) 101 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 102 | #elif defined( __LITTLE_ENDIAN__ ) 103 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 104 | #endif 105 | 106 | /* if the platform byte order could not be determined, then try to */ 107 | /* set this define using common machine defines */ 108 | #if !defined(PLATFORM_BYTE_ORDER) 109 | 110 | #if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ 111 | defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ 112 | defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ 113 | defined( vax ) || defined( vms ) || defined( VMS ) || \ 114 | defined( __VMS ) || defined( _M_X64 ) 115 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 116 | 117 | #elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ 118 | defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ 119 | 
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ 120 | defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ 121 | defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ 122 | defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ 123 | defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) 124 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 125 | 126 | #elif defined(__arm__) 127 | # ifdef __BIG_ENDIAN 128 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 129 | # else 130 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 131 | # endif 132 | #elif 1 /* **** EDIT HERE IF NECESSARY **** */ 133 | # define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN 134 | #elif 0 /* **** EDIT HERE IF NECESSARY **** */ 135 | # define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN 136 | #else 137 | # error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order 138 | #endif 139 | 140 | #endif 141 | 142 | #endif 143 | -------------------------------------------------------------------------------- /ref/nistkat/rng.c: -------------------------------------------------------------------------------- 1 | // 2 | // rng.c 3 | // 4 | // Created by Bassham, Lawrence E (Fed) on 8/29/17. 5 | // Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. 
6 | // 7 | 8 | #include 9 | #include "rng.h" 10 | #include 11 | #include 12 | #include 13 | 14 | AES256_CTR_DRBG_struct DRBG_ctx; 15 | 16 | void AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer); 17 | 18 | /* 19 | seedexpander_init() 20 | ctx - stores the current state of an instance of the seed expander 21 | seed - a 32 byte random value 22 | diversifier - an 8 byte diversifier 23 | maxlen - maximum number of bytes (less than 2**32) generated under this seed and diversifier 24 | */ 25 | int 26 | seedexpander_init(AES_XOF_struct *ctx, 27 | unsigned char *seed, 28 | unsigned char *diversifier, 29 | unsigned long maxlen) 30 | { 31 | if ( maxlen >= 0x100000000 ) 32 | return RNG_BAD_MAXLEN; 33 | 34 | ctx->length_remaining = maxlen; 35 | 36 | memcpy(ctx->key, seed, 32); 37 | 38 | memcpy(ctx->ctr, diversifier, 8); 39 | ctx->ctr[11] = maxlen % 256; 40 | maxlen >>= 8; 41 | ctx->ctr[10] = maxlen % 256; 42 | maxlen >>= 8; 43 | ctx->ctr[9] = maxlen % 256; 44 | maxlen >>= 8; 45 | ctx->ctr[8] = maxlen % 256; 46 | memset(ctx->ctr+12, 0x00, 4); 47 | 48 | ctx->buffer_pos = 16; 49 | memset(ctx->buffer, 0x00, 16); 50 | 51 | return RNG_SUCCESS; 52 | } 53 | 54 | /* 55 | seedexpander() 56 | ctx - stores the current state of an instance of the seed expander 57 | x - returns the XOF data 58 | xlen - number of bytes to return 59 | */ 60 | int 61 | seedexpander(AES_XOF_struct *ctx, unsigned char *x, unsigned long xlen) 62 | { 63 | unsigned long offset; 64 | 65 | if ( x == NULL ) 66 | return RNG_BAD_OUTBUF; 67 | if ( xlen >= ctx->length_remaining ) 68 | return RNG_BAD_REQ_LEN; 69 | 70 | ctx->length_remaining -= xlen; 71 | 72 | offset = 0; 73 | while ( xlen > 0 ) { 74 | if ( xlen <= (16-ctx->buffer_pos) ) { // buffer has what we need 75 | memcpy(x+offset, ctx->buffer+ctx->buffer_pos, xlen); 76 | ctx->buffer_pos += xlen; 77 | 78 | return RNG_SUCCESS; 79 | } 80 | 81 | // take what's in the buffer 82 | memcpy(x+offset, ctx->buffer+ctx->buffer_pos, 16-ctx->buffer_pos); 83 
| xlen -= 16-ctx->buffer_pos; 84 | offset += 16-ctx->buffer_pos; 85 | 86 | AES256_ECB(ctx->key, ctx->ctr, ctx->buffer); 87 | ctx->buffer_pos = 0; 88 | 89 | //increment the counter 90 | for (int i=15; i>=12; i--) { 91 | if ( ctx->ctr[i] == 0xff ) 92 | ctx->ctr[i] = 0x00; 93 | else { 94 | ctx->ctr[i]++; 95 | break; 96 | } 97 | } 98 | 99 | } 100 | 101 | return RNG_SUCCESS; 102 | } 103 | 104 | 105 | void handleErrors(void) 106 | { 107 | ERR_print_errors_fp(stderr); 108 | abort(); 109 | } 110 | 111 | // Use whatever AES implementation you have. This uses AES from openSSL library 112 | // key - 256-bit AES key 113 | // ctr - a 128-bit plaintext value 114 | // buffer - a 128-bit ciphertext value 115 | void 116 | AES256_ECB(unsigned char *key, unsigned char *ctr, unsigned char *buffer) 117 | { 118 | EVP_CIPHER_CTX *ctx; 119 | 120 | int len; 121 | 122 | int ciphertext_len; 123 | 124 | /* Create and initialise the context */ 125 | if(!(ctx = EVP_CIPHER_CTX_new())) handleErrors(); 126 | 127 | if(1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL)) 128 | handleErrors(); 129 | 130 | if(1 != EVP_EncryptUpdate(ctx, buffer, &len, ctr, 16)) 131 | handleErrors(); 132 | ciphertext_len = len; 133 | 134 | /* Clean up */ 135 | EVP_CIPHER_CTX_free(ctx); 136 | } 137 | 138 | void 139 | randombytes_init(unsigned char *entropy_input, 140 | unsigned char *personalization_string, 141 | int security_strength) 142 | { 143 | unsigned char seed_material[48]; 144 | 145 | memcpy(seed_material, entropy_input, 48); 146 | if (personalization_string) 147 | for (int i=0; i<48; i++) 148 | seed_material[i] ^= personalization_string[i]; 149 | memset(DRBG_ctx.Key, 0x00, 32); 150 | memset(DRBG_ctx.V, 0x00, 16); 151 | AES256_CTR_DRBG_Update(seed_material, DRBG_ctx.Key, DRBG_ctx.V); 152 | DRBG_ctx.reseed_counter = 1; 153 | } 154 | 155 | int 156 | randombytes(unsigned char *x, unsigned long long xlen) 157 | { 158 | unsigned char block[16]; 159 | int i = 0; 160 | 161 | while ( xlen > 0 ) { 162 | 
//increment V 163 | for (int j=15; j>=0; j--) { 164 | if ( DRBG_ctx.V[j] == 0xff ) 165 | DRBG_ctx.V[j] = 0x00; 166 | else { 167 | DRBG_ctx.V[j]++; 168 | break; 169 | } 170 | } 171 | AES256_ECB(DRBG_ctx.Key, DRBG_ctx.V, block); 172 | if ( xlen > 15 ) { 173 | memcpy(x+i, block, 16); 174 | i += 16; 175 | xlen -= 16; 176 | } 177 | else { 178 | memcpy(x+i, block, xlen); 179 | xlen = 0; 180 | } 181 | } 182 | AES256_CTR_DRBG_Update(NULL, DRBG_ctx.Key, DRBG_ctx.V); 183 | DRBG_ctx.reseed_counter++; 184 | 185 | return RNG_SUCCESS; 186 | } 187 | 188 | void 189 | AES256_CTR_DRBG_Update(unsigned char *provided_data, 190 | unsigned char *Key, 191 | unsigned char *V) 192 | { 193 | unsigned char temp[48]; 194 | 195 | for (int i=0; i<3; i++) { 196 | //increment V 197 | for (int j=15; j>=0; j--) { 198 | if ( V[j] == 0xff ) 199 | V[j] = 0x00; 200 | else { 201 | V[j]++; 202 | break; 203 | } 204 | } 205 | 206 | AES256_ECB(Key, V, temp+16*i); 207 | } 208 | if ( provided_data != NULL ) 209 | for (int i=0; i<48; i++) 210 | temp[i] ^= provided_data[i]; 211 | memcpy(Key, temp, 32); 212 | memcpy(V, temp+32, 16); 213 | } 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /ref/kem.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "params.h" 5 | #include "kem.h" 6 | #include "indcpa.h" 7 | #include "verify.h" 8 | #include "symmetric.h" 9 | #include "randombytes.h" 10 | /************************************************* 11 | * Name: crypto_kem_keypair_derand 12 | * 13 | * Description: Generates public and private key 14 | * for CCA-secure Kyber key encapsulation mechanism 15 | * 16 | * Arguments: - uint8_t *pk: pointer to output public key 17 | * (an already allocated array of KYBER_PUBLICKEYBYTES bytes) 18 | * - uint8_t *sk: pointer to output private key 19 | * (an already allocated array of KYBER_SECRETKEYBYTES bytes) 20 
| * - uint8_t *coins: pointer to input randomness 21 | * (an already allocated array filled with 2*KYBER_SYMBYTES random bytes) 22 | ** 23 | * Returns 0 (success) 24 | **************************************************/ 25 | int crypto_kem_keypair_derand(uint8_t *pk, 26 | uint8_t *sk, 27 | const uint8_t *coins) 28 | { 29 | indcpa_keypair_derand(pk, sk, coins); 30 | memcpy(sk+KYBER_INDCPA_SECRETKEYBYTES, pk, KYBER_PUBLICKEYBYTES); 31 | hash_h(sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); 32 | /* Value z for pseudo-random output on reject */ 33 | memcpy(sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES); 34 | return 0; 35 | } 36 | 37 | /************************************************* 38 | * Name: crypto_kem_keypair 39 | * 40 | * Description: Generates public and private key 41 | * for CCA-secure Kyber key encapsulation mechanism 42 | * 43 | * Arguments: - uint8_t *pk: pointer to output public key 44 | * (an already allocated array of KYBER_PUBLICKEYBYTES bytes) 45 | * - uint8_t *sk: pointer to output private key 46 | * (an already allocated array of KYBER_SECRETKEYBYTES bytes) 47 | * 48 | * Returns 0 (success) 49 | **************************************************/ 50 | int crypto_kem_keypair(uint8_t *pk, 51 | uint8_t *sk) 52 | { 53 | uint8_t coins[2*KYBER_SYMBYTES]; 54 | randombytes(coins, 2*KYBER_SYMBYTES); 55 | crypto_kem_keypair_derand(pk, sk, coins); 56 | return 0; 57 | } 58 | 59 | /************************************************* 60 | * Name: crypto_kem_enc_derand 61 | * 62 | * Description: Generates cipher text and shared 63 | * secret for given public key 64 | * 65 | * Arguments: - uint8_t *ct: pointer to output cipher text 66 | * (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) 67 | * - uint8_t *ss: pointer to output shared secret 68 | * (an already allocated array of KYBER_SSBYTES bytes) 69 | * - const uint8_t *pk: pointer to input public key 70 | * (an already allocated array of 
KYBER_PUBLICKEYBYTES bytes) 71 | * - const uint8_t *coins: pointer to input randomness 72 | * (an already allocated array filled with KYBER_SYMBYTES random bytes) 73 | ** 74 | * Returns 0 (success) 75 | **************************************************/ 76 | int crypto_kem_enc_derand(uint8_t *ct, 77 | uint8_t *ss, 78 | const uint8_t *pk, 79 | const uint8_t *coins) 80 | { 81 | uint8_t buf[2*KYBER_SYMBYTES]; 82 | /* Will contain key, coins */ 83 | uint8_t kr[2*KYBER_SYMBYTES]; 84 | 85 | memcpy(buf, coins, KYBER_SYMBYTES); 86 | 87 | /* Multitarget countermeasure for coins + contributory KEM */ 88 | hash_h(buf+KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); 89 | hash_g(kr, buf, 2*KYBER_SYMBYTES); 90 | 91 | /* coins are in kr+KYBER_SYMBYTES */ 92 | indcpa_enc(ct, buf, pk, kr+KYBER_SYMBYTES); 93 | 94 | memcpy(ss,kr,KYBER_SYMBYTES); 95 | return 0; 96 | } 97 | 98 | /************************************************* 99 | * Name: crypto_kem_enc 100 | * 101 | * Description: Generates cipher text and shared 102 | * secret for given public key 103 | * 104 | * Arguments: - uint8_t *ct: pointer to output cipher text 105 | * (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) 106 | * - uint8_t *ss: pointer to output shared secret 107 | * (an already allocated array of KYBER_SSBYTES bytes) 108 | * - const uint8_t *pk: pointer to input public key 109 | * (an already allocated array of KYBER_PUBLICKEYBYTES bytes) 110 | * 111 | * Returns 0 (success) 112 | **************************************************/ 113 | int crypto_kem_enc(uint8_t *ct, 114 | uint8_t *ss, 115 | const uint8_t *pk) 116 | { 117 | uint8_t coins[KYBER_SYMBYTES]; 118 | randombytes(coins, KYBER_SYMBYTES); 119 | crypto_kem_enc_derand(ct, ss, pk, coins); 120 | return 0; 121 | } 122 | 123 | /************************************************* 124 | * Name: crypto_kem_dec 125 | * 126 | * Description: Generates shared secret for given 127 | * cipher text and private key 128 | * 129 | * Arguments: - uint8_t *ss: pointer 
to output shared secret 130 | * (an already allocated array of KYBER_SSBYTES bytes) 131 | * - const uint8_t *ct: pointer to input cipher text 132 | * (an already allocated array of KYBER_CIPHERTEXTBYTES bytes) 133 | * - const uint8_t *sk: pointer to input private key 134 | * (an already allocated array of KYBER_SECRETKEYBYTES bytes) 135 | * 136 | * Returns 0. 137 | * 138 | * On failure, ss will contain a pseudo-random value. 139 | **************************************************/ 140 | int crypto_kem_dec(uint8_t *ss, 141 | const uint8_t *ct, 142 | const uint8_t *sk) 143 | { 144 | int fail; 145 | uint8_t buf[2*KYBER_SYMBYTES]; 146 | /* Will contain key, coins */ 147 | uint8_t kr[2*KYBER_SYMBYTES]; 148 | // uint8_t cmp[KYBER_CIPHERTEXTBYTES+KYBER_SYMBYTES]; 149 | uint8_t cmp[KYBER_CIPHERTEXTBYTES]; 150 | const uint8_t *pk = sk+KYBER_INDCPA_SECRETKEYBYTES; 151 | 152 | indcpa_dec(buf, ct, sk); 153 | 154 | /* Multitarget countermeasure for coins + contributory KEM */ 155 | memcpy(buf+KYBER_SYMBYTES, sk+KYBER_SECRETKEYBYTES-2*KYBER_SYMBYTES, KYBER_SYMBYTES); 156 | hash_g(kr, buf, 2*KYBER_SYMBYTES); 157 | 158 | /* coins are in kr+KYBER_SYMBYTES */ 159 | indcpa_enc(cmp, buf, pk, kr+KYBER_SYMBYTES); 160 | 161 | fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES); 162 | 163 | /* Compute rejection key */ 164 | rkprf(ss,sk+KYBER_SECRETKEYBYTES-KYBER_SYMBYTES,ct); 165 | 166 | /* Copy true key to return buffer if fail is false */ 167 | cmov(ss,kr,KYBER_SYMBYTES,!fail); 168 | 169 | return 0; 170 | } 171 | -------------------------------------------------------------------------------- /ref/nistkat/PQCgenKAT_kem.c: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // PQCgenKAT_kem.c 4 | // 5 | // Created by Bassham, Lawrence E (Fed) on 8/29/17. 6 | // Copyright © 2017 Bassham, Lawrence E (Fed). All rights reserved. 
7 | // 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "rng.h" 13 | #include "../kem.h" 14 | 15 | #define MAX_MARKER_LEN 50 16 | #define KAT_SUCCESS 0 17 | #define KAT_FILE_OPEN_ERROR -1 18 | #define KAT_DATA_ERROR -3 19 | #define KAT_CRYPTO_FAILURE -4 20 | 21 | int FindMarker(FILE *infile, const char *marker); 22 | int ReadHex(FILE *infile, unsigned char *A, int Length, char *str); 23 | void fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L); 24 | 25 | int 26 | main() 27 | { 28 | char fn_req[32], fn_rsp[32]; 29 | FILE *fp_req, *fp_rsp; 30 | unsigned char seed[48]; 31 | unsigned char entropy_input[48]; 32 | unsigned char ct[CRYPTO_CIPHERTEXTBYTES], ss[CRYPTO_BYTES], ss1[CRYPTO_BYTES]; 33 | int count; 34 | int done; 35 | unsigned char pk[CRYPTO_PUBLICKEYBYTES], sk[CRYPTO_SECRETKEYBYTES]; 36 | int ret_val; 37 | 38 | // Create the REQUEST file 39 | sprintf(fn_req, "PQCkemKAT_%d.req", CRYPTO_SECRETKEYBYTES); 40 | if ( (fp_req = fopen(fn_req, "w")) == NULL ) { 41 | printf("Couldn't open <%s> for write\n", fn_req); 42 | return KAT_FILE_OPEN_ERROR; 43 | } 44 | sprintf(fn_rsp, "PQCkemKAT_%d.rsp", CRYPTO_SECRETKEYBYTES); 45 | if ( (fp_rsp = fopen(fn_rsp, "w")) == NULL ) { 46 | printf("Couldn't open <%s> for write\n", fn_rsp); 47 | return KAT_FILE_OPEN_ERROR; 48 | } 49 | 50 | for (int i=0; i<48; i++) 51 | entropy_input[i] = i; 52 | 53 | randombytes_init(entropy_input, NULL, 256); 54 | for (int i=0; i<100; i++) { 55 | fprintf(fp_req, "count = %d\n", i); 56 | randombytes(seed, 48); 57 | fprintBstr(fp_req, "seed = ", seed, 48); 58 | fprintf(fp_req, "pk =\n"); 59 | fprintf(fp_req, "sk =\n"); 60 | fprintf(fp_req, "ct =\n"); 61 | fprintf(fp_req, "ss =\n\n"); 62 | } 63 | fclose(fp_req); 64 | 65 | //Create the RESPONSE file based on what's in the REQUEST file 66 | if ( (fp_req = fopen(fn_req, "r")) == NULL ) { 67 | printf("Couldn't open <%s> for read\n", fn_req); 68 | return KAT_FILE_OPEN_ERROR; 69 | } 70 | 71 | fprintf(fp_rsp, "# %s\n\n", 
CRYPTO_ALGNAME); 72 | done = 0; 73 | do { 74 | if ( FindMarker(fp_req, "count = ") ) 75 | fscanf(fp_req, "%d", &count); 76 | else { 77 | done = 1; 78 | break; 79 | } 80 | fprintf(fp_rsp, "count = %d\n", count); 81 | 82 | if ( !ReadHex(fp_req, seed, 48, "seed = ") ) { 83 | printf("ERROR: unable to read 'seed' from <%s>\n", fn_req); 84 | return KAT_DATA_ERROR; 85 | } 86 | fprintBstr(fp_rsp, "seed = ", seed, 48); 87 | 88 | randombytes_init(seed, NULL, 256); 89 | 90 | // Generate the public/private keypair 91 | if ( (ret_val = crypto_kem_keypair(pk, sk)) != 0) { 92 | printf("crypto_kem_keypair returned <%d>\n", ret_val); 93 | return KAT_CRYPTO_FAILURE; 94 | } 95 | fprintBstr(fp_rsp, "pk = ", pk, CRYPTO_PUBLICKEYBYTES); 96 | fprintBstr(fp_rsp, "sk = ", sk, CRYPTO_SECRETKEYBYTES); 97 | 98 | if ( (ret_val = crypto_kem_enc(ct, ss, pk)) != 0) { 99 | printf("crypto_kem_enc returned <%d>\n", ret_val); 100 | return KAT_CRYPTO_FAILURE; 101 | } 102 | fprintBstr(fp_rsp, "ct = ", ct, CRYPTO_CIPHERTEXTBYTES); 103 | fprintBstr(fp_rsp, "ss = ", ss, CRYPTO_BYTES); 104 | 105 | fprintf(fp_rsp, "\n"); 106 | 107 | if ( (ret_val = crypto_kem_dec(ss1, ct, sk)) != 0) { 108 | printf("crypto_kem_dec returned <%d>\n", ret_val); 109 | return KAT_CRYPTO_FAILURE; 110 | } 111 | 112 | if ( memcmp(ss, ss1, CRYPTO_BYTES) ) { 113 | printf("crypto_kem_dec returned bad 'ss' value\n"); 114 | return KAT_CRYPTO_FAILURE; 115 | } 116 | 117 | } while ( !done ); 118 | 119 | fclose(fp_req); 120 | fclose(fp_rsp); 121 | 122 | return KAT_SUCCESS; 123 | } 124 | 125 | 126 | 127 | // 128 | // ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.) 129 | // 130 | // 131 | // ALLOW TO READ HEXADECIMAL ENTRY (KEYS, DATA, TEXT, etc.) 
132 | // 133 | int 134 | FindMarker(FILE *infile, const char *marker) 135 | { 136 | char line[MAX_MARKER_LEN]; 137 | int i, len; 138 | int curr_line; 139 | 140 | len = (int)strlen(marker); 141 | if ( len > MAX_MARKER_LEN-1 ) 142 | len = MAX_MARKER_LEN-1; 143 | 144 | for ( i=0; i= '0') && (ch <= '9') ) 199 | ich = ch - '0'; 200 | else if ( (ch >= 'A') && (ch <= 'F') ) 201 | ich = ch - 'A' + 10; 202 | else if ( (ch >= 'a') && (ch <= 'f') ) 203 | ich = ch - 'a' + 10; 204 | else // shouldn't ever get here 205 | ich = 0; 206 | 207 | for ( i=0; i> 4); 209 | A[Length-1] = (A[Length-1] << 4) | ich; 210 | } 211 | else 212 | return 0; 213 | 214 | return 1; 215 | } 216 | 217 | void 218 | fprintBstr(FILE *fp, char *S, unsigned char *A, unsigned long long L) 219 | { 220 | unsigned long long i; 221 | 222 | fprintf(fp, "%s", S); 223 | 224 | for ( i=0; i 2 | #include 3 | #include 4 | #include 5 | #include "fips202.h" 6 | #include "fips202x4.h" 7 | 8 | /* Use implementation from the Keccak Code Package */ 9 | #define KeccakF1600_StatePermute4x FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds) 10 | extern void KeccakF1600_StatePermute4x(__m256i *s); 11 | 12 | static void keccakx4_absorb_once(__m256i s[25], 13 | unsigned int r, 14 | const uint8_t *in0, 15 | const uint8_t *in1, 16 | const uint8_t *in2, 17 | const uint8_t *in3, 18 | size_t inlen, 19 | uint8_t p) 20 | { 21 | size_t i; 22 | uint64_t pos = 0; 23 | __m256i t, idx; 24 | 25 | for(i = 0; i < 25; ++i) 26 | s[i] = _mm256_setzero_si256(); 27 | 28 | idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0); 29 | while(inlen >= r) { 30 | for(i = 0; i < r/8; ++i) { 31 | t = _mm256_i64gather_epi64((long long *)pos, idx, 1); 32 | s[i] = _mm256_xor_si256(s[i], t); 33 | pos += 8; 34 | } 35 | inlen -= r; 36 | 37 | KeccakF1600_StatePermute4x(s); 38 | } 39 | 40 | for(i = 0; i < inlen/8; ++i) { 41 | t = _mm256_i64gather_epi64((long long *)pos, idx, 1); 42 | s[i] = _mm256_xor_si256(s[i], t); 43 | pos 
+= 8; 44 | } 45 | inlen -= 8*i; 46 | 47 | if(inlen) { 48 | t = _mm256_i64gather_epi64((long long *)pos, idx, 1); 49 | idx = _mm256_set1_epi64x((1ULL << (8*inlen)) - 1); 50 | t = _mm256_and_si256(t, idx); 51 | s[i] = _mm256_xor_si256(s[i], t); 52 | } 53 | 54 | t = _mm256_set1_epi64x((uint64_t)p << 8*inlen); 55 | s[i] = _mm256_xor_si256(s[i], t); 56 | t = _mm256_set1_epi64x(1ULL << 63); 57 | s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t); 58 | } 59 | 60 | static void keccakx4_squeezeblocks(uint8_t *out0, 61 | uint8_t *out1, 62 | uint8_t *out2, 63 | uint8_t *out3, 64 | size_t nblocks, 65 | unsigned int r, 66 | __m256i s[25]) 67 | { 68 | unsigned int i; 69 | __m128d t; 70 | 71 | while(nblocks > 0) { 72 | KeccakF1600_StatePermute4x(s); 73 | for(i=0; i < r/8; ++i) { 74 | t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); 75 | _mm_storel_pd((__attribute__((__may_alias__)) double *)&out0[8*i], t); 76 | _mm_storeh_pd((__attribute__((__may_alias__)) double *)&out1[8*i], t); 77 | t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i],1)); 78 | _mm_storel_pd((__attribute__((__may_alias__)) double *)&out2[8*i], t); 79 | _mm_storeh_pd((__attribute__((__may_alias__)) double *)&out3[8*i], t); 80 | } 81 | 82 | out0 += r; 83 | out1 += r; 84 | out2 += r; 85 | out3 += r; 86 | --nblocks; 87 | } 88 | } 89 | 90 | void shake128x4_absorb_once(keccakx4_state *state, 91 | const uint8_t *in0, 92 | const uint8_t *in1, 93 | const uint8_t *in2, 94 | const uint8_t *in3, 95 | size_t inlen) 96 | { 97 | keccakx4_absorb_once(state->s, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F); 98 | } 99 | 100 | void shake128x4_squeezeblocks(uint8_t *out0, 101 | uint8_t *out1, 102 | uint8_t *out2, 103 | uint8_t *out3, 104 | size_t nblocks, 105 | keccakx4_state *state) 106 | { 107 | keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s); 108 | } 109 | 110 | void shake256x4_absorb_once(keccakx4_state *state, 111 | const uint8_t *in0, 112 | const uint8_t *in1, 113 | const uint8_t *in2, 114 | 
const uint8_t *in3, 115 | size_t inlen) 116 | { 117 | keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); 118 | } 119 | 120 | void shake256x4_squeezeblocks(uint8_t *out0, 121 | uint8_t *out1, 122 | uint8_t *out2, 123 | uint8_t *out3, 124 | size_t nblocks, 125 | keccakx4_state *state) 126 | { 127 | keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s); 128 | } 129 | 130 | void shake128x4(uint8_t *out0, 131 | uint8_t *out1, 132 | uint8_t *out2, 133 | uint8_t *out3, 134 | size_t outlen, 135 | const uint8_t *in0, 136 | const uint8_t *in1, 137 | const uint8_t *in2, 138 | const uint8_t *in3, 139 | size_t inlen) 140 | { 141 | unsigned int i; 142 | size_t nblocks = outlen/SHAKE128_RATE; 143 | uint8_t t[4][SHAKE128_RATE]; 144 | keccakx4_state state; 145 | 146 | shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen); 147 | shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); 148 | 149 | out0 += nblocks*SHAKE128_RATE; 150 | out1 += nblocks*SHAKE128_RATE; 151 | out2 += nblocks*SHAKE128_RATE; 152 | out3 += nblocks*SHAKE128_RATE; 153 | outlen -= nblocks*SHAKE128_RATE; 154 | 155 | if(outlen) { 156 | shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); 157 | for(i = 0; i < outlen; ++i) { 158 | out0[i] = t[0][i]; 159 | out1[i] = t[1][i]; 160 | out2[i] = t[2][i]; 161 | out3[i] = t[3][i]; 162 | } 163 | } 164 | } 165 | 166 | void shake256x4(uint8_t *out0, 167 | uint8_t *out1, 168 | uint8_t *out2, 169 | uint8_t *out3, 170 | size_t outlen, 171 | const uint8_t *in0, 172 | const uint8_t *in1, 173 | const uint8_t *in2, 174 | const uint8_t *in3, 175 | size_t inlen) 176 | { 177 | unsigned int i; 178 | size_t nblocks = outlen/SHAKE256_RATE; 179 | uint8_t t[4][SHAKE256_RATE]; 180 | keccakx4_state state; 181 | 182 | shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen); 183 | shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); 184 | 185 | out0 += nblocks*SHAKE256_RATE; 186 | out1 += 
nblocks*SHAKE256_RATE; 187 | out2 += nblocks*SHAKE256_RATE; 188 | out3 += nblocks*SHAKE256_RATE; 189 | outlen -= nblocks*SHAKE256_RATE; 190 | 191 | if(outlen) { 192 | shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); 193 | for(i = 0; i < outlen; ++i) { 194 | out0[i] = t[0][i]; 195 | out1[i] = t[1][i]; 196 | out2[i] = t[2][i]; 197 | out3[i] = t[3][i]; 198 | } 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /avx2/keccak4x/KeccakP-1600-unrolling.macros: -------------------------------------------------------------------------------- 1 | /* 2 | Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, 3 | Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby 4 | denoted as "the implementer". 5 | 6 | For more information, feedback or questions, please refer to our websites: 7 | http://keccak.noekeon.org/ 8 | http://keyak.noekeon.org/ 9 | http://ketje.noekeon.org/ 10 | 11 | To the extent possible under law, the implementer has waived all copyright 12 | and related or neighboring rights to the source code in this file. 
13 | http://creativecommons.org/publicdomain/zero/1.0/ 14 | */ 15 | 16 | #if (defined(FullUnrolling)) 17 | #define rounds24 \ 18 | prepareTheta \ 19 | thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ 20 | thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ 21 | thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ 22 | thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ 23 | thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ 24 | thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ 25 | thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ 26 | thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ 27 | thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ 28 | thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ 29 | thetaRhoPiChiIotaPrepareTheta(10, A, E) \ 30 | thetaRhoPiChiIotaPrepareTheta(11, E, A) \ 31 | thetaRhoPiChiIotaPrepareTheta(12, A, E) \ 32 | thetaRhoPiChiIotaPrepareTheta(13, E, A) \ 33 | thetaRhoPiChiIotaPrepareTheta(14, A, E) \ 34 | thetaRhoPiChiIotaPrepareTheta(15, E, A) \ 35 | thetaRhoPiChiIotaPrepareTheta(16, A, E) \ 36 | thetaRhoPiChiIotaPrepareTheta(17, E, A) \ 37 | thetaRhoPiChiIotaPrepareTheta(18, A, E) \ 38 | thetaRhoPiChiIotaPrepareTheta(19, E, A) \ 39 | thetaRhoPiChiIotaPrepareTheta(20, A, E) \ 40 | thetaRhoPiChiIotaPrepareTheta(21, E, A) \ 41 | thetaRhoPiChiIotaPrepareTheta(22, A, E) \ 42 | thetaRhoPiChiIota(23, E, A) \ 43 | 44 | #define rounds12 \ 45 | prepareTheta \ 46 | thetaRhoPiChiIotaPrepareTheta(12, A, E) \ 47 | thetaRhoPiChiIotaPrepareTheta(13, E, A) \ 48 | thetaRhoPiChiIotaPrepareTheta(14, A, E) \ 49 | thetaRhoPiChiIotaPrepareTheta(15, E, A) \ 50 | thetaRhoPiChiIotaPrepareTheta(16, A, E) \ 51 | thetaRhoPiChiIotaPrepareTheta(17, E, A) \ 52 | thetaRhoPiChiIotaPrepareTheta(18, A, E) \ 53 | thetaRhoPiChiIotaPrepareTheta(19, E, A) \ 54 | thetaRhoPiChiIotaPrepareTheta(20, A, E) \ 55 | thetaRhoPiChiIotaPrepareTheta(21, E, A) \ 56 | thetaRhoPiChiIotaPrepareTheta(22, A, E) \ 57 | thetaRhoPiChiIota(23, E, A) \ 58 | 59 | #elif (Unrolling == 12) 60 | #define rounds24 \ 61 | prepareTheta \ 62 | for(i=0; i<24; i+=12) { \ 63 | 
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 64 | thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ 65 | thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ 66 | thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ 67 | thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ 68 | thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ 69 | thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ 70 | thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ 71 | thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ 72 | thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ 73 | thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ 74 | thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ 75 | } \ 76 | 77 | #define rounds12 \ 78 | prepareTheta \ 79 | thetaRhoPiChiIotaPrepareTheta(12, A, E) \ 80 | thetaRhoPiChiIotaPrepareTheta(13, E, A) \ 81 | thetaRhoPiChiIotaPrepareTheta(14, A, E) \ 82 | thetaRhoPiChiIotaPrepareTheta(15, E, A) \ 83 | thetaRhoPiChiIotaPrepareTheta(16, A, E) \ 84 | thetaRhoPiChiIotaPrepareTheta(17, E, A) \ 85 | thetaRhoPiChiIotaPrepareTheta(18, A, E) \ 86 | thetaRhoPiChiIotaPrepareTheta(19, E, A) \ 87 | thetaRhoPiChiIotaPrepareTheta(20, A, E) \ 88 | thetaRhoPiChiIotaPrepareTheta(21, E, A) \ 89 | thetaRhoPiChiIotaPrepareTheta(22, A, E) \ 90 | thetaRhoPiChiIota(23, E, A) \ 91 | 92 | #elif (Unrolling == 6) 93 | #define rounds24 \ 94 | prepareTheta \ 95 | for(i=0; i<24; i+=6) { \ 96 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 97 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 98 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 99 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 100 | thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ 101 | thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ 102 | } \ 103 | 104 | #define rounds12 \ 105 | prepareTheta \ 106 | for(i=12; i<24; i+=6) { \ 107 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 108 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 109 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 110 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 111 | thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ 112 | thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ 113 | } \ 114 | 115 | 
#elif (Unrolling == 4) 116 | #define rounds24 \ 117 | prepareTheta \ 118 | for(i=0; i<24; i+=4) { \ 119 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 120 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 121 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 122 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 123 | } \ 124 | 125 | #define rounds12 \ 126 | prepareTheta \ 127 | for(i=12; i<24; i+=4) { \ 128 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 129 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 130 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 131 | thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ 132 | } \ 133 | 134 | #elif (Unrolling == 3) 135 | #define rounds24 \ 136 | prepareTheta \ 137 | for(i=0; i<24; i+=3) { \ 138 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 139 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 140 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 141 | copyStateVariables(A, E) \ 142 | } \ 143 | 144 | #define rounds12 \ 145 | prepareTheta \ 146 | for(i=12; i<24; i+=3) { \ 147 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 148 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 149 | thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ 150 | copyStateVariables(A, E) \ 151 | } \ 152 | 153 | #elif (Unrolling == 2) 154 | #define rounds24 \ 155 | prepareTheta \ 156 | for(i=0; i<24; i+=2) { \ 157 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 158 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 159 | } \ 160 | 161 | #define rounds12 \ 162 | prepareTheta \ 163 | for(i=12; i<24; i+=2) { \ 164 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 165 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 166 | } \ 167 | 168 | #elif (Unrolling == 1) 169 | #define rounds24 \ 170 | prepareTheta \ 171 | for(i=0; i<24; i++) { \ 172 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 173 | copyStateVariables(A, E) \ 174 | } \ 175 | 176 | #define rounds12 \ 177 | prepareTheta \ 178 | for(i=12; i<24; i++) { \ 179 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 180 | copyStateVariables(A, E) \ 181 | } \ 182 | 183 | #else 184 | #error "Unrolling is 
not correctly specified!" 185 | #endif 186 | 187 | #define roundsN(__nrounds) \ 188 | prepareTheta \ 189 | i = 24 - (__nrounds); \ 190 | if ((i&1) != 0) { \ 191 | thetaRhoPiChiIotaPrepareTheta(i, A, E) \ 192 | copyStateVariables(A, E) \ 193 | ++i; \ 194 | } \ 195 | for( /* empty */; i<24; i+=2) { \ 196 | thetaRhoPiChiIotaPrepareTheta(i , A, E) \ 197 | thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ 198 | } 199 | -------------------------------------------------------------------------------- /ref/polyvec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "params.h" 3 | #include "poly.h" 4 | #include "polyvec.h" 5 | 6 | /************************************************* 7 | * Name: polyvec_compress 8 | * 9 | * Description: Compress and serialize vector of polynomials 10 | * 11 | * Arguments: - uint8_t *r: pointer to output byte array 12 | * (needs space for KYBER_POLYVECCOMPRESSEDBYTES) 13 | * - const polyvec *a: pointer to input vector of polynomials 14 | **************************************************/ 15 | void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a) 16 | { 17 | unsigned int i,j,k; 18 | uint64_t d0; 19 | 20 | #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) 21 | uint16_t t[8]; 22 | for(i=0;ivec[i].coeffs[8*j+k]; 26 | t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; 27 | /* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */ 28 | d0 = t[k]; 29 | d0 <<= 11; 30 | d0 += 1664; 31 | d0 *= 645084; 32 | d0 >>= 31; 33 | t[k] = d0 & 0x7ff; 34 | } 35 | 36 | r[ 0] = (t[0] >> 0); 37 | r[ 1] = (t[0] >> 8) | (t[1] << 3); 38 | r[ 2] = (t[1] >> 5) | (t[2] << 6); 39 | r[ 3] = (t[2] >> 2); 40 | r[ 4] = (t[2] >> 10) | (t[3] << 1); 41 | r[ 5] = (t[3] >> 7) | (t[4] << 4); 42 | r[ 6] = (t[4] >> 4) | (t[5] << 7); 43 | r[ 7] = (t[5] >> 1); 44 | r[ 8] = (t[5] >> 9) | (t[6] << 2); 45 | r[ 9] = (t[6] >> 6) | (t[7] << 5); 46 | r[10] = (t[7] >> 3); 47 | r += 11; 48 | } 49 | } 50 | #elif 
(KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) 51 | uint16_t t[4]; 52 | for(i=0;ivec[i].coeffs[4*j+k]; 56 | t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; 57 | /* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */ 58 | d0 = t[k]; 59 | d0 <<= 10; 60 | d0 += 1665; 61 | d0 *= 1290167; 62 | d0 >>= 32; 63 | t[k] = d0 & 0x3ff; 64 | } 65 | 66 | r[0] = (t[0] >> 0); 67 | r[1] = (t[0] >> 8) | (t[1] << 2); 68 | r[2] = (t[1] >> 6) | (t[2] << 4); 69 | r[3] = (t[2] >> 4) | (t[3] << 6); 70 | r[4] = (t[3] >> 2); 71 | r += 5; 72 | } 73 | } 74 | #else 75 | #error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" 76 | #endif 77 | } 78 | 79 | /************************************************* 80 | * Name: polyvec_decompress 81 | * 82 | * Description: De-serialize and decompress vector of polynomials; 83 | * approximate inverse of polyvec_compress 84 | * 85 | * Arguments: - polyvec *r: pointer to output vector of polynomials 86 | * - const uint8_t *a: pointer to input byte array 87 | * (of length KYBER_POLYVECCOMPRESSEDBYTES) 88 | **************************************************/ 89 | void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]) 90 | { 91 | unsigned int i,j,k; 92 | 93 | #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) 94 | uint16_t t[8]; 95 | for(i=0;i> 0) | ((uint16_t)a[ 1] << 8); 98 | t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5); 99 | t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10); 100 | t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7); 101 | t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4); 102 | t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9); 103 | t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6); 104 | t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3); 105 | a += 11; 106 | 107 | for(k=0;k<8;k++) 108 | r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11; 109 | } 110 | } 111 | #elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) 112 | uint16_t t[4]; 113 | 
for(i=0;i> 0) | ((uint16_t)a[1] << 8); 116 | t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); 117 | t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); 118 | t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); 119 | a += 5; 120 | 121 | for(k=0;k<4;k++) 122 | r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10; 123 | } 124 | } 125 | #else 126 | #error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}" 127 | #endif 128 | } 129 | 130 | /************************************************* 131 | * Name: polyvec_tobytes 132 | * 133 | * Description: Serialize vector of polynomials 134 | * 135 | * Arguments: - uint8_t *r: pointer to output byte array 136 | * (needs space for KYBER_POLYVECBYTES) 137 | * - const polyvec *a: pointer to input vector of polynomials 138 | **************************************************/ 139 | void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) 140 | { 141 | unsigned int i; 142 | for(i=0;ivec[i]); 144 | } 145 | 146 | /************************************************* 147 | * Name: polyvec_frombytes 148 | * 149 | * Description: De-serialize vector of polynomials; 150 | * inverse of polyvec_tobytes 151 | * 152 | * Arguments: - uint8_t *r: pointer to output byte array 153 | * - const polyvec *a: pointer to input vector of polynomials 154 | * (of length KYBER_POLYVECBYTES) 155 | **************************************************/ 156 | void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) 157 | { 158 | unsigned int i; 159 | for(i=0;ivec[i], a+i*KYBER_POLYBYTES); 161 | } 162 | 163 | /************************************************* 164 | * Name: polyvec_ntt 165 | * 166 | * Description: Apply forward NTT to all elements of a vector of polynomials 167 | * 168 | * Arguments: - polyvec *r: pointer to in/output vector of polynomials 169 | **************************************************/ 170 | void polyvec_ntt(polyvec *r) 171 | { 172 | unsigned int i; 173 | for(i=0;ivec[i]); 175 | } 176 | 177 | 
/************************************************* 178 | * Name: polyvec_invntt_tomont 179 | * 180 | * Description: Apply inverse NTT to all elements of a vector of polynomials 181 | * and multiply by Montgomery factor 2^16 182 | * 183 | * Arguments: - polyvec *r: pointer to in/output vector of polynomials 184 | **************************************************/ 185 | void polyvec_invntt_tomont(polyvec *r) 186 | { 187 | unsigned int i; 188 | for(i=0;ivec[i]); 190 | } 191 | 192 | /************************************************* 193 | * Name: polyvec_basemul_acc_montgomery 194 | * 195 | * Description: Multiply elements of a and b in NTT domain, accumulate into r, 196 | * and multiply by 2^-16. 197 | * 198 | * Arguments: - poly *r: pointer to output polynomial 199 | * - const polyvec *a: pointer to first input vector of polynomials 200 | * - const polyvec *b: pointer to second input vector of polynomials 201 | **************************************************/ 202 | void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) 203 | { 204 | unsigned int i; 205 | poly t; 206 | 207 | poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); 208 | for(i=1;ivec[i], &b->vec[i]); 210 | poly_add(r, r, &t); 211 | } 212 | 213 | poly_reduce(r); 214 | } 215 | 216 | /************************************************* 217 | * Name: polyvec_reduce 218 | * 219 | * Description: Applies Barrett reduction to each coefficient 220 | * of each element of a vector of polynomials; 221 | * for details of the Barrett reduction see comments in reduce.c 222 | * 223 | * Arguments: - polyvec *r: pointer to input/output polynomial 224 | **************************************************/ 225 | void polyvec_reduce(polyvec *r) 226 | { 227 | unsigned int i; 228 | for(i=0;ivec[i]); 230 | } 231 | 232 | /************************************************* 233 | * Name: polyvec_add 234 | * 235 | * Description: Add vectors of polynomials 236 | * 237 | * Arguments: - polyvec *r: pointer to 
output vector of polynomials 238 | * - const polyvec *a: pointer to first input vector of polynomials 239 | * - const polyvec *b: pointer to second input vector of polynomials 240 | **************************************************/ 241 | void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) 242 | { 243 | unsigned int i; 244 | for(i=0;ivec[i], &a->vec[i], &b->vec[i]); 246 | } 247 | -------------------------------------------------------------------------------- /avx2/polyvec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "params.h" 5 | #include "polyvec.h" 6 | #include "poly.h" 7 | #include "ntt.h" 8 | #include "consts.h" 9 | 10 | #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) 11 | static void poly_compress10(uint8_t r[320], const poly * restrict a) 12 | { 13 | unsigned int i; 14 | __m256i f0, f1, f2; 15 | __m128i t0, t1; 16 | const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); 17 | const __m256i v8 = _mm256_slli_epi16(v,3); 18 | const __m256i off = _mm256_set1_epi16(15); 19 | const __m256i shift1 = _mm256_set1_epi16(1 << 12); 20 | const __m256i mask = _mm256_set1_epi16(1023); 21 | const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); 22 | const __m256i sllvdidx = _mm256_set1_epi64x(12); 23 | const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, 24 | -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); 25 | 26 | for(i=0;ivec[i]); 28 | f1 = _mm256_mullo_epi16(f0,v8); 29 | f2 = _mm256_add_epi16(f0,off); 30 | f0 = _mm256_slli_epi16(f0,3); 31 | f0 = _mm256_mulhi_epi16(f0,v); 32 | f2 = _mm256_sub_epi16(f1,f2); 33 | f1 = _mm256_andnot_si256(f1,f2); 34 | f1 = _mm256_srli_epi16(f1,15); 35 | f0 = _mm256_sub_epi16(f0,f1); 36 | f0 = _mm256_mulhrs_epi16(f0,shift1); 37 | f0 = _mm256_and_si256(f0,mask); 38 | f0 = _mm256_madd_epi16(f0,shift2); 39 | f0 = _mm256_sllv_epi32(f0,sllvdidx); 40 | f0 = 
_mm256_srli_epi64(f0,12); 41 | f0 = _mm256_shuffle_epi8(f0,shufbidx); 42 | t0 = _mm256_castsi256_si128(f0); 43 | t1 = _mm256_extracti128_si256(f0,1); 44 | t0 = _mm_blend_epi16(t0,t1,0xE0); 45 | _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); 46 | memcpy(&r[20*i+16],&t1,4); 47 | } 48 | } 49 | 50 | static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) 51 | { 52 | unsigned int i; 53 | __m256i f; 54 | const __m256i q = _mm256_set1_epi32((KYBER_Q << 16) + 4*KYBER_Q); 55 | const __m256i shufbidx = _mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, 56 | 6, 5, 5, 4, 4, 3, 3, 2, 57 | 9, 8, 8, 7, 7, 6, 6, 5, 58 | 4, 3, 3, 2, 2, 1, 1, 0); 59 | const __m256i sllvdidx = _mm256_set1_epi64x(4); 60 | const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); 61 | 62 | for(i=0;ivec[i],f); 71 | } 72 | } 73 | 74 | #elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) 75 | static void poly_compress11(uint8_t r[352+2], const poly * restrict a) 76 | { 77 | unsigned int i; 78 | __m256i f0, f1, f2; 79 | __m128i t0, t1; 80 | const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); 81 | const __m256i v8 = _mm256_slli_epi16(v,3); 82 | const __m256i off = _mm256_set1_epi16(36); 83 | const __m256i shift1 = _mm256_set1_epi16(1 << 13); 84 | const __m256i mask = _mm256_set1_epi16(2047); 85 | const __m256i shift2 = _mm256_set1_epi64x((2048LL << 48) + (1LL << 32) + (2048 << 16) + 1); 86 | const __m256i sllvdidx = _mm256_set1_epi64x(10); 87 | const __m256i srlvqidx = _mm256_set_epi64x(30,10,30,10); 88 | const __m256i shufbidx = _mm256_set_epi8( 4, 3, 2, 1, 0, 0,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 89 | -1,-1,-1,-1,-1,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 90 | 91 | for(i=0;ivec[i]); 93 | f1 = _mm256_mullo_epi16(f0,v8); 94 | f2 = _mm256_add_epi16(f0,off); 95 | f0 = _mm256_slli_epi16(f0,3); 96 | f0 = _mm256_mulhi_epi16(f0,v); 97 | f2 = _mm256_sub_epi16(f1,f2); 98 | f1 = _mm256_andnot_si256(f1,f2); 99 | f1 = _mm256_srli_epi16(f1,15); 100 | f0 = _mm256_sub_epi16(f0,f1); 101 | f0 = 
_mm256_mulhrs_epi16(f0,shift1); 102 | f0 = _mm256_and_si256(f0,mask); 103 | f0 = _mm256_madd_epi16(f0,shift2); 104 | f0 = _mm256_sllv_epi32(f0,sllvdidx); 105 | f1 = _mm256_bsrli_epi128(f0,8); 106 | f0 = _mm256_srlv_epi64(f0,srlvqidx); 107 | f1 = _mm256_slli_epi64(f1,34); 108 | f0 = _mm256_add_epi64(f0,f1); 109 | f0 = _mm256_shuffle_epi8(f0,shufbidx); 110 | t0 = _mm256_castsi256_si128(f0); 111 | t1 = _mm256_extracti128_si256(f0,1); 112 | t0 = _mm_blendv_epi8(t0,t1,_mm256_castsi256_si128(shufbidx)); 113 | _mm_storeu_si128((__m128i *)&r[22*i+ 0],t0); 114 | _mm_storel_epi64((__m128i *)&r[22*i+16],t1); 115 | } 116 | } 117 | 118 | static void poly_decompress11(poly * restrict r, const uint8_t a[352+10]) 119 | { 120 | unsigned int i; 121 | __m256i f; 122 | const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); 123 | const __m256i shufbidx = _mm256_set_epi8(13,12,12,11,10, 9, 9, 8, 124 | 8, 7, 6, 5, 5, 4, 4, 3, 125 | 10, 9, 9, 8, 7, 6, 6, 5, 126 | 5, 4, 3, 2, 2, 1, 1, 0); 127 | const __m256i srlvdidx = _mm256_set_epi32(0,0,1,0,0,0,1,0); 128 | const __m256i srlvqidx = _mm256_set_epi64x(2,0,2,0); 129 | const __m256i shift = _mm256_set_epi16(4,32,1,8,32,1,4,32,4,32,1,8,32,1,4,32); 130 | const __m256i mask = _mm256_set1_epi16(32752); 131 | 132 | for(i=0;ivec[i],f); 143 | } 144 | } 145 | 146 | #endif 147 | 148 | /************************************************* 149 | * Name: polyvec_compress 150 | * 151 | * Description: Compress and serialize vector of polynomials 152 | * 153 | * Arguments: - uint8_t *r: pointer to output byte array 154 | * (needs space for KYBER_POLYVECCOMPRESSEDBYTES) 155 | * - polyvec *a: pointer to input vector of polynomials 156 | **************************************************/ 157 | void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) 158 | { 159 | unsigned int i; 160 | 161 | #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) 162 | for(i=0;ivec[i]); 164 | #elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) 165 
| for(i=0;ivec[i]); 167 | #endif 168 | } 169 | 170 | /************************************************* 171 | * Name: polyvec_decompress 172 | * 173 | * Description: De-serialize and decompress vector of polynomials; 174 | * approximate inverse of polyvec_compress 175 | * 176 | * Arguments: - polyvec *r: pointer to output vector of polynomials 177 | * - const uint8_t *a: pointer to input byte array 178 | * (of length KYBER_POLYVECCOMPRESSEDBYTES) 179 | **************************************************/ 180 | void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES+12]) 181 | { 182 | unsigned int i; 183 | 184 | #if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320)) 185 | for(i=0;ivec[i],&a[320*i]); 187 | #elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352)) 188 | for(i=0;ivec[i],&a[352*i]); 190 | #endif 191 | } 192 | 193 | /************************************************* 194 | * Name: polyvec_tobytes 195 | * 196 | * Description: Serialize vector of polynomials 197 | * 198 | * Arguments: - uint8_t *r: pointer to output byte array 199 | * (needs space for KYBER_POLYVECBYTES) 200 | * - polyvec *a: pointer to input vector of polynomials 201 | **************************************************/ 202 | void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a) 203 | { 204 | unsigned int i; 205 | for(i=0;ivec[i]); 207 | } 208 | 209 | /************************************************* 210 | * Name: polyvec_frombytes 211 | * 212 | * Description: De-serialize vector of polynomials; 213 | * inverse of polyvec_tobytes 214 | * 215 | * Arguments: - uint8_t *r: pointer to output byte array 216 | * - const polyvec *a: pointer to input vector of polynomials 217 | * (of length KYBER_POLYVECBYTES) 218 | **************************************************/ 219 | void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]) 220 | { 221 | unsigned int i; 222 | for(i=0;ivec[i], a+i*KYBER_POLYBYTES); 224 | } 225 | 226 | 
/************************************************* 227 | * Name: polyvec_ntt 228 | * 229 | * Description: Apply forward NTT to all elements of a vector of polynomials 230 | * 231 | * Arguments: - polyvec *r: pointer to in/output vector of polynomials 232 | **************************************************/ 233 | void polyvec_ntt(polyvec *r) 234 | { 235 | unsigned int i; 236 | for(i=0;ivec[i]); 238 | } 239 | 240 | /************************************************* 241 | * Name: polyvec_invntt_tomont 242 | * 243 | * Description: Apply inverse NTT to all elements of a vector of polynomials 244 | * and multiply by Montgomery factor 2^16 245 | * 246 | * Arguments: - polyvec *r: pointer to in/output vector of polynomials 247 | **************************************************/ 248 | void polyvec_invntt_tomont(polyvec *r) 249 | { 250 | unsigned int i; 251 | for(i=0;ivec[i]); 253 | } 254 | 255 | /************************************************* 256 | * Name: polyvec_basemul_acc_montgomery 257 | * 258 | * Description: Multiply elements in a and b in NTT domain, accumulate into r, 259 | * and multiply by 2^-16. 
260 | * 261 | * Arguments: - poly *r: pointer to output polynomial 262 | * - const polyvec *a: pointer to first input vector of polynomials 263 | * - const polyvec *b: pointer to second input vector of polynomials 264 | **************************************************/ 265 | void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) 266 | { 267 | unsigned int i; 268 | poly tmp; 269 | 270 | poly_basemul_montgomery(r,&a->vec[0],&b->vec[0]); 271 | for(i=1;ivec[i],&b->vec[i]); 273 | poly_add(r,r,&tmp); 274 | } 275 | } 276 | 277 | /************************************************* 278 | * Name: polyvec_reduce 279 | * 280 | * Description: Applies Barrett reduction to each coefficient 281 | * of each element of a vector of polynomials; 282 | * for details of the Barrett reduction see comments in reduce.c 283 | * 284 | * Arguments: - polyvec *r: pointer to input/output polynomial 285 | **************************************************/ 286 | void polyvec_reduce(polyvec *r) 287 | { 288 | unsigned int i; 289 | for(i=0;ivec[i]); 291 | } 292 | 293 | /************************************************* 294 | * Name: polyvec_add 295 | * 296 | * Description: Add vectors of polynomials 297 | * 298 | * Arguments: - polyvec *r: pointer to output vector of polynomials 299 | * - const polyvec *a: pointer to first input vector of polynomials 300 | * - const polyvec *b: pointer to second input vector of polynomials 301 | **************************************************/ 302 | void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) 303 | { 304 | unsigned int i; 305 | for(i=0;ivec[i], &a->vec[i], &b->vec[i]); 307 | } 308 | -------------------------------------------------------------------------------- /ref/poly.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "params.h" 3 | #include "poly.h" 4 | #include "ntt.h" 5 | #include "reduce.h" 6 | #include "cbd.h" 7 | #include "symmetric.h" 8 | 
#include "verify.h"

/*************************************************
* Name:        poly_compress
*
* Description: Compression and subsequent serialization of a polynomial
*
* Arguments:   - uint8_t *r: pointer to output byte array
*                            (of length KYBER_POLYCOMPRESSEDBYTES)
*              - const poly *a: pointer to input polynomial
**************************************************/
void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
{
  unsigned int i,j;
  int16_t u;
  uint32_t d0;
  uint8_t t[8];

#if (KYBER_POLYCOMPRESSEDBYTES == 128)

  for(i=0;i<KYBER_N/8;i++) {
    for(j=0;j<8;j++) {
      /* add KYBER_Q to negative coefficients (mask is all-ones when u < 0) */
      u  = a->coeffs[8*i+j];
      u += (u >> 15) & KYBER_Q;
      /* division-free form of
         t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
      d0 = u << 4;
      d0 += 1665;
      d0 *= 80635;
      d0 >>= 28;
      t[j] = d0 & 0xf;
    }

    /* two 4-bit values per output byte */
    r[0] = t[0] | (t[1] << 4);
    r[1] = t[2] | (t[3] << 4);
    r[2] = t[4] | (t[5] << 4);
    r[3] = t[6] | (t[7] << 4);
    r += 4;
  }
#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
  for(i=0;i<KYBER_N/8;i++) {
    for(j=0;j<8;j++) {
      /* add KYBER_Q to negative coefficients (mask is all-ones when u < 0) */
      u  = a->coeffs[8*i+j];
      u += (u >> 15) & KYBER_Q;
      /* division-free form of
         t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
      d0 = u << 5;
      d0 += 1664;
      d0 *= 40318;
      d0 >>= 27;
      t[j] = d0 & 0x1f;
    }

    /* eight 5-bit values packed into five output bytes */
    r[0] = (t[0] >> 0) | (t[1] << 5);
    r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7);
    r[2] = (t[3] >> 1) | (t[4] << 4);
    r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6);
    r[4] = (t[6] >> 2) | (t[7] << 3);
    r += 5;
  }
#else
#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
#endif
}

/*************************************************
* Name:        poly_decompress
*
* Description: De-serialization and subsequent decompression of a polynomial;
*              approximate inverse of poly_compress
*
* Arguments:   - poly *r: pointer to output polynomial
*              - const uint8_t *a: pointer to input byte array
*                                  (of length
KYBER_POLYCOMPRESSEDBYTES bytes) 82 | **************************************************/ 83 | void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]) 84 | { 85 | unsigned int i; 86 | 87 | #if (KYBER_POLYCOMPRESSEDBYTES == 128) 88 | for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; 90 | r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; 91 | a += 1; 92 | } 93 | #elif (KYBER_POLYCOMPRESSEDBYTES == 160) 94 | unsigned int j; 95 | uint8_t t[8]; 96 | for(i=0;i> 0); 98 | t[1] = (a[0] >> 5) | (a[1] << 3); 99 | t[2] = (a[1] >> 2); 100 | t[3] = (a[1] >> 7) | (a[2] << 1); 101 | t[4] = (a[2] >> 4) | (a[3] << 4); 102 | t[5] = (a[3] >> 1); 103 | t[6] = (a[3] >> 6) | (a[4] << 2); 104 | t[7] = (a[4] >> 3); 105 | a += 5; 106 | 107 | for(j=0;j<8;j++) 108 | r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; 109 | } 110 | #else 111 | #error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}" 112 | #endif 113 | } 114 | 115 | /************************************************* 116 | * Name: poly_tobytes 117 | * 118 | * Description: Serialization of a polynomial 119 | * 120 | * Arguments: - uint8_t *r: pointer to output byte array 121 | * (needs space for KYBER_POLYBYTES bytes) 122 | * - const poly *a: pointer to input polynomial 123 | **************************************************/ 124 | void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) 125 | { 126 | unsigned int i; 127 | uint16_t t0, t1; 128 | 129 | for(i=0;icoeffs[2*i]; 132 | t0 += ((int16_t)t0 >> 15) & KYBER_Q; 133 | t1 = a->coeffs[2*i+1]; 134 | t1 += ((int16_t)t1 >> 15) & KYBER_Q; 135 | r[3*i+0] = (t0 >> 0); 136 | r[3*i+1] = (t0 >> 8) | (t1 << 4); 137 | r[3*i+2] = (t1 >> 4); 138 | } 139 | } 140 | 141 | /************************************************* 142 | * Name: poly_frombytes 143 | * 144 | * Description: De-serialization of a polynomial; 145 | * inverse of poly_tobytes 146 | * 147 | * Arguments: - poly *r: pointer to output polynomial 148 | * - const 
uint8_t *a: pointer to input byte array 149 | * (of KYBER_POLYBYTES bytes) 150 | **************************************************/ 151 | void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) 152 | { 153 | unsigned int i; 154 | for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; 156 | r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; 157 | } 158 | } 159 | 160 | /************************************************* 161 | * Name: poly_frommsg 162 | * 163 | * Description: Convert 32-byte message to polynomial 164 | * 165 | * Arguments: - poly *r: pointer to output polynomial 166 | * - const uint8_t *msg: pointer to input message 167 | **************************************************/ 168 | void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) 169 | { 170 | unsigned int i,j; 171 | 172 | #if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) 173 | #error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" 174 | #endif 175 | 176 | for(i=0;icoeffs[8*i+j] = 0; 179 | cmov_int16(r->coeffs+8*i+j, ((KYBER_Q+1)/2), (msg[i] >> j)&1); 180 | } 181 | } 182 | } 183 | 184 | /************************************************* 185 | * Name: poly_tomsg 186 | * 187 | * Description: Convert polynomial to 32-byte message 188 | * 189 | * Arguments: - uint8_t *msg: pointer to output message 190 | * - const poly *a: pointer to input polynomial 191 | **************************************************/ 192 | void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) 193 | { 194 | unsigned int i,j; 195 | uint32_t t; 196 | 197 | for(i=0;icoeffs[8*i+j]; 201 | // t += ((int16_t)t >> 15) & KYBER_Q; 202 | // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; 203 | t <<= 1; 204 | t += 1665; 205 | t *= 80635; 206 | t >>= 28; 207 | t &= 1; 208 | msg[i] |= t << j; 209 | } 210 | } 211 | } 212 | 213 | /************************************************* 214 | * Name: poly_getnoise_eta1 215 | * 216 | * Description: Sample a polynomial 
deterministically from a seed and a nonce, 217 | * with output polynomial close to centered binomial distribution 218 | * with parameter KYBER_ETA1 219 | * 220 | * Arguments: - poly *r: pointer to output polynomial 221 | * - const uint8_t *seed: pointer to input seed 222 | * (of length KYBER_SYMBYTES bytes) 223 | * - uint8_t nonce: one-byte input nonce 224 | **************************************************/ 225 | void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) 226 | { 227 | uint8_t buf[KYBER_ETA1*KYBER_N/4]; 228 | prf(buf, sizeof(buf), seed, nonce); 229 | poly_cbd_eta1(r, buf); 230 | } 231 | 232 | /************************************************* 233 | * Name: poly_getnoise_eta2 234 | * 235 | * Description: Sample a polynomial deterministically from a seed and a nonce, 236 | * with output polynomial close to centered binomial distribution 237 | * with parameter KYBER_ETA2 238 | * 239 | * Arguments: - poly *r: pointer to output polynomial 240 | * - const uint8_t *seed: pointer to input seed 241 | * (of length KYBER_SYMBYTES bytes) 242 | * - uint8_t nonce: one-byte input nonce 243 | **************************************************/ 244 | void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) 245 | { 246 | uint8_t buf[KYBER_ETA2*KYBER_N/4]; 247 | prf(buf, sizeof(buf), seed, nonce); 248 | poly_cbd_eta2(r, buf); 249 | } 250 | 251 | 252 | /************************************************* 253 | * Name: poly_ntt 254 | * 255 | * Description: Computes negacyclic number-theoretic transform (NTT) of 256 | * a polynomial in place; 257 | * inputs assumed to be in normal order, output in bitreversed order 258 | * 259 | * Arguments: - uint16_t *r: pointer to in/output polynomial 260 | **************************************************/ 261 | void poly_ntt(poly *r) 262 | { 263 | ntt(r->coeffs); 264 | poly_reduce(r); 265 | } 266 | 267 | /************************************************* 268 | * Name: 
poly_invntt_tomont 269 | * 270 | * Description: Computes inverse of negacyclic number-theoretic transform (NTT) 271 | * of a polynomial in place; 272 | * inputs assumed to be in bitreversed order, output in normal order 273 | * 274 | * Arguments: - uint16_t *a: pointer to in/output polynomial 275 | **************************************************/ 276 | void poly_invntt_tomont(poly *r) 277 | { 278 | invntt(r->coeffs); 279 | } 280 | 281 | /************************************************* 282 | * Name: poly_basemul_montgomery 283 | * 284 | * Description: Multiplication of two polynomials in NTT domain 285 | * 286 | * Arguments: - poly *r: pointer to output polynomial 287 | * - const poly *a: pointer to first input polynomial 288 | * - const poly *b: pointer to second input polynomial 289 | **************************************************/ 290 | void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) 291 | { 292 | unsigned int i; 293 | for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); 295 | basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); 296 | } 297 | } 298 | 299 | /************************************************* 300 | * Name: poly_tomont 301 | * 302 | * Description: Inplace conversion of all coefficients of a polynomial 303 | * from normal domain to Montgomery domain 304 | * 305 | * Arguments: - poly *r: pointer to input/output polynomial 306 | **************************************************/ 307 | void poly_tomont(poly *r) 308 | { 309 | unsigned int i; 310 | const int16_t f = (1ULL << 32) % KYBER_Q; 311 | for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); 313 | } 314 | 315 | /************************************************* 316 | * Name: poly_reduce 317 | * 318 | * Description: Applies Barrett reduction to all coefficients of a polynomial 319 | * for details of the Barrett reduction see comments in reduce.c 320 | * 321 | * Arguments: - poly *r: pointer to input/output polynomial 
322 | **************************************************/ 323 | void poly_reduce(poly *r) 324 | { 325 | unsigned int i; 326 | for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); 328 | } 329 | 330 | /************************************************* 331 | * Name: poly_add 332 | * 333 | * Description: Add two polynomials; no modular reduction is performed 334 | * 335 | * Arguments: - poly *r: pointer to output polynomial 336 | * - const poly *a: pointer to first input polynomial 337 | * - const poly *b: pointer to second input polynomial 338 | **************************************************/ 339 | void poly_add(poly *r, const poly *a, const poly *b) 340 | { 341 | unsigned int i; 342 | for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; 344 | } 345 | 346 | /************************************************* 347 | * Name: poly_sub 348 | * 349 | * Description: Subtract two polynomials; no modular reduction is performed 350 | * 351 | * Arguments: - poly *r: pointer to output polynomial 352 | * - const poly *a: pointer to first input polynomial 353 | * - const poly *b: pointer to second input polynomial 354 | **************************************************/ 355 | void poly_sub(poly *r, const poly *a, const poly *b) 356 | { 357 | unsigned int i; 358 | for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; 360 | } 361 | --------------------------------------------------------------------------------