├── src ├── NOTES.md ├── sort.h ├── bitrev16_table.c ├── bitrev16_table.h ├── bitrev256_table.h ├── bitrev512_table.h ├── test_avx_support.c ├── bitrev1024_table.h ├── data_poly1024.h ├── tests_in_paper │ ├── test_ntt_red1024b.c │ ├── test_ntt_red1024.c │ ├── test_intt_red1024b.c │ ├── test_ntt_red1024e.c │ ├── test_ntt_red1024f.c │ ├── test_intt_red1024.c │ ├── test_ntt_red1024d.c │ ├── test_ntt_red1024c.c │ └── Makefile ├── test_shift.c ├── test_bitrev_tables.h ├── ntt32_tables.h ├── sort.c ├── ntt16.c ├── ntt256.c ├── ntt512.c ├── ntt1024.c ├── test_ntt_tables.h ├── kat_mul1024.c ├── test_mul1024.c ├── naive_ntt16.c ├── kat_mul1024_red.c ├── naive_ntt256.c ├── naive_ntt512.c ├── kat_mul1024_red_asm.c ├── naive_ntt1024.c ├── bitrev256_table.c ├── intervals.h ├── ntt256.h ├── ntt512.h ├── ntt1024.h ├── ntt_red16.h ├── ntt_red256.h ├── ntt_red512.h ├── speed_mul1024.c ├── ntt_red1024.h ├── speed_mul1024_red.c ├── ntt_red_asm16.h ├── speed_mul1024_naive.c ├── speed_mul1024_red_asm.c ├── ntt_red_asm256.h ├── ntt_red_asm512.h ├── ntt_red_asm1024.h ├── red_bounds.h ├── ntt16.h ├── naive_ntt16.h ├── README.md ├── naive_ntt256.h ├── naive_ntt512.h ├── test_ntt_red_tables.h ├── naive_ntt1024.h ├── ntt32_tables.c ├── make_bitrev_table.c ├── ntt_red16.c ├── ntt_red256.c ├── ntt_red512.c ├── ntt_red1024.c ├── test_mod.c ├── bitrev512_table.c ├── ntt_red_asm16.c ├── ntt_red_asm1024.c ├── ntt_red_asm256.c ├── ntt_red_asm512.c └── test_red.c ├── paper ├── slides.pdf └── main_final.pdf ├── verifier ├── vstte20-benchmarks │ ├── sort.h │ ├── bitrev1024_table.h │ ├── harness_ntt_red1024b.c │ ├── harness_ntt_red1024e.c │ ├── harness_intt_red1024b.c │ ├── harness_ntt_red1024.c │ ├── harness_ntt_red1024f.c │ ├── harness_intt_red1024.c │ ├── harness_ntt_red1024c.c │ ├── harness_ntt_red1024d.c │ ├── sort.c │ ├── verify.sh │ ├── clam.h │ ├── verify_all │ ├── ntt_red1024.h │ ├── red_bounds.h │ ├── make_bitrev_table.c │ └── ntt_red1024.c ├── install.sh └── src │ └── CMakeLists.txt ├── .gitignore ├── LICENSE ├── README.md └── data └── primitive-roots-1024.txt /src/NOTES.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /paper/slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SRI-CSL/NTT/HEAD/paper/slides.pdf -------------------------------------------------------------------------------- /paper/main_final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SRI-CSL/NTT/HEAD/paper/main_final.pdf -------------------------------------------------------------------------------- /src/sort.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sort an array of uint64_t numbers in increasing order 3 | */ 4 | 5 | #ifndef __SORT_H 6 | #define __SORT_H 7 | 8 | #include 9 | 10 | extern void sort(uint64_t *a, uint32_t n); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/bitrev16_table.c: -------------------------------------------------------------------------------- 1 | #include "bitrev16_table.h" 2 | 3 | const uint16_t bitrev16[BITREV16_NPAIRS][2] = { 4 | { 1, 8 }, { 2, 4 }, { 3, 12 }, { 5, 10 }, 5 | { 7, 14 }, { 11, 13 }, 6 | }; 7 | 8 | -------------------------------------------------------------------------------- /src/bitrev16_table.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITREV16_TABLE_H 2 | #define __BITREV16_TABLE_H 3 | 4 | #include 5 | 6 | #define BITREV16_NPAIRS 6 7 | 8 | extern const uint16_t bitrev16[BITREV16_NPAIRS][2]; 9 | 10 | #endif /* __BITREV16_TABLE_H */ 11 | -------------------------------------------------------------------------------- /src/bitrev256_table.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITREV256_TABLE_H 2 | #define __BITREV256_TABLE_H 3 | 4 | #include 5 | 6 | #define BITREV256_NPAIRS 120 7 | 8 | extern const uint16_t bitrev256[BITREV256_NPAIRS][2]; 9 | 10 | #endif /* __BITREV256_TABLE_H */ 11 | -------------------------------------------------------------------------------- /src/bitrev512_table.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITREV512_TABLE_H 2 | #define __BITREV512_TABLE_H 3 | 4 | #include 5 | 6 | #define BITREV512_NPAIRS 240 7 | 8 | extern const uint16_t bitrev512[BITREV512_NPAIRS][2]; 9 | 10 | #endif /* __BITREV512_TABLE_H */ 11 | -------------------------------------------------------------------------------- /src/test_avx_support.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ntt_asm.h" 3 | 4 | int main(void) { 5 | if (avx2_supported()) { 6 | printf("AVX2 is supported\n"); 7 | } else { 8 | printf("AVX2 is not supported\n"); 9 | } 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/sort.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sort an array of uint64_t numbers in increasing order 3 | */ 4 | 5 | #ifndef __SORT_H 6 | #define __SORT_H 7 | 8 | #include 9 | 10 | extern void sort(uint64_t *a, uint32_t n); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/bitrev1024_table.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITREV1024_TABLE_H 2 | #define __BITREV1024_TABLE_H 3 | 4 | #include 5 | 6 | #define BITREV1024_NPAIRS 496 7 | 8 | extern const uint16_t bitrev1024[BITREV1024_NPAIRS][2]; 9 | 10 | #endif /* __BITREV1024_TABLE_H */ 11 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/bitrev1024_table.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITREV1024_TABLE_H 2 | #define __BITREV1024_TABLE_H 3 | 4 | #include 5 | 6 | #define BITREV1024_NPAIRS 496 7 | 8 | extern const uint16_t bitrev1024[BITREV1024_NPAIRS][2]; 9 | 10 | #endif /* __BITREV1024_TABLE_H */ 11 | -------------------------------------------------------------------------------- /verifier/install.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | mkdir build && cd build 4 | cmake .. 5 | cmake --build . --target clam-seadsa && cmake .. 6 | cmake --build . --target clam-seallvm && cmake .. 7 | cmake --build . --target ntt-clam && cmake .. 8 | cmake --build . --target crab && cmake .. 9 | cmake --build . --target install 10 | -------------------------------------------------------------------------------- /src/data_poly1024.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Declarations for KAT 3 | */ 4 | 5 | #ifndef __DATA_POLY1024_H 6 | #define __DATA_POLY1024_H 7 | 8 | #include 9 | 10 | #define REPETITIONS 100 11 | #define N 1025 12 | 13 | extern int32_t a[REPETITIONS][N], b[REPETITIONS][N], c[REPETITIONS][N]; 14 | 15 | extern void build_kat(void); 16 | 17 | #endif /* __DATA_POLY1024_H */ 18 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_ct_rev2std(nd_a, 1024, ntt_red1024_omega_powers); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_gs_std2rev(nd_a, 1024, ntt_red1024_omega_powers); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_intt_red1024b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_ct_rev2std(nd_a, 1024, ntt_red1024_inv_omega_powers); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_ct_std2rev(nd_a, 1024, ntt_red1024_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024f.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_gs_rev2std(nd_a, 1024, ntt_red1024_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_intt_red1024.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | 18 | int main(void) { 19 | ASSUME_FORALL(nd_a, 1024, 0, Q) 20 | // defined in ntt_red.c 21 | ntt_red_ct_std2rev(nd_a, 1024, ntt_red1024_inv_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024b.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * forward NTT, CT, rev2std 8 | * 9 | * static inline void ntt_red1024_ct_rev2std(int32_t *a) { 10 | * ntt_red_ct_rev2std(a, 1024, ntt_red1024_omega_powers); 11 | * } 12 | */ 13 | 14 | int main(void) { 15 | interval_t *a[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | abstract_ntt_red_ct_rev2std(a, 1024, ntt_red1024_omega_powers); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * forward NTT, CT, std2rev 8 | * 9 | * static inline void ntt_red1024_ct_std2rev(int32_t *a) { 10 | * ntt_red_ct_std2rev(a, 1024, ntt_red1024_omega_powers_rev); 11 | * } 12 | */ 13 | 14 | int main(void) { 15 | interval_t *a[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | abstract_ntt_red_ct_std2rev(a, 1024, ntt_red1024_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_intt_red1024b.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * inverse NTT, CT, rev2std 8 | * 9 | * static inline void intt_red1024_ct_rev2std(int32_t *a) { 10 | * ntt_red_ct_rev2std(a, 1024, ntt_red1024_inv_omega_powers); 11 | * } 12 | */ 13 | 14 | int main(void) { 15 | interval_t *a[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | abstract_ntt_red_ct_rev2std(a, 1024, ntt_red1024_inv_omega_powers); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024e.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * forward NTT, GS, std2rev 8 | * 9 | * static inline void ntt_red1024_gs_std2rev(int32_t *a) { 10 | * ntt_red_gs_std2rev(a, 1024, ntt_red1024_omega_powers); 11 | * } 12 | * 13 | */ 14 | 15 | int main(void) { 16 | interval_t *a[1024]; 17 | uint32_t i; 18 | 19 | for (i=0; i<1024; i++) { 20 | a[i] = interval(0, Q-1); 21 | } 22 | abstract_ntt_red_gs_std2rev(a, 1024, ntt_red1024_omega_powers); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024f.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * forward NTT, GS, rev2std 8 | * 9 | * static inline void ntt_red1024_gs_rev2std(int32_t *a) { 10 | * ntt_red_gs_rev2std(a, 1024, ntt_red1024_omega_powers_rev); 11 | * } 12 | */ 13 | 14 | int main(void) { 15 | interval_t *a[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | abstract_ntt_red_gs_rev2std(a, 1024, ntt_red1024_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_intt_red1024.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | #include "ntt_red1024_tables.h" 3 | 4 | #define Q 12289 5 | 6 | /* 7 | * inverse NTT, CT, std2rev 8 | * 9 | * static inline void intt_red1024_ct_std2rev(int32_t *a) { 10 | * ntt_red_ct_std2rev(a, 1024, ntt_red1024_inv_omega_powers_rev); 11 | * } 12 | */ 13 | 14 | int main(void) { 15 | interval_t *a[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | abstract_ntt_red_ct_std2rev(a, 1024, ntt_red1024_inv_omega_powers_rev); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024c.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | static int16_t nd_p[1024]; 18 | 19 | int main(void) { 20 | ASSUME_FORALL(nd_a, 1024, 0, Q) 21 | ASSUME_FORALL(nd_p, 1024, -6144, 6144) 22 | 23 | // defined in ntt_red.c 24 | ntt_red_ct_std2rev(nd_a, 1024, nd_p); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/harness_ntt_red1024d.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "ntt.h" 8 | #include "bitrev1024_table.h" 9 | #include "ntt_red1024.h" 10 | #include "sort.h" 11 | 12 | #include "clam.h" 13 | 14 | #define Q 12289 15 | 16 | static int32_t nd_a[1024]; 17 | static int16_t nd_p[1024]; 18 | 19 | int main(void) { 20 | ASSUME_FORALL(nd_a, 1024, 0, Q) 21 | ASSUME_FORALL(nd_p, 1024, -6144, 6144) 22 | 23 | // defined in ntt_red.c 24 | ntt_red_ct_rev2std(nd_a, 1024, nd_p); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024d.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | 3 | #define Q 12289 4 | 5 | /* 6 | * forward NTT, CT, rev2std 7 | * 8 | * static inline void ntt_red1024_ct_rev2std(int32_t *a) { 9 | * ntt_red_ct_rev2std(a, 1024, ntt_red1024_omega_powers); 10 | * } 11 | */ 12 | 13 | int main(void) { 14 | interval_t *a[1024]; 15 | interval_t *p[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | for (i=0; i<1024; i++) { 22 | p[i] = interval(-(Q-1)/2, (Q-1)/2); 23 | } 24 | 25 | abstract2_ntt_red_ct_rev2std(a, 1024, (const interval_t **) p); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /src/tests_in_paper/test_ntt_red1024c.c: -------------------------------------------------------------------------------- 1 | #include "../ntt_red_interval.h" 2 | 3 | #define Q 12289 4 | 5 | /* 6 | * forward NTT, CT, std2rev. 7 | * 8 | * static inline void ntt_red1024_ct_std2rev(int32_t *a) { 9 | * ntt_red_ct_std2rev(a, 1024, ntt_red1024_omega_powers_rev); 10 | * } 11 | */ 12 | 13 | int main(void) { 14 | interval_t *a[1024]; 15 | interval_t *p[1024]; 16 | uint32_t i; 17 | 18 | for (i=0; i<1024; i++) { 19 | a[i] = interval(0, Q-1); 20 | } 21 | for (i=0; i<1024; i++) { 22 | p[i] = interval(-(Q-1)/2, (Q-1)/2); 23 | } 24 | 25 | abstract2_ntt_red_ct_std2rev(a, 1024, (const interval_t **) p); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /src/test_shift.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void print_shift(int32_t x) { 5 | printf("shift_31(%"PRId32") = %"PRId32"\n", x, (x >> 31)); 6 | printf("shift_11(%"PRId32") = %"PRId32"\n", x, (x >> 11)); 7 | } 8 | 9 | static void print_shift_and(int32_t x, int32_t q) { 10 | printf("shift_31(%"PRId32" & %"PRId32") = %"PRId32"\n", x, q, (x >> 31) & q); 11 | printf("shift_11(%"PRId32" & %"PRId32") = %"PRId32"\n", x, q, (x >> 11) & q); 12 | } 13 | 14 | int main(void) { 15 | int32_t i; 16 | 17 | for (i=0; i<1003; i++) { 18 | print_shift(i); 19 | print_shift(-i); 20 | print_shift_and(i, 12289); 21 | print_shift_and(-i, 12289); 22 | } 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /src/test_bitrev_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Bitreverse tables. 3 | */ 4 | 5 | #ifndef __TEST_BITREV_TABLES_H 6 | #define __TEST_BITREV_TABLES_H 7 | 8 | #include 9 | 10 | /* 11 | * Tables for bit-reverse shuffle for n=128, 256, 512, 1024, 2048 12 | */ 13 | #define BITREV128_NPAIRS 56 14 | #define BITREV256_NPAIRS 120 15 | #define BITREV512_NPAIRS 240 16 | #define BITREV1024_NPAIRS 496 17 | #define BITREV2048_NPAIRS 992 18 | 19 | extern const uint16_t bitrev128_pair[BITREV128_NPAIRS][2]; 20 | extern const uint16_t bitrev256_pair[BITREV256_NPAIRS][2]; 21 | extern const uint16_t bitrev512_pair[BITREV512_NPAIRS][2]; 22 | extern const uint16_t bitrev1024_pair[BITREV1024_NPAIRS][2]; 23 | extern const uint16_t bitrev2048_pair[BITREV2048_NPAIRS][2]; 24 | 25 | #endif /* __TEST_BITREV_TABLES_H */ 26 | -------------------------------------------------------------------------------- /verifier/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_definitions(-D__STDC_CONSTANT_MACROS) 2 | add_definitions(-D__STDC_LIMIT_MACROS) 3 | 4 | set(LLVM_LINK_COMPONENTS 5 | irreader 6 | bitwriter 7 | ipo 8 | scalaropts 9 | instrumentation 10 | transformutils 11 | core 12 | codegen 13 | objcarcopts) 14 | 15 | 16 | add_llvm_executable(nttverifier DISABLE_LLVM_LINK_LLVM_DYLIB 17 | ntt_verifier.cpp 18 | ntt_intervals.cpp) 19 | 20 | target_link_libraries (nttverifier PRIVATE 21 | ${LLVM_SEAHORN_LIBS} 22 | ${SEA_DSA_BS} 23 | ${CLAM_LIBS} 24 | ) 25 | llvm_config (nttverifier ${LLVM_LINK_COMPONENTS}) 26 | install(TARGETS nttverifier RUNTIME DESTINATION bin) 27 | 28 | if (NTT_VERIFIER_STATIC_EXE) 29 | set (CMAKE_EXE_LINKER_FLAGS "-static -static-libgcc -static-libstdc++") 30 | set_target_properties (nttverifier PROPERTIES LINK_SEARCH_START_STATIC ON) 31 | set_target_properties (nttverifier PROPERTIES LINK_SEARCH_END_STATIC ON) 32 | endif() 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *~ 3 | *.dSYM 4 | test_ntt 5 | test_ntt16 6 | test_ntt256 7 | test_ntt512 8 | test_ntt1024 9 | kat_mul1024 10 | speed_mul1024 11 | kat_mul1024_red 12 | speed_mul1024_red 13 | kat_mul1024_red_asm 14 | speed_mul1024_red_asm 15 | test_ntt_red16 16 | test_ntt_red256 17 | test_ntt_red512 18 | test_ntt_red1024 19 | test_ntt_red_asm16 20 | test_ntt_red_asm256 21 | test_ntt_red_asm512 22 | test_ntt_red_asm1024 23 | test_ntt_red 24 | test_red_bounds 25 | test_avx 26 | test_avx_support 27 | test_ntt_avx 28 | make_tables 29 | make_red_tables 30 | make_bitrev_table 31 | ntt16_tables.h 32 | ntt16_tables.c 33 | ntt256_tables.h 34 | ntt256_tables.c 35 | ntt512_tables.h 36 | ntt512_tables.c 37 | ntt1024_tables.h 38 | ntt1024_tables.c 39 | ntt_red16_tables.h 40 | ntt_red16_tables.c 41 | ntt_red256_tables.h 42 | ntt_red256_tables.c 43 | ntt_red512_tables.h 44 | ntt_red512_tables.c 45 | ntt_red1024_tables.h 46 | ntt_red1024_tables.c 47 | bitrev16_tables.h 48 | bitrev16_tables.c 49 | bitrev256_tables.h 50 | bitrev256_tables.c 51 | bitrev512_tables.h 52 | bitrev512_tables.c 53 | bitrev1024_tables.h 54 | bitrev1024_tables.c 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 SRI International's Computer Science Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Number Theoretic Transform 2 | 3 | The Number Theoretic Transform ([NTT](https://en.wikipedia.org/wiki/Discrete_Fourier_transform_(general)#Number-theoretic_transform)) is an efficient algorithm for 4 | computing the products of polynomials whose coefficients belong to 5 | a finite field. 6 | 7 | This repository contains SRI's various implementations of the NTT (developed while 8 | implementing the [Bliss](https://github.com/SRI-CSL/Bliss)). 9 | 10 | It also includes the verification of these algorithms. 11 | 12 | The repository is organized into three subdirectories: 13 | 14 | * [src](https://github.com/SRI-CSL/NTT/tree/master/src/README.md) contains a plethora of code implementing the algorithms described in paper. 15 | 16 | * [verifier](https://github.com/SRI-CSL/NTT/tree/master/verifier/README.md) contains the code of the verifier that proves absence of integer overflows of the programs described in [src](https://github.com/SRI-CSL/NTT/tree/master/src/README.md). 17 | 18 | * [paper](https://github.com/SRI-CSL/NTT/blob/master/paper/main_final.pdf) contains the [VSTTE20 conference](https://sri-csl.github.io/VSTTE20/) version of the paper, as well as the [slides](https://github.com/SRI-CSL/NTT/blob/master/paper/slides.pdf) from the conference talk. 19 | -------------------------------------------------------------------------------- /src/tests_in_paper/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # all tests 3 | # 4 | tests=test_intt_red1024 test_intt_red1024b \ 5 | test_ntt_red1024 test_ntt_red1024b test_ntt_red1024c \ 6 | test_ntt_red1024d test_ntt_red1024e test_ntt_red1024f 7 | 8 | CC?=clang 9 | CFLAGS=-Wall -I../ 10 | 11 | # 12 | # We assume ../intervals.o ../red_bounds.o ../ntt_red_interval.o ../ntt_red1024_tables.o all exist 13 | # and are up to date. 14 | # 15 | obj=../intervals.o ../red_bounds.o ../ntt_red_interval.o ../ntt_red1024_tables.o 16 | 17 | all: $(tests) 18 | 19 | test_intt_red1024: test_intt_red1024.c 20 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 21 | 22 | test_intt_red1024b: test_intt_red1024b.c 23 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 24 | 25 | test_ntt_red1024: test_ntt_red1024.c 26 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 27 | 28 | test_ntt_red1024b: test_ntt_red1024b.c 29 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 30 | 31 | test_ntt_red1024c: test_ntt_red1024c.c 32 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 33 | 34 | test_ntt_red1024d: test_ntt_red1024d.c 35 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 36 | 37 | test_ntt_red1024e: test_ntt_red1024e.c 38 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 39 | 40 | test_ntt_red1024f: test_ntt_red1024f.c 41 | $(CC) $(CFLAGS) -o $@ $^ $(obj) 42 | 43 | # 44 | # Clean up 45 | # 46 | clean: 47 | rm -f $(tests) 48 | rm -f *.o 49 | -------------------------------------------------------------------------------- /src/ntt32_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Parameters: 3 | * - q = 12289 4 | * - n = 32 5 | * - psi = 563 6 | * - omega = psi^2 = 9744 7 | * - inverse of psi = 5828 8 | * - inverse of omega = 11077 9 | * - inverse of n = 11905 10 | */ 11 | 12 | #ifndef __NTT32_TABLES_H 13 | #define __NTT32_TABLES_H 14 | 15 | #include 16 | 17 | /* 18 | * PARAMETERS 19 | */ 20 | static const int32_t ntt32_psi = 563; 21 | static const int32_t ntt32_omega = 9744; 22 | static const int32_t ntt32_inv_psi = 5828; 23 | static const int32_t ntt32_inv_omega = 11077; 24 | static const int32_t ntt32_inv_n = 11905; 25 | 26 | /* 27 | * BIT-REVERSE SHUFFLE 28 | */ 29 | #define BITREV32_NPAIRS 12 30 | 31 | extern const uint16_t ntt32_bitrev[BITREV32_NPAIRS][2]; 32 | 33 | /* 34 | * POWERS OF PSI 35 | */ 36 | extern const uint16_t ntt32_psi_powers[32]; 37 | extern const uint16_t ntt32_inv_psi_powers[32]; 38 | extern const uint16_t ntt32_scaled_inv_psi_powers[32]; 39 | 40 | /* 41 | * TABLES FOR NTT COMPUTATION 42 | */ 43 | extern const uint16_t ntt32_omega_powers[32]; 44 | extern const uint16_t ntt32_omega_powers_rev[32]; 45 | extern const uint16_t ntt32_inv_omega_powers[32]; 46 | extern const uint16_t ntt32_inv_omega_powers_rev[32]; 47 | extern const uint16_t ntt32_mixed_powers[32]; 48 | extern const uint16_t ntt32_mixed_powers_rev[32]; 49 | extern const uint16_t ntt32_inv_mixed_powers[32]; 50 | extern const uint16_t ntt32_inv_mixed_powers_rev[32]; 51 | 52 | #endif /* __NTT32_TABLES_H */ 53 | -------------------------------------------------------------------------------- /src/sort.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BASIC SORT FOR INTEGER ARRAYS 3 | */ 4 | 5 | #include "sort.h" 6 | 7 | static void qsort_int_array(uint64_t *a, uint32_t n); 8 | 9 | // insertion sort 10 | static void isort_int_array(uint64_t *a, uint32_t n) { 11 | uint32_t i, j; 12 | uint64_t x, y; 13 | 14 | for (i=1; i 1 35 | static void qsort_int_array(uint64_t *a, uint32_t n) { 36 | uint32_t i, j; 37 | uint64_t x, y; 38 | 39 | // x = random pivot 40 | i = n/2; 41 | x = a[i]; 42 | 43 | // swap x and a[0] 44 | a[i] = a[0]; 45 | a[0] = x; 46 | 47 | i = 0; 48 | j = n; 49 | 50 | do { j--; } while (a[j] > x); 51 | do { i++; } while (i <= j && a[i] < x); 52 | 53 | while (i < j) { 54 | y = a[i]; a[i] = a[j]; a[j] = y; 55 | 56 | do { j--; } while (a[j] > x); 57 | do { i++; } while (a[i] < x); 58 | } 59 | 60 | // pivot goes into a[j] 61 | a[0] = a[j]; 62 | a[j] = x; 63 | 64 | // sort a[0...j-1] and a[j+1 .. n-1] 65 | sort_array(a, j); 66 | j++; 67 | sort_array(a + j, n - j); 68 | } 69 | 70 | 71 | /* 72 | * External call 73 | */ 74 | void sort(uint64_t *a, uint32_t n) { 75 | sort_array(a, n); 76 | } 77 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/sort.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BASIC SORT FOR INTEGER ARRAYS 3 | */ 4 | 5 | #include "sort.h" 6 | 7 | static void qsort_int_array(uint64_t *a, uint32_t n); 8 | 9 | // insertion sort 10 | static void isort_int_array(uint64_t *a, uint32_t n) { 11 | uint32_t i, j; 12 | uint64_t x, y; 13 | 14 | for (i=1; i 1 35 | static void qsort_int_array(uint64_t *a, uint32_t n) { 36 | uint32_t i, j; 37 | uint64_t x, y; 38 | 39 | // x = random pivot 40 | i = n/2; 41 | x = a[i]; 42 | 43 | // swap x and a[0] 44 | a[i] = a[0]; 45 | a[0] = x; 46 | 47 | i = 0; 48 | j = n; 49 | 50 | do { j--; } while (a[j] > x); 51 | do { i++; } while (i <= j && a[i] < x); 52 | 53 | while (i < j) { 54 | y = a[i]; a[i] = a[j]; a[j] = y; 55 | 56 | do { j--; } while (a[j] > x); 57 | do { i++; } while (a[i] < x); 58 | } 59 | 60 | // pivot goes into a[j] 61 | a[0] = a[j]; 62 | a[j] = x; 63 | 64 | // sort a[0...j-1] and a[j+1 .. n-1] 65 | sort_array(a, j); 66 | j++; 67 | sort_array(a + j, n - j); 68 | } 69 | 70 | 71 | /* 72 | * External call 73 | */ 74 | void sort(uint64_t *a, uint32_t n) { 75 | sort_array(a, n); 76 | } 77 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/verify.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Make sure we exit if there is a failure 4 | set -e 5 | 6 | usage() { 7 | echo "Usage: $0 FILE [-inline]" 8 | } 9 | 10 | if [[ $# -lt 1 ]]; then 11 | usage 12 | exit 1 13 | fi 14 | 15 | INLINE=false 16 | POSITIONAL=() 17 | while [[ $# -gt 0 ]] 18 | do 19 | key="$1" 20 | case $key in 21 | -inline|--inline) 22 | shift # past argument 23 | INLINE=true 24 | ;; 25 | -help|--help) 26 | usage 27 | exit 0 28 | ;; 29 | *) # unknown option 30 | POSITIONAL+=("$1") # save it in an array for later 31 | shift # past argument 32 | ;; 33 | esac 34 | done 35 | set -- "${POSITIONAL[@]}" # restore positional parameters 36 | FILE=$1 37 | shift 38 | 39 | INSTALL_DIR=$(pwd)/../install/bin 40 | 41 | CLAMPP=${INSTALL_DIR}/clam-pp 42 | if [ "${CLAMPP}" == "" ]; then 43 | echo "Cannot find clam-pp" 44 | exit 1 45 | fi 46 | 47 | SEAOPT=${INSTALL_DIR}/seaopt 48 | if [ "${SEAOPT}" == "" ]; then 49 | echo "Cannot find seaopt" 50 | exit 1 51 | fi 52 | 53 | NTTVERIFIER=${INSTALL_DIR}/nttverifier 54 | if [ "${NTTVERIFIER}" == "" ]; then 55 | echo "Cannot find nttverifier" 56 | exit 1 57 | fi 58 | 59 | ### Clam preprocessor 60 | CLAMPP_OPTS="--simplifycfg-sink-common=false --clam-devirt --devirt-resolver=sea-dsa --sea-dsa-type-aware=true" 61 | if [ ${INLINE} == true ] ; then 62 | CLAMPP_OPTS="${CLAMPP_OPTS} --clam-inline-all" 63 | fi 64 | ${CLAMPP} ${FILE} ${CLAMPP_OPTS} -o ${FILE}.pp.bc 65 | ### Static loop unrolling 66 | ${SEAOPT} -O1 ${FILE}.pp.bc \ 67 | -loop-simplify -fake-latch-exit -loop-unroll -unroll-threshold=99999999 \ 68 | -o ${FILE}.unrolled.pp.bc 69 | ### NTT Verifier 70 | ${NTTVERIFIER} ${FILE}.unrolled.pp.bc 71 | 72 | exit 0 73 | -------------------------------------------------------------------------------- /src/ntt16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=16. 3 | */ 4 | 5 | #include "ntt16.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void ntt16_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16(a, 16, ntt16_psi_powers); 12 | ntt16_ct_std2rev(a); 13 | mul_array16(b, 16, ntt16_psi_powers); 14 | ntt16_ct_std2rev(b); 15 | mul_array(c, 16, a, b); 16 | intt16_ct_rev2std(c); 17 | mul_array16(c, 16, ntt16_scaled_inv_psi_powers); 18 | } 19 | 20 | void ntt16_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16(a, 16, ntt16_psi_powers); 22 | ntt16_gs_std2rev(a); 23 | mul_array16(b, 16, ntt16_psi_powers); 24 | ntt16_gs_std2rev(b); 25 | mul_array(c, 16, a, b); 26 | intt16_ct_rev2std(c); 27 | mul_array16(c, 16, ntt16_scaled_inv_psi_powers); 28 | } 29 | 30 | void ntt16_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16(a, 16, ntt16_psi_powers); 32 | ntt16_ct_std2rev(a); 33 | mul_array16(b, 16, ntt16_psi_powers); 34 | ntt16_ct_std2rev(b); 35 | mul_array(c, 16, a, b); 36 | intt16_gs_rev2std(c); 37 | mul_array16(c, 16, ntt16_scaled_inv_psi_powers); 38 | } 39 | 40 | void ntt16_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16(a, 16, ntt16_psi_powers); 42 | ntt16_gs_std2rev(a); 43 | mul_array16(b, 16, ntt16_psi_powers); 44 | ntt16_gs_std2rev(b); 45 | mul_array(c, 16, a, b); 46 | intt16_gs_rev2std(c); 47 | mul_array16(c, 16, ntt16_scaled_inv_psi_powers); 48 | } 49 | 50 | 51 | /* 52 | * Use combined mulntt then inttmul 53 | */ 54 | void ntt16_product5(int32_t *c, int32_t *a, int32_t *b) { 55 | mulntt16_ct_std2rev(a); 56 | mulntt16_ct_std2rev(b); 57 | mul_array(c, 16, a, b); 58 | inttmul16_gs_rev2std(c); 59 | scalar_mul_array(c, 16, ntt16_inv_n); // divide by n 60 | } 61 | -------------------------------------------------------------------------------- /src/ntt256.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=256. 3 | */ 4 | 5 | #include "ntt256.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void ntt256_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16(a, 256, ntt256_psi_powers); 12 | ntt256_ct_std2rev(a); 13 | mul_array16(b, 256, ntt256_psi_powers); 14 | ntt256_ct_std2rev(b); 15 | mul_array(c, 256, a, b); 16 | intt256_ct_rev2std(c); 17 | mul_array16(c, 256, ntt256_scaled_inv_psi_powers); 18 | } 19 | 20 | void ntt256_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16(a, 256, ntt256_psi_powers); 22 | ntt256_gs_std2rev(a); 23 | mul_array16(b, 256, ntt256_psi_powers); 24 | ntt256_gs_std2rev(b); 25 | mul_array(c, 256, a, b); 26 | intt256_ct_rev2std(c); 27 | mul_array16(c, 256, ntt256_scaled_inv_psi_powers); 28 | } 29 | 30 | void ntt256_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16(a, 256, ntt256_psi_powers); 32 | ntt256_ct_std2rev(a); 33 | mul_array16(b, 256, ntt256_psi_powers); 34 | ntt256_ct_std2rev(b); 35 | mul_array(c, 256, a, b); 36 | intt256_gs_rev2std(c); 37 | mul_array16(c, 256, ntt256_scaled_inv_psi_powers); 38 | } 39 | 40 | void ntt256_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16(a, 256, ntt256_psi_powers); 42 | ntt256_gs_std2rev(a); 43 | mul_array16(b, 256, ntt256_psi_powers); 44 | ntt256_gs_std2rev(b); 45 | mul_array(c, 256, a, b); 46 | intt256_gs_rev2std(c); 47 | mul_array16(c, 256, ntt256_scaled_inv_psi_powers); 48 | } 49 | 50 | /* 51 | * Use combined mulntt then inttmul 52 | */ 53 | void ntt256_product5(int32_t *c, int32_t *a, int32_t *b) { 54 | mulntt256_ct_std2rev(a); 55 | mulntt256_ct_std2rev(b); 56 | mul_array(c, 256, a, b); 57 | inttmul256_gs_rev2std(c); 58 | scalar_mul_array(c, 256, ntt256_inv_n); // divide by n 59 | } 60 | -------------------------------------------------------------------------------- /src/ntt512.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=512. 3 | */ 4 | 5 | #include "ntt512.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void ntt512_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16(a, 512, ntt512_psi_powers); 12 | ntt512_ct_std2rev(a); 13 | mul_array16(b, 512, ntt512_psi_powers); 14 | ntt512_ct_std2rev(b); 15 | mul_array(c, 512, a, b); 16 | intt512_ct_rev2std(c); 17 | mul_array16(c, 512, ntt512_scaled_inv_psi_powers); 18 | } 19 | 20 | void ntt512_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16(a, 512, ntt512_psi_powers); 22 | ntt512_gs_std2rev(a); 23 | mul_array16(b, 512, ntt512_psi_powers); 24 | ntt512_gs_std2rev(b); 25 | mul_array(c, 512, a, b); 26 | intt512_ct_rev2std(c); 27 | mul_array16(c, 512, ntt512_scaled_inv_psi_powers); 28 | } 29 | 30 | void ntt512_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16(a, 512, ntt512_psi_powers); 32 | ntt512_ct_std2rev(a); 33 | mul_array16(b, 512, ntt512_psi_powers); 34 | ntt512_ct_std2rev(b); 35 | mul_array(c, 512, a, b); 36 | intt512_gs_rev2std(c); 37 | mul_array16(c, 512, ntt512_scaled_inv_psi_powers); 38 | } 39 | 40 | void ntt512_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16(a, 512, ntt512_psi_powers); 42 | ntt512_gs_std2rev(a); 43 | mul_array16(b, 512, ntt512_psi_powers); 44 | ntt512_gs_std2rev(b); 45 | mul_array(c, 512, a, b); 46 | intt512_gs_rev2std(c); 47 | mul_array16(c, 512, ntt512_scaled_inv_psi_powers); 48 | } 49 | 50 | /* 51 | * Use combined mulntt then inttmul 52 | */ 53 | void ntt512_product5(int32_t *c, int32_t *a, int32_t *b) { 54 | mulntt512_ct_std2rev(a); 55 | mulntt512_ct_std2rev(b); 56 | mul_array(c, 512, a, b); 57 | inttmul512_gs_rev2std(c); 58 | scalar_mul_array(c, 512, ntt512_inv_n); // divide by n 59 | } 60 | -------------------------------------------------------------------------------- /src/ntt1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=1024. 3 | */ 4 | 5 | #include "ntt1024.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void ntt1024_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16(a, 1024, ntt1024_psi_powers); 12 | ntt1024_ct_std2rev(a); 13 | mul_array16(b, 1024, ntt1024_psi_powers); 14 | ntt1024_ct_std2rev(b); 15 | mul_array(c, 1024, a, b); 16 | intt1024_ct_rev2std(c); 17 | mul_array16(c, 1024, ntt1024_scaled_inv_psi_powers); 18 | } 19 | 20 | void ntt1024_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16(a, 1024, ntt1024_psi_powers); 22 | ntt1024_gs_std2rev(a); 23 | mul_array16(b, 1024, ntt1024_psi_powers); 24 | ntt1024_gs_std2rev(b); 25 | mul_array(c, 1024, a, b); 26 | intt1024_ct_rev2std(c); 27 | mul_array16(c, 1024, ntt1024_scaled_inv_psi_powers); 28 | } 29 | 30 | void ntt1024_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16(a, 1024, ntt1024_psi_powers); 32 | ntt1024_ct_std2rev(a); 33 | mul_array16(b, 1024, ntt1024_psi_powers); 34 | ntt1024_ct_std2rev(b); 35 | mul_array(c, 1024, a, b); 36 | intt1024_gs_rev2std(c); 37 | mul_array16(c, 1024, ntt1024_scaled_inv_psi_powers); 38 | } 39 | 40 | void ntt1024_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16(a, 1024, ntt1024_psi_powers); 42 | ntt1024_gs_std2rev(a); 43 | mul_array16(b, 1024, ntt1024_psi_powers); 44 | ntt1024_gs_std2rev(b); 45 | mul_array(c, 1024, a, b); 46 | intt1024_gs_rev2std(c); 47 | mul_array16(c, 1024, ntt1024_scaled_inv_psi_powers); 48 | } 49 | 50 | /* 51 | * Use combined mulntt then inttmul 52 | */ 53 | void ntt1024_product5(int32_t *c, int32_t *a, int32_t *b) { 54 | mulntt1024_ct_std2rev(a); 55 | mulntt1024_ct_std2rev(b); 56 | mul_array(c, 1024, a, b); 57 | inttmul1024_gs_rev2std(c); 58 | scalar_mul_array(c, 1024, ntt1024_inv_n); // divide by n 59 | } 60 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/clam.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern void __CRAB_assert(int); 8 | extern void __CRAB_assume(int); 9 | extern int int_nd(void); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | 16 | #define clam_assume __CRAB_assume 17 | #define clam_assert(X) __CRAB_assert(X) 18 | 19 | // Enable this for verify2 20 | #define UNROLL_ASSUME_FORALL 21 | 22 | 23 | #define STRINGIFY_(A) #A 24 | #define STRINGIFY(A) STRINGIFY_(A) 25 | 26 | // forall i :: ARRAY[i] \in [LB_VAL, UB_VAL) 27 | #define ASSUME_FORALL_WITH_LOOP(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) \ 28 | { \ 29 | int i; \ 30 | _Pragma("nounroll") \ 31 | for(i=0; i= LB_VAL); \ 34 | clam_assume(x < UB_VAL); \ 35 | ARRAY[i] = x; \ 36 | } \ 37 | } 38 | 39 | 40 | // forall i :: ARRAY[i] \in [LB_VAL, UB_VAL) 41 | #define ASSUME_FORALL_WITHOUT_LOOP(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) \ 42 | { \ 43 | int i; \ 44 | _Pragma("unroll(1024)") \ 45 | for(i=0; i= LB_VAL); \ 48 | clam_assume(x < UB_VAL); \ 49 | ARRAY[i] = x; \ 50 | } \ 51 | } 52 | 53 | 54 | #ifndef UNROLL_ASSUME_FORALL 55 | #define ASSUME_FORALL(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) \ 56 | ASSUME_FORALL_WITH_LOOP(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) 57 | #else 58 | #define ASSUME_FORALL(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) \ 59 | ASSUME_FORALL_WITHOUT_LOOP(ARRAY, ARRAY_SIZE, LB_VAL, UB_VAL) 60 | #endif 61 | -------------------------------------------------------------------------------- /src/test_ntt_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tables for testing the NTT functions 3 | */ 4 | 5 | #ifndef __TEST_NTT_TABLES_H 6 | #define __TEST_NTT_TABLES_H 7 | 8 | #include 9 | 10 | /* 11 | * Powers of psi: 12 | * - for n=16: psi=1212, omega=6553 13 | * - for n=256: psi=1002, omega=8595 14 | * - for n=512: psi=1003, omega=10600 15 | * - for n=1024: psi=1014, omega=8209 16 | */ 17 | extern const uint16_t psi_powers_ntt16_12289[16]; 18 | extern const uint16_t psi_powers_ntt256_12289[256]; 19 | extern const uint16_t psi_powers_ntt512_12289[512]; 20 | extern const uint16_t psi_powers_ntt1024_12289[1024]; 21 | 22 | /* 23 | * Powers of omega in Shoup-style format 24 | * - use the same parameters psi/omega as above 25 | */ 26 | extern const uint16_t shoup_ntt16_12289[16]; 27 | extern const uint16_t shoup_ntt256_12289[256]; 28 | extern const uint16_t shoup_ntt512_12289[512]; 29 | extern const uint16_t shoup_ntt1024_12289[1024]; 30 | 31 | /* 32 | * Scaled tables in Shoup-style format: 33 | * - powers of omega multiplied by powers of psi 34 | */ 35 | extern const uint16_t shoup_scaled_ntt16_12289[16]; 36 | extern const uint16_t shoup_scaled_ntt256_12289[256]; 37 | extern const uint16_t shoup_scaled_ntt512_12289[512]; 38 | extern const uint16_t shoup_scaled_ntt1024_12289[1024]; 39 | 40 | /* 41 | * Powers of omega in bitreverse/Shoup-style format 42 | * - use the same parameters psi/omega as above 43 | */ 44 | extern const uint16_t rev_shoup_ntt16_12289[16]; 45 | extern const uint16_t rev_shoup_ntt256_12289[256]; 46 | extern const uint16_t rev_shoup_ntt512_12289[512]; 47 | extern const uint16_t rev_shoup_ntt1024_12289[1024]; 48 | 49 | /* 50 | * Powers of omega and spi in bitreverse/Shoup-style format 51 | * - use the same parameters psi/omega as above 52 | */ 53 | extern const uint16_t rev_shoup_scaled_ntt16_12289[16]; 54 | extern const uint16_t rev_shoup_scaled_ntt256_12289[256]; 55 | extern const uint16_t rev_shoup_scaled_ntt512_12289[512]; 56 | extern const uint16_t rev_shoup_scaled_ntt1024_12289[1024]; 57 | 58 | 59 | #endif /* __TEST_NTT_TABLES_H */ 60 | -------------------------------------------------------------------------------- /src/kat_mul1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Tancrede: I added to the repository Harvey’s NTT (with a file 4 | tools/precomputation-ntt-harvey.sage to explain how the constants 5 | are generated), and a new test test_poly.c which uses known values 6 | (a[i]*b[i]=c[i]) and verifies that INTT(NTT(a[i])*NTT(b[i])) == c[i] 7 | with Harvey’s NTT. I believe similar tests should be possible with 8 | the others NTTs in the repository, although they do not output 9 | numbers in [0, PARAM_Q) so the test should be adapted. 10 | 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ntt1024.h" 19 | #include "data_poly1024.h" 20 | 21 | static void copy_poly(int32_t a[1024], const int32_t b[1024]) { 22 | uint32_t i; 23 | 24 | for (i=0; i<1024; i++) { 25 | a[i] = b[i]; 26 | } 27 | } 28 | 29 | static void test_mul_from_KAT_values(void (*f)(int32_t *, int32_t *, int32_t *)) { 30 | int32_t ua[1024], ub[1024], uc[1024]; 31 | 32 | for (int i = 0; i < REPETITIONS; i++) { 33 | copy_poly(ua, a[i]); 34 | copy_poly(ub, b[i]); 35 | f(uc, ua, ub); 36 | 37 | for (int j = 0; j < 1024; j++) { 38 | if (uc[j] != c[i][j]) { 39 | printf("\t Failure at round %d on coeff %d: %"PRIi32" != %"PRIi32".\n", i, j, uc[j], c[i][j]); 40 | exit(EXIT_FAILURE); 41 | } 42 | } 43 | } 44 | 45 | printf("\t Success after %d tests\n", REPETITIONS); 46 | } 47 | 48 | int main(void){ 49 | build_kat(); 50 | 51 | printf("Testing ntt1024_product1 (KAT values)\n"); 52 | test_mul_from_KAT_values(ntt1024_product1); 53 | 54 | printf("\nTesting ntt1024_product2 (KAT values)\n"); 55 | test_mul_from_KAT_values(ntt1024_product2); 56 | 57 | printf("\nTesting ntt1024_product3 (KAT values)\n"); 58 | test_mul_from_KAT_values(ntt1024_product3); 59 | 60 | printf("\nTesting ntt1024_product4 (KAT values)\n"); 61 | test_mul_from_KAT_values(ntt1024_product4); 62 | 63 | printf("\nTesting ntt1024_product5 (KAT values)\n"); 64 | test_mul_from_KAT_values(ntt1024_product5); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /src/test_mul1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Tancrede: I added to the repository Harvey’s NTT (with a file 4 | tools/precomputation-ntt-harvey.sage to explain how the constants 5 | are generated), and a new test test_poly.c which uses known values 6 | (a[i]*b[i]=c[i]) and verifies that INTT(NTT(a[i])*NTT(b[i])) == c[i] 7 | with Harvey’s NTT. I believe similar tests should be possible with 8 | the others NTTs in the repository, although they do not output 9 | numbers in [0, PARAM_Q) so the test should be adapted. 10 | 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ntts1024.h" 19 | 20 | static void copy_poly(int32_t a[1024], const int32_t b[1024]) { 21 | uint32_t i; 22 | 23 | for (i=0; i<1024; i++) { 24 | a[i] = b[i]; 25 | } 26 | } 27 | 28 | static void test_mul_from_KAT_values(void (*f)(int32_t *, int32_t *, int32_t *)) { 29 | int32_t ua[1024], ub[1024], uc[1024]; 30 | 31 | // Include KAT vectors 32 | #include "data_poly1024.c" 33 | 34 | for (int i = 0; i < REPETITIONS; i++) { 35 | copy_poly(ua, a[i]); 36 | copy_poly(ub, b[i]); 37 | f(uc, ua, ub); 38 | 39 | for (int j = 0; j < 1024; j++) { 40 | if (uc[j] != c[i][j]) { 41 | printf("\t Failure at round %d on coeff %d: %"PRIi32" != %"PRIi32".\n", i, j, uc[j], c[i][j]); 42 | exit(EXIT_FAILURE); 43 | } 44 | } 45 | } 46 | 47 | printf("\t Success after %d tests\n", REPETITIONS); 48 | } 49 | 50 | int main(void){ 51 | printf("\nTesting ntt1024_product1 (KAT values)\n"); 52 | test_mul_from_KAT_values(ntt1024_product1); 53 | 54 | printf("\nTesting ntt1024_product2 (KAT values)\n"); 55 | test_mul_from_KAT_values(ntt1024_product2); 56 | 57 | printf("\nTesting ntt1024_product3 (KAT values)\n"); 58 | test_mul_from_KAT_values(ntt1024_product3); 59 | 60 | printf("\nTesting ntt1024_product4 (KAT values)\n"); 61 | test_mul_from_KAT_values(ntt1024_product4); 62 | 63 | printf("\nTesting ntt1024_product5 (KAT values)\n"); 64 | test_mul_from_KAT_values(ntt1024_product5); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /src/naive_ntt16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=16. 3 | */ 4 | 5 | #include "naive_ntt16.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void naive_ntt16_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16_naive(a, 16, ntt16_psi_powers, 12289); 12 | naive_ntt16_ct_std2rev(a); 13 | mul_array16_naive(b, 16, ntt16_psi_powers, 12289); 14 | naive_ntt16_ct_std2rev(b); 15 | mul_array_naive(c, 16, a, b, 12289); 16 | naive_intt16_ct_rev2std(c); 17 | mul_array16_naive(c, 16, ntt16_scaled_inv_psi_powers, 12289); 18 | } 19 | 20 | void naive_ntt16_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16_naive(a, 16, ntt16_psi_powers, 12289); 22 | naive_ntt16_gs_std2rev(a); 23 | mul_array16_naive(b, 16, ntt16_psi_powers, 12289); 24 | naive_ntt16_gs_std2rev(b); 25 | mul_array_naive(c, 16, a, b, 12289); 26 | naive_intt16_ct_rev2std(c); 27 | mul_array16_naive(c, 16, ntt16_scaled_inv_psi_powers, 12289); 28 | } 29 | 30 | void naive_ntt16_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16_naive(a, 16, ntt16_psi_powers, 12289); 32 | naive_ntt16_ct_std2rev(a); 33 | mul_array16_naive(b, 16, ntt16_psi_powers, 12289); 34 | naive_ntt16_ct_std2rev(b); 35 | mul_array_naive(c, 16, a, b, 12289); 36 | naive_intt16_gs_rev2std(c); 37 | mul_array16_naive(c, 16, ntt16_scaled_inv_psi_powers, 12289); 38 | } 39 | 40 | void naive_ntt16_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16_naive(a, 16, ntt16_psi_powers, 12289); 42 | naive_ntt16_gs_std2rev(a); 43 | mul_array16_naive(b, 16, ntt16_psi_powers, 12289); 44 | naive_ntt16_gs_std2rev(b); 45 | mul_array_naive(c, 16, a, b, 12289); 46 | naive_intt16_gs_rev2std(c); 47 | mul_array16_naive(c, 16, ntt16_scaled_inv_psi_powers, 12289); 48 | } 49 | 50 | 51 | /* 52 | * Use combined mulntt then inttmul 53 | */ 54 | void naive_ntt16_product5(int32_t *c, int32_t *a, int32_t *b) { 55 | naive_mulntt16_ct_std2rev(a); 56 | naive_mulntt16_ct_std2rev(b); 57 | mul_array_naive(c, 16, a, b, 12289); 58 | naive_inttmul16_gs_rev2std(c); 59 | scalar_mul_array_naive(c, 16, ntt16_inv_n, 12289); // divide by n 60 | } 61 | -------------------------------------------------------------------------------- /src/kat_mul1024_red.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Tancrede: I added to the repository Harvey’s NTT (with a file 4 | tools/precomputation-ntt-harvey.sage to explain how the constants 5 | are generated), and a new test test_poly.c which uses known values 6 | (a[i]*b[i]=c[i]) and verifies that INTT(NTT(a[i])*NTT(b[i])) == c[i] 7 | with Harvey’s NTT. I believe similar tests should be possible with 8 | the others NTTs in the repository, although they do not output 9 | numbers in [0, PARAM_Q) so the test should be adapted. 10 | 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ntt_red1024.h" 19 | #include "data_poly1024.h" 20 | #include "sort.h" 21 | 22 | static void copy_poly(int32_t a[1024], const int32_t b[1024]) { 23 | uint32_t i; 24 | 25 | for (i=0; i<1024; i++) { 26 | a[i] = b[i]; 27 | } 28 | } 29 | 30 | static void test_mul_from_KAT_values(void (*f)(int32_t *, int32_t *, int32_t *)) { 31 | int32_t ua[1024], ub[1024], uc[1024]; 32 | 33 | for (int i = 0; i < REPETITIONS; i++) { 34 | copy_poly(ua, a[i]); 35 | copy_poly(ub, b[i]); 36 | f(uc, ua, ub); 37 | 38 | for (int j = 0; j < 1024; j++) { 39 | if (uc[j] != c[i][j]) { 40 | printf("\t Failure at round %d on coeff %d: %"PRIi32" != %"PRIi32".\n", i, j, uc[j], c[i][j]); 41 | exit(EXIT_FAILURE); 42 | } 43 | } 44 | } 45 | 46 | printf("\t Success after %d tests\n", REPETITIONS); 47 | } 48 | 49 | int main(void){ 50 | build_kat(); 51 | 52 | printf("Testing ntt_red1024_product1 (KAT values)\n"); 53 | test_mul_from_KAT_values(ntt_red1024_product1); 54 | 55 | printf("\nTesting ntt_red1024_product2 (KAT values)\n"); 56 | test_mul_from_KAT_values(ntt_red1024_product2); 57 | 58 | printf("\nTesting ntt_red1024_product3 (KAT values)\n"); 59 | test_mul_from_KAT_values(ntt_red1024_product3); 60 | 61 | printf("\nTesting ntt_red1024_product4 (KAT values)\n"); 62 | test_mul_from_KAT_values(ntt_red1024_product4); 63 | 64 | printf("\nTesting ntt_red1024_product5 (KAT values)\n"); 65 | test_mul_from_KAT_values(ntt_red1024_product5); 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /src/naive_ntt256.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=256. 3 | */ 4 | 5 | #include "naive_ntt256.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void naive_ntt256_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16_naive(a, 256, ntt256_psi_powers, 12289); 12 | naive_ntt256_ct_std2rev(a); 13 | mul_array16_naive(b, 256, ntt256_psi_powers, 12289); 14 | naive_ntt256_ct_std2rev(b); 15 | mul_array_naive(c, 256, a, b, 12289); 16 | naive_intt256_ct_rev2std(c); 17 | mul_array16_naive(c, 256, ntt256_scaled_inv_psi_powers, 12289); 18 | } 19 | 20 | void naive_ntt256_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16_naive(a, 256, ntt256_psi_powers, 12289); 22 | naive_ntt256_gs_std2rev(a); 23 | mul_array16_naive(b, 256, ntt256_psi_powers, 12289); 24 | naive_ntt256_gs_std2rev(b); 25 | mul_array_naive(c, 256, a, b, 12289); 26 | naive_intt256_ct_rev2std(c); 27 | mul_array16_naive(c, 256, ntt256_scaled_inv_psi_powers, 12289); 28 | } 29 | 30 | void naive_ntt256_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16_naive(a, 256, ntt256_psi_powers, 12289); 32 | naive_ntt256_ct_std2rev(a); 33 | mul_array16_naive(b, 256, ntt256_psi_powers, 12289); 34 | naive_ntt256_ct_std2rev(b); 35 | mul_array_naive(c, 256, a, b, 12289); 36 | naive_intt256_gs_rev2std(c); 37 | mul_array16_naive(c, 256, ntt256_scaled_inv_psi_powers, 12289); 38 | } 39 | 40 | void naive_ntt256_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16_naive(a, 256, ntt256_psi_powers, 12289); 42 | naive_ntt256_gs_std2rev(a); 43 | mul_array16_naive(b, 256, ntt256_psi_powers, 12289); 44 | naive_ntt256_gs_std2rev(b); 45 | mul_array_naive(c, 256, a, b, 12289); 46 | naive_intt256_gs_rev2std(c); 47 | mul_array16_naive(c, 256, ntt256_scaled_inv_psi_powers, 12289); 48 | } 49 | 50 | 51 | /* 52 | * Use combined mulntt then inttmul 53 | */ 54 | void naive_ntt256_product5(int32_t *c, int32_t *a, int32_t *b) { 55 | naive_mulntt256_ct_std2rev(a); 56 | naive_mulntt256_ct_std2rev(b); 57 | mul_array_naive(c, 256, a, b, 12289); 58 | naive_inttmul256_gs_rev2std(c); 59 | scalar_mul_array_naive(c, 256, ntt256_inv_n, 12289); // divide by n 60 | } 61 | -------------------------------------------------------------------------------- /src/naive_ntt512.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=512. 3 | */ 4 | 5 | #include "naive_ntt512.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void naive_ntt512_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16_naive(a, 512, ntt512_psi_powers, 12289); 12 | naive_ntt512_ct_std2rev(a); 13 | mul_array16_naive(b, 512, ntt512_psi_powers, 12289); 14 | naive_ntt512_ct_std2rev(b); 15 | mul_array_naive(c, 512, a, b, 12289); 16 | naive_intt512_ct_rev2std(c); 17 | mul_array16_naive(c, 512, ntt512_scaled_inv_psi_powers, 12289); 18 | } 19 | 20 | void naive_ntt512_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16_naive(a, 512, ntt512_psi_powers, 12289); 22 | naive_ntt512_gs_std2rev(a); 23 | mul_array16_naive(b, 512, ntt512_psi_powers, 12289); 24 | naive_ntt512_gs_std2rev(b); 25 | mul_array_naive(c, 512, a, b, 12289); 26 | naive_intt512_ct_rev2std(c); 27 | mul_array16_naive(c, 512, ntt512_scaled_inv_psi_powers, 12289); 28 | } 29 | 30 | void naive_ntt512_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16_naive(a, 512, ntt512_psi_powers, 12289); 32 | naive_ntt512_ct_std2rev(a); 33 | mul_array16_naive(b, 512, ntt512_psi_powers, 12289); 34 | naive_ntt512_ct_std2rev(b); 35 | mul_array_naive(c, 512, a, b, 12289); 36 | naive_intt512_gs_rev2std(c); 37 | mul_array16_naive(c, 512, ntt512_scaled_inv_psi_powers, 12289); 38 | } 39 | 40 | void naive_ntt512_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16_naive(a, 512, ntt512_psi_powers, 12289); 42 | naive_ntt512_gs_std2rev(a); 43 | mul_array16_naive(b, 512, ntt512_psi_powers, 12289); 44 | naive_ntt512_gs_std2rev(b); 45 | mul_array_naive(c, 512, a, b, 12289); 46 | naive_intt512_gs_rev2std(c); 47 | mul_array16_naive(c, 512, ntt512_scaled_inv_psi_powers, 12289); 48 | } 49 | 50 | 51 | /* 52 | * Use combined mulntt then inttmul 53 | */ 54 | void naive_ntt512_product5(int32_t *c, int32_t *a, int32_t *b) { 55 | naive_mulntt512_ct_std2rev(a); 56 | naive_mulntt512_ct_std2rev(b); 57 | mul_array_naive(c, 512, a, b, 12289); 58 | naive_inttmul512_gs_rev2std(c); 59 | scalar_mul_array_naive(c, 512, ntt512_inv_n, 12289); // divide by n 60 | } 61 | -------------------------------------------------------------------------------- /src/kat_mul1024_red_asm.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Tancrede: I added to the repository Harvey’s NTT (with a file 4 | tools/precomputation-ntt-harvey.sage to explain how the constants 5 | are generated), and a new test test_poly.c which uses known values 6 | (a[i]*b[i]=c[i]) and verifies that INTT(NTT(a[i])*NTT(b[i])) == c[i] 7 | with Harvey’s NTT. I believe similar tests should be possible with 8 | the others NTTs in the repository, although they do not output 9 | numbers in [0, PARAM_Q) so the test should be adapted. 10 | 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ntt_red_asm1024.h" 19 | #include "data_poly1024.h" 20 | #include "sort.h" 21 | 22 | static void copy_poly(int32_t a[1024], const int32_t b[1024]) { 23 | uint32_t i; 24 | 25 | for (i=0; i<1024; i++) { 26 | a[i] = b[i]; 27 | } 28 | } 29 | 30 | static void test_mul_from_KAT_values(void (*f)(int32_t *, int32_t *, int32_t *)) { 31 | int32_t ua[1024], ub[1024], uc[1024]; 32 | 33 | for (int i = 0; i < REPETITIONS; i++) { 34 | copy_poly(ua, a[i]); 35 | copy_poly(ub, b[i]); 36 | f(uc, ua, ub); 37 | 38 | for (int j = 0; j < 1024; j++) { 39 | if (uc[j] != c[i][j]) { 40 | printf("\t Failure at round %d on coeff %d: %"PRIi32" != %"PRIi32".\n", i, j, uc[j], c[i][j]); 41 | exit(EXIT_FAILURE); 42 | } 43 | } 44 | } 45 | 46 | printf("\t Success after %d tests\n", REPETITIONS); 47 | } 48 | 49 | int main(void){ 50 | build_kat(); 51 | 52 | printf("Testing ntt_red1024_product1_asm (KAT values)\n"); 53 | test_mul_from_KAT_values(ntt_red1024_product1_asm); 54 | 55 | printf("\nTesting ntt_red1024_product2_asm (KAT values)\n"); 56 | test_mul_from_KAT_values(ntt_red1024_product2_asm); 57 | 58 | printf("\nTesting ntt_red1024_product3_asm (KAT values)\n"); 59 | test_mul_from_KAT_values(ntt_red1024_product3_asm); 60 | 61 | printf("\nTesting ntt_red1024_product4_asm (KAT values)\n"); 62 | test_mul_from_KAT_values(ntt_red1024_product4_asm); 63 | 64 | printf("\nTesting ntt_red1024_product5_asm (KAT values)\n"); 65 | test_mul_from_KAT_values(ntt_red1024_product5_asm); 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /src/naive_ntt1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=1024. 3 | */ 4 | 5 | #include "naive_ntt1024.h" 6 | 7 | /* 8 | * Product of two polynomials 9 | */ 10 | void naive_ntt1024_product1(int32_t *c, int32_t *a, int32_t *b) { 11 | mul_array16_naive(a, 1024, ntt1024_psi_powers, 12289); 12 | naive_ntt1024_ct_std2rev(a); 13 | mul_array16_naive(b, 1024, ntt1024_psi_powers, 12289); 14 | naive_ntt1024_ct_std2rev(b); 15 | mul_array_naive(c, 1024, a, b, 12289); 16 | naive_intt1024_ct_rev2std(c); 17 | mul_array16_naive(c, 1024, ntt1024_scaled_inv_psi_powers, 12289); 18 | } 19 | 20 | void naive_ntt1024_product2(int32_t *c, int32_t *a, int32_t *b) { 21 | mul_array16_naive(a, 1024, ntt1024_psi_powers, 12289); 22 | naive_ntt1024_gs_std2rev(a); 23 | mul_array16_naive(b, 1024, ntt1024_psi_powers, 12289); 24 | naive_ntt1024_gs_std2rev(b); 25 | mul_array_naive(c, 1024, a, b, 12289); 26 | naive_intt1024_ct_rev2std(c); 27 | mul_array16_naive(c, 1024, ntt1024_scaled_inv_psi_powers, 12289); 28 | } 29 | 30 | void naive_ntt1024_product3(int32_t *c, int32_t *a, int32_t *b) { 31 | mul_array16_naive(a, 1024, ntt1024_psi_powers, 12289); 32 | naive_ntt1024_ct_std2rev(a); 33 | mul_array16_naive(b, 1024, ntt1024_psi_powers, 12289); 34 | naive_ntt1024_ct_std2rev(b); 35 | mul_array_naive(c, 1024, a, b, 12289); 36 | naive_intt1024_gs_rev2std(c); 37 | mul_array16_naive(c, 1024, ntt1024_scaled_inv_psi_powers, 12289); 38 | } 39 | 40 | void naive_ntt1024_product4(int32_t *c, int32_t *a, int32_t *b) { 41 | mul_array16_naive(a, 1024, ntt1024_psi_powers, 12289); 42 | naive_ntt1024_gs_std2rev(a); 43 | mul_array16_naive(b, 1024, ntt1024_psi_powers, 12289); 44 | naive_ntt1024_gs_std2rev(b); 45 | mul_array_naive(c, 1024, a, b, 12289); 46 | naive_intt1024_gs_rev2std(c); 47 | mul_array16_naive(c, 1024, ntt1024_scaled_inv_psi_powers, 12289); 48 | } 49 | 50 | 51 | /* 52 | * Use combined mulntt then inttmul 53 | */ 54 | void naive_ntt1024_product5(int32_t *c, int32_t *a, int32_t *b) { 55 | naive_mulntt1024_ct_std2rev(a); 56 | naive_mulntt1024_ct_std2rev(b); 57 | mul_array_naive(c, 1024, a, b, 12289); 58 | naive_inttmul1024_gs_rev2std(c); 59 | scalar_mul_array_naive(c, 1024, ntt1024_inv_n, 12289); // divide by n 60 | } 61 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/verify_all: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERIFY=./verify.sh 4 | make realclean 5 | make all 6 | make install INSTALL_DIR=./bitcode 7 | 8 | echo "=== Verifying harness_intt_red1024.all_linked.bc ===" 9 | time $VERIFY ./bitcode/harness_intt_red1024.all_linked.bc 10 | echo "=== Verifying harness_intt_red1024b.all_linked.bc ===" 11 | time $VERIFY ./bitcode/harness_intt_red1024b.all_linked.bc 12 | echo "=== Verifying harness_ntt_red1024.all_linked.bc ===" 13 | time $VERIFY ./bitcode/harness_ntt_red1024.all_linked.bc 14 | echo "=== Verifying harness_ntt_red1024b.all_linked.bc ===" 15 | time $VERIFY ./bitcode/harness_ntt_red1024b.all_linked.bc 16 | echo "=== Verifying harness_ntt_red1024c.all_linked.bc ===" 17 | time $VERIFY ./bitcode/harness_ntt_red1024c.all_linked.bc 18 | echo "=== Verifying harness_ntt_red1024d.all_linked.bc ===" 19 | time $VERIFY ./bitcode/harness_ntt_red1024d.all_linked.bc 20 | echo "=== Verifying harness_ntt_red1024e.all_linked.bc ===" 21 | time $VERIFY ./bitcode/harness_ntt_red1024e.all_linked.bc 22 | echo "=== Verifying harness_ntt_red1024f.all_linked.bc ===" 23 | time $VERIFY ./bitcode/harness_ntt_red1024f.all_linked.bc 24 | 25 | echo "=== Verifying harness_intt_red1024.all_linked.bc with inlining ===" 26 | time $VERIFY --inline ./bitcode/harness_intt_red1024.all_linked.bc 27 | echo "=== Verifying harness_intt_red1024b.all_linked.bc with inlining ===" 28 | time $VERIFY --inline ./bitcode/harness_intt_red1024b.all_linked.bc 29 | echo "=== Verifying harness_ntt_red1024.all_linked.bc with inlining ===" 30 | time $VERIFY --inline ./bitcode/harness_ntt_red1024.all_linked.bc 31 | echo "=== Verifying harness_ntt_red1024b.all_linked.bc with inlining ===" 32 | time $VERIFY --inline ./bitcode/harness_ntt_red1024b.all_linked.bc 33 | echo "=== Verifying harness_ntt_red1024c.all_linked.bc with inlining ===" 34 | time $VERIFY --inline ./bitcode/harness_ntt_red1024c.all_linked.bc 35 | echo "=== Verifying harness_ntt_red1024d.all_linked.bc with inlining ===" 36 | time $VERIFY --inline ./bitcode/harness_ntt_red1024d.all_linked.bc 37 | echo "=== Verifying harness_ntt_red1024e.all_linked.bc with inlining ===" 38 | time $VERIFY --inline ./bitcode/harness_ntt_red1024e.all_linked.bc 39 | echo "=== Verifying harness_ntt_red1024f.all_linked.bc with inlining ===" 40 | time $VERIFY --inline ./bitcode/harness_ntt_red1024f.all_linked.bc 41 | -------------------------------------------------------------------------------- /src/bitrev256_table.c: -------------------------------------------------------------------------------- 1 | #include "bitrev256_table.h" 2 | 3 | const uint16_t bitrev256[BITREV256_NPAIRS][2] = { 4 | { 1, 128 }, { 2, 64 }, { 3, 192 }, { 4, 32 }, 5 | { 5, 160 }, { 6, 96 }, { 7, 224 }, { 8, 16 }, 6 | { 9, 144 }, { 10, 80 }, { 11, 208 }, { 12, 48 }, 7 | { 13, 176 }, { 14, 112 }, { 15, 240 }, { 17, 136 }, 8 | { 18, 72 }, { 19, 200 }, { 20, 40 }, { 21, 168 }, 9 | { 22, 104 }, { 23, 232 }, { 25, 152 }, { 26, 88 }, 10 | { 27, 216 }, { 28, 56 }, { 29, 184 }, { 30, 120 }, 11 | { 31, 248 }, { 33, 132 }, { 34, 68 }, { 35, 196 }, 12 | { 37, 164 }, { 38, 100 }, { 39, 228 }, { 41, 148 }, 13 | { 42, 84 }, { 43, 212 }, { 44, 52 }, { 45, 180 }, 14 | { 46, 116 }, { 47, 244 }, { 49, 140 }, { 50, 76 }, 15 | { 51, 204 }, { 53, 172 }, { 54, 108 }, { 55, 236 }, 16 | { 57, 156 }, { 58, 92 }, { 59, 220 }, { 61, 188 }, 17 | { 62, 124 }, { 63, 252 }, { 65, 130 }, { 67, 194 }, 18 | { 69, 162 }, { 70, 98 }, { 71, 226 }, { 73, 146 }, 19 | { 74, 82 }, { 75, 210 }, { 77, 178 }, { 78, 114 }, 20 | { 79, 242 }, { 81, 138 }, { 83, 202 }, { 85, 170 }, 21 | { 86, 106 }, { 87, 234 }, { 89, 154 }, { 91, 218 }, 22 | { 93, 186 }, { 94, 122 }, { 95, 250 }, { 97, 134 }, 23 | { 99, 198 }, { 101, 166 }, { 103, 230 }, { 105, 150 }, 24 | { 107, 214 }, { 109, 182 }, { 110, 118 }, { 111, 246 }, 25 | { 113, 142 }, { 115, 206 }, { 117, 174 }, { 119, 238 }, 26 | { 121, 158 }, { 123, 222 }, { 125, 190 }, { 127, 254 }, 27 | { 131, 193 }, { 133, 161 }, { 135, 225 }, { 137, 145 }, 28 | { 139, 209 }, { 141, 177 }, { 143, 241 }, { 147, 201 }, 29 | { 149, 169 }, { 151, 233 }, { 155, 217 }, { 157, 185 }, 30 | { 159, 249 }, { 163, 197 }, { 167, 229 }, { 171, 213 }, 31 | { 173, 181 }, { 175, 245 }, { 179, 205 }, { 183, 237 }, 32 | { 187, 221 }, { 191, 253 }, { 199, 227 }, { 203, 211 }, 33 | { 207, 243 }, { 215, 235 }, { 223, 251 }, { 239, 247 }, 34 | }; 35 | 36 | -------------------------------------------------------------------------------- /src/intervals.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Interval abstract domain 3 | * - we represent intervals as pairs of 64bit signed numbers, 4 | * which should be safe for our NTT 5 | */ 6 | 7 | #ifndef INTERVALS_H 8 | #define INTERVALS_H 9 | 10 | #include 11 | 12 | typedef struct interval_s { 13 | int64_t min; 14 | int64_t max; 15 | } interval_t; 16 | 17 | 18 | /* 19 | * Constructors: all allocate and return a pointer to an interval 20 | * structure. 21 | */ 22 | extern interval_t *point(int64_t x); 23 | extern interval_t *interval(int64_t min, int64_t max); 24 | 25 | /* 26 | * Destructor: just calls free 27 | */ 28 | extern void delete_interval(interval_t *a); 29 | 30 | /* 31 | * Basic operations 32 | */ 33 | extern interval_t *add(const interval_t *a, const interval_t *b); 34 | extern interval_t *sub(const interval_t *a, const interval_t *b); 35 | extern interval_t *neg(const interval_t *a); 36 | 37 | /* 38 | * Reductions 39 | * - red(a) = [l, h] such that l <= red(x) <= h for any x in a 40 | * - red_mul(a, b) = [l, h] such that l <= red(x * y) <= h for any x in a and y in b. 41 | * - red_scale(k, a) = [l, h] such that l <= red(x * k) <= h for any x in a 42 | * - red_twice(a) = [l, h] such that l <= red(red(x)) <= h for x in a. 43 | */ 44 | extern interval_t *red(const interval_t *a); 45 | extern interval_t *red_mul(const interval_t *a, const interval_t *b); 46 | extern interval_t *red_scale(int64_t k, const interval_t *a); 47 | extern interval_t *red_twice(const interval_t *a); 48 | 49 | /* 50 | * Reduction modulo q: [l, h] such that l <= x % q <= h whenever x is in a. 51 | * The modulo operation returns an integer between 0 and q-1 here. 52 | * - q is 12289. 53 | */ 54 | extern interval_t *normal(const interval_t *a); 55 | 56 | /* 57 | * Multiply by inverse(3) then reduce modulo q 58 | */ 59 | extern interval_t *normal_inv3(const interval_t *a); 60 | 61 | /* 62 | * Shift representation: a must be a sub-interval of [0 .. q-1] 63 | * - returns [l, h] such that l <= shift(x) <= h where 64 | * shift(x) = x if 0 <= x <= (q-1)/2 65 | * shift(x) = x - q if (q-1/2) < x <= q-1 66 | */ 67 | extern interval_t *shift(const interval_t *a); 68 | 69 | 70 | /* 71 | * Correct: assume x is in the interval [-q, 2*q-1] then 72 | * correct(x) is if (x<0) then x+q elsif (x >= q) then x-q else x. 73 | * So correct(x) is in thee interval [0 .. q-1] 74 | * 75 | * Interval a must be a subinterval of [-q, 2q-1] 76 | * Correct(a) returns [l, h] such that l <= correct(x) <= h for x in a. 77 | */ 78 | extern interval_t *correct(const interval_t *a); 79 | 80 | 81 | #endif /* INTERVALS_H */ 82 | -------------------------------------------------------------------------------- /src/ntt256.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=256 3 | */ 4 | 5 | #ifndef __NTT256_H 6 | #define __NTT256_H 7 | 8 | #include "ntt256_tables.h" 9 | #include "ntt.h" 10 | 11 | /* 12 | * NTT VARIANTS 13 | * 14 | * - the input a is an array of n integers that must be between 0 and Q-1 15 | * - the result is stored in place 16 | * - the inverse transforms return a result scaled by n: 17 | * we have intt(ntt(a)) = n * a 18 | */ 19 | // forward 20 | static inline void ntt256_ct_rev2std(int32_t *a) { 21 | ntt_ct_rev2std(a, 256, ntt256_omega_powers); 22 | } 23 | 24 | static inline void ntt256_gs_rev2std(int32_t *a) { 25 | ntt_gs_rev2std(a, 256, ntt256_omega_powers_rev); 26 | } 27 | 28 | static inline void ntt256_ct_std2rev(int32_t *a) { 29 | ntt_ct_std2rev(a, 256, ntt256_omega_powers_rev); 30 | } 31 | 32 | static inline void ntt256_gs_std2rev(int32_t *a) { 33 | ntt_gs_std2rev(a, 256, ntt256_omega_powers); 34 | } 35 | 36 | // inverse 37 | static inline void intt256_ct_rev2std(int32_t *a) { 38 | ntt_ct_rev2std(a, 256, ntt256_inv_omega_powers); 39 | } 40 | 41 | static inline void intt256_gs_rev2std(int32_t *a) { 42 | ntt_gs_rev2std(a, 256, ntt256_inv_omega_powers_rev); 43 | } 44 | 45 | static inline void intt256_ct_std2rev(int32_t *a) { 46 | ntt_ct_std2rev(a, 256, ntt256_inv_omega_powers_rev); 47 | } 48 | 49 | static inline void intt256_gs_std2rev(int32_t *a) { 50 | ntt_gs_std2rev(a, 256, ntt256_inv_omega_powers); 51 | } 52 | 53 | // multiplication by powers of psi then forward ntt 54 | static inline void mulntt256_ct_rev2std(int32_t *a) { 55 | mulntt_ct_rev2std(a, 256, ntt256_mixed_powers); 56 | } 57 | 58 | static inline void mulntt256_ct_std2rev(int32_t *a) { 59 | mulntt_ct_std2rev(a, 256, ntt256_mixed_powers_rev); 60 | } 61 | 62 | // inverse ntt then multiplication by powers of psi^-1 63 | static inline void inttmul256_gs_rev2std(int32_t *a) { 64 | nttmul_gs_rev2std(a, 256, ntt256_inv_mixed_powers_rev); 65 | } 66 | 67 | static inline void inttmul256_gs_std2rev(int32_t *a) { 68 | nttmul_gs_std2rev(a, 256, ntt256_inv_mixed_powers); 69 | } 70 | 71 | 72 | /* 73 | * PRODUCTS 74 | */ 75 | 76 | /* 77 | * Input: two arrays a and b in standard order 78 | * Result: 79 | * - the product is stored in array c, in standard order. 80 | * - arrays a and b are modified 81 | * 82 | * The input arrays must contain elements in the range [0 .. Q-1] 83 | * The result is also in that range. 84 | */ 85 | extern void ntt256_product1(int32_t *c, int32_t *a, int32_t *b); 86 | extern void ntt256_product2(int32_t *c, int32_t *a, int32_t *b); 87 | extern void ntt256_product3(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt256_product4(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt256_product5(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt256_product6(int32_t *c, int32_t *a, int32_t *b); 91 | 92 | #endif /* __NTT256_H */ 93 | -------------------------------------------------------------------------------- /src/ntt512.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=512 3 | */ 4 | 5 | #ifndef __NTT512_H 6 | #define __NTT512_H 7 | 8 | #include "ntt512_tables.h" 9 | #include "ntt.h" 10 | 11 | /* 12 | * NTT VARIANTS 13 | * 14 | * - the input a is an array of n integers that must be between 0 and Q-1 15 | * - the result is stored in place 16 | * - the inverse transforms return a result scaled by n: 17 | * we have intt(ntt(a)) = n * a 18 | */ 19 | // forward 20 | static inline void ntt512_ct_rev2std(int32_t *a) { 21 | ntt_ct_rev2std(a, 512, ntt512_omega_powers); 22 | } 23 | 24 | static inline void ntt512_gs_rev2std(int32_t *a) { 25 | ntt_gs_rev2std(a, 512, ntt512_omega_powers_rev); 26 | } 27 | 28 | static inline void ntt512_ct_std2rev(int32_t *a) { 29 | ntt_ct_std2rev(a, 512, ntt512_omega_powers_rev); 30 | } 31 | 32 | static inline void ntt512_gs_std2rev(int32_t *a) { 33 | ntt_gs_std2rev(a, 512, ntt512_omega_powers); 34 | } 35 | 36 | // inverse 37 | static inline void intt512_ct_rev2std(int32_t *a) { 38 | ntt_ct_rev2std(a, 512, ntt512_inv_omega_powers); 39 | } 40 | 41 | static inline void intt512_gs_rev2std(int32_t *a) { 42 | ntt_gs_rev2std(a, 512, ntt512_inv_omega_powers_rev); 43 | } 44 | 45 | static inline void intt512_ct_std2rev(int32_t *a) { 46 | ntt_ct_std2rev(a, 512, ntt512_inv_omega_powers_rev); 47 | } 48 | 49 | static inline void intt512_gs_std2rev(int32_t *a) { 50 | ntt_gs_std2rev(a, 512, ntt512_inv_omega_powers); 51 | } 52 | 53 | // multiplication by powers of psi then forward ntt 54 | static inline void mulntt512_ct_rev2std(int32_t *a) { 55 | mulntt_ct_rev2std(a, 512, ntt512_mixed_powers); 56 | } 57 | 58 | static inline void mulntt512_ct_std2rev(int32_t *a) { 59 | mulntt_ct_std2rev(a, 512, ntt512_mixed_powers_rev); 60 | } 61 | 62 | // inverse ntt then multiplication by powers of psi^-1 63 | static inline void inttmul512_gs_rev2std(int32_t *a) { 64 | nttmul_gs_rev2std(a, 512, ntt512_inv_mixed_powers_rev); 65 | } 66 | 67 | static inline void inttmul512_gs_std2rev(int32_t *a) { 68 | nttmul_gs_std2rev(a, 512, ntt512_inv_mixed_powers); 69 | } 70 | 71 | 72 | /* 73 | * PRODUCTS 74 | */ 75 | 76 | /* 77 | * Input: two arrays a and b in standard order 78 | * Result: 79 | * - the product is stored in array c, in standard order. 80 | * - arrays a and b are modified 81 | * 82 | * The input arrays must contain elements in the range [0 .. Q-1] 83 | * The result is also in that range. 84 | */ 85 | extern void ntt512_product1(int32_t *c, int32_t *a, int32_t *b); 86 | extern void ntt512_product2(int32_t *c, int32_t *a, int32_t *b); 87 | extern void ntt512_product3(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt512_product4(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt512_product5(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt512_product6(int32_t *c, int32_t *a, int32_t *b); 91 | 92 | #endif /* __NTT512_H */ 93 | -------------------------------------------------------------------------------- /src/ntt1024.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=1024 3 | */ 4 | 5 | #ifndef __NTT1024_H 6 | #define __NTT1024_H 7 | 8 | #include "ntt1024_tables.h" 9 | #include "ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void ntt1024_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std(a, 1024, ntt1024_omega_powers); 23 | } 24 | 25 | static inline void ntt1024_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std(a, 1024, ntt1024_omega_powers_rev); 27 | } 28 | 29 | static inline void ntt1024_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev(a, 1024, ntt1024_omega_powers_rev); 31 | } 32 | 33 | static inline void ntt1024_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev(a, 1024, ntt1024_omega_powers); 35 | } 36 | 37 | // inverse 38 | static inline void intt1024_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std(a, 1024, ntt1024_inv_omega_powers); 40 | } 41 | 42 | static inline void intt1024_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std(a, 1024, ntt1024_inv_omega_powers_rev); 44 | } 45 | 46 | static inline void intt1024_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev(a, 1024, ntt1024_inv_omega_powers_rev); 48 | } 49 | 50 | static inline void intt1024_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev(a, 1024, ntt1024_inv_omega_powers); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void mulntt1024_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std(a, 1024, ntt1024_mixed_powers); 57 | } 58 | 59 | static inline void mulntt1024_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev(a, 1024, ntt1024_mixed_powers_rev); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void inttmul1024_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std(a, 1024, ntt1024_inv_mixed_powers_rev); 66 | } 67 | 68 | static inline void inttmul1024_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev(a, 1024, ntt1024_inv_mixed_powers); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * Result: 80 | * - the product is stored in array c, in standard order. 81 | * - arrays a and b are modified 82 | * 83 | * The input arrays must contain elements in the range [0 .. Q-1] 84 | * The result is also in that range. 85 | */ 86 | extern void ntt1024_product1(int32_t *c, int32_t *a, int32_t *b); 87 | extern void ntt1024_product2(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt1024_product3(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt1024_product4(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt1024_product5(int32_t *c, int32_t *a, int32_t *b); 91 | extern void ntt1024_product6(int32_t *c, int32_t *a, int32_t *b); 92 | 93 | #endif /* __NTT1024_H */ 94 | -------------------------------------------------------------------------------- /src/ntt_red16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=16, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #ifndef __NTT_RED16_H 6 | #define __NTT_RED16_H 7 | 8 | #include "ntt_red16_tables.h" 9 | #include "ntt_red.h" 10 | 11 | /* 12 | * NTT Variants: as in ntt_red.h 13 | * using tables from ntt16_red_tables.h 14 | * 15 | * Input: a[i] for i=0 .. 15 is expected to satisfy 16 | * -21499 <= a[i] <= 21499 17 | * 18 | * The result is stored in a, it is not reduced modulo Q. 19 | */ 20 | // forward NTTs 21 | static inline void ntt_red16_ct_rev2std(int32_t *a) { 22 | ntt_red_ct_rev2std(a, 16, ntt_red16_omega_powers); 23 | } 24 | 25 | static inline void ntt_red16_gs_rev2std(int32_t *a) { 26 | ntt_red_gs_rev2std(a, 16, ntt_red16_omega_powers_rev); 27 | } 28 | 29 | static inline void ntt_red16_ct_std2rev(int32_t *a) { 30 | ntt_red_ct_std2rev(a, 16, ntt_red16_omega_powers_rev); 31 | } 32 | 33 | static inline void ntt_red16_gs_std2rev(int32_t *a) { 34 | ntt_red_gs_std2rev(a, 16, ntt_red16_omega_powers); 35 | } 36 | 37 | // inverse 38 | static inline void intt_red16_ct_rev2std(int32_t *a) { 39 | ntt_red_ct_rev2std(a, 16, ntt_red16_inv_omega_powers); 40 | } 41 | 42 | static inline void intt_red16_gs_rev2std(int32_t *a) { 43 | ntt_red_gs_rev2std(a, 16, ntt_red16_inv_omega_powers_rev); 44 | } 45 | 46 | static inline void intt_red16_ct_std2rev(int32_t *a) { 47 | ntt_red_ct_std2rev(a, 16, ntt_red16_inv_omega_powers_rev); 48 | } 49 | 50 | static inline void intt_red16_gs_std2rev(int32_t *a) { 51 | ntt_red_gs_std2rev(a, 16, ntt_red16_inv_omega_powers); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void mulntt_red16_ct_rev2std(int32_t *a) { 56 | mulntt_red_ct_rev2std(a, 16, ntt_red16_mixed_powers); 57 | } 58 | 59 | static inline void mulntt_red16_ct_std2rev(int32_t *a) { 60 | mulntt_red_ct_std2rev(a, 16, ntt_red16_mixed_powers_rev); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void inttmul_red16_gs_rev2std(int32_t *a) { 65 | nttmul_red_gs_rev2std(a, 16, ntt_red16_inv_mixed_powers_rev); 66 | } 67 | 68 | static inline void inttmul_red16_gs_std2rev(int32_t *a) { 69 | nttmul_red_gs_std2rev(a, 16, ntt_red16_inv_mixed_powers); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0, Q-1] 85 | * The result is also in that range. 86 | */ 87 | extern void ntt_red16_product1(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt_red16_product2(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt_red16_product3(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt_red16_product4(int32_t *c, int32_t *a, int32_t *b); 91 | extern void ntt_red16_product5(int32_t *c, int32_t *a, int32_t *b); 92 | 93 | #endif /* __NTT_RED16_H */ 94 | -------------------------------------------------------------------------------- /src/ntt_red256.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=256, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #ifndef __NTT_RED256_H 6 | #define __NTT_RED256_H 7 | 8 | #include "ntt_red256_tables.h" 9 | #include "ntt_red.h" 10 | 11 | /* 12 | * NTT Variants: as in ntt_red.h 13 | * using tables from ntt256_red_tables.h 14 | * 15 | * Input: a[i] for i=0 .. 15 is expected to satisfy 16 | * -21499 <= a[i] <= 21499 17 | * 18 | * The result is stored in a, it is not reduced modulo Q. 19 | */ 20 | // forward NTTs 21 | static inline void ntt_red256_ct_rev2std(int32_t *a) { 22 | ntt_red_ct_rev2std(a, 256, ntt_red256_omega_powers); 23 | } 24 | 25 | static inline void ntt_red256_gs_rev2std(int32_t *a) { 26 | ntt_red_gs_rev2std(a, 256, ntt_red256_omega_powers_rev); 27 | } 28 | 29 | static inline void ntt_red256_ct_std2rev(int32_t *a) { 30 | ntt_red_ct_std2rev(a, 256, ntt_red256_omega_powers_rev); 31 | } 32 | 33 | static inline void ntt_red256_gs_std2rev(int32_t *a) { 34 | ntt_red_gs_std2rev(a, 256, ntt_red256_omega_powers); 35 | } 36 | 37 | // inverse 38 | static inline void intt_red256_ct_rev2std(int32_t *a) { 39 | ntt_red_ct_rev2std(a, 256, ntt_red256_inv_omega_powers); 40 | } 41 | 42 | static inline void intt_red256_gs_rev2std(int32_t *a) { 43 | ntt_red_gs_rev2std(a, 256, ntt_red256_inv_omega_powers_rev); 44 | } 45 | 46 | static inline void intt_red256_ct_std2rev(int32_t *a) { 47 | ntt_red_ct_std2rev(a, 256, ntt_red256_inv_omega_powers_rev); 48 | } 49 | 50 | static inline void intt_red256_gs_std2rev(int32_t *a) { 51 | ntt_red_gs_std2rev(a, 256, ntt_red256_inv_omega_powers); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void mulntt_red256_ct_rev2std(int32_t *a) { 56 | mulntt_red_ct_rev2std(a, 256, ntt_red256_mixed_powers); 57 | } 58 | 59 | static inline void mulntt_red256_ct_std2rev(int32_t *a) { 60 | mulntt_red_ct_std2rev(a, 256, ntt_red256_mixed_powers_rev); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void inttmul_red256_gs_rev2std(int32_t *a) { 65 | nttmul_red_gs_rev2std(a, 256, ntt_red256_inv_mixed_powers_rev); 66 | } 67 | 68 | static inline void inttmul_red256_gs_std2rev(int32_t *a) { 69 | nttmul_red_gs_std2rev(a, 256, ntt_red256_inv_mixed_powers); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0, Q-1] 85 | * The result is also in that range. 86 | */ 87 | extern void ntt_red256_product1(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt_red256_product2(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt_red256_product3(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt_red256_product4(int32_t *c, int32_t *a, int32_t *b); 91 | extern void ntt_red256_product5(int32_t *c, int32_t *a, int32_t *b); 92 | 93 | #endif /* __NTT_RED256_H */ 94 | -------------------------------------------------------------------------------- /src/ntt_red512.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=512, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #ifndef __NTT_RED512_H 6 | #define __NTT_RED512_H 7 | 8 | #include "ntt_red512_tables.h" 9 | #include "ntt_red.h" 10 | 11 | /* 12 | * NTT Variants: as in ntt_red.h 13 | * using tables from ntt512_red_tables.h 14 | * 15 | * Input: a[i] for i=0 .. 15 is expected to satisfy 16 | * -21499 <= a[i] <= 21499 17 | * 18 | * The result is stored in a, it is not reduced modulo Q. 19 | */ 20 | // forward NTTs 21 | static inline void ntt_red512_ct_rev2std(int32_t *a) { 22 | ntt_red_ct_rev2std(a, 512, ntt_red512_omega_powers); 23 | } 24 | 25 | static inline void ntt_red512_gs_rev2std(int32_t *a) { 26 | ntt_red_gs_rev2std(a, 512, ntt_red512_omega_powers_rev); 27 | } 28 | 29 | static inline void ntt_red512_ct_std2rev(int32_t *a) { 30 | ntt_red_ct_std2rev(a, 512, ntt_red512_omega_powers_rev); 31 | } 32 | 33 | static inline void ntt_red512_gs_std2rev(int32_t *a) { 34 | ntt_red_gs_std2rev(a, 512, ntt_red512_omega_powers); 35 | } 36 | 37 | // inverse 38 | static inline void intt_red512_ct_rev2std(int32_t *a) { 39 | ntt_red_ct_rev2std(a, 512, ntt_red512_inv_omega_powers); 40 | } 41 | 42 | static inline void intt_red512_gs_rev2std(int32_t *a) { 43 | ntt_red_gs_rev2std(a, 512, ntt_red512_inv_omega_powers_rev); 44 | } 45 | 46 | static inline void intt_red512_ct_std2rev(int32_t *a) { 47 | ntt_red_ct_std2rev(a, 512, ntt_red512_inv_omega_powers_rev); 48 | } 49 | 50 | static inline void intt_red512_gs_std2rev(int32_t *a) { 51 | ntt_red_gs_std2rev(a, 512, ntt_red512_inv_omega_powers); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void mulntt_red512_ct_rev2std(int32_t *a) { 56 | mulntt_red_ct_rev2std(a, 512, ntt_red512_mixed_powers); 57 | } 58 | 59 | static inline void mulntt_red512_ct_std2rev(int32_t *a) { 60 | mulntt_red_ct_std2rev(a, 512, ntt_red512_mixed_powers_rev); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void inttmul_red512_gs_rev2std(int32_t *a) { 65 | nttmul_red_gs_rev2std(a, 512, ntt_red512_inv_mixed_powers_rev); 66 | } 67 | 68 | static inline void inttmul_red512_gs_std2rev(int32_t *a) { 69 | nttmul_red_gs_std2rev(a, 512, ntt_red512_inv_mixed_powers); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0, Q-1] 85 | * The result is also in that range. 86 | */ 87 | extern void ntt_red512_product1(int32_t *c, int32_t *a, int32_t *b); 88 | extern void ntt_red512_product2(int32_t *c, int32_t *a, int32_t *b); 89 | extern void ntt_red512_product3(int32_t *c, int32_t *a, int32_t *b); 90 | extern void ntt_red512_product4(int32_t *c, int32_t *a, int32_t *b); 91 | extern void ntt_red512_product5(int32_t *c, int32_t *a, int32_t *b); 92 | 93 | #endif /* __NTT_RED512_H */ 94 | -------------------------------------------------------------------------------- /src/speed_mul1024.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ntt1024.h" 7 | #include "sort.h" 8 | 9 | /* 10 | * PERFORMANCE MEASUREMENTS 11 | */ 12 | 13 | /* 14 | * For speed measurements: counter of CPU cycles 15 | */ 16 | static inline uint64_t cpucycles(void) { 17 | uint64_t result; 18 | __asm__ volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" 19 | : "=a" (result) :: "%rdx"); 20 | return result; 21 | } 22 | 23 | #define NTESTS 102400 24 | 25 | static uint64_t t[NTESTS]; 26 | 27 | // Average run time 28 | static uint64_t average_time(void) { 29 | uint64_t s; 30 | uint32_t i; 31 | 32 | s = 0; 33 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ntt_red1024.h" 7 | #include "sort.h" 8 | 9 | /* 10 | * PERFORMANCE MEASUREMENTS 11 | */ 12 | 13 | /* 14 | * For speed measurements: counter of CPU cycles 15 | */ 16 | static inline uint64_t cpucycles(void) { 17 | uint64_t result; 18 | __asm__ volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" 19 | : "=a" (result) :: "%rdx"); 20 | return result; 21 | } 22 | 23 | #define NTESTS 102400 24 | 25 | static uint64_t t[NTESTS]; 26 | 27 | // Average run time 28 | static uint64_t average_time(void) { 29 | uint64_t s; 30 | uint32_t i; 31 | 32 | s = 0; 33 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | #include "naive_ntt1024.h" 7 | #include "sort.h" 8 | 9 | /* 10 | * PERFORMANCE MEASUREMENTS 11 | */ 12 | 13 | /* 14 | * For speed measurements: counter of CPU cycles 15 | */ 16 | static inline uint64_t cpucycles(void) { 17 | uint64_t result; 18 | __asm__ volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" 19 | : "=a" (result) :: "%rdx"); 20 | return result; 21 | } 22 | 23 | #define NTESTS 102400 24 | 25 | static uint64_t t[NTESTS]; 26 | 27 | // Average run time 28 | static uint64_t average_time(void) { 29 | uint64_t s; 30 | uint32_t i; 31 | 32 | s = 0; 33 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | #include "ntt_red_asm1024.h" 7 | #include "sort.h" 8 | 9 | /* 10 | * PERFORMANCE MEASUREMENTS 11 | */ 12 | 13 | /* 14 | * For speed measurements: counter of CPU cycles 15 | */ 16 | static inline uint64_t cpucycles(void) { 17 | uint64_t result; 18 | __asm__ volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" 19 | : "=a" (result) :: "%rdx"); 20 | return result; 21 | } 22 | 23 | #define NTESTS 102400 24 | 25 | static uint64_t t[NTESTS]; 26 | 27 | // Average run time 28 | static uint64_t average_time(void) { 29 | uint64_t s; 30 | uint32_t i; 31 | 32 | s = 0; 33 | for (i=0; i 9 | 10 | /* 11 | * Maximum of red(x) for a <= x <= b 12 | * - red(x) is returned, x is stored in *m 13 | */ 14 | extern int64_t max_red(int64_t a, int64_t b, int64_t *m); 15 | 16 | /* 17 | * Minimum of red(x) for a <= x <= b 18 | */ 19 | extern int64_t min_red(int64_t a, int64_t b, int64_t *m); 20 | 21 | /* 22 | * Maximum of red(w x) for a <= x <= b 23 | */ 24 | extern int64_t max_red_mul(int64_t a, int64_t b, int64_t w, int64_t *m); 25 | 26 | /* 27 | * Minimum of red(w*x) for a <= x <= b 28 | */ 29 | extern int64_t min_red_mul(int64_t a, int64_t b, int64_t w, int64_t *m); 30 | 31 | /* 32 | * Maximum of red(w * x) for a <= x <= b and low <= w <= high. 33 | * - the max is returned. The corresponding x and w are stored in *m and 34 | * *mw, respectively. 35 | */ 36 | extern int64_t max_red_mul_interval(int64_t a, int64_t b, int64_t low, int64_t high, int64_t *m, int64_t *mw); 37 | 38 | /* 39 | * Minimum of red(x * w) for a <= x <= b and low <= w <= high 40 | */ 41 | extern int64_t min_red_mul_interval(int64_t a, int64_t b, int64_t low, int64_t high, int64_t *m, int64_t *wm); 42 | 43 | /* 44 | * Bounds after a CT step 45 | * - assuming |x| <= b and |y| <= b, this function returns b' 46 | * such that |x'| <= b' and |y'| <= b' after executing 47 | * x' = x + red(w * y) 48 | * y' = x - red(w * y) 49 | * for the worst-case w in interval [low, high]. 50 | */ 51 | extern int64_t ct_bound(int64_t b, int64_t low, int64_t high); 52 | 53 | /* 54 | * Bounds after a GS step: same as CT but the updates are 55 | * x' = x + y 56 | * y' = (x - y) * w. 57 | */ 58 | extern int64_t gs_bound(int64_t b, int64_t low, int64_t high); 59 | 60 | 61 | /* 62 | * Bounds after a CT step with a fixed w 63 | * - assuming |x| <= b and |y| <= b, returns b' such that 64 | * |x + red(w, y)| <= b' and |x - red(w, y)| <= b' 65 | */ 66 | extern int64_t ct_bound_fixed(int64_t b, int64_t w); 67 | 68 | /* 69 | * Bounds after a GS step with a fixed w 70 | * - assuming |x| <= b and |y| <= b, returns b' such that 71 | * |x + y| <= b' and |(x - y) * w| <= b' 72 | */ 73 | extern int64_t gs_bound_fixed(int64_t b, int64_t w); 74 | 75 | 76 | /* 77 | * Bounds after ntt computations based on Cooley Tukey 78 | * - b0 = bound on the input 79 | * - p = array of coefficients used in the algorithm 80 | * p[t + i] = omega^(n/2t)^i (or a variant of this). 81 | * We assume the input coefficients a[i] satisfy |a[i]| <= b0. 82 | * 83 | * The final bound is returned. 84 | * Bounds for each round are stored in array bounds (must be of size log_2(n)) 85 | */ 86 | extern int64_t ntt_ct_bounds(int64_t b0, uint32_t n, const int16_t *p, int64_t *bound); 87 | 88 | /* 89 | * Bounds after ntt computations based on Gentleman Sande 90 | * - b0 = bound on the input 91 | * - p = array of coefficients used in the algorithm 92 | * p[t + i] = omega^(n/2t)^i (or a variant of this). 93 | * We assume the input coefficients a[i] satisfy |a[i]| <= b0. 94 | * 95 | * The final bound is returned. 96 | * Bounds for each round are stored in array bound (must be of size log_2(n)) 97 | */ 98 | extern int64_t ntt_gs_bounds(int64_t b0, uint32_t n, const int16_t *p, int64_t *bound); 99 | 100 | #endif /* __RED_BOUNDS_H */ 101 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/red_bounds.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Bounds on the reduction function 3 | */ 4 | 5 | #ifndef __RED_BOUNDS_H 6 | #define __RED_BOUNDS_H 7 | 8 | #include 9 | 10 | /* 11 | * Maximum of red(x) for a <= x <= b 12 | * - red(x) is returned, x is stored in *m 13 | */ 14 | extern int64_t max_red(int64_t a, int64_t b, int64_t *m); 15 | 16 | /* 17 | * Minimum of red(x) for a <= x <= b 18 | */ 19 | extern int64_t min_red(int64_t a, int64_t b, int64_t *m); 20 | 21 | /* 22 | * Maximum of red(w x) for a <= x <= b 23 | */ 24 | extern int64_t max_red_mul(int64_t a, int64_t b, int64_t w, int64_t *m); 25 | 26 | /* 27 | * Minimum of red(w*x) for a <= x <= b 28 | */ 29 | extern int64_t min_red_mul(int64_t a, int64_t b, int64_t w, int64_t *m); 30 | 31 | /* 32 | * Maximum of red(w * x) for a <= x <= b and low <= w <= high. 33 | * - the max is returned. The corresponding x and w are stored in *m and 34 | * *mw, respectively. 35 | */ 36 | extern int64_t max_red_mul_interval(int64_t a, int64_t b, int64_t low, int64_t high, 37 | int64_t *m, int64_t *mw); 38 | 39 | /* 40 | * Minimum of red(x * w) for a <= x <= b and low <= w <= high 41 | */ 42 | extern int64_t min_red_mul_interval(int64_t a, int64_t b, int64_t low, int64_t high, 43 | int64_t *m, int64_t *wm); 44 | 45 | 46 | /* 47 | * Bounds after a CT step 48 | * - assuming |x| <= b and |y| <= b, this function returns b' 49 | * such that |x'| <= b' and |y'| <= b' after executing 50 | * x' = x + red(w * y) 51 | * y' = x - red(w * y) 52 | * for the worst-case w in interval [low, high]. 53 | */ 54 | extern int64_t ct_bound(int64_t b, int64_t low, int64_t high); 55 | 56 | /* 57 | * Bounds after a GS step: same as CT but the updates are 58 | * x' = x + y 59 | * y' = (x - y) * w. 60 | */ 61 | extern int64_t gs_bound(int64_t b, int64_t low, int64_t high); 62 | 63 | 64 | /* 65 | * Bounds after a CT step with a fixed w 66 | * - assuming |x| <= b and |y| <= b, returns b' such that 67 | * |x + red(w, y)| <= b' and |x - red(w, y)| <= b' 68 | */ 69 | extern int64_t ct_bound_fixed(int64_t b, int64_t w); 70 | 71 | /* 72 | * Bounds after a GS step with a fixed w 73 | * - assuming |x| <= b and |y| <= b, returns b' such that 74 | * |x + y| <= b' and |(x - y) * w| <= b' 75 | */ 76 | extern int64_t gs_bound_fixed(int64_t b, int64_t w); 77 | 78 | 79 | /* 80 | * Bounds after ntt computations based on Cooley Tukey 81 | * - b0 = bound on the input 82 | * - p = array of coefficients used in the algorithm 83 | * p[t + i] = omega^(n/2t)^i (or a variant of this). 84 | * We assume the input coefficients a[i] satisfy |a[i]| <= b0. 85 | * 86 | * The final bound is returned. 87 | * Bounds for each round are stored in array bounds (must be of size log_2(n)) 88 | */ 89 | extern int64_t ntt_ct_bounds(int64_t b0, uint32_t n, const int16_t *p, int64_t *bound); 90 | 91 | /* 92 | * Bounds after ntt computations based on Gentleman Sande 93 | * - b0 = bound on the input 94 | * - p = array of coefficients used in the algorithm 95 | * p[t + i] = omega^(n/2t)^i (or a variant of this). 96 | * We assume the input coefficients a[i] satisfy |a[i]| <= b0. 97 | * 98 | * The final bound is returned. 99 | * Bounds for each round are stored in array bound (must be of size log_2(n)) 100 | */ 101 | extern int64_t ntt_gs_bounds(int64_t b0, uint32_t n, const int16_t *p, int64_t *bound); 102 | 103 | #endif /* __RED_BOUNDS_H */ 104 | -------------------------------------------------------------------------------- /src/ntt16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289 and n=16 3 | */ 4 | 5 | #ifndef __NTT16_H 6 | #define __NTT16_H 7 | 8 | #include "ntt16_tables.h" 9 | #include "ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void ntt16_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std(a, 16, ntt16_omega_powers); 23 | } 24 | 25 | static inline void ntt16_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std(a, 16, ntt16_omega_powers_rev); 27 | } 28 | 29 | static inline void ntt16_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev(a, 16, ntt16_omega_powers_rev); 31 | } 32 | 33 | static inline void ntt16_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev(a, 16, ntt16_omega_powers); 35 | } 36 | 37 | // inverse 38 | static inline void intt16_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std(a, 16, ntt16_inv_omega_powers); 40 | } 41 | 42 | static inline void intt16_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std(a, 16, ntt16_inv_omega_powers_rev); 44 | } 45 | 46 | static inline void intt16_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev(a, 16, ntt16_inv_omega_powers_rev); 48 | } 49 | 50 | static inline void intt16_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev(a, 16, ntt16_inv_omega_powers); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void mulntt16_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std(a, 16, ntt16_mixed_powers); 57 | } 58 | 59 | static inline void mulntt16_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev(a, 16, ntt16_mixed_powers_rev); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void inttmul16_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std(a, 16, ntt16_inv_mixed_powers_rev); 66 | } 67 | 68 | static inline void inttmul16_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev(a, 16, ntt16_inv_mixed_powers); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0 .. Q-1] 85 | * The result is also in that range. 86 | * 87 | * The first four variants have the following form: 88 | * - multiply a and b by powers of psi 89 | * - compute NNT(a) and NTT(b) using a std2rev variant 90 | * - c = elementwise product of NTT(a) and NTT(b) 91 | * - compute INTT(c) usign a rev2std variant 92 | * - multiply the result by n^(-1) * powers of psi^(-1) 93 | * There are two choices for the NTT and INTT functions: 94 | * - NTT: either ntt_ct_std2rev or ntt_gs_std2rev 95 | * - INTT: either intt_ct_rev2std or intt_gs_reg2std 96 | * 97 | * Product5 uses the combined mul/ntt variants: 98 | * - compute MULNTT(a) and MULNTT(b) using mulntt_ct_std2rev 99 | * - c = elementwise product 100 | * - compute INTTMUL(c) using inttmul_gs_rev2std 101 | * - multiply the result by n^(-1) 102 | */ 103 | extern void ntt16_product1(int32_t *c, int32_t *a, int32_t *b); 104 | extern void ntt16_product2(int32_t *c, int32_t *a, int32_t *b); 105 | extern void ntt16_product3(int32_t *c, int32_t *a, int32_t *b); 106 | extern void ntt16_product4(int32_t *c, int32_t *a, int32_t *b); 107 | 108 | extern void ntt16_product5(int32_t *c, int32_t *a, int32_t *b); 109 | 110 | #endif /* __NTT16_H */ 111 | -------------------------------------------------------------------------------- /src/naive_ntt16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Naive NTT for Q=12289 and n=16 3 | */ 4 | 5 | #ifndef __NAIVE_NTT16_H 6 | #define __NAIVE_NTT16_H 7 | 8 | #include "ntt16_tables.h" 9 | #include "naive_ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void naive_ntt16_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std_naive(a, 16, ntt16_omega_powers, 12289); 23 | } 24 | 25 | static inline void naive_ntt16_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std_naive(a, 16, ntt16_omega_powers_rev, 12289); 27 | } 28 | 29 | static inline void naive_ntt16_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev_naive(a, 16, ntt16_omega_powers_rev, 12289); 31 | } 32 | 33 | static inline void naive_ntt16_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev_naive(a, 16, ntt16_omega_powers, 12289); 35 | } 36 | 37 | // inverse 38 | static inline void naive_intt16_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std_naive(a, 16, ntt16_inv_omega_powers, 12289); 40 | } 41 | 42 | static inline void naive_intt16_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std_naive(a, 16, ntt16_inv_omega_powers_rev, 12289); 44 | } 45 | 46 | static inline void naive_intt16_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev_naive(a, 16, ntt16_inv_omega_powers_rev, 12289); 48 | } 49 | 50 | static inline void naive_intt16_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev_naive(a, 16, ntt16_inv_omega_powers, 12289); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void naive_mulntt16_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std_naive(a, 16, ntt16_mixed_powers, 12289); 57 | } 58 | 59 | static inline void naive_mulntt16_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev_naive(a, 16, ntt16_mixed_powers_rev, 12289); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void naive_inttmul16_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std_naive(a, 16, ntt16_inv_mixed_powers_rev, 12289); 66 | } 67 | 68 | static inline void naive_inttmul16_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev_naive(a, 16, ntt16_inv_mixed_powers, 12289); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0 .. Q-1] 85 | * The result is also in that range. 86 | * 87 | * The first four variants have the following form: 88 | * - multiply a and b by powers of psi 89 | * - compute NNT(a) and NTT(b) using a std2rev variant 90 | * - c = elementwise product of NTT(a) and NTT(b) 91 | * - compute INTT(c) usign a rev2std variant 92 | * - multiply the result by n^(-1) * powers of psi^(-1) 93 | * There are two choices for the NTT and INTT functions: 94 | * - NTT: either ntt_ct_std2rev or ntt_gs_std2rev 95 | * - INTT: either intt_ct_rev2std or intt_gs_reg2std 96 | * 97 | * Product5 uses the combined mul/ntt variants: 98 | * - compute MULNTT(a) and MULNTT(b) using mulntt_ct_std2rev 99 | * - c = elementwise product 100 | * - compute INTTMUL(c) using inttmul_gs_rev2std 101 | * - multiply the result by n^(-1) 102 | */ 103 | extern void naive_ntt16_product1(int32_t *c, int32_t *a, int32_t *b); 104 | extern void naive_ntt16_product2(int32_t *c, int32_t *a, int32_t *b); 105 | extern void naive_ntt16_product3(int32_t *c, int32_t *a, int32_t *b); 106 | extern void naive_ntt16_product4(int32_t *c, int32_t *a, int32_t *b); 107 | 108 | extern void naive_ntt16_product5(int32_t *c, int32_t *a, int32_t *b); 109 | 110 | #endif /* __NAIVE_NTT16_H */ 111 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | # Sources 2 | 3 | 4 | ## NTT Implementations 5 | 6 | The various algorithms are parametric in Q and n. Q is the prime order of the finite field, and is 7 | always assumed to be 12289. n is the degree of the polynomials under consideration. 8 | n must be a power of two, no larger than 2048. In addition, the NTT is based on two parameters 9 | ``phi`` and ``psi`` such that ``phi^n = 1`` and ``psi^2 = phi``. 10 | 11 | We include four variant implementations of the basic algorithms: 12 | - naive implementation (unoptimized modular arithmetic) 13 | - default implementation (optimized modular arithmetic for Q=12289) 14 | - implementation in C based on the Longa-Naehrig reduction 15 | - implementation in x86-64 assembler that uses the AVX2 vector instruction (also using the Longa-Naehrig reduction) 16 | 17 | Each source file includes variant procedures for constructing forward and backward transforms, using either 18 | the Cooley-Tukey or the Gentleman-Sande approaches. Some variants implement pre or post-multiplication by 19 | powers of ``psi``. The source files also include utilities for shuffling array components, multiplying 20 | by scalars, amd more utilities that can be used to implement products of polynomials. 21 | 22 | The main source files include: 23 | - ``naive_ntt.c`` and ``naive_ntt.h``: naive implementation 24 | - ``ntt.c`` and ``ntt.h``: default implementation 25 | - ``ntt_red.c`` and ``ntt_red.h``: Longa-Naehrig reduction (C implementation) 26 | - ``ntt_asm.S`` and ``ntt_asm.h``: Longa-Naehrig reduction (assembler/AVX2 implementation) 27 | 28 | For testing and experimentation, we instantiate the generic procedures for n=16, 256, 512, and 1024, 29 | and for fixed values of the parameters ``phi`` and ``psi``. 30 | For example ``ntt1024.c`` uses the default NTT procedure (from ``ntt.h`` and ``ntt.c``). 31 | It is specialized for ``n=1024`` and it includes five procedures that compute products of polynonials. 32 | The five procedures are semantically equivalent but they use different forward/backward transforms. 33 | 34 | ## Tables 35 | 36 | All the NTT procedures we implement take a table of constants as argument. 37 | This table is derived from the parameters ``phi``, ``psi``, and ``n``. We include 38 | two utilies that generate the relevant tables based on these parameters. 39 | 40 | * `make_tables` generates tables suitable for ``naive_ntt`` and ``ntt``. The resulting 41 | tables are in ``ntt_[16, 256, 512, 1024]_tables.h``. 42 | 43 | * `make_red_tables` generates suitable tables for ``ntt_red`` and ``ntt_asm``. 44 | The resulting tables are in ``ntt_red[16, 256, 512, 1024]_tables.h``. 45 | 46 | For shuffling array elements in the bit-reverse order, we also use a table that defines 47 | an index permutation and we include a utility to generate this table: 48 | 49 | * `bitrev[16, 256, 512, 1024]_table.h` are generated by `make_bitrev_table` 50 | 51 | 52 | ## Tests 53 | 54 | Basic tests include 55 | 56 | ``` 57 | test_ntt 58 | test_ntt_red 59 | test_ntt_avx 60 | ``` 61 | These run a first round of tests to validate the implementations and a second 62 | round of tests to measure speed. 63 | 64 | The following variants do more extensive testing and are specialized for a fixed ``n``: 65 | 66 | ``` 67 | test_naive_ntt[16, 256, 512, 1024] 68 | test_ntt[16, 256, 512, 1024] 69 | test_ntt_red[16, 256, 512, 1024] 70 | test_ntt_red_asm[16, 256, 512, 1024] 71 | ``` 72 | 73 | We also include Known Answer Tests (kat) for n=1024: 74 | ``` 75 | data_poly1024.[ch] 76 | kat_mul1024[, _red, red_asm].c 77 | speed_mul1024[, _naive, _red, _red_asm].c 78 | ``` 79 | 80 | The tests in the paper can be found in this [subdirectory](https://github.com/SRI-CSL/NTT/tree/master/src/tests_in_paper). To make them one can simply do 81 | ``` 82 | make paper_tests 83 | ``` 84 | in *this* directory (not the subdirectory). 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /src/naive_ntt256.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Naive NTT for Q=12289 and n=256 3 | */ 4 | 5 | #ifndef __NAIVE_NTT256_H 6 | #define __NAIVE_NTT256_H 7 | 8 | #include "ntt256_tables.h" 9 | #include "naive_ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void naive_ntt256_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std_naive(a, 256, ntt256_omega_powers, 12289); 23 | } 24 | 25 | static inline void naive_ntt256_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std_naive(a, 256, ntt256_omega_powers_rev, 12289); 27 | } 28 | 29 | static inline void naive_ntt256_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev_naive(a, 256, ntt256_omega_powers_rev, 12289); 31 | } 32 | 33 | static inline void naive_ntt256_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev_naive(a, 256, ntt256_omega_powers, 12289); 35 | } 36 | 37 | // inverse 38 | static inline void naive_intt256_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std_naive(a, 256, ntt256_inv_omega_powers, 12289); 40 | } 41 | 42 | static inline void naive_intt256_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std_naive(a, 256, ntt256_inv_omega_powers_rev, 12289); 44 | } 45 | 46 | static inline void naive_intt256_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev_naive(a, 256, ntt256_inv_omega_powers_rev, 12289); 48 | } 49 | 50 | static inline void naive_intt256_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev_naive(a, 256, ntt256_inv_omega_powers, 12289); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void naive_mulntt256_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std_naive(a, 256, ntt256_mixed_powers, 12289); 57 | } 58 | 59 | static inline void naive_mulntt256_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev_naive(a, 256, ntt256_mixed_powers_rev, 12289); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void naive_inttmul256_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std_naive(a, 256, ntt256_inv_mixed_powers_rev, 12289); 66 | } 67 | 68 | static inline void naive_inttmul256_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev_naive(a, 256, ntt256_inv_mixed_powers, 12289); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0 .. Q-1] 85 | * The result is also in that range. 86 | * 87 | * The first four variants have the following form: 88 | * - multiply a and b by powers of psi 89 | * - compute NNT(a) and NTT(b) using a std2rev variant 90 | * - c = elementwise product of NTT(a) and NTT(b) 91 | * - compute INTT(c) usign a rev2std variant 92 | * - multiply the result by n^(-1) * powers of psi^(-1) 93 | * There are two choices for the NTT and INTT functions: 94 | * - NTT: either ntt_ct_std2rev or ntt_gs_std2rev 95 | * - INTT: either intt_ct_rev2std or intt_gs_reg2std 96 | * 97 | * Product5 uses the combined mul/ntt variants: 98 | * - compute MULNTT(a) and MULNTT(b) using mulntt_ct_std2rev 99 | * - c = elementwise product 100 | * - compute INTTMUL(c) using inttmul_gs_rev2std 101 | * - multiply the result by n^(-1) 102 | */ 103 | extern void naive_ntt256_product1(int32_t *c, int32_t *a, int32_t *b); 104 | extern void naive_ntt256_product2(int32_t *c, int32_t *a, int32_t *b); 105 | extern void naive_ntt256_product3(int32_t *c, int32_t *a, int32_t *b); 106 | extern void naive_ntt256_product4(int32_t *c, int32_t *a, int32_t *b); 107 | 108 | extern void naive_ntt256_product5(int32_t *c, int32_t *a, int32_t *b); 109 | 110 | #endif /* __NAIVE_NTT256_H */ 111 | -------------------------------------------------------------------------------- /src/naive_ntt512.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Naive NTT for Q=12289 and n=512 3 | */ 4 | 5 | #ifndef __NAIVE_NTT512_H 6 | #define __NAIVE_NTT512_H 7 | 8 | #include "ntt512_tables.h" 9 | #include "naive_ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void naive_ntt512_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std_naive(a, 512, ntt512_omega_powers, 12289); 23 | } 24 | 25 | static inline void naive_ntt512_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std_naive(a, 512, ntt512_omega_powers_rev, 12289); 27 | } 28 | 29 | static inline void naive_ntt512_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev_naive(a, 512, ntt512_omega_powers_rev, 12289); 31 | } 32 | 33 | static inline void naive_ntt512_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev_naive(a, 512, ntt512_omega_powers, 12289); 35 | } 36 | 37 | // inverse 38 | static inline void naive_intt512_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std_naive(a, 512, ntt512_inv_omega_powers, 12289); 40 | } 41 | 42 | static inline void naive_intt512_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std_naive(a, 512, ntt512_inv_omega_powers_rev, 12289); 44 | } 45 | 46 | static inline void naive_intt512_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev_naive(a, 512, ntt512_inv_omega_powers_rev, 12289); 48 | } 49 | 50 | static inline void naive_intt512_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev_naive(a, 512, ntt512_inv_omega_powers, 12289); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void naive_mulntt512_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std_naive(a, 512, ntt512_mixed_powers, 12289); 57 | } 58 | 59 | static inline void naive_mulntt512_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev_naive(a, 512, ntt512_mixed_powers_rev, 12289); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void naive_inttmul512_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std_naive(a, 512, ntt512_inv_mixed_powers_rev, 12289); 66 | } 67 | 68 | static inline void naive_inttmul512_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev_naive(a, 512, ntt512_inv_mixed_powers, 12289); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0 .. Q-1] 85 | * The result is also in that range. 86 | * 87 | * The first four variants have the following form: 88 | * - multiply a and b by powers of psi 89 | * - compute NNT(a) and NTT(b) using a std2rev variant 90 | * - c = elementwise product of NTT(a) and NTT(b) 91 | * - compute INTT(c) usign a rev2std variant 92 | * - multiply the result by n^(-1) * powers of psi^(-1) 93 | * There are two choices for the NTT and INTT functions: 94 | * - NTT: either ntt_ct_std2rev or ntt_gs_std2rev 95 | * - INTT: either intt_ct_rev2std or intt_gs_reg2std 96 | * 97 | * Product5 uses the combined mul/ntt variants: 98 | * - compute MULNTT(a) and MULNTT(b) using mulntt_ct_std2rev 99 | * - c = elementwise product 100 | * - compute INTTMUL(c) using inttmul_gs_rev2std 101 | * - multiply the result by n^(-1) 102 | */ 103 | extern void naive_ntt512_product1(int32_t *c, int32_t *a, int32_t *b); 104 | extern void naive_ntt512_product2(int32_t *c, int32_t *a, int32_t *b); 105 | extern void naive_ntt512_product3(int32_t *c, int32_t *a, int32_t *b); 106 | extern void naive_ntt512_product4(int32_t *c, int32_t *a, int32_t *b); 107 | 108 | extern void naive_ntt512_product5(int32_t *c, int32_t *a, int32_t *b); 109 | 110 | #endif /* __NAIVE_NTT512_H */ 111 | -------------------------------------------------------------------------------- /src/test_ntt_red_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Tables for testing NTT using Longa&Naehrig reduction. 3 | */ 4 | 5 | #ifndef __TEST_NTT_TABLES_H 6 | #define __TEST_NTT_TABLES_H 7 | 8 | #include 9 | 10 | /* 11 | * Parameters 12 | * - for n=16: psi=1212, omega=6553 13 | * - for n=16, psi=1022, omega=12208 14 | * - for n=256: psi=1002, omega=8595 15 | * - for n=512: psi=1003, omega=10600 16 | * - for n=1024: psi=1014, omega=8209 17 | * - for n=2048: psi=1016, omega=12269 18 | * 19 | * All tables are scaled by inverse(3) = 8193 20 | */ 21 | 22 | // powers of omega in standard order 23 | extern const int16_t shoup_red_ntt16_12289[16]; 24 | extern const int16_t shoup_red_ntt128_12289[128]; 25 | extern const int16_t shoup_red_ntt256_12289[256]; 26 | extern const int16_t shoup_red_ntt512_12289[512]; 27 | extern const int16_t shoup_red_ntt1024_12289[1024]; 28 | extern const int16_t shoup_red_ntt2048_12289[2048]; 29 | 30 | // powers of omega in bit-reverse order 31 | extern const int16_t rev_shoup_red_ntt16_12289[16]; 32 | extern const int16_t rev_shoup_red_ntt128_12289[128]; 33 | extern const int16_t rev_shoup_red_ntt256_12289[256]; 34 | extern const int16_t rev_shoup_red_ntt512_12289[512]; 35 | extern const int16_t rev_shoup_red_ntt1024_12289[1024]; 36 | extern const int16_t rev_shoup_red_ntt2048_12289[2048]; 37 | 38 | // powers of omega and psi in standard order 39 | extern const int16_t shoup_red_scaled_ntt16_12289[16]; 40 | extern const int16_t shoup_red_scaled_ntt128_12289[128]; 41 | extern const int16_t shoup_red_scaled_ntt256_12289[256]; 42 | extern const int16_t shoup_red_scaled_ntt512_12289[512]; 43 | extern const int16_t shoup_red_scaled_ntt1024_12289[1024]; 44 | extern const int16_t shoup_red_scaled_ntt2048_12289[2048]; 45 | 46 | // powers of omega and psi in bit-reverse order 47 | extern const int16_t rev_shoup_red_scaled_ntt16_12289[16]; 48 | extern const int16_t rev_shoup_red_scaled_ntt128_12289[128]; 49 | extern const int16_t rev_shoup_red_scaled_ntt256_12289[256]; 50 | extern const int16_t rev_shoup_red_scaled_ntt512_12289[512]; 51 | extern const int16_t rev_shoup_red_scaled_ntt1024_12289[1024]; 52 | extern const int16_t rev_shoup_red_scaled_ntt2048_12289[2048]; 53 | 54 | /* 55 | * Same tables but with coefficients in the interval [-6144,+6144] 56 | */ 57 | // powers of omega in standard order 58 | extern const int16_t shoup_sred_ntt16_12289[16]; 59 | extern const int16_t shoup_sred_ntt128_12289[128]; 60 | extern const int16_t shoup_sred_ntt256_12289[256]; 61 | extern const int16_t shoup_sred_ntt512_12289[512]; 62 | extern const int16_t shoup_sred_ntt1024_12289[1024]; 63 | extern const int16_t shoup_sred_ntt2048_12289[2048]; 64 | 65 | // powers of omega in bit-reverse order 66 | extern const int16_t rev_shoup_sred_ntt16_12289[16]; 67 | extern const int16_t rev_shoup_sred_ntt128_12289[128]; 68 | extern const int16_t rev_shoup_sred_ntt256_12289[256]; 69 | extern const int16_t rev_shoup_sred_ntt512_12289[512]; 70 | extern const int16_t rev_shoup_sred_ntt1024_12289[1024]; 71 | extern const int16_t rev_shoup_sred_ntt2048_12289[2048]; 72 | 73 | // powers of omega and psi in standard order 74 | extern const int16_t shoup_sred_scaled_ntt16_12289[16]; 75 | extern const int16_t shoup_sred_scaled_ntt128_12289[128]; 76 | extern const int16_t shoup_sred_scaled_ntt256_12289[256]; 77 | extern const int16_t shoup_sred_scaled_ntt512_12289[512]; 78 | extern const int16_t shoup_sred_scaled_ntt1024_12289[1024]; 79 | extern const int16_t shoup_sred_scaled_ntt2048_12289[2048]; 80 | 81 | // powers of omega and psi in bit-reverse order 82 | extern const int16_t rev_shoup_sred_scaled_ntt16_12289[16]; 83 | extern const int16_t rev_shoup_sred_scaled_ntt128_12289[128]; 84 | extern const int16_t rev_shoup_sred_scaled_ntt256_12289[256]; 85 | extern const int16_t rev_shoup_sred_scaled_ntt512_12289[512]; 86 | extern const int16_t rev_shoup_sred_scaled_ntt1024_12289[1024]; 87 | extern const int16_t rev_shoup_sred_scaled_ntt2048_12289[2048]; 88 | 89 | 90 | #endif /* __TEST_NTT_TABLES_H */ 91 | -------------------------------------------------------------------------------- /src/naive_ntt1024.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Naive NTT for Q=12289 and n=1024 3 | */ 4 | 5 | #ifndef __NAIVE_NTT1024_H 6 | #define __NAIVE_NTT1024_H 7 | 8 | #include "ntt1024_tables.h" 9 | #include "naive_ntt.h" 10 | 11 | 12 | /* 13 | * NTT VARIANTS 14 | * 15 | * - the input a is an array of n integers that must be between 0 and Q-1 16 | * - the result is stored in place 17 | * - the inverse transforms return a result scaled by n: 18 | * we have intt(ntt(a)) = n * a 19 | */ 20 | // forward 21 | static inline void naive_ntt1024_ct_rev2std(int32_t *a) { 22 | ntt_ct_rev2std_naive(a, 1024, ntt1024_omega_powers, 12289); 23 | } 24 | 25 | static inline void naive_ntt1024_gs_rev2std(int32_t *a) { 26 | ntt_gs_rev2std_naive(a, 1024, ntt1024_omega_powers_rev, 12289); 27 | } 28 | 29 | static inline void naive_ntt1024_ct_std2rev(int32_t *a) { 30 | ntt_ct_std2rev_naive(a, 1024, ntt1024_omega_powers_rev, 12289); 31 | } 32 | 33 | static inline void naive_ntt1024_gs_std2rev(int32_t *a) { 34 | ntt_gs_std2rev_naive(a, 1024, ntt1024_omega_powers, 12289); 35 | } 36 | 37 | // inverse 38 | static inline void naive_intt1024_ct_rev2std(int32_t *a) { 39 | ntt_ct_rev2std_naive(a, 1024, ntt1024_inv_omega_powers, 12289); 40 | } 41 | 42 | static inline void naive_intt1024_gs_rev2std(int32_t *a) { 43 | ntt_gs_rev2std_naive(a, 1024, ntt1024_inv_omega_powers_rev, 12289); 44 | } 45 | 46 | static inline void naive_intt1024_ct_std2rev(int32_t *a) { 47 | ntt_ct_std2rev_naive(a, 1024, ntt1024_inv_omega_powers_rev, 12289); 48 | } 49 | 50 | static inline void naive_intt1024_gs_std2rev(int32_t *a) { 51 | ntt_gs_std2rev_naive(a, 1024, ntt1024_inv_omega_powers, 12289); 52 | } 53 | 54 | // multiplication by powers of psi then forward ntt 55 | static inline void naive_mulntt1024_ct_rev2std(int32_t *a) { 56 | mulntt_ct_rev2std_naive(a, 1024, ntt1024_mixed_powers, 12289); 57 | } 58 | 59 | static inline void naive_mulntt1024_ct_std2rev(int32_t *a) { 60 | mulntt_ct_std2rev_naive(a, 1024, ntt1024_mixed_powers_rev, 12289); 61 | } 62 | 63 | // inverse ntt then multiplication by powers of psi^-1 64 | static inline void naive_inttmul1024_gs_rev2std(int32_t *a) { 65 | nttmul_gs_rev2std_naive(a, 1024, ntt1024_inv_mixed_powers_rev, 12289); 66 | } 67 | 68 | static inline void naive_inttmul1024_gs_std2rev(int32_t *a) { 69 | nttmul_gs_std2rev_naive(a, 1024, ntt1024_inv_mixed_powers, 12289); 70 | } 71 | 72 | 73 | /* 74 | * PRODUCTS 75 | */ 76 | 77 | /* 78 | * Input: two arrays a and b in standard order 79 | * 80 | * Result: 81 | * - the product is stored in array c, in standard order. 82 | * - arrays a and b are modified 83 | * 84 | * The input arrays must contain elements in the range [0 .. Q-1] 85 | * The result is also in that range. 86 | * 87 | * The first four variants have the following form: 88 | * - multiply a and b by powers of psi 89 | * - compute NNT(a) and NTT(b) using a std2rev variant 90 | * - c = elementwise product of NTT(a) and NTT(b) 91 | * - compute INTT(c) usign a rev2std variant 92 | * - multiply the result by n^(-1) * powers of psi^(-1) 93 | * There are two choices for the NTT and INTT functions: 94 | * - NTT: either ntt_ct_std2rev or ntt_gs_std2rev 95 | * - INTT: either intt_ct_rev2std or intt_gs_reg2std 96 | * 97 | * Product5 uses the combined mul/ntt variants: 98 | * - compute MULNTT(a) and MULNTT(b) using mulntt_ct_std2rev 99 | * - c = elementwise product 100 | * - compute INTTMUL(c) using inttmul_gs_rev2std 101 | * - multiply the result by n^(-1) 102 | */ 103 | extern void naive_ntt1024_product1(int32_t *c, int32_t *a, int32_t *b); 104 | extern void naive_ntt1024_product2(int32_t *c, int32_t *a, int32_t *b); 105 | extern void naive_ntt1024_product3(int32_t *c, int32_t *a, int32_t *b); 106 | extern void naive_ntt1024_product4(int32_t *c, int32_t *a, int32_t *b); 107 | 108 | extern void naive_ntt1024_product5(int32_t *c, int32_t *a, int32_t *b); 109 | 110 | #endif /* __NAIVE_NTT1024_H */ 111 | -------------------------------------------------------------------------------- /src/ntt32_tables.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Parameters: 3 | * - q = 12289 4 | * - n = 32 5 | * - psi = 563 6 | * - omega = psi^2 = 9744 7 | * - inverse of psi = 5828 8 | * - inverse of omega = 11077 9 | * - inverse of n = 11905 10 | */ 11 | 12 | #include "ntt32_tables.h" 13 | 14 | const uint16_t ntt32_bitrev[BITREV32_NPAIRS][2] = { 15 | { 1, 16 }, { 2, 8 }, { 3, 24 }, { 5, 20 }, 16 | { 6, 12 }, { 7, 28 }, { 9, 18 }, { 11, 26 }, 17 | { 13, 22 }, { 15, 30 }, { 19, 25 }, { 23, 29 }, 18 | }; 19 | 20 | const uint16_t ntt32_psi_powers[32] = { 21 | 1, 563, 9744, 4978, 722, 949, 5860, 5728, 22 | 5146, 9283, 3504, 6512, 4134, 4821, 10643, 7266, 23 | 10810, 2975, 3621, 10938, 1305, 9664, 9094, 7698, 24 | 8246, 9545, 3542, 3328, 5736, 9650, 1212, 6461, 25 | }; 26 | 27 | const uint16_t ntt32_inv_psi_powers[32] = { 28 | 1, 5828, 11077, 2639, 6553, 8961, 8747, 2744, 29 | 4043, 4591, 3195, 2625, 10984, 1351, 8668, 9314, 30 | 1479, 5023, 1646, 7468, 8155, 5777, 8785, 3006, 31 | 7143, 6561, 6429, 11340, 11567, 7311, 2545, 11726, 32 | }; 33 | 34 | const uint16_t ntt32_scaled_inv_psi_powers[32] = { 35 | 11905, 10935, 10715, 6611, 2893, 12185, 8338, 3158, 36 | 8191, 6672, 2020, 11987, 9560, 9643, 1807, 11812, 37 | 9647, 541, 6964, 7914, 2175, 5941, 6035, 862, 38 | 9824, 12110, 1353, 8035, 6890, 6757, 5840, 7279, 39 | }; 40 | 41 | const uint16_t ntt32_omega_powers[32] = { 42 | 0, 1, 1, 10810, 1, 5146, 10810, 8246, 43 | 1, 722, 5146, 4134, 10810, 1305, 8246, 5736, 44 | 1, 9744, 722, 5860, 5146, 3504, 4134, 10643, 45 | 10810, 3621, 1305, 9094, 8246, 3542, 5736, 1212, 46 | }; 47 | 48 | const uint16_t ntt32_omega_powers_rev[32] = { 49 | 0, 1, 1, 10810, 1, 10810, 5146, 8246, 50 | 1, 10810, 5146, 8246, 722, 1305, 4134, 5736, 51 | 1, 10810, 5146, 8246, 722, 1305, 4134, 5736, 52 | 9744, 3621, 3504, 3542, 5860, 9094, 10643, 1212, 53 | }; 54 | 55 | const uint16_t ntt32_inv_omega_powers[32] = { 56 | 0, 1, 1, 1479, 1, 4043, 1479, 7143, 57 | 1, 6553, 4043, 10984, 1479, 8155, 7143, 11567, 58 | 1, 11077, 6553, 8747, 4043, 3195, 10984, 8668, 59 | 1479, 1646, 8155, 8785, 7143, 6429, 11567, 2545, 60 | }; 61 | 62 | const uint16_t ntt32_inv_omega_powers_rev[32] = { 63 | 0, 1, 1, 1479, 1, 1479, 4043, 7143, 64 | 1, 1479, 4043, 7143, 6553, 8155, 10984, 11567, 65 | 1, 1479, 4043, 7143, 6553, 8155, 10984, 11567, 66 | 11077, 1646, 3195, 6429, 8747, 8785, 8668, 2545, 67 | }; 68 | 69 | const uint16_t ntt32_mixed_powers[32] = { 70 | 0, 10810, 5146, 8246, 722, 4134, 1305, 5736, 71 | 9744, 5860, 3504, 10643, 3621, 9094, 3542, 1212, 72 | 563, 4978, 949, 5728, 9283, 6512, 4821, 7266, 73 | 2975, 10938, 9664, 7698, 9545, 3328, 9650, 6461, 74 | }; 75 | 76 | const uint16_t ntt32_mixed_powers_rev[32] = { 77 | 0, 10810, 5146, 8246, 722, 1305, 4134, 5736, 78 | 9744, 3621, 3504, 3542, 5860, 9094, 10643, 1212, 79 | 563, 2975, 9283, 9545, 949, 9664, 4821, 9650, 80 | 4978, 10938, 6512, 3328, 5728, 7698, 7266, 6461, 81 | }; 82 | 83 | const uint16_t ntt32_inv_mixed_powers[32] = { 84 | 0, 1479, 4043, 7143, 6553, 10984, 8155, 11567, 85 | 11077, 8747, 3195, 8668, 1646, 8785, 6429, 2545, 86 | 5828, 2639, 8961, 2744, 4591, 2625, 1351, 9314, 87 | 5023, 7468, 5777, 3006, 6561, 11340, 7311, 11726, 88 | }; 89 | 90 | const uint16_t ntt32_inv_mixed_powers_rev[32] = { 91 | 0, 1479, 4043, 7143, 6553, 8155, 10984, 11567, 92 | 11077, 1646, 3195, 6429, 8747, 8785, 8668, 2545, 93 | 5828, 5023, 4591, 6561, 8961, 5777, 1351, 7311, 94 | 2639, 7468, 2625, 11340, 2744, 3006, 9314, 11726, 95 | }; 96 | 97 | -------------------------------------------------------------------------------- /src/make_bitrev_table.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Table for bitverse shuffle. 3 | * 4 | * Input: n = table size. 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /* 15 | * Check that n is a power of two and return k such that n=2^k. 16 | */ 17 | static bool logtwo(uint32_t n, uint32_t *k) { 18 | uint32_t i; 19 | 20 | i = 0; 21 | while ((n & 1) == 0) { 22 | i ++; 23 | n >>= 1; 24 | } 25 | if (n == 1) { 26 | *k = i; 27 | return true; 28 | } 29 | return false; 30 | } 31 | 32 | /* 33 | * Bitreverse of i, interpreted as a k-bit integer 34 | */ 35 | static uint32_t reverse(uint32_t i, uint32_t k) { 36 | uint32_t x, b, j; 37 | 38 | x = 0; 39 | for (j=0; j>= 1; 43 | } 44 | 45 | return x; 46 | } 47 | 48 | /* 49 | * Count the number of i such that i < bitrev(i, k), where n=2^k. 50 | */ 51 | static uint32_t rev_table_npairs(uint32_t n, uint32_t k) { 52 | uint32_t i, c; 53 | 54 | c = 0; 55 | for (i=0; i 0) fprintf(f, "\n"); 85 | fprintf(f, "};\n\n"); 86 | } 87 | 88 | 89 | /* 90 | * Declarations in file f 91 | */ 92 | static void print_bitrev_declarations(FILE *f, uint32_t n, uint32_t k) { 93 | uint32_t m; 94 | 95 | m = rev_table_npairs(n, k); 96 | 97 | fprintf(f, "#ifndef __BITREV%"PRIu32"_TABLE_H\n", n); 98 | fprintf(f, "#define __BITREV%"PRIu32"_TABLE_H\n\n", n); 99 | fprintf(f, "#include \n\n"); 100 | fprintf(f, "#define BITREV%"PRIu32"_NPAIRS %"PRIu32"\n\n", n, m); 101 | fprintf(f, "extern const uint16_t bitrev%"PRIu32"[BITREV%"PRIu32"_NPAIRS][2];\n\n", n, n); 102 | 103 | fprintf(f, "#endif /* __BITREV%"PRIu32"_TABLE_H */\n", n); 104 | } 105 | 106 | /* 107 | * Open file: name is "bitrev_table.h" or "bitrev_table.c" 108 | * - return NULL if we can't create the file 109 | */ 110 | #define BUFFER_SIZE 100 111 | 112 | static FILE *open_file(uint32_t n, const char *suffix) { 113 | char filename[BUFFER_SIZE]; 114 | int len; 115 | FILE *f; 116 | 117 | f = NULL; 118 | len = snprintf(filename, BUFFER_SIZE, "bitrev%"PRIu32"_table.%s", n, suffix); 119 | if (len < BUFFER_SIZE) { 120 | f = fopen(filename, "w"); 121 | } 122 | return f; 123 | } 124 | 125 | int main(int argc, char *argv[]) { 126 | uint32_t n, log_n; 127 | long x; 128 | FILE *f; 129 | 130 | if (argc != 2) { 131 | fprintf(stderr, "Usage: %s \n", argv[0]); 132 | exit(EXIT_FAILURE); 133 | } 134 | 135 | // size 136 | x = atol(argv[1]); 137 | if (x <= 1) { 138 | fprintf(stderr, "Invalid size %ld: must be at least 2\n", x); 139 | exit(EXIT_FAILURE); 140 | } 141 | if (x >= UINT16_MAX) { 142 | fprintf(stderr, "The size is too large: max = %"PRIu32"\n", (uint32_t)UINT16_MAX); 143 | exit(EXIT_FAILURE); 144 | } 145 | n = (uint32_t) x; 146 | if (!logtwo(n, &log_n)) { 147 | fprintf(stderr, "Invalid size: %"PRIu32" is not a power of two\n", n); 148 | exit(EXIT_FAILURE); 149 | } 150 | 151 | f = open_file(n, "h"); 152 | if (f == NULL) { 153 | fprintf(stderr, "failed to open file 'bitrev%"PRIu32"_tables.h'\n", n); 154 | exit(EXIT_FAILURE); 155 | } 156 | print_bitrev_declarations(f, n, log_n); 157 | fclose(f); 158 | 159 | f = open_file(n, "c"); 160 | if (f == NULL) { 161 | fprintf(stderr, "failed to open file 'bitrev%"PRIu32"_tables.h'\n", n); 162 | exit(EXIT_FAILURE); 163 | } 164 | print_bitrev_table(f, n, log_n); 165 | fclose(f); 166 | 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/make_bitrev_table.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Table for bitverse shuffle. 3 | * 4 | * Input: n = table size. 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /* 15 | * Check that n is a power of two and return k such that n=2^k. 16 | */ 17 | static bool logtwo(uint32_t n, uint32_t *k) { 18 | uint32_t i; 19 | 20 | i = 0; 21 | while ((n & 1) == 0) { 22 | i ++; 23 | n >>= 1; 24 | } 25 | if (n == 1) { 26 | *k = i; 27 | return true; 28 | } 29 | return false; 30 | } 31 | 32 | /* 33 | * Bitreverse of i, interpreted as a k-bit integer 34 | */ 35 | static uint32_t reverse(uint32_t i, uint32_t k) { 36 | uint32_t x, b, j; 37 | 38 | x = 0; 39 | for (j=0; j>= 1; 43 | } 44 | 45 | return x; 46 | } 47 | 48 | /* 49 | * Count the number of i such that i < bitrev(i, k), where n=2^k. 50 | */ 51 | static uint32_t rev_table_npairs(uint32_t n, uint32_t k) { 52 | uint32_t i, c; 53 | 54 | c = 0; 55 | for (i=0; i 0) fprintf(f, "\n"); 85 | fprintf(f, "};\n\n"); 86 | } 87 | 88 | 89 | /* 90 | * Declarations in file f 91 | */ 92 | static void print_bitrev_declarations(FILE *f, uint32_t n, uint32_t k) { 93 | uint32_t m; 94 | 95 | m = rev_table_npairs(n, k); 96 | 97 | fprintf(f, "#ifndef __BITREV%"PRIu32"_TABLE_H\n", n); 98 | fprintf(f, "#define __BITREV%"PRIu32"_TABLE_H\n\n", n); 99 | fprintf(f, "#include \n\n"); 100 | fprintf(f, "#define BITREV%"PRIu32"_NPAIRS %"PRIu32"\n\n", n, m); 101 | fprintf(f, "extern const uint16_t bitrev%"PRIu32"[BITREV%"PRIu32"_NPAIRS][2];\n\n", n, n); 102 | 103 | fprintf(f, "#endif /* __BITREV%"PRIu32"_TABLE_H */\n", n); 104 | } 105 | 106 | /* 107 | * Open file: name is "bitrev_table.h" or "bitrev_table.c" 108 | * - return NULL if we can't create the file 109 | */ 110 | #define BUFFER_SIZE 100 111 | 112 | static FILE *open_file(uint32_t n, const char *suffix) { 113 | char filename[BUFFER_SIZE]; 114 | int len; 115 | FILE *f; 116 | 117 | f = NULL; 118 | len = snprintf(filename, BUFFER_SIZE, "bitrev%"PRIu32"_table.%s", n, suffix); 119 | if (len < BUFFER_SIZE) { 120 | f = fopen(filename, "w"); 121 | } 122 | return f; 123 | } 124 | 125 | int main(int argc, char *argv[]) { 126 | uint32_t n, log_n; 127 | long x; 128 | FILE *f; 129 | 130 | if (argc != 2) { 131 | fprintf(stderr, "Usage: %s \n", argv[0]); 132 | exit(EXIT_FAILURE); 133 | } 134 | 135 | // size 136 | x = atol(argv[1]); 137 | if (x <= 1) { 138 | fprintf(stderr, "Invalid size %ld: must be at least 2\n", x); 139 | exit(EXIT_FAILURE); 140 | } 141 | if (x >= UINT16_MAX) { 142 | fprintf(stderr, "The size is too large: max = %"PRIu32"\n", (uint32_t)UINT16_MAX); 143 | exit(EXIT_FAILURE); 144 | } 145 | n = (uint32_t) x; 146 | if (!logtwo(n, &log_n)) { 147 | fprintf(stderr, "Invalid size: %"PRIu32" is not a power of two\n", n); 148 | exit(EXIT_FAILURE); 149 | } 150 | 151 | f = open_file(n, "h"); 152 | if (f == NULL) { 153 | fprintf(stderr, "failed to open file 'bitrev%"PRIu32"_tables.h'\n", n); 154 | exit(EXIT_FAILURE); 155 | } 156 | print_bitrev_declarations(f, n, log_n); 157 | fclose(f); 158 | 159 | f = open_file(n, "c"); 160 | if (f == NULL) { 161 | fprintf(stderr, "failed to open file 'bitrev%"PRIu32"_tables.h'\n", n); 162 | exit(EXIT_FAILURE); 163 | } 164 | print_bitrev_table(f, n, log_n); 165 | fclose(f); 166 | 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /src/ntt_red16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=16, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red16.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red16_product1(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16(a, 16, ntt_red16_psi_powers); 20 | ntt_red16_ct_std2rev(a); 21 | reduce_array(a, 16); 22 | 23 | shift_array(b, 16); 24 | mul_reduce_array16(b, 16, ntt_red16_psi_powers); 25 | ntt_red16_ct_std2rev(b); 26 | reduce_array(b, 16); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red16_ct_rev2std(c); 36 | mul_reduce_array16(c, 16, ntt_red16_scaled_inv_psi_powers); 37 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 38 | correct(c, 16); 39 | } 40 | 41 | void ntt_red16_product2(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16(a, 16, ntt_red16_psi_powers); 44 | ntt_red16_gs_std2rev(a); 45 | reduce_array(a, 16); 46 | 47 | shift_array(b, 16); 48 | mul_reduce_array16(b, 16, ntt_red16_psi_powers); 49 | ntt_red16_gs_std2rev(b); 50 | reduce_array(b, 16); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red16_ct_rev2std(c); 60 | mul_reduce_array16(c, 16, ntt_red16_scaled_inv_psi_powers); 61 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 62 | correct(c, 16); 63 | } 64 | 65 | void ntt_red16_product3(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16(a, 16, ntt_red16_psi_powers); 68 | ntt_red16_ct_std2rev(a); 69 | reduce_array(a, 16); 70 | 71 | shift_array(b, 16); 72 | mul_reduce_array16(b, 16, ntt_red16_psi_powers); 73 | ntt_red16_ct_std2rev(b); 74 | reduce_array(b, 16); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red16_gs_rev2std(c); 84 | mul_reduce_array16(c, 16, ntt_red16_scaled_inv_psi_powers); 85 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 86 | correct(c, 16); 87 | } 88 | 89 | void ntt_red16_product4(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16(a, 16, ntt_red16_psi_powers); 92 | ntt_red16_gs_std2rev(a); 93 | reduce_array(a, 16); 94 | 95 | shift_array(b, 16); 96 | mul_reduce_array16(b, 16, ntt_red16_psi_powers); 97 | ntt_red16_gs_std2rev(b); 98 | reduce_array(b, 16); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red16_gs_rev2std(c); 108 | mul_reduce_array16(c, 16, ntt_red16_scaled_inv_psi_powers); 109 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 110 | correct(c, 16); 111 | } 112 | 113 | void ntt_red16_product5(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array(a, 16); 115 | mulntt_red16_ct_std2rev(a); 116 | reduce_array(a, 16); 117 | 118 | shift_array(b, 16); 119 | mulntt_red16_ct_std2rev(b); 120 | reduce_array(b, 16); 121 | 122 | mul_reduce_array(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice(c, 16); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red16_gs_rev2std(c); 126 | scalar_mul_reduce_array(c, 16, ntt_red16_rescale8); 127 | reduce_array_twice(c, 16); 128 | correct(c, 16); 129 | } 130 | -------------------------------------------------------------------------------- /src/ntt_red256.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=256, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red256.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red256_product1(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16(a, 256, ntt_red256_psi_powers); 20 | ntt_red256_ct_std2rev(a); 21 | reduce_array(a, 256); 22 | 23 | shift_array(b, 256); 24 | mul_reduce_array16(b, 256, ntt_red256_psi_powers); 25 | ntt_red256_ct_std2rev(b); 26 | reduce_array(b, 256); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red256_ct_rev2std(c); 36 | mul_reduce_array16(c, 256, ntt_red256_scaled_inv_psi_powers); 37 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 38 | correct(c, 256); 39 | } 40 | 41 | void ntt_red256_product2(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16(a, 256, ntt_red256_psi_powers); 44 | ntt_red256_gs_std2rev(a); 45 | reduce_array(a, 256); 46 | 47 | shift_array(b, 256); 48 | mul_reduce_array16(b, 256, ntt_red256_psi_powers); 49 | ntt_red256_gs_std2rev(b); 50 | reduce_array(b, 256); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red256_ct_rev2std(c); 60 | mul_reduce_array16(c, 256, ntt_red256_scaled_inv_psi_powers); 61 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 62 | correct(c, 256); 63 | } 64 | 65 | void ntt_red256_product3(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16(a, 256, ntt_red256_psi_powers); 68 | ntt_red256_ct_std2rev(a); 69 | reduce_array(a, 256); 70 | 71 | shift_array(b, 256); 72 | mul_reduce_array16(b, 256, ntt_red256_psi_powers); 73 | ntt_red256_ct_std2rev(b); 74 | reduce_array(b, 256); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red256_gs_rev2std(c); 84 | mul_reduce_array16(c, 256, ntt_red256_scaled_inv_psi_powers); 85 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 86 | correct(c, 256); 87 | } 88 | 89 | void ntt_red256_product4(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16(a, 256, ntt_red256_psi_powers); 92 | ntt_red256_gs_std2rev(a); 93 | reduce_array(a, 256); 94 | 95 | shift_array(b, 256); 96 | mul_reduce_array16(b, 256, ntt_red256_psi_powers); 97 | ntt_red256_gs_std2rev(b); 98 | reduce_array(b, 256); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red256_gs_rev2std(c); 108 | mul_reduce_array16(c, 256, ntt_red256_scaled_inv_psi_powers); 109 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 110 | correct(c, 256); 111 | } 112 | 113 | void ntt_red256_product5(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array(a, 256); 115 | mulntt_red256_ct_std2rev(a); 116 | reduce_array(a, 256); 117 | 118 | shift_array(b, 256); 119 | mulntt_red256_ct_std2rev(b); 120 | reduce_array(b, 256); 121 | 122 | mul_reduce_array(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice(c, 256); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red256_gs_rev2std(c); 126 | scalar_mul_reduce_array(c, 256, ntt_red256_rescale8); 127 | reduce_array_twice(c, 256); 128 | correct(c, 256); 129 | } 130 | -------------------------------------------------------------------------------- /src/ntt_red512.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=512, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red512.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red512_product1(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16(a, 512, ntt_red512_psi_powers); 20 | ntt_red512_ct_std2rev(a); 21 | reduce_array(a, 512); 22 | 23 | shift_array(b, 512); 24 | mul_reduce_array16(b, 512, ntt_red512_psi_powers); 25 | ntt_red512_ct_std2rev(b); 26 | reduce_array(b, 512); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red512_ct_rev2std(c); 36 | mul_reduce_array16(c, 512, ntt_red512_scaled_inv_psi_powers); 37 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 38 | correct(c, 512); 39 | } 40 | 41 | void ntt_red512_product2(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16(a, 512, ntt_red512_psi_powers); 44 | ntt_red512_gs_std2rev(a); 45 | reduce_array(a, 512); 46 | 47 | shift_array(b, 512); 48 | mul_reduce_array16(b, 512, ntt_red512_psi_powers); 49 | ntt_red512_gs_std2rev(b); 50 | reduce_array(b, 512); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red512_ct_rev2std(c); 60 | mul_reduce_array16(c, 512, ntt_red512_scaled_inv_psi_powers); 61 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 62 | correct(c, 512); 63 | } 64 | 65 | void ntt_red512_product3(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16(a, 512, ntt_red512_psi_powers); 68 | ntt_red512_ct_std2rev(a); 69 | reduce_array(a, 512); 70 | 71 | shift_array(b, 512); 72 | mul_reduce_array16(b, 512, ntt_red512_psi_powers); 73 | ntt_red512_ct_std2rev(b); 74 | reduce_array(b, 512); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red512_gs_rev2std(c); 84 | mul_reduce_array16(c, 512, ntt_red512_scaled_inv_psi_powers); 85 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 86 | correct(c, 512); 87 | } 88 | 89 | void ntt_red512_product4(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16(a, 512, ntt_red512_psi_powers); 92 | ntt_red512_gs_std2rev(a); 93 | reduce_array(a, 512); 94 | 95 | shift_array(b, 512); 96 | mul_reduce_array16(b, 512, ntt_red512_psi_powers); 97 | ntt_red512_gs_std2rev(b); 98 | reduce_array(b, 512); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red512_gs_rev2std(c); 108 | mul_reduce_array16(c, 512, ntt_red512_scaled_inv_psi_powers); 109 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 110 | correct(c, 512); 111 | } 112 | 113 | void ntt_red512_product5(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array(a, 512); 115 | mulntt_red512_ct_std2rev(a); 116 | reduce_array(a, 512); 117 | 118 | shift_array(b, 512); 119 | mulntt_red512_ct_std2rev(b); 120 | reduce_array(b, 512); 121 | 122 | mul_reduce_array(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice(c, 512); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red512_gs_rev2std(c); 126 | scalar_mul_reduce_array(c, 512, ntt_red512_rescale8); 127 | reduce_array_twice(c, 512); 128 | correct(c, 512); 129 | } 130 | -------------------------------------------------------------------------------- /src/ntt_red1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=1024, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red1024.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red1024_product1(int32_t *c, int32_t *a, int32_t *b) { 18 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 19 | ntt_red1024_ct_std2rev(a); 20 | reduce_array(a, 1024); 21 | 22 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 23 | ntt_red1024_ct_std2rev(b); 24 | reduce_array(b, 1024); 25 | 26 | // at this point: 27 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 28 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 29 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 30 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 31 | 32 | // we have: -130 <= c[i] <= 12413 33 | intt_red1024_ct_rev2std(c); 34 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 35 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 36 | correct(c, 1024); 37 | } 38 | 39 | void ntt_red1024_product2(int32_t *c, int32_t *a, int32_t *b) { 40 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 41 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 42 | ntt_red1024_gs_std2rev(a); 43 | reduce_array(a, 1024); 44 | 45 | // shift_array(b, 1024); 46 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 47 | ntt_red1024_gs_std2rev(b); 48 | reduce_array(b, 1024); 49 | 50 | // at this point: 51 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 52 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 53 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 54 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 55 | 56 | // we have: -130 <= c[i] <= 12413 57 | intt_red1024_ct_rev2std(c); 58 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 59 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 60 | correct(c, 1024); 61 | } 62 | 63 | void ntt_red1024_product3(int32_t *c, int32_t *a, int32_t *b) { 64 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 65 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 66 | ntt_red1024_ct_std2rev(a); 67 | reduce_array(a, 1024); 68 | 69 | // shift_array(b, 1024); 70 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 71 | ntt_red1024_ct_std2rev(b); 72 | reduce_array(b, 1024); 73 | 74 | // at this point: 75 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 76 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 77 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 78 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 79 | 80 | // we have: -130 <= c[i] <= 12413 81 | intt_red1024_gs_rev2std(c); 82 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 83 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 84 | correct(c, 1024); 85 | } 86 | 87 | void ntt_red1024_product4(int32_t *c, int32_t *a, int32_t *b) { 88 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 89 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 90 | ntt_red1024_gs_std2rev(a); 91 | reduce_array(a, 1024); 92 | 93 | // shift_array(b, 1024); 94 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 95 | ntt_red1024_gs_std2rev(b); 96 | reduce_array(b, 1024); 97 | 98 | // at this point: 99 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 100 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 101 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 102 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 103 | 104 | // we have: -130 <= c[i] <= 12413 105 | intt_red1024_gs_rev2std(c); 106 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 107 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 108 | correct(c, 1024); 109 | } 110 | 111 | void ntt_red1024_product5(int32_t *c, int32_t *a, int32_t *b) { 112 | // shift_array(a, 1024); 113 | mulntt_red1024_ct_std2rev(a); 114 | reduce_array(a, 1024); 115 | 116 | // shift_array(b, 1024); 117 | mulntt_red1024_ct_std2rev(b); 118 | reduce_array(b, 1024); 119 | 120 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 121 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 122 | 123 | inttmul_red1024_gs_rev2std(c); 124 | scalar_mul_reduce_array(c, 1024, ntt_red1024_rescale8); 125 | reduce_array_twice(c, 1024); 126 | correct(c, 1024); 127 | } 128 | -------------------------------------------------------------------------------- /src/test_mod.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define Q 12289 6 | #define P 16 7 | #define R 4 8 | 9 | int32_t smodq(int32_t x) { 10 | return x % Q; 11 | } 12 | 13 | uint32_t umodq(uint32_t x) { 14 | return x % Q; 15 | } 16 | 17 | int32_t pmodq(int32_t x) { 18 | int32_t r; 19 | r = smodq(x); 20 | return (r < 0) ? r + Q : r; 21 | } 22 | 23 | static uint32_t approx_qinv(uint32_t n) { 24 | return (((uint64_t)1)<< n)/Q+1; 25 | } 26 | static uint32_t approx_divq(uint32_t x, uint32_t n) { 27 | return (((uint64_t) x) * approx_qinv(n)) >> n; 28 | } 29 | 30 | static uint32_t approx_modq(uint32_t x, uint32_t n) { 31 | return x - approx_divq(x, n) * Q; 32 | } 33 | 34 | static void test_approx(uint32_t n) { 35 | uint32_t x; 36 | 37 | x = 0; 38 | do { 39 | if (approx_modq(x, n) != (x % Q)) { 40 | printf("approx %"PRIu32" fails for %"PRIu32"\n", n, x); 41 | return; 42 | } 43 | x ++; 44 | // } while (x != 0); to test on full 32bit range 45 | } while (x <= (Q - 1) * (Q -1)); 46 | 47 | printf("approx %"PRIu32" works\n", n); 48 | } 49 | 50 | static void test_all_approx(void) { 51 | uint32_t n; 52 | 53 | for (n=30; n<50; n++) { 54 | printf("trying approx %"PRIu32", approx(1/q) = %"PRIu32"\n", n, approx_qinv(n)); 55 | test_approx(n); 56 | } 57 | } 58 | 59 | 60 | static int64_t approx_pinv(uint32_t n) { 61 | return (((int64_t) 1) << n)/Q+1; 62 | } 63 | 64 | static int32_t approx_pdivq(int32_t x, uint32_t n) { 65 | return (((int64_t) x) * approx_pinv(n)) >> n; 66 | } 67 | 68 | static int32_t approx_pmodq(int32_t x, uint32_t n) { 69 | int32_t r; 70 | r = x - approx_pdivq(x, n) * Q; 71 | return (r < Q) ? r : 0; 72 | } 73 | 74 | // check that pmod(x, n) is between 0 and Q 75 | static void check_approx_pmodq_bound(uint32_t n) { 76 | int32_t x, r; 77 | 78 | for (x = 0; x Q) { 81 | printf("signed-approx bound (with n=%"PRIu32") fails for x = %"PRId32" (approx = %"PRId32")\n", n, x, r); 82 | return; 83 | } 84 | } 85 | 86 | r = approx_pmodq(x, n); 87 | if (r < 0 || r > Q) { 88 | printf("signed-approx bound (with n=%"PRIu32") fails for x = %"PRId32" (approx = %"PRId32")\n", n, x, r); 89 | return; 90 | } 91 | 92 | for (x=-1; x>INT32_MIN; x--) { 93 | r = approx_pmodq(x, n); 94 | if (r < 0 || r > Q) { 95 | printf("signed-approx bound (with n=%"PRIu32") fails for x = %"PRId32" (approx = %"PRId32")\n", n, x, r); 96 | return; 97 | } 98 | } 99 | 100 | r = approx_pmodq(x, n); 101 | if (r < 0 || r > Q) { 102 | printf("signed-approx bound (with n=%"PRIu32") fails for x = %"PRId32" (approx = %"PRId32")\n", n, x, r); 103 | return; 104 | } 105 | 106 | printf("Bound for signed-approx (with n=%"PRIu32") holds\n", n); 107 | } 108 | 109 | static void test_pmod_approx(uint32_t n) { 110 | int32_t x; 111 | 112 | for (x = 0; xINT32_MIN; x--) { 125 | if (approx_pmodq(x, n) != pmodq(x)) { 126 | printf("neg: signed-approx %"PRIu32" fails for %"PRId32": pmod = %"PRId32", approx = %"PRId32"\n", n, x, pmodq(x), approx_pmodq(x, n)); 127 | return; 128 | } 129 | } 130 | 131 | if (approx_pmodq(x, n) != pmodq(x)) { 132 | printf("signed-approx %"PRIu32" fails for %"PRId32": pmod = %"PRId32", approx = %"PRId32"\n", n, x, pmodq(x), approx_pmodq(x, n)); 133 | return; 134 | } 135 | 136 | printf("signed-approx %"PRIu32" works\n", n); 137 | printf("checking bounds\n"); 138 | check_approx_pmodq_bound(n); 139 | } 140 | 141 | static void test_all_pmod_approx(void) { 142 | uint32_t n; 143 | 144 | for (n=30; n<50; n++) { 145 | printf("trying signed-approx %"PRIu32", approx(1/q) = %"PRId64"\n", n, approx_pinv(n)); 146 | test_pmod_approx(n); 147 | printf("\n"); 148 | } 149 | } 150 | 151 | int main(void) { 152 | int32_t i, x, m, min; 153 | 154 | min = Q; 155 | x = R; 156 | for (i=0; i<100000; i++) { 157 | m = smodq(x); 158 | if (m < min) min = m; 159 | x = P * x + R; 160 | } 161 | 162 | printf("min = %"PRId32"\n", min); 163 | 164 | test_all_approx(); 165 | printf("\n"); 166 | test_all_pmod_approx(); 167 | 168 | return 0; 169 | } 170 | -------------------------------------------------------------------------------- /verifier/vstte20-benchmarks/ntt_red1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=1024, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red1024.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red1024_product1(int32_t *c, int32_t *a, int32_t *b) { 18 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 20 | ntt_red1024_ct_std2rev(a); 21 | reduce_array(a, 1024); 22 | 23 | // shift_array(b, 1024); 24 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 25 | ntt_red1024_ct_std2rev(b); 26 | reduce_array(b, 1024); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red1024_ct_rev2std(c); 36 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 37 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 38 | correct(c, 1024); 39 | } 40 | 41 | void ntt_red1024_product2(int32_t *c, int32_t *a, int32_t *b) { 42 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 44 | ntt_red1024_gs_std2rev(a); 45 | reduce_array(a, 1024); 46 | 47 | // shift_array(b, 1024); 48 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 49 | ntt_red1024_gs_std2rev(b); 50 | reduce_array(b, 1024); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red1024_ct_rev2std(c); 60 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 61 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 62 | correct(c, 1024); 63 | } 64 | 65 | void ntt_red1024_product3(int32_t *c, int32_t *a, int32_t *b) { 66 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 68 | ntt_red1024_ct_std2rev(a); 69 | reduce_array(a, 1024); 70 | 71 | // shift_array(b, 1024); 72 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 73 | ntt_red1024_ct_std2rev(b); 74 | reduce_array(b, 1024); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red1024_gs_rev2std(c); 84 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 85 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 86 | correct(c, 1024); 87 | } 88 | 89 | void ntt_red1024_product4(int32_t *c, int32_t *a, int32_t *b) { 90 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16(a, 1024, ntt_red1024_psi_powers); 92 | ntt_red1024_gs_std2rev(a); 93 | reduce_array(a, 1024); 94 | 95 | // shift_array(b, 1024); 96 | mul_reduce_array16(b, 1024, ntt_red1024_psi_powers); 97 | ntt_red1024_gs_std2rev(b); 98 | reduce_array(b, 1024); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red1024_gs_rev2std(c); 108 | mul_reduce_array16(c, 1024, ntt_red1024_scaled_inv_psi_powers); 109 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 110 | correct(c, 1024); 111 | } 112 | 113 | void ntt_red1024_product5(int32_t *c, int32_t *a, int32_t *b) { 114 | // shift_array(a, 1024); 115 | mulntt_red1024_ct_std2rev(a); 116 | reduce_array(a, 1024); 117 | 118 | // shift_array(b, 1024); 119 | mulntt_red1024_ct_std2rev(b); 120 | reduce_array(b, 1024); 121 | 122 | mul_reduce_array(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice(c, 1024); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red1024_gs_rev2std(c); 126 | scalar_mul_reduce_array(c, 1024, ntt_red1024_rescale); 127 | reduce_array_twice(c, 1024); 128 | correct(c, 1024); 129 | } 130 | -------------------------------------------------------------------------------- /src/bitrev512_table.c: -------------------------------------------------------------------------------- 1 | #include "bitrev512_table.h" 2 | 3 | const uint16_t bitrev512[BITREV512_NPAIRS][2] = { 4 | { 1, 256 }, { 2, 128 }, { 3, 384 }, { 4, 64 }, 5 | { 5, 320 }, { 6, 192 }, { 7, 448 }, { 8, 32 }, 6 | { 9, 288 }, { 10, 160 }, { 11, 416 }, { 12, 96 }, 7 | { 13, 352 }, { 14, 224 }, { 15, 480 }, { 17, 272 }, 8 | { 18, 144 }, { 19, 400 }, { 20, 80 }, { 21, 336 }, 9 | { 22, 208 }, { 23, 464 }, { 24, 48 }, { 25, 304 }, 10 | { 26, 176 }, { 27, 432 }, { 28, 112 }, { 29, 368 }, 11 | { 30, 240 }, { 31, 496 }, { 33, 264 }, { 34, 136 }, 12 | { 35, 392 }, { 36, 72 }, { 37, 328 }, { 38, 200 }, 13 | { 39, 456 }, { 41, 296 }, { 42, 168 }, { 43, 424 }, 14 | { 44, 104 }, { 45, 360 }, { 46, 232 }, { 47, 488 }, 15 | { 49, 280 }, { 50, 152 }, { 51, 408 }, { 52, 88 }, 16 | { 53, 344 }, { 54, 216 }, { 55, 472 }, { 57, 312 }, 17 | { 58, 184 }, { 59, 440 }, { 60, 120 }, { 61, 376 }, 18 | { 62, 248 }, { 63, 504 }, { 65, 260 }, { 66, 132 }, 19 | { 67, 388 }, { 69, 324 }, { 70, 196 }, { 71, 452 }, 20 | { 73, 292 }, { 74, 164 }, { 75, 420 }, { 76, 100 }, 21 | { 77, 356 }, { 78, 228 }, { 79, 484 }, { 81, 276 }, 22 | { 82, 148 }, { 83, 404 }, { 85, 340 }, { 86, 212 }, 23 | { 87, 468 }, { 89, 308 }, { 90, 180 }, { 91, 436 }, 24 | { 92, 116 }, { 93, 372 }, { 94, 244 }, { 95, 500 }, 25 | { 97, 268 }, { 98, 140 }, { 99, 396 }, { 101, 332 }, 26 | { 102, 204 }, { 103, 460 }, { 105, 300 }, { 106, 172 }, 27 | { 107, 428 }, { 109, 364 }, { 110, 236 }, { 111, 492 }, 28 | { 113, 284 }, { 114, 156 }, { 115, 412 }, { 117, 348 }, 29 | { 118, 220 }, { 119, 476 }, { 121, 316 }, { 122, 188 }, 30 | { 123, 444 }, { 125, 380 }, { 126, 252 }, { 127, 508 }, 31 | { 129, 258 }, { 131, 386 }, { 133, 322 }, { 134, 194 }, 32 | { 135, 450 }, { 137, 290 }, { 138, 162 }, { 139, 418 }, 33 | { 141, 354 }, { 142, 226 }, { 143, 482 }, { 145, 274 }, 34 | { 147, 402 }, { 149, 338 }, { 150, 210 }, { 151, 466 }, 35 | { 153, 306 }, { 154, 178 }, { 155, 434 }, { 157, 370 }, 36 | { 158, 242 }, { 159, 498 }, { 161, 266 }, { 163, 394 }, 37 | { 165, 330 }, { 166, 202 }, { 167, 458 }, { 169, 298 }, 38 | { 171, 426 }, { 173, 362 }, { 174, 234 }, { 175, 490 }, 39 | { 177, 282 }, { 179, 410 }, { 181, 346 }, { 182, 218 }, 40 | { 183, 474 }, { 185, 314 }, { 187, 442 }, { 189, 378 }, 41 | { 190, 250 }, { 191, 506 }, { 193, 262 }, { 195, 390 }, 42 | { 197, 326 }, { 199, 454 }, { 201, 294 }, { 203, 422 }, 43 | { 205, 358 }, { 206, 230 }, { 207, 486 }, { 209, 278 }, 44 | { 211, 406 }, { 213, 342 }, { 215, 470 }, { 217, 310 }, 45 | { 219, 438 }, { 221, 374 }, { 222, 246 }, { 223, 502 }, 46 | { 225, 270 }, { 227, 398 }, { 229, 334 }, { 231, 462 }, 47 | { 233, 302 }, { 235, 430 }, { 237, 366 }, { 239, 494 }, 48 | { 241, 286 }, { 243, 414 }, { 245, 350 }, { 247, 478 }, 49 | { 249, 318 }, { 251, 446 }, { 253, 382 }, { 255, 510 }, 50 | { 259, 385 }, { 261, 321 }, { 263, 449 }, { 265, 289 }, 51 | { 267, 417 }, { 269, 353 }, { 271, 481 }, { 275, 401 }, 52 | { 277, 337 }, { 279, 465 }, { 281, 305 }, { 283, 433 }, 53 | { 285, 369 }, { 287, 497 }, { 291, 393 }, { 293, 329 }, 54 | { 295, 457 }, { 299, 425 }, { 301, 361 }, { 303, 489 }, 55 | { 307, 409 }, { 309, 345 }, { 311, 473 }, { 315, 441 }, 56 | { 317, 377 }, { 319, 505 }, { 323, 389 }, { 327, 453 }, 57 | { 331, 421 }, { 333, 357 }, { 335, 485 }, { 339, 405 }, 58 | { 343, 469 }, { 347, 437 }, { 349, 373 }, { 351, 501 }, 59 | { 355, 397 }, { 359, 461 }, { 363, 429 }, { 367, 493 }, 60 | { 371, 413 }, { 375, 477 }, { 379, 445 }, { 383, 509 }, 61 | { 391, 451 }, { 395, 419 }, { 399, 483 }, { 407, 467 }, 62 | { 411, 435 }, { 415, 499 }, { 423, 459 }, { 431, 491 }, 63 | { 439, 475 }, { 447, 507 }, { 463, 487 }, { 479, 503 }, 64 | }; 65 | 66 | -------------------------------------------------------------------------------- /src/ntt_red_asm16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=16, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red_asm16.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red16_product1_asm(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array_asm(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16_asm(a, 16, ntt_red16_psi_powers); 20 | ntt_red16_ct_std2rev_asm(a); 21 | reduce_array_asm(a, 16); 22 | 23 | shift_array_asm(b, 16); 24 | mul_reduce_array16_asm(b, 16, ntt_red16_psi_powers); 25 | ntt_red16_ct_std2rev_asm(b); 26 | reduce_array_asm(b, 16); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array_asm(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red16_ct_rev2std_asm(c); 36 | mul_reduce_array16_asm(c, 16, ntt_red16_scaled_inv_psi_powers); 37 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 38 | correct_asm(c, 16); 39 | } 40 | 41 | void ntt_red16_product2_asm(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array_asm(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16_asm(a, 16, ntt_red16_psi_powers); 44 | ntt_red16_gs_std2rev_asm(a); 45 | reduce_array_asm(a, 16); 46 | 47 | shift_array_asm(b, 16); 48 | mul_reduce_array16_asm(b, 16, ntt_red16_psi_powers); 49 | ntt_red16_gs_std2rev_asm(b); 50 | reduce_array_asm(b, 16); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array_asm(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red16_ct_rev2std_asm(c); 60 | mul_reduce_array16_asm(c, 16, ntt_red16_scaled_inv_psi_powers); 61 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 62 | correct_asm(c, 16); 63 | } 64 | 65 | void ntt_red16_product3_asm(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array_asm(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16_asm(a, 16, ntt_red16_psi_powers); 68 | ntt_red16_ct_std2rev_asm(a); 69 | reduce_array_asm(a, 16); 70 | 71 | shift_array_asm(b, 16); 72 | mul_reduce_array16_asm(b, 16, ntt_red16_psi_powers); 73 | ntt_red16_ct_std2rev_asm(b); 74 | reduce_array_asm(b, 16); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array_asm(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red16_gs_rev2std_asm(c); 84 | mul_reduce_array16_asm(c, 16, ntt_red16_scaled_inv_psi_powers); 85 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 86 | correct_asm(c, 16); 87 | } 88 | 89 | void ntt_red16_product4_asm(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array_asm(a, 16); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16_asm(a, 16, ntt_red16_psi_powers); 92 | ntt_red16_gs_std2rev_asm(a); 93 | reduce_array_asm(a, 16); 94 | 95 | shift_array_asm(b, 16); 96 | mul_reduce_array16_asm(b, 16, ntt_red16_psi_powers); 97 | ntt_red16_gs_std2rev_asm(b); 98 | reduce_array_asm(b, 16); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array_asm(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red16_gs_rev2std_asm(c); 108 | mul_reduce_array16_asm(c, 16, ntt_red16_scaled_inv_psi_powers); 109 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 110 | correct_asm(c, 16); 111 | } 112 | 113 | void ntt_red16_product5_asm(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array_asm(a, 16); 115 | mulntt_red16_ct_std2rev_asm(a); 116 | reduce_array_asm(a, 16); 117 | 118 | shift_array_asm(b, 16); 119 | mulntt_red16_ct_std2rev_asm(b); 120 | reduce_array_asm(b, 16); 121 | 122 | mul_reduce_array_asm(c, 16, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice_asm(c, 16); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red16_gs_rev2std_asm(c); 126 | scalar_mul_reduce_array_asm(c, 16, ntt_red16_rescale8); 127 | reduce_array_twice_asm(c, 16); 128 | correct_asm(c, 16); 129 | } 130 | -------------------------------------------------------------------------------- /src/ntt_red_asm1024.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=1024, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red_asm1024.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red1024_product1_asm(int32_t *c, int32_t *a, int32_t *b) { 18 | mul_reduce_array16_asm(a, 1024, ntt_red1024_psi_powers); 19 | ntt_red1024_ct_std2rev_asm(a); 20 | reduce_array_asm(a, 1024); 21 | 22 | mul_reduce_array16_asm(b, 1024, ntt_red1024_psi_powers); 23 | ntt_red1024_ct_std2rev_asm(b); 24 | reduce_array_asm(b, 1024); 25 | 26 | // at this point: 27 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 28 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 29 | mul_reduce_array_asm(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 30 | // reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 31 | 32 | // we have: -130 <= c[i] <= 12413 33 | intt_red1024_ct_rev2std_asm(c); 34 | mul_reduce_array16_asm(c, 1024, ntt_red1024_scaled_inv_psi_powers_var); 35 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 36 | correct_asm(c, 1024); 37 | } 38 | 39 | void ntt_red1024_product2_asm(int32_t *c, int32_t *a, int32_t *b) { 40 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 41 | mul_reduce_array16_asm(a, 1024, ntt_red1024_psi_powers); 42 | ntt_red1024_gs_std2rev_asm(a); 43 | reduce_array_asm(a, 1024); 44 | 45 | // shift_array(b, 1024); 46 | mul_reduce_array16_asm(b, 1024, ntt_red1024_psi_powers); 47 | ntt_red1024_gs_std2rev_asm(b); 48 | reduce_array_asm(b, 1024); 49 | 50 | // at this point: 51 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 52 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 53 | mul_reduce_array_asm(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 54 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 55 | 56 | // we have: -130 <= c[i] <= 12413 57 | intt_red1024_ct_rev2std_asm(c); 58 | mul_reduce_array16_asm(c, 1024, ntt_red1024_scaled_inv_psi_powers); 59 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 60 | correct_asm(c, 1024); 61 | } 62 | 63 | void ntt_red1024_product3_asm(int32_t *c, int32_t *a, int32_t *b) { 64 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 65 | mul_reduce_array16_asm(a, 1024, ntt_red1024_psi_powers); 66 | ntt_red1024_ct_std2rev_asm(a); 67 | reduce_array_asm(a, 1024); 68 | 69 | // shift_array(b, 1024); 70 | mul_reduce_array16_asm(b, 1024, ntt_red1024_psi_powers); 71 | ntt_red1024_ct_std2rev_asm(b); 72 | reduce_array_asm(b, 1024); 73 | 74 | // at this point: 75 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 76 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 77 | mul_reduce_array_asm(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 78 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 79 | 80 | // we have: -130 <= c[i] <= 12413 81 | intt_red1024_gs_rev2std_asm(c); 82 | mul_reduce_array16_asm(c, 1024, ntt_red1024_scaled_inv_psi_powers); 83 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 84 | correct_asm(c, 1024); 85 | } 86 | 87 | void ntt_red1024_product4_asm(int32_t *c, int32_t *a, int32_t *b) { 88 | // shift_array(a, 1024); // convert to [-(Q-1)/2, (Q-1)/2] 89 | mul_reduce_array16_asm(a, 1024, ntt_red1024_psi_powers); 90 | ntt_red1024_gs_std2rev_asm(a); 91 | reduce_array_asm(a, 1024); 92 | 93 | // shift_array(b, 1024); 94 | mul_reduce_array16_asm(b, 1024, ntt_red1024_psi_powers); 95 | ntt_red1024_gs_std2rev_asm(b); 96 | reduce_array_asm(b, 1024); 97 | 98 | // at this point: 99 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 100 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 101 | mul_reduce_array_asm(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 102 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 103 | 104 | // we have: -130 <= c[i] <= 12413 105 | intt_red1024_gs_rev2std_asm(c); 106 | mul_reduce_array16_asm(c, 1024, ntt_red1024_scaled_inv_psi_powers); 107 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 108 | correct_asm(c, 1024); 109 | } 110 | 111 | void ntt_red1024_product5_asm(int32_t *c, int32_t *a, int32_t *b) { 112 | // shift_array(a, 1024); 113 | mulntt_red1024_ct_std2rev_asm(a); 114 | reduce_array_asm(a, 1024); 115 | 116 | // shift_array(b, 1024); 117 | mulntt_red1024_ct_std2rev_asm(b); 118 | reduce_array_asm(b, 1024); 119 | 120 | mul_reduce_array_asm(c, 1024, a, b); // c[i] = 3 * a[i] * b[i] 121 | reduce_array_twice_asm(c, 1024); // c[i] = 9 * c[i] mod Q 122 | 123 | inttmul_red1024_gs_rev2std_asm(c); 124 | scalar_mul_reduce_array_asm(c, 1024, ntt_red1024_rescale8); 125 | reduce_array_twice_asm(c, 1024); 126 | correct_asm(c, 1024); 127 | } 128 | -------------------------------------------------------------------------------- /src/ntt_red_asm256.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=256, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red_asm256.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red256_product1_asm(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array_asm(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16_asm(a, 256, ntt_red256_psi_powers); 20 | ntt_red256_ct_std2rev_asm(a); 21 | reduce_array_asm(a, 256); 22 | 23 | shift_array_asm(b, 256); 24 | mul_reduce_array16_asm(b, 256, ntt_red256_psi_powers); 25 | ntt_red256_ct_std2rev_asm(b); 26 | reduce_array_asm(b, 256); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array_asm(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red256_ct_rev2std_asm(c); 36 | mul_reduce_array16_asm(c, 256, ntt_red256_scaled_inv_psi_powers); 37 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 38 | correct_asm(c, 256); 39 | } 40 | 41 | void ntt_red256_product2_asm(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array_asm(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16_asm(a, 256, ntt_red256_psi_powers); 44 | ntt_red256_gs_std2rev_asm(a); 45 | reduce_array_asm(a, 256); 46 | 47 | shift_array_asm(b, 256); 48 | mul_reduce_array16_asm(b, 256, ntt_red256_psi_powers); 49 | ntt_red256_gs_std2rev_asm(b); 50 | reduce_array_asm(b, 256); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array_asm(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red256_ct_rev2std_asm(c); 60 | mul_reduce_array16_asm(c, 256, ntt_red256_scaled_inv_psi_powers); 61 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 62 | correct_asm(c, 256); 63 | } 64 | 65 | void ntt_red256_product3_asm(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array_asm(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16_asm(a, 256, ntt_red256_psi_powers); 68 | ntt_red256_ct_std2rev_asm(a); 69 | reduce_array_asm(a, 256); 70 | 71 | shift_array_asm(b, 256); 72 | mul_reduce_array16_asm(b, 256, ntt_red256_psi_powers); 73 | ntt_red256_ct_std2rev_asm(b); 74 | reduce_array_asm(b, 256); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array_asm(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red256_gs_rev2std_asm(c); 84 | mul_reduce_array16_asm(c, 256, ntt_red256_scaled_inv_psi_powers); 85 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 86 | correct_asm(c, 256); 87 | } 88 | 89 | void ntt_red256_product4_asm(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array_asm(a, 256); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16_asm(a, 256, ntt_red256_psi_powers); 92 | ntt_red256_gs_std2rev_asm(a); 93 | reduce_array_asm(a, 256); 94 | 95 | shift_array_asm(b, 256); 96 | mul_reduce_array16_asm(b, 256, ntt_red256_psi_powers); 97 | ntt_red256_gs_std2rev_asm(b); 98 | reduce_array_asm(b, 256); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array_asm(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red256_gs_rev2std_asm(c); 108 | mul_reduce_array16_asm(c, 256, ntt_red256_scaled_inv_psi_powers); 109 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 110 | correct_asm(c, 256); 111 | } 112 | 113 | void ntt_red256_product5_asm(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array_asm(a, 256); 115 | mulntt_red256_ct_std2rev_asm(a); 116 | reduce_array_asm(a, 256); 117 | 118 | shift_array_asm(b, 256); 119 | mulntt_red256_ct_std2rev_asm(b); 120 | reduce_array_asm(b, 256); 121 | 122 | mul_reduce_array_asm(c, 256, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice_asm(c, 256); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red256_gs_rev2std_asm(c); 126 | scalar_mul_reduce_array_asm(c, 256, ntt_red256_rescale8); 127 | reduce_array_twice_asm(c, 256); 128 | correct_asm(c, 256); 129 | } 130 | -------------------------------------------------------------------------------- /src/ntt_red_asm512.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NTT for Q=12289, n=512, using the Longa/Naehrig reduction method. 3 | */ 4 | 5 | #include "ntt_red_asm512.h" 6 | 7 | /* 8 | * Input: two arrays a and b in standard order 9 | * 10 | * Result: 11 | * - the product is stored in array c, in standard order. 12 | * - arrays a and b are modified 13 | * 14 | * The input arrays must contain elements in the range [0, Q-1] 15 | * The result is also in that range. 16 | */ 17 | void ntt_red512_product1_asm(int32_t *c, int32_t *a, int32_t *b) { 18 | shift_array_asm(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 19 | mul_reduce_array16_asm(a, 512, ntt_red512_psi_powers); 20 | ntt_red512_ct_std2rev_asm(a); 21 | reduce_array_asm(a, 512); 22 | 23 | shift_array_asm(b, 512); 24 | mul_reduce_array16_asm(b, 512, ntt_red512_psi_powers); 25 | ntt_red512_ct_std2rev_asm(b); 26 | reduce_array_asm(b, 512); 27 | 28 | // at this point: 29 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 30 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 31 | mul_reduce_array_asm(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 32 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 33 | 34 | // we have: -130 <= c[i] <= 12413 35 | intt_red512_ct_rev2std_asm(c); 36 | mul_reduce_array16_asm(c, 512, ntt_red512_scaled_inv_psi_powers); 37 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 38 | correct_asm(c, 512); 39 | } 40 | 41 | void ntt_red512_product2_asm(int32_t *c, int32_t *a, int32_t *b) { 42 | shift_array_asm(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 43 | mul_reduce_array16_asm(a, 512, ntt_red512_psi_powers); 44 | ntt_red512_gs_std2rev_asm(a); 45 | reduce_array_asm(a, 512); 46 | 47 | shift_array_asm(b, 512); 48 | mul_reduce_array16_asm(b, 512, ntt_red512_psi_powers); 49 | ntt_red512_gs_std2rev_asm(b); 50 | reduce_array_asm(b, 512); 51 | 52 | // at this point: 53 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 54 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 55 | mul_reduce_array_asm(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 56 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 57 | 58 | // we have: -130 <= c[i] <= 12413 59 | intt_red512_ct_rev2std_asm(c); 60 | mul_reduce_array16_asm(c, 512, ntt_red512_scaled_inv_psi_powers); 61 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 62 | correct_asm(c, 512); 63 | } 64 | 65 | void ntt_red512_product3_asm(int32_t *c, int32_t *a, int32_t *b) { 66 | shift_array_asm(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 67 | mul_reduce_array16_asm(a, 512, ntt_red512_psi_powers); 68 | ntt_red512_ct_std2rev_asm(a); 69 | reduce_array_asm(a, 512); 70 | 71 | shift_array_asm(b, 512); 72 | mul_reduce_array16_asm(b, 512, ntt_red512_psi_powers); 73 | ntt_red512_ct_std2rev_asm(b); 74 | reduce_array_asm(b, 512); 75 | 76 | // at this point: 77 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 78 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 79 | mul_reduce_array_asm(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 80 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 81 | 82 | // we have: -130 <= c[i] <= 12413 83 | intt_red512_gs_rev2std_asm(c); 84 | mul_reduce_array16_asm(c, 512, ntt_red512_scaled_inv_psi_powers); 85 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 86 | correct_asm(c, 512); 87 | } 88 | 89 | void ntt_red512_product4_asm(int32_t *c, int32_t *a, int32_t *b) { 90 | shift_array_asm(a, 512); // convert to [-(Q-1)/2, (Q-1)/2] 91 | mul_reduce_array16_asm(a, 512, ntt_red512_psi_powers); 92 | ntt_red512_gs_std2rev_asm(a); 93 | reduce_array_asm(a, 512); 94 | 95 | shift_array_asm(b, 512); 96 | mul_reduce_array16_asm(b, 512, ntt_red512_psi_powers); 97 | ntt_red512_gs_std2rev_asm(b); 98 | reduce_array_asm(b, 512); 99 | 100 | // at this point: 101 | // a = NTT(a) * 3, -524287 <= a[i] <= 536573 102 | // b = NTT(b) * 3, -524287 <= b[i] <= 536573 103 | mul_reduce_array_asm(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 104 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 105 | 106 | // we have: -130 <= c[i] <= 12413 107 | intt_red512_gs_rev2std_asm(c); 108 | mul_reduce_array16_asm(c, 512, ntt_red512_scaled_inv_psi_powers); 109 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 110 | correct_asm(c, 512); 111 | } 112 | 113 | void ntt_red512_product5_asm(int32_t *c, int32_t *a, int32_t *b) { 114 | shift_array_asm(a, 512); 115 | mulntt_red512_ct_std2rev_asm(a); 116 | reduce_array_asm(a, 512); 117 | 118 | shift_array_asm(b, 512); 119 | mulntt_red512_ct_std2rev_asm(b); 120 | reduce_array_asm(b, 512); 121 | 122 | mul_reduce_array_asm(c, 512, a, b); // c[i] = 3 * a[i] * b[i] 123 | reduce_array_twice_asm(c, 512); // c[i] = 9 * c[i] mod Q 124 | 125 | inttmul_red512_gs_rev2std_asm(c); 126 | scalar_mul_reduce_array_asm(c, 512, ntt_red512_rescale8); 127 | reduce_array_twice_asm(c, 512); 128 | correct_asm(c, 512); 129 | } 130 | -------------------------------------------------------------------------------- /data/primitive-roots-1024.txt: -------------------------------------------------------------------------------- 1 | # 2 | # These are all the primitive n-th roots of unity in Z_q 3 | # when q=12289 and n=1024. 4 | # 5 | 49 6 | 52 7 | 56 8 | 58 9 | 64 10 | 142 11 | 147 12 | 151 13 | 156 14 | 168 15 | 174 16 | 192 17 | 218 18 | 241 19 | 295 20 | 316 21 | 325 22 | 347 23 | 350 24 | 382 25 | 400 26 | 418 27 | 421 28 | 426 29 | 431 30 | 441 31 | 453 32 | 468 33 | 504 34 | 522 35 | 576 36 | 605 37 | 652 38 | 654 39 | 677 40 | 683 41 | 709 42 | 723 43 | 787 44 | 835 45 | 885 46 | 922 47 | 948 48 | 973 49 | 975 50 | 1003 51 | 1010 52 | 1018 53 | 1041 54 | 1050 55 | 1058 56 | 1105 57 | 1112 58 | 1146 59 | 1159 60 | 1190 61 | 1200 62 | 1254 63 | 1263 64 | 1278 65 | 1293 66 | 1319 67 | 1321 68 | 1323 69 | 1359 70 | 1360 71 | 1404 72 | 1483 73 | 1489 74 | 1512 75 | 1566 76 | 1579 77 | 1583 78 | 1594 79 | 1693 80 | 1702 81 | 1728 82 | 1747 83 | 1805 84 | 1815 85 | 1843 86 | 1858 87 | 1922 88 | 1954 89 | 1956 90 | 1958 91 | 1962 92 | 1973 93 | 1975 94 | 1987 95 | 2031 96 | 2033 97 | 2049 98 | 2051 99 | 2057 100 | 2078 101 | 2127 102 | 2169 103 | 2281 104 | 2302 105 | 2344 106 | 2361 107 | 2447 108 | 2459 109 | 2500 110 | 2503 111 | 2505 112 | 2555 113 | 2655 114 | 2683 115 | 2692 116 | 2738 117 | 2766 118 | 2767 119 | 2839 120 | 2844 121 | 2882 122 | 2908 123 | 2919 124 | 2920 125 | 2925 126 | 2948 127 | 3009 128 | 3029 129 | 3030 130 | 3054 131 | 3123 132 | 3127 133 | 3150 134 | 3174 135 | 3199 136 | 3202 137 | 3262 138 | 3263 139 | 3315 140 | 3329 141 | 3336 142 | 3434 143 | 3438 144 | 3445 145 | 3477 146 | 3482 147 | 3514 148 | 3529 149 | 3532 150 | 3565 151 | 3570 152 | 3600 153 | 3602 154 | 3643 155 | 3656 156 | 3710 157 | 3728 158 | 3757 159 | 3762 160 | 3772 161 | 3789 162 | 3818 163 | 3834 164 | 3860 165 | 3879 166 | 3956 167 | 3957 168 | 3963 169 | 3969 170 | 3988 171 | 3991 172 | 3998 173 | 4016 174 | 4046 175 | 4049 176 | 4075 177 | 4077 178 | 4079 179 | 4080 180 | 4115 181 | 4169 182 | 4212 183 | 4213 184 | 4240 185 | 4298 186 | 4322 187 | 4324 188 | 4433 189 | 4449 190 | 4467 191 | 4493 192 | 4536 193 | 4624 194 | 4698 195 | 4737 196 | 4749 197 | 4754 198 | 4774 199 | 4780 200 | 4782 201 | 4789 202 | 4912 203 | 4916 204 | 4948 205 | 5009 206 | 5057 207 | 5079 208 | 5106 209 | 5184 210 | 5206 211 | 5241 212 | 5257 213 | 5297 214 | 5315 215 | 5333 216 | 5339 217 | 5369 218 | 5383 219 | 5415 220 | 5429 221 | 5435 222 | 5445 223 | 5446 224 | 5456 225 | 5468 226 | 5486 227 | 5529 228 | 5537 229 | 5574 230 | 5594 231 | 5681 232 | 5735 233 | 5766 234 | 5782 235 | 5862 236 | 5868 237 | 5874 238 | 5876 239 | 5886 240 | 5906 241 | 5908 242 | 5915 243 | 5919 244 | 5925 245 | 5942 246 | 5961 247 | 5990 248 | 6008 249 | 6055 250 | 6065 251 | 6068 252 | 6077 253 | 6093 254 | 6099 255 | 6118 256 | 6119 257 | 6122 258 | 6136 259 | 6137 260 | 6142 261 | 6147 262 | 6152 263 | 6153 264 | 6167 265 | 6170 266 | 6171 267 | 6190 268 | 6196 269 | 6212 270 | 6221 271 | 6224 272 | 6234 273 | 6281 274 | 6299 275 | 6328 276 | 6347 277 | 6364 278 | 6370 279 | 6374 280 | 6381 281 | 6383 282 | 6403 283 | 6413 284 | 6415 285 | 6421 286 | 6427 287 | 6507 288 | 6523 289 | 6554 290 | 6608 291 | 6695 292 | 6715 293 | 6752 294 | 6760 295 | 6803 296 | 6821 297 | 6833 298 | 6843 299 | 6844 300 | 6854 301 | 6860 302 | 6874 303 | 6906 304 | 6920 305 | 6950 306 | 6956 307 | 6974 308 | 6992 309 | 7032 310 | 7048 311 | 7083 312 | 7105 313 | 7183 314 | 7210 315 | 7232 316 | 7280 317 | 7341 318 | 7373 319 | 7377 320 | 7500 321 | 7507 322 | 7509 323 | 7515 324 | 7535 325 | 7540 326 | 7552 327 | 7591 328 | 7665 329 | 7753 330 | 7796 331 | 7822 332 | 7840 333 | 7856 334 | 7965 335 | 7967 336 | 7991 337 | 8049 338 | 8076 339 | 8077 340 | 8120 341 | 8174 342 | 8209 343 | 8210 344 | 8212 345 | 8214 346 | 8240 347 | 8243 348 | 8273 349 | 8291 350 | 8298 351 | 8301 352 | 8320 353 | 8326 354 | 8332 355 | 8333 356 | 8410 357 | 8429 358 | 8455 359 | 8471 360 | 8500 361 | 8517 362 | 8527 363 | 8532 364 | 8561 365 | 8579 366 | 8633 367 | 8646 368 | 8687 369 | 8689 370 | 8719 371 | 8724 372 | 8757 373 | 8760 374 | 8775 375 | 8807 376 | 8812 377 | 8844 378 | 8851 379 | 8855 380 | 8953 381 | 8960 382 | 8974 383 | 9026 384 | 9027 385 | 9087 386 | 9090 387 | 9115 388 | 9139 389 | 9162 390 | 9166 391 | 9235 392 | 9259 393 | 9260 394 | 9280 395 | 9341 396 | 9364 397 | 9369 398 | 9370 399 | 9381 400 | 9407 401 | 9445 402 | 9450 403 | 9522 404 | 9523 405 | 9551 406 | 9597 407 | 9606 408 | 9634 409 | 9734 410 | 9784 411 | 9786 412 | 9789 413 | 9830 414 | 9842 415 | 9928 416 | 9945 417 | 9987 418 | 10008 419 | 10120 420 | 10162 421 | 10211 422 | 10232 423 | 10238 424 | 10240 425 | 10256 426 | 10258 427 | 10302 428 | 10314 429 | 10316 430 | 10327 431 | 10331 432 | 10333 433 | 10335 434 | 10367 435 | 10431 436 | 10446 437 | 10474 438 | 10484 439 | 10542 440 | 10561 441 | 10587 442 | 10596 443 | 10695 444 | 10706 445 | 10710 446 | 10723 447 | 10777 448 | 10800 449 | 10806 450 | 10885 451 | 10929 452 | 10930 453 | 10966 454 | 10968 455 | 10970 456 | 10996 457 | 11011 458 | 11026 459 | 11035 460 | 11089 461 | 11099 462 | 11130 463 | 11143 464 | 11177 465 | 11184 466 | 11231 467 | 11239 468 | 11248 469 | 11271 470 | 11279 471 | 11286 472 | 11314 473 | 11316 474 | 11341 475 | 11367 476 | 11404 477 | 11454 478 | 11502 479 | 11566 480 | 11580 481 | 11606 482 | 11612 483 | 11635 484 | 11637 485 | 11684 486 | 11713 487 | 11767 488 | 11785 489 | 11821 490 | 11836 491 | 11848 492 | 11858 493 | 11863 494 | 11868 495 | 11871 496 | 11889 497 | 11907 498 | 11939 499 | 11942 500 | 11964 501 | 11973 502 | 11994 503 | 12048 504 | 12071 505 | 12097 506 | 12115 507 | 12121 508 | 12133 509 | 12138 510 | 12142 511 | 12147 512 | 12225 513 | 12231 514 | 12233 515 | 12237 516 | 12240 517 | 518 | -------------------------------------------------------------------------------- /src/test_red.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | /* 8 | * 12289 is 3 * 2^12 + 1 9 | * MASK = 2^12 - 1 10 | */ 11 | #define Q 12289 12 | #define K 3 13 | #define MASK 4095 14 | 15 | #define MAXABS 715827882 16 | #define MAXABS2 (((int64_t) 1) << 43) 17 | 18 | static int64_t red(int64_t x) { 19 | return (3 * (x & 4095)) - (x >> 12); 20 | } 21 | 22 | #if 0 23 | static void test(int64_t x, int64_t w) { 24 | int64_t r; 25 | 26 | r = red(x * w); 27 | if (r > INT32_MAX || r < INT32_MIN) { 28 | printf(" 32bit overflow detected for x = %"PRId64", w = %"PRId64"\n", x, w); 29 | printf(" red(x * w) = %"PRId64"\n", r); 30 | if (-MAXABS <= x && x <= MAXABS) { 31 | printf("---> x is within limits\n"); 32 | fflush(stdout); 33 | exit(1); 34 | } 35 | } 36 | } 37 | 38 | // return true on overflow 39 | static bool test2(int64_t x) { 40 | int64_t r; 41 | 42 | r = red(x); 43 | if (r > INT32_MAX || r < INT32_MIN) { 44 | printf(" 32bit overflow detected for x = %"PRId64"\n", x); 45 | printf(" red(x) = %"PRId64"\n", r); 46 | return true; 47 | } 48 | return false; 49 | } 50 | #endif 51 | 52 | static int64_t lower_bound, upper_bound; 53 | 54 | static bool test_overflow(int64_t x) { 55 | int64_t r; 56 | r = red(x); 57 | return r>INT32_MAX || r INT32_MAX) { 121 | b = b/4096 + Q - K; 122 | printf("Reduction to %"PRId64"\n", b); 123 | nb = (K+1) * b + Q - K; 124 | } 125 | b = nb; 126 | } 127 | printf("\n"); 128 | 129 | printf("GS updates\n"); 130 | b = Q-1; 131 | for (i=0; i<20; i++) { 132 | printf("B%"PRIu32" = %"PRIi64"\n", i, b); 133 | nb = 2 * K * b + Q - K; 134 | if (nb > INT32_MAX) { 135 | b = b/4096 + Q - K; 136 | printf("Reduction to %"PRId64"\n", b); 137 | nb = 2 * K * b + Q - K; 138 | } 139 | b = nb; 140 | } 141 | printf("\n"); 142 | } 143 | 144 | 145 | /* 146 | * CT: x' = x + red(w * y) 147 | * y' = x - red(w * y) 148 | * 149 | * Bound on |x'| and |y'| assuming |x| <= b and |y| <= b. 150 | */ 151 | static int64_t ct_bound(int64_t b) { 152 | int64_t delta; 153 | 154 | delta = (Q - K) - (b+1)/4096; 155 | return delta < 0 ? (K + 1) * b : (K + 1) * b + delta; 156 | } 157 | 158 | /* 159 | * GS: x' = x + y 160 | * y' = red(w * (x - y)) 161 | * 162 | * Bound on |x'| and |y'| assuming |x| <= b and |y| <= b. 163 | */ 164 | static int64_t gs_bound(int64_t b) { 165 | int64_t delta, bb; 166 | 167 | bb = 2 * b; // |x - y| <= 2b 168 | delta = (Q - K) - (bb + 1)/4096; 169 | return delta < 0 ? K * bb : K * bb + delta; 170 | } 171 | 172 | static void iter_bound_abs2(void) { 173 | uint32_t i; 174 | int64_t b, nb; 175 | 176 | printf("CT updates\n"); 177 | b = Q-1; 178 | for (i=0; i<20; i++) { 179 | printf("B%"PRIu32" = %"PRId64"\n", i, b); 180 | nb = ct_bound(b); 181 | if (nb > INT32_MAX) { 182 | b = b/4096 + Q - K; 183 | printf("Reduction to %"PRId64"\n", b); 184 | nb = ct_bound(b); 185 | } 186 | b = nb; 187 | } 188 | 189 | printf("\nGS updates\n"); 190 | b = Q-1; 191 | for (i=0; i<20; i++) { 192 | printf("B%"PRIu32" = %"PRId64"\n", i, b); 193 | nb = gs_bound(b); 194 | if (nb > INT32_MAX) { 195 | b = b/4096 + Q - K; 196 | printf("Reduction to %"PRId64"\n", b); 197 | nb = gs_bound(b); 198 | } 199 | b = nb; 200 | } 201 | } 202 | 203 | int main(void) { 204 | int64_t x; 205 | uint64_t u; 206 | 207 | find_lower_bound(); 208 | find_upper_bound(); 209 | 210 | for (x=-10000; x < 1000000000; x++) { 211 | if (test_overflow(lower_bound + x)) { 212 | printf("Overflow for lower_bound + %"PRId64"\n", x); 213 | } 214 | } 215 | printf("\n"); 216 | for (x= -1000000000; x < 10000; x++) { 217 | if (test_overflow(upper_bound + x)) { 218 | printf("Overflow for upper_bound + %"PRId64"\n", x); 219 | } 220 | } 221 | printf("\n"); 222 | u = lower_bound; 223 | printf("Hex: lower_bound = %0llx\n", u); 224 | u = upper_bound; 225 | printf("Hex: upper_bound = %0llx\n", u); 226 | printf("\n"); 227 | 228 | iter_bound_abs(); 229 | iter_bound_abs2(); 230 | 231 | return 0; 232 | } 233 | --------------------------------------------------------------------------------