├── README.md
├── baseline
    ├── generate_prime.py
    ├── helper.py
    ├── ntt.py
    ├── ntt_demo.py
    ├── poly.py
    └── poly_demo.py
├── hwmodel
    ├── FNTT_mem_N8_PE2.txt
    ├── FNTT_tw_N8_PE2.txt
    ├── INTT_mem_N8_PE2.txt
    ├── INTT_tw_N8_PE2.txt
    ├── NTRU3_mem_N24_PE2.txt
    ├── NTRU3_tw_N24_PE2.txt
    ├── NWC1_mem_N8_PE2.txt
    ├── NWC1_tw_N8_PE2.txt
    ├── NWC2_mem_N8_PE2.txt
    ├── NWC2_tw_N8_PE2.txt
    ├── generate_prime.py
    ├── helper.py
    └── poly_hw_demo.py
├── stats
    ├── generate_prime.py
    ├── helper.py
    ├── ntt.py
    ├── ntt_demo.py
    ├── poly.py
    └── poly_demo.py
└── testgenerator
    ├── generate_prime.py
    ├── generate_test.py
    └── helper.py


/README.md:
--------------------------------------------------------------------------------
1 | # ntt-based-polmul
2 | Python implementations of various NTT/INTT and NTT-based polynomial multiplication algorithms
3 | (I will include details later)
4 | 


--------------------------------------------------------------------------------
/baseline/generate_prime.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Pedro Alves
 2 | 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | 
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import random
16 | import math
17 | import sys
18 | 
def miller_rabin(p,s=11):
    """
    Miller-Rabin probabilistic primality test.

    p: integer to test
    s: number of random witnesses (rounds) to try

    Returns False as soon as one witness proves p composite, True if p
    survives all s rounds (a prime always does).
    """
    # Decide tiny and even inputs directly: random.randrange(2,p-1) below
    # needs p >= 4 to have a non-empty range.
    if p < 2:
        return False
    if p < 4:
        return True
    if p&1 == 0:
        return False

    #computes p-1 decomposition in 2**u*r
    r = p-1
    u = 0
    while r&1 == 0:#true while the last bit of r is zero
        u += 1
        # integer shift; int(r/2) goes through a float and silently drops the
        # low bits of r once p exceeds 2**53
        r >>= 1

    # apply miller_rabin primality test
    for _ in range(s):
        a = random.randrange(2,p-1) # choose random a in {2,3,...,p-2}
        z = pow(a,r,p)

        if z == 1 or z == p-1:
            continue

        # square up to u-1 times looking for p-1
        for _ in range(u-1):
            z = pow(z,2,p)
            if z == 1:
                return False
            if z == p-1:
                break
        if z != p-1:
            return False
    return True
43 | 
44 | 
def is_prime(n,s=11):
    """
    Primality check: trial division by the odd primes below 1000, then
    miller_rabin(n,s) for anything that survives.

    n: integer to test
    s: number of Miller-Rabin rounds passed on to miller_rabin

    Returns True or False.
    """
    #lowPrimes is all odd primes under 1000. taking n modulo each lowPrime
    #allows us to remove a huge chunk of composite numbers from our potential
    #pool without resorting to Rabin-Miller
    lowPrimes =   [3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97
                  ,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179
                  ,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269
                  ,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367
                  ,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461
                  ,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571
                  ,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661
                  ,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773
                  ,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883
                  ,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997]
    if n < 2:
        return False
    if n == 2:
        # the only even prime; the parity test below would reject it
        return True
    if n&1 == 0:
        return False
    for p in lowPrimes:
        if n == p:
            return True
        if n % p == 0:
            return False
    return miller_rabin(n,s)
68 | 
def generate_large_prime(k,s=11):
    """
    Draw random k-bit integers until one passes is_prime(n,s).

    k: desired bit length; candidates come from [2**(k-1), 2**k)
    s: number of Miller-Rabin rounds forwarded to is_prime

    Returns the first candidate accepted by is_prime.
    Raises Exception when no candidate is accepted within the attempt budget.
    """
    max_attempts = int(100*(math.log(k,2)+1)) #number of max attempts
    for _ in range(max_attempts):
        #randrange is mersenne twister and is completely deterministic
        #unusable for serious crypto purposes
        n = random.randrange(2**(k-1),2**(k))
        if is_prime(n,s):
            return n
    # report the attempt budget, not the exhausted counter (which is always 0)
    raise Exception("Failure after %d tries." % max_attempts)
85 | 


--------------------------------------------------------------------------------
/baseline/helper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def egcd(a, b):
 3 |     if a == 0:
 4 |         return (b, 0, 1)
 5 |     else:
 6 |         g, y, x = egcd(b % a, a)
 7 |         return (g, x - (b // a) * y, y)
 8 | 
 9 | def modinv(a, m):
10 |     g, x, y = egcd(a, m)
11 |     if g != 1:
12 |         raise Exception('Modular inverse does not exist')
13 |     else:
14 |         return x % m
15 | 
16 | # Bit-Reverse integer
def intReverse(a,n):
    """
    Bit-reverse integer a: write it in binary zero-padded to n digits, flip
    the digit string and read it back as an integer.
    """
    bits = format(a, '0' + str(n) + 'b')
    return int(''.join(reversed(bits)), 2)
20 | 
21 | # Bit-Reversed index
def indexReverse(a,r):
    """
    Return a new list in which the element at index i of a is stored at
    index intReverse(i,r).

    a: input list
    r: bit width used for the index reversal
    """
    scrambled = [0]*len(a)
    for position, value in enumerate(a):
        scrambled[intReverse(position,r)] = value
    return scrambled
29 | 
30 | # Check if input is m-th (could be n or 2n) primitive root of unity of q
def isrootofunity(w,m,q):
    """
    Check whether w is a primitive m-th root of unity modulo q.

    Requires w^m == 1, w^(m//2) == q-1 and that none of w^1 .. w^(m-1)
    equals 1. Returns True when all three hold, False otherwise.
    """
    if pow(w,m,q) != 1 or pow(w,m//2,q) != (q-1):
        return False

    # walk through w^1 .. w^(m-1); hitting 1 early means the order is < m
    power = w
    for _ in range(1,m):
        if power == 1:
            return False
        power = (power*w) % q
    return True
44 | 


--------------------------------------------------------------------------------
/baseline/ntt.py:
--------------------------------------------------------------------------------
  1 | from math import log,floor,ceil
  2 | from helper import *
  3 | 
  4 | matrix = lambda polynomial, col_length: list(zip(*[polynomial[i:i + col_length] for i in range(0, len(polynomial), col_length)]))
  5 | 
  6 | # Cooley-Tukey Butterfly Structure
  7 | # A0,A1: input coefficients
  8 | # W: twiddle factor
  9 | # q: modulus
 10 | # B0,B1: output coefficients
 11 | def CT_Butterfly(A0,A1,W,q):
 12 |     """
 13 |     A0 -------\--|+|-- B0
 14 |                \/
 15 |                /\
 16 |     A1 --|x|--/--|-|-- B1
 17 |     """
 18 |     M = (A1 * W) % q
 19 | 
 20 |     B0 = (A0 + M) % q
 21 |     B1 = (A0 - M) % q
 22 | 
 23 |     return B0,B1
 24 | 
 25 | # Gentleman-Sande Butterfly Structure
 26 | # A0,A1: input coefficients
 27 | # W: twiddle factor
 28 | # q: modulus
 29 | # B0,B1: output coefficients
 30 | def GS_Butterfly(A0,A1,W,q):
 31 |     """
 32 |     A0 --\--|+|------- B0
 33 |           \/
 34 |           /\
 35 |     A1 --/--|-|--|x|-- B1
 36 |     """
 37 |     M0 = (A0 + A1) % q
 38 |     M1 = (A0 - A1) % q
 39 | 
 40 |     B0 = M0
 41 |     B1 = (M1 * W) % q
 42 | 
 43 |     return B0,B1
 44 | 
 45 | class NTT:
 46 |     """
 47 |     - Definition of NTT:
 48 | 
 49 |     Existence condition: q = 1 (mod n) and w: n-th root of unity
 50 | 
 51 |     [a_0, a_1, ..., a_n-1] --> [A_0, A_1, ..., A_n-1]
 52 | 
 53 |     Forward NTT: A_i = sum{j from 0 to n-1}(a_j * w^ij mod q) for i from 0 to n-1
 54 |     Inverse NTT: a_i = sum{j from 0 to n-1}(A_j * w^-ij mod q) for i from 0 to n-1
 55 |     """
 56 | 
 57 |     """
 58 |     List of NTT Algorithms: (Inside the FFT Black Box, by Chu and George)
 59 |     -- Naive NTT (see Wikipedia definition of NTT operation)
 60 |     -- Radix-2 Decimation-in-Time (DIT) Recursive NTT (Cooley-Tukey)
 61 |     -- Radix-2 Decimation-in-Frequency (DIF) Recursive NTT (Gentleman-Sande)
 62 |     -- Radix-2 Decimation-in-Time (DIT) Iterative NTT
 63 |     ---- NR (N: Natural order, R: Reversed Order)
 64 |     ---- RN
 65 |     ---- NN
 66 |     -- Radix-2 Decimation-in-Frequency (DIF) Iterative NTT
 67 |     ---- NR
 68 |     ---- RN
 69 |     ---- NN
 70 |     """
 71 | 
 72 |     """
 73 |     Note: Any forward NTT function can be used for inverse NTT if you give input
 74 |     in proper order and w^-1 instead of w. Besides, INTT requires output
 75 |     coefficients to be multiplied with n^-1 mod q.
 76 |     """
 77 | 
 78 |     """
 79 |     - What is standard order? : 0, 1, ..., n-1
 80 |     - What is reversed/bit-reversed (scrambled) order? : 0, br(1), ..., br(n-1)
 81 |     where the br() function bit-reverses the integer using log(n) bits
 82 |     """
 83 | 
 84 |     # Naive NTT
 85 |     # A: input polynomial (standard order)
 86 |     # W: twiddle factor
 87 |     # q: modulus
 88 |     # B: output polynomial (standard order)
 89 |     def NaiveNTT_NN(self,A,W,q):
 90 |         """
 91 |         Very slow baseline implementation. Do not use for large parameter set.
 92 |         """
 93 |         N = len(A)
 94 |         B = [0]*N
 95 | 
 96 |         for i in range(N):
 97 |             for j in range(N):
 98 |                 B[i] = (B[i] + A[j]*(W**(i*j))) % q
 99 | 
100 |         return B
101 | 
102 |     # Naive NTT
103 |     # A: input polynomial (standard order)
104 |     # W: twiddle factor
105 |     # q: modulus
106 |     # B: output polynomial (bit-reversed order)
107 |     def NaiveNTT_NR(self,A,W,q):
108 |         """
109 |         Very slow baseline implementation. Do not use for large parameter set.
110 |         """
111 |         N = len(A)
112 |         B = [0]*N
113 | 
114 |         v = int(log(N,2))
115 | 
116 |         for i in range(N):
117 |             for j in range(N):
118 |                 W_pow = intReverse(i,v)*j
119 |                 B[i] = (B[i] + A[j]*(W**W_pow)) % q
120 |         return B
121 | 
122 |     # Recursive Radix-2 Decimation-in-Time (DIT) (CT) NTT
123 |     # A: input polynomial (standard order --> it becomes reversed after recursions)
124 |     # W: twiddle factor
125 |     # q: modulus
126 |     # B: output polynomial (standard order)
127 |     def Radix2_DIT_Recursive_NTT(self,A,W,q):
128 |         N = len(A)
129 |         B = [0]*N
130 | 
131 |         if N == 2:
132 |             # When N is 2, perform butterfly operation with W = 1
133 |             B[0] = (A[0] + A[1]) % q
134 |             B[1] = (A[0] - A[1]) % q
135 | 
136 |             return B
137 |         else:
138 |             # Divide input into two (even indices, odd indices)
139 |             AE = [A[i] for i in range(0,N,2)]
140 |             AO = [A[i] for i in range(1,N,2)]
141 | 
142 |             # Reduce twiddle factor for the next recursion
143 |             W_new = pow(W,2,q)
144 | 
145 |             # Apply NTT operations to the even and odd indices of the input separately
146 |             BE = self.Radix2_DIT_Recursive_NTT(AE,W_new,q)
147 |             BO = self.Radix2_DIT_Recursive_NTT(AO,W_new,q)
148 | 
149 |             # Outputs of first and second NTT operations go to the first and second
150 |             # half of the array (output array)
151 |             B = BE+BO
152 | 
153 |             # Perform CT-Butterfly where first and second inputs of butterfly
154 |             # operation are from first and second half of the output respectively
155 |             # First and second outputs of the butterfly operation go to first and
156 |             # second half of the array (input array) respectively
157 |             for i in range(int(N/2)):
158 |                 B[i], B[i+int(N/2)] = CT_Butterfly(B[i],B[i+int(N/2)],pow(W,i,q),q)
159 | 
160 |             return B
161 | 
162 |     # Recursive Radix-2 Decimation-in-Frequency (DIF) (GS) NTT
163 |     # A: input polynomial (standard order)
164 |     # W: twiddle factor
165 |     # q: modulus
166 |     # B: output polynomial (bit-reversed order)
167 |     def Radix2_DIF_Recursive_NTT(self,A,W,q):
168 |         N = len(A)
169 |         B = [0]*N
170 | 
171 |         if N == 2:
172 |             # When N is 2, perform butterfly operation with W = 1
173 |             B[0] = (A[0] + A[1]) % q
174 |             B[1] = (A[0] - A[1]) % q
175 | 
176 |             return B
177 |         else:
178 |             # Divide input into two (first half, second half)
179 | 
180 |             # Perform GS-Butterfly where first and second inputs of butterfly
181 |             # operation are from first and second half of the input respectively
182 |             # First and second outputs of the butterfly operation go to first and
183 |             # second half of the array (input array) respectively
184 |             for i in range(int(N/2)):
185 |                 B[i], B[i+int(N/2)] = GS_Butterfly(A[i],A[i+int(N/2)],pow(W,i,q),q)
186 | 
187 |             # Reduce twiddle factor for the next recursion
188 |             W_new = pow(W,2,q)
189 | 
190 |             # Apply NTT operations to the first and second half of the input separately
191 |             BE = self.Radix2_DIF_Recursive_NTT(B[0:int(N/2)],W_new,q)
192 |             BO = self.Radix2_DIF_Recursive_NTT(B[int(N/2):N],W_new,q)
193 | 
194 |             # Outputs of first and second NTT operations go to the first and second
195 |             # half of the array (output array)
196 |             B = BE+BO
197 | 
198 |             return B
199 | 
200 |     # From paper: NTTU: An Area-Efficient Low-Power NTT-Uncoupled Architecture for NTT-Based Multiplication
201 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - NR
202 |     # A: input polynomial (standard order)
203 |     # W: twiddle factor
204 |     # q: modulus
205 |     # B: output polynomial (bit-reversed order)
206 |     def Radix2_DIT_Iterative_NTT_NR(self,A,W,q):
207 |         N = len(A)
208 |         B = [_ for _ in A]
209 | 
210 |         for s in range(int(log(N,2)),0,-1):
211 |             m = 2**s
212 |             for k in range(int(N/m)):
213 |                 TW = pow(W,intReverse(k,int(log(N,2))-s)*int(m/2),q)
214 |                 for j in range(int(m/2)):
215 |                     u = B[k*m+j]
216 |                     t = (TW*B[k*m+j+int(m/2)]) % q
217 | 
218 |                     B[k*m+j]          = (u+t) % q
219 |                     B[k*m+j+int(m/2)] = (u-t) % q
220 | 
221 |         return B
222 | 
223 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - RN
224 |     # A: input polynomial (bit-reversed order)
225 |     # W: twiddle factor
226 |     # q: modulus
227 |     # B: output polynomial (standard order)
228 |     def Radix2_DIT_Iterative_NTT_RN(self,A,W,q):
229 |         N = len(A)
230 |         B = [_ for _ in A]
231 | 
232 |         v = int(N/2)
233 |         m = 1
234 |         d = 1
235 | 
236 |         while m<N:
237 |             np = 2*m
238 |             lp = np*(v-1)
239 |             for k in range(m):
240 |                 j = k
241 |                 jl = k + lp
242 |                 jt = k*v
243 |                 TW = pow(W,jt,q)
244 |                 while j < (jl+1):
245 |                     temp = (TW*B[j+d]) % q
246 |                     B[j+d] = (B[j] - temp) % q
247 |                     B[j]   = (B[j] + temp) % q
248 |                     j = j+np
249 |             v = int(v/2)
250 |             m = 2*m
251 |             d = 2*d
252 | 
253 |         return B
254 | 
255 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - NN
256 |     # A: input polynomial (standard order)
257 |     # W: twiddle factor
258 |     # q: modulus
259 |     # B: output polynomial (standard order)
260 |     def Radix2_DIT_Iterative_NTT_NN(self,A,W,q):
261 |         N = len(A)
262 |         B = [_ for _ in A]
263 |         C = [_ for _ in A]
264 |         # C = [0]*N
265 | 
266 |         v = int(N/2)
267 |         m = 1
268 |         d = int(N/2)
269 | 
270 |         if int(log(v))%2 == 0:
271 |             nsi = True
272 |         else:
273 |             nsi = False
274 | 
275 |         while m<N:
276 |             if nsi:
277 |                 l = 0
278 |                 for k in range(m):
279 |                     jf = 2*k*v
280 |                     jl = jf + v - 1
281 |                     jt = k*v
282 | 
283 |                     TW = pow(W,jt,q)
284 | 
285 |                     for j in range(jf,jl+1):
286 |                         temp = (TW*B[j+d]) % q
287 | 
288 |                         C[l]          = (B[j] + temp) % q
289 |                         C[l+int(N/2)] = (B[j] - temp) % q
290 | 
291 |                         l = l+1
292 |                 nsi = False
293 |             else:
294 |                 l = 0
295 |                 for k in range(m):
296 |                     jf = 2*k*v
297 |                     jl = jf + v - 1
298 |                     jt = k*v
299 | 
300 |                     TW = pow(W,jt,q)
301 | 
302 |                     for j in range(jf,jl+1):
303 |                         temp = (TW*C[j+d]) % q
304 | 
305 |                         B[l]          = (C[j] + temp) % q
306 |                         B[l+int(N/2)] = (C[j] - temp) % q
307 | 
308 |                         l = l+1
309 |                 nsi = True
310 |             v = int(v/2)
311 |             m = 2*m
312 |             d = int(d/2)
313 | 
314 |         return C
315 | 
316 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - NR
317 |     # A: input polynomial (standard order)
318 |     # W: twiddle factor
319 |     # q: modulus
320 |     # B: output polynomial (bit-reversed order)
321 |     def Radix2_DIF_Iterative_NTT_NR(self,A,W,q):
322 |         N = len(A)
323 |         B = [_ for _ in A]
324 | 
325 |         m = 1
326 |         v = N
327 | 
328 |         while v>1:
329 |             s = int(v/2)
330 |             for k in range(m):
331 |                 jf = k * v
332 |                 jl = jf + s - 1
333 |                 jt = 0
334 |                 for j in range(jf,jl+1):
335 |                     TW = pow(W,jt,q)
336 | 
337 |                     temp = B[j]
338 | 
339 |                     B[j  ] = (temp + B[j+s]) % q
340 |                     B[j+s] = (temp - B[j+s])*TW % q
341 | 
342 |                     jt = jt + m
343 |             m = 2*m
344 |             v = s
345 | 
346 |         return B
347 | 
348 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - RN
349 |     # A: input polynomial (reversed order)
350 |     # W: twiddle factor
351 |     # q: modulus
352 |     # B: output polynomial (standard order)
353 |     def Radix2_DIF_Iterative_NTT_RN(self,A,W,q):
354 |         N = len(A)
355 |         B = [_ for _ in A]
356 | 
357 |         m = 1
358 |         v = N
359 |         d = 1
360 | 
361 |         while v>1:
362 |             for jf in range(m):
363 |                 j = jf
364 |                 jt = 0
365 |                 while j<(N-1):
366 |                     # bit-reversing jt
367 |                     TW = pow(W,intReverse(jt,int(log(N>>1,2))),q)
368 | 
369 |                     temp = B[j]
370 | 
371 |                     B[j]   = (temp + B[j+d]) % q
372 |                     B[j+d] = (temp - B[j+d])*TW % q
373 | 
374 |                     jt = jt+1
375 |                     j = j + 2*m
376 |             m = 2*m
377 |             v = int(v/2)
378 |             d = 2*d
379 | 
380 |         return B
381 | 
382 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - NN
383 |     # A: input polynomial (standard order)
384 |     # W: twiddle factor
385 |     # q: modulus
386 |     # B: output polynomial (standard order)
387 |     def Radix2_DIF_Iterative_NTT_NN(self,A,W,q):
388 |         N = len(A)
389 |         # requires two memory
390 |         B = [_ for _ in A]
391 |         C = [_ for _ in A]
392 |         # C = [0] * N
393 | 
394 |         m = 1
395 |         v = N
396 |         d = 1
397 | 
398 |         if int(log(v//2))%2 == 0:
399 |             nsi = True
400 |         else:
401 |             nsi = False
402 | 
403 |         while v>1:
404 |             if nsi:
405 |                 for jf in range(m):
406 |                     j = jf
407 |                     jt = 0
408 |                     k = jf
409 |                     while j<(N-1):
410 |                         TW = pow(W,jt,q)
411 | 
412 |                         C[j]   = (B[k] + B[k+int(N/2)]) % q
413 |                         C[j+d] = (B[k] - B[k+int(N/2)])*TW % q
414 | 
415 |                         jt = jt + m
416 |                         j = j + 2*m
417 |                         k = k + m
418 |                 nsi = False
419 |             else:
420 |                 for jf in range(m):
421 |                     j = jf
422 |                     jt = 0
423 |                     k = jf
424 |                     while j<(N-1):
425 |                         TW = pow(W,jt,q)
426 | 
427 |                         B[j]   = (C[k] + C[k+int(N/2)]) % q
428 |                         B[j+d] = (C[k] - C[k+int(N/2)])*TW % q
429 | 
430 |                         jt = jt + m
431 |                         j = j + 2*m
432 |                         k = k + m
433 |                 nsi = True
434 |             m = 2*m
435 |             v = int(v/2)
436 |             d = 2*d
437 | 
438 |         return C
439 | 
440 |     ######################################################################## (INTT)
441 |     """
442 |     List of INTT Algorithms: NTT algorithms with extra n^-1 mod q multiplication
443 |     -- Naive NTT (see Wikipedia definition of NTT operation)
444 |     -- Radix-2 Decimation-in-Time (DIT) Recursive NTT (Cooley-Tukey)
445 |     -- Radix-2 Decimation-in-Frequency (DIF) Recursive NTT (Gentleman-Sande)
446 |     -- Radix-2 Decimation-in-Time (DIT) Iterative NTT
447 |     ---- NR (N: Natural order, R: Reversed Order)
448 |     ---- RN
449 |     ---- NN
450 |     -- Radix-2 Decimation-in-Frequency (DIF) Iterative NTT
451 |     ---- NR
452 |     ---- RN
453 |     ---- NN
454 |     """
455 | 
456 |     def NaiveINTT_NN(self,A,W_inv,q):
457 |         """
458 |         Very slow baseline implementation. Do not use for large parameter set.
459 |         """
460 |         N_inv = modinv(len(A),q)
461 |         B = [(x*N_inv) % q for x in self.NaiveNTT_NN(A,W_inv,q)]
462 |         return B
463 | 
464 |     def NaiveINTT_NR(self,A,W_inv,q):
465 |         """
466 |         Very slow baseline implementation. Do not use for large parameter set.
467 |         """
468 |         N_inv = modinv(len(A),q)
469 |         B = [(x*N_inv) % q for x in self.NaiveNTT_NR(A,W_inv,q)]
470 |         return B
471 | 
472 |     def Radix2_DIT_Recursive_INTT(self,A,W_inv,q):
473 |         N_inv = modinv(len(A),q)
474 |         B = [(x*N_inv) % q for x in self.Radix2_DIT_Recursive_NTT(A,W_inv,q)]
475 |         return B
476 | 
477 |     def Radix2_DIF_Recursive_INTT(self,A,W_inv,q):
478 |         N_inv = modinv(len(A),q)
479 |         B = [(x*N_inv) % q for x in self.Radix2_DIF_Recursive_NTT(A,W_inv,q)]
480 |         return B
481 | 
482 |     def Radix2_DIT_Iterative_INTT_NR(self,A,W_inv,q):
483 |         N_inv = modinv(len(A),q)
484 |         B = [(x*N_inv) % q for x in self.Radix2_DIT_Iterative_NTT_NR(A,W_inv,q)]
485 |         return B
486 | 
487 |     def Radix2_DIT_Iterative_INTT_RN(self,A,W_inv,q):
488 |         N_inv = modinv(len(A),q)
489 |         B = [(x*N_inv) % q for x in self.Radix2_DIT_Iterative_NTT_RN(A,W_inv,q)]
490 |         return B
491 | 
492 |     def Radix2_DIT_Iterative_INTT_NN(self,A,W_inv,q):
493 |         N_inv = modinv(len(A),q)
494 |         B = [(x*N_inv) % q for x in self.Radix2_DIT_Iterative_NTT_NN(A,W_inv,q)]
495 |         return B
496 | 
497 |     def Radix2_DIF_Iterative_INTT_NR(self,A,W_inv,q):
498 |         N_inv = modinv(len(A),q)
499 |         B = [(x*N_inv) % q for x in self.Radix2_DIF_Iterative_NTT_NR(A,W_inv,q)]
500 |         return B
501 | 
502 |     def Radix2_DIF_Iterative_INTT_RN(self,A,W_inv,q):
503 |         N_inv = modinv(len(A),q)
504 |         B = [(x*N_inv) % q for x in self.Radix2_DIF_Iterative_NTT_RN(A,W_inv,q)]
505 |         return B
506 | 
507 |     def Radix2_DIF_Iterative_INTT_NN(self,A,W_inv,q):
508 |         N_inv = modinv(len(A),q)
509 |         B = [(x*N_inv) % q for x in self.Radix2_DIF_Iterative_NTT_NN(A,W_inv,q)]
510 |         return B
511 | 
512 |     """
513 |     CRT-based NTT (it is also used for polynomial multiplication in x^n-1)
514 | 
515 |     Example for 8-pt NTT (w -> 8th root of unity)
516 |     Input  -> Standard Order
517 |     Output -> Bit-reversed Order (We can make it standard order)
518 | 
519 |                                       x^8 - 1 --------------------------------------------- Stage #0
520 |                                       /     \
521 |                                      /       \
522 |                                     /         \
523 |                                    /           \
524 |                                   /             \
525 |                                  /               \
526 |                                 /                 \
527 |                                /                   \
528 |                               /                     \
529 |                              /                       \
530 |                       x^4 - 1                         x^4 + 1 ----------------------------- Stage #1
531 |                          ||                              ||
532 |                       x^4 - 1                         x^4 - w^4
533 |                      /  \                                   /  \
534 |                     /    \                                 /    \
535 |                    /      \                               /      \
536 |             x^2 - 1        x^2 + 1               x^2 - w^2        x^2 + w^2 --------------- Stage #2
537 |                ||             ||                     ||               ||
538 |             x^2 - 1       x^2 - w^4              x^2 - w^2        x^2 - w^6
539 |            / \               / \                   / \               / \
540 |           /   \             /   \                 /   \             /   \
541 |          /     \           /     \               /     \           /     \
542 |     x - 1     x + 1   x - w^2   x + w^2       x - w   x + w   x - w^3   x + w^3 ----------- Stage #3
543 |       ||        ||       ||       ||            ||      ||       ||       ||
544 |     x - 1    x - w^4  x - w^2   x - w^6      x - w   x - w^5  x - w^3   x - w^7
545 | 
546 |     -- Recursive
547 |     -- Full
548 |     -- Iterative (converted to an optimized algorithm) --> Already presented above.
549 |     ---- CT
550 |     ---- GS
551 |     """
552 | 
553 |     # CRT-based NTT (recursive)
554 |     # A: input polynomial (standard order)
555 |     # W: twiddle factor
556 |     # q: modulus
557 |     # B: output polynomial (bit-reversed order)
558 |     def CRT_Recursive_NTT(self,A,W,q,pw=0,depth=1):
559 |         N = len(A)
560 | 
561 |         if N == 1:
562 |             return A
563 |         else:
564 |             B = [0]*N
565 | 
566 |             W_N = pow(W,pw,q)
567 | 
568 |             # reduce
569 |             B[0:int(N/2)] = [(A[i] + A[i+int(N/2)]*W_N) % q for i in range(int(N/2))]
570 |             B[int(N/2):N] = [(A[i] - A[i+int(N/2)]*W_N) % q for i in range(int(N/2))]
571 | 
572 |             # recall functions
573 |             B[0:int(N/2)] = self.CRT_Recursive_NTT(B[0:int(N/2)], W,q,int(pw/2)                 ,2*depth)
574 |             B[int(N/2):N] = self.CRT_Recursive_NTT(B[int(N/2):N], W,q,int(pw/2)+int((N/4)*depth),2*depth)
575 | 
576 |             return B
577 | 
578 |     # CRT-based NTT (full)
579 |     # A: input polynomial (standard order)
580 |     # W: twiddle factor
581 |     # q: modulus
582 |     # B: output polynomial (standard order)
583 |     def CRT_Full_NTT(self,A,W,q):
584 |         N = len(A)
585 |         B = [0]*N
586 | 
587 |         # If i or j is bit-reversed, output will be in bit-reversed order
588 |         for j in range(N):
589 |             C = [x*pow(W**j,i,q) % q for i,x in enumerate(A)]
590 |             B[j] = sum(C) % q
591 | 
592 |         return B
593 | 
594 |     ######################################################################## (INTT)
595 | 
596 |     """
597 |     CRT-based INTT (it is also used for polynomial multiplication in x^n-1)
598 |     It is NTT algorithms with extra n^-1 mod q multiplication
599 | 
600 |     -- Recursive
601 |     -- Full
602 |     -- Iterative (converted to an optimized algorithm) --> Already stated algorithms above!
603 |     ---- CT
604 |     ---- GS
605 |     """
606 | 
607 |     def CRT_Recursive_INTT(self,A,W_inv,q):
608 |         N_inv = modinv(len(A),q)
609 |         B = [(x*N_inv) % q for x in self.CRT_Recursive_NTT(A,W_inv,q)]
610 |         return B
611 | 
612 |     def CRT_Full_INTT(self,A,W_inv,q):
613 |         N_inv = modinv(len(A),q)
614 |         B = [(x*N_inv) % q for x in self.CRT_Full_NTT(A,W_inv,q)]
615 |         return B
616 | 
617 |     """
618 |     List of NTT Algorithms: (from literature)
619 |     -- Recursive Cooley-Tukey (CT) NTT (see http://people.scs.carleton.ca/~maheshwa/courses/5703COMP/16Fall/FFT_Report.pdf)
620 |     -- Iterative NTT (see https://eprint.iacr.org/2019/109.pdf)
621 |     -- Constant-Geometry NTT (see https://tches.iacr.org/index.php/TCHES/article/view/8344/7692 or https://eprint.iacr.org/2014/646.pdf)
622 |        (NOTE: There are typos in the Algorithm presented in the papers)
623 |     -- Stockham NTT (see https://ieeexplore.ieee.org/document/8717615)
624 |     -- Four-Step NTT (see https://eprint.iacr.org/2015/818.pdf)
625 |     """
626 | 
627 |     # Cooley-Tukey NTT
628 |     # A: input polynomial (standard order)
629 |     # W: twiddle factor
630 |     # q: modulus
631 |     # B: output polynomial (standard order)
632 |     def CooleyTukeyNTT(self,A,W,q):
633 |         N = len(A)
634 | 
635 |         if (N == 2):
636 |             B = [0] * N
637 | 
638 |             B[0] = (A[0] + A[1]) % q
639 |             B[1] = (A[0] - A[1]) % q
640 | 
641 |             return B
642 |         else:
643 |             B = [0] * N
644 |             w = 1
645 | 
646 |             A_even = [0] * (N >> 1)
647 |             A_odd  = [0] * (N >> 1)
648 | 
649 |             for i in range(N >> 1):
650 |                 A_even[i] = A[2 * i]
651 |                 A_odd[i]  = A[2 * i + 1]
652 | 
653 |             B_even = self.CooleyTukeyNTT(A_even,(W * W % q),q)
654 |             B_odd  = self.CooleyTukeyNTT(A_odd, (W * W % q),q)
655 | 
656 |             for i in range(N >> 1):
657 |                 B[i]            = (B_even[i] + w * B_odd[i]) % q
658 |                 B[i + (N >> 1)] = (B_even[i] - w * B_odd[i]) % q
659 | 
660 |                 w = w * W
661 | 
662 |         return B
663 | 
664 |     # Iterative NTT
665 |     # A: input polynomial (standard order)
666 |     # W: twiddle factor
667 |     # q: modulus
668 |     # B: output polynomial (bit-reversed order)
669 |     def IterativeNTT(self,A,W,q):
670 |         N = len(A)
671 |         B = [x for x in A]
672 | 
673 |         v = int(log(N, 2))
674 | 
675 |         for i in range(0, v):
676 |             for j in range(0, (2 ** i)):
677 |                 for k in range(0, (2 ** (v - i - 1))):
678 |                     s = j * (2 ** (v - i)) + k
679 |                     t = s + (2 ** (v - i - 1))
680 | 
681 |                     w = (W ** ((2 ** i) * k)) % q
682 | 
683 |                     as_temq = B[s]
684 |                     at_temq = B[t]
685 | 
686 |                     B[s] = (as_temq + at_temq) % q
687 |                     B[t] = ((as_temq - at_temq) * w) % q
688 | 
689 |         return B
690 | 
691 |     # Four-Step NTT
692 |     # A: input polynomial (standard order)
693 |     # W: twiddle factor
694 |     # q: modulus
695 |     # size: input polynomial partition
696 |     # B: output polynomial (standard order)
697 |     def FourStepNTT(self,A,W,q,size):
698 |         """
699 |         This is a unified four-step NTT algorithm for both forward and inverse
700 |         NTT operations. The coefficients of input polynomial should be given in
701 |         standard order. The output is generated in standard order. Forward NTT
702 |         uses twiddle factors and inverse NTT uses modular inverse of twiddle factors.
703 | 
704 |         This algorithm divides NTT operation into smaller parts. "size" input
705 |         determines the size of these small NTT operations. For details of the
706 |         algorithm, see the paper: https://eprint.iacr.org/2015/818.pdf
707 |         """
708 |         N = len(A)
709 | 
710 |         poly = [_ for _ in A]
711 | 
712 |         size0 = size[0]
713 |         size1 = size[1]
714 | 
715 |         temp0 = 1
716 |         # STEP.1
717 |         if isinstance(size0, list):
718 |             for i in size0:
719 |                 temp0 = temp0 * i
720 |             STEP_1 = matrix(poly, N/temp0)
721 |             W_0 = (W ** (N/temp0)) % q
722 |             for i in range(int(N/temp0)):
723 |                 STEP_1[i] = self.FourStepNTT(STEP_1[i],W_0,q,size0)
724 |         else:
725 |             temp0 = size0
726 |             STEP_1 = matrix(poly, int(N/temp0))
727 |             W_0 = (W ** int(N/temp0)) % q
728 |             for i in range(int(N/temp0)):
729 |                 STEP_1[i] =  self.CooleyTukeyNTT(STEP_1[i],W_0,q)
730 | 
731 |         # STEP.2 - Transpose
732 |         STEP_2 = [ [row[c] for row in STEP_1 if c < len(row)] for c in range(0, max([len(row) for row in STEP_1])) ]
733 |         # STEP_2 = list(zip(*STEP_1))
734 | 
735 |         # STEP.3 - Multiply with twiddle factor of N-pt NTT
736 |         STEP_3 = [[0]*int(N/temp0)]*size0
737 |         for i in range(temp0):
738 |             STEP_3[i] = [(STEP_2[i][k] * (W ** (i*k)) % q) for k in range(int(N/temp0))]
739 | 
740 |         temp1 = 1
741 |         #STEP.4
742 |         if isinstance(size1, list):
743 |             for i in size1:
744 |                 temp1 = temp0 * i
745 |             W_1 = (W ** int(N/temp1)) % q
746 |             for i in range(int(N/temp1)):
747 |                 STEP_3[i] = self.FourStepNTT(STEP_3[i],W_1,q,size1)
748 |         else:
749 |             temp1 = size1
750 |             W_1 = (W ** int(N/temp1)) % q
751 |             for i in range(int(N/temp1)):
752 |                 STEP_3[i] = self.CooleyTukeyNTT(STEP_3[i],W_1,q)
753 | 
754 |         # Final transpose
755 |         STEP_4 = [ [row[c] for row in STEP_3 if c < len(row)] for c in range(0, max([len(row) for row in STEP_3])) ]
756 |         # STEP_4 = list(zip(*STEP_3))
757 | 
758 |         # Convert matrix into array
759 |         STEP_4 = [item for sublist in STEP_4 for item in sublist]
760 | 
761 |         return STEP_4
762 | 
763 |     # Four-Step NTT v2
764 |     # A: input polynomial (standard order)
765 |     # W: twiddle factor
766 |     # q: modulus
767 |     # size: input polynomial partition
768 |     # B: output polynomial (standard order)
769 |     def FourStepNTTv2(self,A,W,q,size):
770 |         """
771 |         This is a four-step NTT algorithm for both forward and inverse NTT
772 |         operations. The coefficients of tnput polynomial should be given in
773 |         standard order. The output is generated in standard order. Forward NTT
774 |         uses modular inverse of twiddle factors and inverse NTT uses twiddle factors.
775 | 
776 |         This algorithm divides NTT operation into smaller parts. "size" input
777 |         determines the size of these small NTT operations. For details of the
778 |         algorithm, see the paper: https://eprint.iacr.org/2015/818.pdf
779 |         """
780 |         N = len(A)
781 | 
782 |         # If this is an inverse transform operation
783 |         N_inv = modinv(N, q)
784 |         # Re-order input
785 |         poly = [A[0]] + list(reversed(A[1:]))
786 | 
787 |         size0 = size[0]
788 |         size1 = size[1]
789 | 
790 |         temp0 = 1
791 |         # STEP.1
792 |         if isinstance(size0, list):
793 |             for i in size0:
794 |                 temp0 = temp0 * i
795 |             STEP_1 = matrix(poly, N/temp0)
796 |             W_0 = (W ** (N/temp0)) % q
797 |             for i in range(int(N/temp0)):
798 |                 STEP_1[i] = self.FourStepNTT(STEP_1[i],W_0,q,size0)
799 |         else:
800 |             temp0 = size0
801 |             STEP_1 = matrix(poly, int(N/temp0))
802 |             W_0 = (W ** int(N/temp0)) % q
803 |             for i in range(int(N/temp0)):
804 |                 STEP_1[i] =  self.CooleyTukeyNTT(STEP_1[i],W_0,q)
805 | 
806 |         # STEP.2 - Transpose
807 |         STEP_2 = [ [row[c] for row in STEP_1 if c < len(row)] for c in range(0, max([len(row) for row in STEP_1])) ]
808 |         # STEP_2 = list(zip(*STEP_1))
809 | 
810 |         # STEP.3 - Multiply with twiddle factor of N-pt NTT
811 |         STEP_3 = [[0]*int(N/temp0)]*size0
812 |         for i in range(temp0):
813 |             STEP_3[i] = [(STEP_2[i][k] * (W ** (i*k)) % q) for k in range(int(N/temp0))]
814 | 
815 |         temp1 = 1
816 |         #STEP.4
817 |         if isinstance(size1, list):
818 |             for i in size1:
819 |                 temp1 = temp0 * i
820 |             W_1 = (W ** int(N/temp1)) % q
821 |             for i in range(int(N/temp1)):
822 |                 STEP_3[i] = self.FourStepNTT(STEP_3[i],W_1,q,size1)
823 |         else:
824 |             temp1 = size1
825 |             W_1 = (W ** int(N/temp1)) % q
826 |             for i in range(int(N/temp1)):
827 |                 STEP_3[i] = self.CooleyTukeyNTT(STEP_3[i],W_1,q)
828 | 
829 |         # Final transpose
830 |         STEP_4 = [ [row[c] for row in STEP_3 if c < len(row)] for c in range(0, max([len(row) for row in STEP_3])) ]
831 |         # STEP_4 = list(zip(*STEP_3))
832 | 
833 |         # Convert matrix into array
834 |         STEP_4 = [item for sublist in STEP_4 for item in sublist]
835 | 
836 |         return STEP_4
837 | 
838 |     # CT-Based Constant-Geometry NTT
839 |     # A: input polynomial (Bit-reversed order)
840 |     # W: twiddle factor
841 |     # q: modulus
842 |     # B: output polynomial (standard order)
843 |     def CTBased_ConstantGeometryNTT(self,A,W,q):
844 |         N = len(A)
845 |         v = int(log(N,2))
846 | 
847 |         #B = indexReverse(A,v)
848 |         B = [_ for _ in A]
849 |         C = [0 for _ in range(N)]
850 | 
851 |         for s in range(1,v+1):
852 |             for j in range(int(N/2)):
853 |                 k = int(floor(j/(2**(v-s)))*(2**(v-s)))
854 | 
855 |                 TW = pow(W,k,q)
856 | 
857 |                 C[j           ] = (B[2*j] + B[2*j+1]*TW) % q
858 |                 C[j + int(N/2)] = (B[2*j] - B[2*j+1]*TW) % q
859 | 
860 |             if s != v:
861 |                 B = [_ for _ in C]
862 | 
863 |         return C
864 | 
865 |     ######################################################################## (INTT)
866 |     """
867 |     List of INTT Algorithms: (from literature): NTT algorithms with extra n^-1 mod q multiplication
868 |     -- Recursive Cooley-Tukey (CT) NTT (see http://people.scs.carleton.ca/~maheshwa/courses/5703COMP/16Fall/FFT_Report.pdf)
869 |     -- Iterative NTT (see https://eprint.iacr.org/2019/109.pdf)
870 |     -- Constant-Geometry NTT (see https://tches.iacr.org/index.php/TCHES/article/view/8344/7692 or https://eprint.iacr.org/2014/646.pdf)
871 |     -- Stockham NTT (see https://ieeexplore.ieee.org/document/8717615)
872 |     -- Four-Step NTT (see https://eprint.iacr.org/2015/818.pdf)
873 |     """
874 | 
875 |     def CooleyTukeyINTT(self,A,W_inv,q):
876 |         N_inv = modinv(len(A),q)
877 |         B = [(x*N_inv) % q for x in self.CooleyTukeyNTT(A,W_inv,q)]
878 |         return B
879 | 
880 |     def IterativeINTT(self,A,W_inv,q):
881 |         N_inv = modinv(len(A),q)
882 |         B = [(x*N_inv) % q for x in self.IterativeNTT(A,W_inv,q)]
883 |         return B
884 | 
885 |     def FourStepINTT(self,A,W_inv,q,size):
886 |         N_inv = modinv(len(A),q)
887 |         B = [(x*N_inv) % q for x in self.FourStepNTT(A,W_inv,q,size)]
888 |         return B
889 | 
890 |     def FourStepINTTv2(self,A,W_inv,q,size):
891 |         N_inv = modinv(len(A),q)
892 |         B = [(x*N_inv) % q for x in self.FourStepNTTv2(A,W_inv,q,size)]
893 |         return B
894 | 
895 |     def CTBased_ConstantGeometryINTT(self,A,W_inv,q):
896 |         N_inv = modinv(len(A),q)
897 |         B = [(x*N_inv) % q for x in self.CTBased_ConstantGeometryNTT(A,W_inv,q)]
898 |         return B
899 | #
900 | 


--------------------------------------------------------------------------------
/baseline/ntt_demo.py:
--------------------------------------------------------------------------------
  1 | from math import log
  2 | from random import randint
  3 | 
  4 | from generate_prime import *
  5 | from helper import *
  6 | from ntt import *
  7 | 
  8 | # Parameter generation
  9 | 
 10 | # Determine n and bit-size of q, then find a q satisfying
 11 | # the condition: q = 1 (mod 2n) or q = 1 (mod n)
 12 | #
 13 | # Based on n and q, generate NTT parameters
 14 | 
 15 | mod     = 2 # if 1 --> q = 1 (mod n), if 2 --> q = 1 (mod 2n)
 16 | n       = 64
 17 | size    = [8,8]
 18 | q_bit   = 10
 19 | 
 20 | q       = 0
 21 | w       = 0
 22 | w_inv   = 0
 23 | psi     = 0
 24 | psi_inv = 0
 25 | 
 26 | # Generate parameters
 27 | wfound = False
 28 | while(not(wfound)):
 29 |     q = generate_large_prime(q_bit)
 30 | 
 31 |     # check q = 1 (mod n or 2n)
 32 |     while (not ((q % (mod*n)) == 1)):
 33 |         q = generate_large_prime(q_bit)
 34 | 
 35 |     # generate NTT parameters
 36 |     for i in range(2,q-1):
 37 |         wfound = isrootofunity(i,mod*n,q)
 38 |         if wfound:
 39 |             if mod == 1:
 40 |                 psi    = 0
 41 |                 psi_inv= 0
 42 |                 w      = i
 43 |                 w_inv  = modinv(w,q)
 44 |             else:
 45 |                 psi    = i
 46 |                 psi_inv= modinv(psi,q)
 47 |                 w      = pow(psi,2,q)
 48 |                 w_inv  = modinv(w,q)
 49 |             break
 50 | 
 51 | # Print parameters
 52 | print("Parameters")
 53 | print("n      : {}".format(n))
 54 | print("q      : {}".format(q))
 55 | print("w      : {}".format(w))
 56 | print("w_inv  : {}".format(w_inv))
 57 | print("psi    : {}".format(psi))
 58 | print("psi_inv: {}".format(psi_inv))
 59 | print("")
 60 | 
 61 | #NOTE: Comment Out Naive Method for Large Parameters
 62 | 
 63 | # Demo
 64 | # Random A
 65 | A = [randint(0,q-1) for x in range(n)]
 66 | 
 67 | # Reversed A
 68 | A_rev = indexReverse(A,int(log(n,2)))
 69 | 
 70 | # NTT operation
 71 | Evaluator = NTT()
 72 | 
 73 | # Reference NTT operation
 74 | REF = Evaluator.NaiveNTT_NN(A,w,q)
 75 | 
 76 | # Reversed N0
 77 | REF_rev = indexReverse(REF,int(log(n,2)))
 78 | 
 79 | # NTT operations
 80 | N0 = Evaluator.NaiveNTT_NR(A,w,q)
 81 | N1 = Evaluator.Radix2_DIT_Recursive_NTT(A,w,q)
 82 | N2 = Evaluator.Radix2_DIF_Recursive_NTT(A,w,q)
 83 | N3 = Evaluator.Radix2_DIF_Iterative_NTT_NR(A,w,q)
 84 | N4 = Evaluator.Radix2_DIF_Iterative_NTT_RN(A_rev,w,q)
 85 | N5 = Evaluator.Radix2_DIF_Iterative_NTT_NN(A,w,q)
 86 | N6 = Evaluator.Radix2_DIT_Iterative_NTT_NR(A,w,q)
 87 | N7 = Evaluator.Radix2_DIT_Iterative_NTT_RN(A_rev,w,q)
 88 | N8 = Evaluator.Radix2_DIT_Iterative_NTT_NN(A,w,q)
 89 | N9 = Evaluator.CRT_Recursive_NTT(A,w,q)
 90 | N10= Evaluator.CRT_Full_NTT(A,w,q)
 91 | N11= Evaluator.CooleyTukeyNTT(A,w,q)
 92 | N12= Evaluator.IterativeNTT(A,w,q)
 93 | N13= Evaluator.FourStepNTT(A,w,q,size)
 94 | N14= Evaluator.FourStepNTTv2(A,w_inv,q,size)
 95 | N15= Evaluator.CTBased_ConstantGeometryNTT(A_rev,w,q)
 96 | 
 97 | # Check NTT
 98 | print("-------- Sanity check for NTT operations --------")
 99 | # print("A         : {}".format(A))
100 | # print("br(A)     : {}".format(A_rev))
101 | # print("NTT(A)    : {}".format(REF))
102 | # print("br(NTT(A)): {}".format(REF_rev))
103 | print("")
104 | print("NaiveNTT_NR                    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N0)]) == 0) else "Wrong"))
105 | print("Radix2_DIT_Recursive_NTT       -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N1)]) == 0) else "Wrong"))
106 | print("Radix2_DIF_Recursive_NTT       -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N2)]) == 0) else "Wrong"))
107 | print("Radix2_DIF_Iterative_NTT_NR    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N3)]) == 0) else "Wrong"))
108 | print("Radix2_DIF_Iterative_NTT_RN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N4)]) == 0) else "Wrong"))
109 | print("Radix2_DIF_Iterative_NTT_NN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N5)]) == 0) else "Wrong"))
110 | print("Radix2_DIT_Iterative_NTT_NR    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N6)]) == 0) else "Wrong"))
111 | print("Radix2_DIT_Iterative_NTT_RN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N7)]) == 0) else "Wrong"))
112 | print("Radix2_DIT_Iterative_NTT_NN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N8)]) == 0) else "Wrong"))
113 | print("CRT_Recursive_NTT              -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N9)]) == 0) else "Wrong"))
114 | print("CRT_Full_NTT                   -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N10)]) == 0) else "Wrong"))
115 | print("CooleyTukeyNTT                 -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N11)]) == 0) else "Wrong"))
116 | print("IterativeNTT                   -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N12)]) == 0) else "Wrong"))
117 | print("FourStepNTT                    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N13)]) == 0) else "Wrong"))
118 | print("FourStepNTTv2                  -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N14)]) == 0) else "Wrong"))
119 | print("CTBased_ConstantGeometryNTT    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N15)]) == 0) else "Wrong"))
120 | print("")
121 | 
122 | # INTT operations
# Inverse transforms of the reference result. R4, R7 and R15 are fed the
# index-reversed reference; R14 is called with w instead of w_inv.
R0  = Evaluator.NaiveINTT_NR(REF,w_inv,q)
R1  = Evaluator.Radix2_DIT_Recursive_INTT(REF,w_inv,q)
R2  = Evaluator.Radix2_DIF_Recursive_INTT(REF,w_inv,q)
R3  = Evaluator.Radix2_DIF_Iterative_INTT_NR(REF,w_inv,q)
R4  = Evaluator.Radix2_DIF_Iterative_INTT_RN(REF_rev,w_inv,q)
R5  = Evaluator.Radix2_DIF_Iterative_INTT_NN(REF,w_inv,q)
R6  = Evaluator.Radix2_DIT_Iterative_INTT_NR(REF,w_inv,q)
R7  = Evaluator.Radix2_DIT_Iterative_INTT_RN(REF_rev,w_inv,q)
R8  = Evaluator.Radix2_DIT_Iterative_INTT_NN(REF,w_inv,q)
R9  = Evaluator.CRT_Recursive_INTT(REF,w_inv,q)
R10 = Evaluator.CRT_Full_INTT(REF,w_inv,q)
R11 = Evaluator.CooleyTukeyINTT(REF,w_inv,q)
R12 = Evaluator.IterativeINTT(REF,w_inv,q)
R13 = Evaluator.FourStepINTT(REF,w_inv,q,size)
R14 = Evaluator.FourStepINTTv2(REF,w,q,size)
R15 = Evaluator.CTBased_ConstantGeometryINTT(REF_rev,w_inv,q)
139 | 
140 | # Check INTT
print("-------- Sanity check for INTT operations --------")
# Debug aids:
# print("NTT(A)    : {}".format(REF))
# print("br(NTT(A)): {}".format(REF_rev))
# print("A         : {}".format(A))
# print("br(A)     : {}".format(A_rev))
print("")
# (label, expected result, computed result); a row is "Correct" when the
# element-wise absolute differences sum to zero
for label, expected, actual in (
    ("NaiveINTT_NR                    -->", A_rev, R0),
    ("Radix2_DIT_Recursive_INTT       -->", A,     R1),
    ("Radix2_DIF_Recursive_INTT       -->", A_rev, R2),
    ("Radix2_DIF_Iterative_INTT_NR    -->", A_rev, R3),
    ("Radix2_DIF_Iterative_INTT_RN    -->", A,     R4),
    ("Radix2_DIF_Iterative_INTT_NN    -->", A,     R5),
    ("Radix2_DIT_Iterative_INTT_NR    -->", A_rev, R6),
    ("Radix2_DIT_Iterative_INTT_RN    -->", A,     R7),
    ("Radix2_DIT_Iterative_INTT_NN    -->", A,     R8),
    ("CRT_Recursive_INTT              -->", A_rev, R9),
    ("CRT_Full_INTT                   -->", A,     R10),
    ("CooleyTukeyINTT                 -->", A,     R11),
    ("IterativeINTT                   -->", A_rev, R12),
    ("FourStepINTT                    -->", A,     R13),
    ("FourStepINTTv2                  -->", A,     R14),
    ("CTBased_ConstantGeometryINTT    -->", A,     R15),
):
    mismatch = sum([abs(x-y) for x,y in zip(expected,actual)])
    print(label + ("Correct" if(mismatch == 0) else "Wrong"))
print("")
164 | 
165 | #
166 | 


--------------------------------------------------------------------------------
/baseline/poly.py:
--------------------------------------------------------------------------------
   1 | from helper import *
   2 | from ntt import *
   3 | 
   4 | class Poly:
   5 |     """
   6 |     * These are baseline (not optimized) implementations *
   7 | 
   8 |     Reference Implementations
   9 |     --Reference Polynomial Multiplication (School-Book)
  10 |     --Reference Modular Polynomial Multiplication (School-Book)
  11 |     ---- Reduction polynomial: x^n - 1           (Positive wrapped convolution - PWC)
  12 |     ---- Reduction polynomial: x^n + 1           (Negative wrapped convolution - NWC)
  13 |     ---- Reduction polynomial: x^n - x^(n/2) + 1 (NTRU)
  14 |     --Reference Polynomial Reduction (with any reduction polynomial)
  15 | 
  16 |     NTT-based Polynomial Multiplication (no reduction)
  17 |     -- A(x),B(x): n-1 degree polynomials
  18 |     -- C(x)     : 2n-2 degree polynomial
  19 |     -- A(x),B(x) should be zero-padded to 2n before operation
  20 |     -- C(x)=A(x)*B(X) --> C=INTT_2n(NTT_2n(zero-padded A) . NTT_2n(zero-padded B))
  21 |     -- If there is a ring Z_q[x]/f(x), polynomial reduction should be applied separately
  22 | 
  23 |     NTT-based Modular Polynomial Multiplication with Carrier Modulus
  24 |     -- Polynomial reduction operation is performed separately
  25 |     -- If n is a power-of-two
  26 |     ---- Select a new ntt-friendly q with bit-size > log(n*q^2)
  27 |     ---- Perform "NTT-based Polynomial Multiplication" and then apply reduction
  28 |     -- If n is NOT a power-of-two
  29 |     ---- Zero-pad input polynomials to the closest power-of-two
  30 |     ---- Select a new ntt-friendly q with bit-size > log(n*q^2)
  31 |     ---- Perform "NTT-based Polynomial Multiplication" and then apply reduction
  32 | 
  33 |     NTT-based Modular Polynomial Multiplication with f(x)=x^n-1
  34 |     -- A(x),B(x): n-1 degree polynomials
  35 |     -- C(x)     : n-1 degree polynomial
  36 |     -- C(x)=A(x)*B(X) --> C=INTT_n(NTT_n(A) . NTT_n(B))
  37 | 
  38 |     NTT-based Modular Polynomial Multiplication with f(x)=x^n+1
  39 |     -- First implementation
  40 |     ---- A(x),B(x): n-1 degree polynomials
  41 |     ---- C(x)     : n-1 degree polynomial
  42 |     ---- C(x)=A(x)*B(X) --> C=PostProc(INTT_n(NTT_n(PreProc(A)) . NTT_n(PreProc(B))))
  43 |     -- Second implementation
  44 |     ---- A(x),B(x): n-1 degree polynomials
  45 |     ---- C(x)     : n-1 degree polynomial
  46 |     ---- C(x)=A(x)*B(X) --> C=INTT_n(MergedNTT_n(A) . MergedNTT_n(B))
  47 |     -- First method uses separate pre- and post-processing methods
  48 |     -- The second method merges pre- and post-processing with NTT and INTT, respectively
  49 | 
  50 |     CRT-based Modular Polynomial Multiplication
  51 |     -- f(x)=x^n-1
  52 |     -- f(x)=x^n+1 (with w)
  53 |     -- f(x)=x^n+1 (with psi)
  54 |     -- f(x)=x^n-x^(n/2)+1
  55 | 
  56 |     CRT-based Unified Modular Polynomial Multiplication
  57 |     -- with any final degree and any f(x)
  58 | 
  59 |     Other methods (not implemented in this class)
  60 |     -- Karatsuba
  61 |     -- Toom-Cook
  62 |     -- Schonhage-Strassen
  63 | 
  64 |     *******************************
  65 |     """
  66 | 
  67 |     # A,B: same degree polynomials
  68 |     # q: coefficient modulus
  69 |     # C: output polynomial
  70 |     def SchoolbookPolMul(self,A, B, q):
  71 |         C = [0] * (2 * len(A))
  72 |         for indexA, elemA in enumerate(A):
  73 |             for indexB, elemB in enumerate(B):
  74 |                 C[indexA + indexB] = (C[indexA + indexB] + elemA * elemB) % q
  75 |         return C
  76 | 
  77 |     # A: input polynomial
  78 |     # F: reduction polynomial
  79 |     # q: coefficient modulus
  80 |     # D: output polynomial
  81 |     # Assuming coefficient of largest degree of F is 1
  82 |     def PolRed(self,A,F,q):
  83 |         if len(A) < len(F):
  84 |             return A
  85 |         else:
  86 |             D = [_ for _ in A]
  87 |             R = [(-x) % q for x in F[0:len(F)-1]]
  88 |             for i in range(len(D)-1,len(F)-2,-1):
  89 |                 for j in range(len(R)):
  90 |                     D[i-1-j] = (D[i-1-j] + D[i]*R[len(R)-1-j]) % q
  91 |                 D[i] = 0
  92 |             return D[0:len(F)-1]
  93 | 
  94 |     # A,B: input polynomials in x^n-1
  95 |     # q: coefficient modulus
  96 |     # D: output polynomial in x^n-1
  97 |     def SchoolbookModPolMul_PWC(self,A, B, q):
  98 |         C = [0] * (2 * len(A))
  99 |         D = [0] * (len(A))
 100 |         for indexA, elemA in enumerate(A):
 101 |             for indexB, elemB in enumerate(B):
 102 |                 C[indexA + indexB] = (C[indexA + indexB] + elemA * elemB) % q
 103 | 
 104 |         for i in range(len(A)):
 105 |             D[i] = (C[i] + C[i + len(A)]) % q
 106 |         return D
 107 | 
 108 |     # A,B: input polynomials in x^n+1
 109 |     # q: coefficient modulus
 110 |     # D: output polynomial in x^n+1
 111 |     def SchoolbookModPolMul_NWC(self,A, B, q):
 112 |         C = [0] * (2 * len(A))
 113 |         D = [0] * (len(A))
 114 |         for indexA, elemA in enumerate(A):
 115 |             for indexB, elemB in enumerate(B):
 116 |                 C[indexA + indexB] = (C[indexA + indexB] + elemA * elemB) % q
 117 | 
 118 |         for i in range(len(A)):
 119 |             D[i] = (C[i] - C[i + len(A)]) % q
 120 |         return D
 121 | 
 122 |     # A,B: input polynomials in x^n-x^(n/2)+1 where n = 3*2^t
 123 |     # q: coefficient modulus
 124 |     # D: output polynomial in x^n-x^(n/2)+1 where n = 3*2^t
 125 |     def SchoolbookModPolMul_NTRU(self,A, B, q):
 126 |         C = [0] * (2 * len(A))
 127 |         for indexA, elemA in enumerate(A):
 128 |             for indexB, elemB in enumerate(B):
 129 |                 C[indexA + indexB] = (C[indexA + indexB] + elemA * elemB) % q
 130 |         D = [_ for _ in C]
 131 |         for i in range(2*len(A)-1,len(A)-1,-1):
 132 |             D[i-len(A)+int(len(A)/2)] = (D[i-len(A)+int(len(A)/2)] + D[i]) % q
 133 |             D[i-len(A)              ] = (D[i-len(A)              ] - D[i]) % q
 134 |             D[i] = 0
 135 |         return D[0:len(A)]
 136 | 
 137 |     # A,B: n-1 degree polynomials
 138 |     # w, w_inv: twiddle factors
 139 |     # q: coefficient modulus
 140 |     # C: 2n-2 degree polynomial
 141 |     # C(x)=A(x)*B(X) --> C=INTT_2n(NTT_2n(zero-padded A) . NTT_2n(zero-padded B))
 142 |     def NTTBasedPolMul(self,A,B,w,w_inv,q):
 143 |         N = len(A)
 144 |         A_padded = A + [0]*N
 145 |         B_padded = B + [0]*N
 146 | 
 147 |         Evaluator = NTT()
 148 | 
 149 |         A_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(A_padded,w,q)
 150 |         B_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(B_padded,w,q)
 151 | 
 152 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 153 | 
 154 |         C = Evaluator.Radix2_DIF_Iterative_INTT_RN(C_ntt,w_inv,q)
 155 |         return C
 156 | 
 157 |     # NTT-Based polynomial multiplication with carrier modulus
 158 |     # This can be implemented with other techniques we used so far
 159 |     # for k-bit modulus q, we need a log(n)+2*k-bit carrier modulus Q
 160 |     # After multiplication operation, separate polynomial reduction is required
 161 |     def NTTBasedPolMulwithCM(self,A,B,qw,qw_inv,q,Qw,Qw_inv,Q):
 162 |         """
 163 |         -- Polynomial reduction operation is performed separately
 164 |         -- If n is a power-of-two
 165 |         ---- Select a new ntt-friendly q with bit-size > log(n*q^2)
 166 |         ---- Perform "NTT-based Polynomial Multiplication" and then apply reduction
 167 |         -- If n is NOT a power-of-two
 168 |         ---- Zero-pad input polynomials to the closest power-of-two
 169 |         ---- Select a new ntt-friendly q with bit-size > log(n*q^2)
 170 |         ---- Perform "NTT-based Polynomial Multiplication" and then apply reduction
 171 |         """
 172 |         pass
 173 | 
 174 |     # NTT-Based Modular Polynomial Multiplication with f(x)=x^n-1 (Positive Wrapped Convolution)
 175 |     # A,B: n-1 degree polynomials
 176 |     # w, w_inv: twiddle factors
 177 |     # q: coefficient modulus
 178 |     # C: n-1 degree polynomial
 179 |     # C(x)=A(x)*B(X) --> C=INTT_n(NTT_n(A) . NTT_n(B))
 180 |     def NTTBasedModPolMul_PWC(self,A,B,w,w_inv,q):
 181 |         Evaluator = NTT()
 182 | 
 183 |         A_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(A,w,q)
 184 |         B_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(B,w,q)
 185 | 
 186 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 187 | 
 188 |         C = Evaluator.Radix2_DIF_Iterative_INTT_RN(C_ntt,w_inv,q)
 189 |         return C
 190 | 
 191 |     # NTT-Based Modular Polynomial Multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 192 |     # -- with separate pre-processing and post-processing
 193 |     # A,B: n-1 degree polynomials
 194 |     # w, w_inv: twiddle factors
 195 |     # q: coefficient modulus
 196 |     # C: n-1 degree polynomial
 197 |     # C(x)=A(x)*B(X) --> C=PostProc(INTT_n(NTT_n(PreProc(A)) . NTT_n(PreProc(B))))
 198 |     def NTTBasedModPolMul_NWC_v1(self,A,B,w,w_inv,psi,psi_inv,q):
 199 |         Evaluator = NTT()
 200 | 
 201 |         A_p = [(x*pow(psi,i,q)) % q for i,x in enumerate(A)]
 202 |         B_p = [(x*pow(psi,i,q)) % q for i,x in enumerate(B)]
 203 | 
 204 |         A_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(A_p,w,q)
 205 |         B_ntt = Evaluator.Radix2_DIT_Iterative_NTT_NR(B_p,w,q)
 206 | 
 207 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 208 | 
 209 |         C_p = Evaluator.Radix2_DIF_Iterative_INTT_RN(C_ntt,w_inv,q)
 210 | 
 211 |         C = [(x*pow(psi_inv,i,q)) % q for i,x in enumerate(C_p)]
 212 |         return C
 213 | 
 214 |     # NTT-Based Modular Polynomial Multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 215 |     # -- with merged pre-processing and post-processing
 216 |     # A,B: n-1 degree polynomials
 217 |     # w, w_inv: twiddle factors
 218 |     # q: coefficient modulus
 219 |     # C: n-1 degree polynomial
 220 |     # C(x)=A(x)*B(X) --> C=INTT_n(MergedNTT_n(A) . MergedNTT_n(B))
 221 |     def NTTBasedModPolMul_NWC_v2(self,A,B,psi,psi_inv,q):
 222 | 
 223 |         A_ntt = self.CTBasedMergedNTT_NR(A,psi,q)
 224 |         B_ntt = self.CTBasedMergedNTT_NR(B,psi,q)
 225 | 
 226 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 227 | 
 228 |         C = self.GSBasedMergedINTT_RN(C_ntt,psi_inv,q)
 229 |         return C
 230 | 
 231 |     # CRT-based modular polynomial multiplication with f(x)=x^n-1 (Positive Wrapped Convolution)
 232 |     # It is using CRT-based NTT instead of regular NTT
 233 |     # A,B: n-1 degree polynomials
 234 |     # w, w_inv: twiddle factors
 235 |     # q: coefficient modulus
 236 |     # C: n-1 degree polynomial
 237 |     # C(x)=A(x)*B(X) --> C=INTT_n(NTT_n(A) . NTT_n(B))
 238 |     def CRTBasedModPolMul_PWC(self,A,B,w,w_inv,q):
 239 |         # Note: if you use CRT_Recursive_NTT, output of NTT operation will be in bit-reversed order
 240 |         Evaluator = NTT()
 241 | 
 242 |         A_ntt = Evaluator.CRT_Full_NTT(A,w,q)
 243 |         B_ntt = Evaluator.CRT_Full_NTT(B,w,q)
 244 | 
 245 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 246 | 
 247 |         C = Evaluator.CRT_Full_INTT(C_ntt,w_inv,q)
 248 |         return C
 249 | 
 250 |     # CRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 251 |     # -- utilizing CRT instead of NTT
 252 |     # -- it is using psi (q = 1 mod 2n) and final degree of CRT reduction is 1
 253 |     # -- It is same as MergedNTT method (as shown in CRTBasedModPolMul_PWC)
 254 |     # A,B: n-1 degree polynomials
 255 |     # w, w_inv: twiddle factors
 256 |     # q: coefficient modulus
 257 |     # C: n-1 degree polynomial
 258 |     def CRTBasedModPolMul_NWC_FD1(self,A,B,psi,psi_inv,q):
 259 |         A_ntt = self.CRT_Iterative_NWC_FD1(A,psi,q)
 260 |         B_ntt = self.CRT_Iterative_NWC_FD1(B,psi,q)
 261 | 
 262 |         C_ntt = [(x*y) % q for x,y in zip(A_ntt,B_ntt)]
 263 | 
 264 |         C = self.ICRT_Iterative_NWC_FD1(C_ntt,psi_inv,q)
 265 |         return C
 266 | 
 267 |     # CRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 268 |     # -- utilizing CRT instead of NTT
 269 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 2
 270 |     # -- it is using Iterative version of reduction function
 271 |     # A,B: n-1 degree polynomials
 272 |     # w, w_inv: twiddle factors
 273 |     # q: coefficient modulus
 274 |     # C: n-1 degree polynomial
 275 |     def CRTBasedModPolMul_NWC_FD2(self,A,B,w,w_inv,q):
 276 |         A_ntt = self.CRT_Iterative_NWC_FD2_NR(A,w,q)
 277 |         B_ntt = self.CRT_Iterative_NWC_FD2_NR(B,w,q)
 278 | 
 279 |         C_ntt = [0 for _ in range(len(A))]
 280 | 
 281 |         # Degree-2 modular polynomial multiplications
 282 |         for i in range(len(A)//2):
 283 |             w_pow = 2*intReverse(i,int(log(len(A)//2,2)))+1
 284 |             wk    = pow(w,w_pow,q)
 285 |             C_ntt[2*i:2*i+2] = self.PolWiseMult(A_ntt[2*i:2*i+2],B_ntt[2*i:2*i+2],wk,2,q)
 286 | 
 287 |         # NOTE: it is using w. (We cen convert it into w_inv by modification)
 288 |         C = self.ICRT_Iterative_NWC_FD2_RN(C_ntt,w_inv,q)
 289 | 
 290 |         return C
 291 | 
 292 |     # CRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 293 |     # -- utilizing CRT instead of NTT
 294 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is a variable (power-of-two, 2, 4, ...)
 295 |     # -- it is using Iterative version of reduction function
 296 |     # A,B: n-1 degree polynomials
 297 |     # w, w_inv: twiddle factors
 298 |     # q: coefficient modulus
 299 |     # C: n-1 degree polynomial
 300 |     def CRTBasedModPolMul_NWC_FDV(self,A,B,w,w_inv,q,findeg):
 301 |         A_ntt = self.CRT_Iterative_NWC_FDV_NR(A,w,q,findeg)
 302 |         B_ntt = self.CRT_Iterative_NWC_FDV_NR(B,w,q,findeg)
 303 | 
 304 |         C_ntt = [0 for _ in range(len(A))]
 305 | 
 306 |         # Degree-findeg modular polynomial multiplications
 307 |         for i in range(len(A)//findeg):
 308 |             w_pow = 2*intReverse(i,int(log(len(A)//findeg,2)))+1
 309 |             wk    = pow(w,w_pow,q)
 310 |             C_ntt[findeg*i:findeg*i+findeg] = self.PolWiseMult(A_ntt[findeg*i:findeg*i+findeg],B_ntt[findeg*i:findeg*i+findeg],wk,findeg,q)
 311 | 
 312 |         C = self.ICRT_Iterative_NWC_FDV_RN(C_ntt,w_inv,q,findeg)
 313 | 
 314 |         return C
 315 | 
 316 |     # CRT-based modular polynomial multiplication with f(x)=x^n-x^(n/2)+1 (NTRU)
 317 |     # -- utilizing CRT instead of NTT
 318 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 3
 319 |     # -- it is using Iterative version of reduction function
 320 |     # A,B: n-1 degree polynomials
 321 |     # w, w_inv: twiddle factors
 322 |     # q: coefficient modulus
 323 |     # C: n-1 degree polynomial
 324 |     def CRTBasedModPolMul_NTRU_FD3(self,A,B,w,w_inv,ntrupowersf,ntrupowersb,ntrupowersi,q):
 325 |         # Initial reduction
 326 |         A_r = [_ for _ in A]
 327 |         B_r = [_ for _ in B]
 328 | 
 329 |         wk = pow(w,len(A)//6,q)
 330 |         for i in range(len(A)//2):
 331 |             t1 = (wk*A[i+len(A)//2]) % q
 332 |             t2 = (wk*B[i+len(B)//2]) % q
 333 |             A_r[i+len(A)//2] = (A[i]+A[i+len(A)//2]-t1)%q
 334 |             A_r[i]           = (A[i]               +t1)%q
 335 |             B_r[i+len(B)//2] = (B[i]+B[i+len(B)//2]-t2)%q
 336 |             B_r[i]           = (B[i]               +t2)%q
 337 | 
 338 |         # NTT
 339 |         A_ntt = self.CRT_Iterative_NTRU_FD3_NR(A_r,w,ntrupowersf,q)
 340 |         B_ntt = self.CRT_Iterative_NTRU_FD3_NR(B_r,w,ntrupowersf,q)
 341 | 
 342 |         C_ntt = [0 for _ in range(len(A))]
 343 | 
 344 |         # Degree-findeg modular polynomial multiplications
 345 |         for i in range(len(A)//3):
 346 |             w_pow = ntrupowersb[i]
 347 |             wk    = pow(w,w_pow,q)
 348 |             C_ntt[3*i:3*i+3] = self.PolWiseMult(A_ntt[3*i:3*i+3],B_ntt[3*i:3*i+3],wk,3,q)
 349 | 
 350 |         # INTT
 351 |         C = self.ICRT_Iterative_NTRU_FD3_RN(C_ntt,w_inv,ntrupowersi,q)
 352 | 
 353 |         # Final reconstruction
 354 |         C_r = [_ for _ in C]
 355 | 
 356 |         wk = modinv((2*pow(w,len(C)//6,q)-1)%q,q)
 357 | 
 358 |         for i in range(len(C)//2):
 359 |             t = ((C[i]-C[i+len(C)//2])*wk)%q   # t = f[i+N//2]
 360 |             C_r[i          ] = (C[i]+C[i+len(C)//2]-t)%q
 361 |             C_r[i+len(C)//2] = (2*t)%q
 362 | 
 363 |         return C_r
 364 | 
 365 | 
 366 |     def CRTBasedModPolMul_NTRU_FDV(self,A,B,w,w_inv,ntrupowersf,ntrupowersb,ntrupowersi,q,findeg):
 367 |         # Initial reduction
 368 |         A_r = [_ for _ in A]
 369 |         B_r = [_ for _ in B]
 370 | 
 371 |         wk = pow(w,(len(A)//6),q)
 372 |         # wk = pow(w,(len(A)//6)//(findeg//3),q)
 373 | 
 374 |         for i in range(len(A)//2):
 375 |             t1 = (wk*A[i+len(A)//2]) % q
 376 |             t2 = (wk*B[i+len(B)//2]) % q
 377 |             A_r[i+len(A)//2] = (A[i]+A[i+len(A)//2]-t1)%q
 378 |             A_r[i]           = (A[i]               +t1)%q
 379 |             B_r[i+len(B)//2] = (B[i]+B[i+len(B)//2]-t2)%q
 380 |             B_r[i]           = (B[i]               +t2)%q
 381 | 
 382 |         # NTT
 383 |         A_ntt = self.CRT_Iterative_NTRU_FDV_NR(A_r,w,ntrupowersf,q,findeg)
 384 |         B_ntt = self.CRT_Iterative_NTRU_FDV_NR(B_r,w,ntrupowersf,q,findeg)
 385 | 
 386 |         C_ntt = [0 for _ in range(len(A))]
 387 | 
 388 |         # Degree-findeg modular polynomial multiplications
 389 |         for i in range(len(A)//findeg):
 390 |             w_pow = ((findeg//3)*ntrupowersb[i*(findeg//3)])
 391 |             # w_pow = ((findeg//3)*ntrupowersb[i*(findeg//3)])//(findeg//3)
 392 | 
 393 |             wk    = pow(w,w_pow,q)
 394 |             C_ntt[findeg*i:findeg*i+findeg] = self.PolWiseMult(A_ntt[findeg*i:findeg*i+findeg],B_ntt[findeg*i:findeg*i+findeg],wk,findeg,q)
 395 | 
 396 |         # INTT
 397 |         C = self.ICRT_Iterative_NTRU_FDV_RN(C_ntt,w_inv,ntrupowersi,q,findeg)
 398 | 
 399 |         # Final reconstruction
 400 |         C_r = [_ for _ in C]
 401 | 
 402 |         wk = modinv((2*pow(w,(len(C)//6),q)-1)%q,q)
 403 |         # wk = modinv((2*pow(w,(len(C)//6)//(findeg//3),q)-1)%q,q)
 404 | 
 405 |         for i in range(len(C)//2):
 406 |             t = ((C[i]-C[i+len(C)//2])*wk)%q   # t = f[i+N//2]
 407 |             C_r[i          ] = (C[i]+C[i+len(C)//2]-t)%q
 408 |             C_r[i+len(C)//2] = (2*t)%q
 409 | 
 410 |         return C_r
 411 | 
 412 |     # A unified polynomial multiplication algorithm for all methods mentioned above
 413 |     # ring: 0 --> NWC  (x^n + 1)
 414 |     # -- findeg: 1
 415 |     # -- findeg: 2
 416 |     # -- findeg: 4
 417 |     # --   ...
 418 |     # ring: 1 --> NTRU (x^n - x^(n/2) + 1)
 419 |     # -- findeg: 3
 420 |     # -- findeg: 6
 421 |     # -- findeg: 12
 422 |     # --   ...
 423 |     # NOTE: Later I can add PWC (x^n - 1)
 424 |     # NOTE: Later I can add pure NTT/INTT operations
 425 |     def CRTBasedModPolMul_Unified(self,A,B,w,w_inv,q,ring,findeg,ntrupowersf=[],ntrupowersb=[],ntrupowersi=[]):
 426 |         # --------------------------------------------- Initial step
 427 |         if ring == 0:
 428 |             """
 429 |             NWC requires no initial reduction operation
 430 |             """
 431 |             A_r = [_ for _ in A]
 432 |             B_r = [_ for _ in B]
 433 |         else:
 434 |             """
 435 |             NTRU requires initial reduction
 436 |             """
 437 |             A_r = [_ for _ in A]
 438 |             B_r = [_ for _ in B]
 439 | 
 440 |             wk = pow(w,len(A)//6,q)
 441 |             for i in range(len(A)//2):
 442 |                 t1 = (wk*A[i+len(A)//2]) % q
 443 |                 t2 = (wk*B[i+len(B)//2]) % q
 444 |                 A_r[i+len(A)//2] = (A[i]+A[i+len(A)//2]-t1)%q
 445 |                 A_r[i]           = (A[i]               +t1)%q
 446 |                 B_r[i+len(B)//2] = (B[i]+B[i+len(B)//2]-t2)%q
 447 |                 B_r[i]           = (B[i]               +t2)%q
 448 | 
 449 |         # --------------------------------------------- NTT
 450 |         A_ntt = self.CRT_Iterative_Unified_NR(A_r,w,q,ring,findeg,ntrupowersf)
 451 |         B_ntt = self.CRT_Iterative_Unified_NR(B_r,w,q,ring,findeg,ntrupowersf)
 452 | 
 453 |         # --------------------------------------------- Degree-findeg modular polynomial multiplications
 454 |         C_ntt = [0 for _ in range(len(A))]
 455 |         for i in range(len(A)//findeg):
 456 |             if ring == 0:
 457 |                 # NWC
 458 |                 w_pow = 2*intReverse(i,int(log(len(A)//findeg,2)))+1
 459 |             else:
 460 |                 # NTRU
 461 |                 w_pow = ((findeg//3)*ntrupowersb[i*(findeg//3)])
 462 |                 # w_pow = ((findeg//3)*ntrupowersb[i*(findeg//3)])//(findeg//3)
 463 | 
 464 |             wk    = pow(w,w_pow,q)
 465 |             C_ntt[findeg*i:findeg*i+findeg] = self.PolWiseMult(A_ntt[findeg*i:findeg*i+findeg],B_ntt[findeg*i:findeg*i+findeg],wk,findeg,q)
 466 | 
 467 |         # --------------------------------------------- INTT
 468 |         C = self.ICRT_Iterative_Unified_RN(C_ntt,w_inv,q,ring,findeg,ntrupowersi)
 469 | 
 470 |         # --------------------------------------------- Final step
 471 |         if ring == 0:
 472 |             """
 473 |             NWC requires no final reconstruction step
 474 |             """
 475 |             return C
 476 |         else:
 477 |             """
 478 |             NTRU requires final reconstruction step
 479 |             """
 480 |             wk = modinv((2*pow(w,(len(C)//6),q)-1)%q,q)
 481 |             # wk = modinv((2*pow(w,(len(C)//6)//(findeg//3),q)-1)%q,q)
 482 | 
 483 |             for i in range(len(C)//2):
 484 |                 t = ((C[i]-C[i+len(C)//2])*wk)%q   # t = f[i+N//2]
 485 |                 C[i          ] = (C[i]+C[i+len(C)//2]-t)%q
 486 |                 C[i+len(C)//2] = (2*t)%q
 487 | 
 488 |             return C
 489 | 
 490 |     ############################################################################################ (Helper Function for Pol Mul Operations)
 491 | 
 492 |     # Multiplies two polynomials with "deg" coefficients modulo x^"deg"-w^k, where k is some power
 493 |     # A,B: input polynomials
 494 |     # wk: w^k
 495 |     # deg: degree
 496 |     # q: coefficient modulus
 497 |     # C: output polynomial
 498 |     def PolWiseMult(self,A,B,wk,deg,q):
 499 |         C = [0] * ((2 * deg)-1)
 500 |         # D = [0] * ((2 * deg)-1)
 501 | 
 502 |         if deg == 1:
 503 |             # if final degree is 1
 504 |             D = [(x*y)%q for x,y in zip(A,B)]
 505 |             return D[0:deg]
 506 |         else:
 507 |             # if final degree is larger than 1
 508 |             for indexA, elemA in enumerate(A):
 509 |                 for indexB, elemB in enumerate(B):
 510 |                     C[indexA + indexB] = (C[indexA + indexB] + elemA * elemB) % q
 511 | 
 512 |             D = [_ for _ in C]
 513 |             for i in range(len(A)-1):
 514 |                 D[i] = (C[i] + C[i + len(A)]*wk) % q
 515 | 
 516 |         return D[0:deg]
 517 | 
 518 |     # -------------------------------------------------------------------------- Iterative
 519 | 
 520 |     # Merged NTT with pre-processing (optimized) (iterative)
 521 |     # This is not NTT, this is pre-processing + NTT
 522 |     # (see: https://eprint.iacr.org/2016/504.pdf)
 523 |     # A: input polynomial (standard order)
 524 |     # Psi: 2n-th root of unity
 525 |     # q: modulus
 526 |     # B: output polynomial (bit-reversed order)
 527 |     def CTBasedMergedNTT_NR(self,A,Psi,q):
 528 |         N = len(A)
 529 |         B = [_ for _ in A]
 530 | 
 531 |         l = int(log(N,2))
 532 | 
 533 |         t = N
 534 |         m = 1
 535 |         while(m<N):
 536 |             t = int(t/2)
 537 |             for i in range(m):
 538 |                 j1 = 2*i*t
 539 |                 j2 = j1 + t - 1
 540 |                 Psi_pow = intReverse(m+i,l)
 541 |                 S = pow(Psi,Psi_pow,q)
 542 |                 for j in range(j1,j2+1):
 543 |                     U = B[j]
 544 |                     V = (B[j+t]*S) % q
 545 | 
 546 |                     B[j]   = (U+V) % q
 547 |                     B[j+t] = (U-V) % q
 548 |             m = 2*m
 549 | 
 550 |         return B
 551 | 
 552 |     # Merged INTT with post-processing (optimized) (iterative)
 553 |     # This is not INTT, this is INTT + post-processing
 554 |     # (see: https://eprint.iacr.org/2016/504.pdf)
 555 |     # A: input polynomial (Bit-reversed order)
 556 |     # Psi: 2n-th root of unity
 557 |     # q: modulus
 558 |     # B: output polynomial (standard order)
 559 |     def GSBasedMergedINTT_RN(self,A,Psi,q):
 560 |         N = len(A)
 561 |         B = [_ for _ in A]
 562 | 
 563 |         l = int(log(N,2))
 564 | 
 565 |         t = 1
 566 |         m = N
 567 |         while(m>1):
 568 |             j1 = 0
 569 |             h = int(m/2)
 570 |             for i in range(h):
 571 |                 j2 = j1 + t - 1
 572 |                 Psi_pow = intReverse(h+i,l)
 573 |                 S = pow(Psi,Psi_pow,q)
 574 |                 for j in range(j1,j2+1):
 575 |                     U = B[j]
 576 |                     V = B[j+t]
 577 | 
 578 |                     B[j]   = (U+V) % q
 579 |                     B[j+t] = (U-V)*S % q
 580 |                 j1 = j1 + 2*t
 581 |             t = 2*t
 582 |             m = int(m/2)
 583 | 
 584 |         N_inv = modinv(N, q)
 585 |         for i in range(N):
 586 |             B[i] = (B[i] * N_inv) % q
 587 | 
 588 |         return B
 589 | 
 590 |     # CRT-based Merged NTT for f(x)=x^n+1 (Negative Wrapped Convolution) (iterative)
 591 |     # Actually, same as CTBasedMergedNTT_NR() function
 592 |     # This is not NTT, this is pre-processing + NTT
 593 |     # A: input polynomial (standard order)
 594 |     # Psi: 2n-th root of unity
 595 |     # q: modulus
 596 |     # B: output polynomial (bit-reversed order)
 597 |     def CRT_Iterative_NWC_FD1(self,A,psi,q):
 598 |         return self.CTBasedMergedNTT_NR(A,psi,q)
 599 | 
 600 |     # ICRT-based Merged INTT for f(x)=x^n+1 (Negative Wrapped Convolution) (iterative)
 601 |     # Actually, same as GSBasedMergedINTT_RN() function
 602 |     # This is not INTT, this is INTT + post-processing
 603 |     # A: input polynomial (bit-reversed order)
 604 |     # Psi: 2n-th root of unity
 605 |     # q: modulus
 606 |     # B: output polynomial (standard order)
 607 |     def ICRT_Iterative_NWC_FD1(self,A,psi,q):
 608 |         return self.GSBasedMergedINTT_RN(A,psi,q)
 609 | 
 610 |     # CRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 611 |     # -- utilizing CRT instead of NTT (Iterative Version)
 612 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 2
 613 |     # A: input polynomial (standard order)
 614 |     # W: twiddle factor
 615 |     # q: modulus
 616 |     # B: output polynomial (bit-reversed order)
 617 |     # NOTE: 1 iteration less executed version of "CTBasedMergedNTT_NR"
 618 |     def CRT_Iterative_NWC_FD2_NR(self,A,w,q):
 619 |         N = len(A)
 620 |         B = [_ for _ in A]
 621 | 
 622 |         k=1
 623 |         lena = (N//2)
 624 | 
 625 |         v = int(log(lena,2))
 626 | 
 627 |         while lena >= 2:
 628 |             start = 0
 629 |             while start < N:
 630 |                 W_pow = intReverse(k,v)
 631 |                 W = pow(w,W_pow,q)
 632 |                 k = k+1
 633 |                 j = start
 634 |                 while(j < (start + lena)):
 635 |                     t = (W * B[j+lena]) % q
 636 | 
 637 |                     B[j+lena] = (B[j] - t) % q
 638 |                     B[j     ] = (B[j] + t) % q
 639 | 
 640 |                     j = j+1
 641 |                 start = j + lena
 642 |             lena = (lena//2)
 643 | 
 644 |         return B
 645 | 
 646 |     # ICRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 647 |     # -- utilizing ICRT instead of INTT (Iterative Version)
 648 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 2
 649 |     # A: input polynomial (bit-reversed order)
 650 |     # W: twiddle factor
 651 |     # q: modulus
 652 |     # B: output polynomial (standard order)
 653 |     # NOTE: 1 iteration less executed version of "GSBasedMergedINTT_RN"
 654 |     def ICRT_Iterative_NWC_FD2_RN(self,A,w,q):
 655 |         N = len(A)
 656 |         B = [_ for _ in A]
 657 | 
 658 |         k = 0
 659 |         lena = 2
 660 | 
 661 |         v = int(log(N//2,2))
 662 | 
 663 |         while lena <= (N//2):
 664 |             start = 0
 665 |             while start < N:
 666 |                 W_pow = intReverse(k,v)+1
 667 |                 TW = pow(w,W_pow,q)
 668 |                 """
 669 |                 W_pow and TW below use "w" instead of "w_inv"
 670 | 
 671 |                 W_pow = (N//2) - 1 - intReverse(k,v)
 672 |                 TW = (-pow(w,W_pow,q)) % q # here, "-" means an extra w^(n/2)
 673 |                 """
 674 |                 k = k+1
 675 |                 j = start
 676 |                 while(j < (start + lena)):
 677 |                     t = B[j]
 678 | 
 679 |                     B[j       ] = (t + B[j + lena]) % q
 680 |                     B[j + lena] = (t - B[j + lena]) % q
 681 |                     B[j + lena] = B[j + lena]*TW % q
 682 | 
 683 |                     j = j+1
 684 |                 start = j + lena
 685 |             lena = 2*lena
 686 | 
 687 |         N_inv = modinv(N//2,q)
 688 |         for i in range(N):
 689 |             B[i] = (B[i] * N_inv) % q
 690 | 
 691 |         return B
 692 | 
 693 |     # CRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 694 |     # -- utilizing CRT instead of NTT
 695 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is a variable (power-of-two, 2, 4, ...)
 696 |     # -- it is using Iterative version of reduction function
 697 |     # A: input polynomial (standard order)
 698 |     # W: twiddle factor
 699 |     # q: modulus
 700 |     # B: output polynomial (bit-reversed order)
 701 |     def CRT_Iterative_NWC_FDV_NR(self,A,w,q,findeg):
 702 |         N = len(A)
 703 |         B = [_ for _ in A]
 704 | 
 705 |         k=1
 706 |         lena = (N//2)
 707 | 
 708 |         v = int(log(N//findeg,2))
 709 | 
 710 |         while lena >= findeg:
 711 |             start = 0
 712 |             while start < N:
 713 |                 W_pow = intReverse(k,v)
 714 |                 W = pow(w,W_pow,q)
 715 |                 k = k+1
 716 |                 j = start
 717 |                 while(j < (start + lena)):
 718 |                     t = (W * B[j+lena]) % q
 719 | 
 720 |                     B[j+lena] = (B[j] - t) % q
 721 |                     B[j     ] = (B[j] + t) % q
 722 | 
 723 |                     j = j+1
 724 |                 start = j + lena
 725 |             lena = (lena//2)
 726 | 
 727 |         return B
 728 | 
 729 |     # ICRT-based modular polynomial multiplication with f(x)=x^n+1 (Negative Wrapped Convolution)
 730 |     # -- utilizing ICRT instead of INTT
 731 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is a variable (power-of-two, 2, 4, ...)
 732 |     # -- it is using Iterative version of reduction function
 733 |     # A: input polynomial (bit-reversed order)
 734 |     # W: twiddle factor
 735 |     # q: modulus
 736 |     # B: output polynomial (standard order)
 737 |     def ICRT_Iterative_NWC_FDV_RN(self,A,w,q,findeg):
 738 |         N = len(A)
 739 |         B = [_ for _ in A]
 740 | 
 741 |         k = 0
 742 |         lena = findeg
 743 | 
 744 |         v = int(log(N//findeg,2))
 745 | 
 746 |         while lena <= (N//2):
 747 |             start = 0
 748 |             while start < N:
 749 |                 """
 750 |                 W_pow = intReverse(k,v)+1
 751 |                 TW = (-pow(w,W_pow,q)) % q # here, "-" means an extra w^(n/2)
 752 |                 """
 753 |                 W_pow = intReverse(k,v)+1
 754 |                 TW = pow(w,W_pow,q)
 755 |                 k = k+1
 756 |                 j = start
 757 |                 while(j < (start + lena)):
 758 |                     t = B[j]
 759 | 
 760 |                     B[j       ] = (t + B[j + lena]) % q
 761 |                     B[j + lena] = (t - B[j + lena]) % q
 762 |                     B[j + lena] = B[j + lena]*TW % q
 763 | 
 764 |                     j = j+1
 765 |                 start = j + lena
 766 |             lena = 2*lena
 767 | 
 768 |         N_inv = modinv(N//findeg,q)
 769 |         for i in range(N):
 770 |             B[i] = (B[i] * N_inv) % q
 771 | 
 772 |         return B
 773 | 
 774 |     # CRT-based modular polynomial multiplication with f(x)=x^n-x^n/2+1 (NTRU)
 775 |     # -- utilizing CRT instead of NTT
 776 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 3
 777 |     # -- it is using Iterative version of reduction function
 778 |     # A: input polynomial (standard order)
 779 |     # W: twiddle factor
 780 |     # q: modulus
 781 |     # B: output polynomial (bit-reversed order)
 782 |     def CRT_Iterative_NTRU_FD3_NR(self,A,w,powers,q):
 783 |         N = len(A)
 784 |         B = [_ for _ in A]
 785 | 
 786 |         k=0
 787 |         lena = (N//4)
 788 | 
 789 |         while lena >= 3:
 790 |             start = 0
 791 |             while start < N:
 792 |                 W_pow = powers[k]
 793 |                 W = pow(w,W_pow,q)
 794 |                 k = k+1
 795 |                 j = start
 796 |                 while(j < (start + lena)):
 797 |                     t = (W * B[j+lena]) % q
 798 | 
 799 |                     B[j+lena] = (B[j] - t) % q
 800 |                     B[j     ] = (B[j] + t) % q
 801 | 
 802 |                     j = j+1
 803 |                 start = j + lena
 804 |             lena = (lena//2)
 805 | 
 806 |         return B
 807 | 
 808 |     # ICRT-based modular polynomial multiplication with f(x)=x^n-x^n/2+1 (NTRU)
 809 |     # -- utilizing ICRT instead of INTT
 810 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is 3
 811 |     # -- it is using Iterative version of reduction function
 812 |     # A: input polynomial (bit-reversed order)
 813 |     # W: twiddle factor
 814 |     # q: modulus
 815 |     # B: output polynomial (standard order)
 816 |     def ICRT_Iterative_NTRU_FD3_RN(self,A,w,powers,q):
 817 |         N = len(A)
 818 |         B = [_ for _ in A]
 819 | 
 820 |         k = 0
 821 |         lena = 3
 822 | 
 823 |         v = int(log(N//3,2))
 824 | 
 825 |         while lena <= (N//4):
 826 |             start = 0
 827 |             while start < N:
 828 |                 W_pow = powers[k]
 829 |                 TW = pow(w,W_pow,q)
 830 |                 k = k+1
 831 |                 j = start
 832 |                 while(j < (start + lena)):
 833 |                     t = B[j]
 834 | 
 835 |                     B[j       ] = (t + B[j + lena]) % q
 836 |                     B[j + lena] = (t - B[j + lena]) % q
 837 |                     B[j + lena] = B[j + lena]*TW % q
 838 | 
 839 |                     j = j+1
 840 |                 start = j + lena
 841 |             lena = 2*lena
 842 | 
 843 |         N_inv = modinv(N//3,q)
 844 |         for i in range(N):
 845 |             B[i] = (B[i] * N_inv) % q
 846 | 
 847 |         return B
 848 | 
 849 |     # CRT-based modular polynomial multiplication with f(x)=x^n-x^n/2+1 (NTRU)
 850 |     # -- utilizing CRT instead of NTT
 851 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is a variable (3,6,12,...)
 852 |     # -- it is using Iterative version of reduction function
 853 |     # A: input polynomial (standard order)
 854 |     # W: twiddle factor
 855 |     # q: modulus
 856 |     # B: output polynomial (bit-reversed order)
 857 |     def CRT_Iterative_NTRU_FDV_NR(self,A,w,powers,q,findeg):
 858 |         N = len(A)
 859 |         B = [_ for _ in A]
 860 | 
 861 |         k=0
 862 |         lena = (N//4)
 863 | 
 864 |         while lena >= findeg:
 865 |             start = 0
 866 |             while start < N:
 867 |                 W_pow = powers[k]
 868 |                 # W_pow = (powers[k] // (findeg//3))
 869 | 
 870 |                 W = pow(w,W_pow,q)
 871 |                 k = k+1
 872 |                 j = start
 873 |                 while(j < (start + lena)):
 874 |                     t = (W * B[j+lena]) % q
 875 | 
 876 |                     B[j+lena] = (B[j] - t) % q
 877 |                     B[j     ] = (B[j] + t) % q
 878 | 
 879 |                     j = j+1
 880 |                 start = j + lena
 881 |             lena = (lena//2)
 882 | 
 883 |         return B
 884 | 
 885 |     # ICRT-based modular polynomial multiplication with f(x)=x^n-x^n/2+1 (NTRU)
 886 |     # -- utilizing ICRT instead of INTT
 887 |     # -- it is using w (q = 1 mod n) and final degree of CRT reduction is a variable (3,6,12,...)
 888 |     # -- it is using Iterative version of reduction function
 889 |     # A: input polynomial (bit-reversed order)
 890 |     # W: twiddle factor
 891 |     # q: modulus
 892 |     # B: output polynomial (standard order)
 893 |     def ICRT_Iterative_NTRU_FDV_RN(self,A,w,powers,q,findeg):
 894 |         N = len(A)
 895 |         B = [_ for _ in A]
 896 | 
 897 |         k = 0
 898 |         lena = findeg
 899 | 
 900 |         # Powers need to be adjusted accordingly
 901 |         powers_new = [_ for _ in powers]
 902 |         i = findeg
 903 |         r = 1
 904 |         while(i >= 6):
 905 |             powers_new = powers_new[N//(6*r):]
 906 |             i = (i//2)
 907 |             r = 2*r
 908 | 
 909 |         while lena <= (N//4):
 910 |             start = 0
 911 |             while start < N:
 912 |                 W_pow = powers_new[k]
 913 |                 # W_pow = (powers_new[k] // (findeg//3))
 914 | 
 915 |                 TW = pow(w,W_pow,q)
 916 |                 k = k+1
 917 |                 j = start
 918 |                 while(j < (start + lena)):
 919 |                     t = B[j]
 920 | 
 921 |                     B[j       ] = (t + B[j + lena]) % q
 922 |                     B[j + lena] = (t - B[j + lena]) % q
 923 |                     B[j + lena] = B[j + lena]*TW % q
 924 | 
 925 |                     j = j+1
 926 |                 start = j + lena
 927 |             lena = 2*lena
 928 | 
 929 |         N_inv = modinv(N//findeg,q)
 930 |         for i in range(N):
 931 |             B[i] = (B[i] * N_inv) % q
 932 | 
 933 |         return B
 934 | 
 935 |     # CRT-based unified structure for NWC and NTRU
 936 |     # -- ring  : 0->NWC, 1->NTRU
 937 |     # -- findeg: 1,2,4,... for NWC and 3,6,12,... for NTRU
 938 |     # A: input polynomial (standard order)
 939 |     # W: twiddle factor
 940 |     # q: modulus
 941 |     # B: output polynomial (bit-reversed order)
 942 |     def CRT_Iterative_Unified_NR(self,A,w,q,ring,findeg,powers):
 943 |         N = len(A)
 944 |         B = [_ for _ in A]
 945 | 
 946 |         if ring == 0:
 947 |             # NWC
 948 |             k = 1
 949 |             lena = (N//2)
 950 |             v = int(log(N//findeg,2))
 951 |         else:
 952 |             # NTRU
 953 |             k = 0
 954 |             lena = (N//4)
 955 | 
 956 |         while lena >= findeg:
 957 |             start = 0
 958 |             while start < N:
 959 |                 if ring == 0:
 960 |                     # NWC
 961 |                     W_pow = intReverse(k,v)
 962 |                 else:
 963 |                     # NTRU
 964 |                     W_pow = powers[k]
 965 |                     # W_pow = (powers[k] // (findeg//3))
 966 | 
 967 |                 W = pow(w,W_pow,q)
 968 |                 k = k+1
 969 |                 j = start
 970 |                 while(j < (start + lena)):
 971 |                     t = (W * B[j+lena]) % q
 972 | 
 973 |                     B[j+lena] = (B[j] - t) % q
 974 |                     B[j     ] = (B[j] + t) % q
 975 | 
 976 |                     j = j+1
 977 |                 start = j + lena
 978 |             lena = (lena//2)
 979 | 
 980 |         return B
 981 | 
 982 |     # ICRT-based unified structure for NWC and NTRU
 983 |     # -- ring  : 0->NWC, 1->NTRU
 984 |     # -- findeg: 1,2,4,... for NWC and 3,6,12,... for NTRU
 985 |     # A: input polynomial (bit-reversed order)
 986 |     # W: twiddle factor
 987 |     # q: modulus
 988 |     # B: output polynomial (standard order)
 989 |     def ICRT_Iterative_Unified_RN(self,A,w,q,ring,findeg,powers):
 990 |         N = len(A)
 991 |         B = [_ for _ in A]
 992 | 
 993 |         k = 0
 994 |         lena = findeg
 995 | 
 996 |         if ring == 0:
 997 |             # NWC
 998 |             v = int(log(N//findeg,2))
 999 |             lena_limit = (N//2)
1000 |         else:
1001 |             # NTRU
1002 |             powers_new = [_ for _ in powers]
1003 |             i = findeg
1004 |             r = 1
1005 |             while(i >= 6):
1006 |                 powers_new = powers_new[N//(6*r):]
1007 |                 i = (i//2)
1008 |                 r = 2*r
1009 |             lena_limit = (N//4)
1010 | 
1011 |         while lena <= lena_limit:
1012 |             start = 0
1013 |             while start < N:
1014 |                 if ring == 0:
1015 |                     # NWC
1016 |                     """
1017 |                     W_pow = intReverse(k,v)+1
1018 |                     TW = (-pow(w,W_pow,q)) % q # here, "-" means an extra w^(n/2)
1019 |                     """
1020 |                     W_pow = intReverse(k,v)+1
1021 |                 else:
1022 |                     # NTRU
1023 |                     W_pow = powers_new[k]
1024 | 
1025 |                 TW = pow(w,W_pow,q)
1026 |                 k = k+1
1027 |                 j = start
1028 |                 while(j < (start + lena)):
1029 |                     t = B[j]
1030 | 
1031 |                     B[j       ] = (t + B[j + lena]) % q
1032 |                     B[j + lena] = (t - B[j + lena]) % q
1033 |                     B[j + lena] = B[j + lena]*TW % q
1034 | 
1035 |                     j = j+1
1036 |                 start = j + lena
1037 |             lena = 2*lena
1038 | 
1039 |         N_inv = modinv(N//findeg,q)
1040 |         for i in range(N):
1041 |             B[i] = (B[i] * N_inv) % q
1042 | 
1043 |         return B
1044 | 
1045 | #
1046 | 


--------------------------------------------------------------------------------
/baseline/poly_demo.py:
--------------------------------------------------------------------------------
  1 | from math import log
  2 | from random import randint
  3 | 
  4 | from generate_prime import *
  5 | from helper import *
  6 | from ntt import *
  7 | from poly import *
  8 | 
  9 | # Parameter generation
 10 | 
 11 | # Determine n and bit-size of q, then find a q satisfying
 12 | # the condition: q = 1 (mod 2n) or q = 1 (mod n)
 13 | #
 14 | # Based on n and q, polynomial multiplication parameters
 15 | 
 16 | # Parameters
 17 | mod     = 2 # if 1 --> q = 1 (mod n), if 2 --> q = 1 (mod 2n)
 18 | n       = 256
 19 | q_bit   = 13
 20 | 
 21 | q       = 0
 22 | w       = 0
 23 | w_inv   = 0
 24 | psi     = 0
 25 | psi_inv = 0
 26 | 
 27 | # Generate parameters
 28 | wfound = False
 29 | while(not(wfound)):
 30 |     q = generate_large_prime(q_bit)
 31 | 
 32 |     # check q = 1 (mod n or 2n)
 33 |     while (not ((q % (mod*n)) == 1)):
 34 |         q = generate_large_prime(q_bit)
 35 | 
 36 |     # generate NTT parameters
 37 |     for i in range(2,q-1):
 38 |         wfound = isrootofunity(i,mod*n,q)
 39 |         if wfound:
 40 |             if mod == 1:
 41 |                 psi    = 0
 42 |                 psi_inv= 0
 43 |                 w      = i
 44 |                 w_inv  = modinv(w,q)
 45 |             else:
 46 |                 psi    = i
 47 |                 psi_inv= modinv(psi,q)
 48 |                 w      = pow(psi,2,q)
 49 |                 w_inv  = modinv(w,q)
 50 |             break
 51 | 
 52 | # Print parameters
 53 | print("Parameters (NWC)")
 54 | print("n      : {}".format(n))
 55 | print("q      : {}".format(q))
 56 | print("w      : {}".format(w))
 57 | print("w_inv  : {}".format(w_inv))
 58 | print("psi    : {}".format(psi))
 59 | print("psi_inv: {}".format(psi_inv))
 60 | print("")
 61 | 
 62 | # Parameters (NTRU)
 63 | m       = 3*n
 64 | mq_bit  = 14
 65 | 
 66 | mq      = 0
 67 | mw      = 0
 68 | mw_inv  = 0
 69 | 
 70 | # Generate parameters
 71 | wfound = False
 72 | while(not(wfound)):
 73 |     mq = generate_large_prime(mq_bit)
 74 | 
  75 |     # check mq = 1 (mod m)
 76 |     while (not ((mq % m) == 1)):
 77 |         mq = generate_large_prime(mq_bit)
 78 | 
 79 |     # generate NTT parameters
 80 |     for i in range(2,mq-1):
 81 |         wfound = isrootofunity(i,m,mq)
 82 |         if wfound:
 83 |             mw      = i
 84 |             mw_inv  = modinv(mw,mq)
 85 |             break
 86 | 
 87 | # m,mq,mw,mw_inv = 192,769,4,577
 88 | 
 89 | # Powers of twiddle factors for NTRU (forward and inverse transform)
 90 | # Generating necessary powers of twiddle factors for NTRU on-the-fly is really hard.
 91 | # Therefore, we create table for powers of twiddle factors prior any operation
 92 | nf = [0]*(m//3) # forward
 93 | 
 94 | nf[0] = 0
 95 | nf[1] = m//6
 96 | nf[2] = nf[1]//2
 97 | nf[3] = (5*nf[1])//2
 98 | 
 99 | i = 2
100 | while (2**i) < (m//3):
101 |     for j in range(2**i, 2**(i+1), 2):
102 |         nf[j]   =  nf[j//2]//2
103 |         nf[j+1] = (nf[j//2]+(m//2))//2
104 |     i = i + 1
105 | 
106 | ntrupowersf = nf[2:]
107 | 
108 | ntrupowersi = [] # inverse
109 | 
110 | idxs, idxe = len(ntrupowersf)-(m//6) ,len(ntrupowersf)
111 | for i in range(int(log(m//6,2))):
112 |     ntrupowersi = ntrupowersi + ntrupowersf[idxs:idxe]
113 |     idxe = idxs
114 |     idxs = idxs - ((m//12)>>i)
115 | 
116 | ntrupowersb = [0]*(m//3) # basemul
117 | 
118 | for i in range(m//6):
119 |     ntrupowersb[2*i+0] = ntrupowersi[i]
120 |     ntrupowersb[2*i+1] = ntrupowersi[i] + (m//2)
121 | 
122 | # print(ntrupowersf)
123 | # print(ntrupowersb)
124 | # print(ntrupowersi)
125 | 
126 | print("Parameters (NTRU)")
127 | print("m      : {}".format(m))
128 | print("mq     : {}".format(mq))
129 | print("mw     : {}".format(mw))
130 | print("mw_inv : {}".format(mw_inv))
131 | print("")
132 | 
133 | #NOTE: Comment Out Reference Method for Large Parameters
134 | 
135 | # Demo
136 | # Random A,B
137 | A = [randint(0,q-1) for _ in range(n)]
138 | B = [randint(0,q-1) for _ in range(n)]
139 | 
140 | # Random A,B (for ntru)
141 | A_ntru = [randint(0,mq-1) for _ in range(m)]
142 | B_ntru = [randint(0,mq-1) for _ in range(m)]
143 | 
144 | # Evaluator
145 | Evaluator = Poly()
146 | 
147 | # reduce functions
148 | pwc  = [-1]+[0]*(n-1)+[1]
149 | nwc  =  [1]+[0]*(n-1)+[1]
150 | ntru =  [1]+[0]*(int(m/2)-1)+[-1]+[0]*(int(m/2)-1)+[1]
151 | 
152 | print("-------- Sanity check for polynomial multiplication operations --------")
153 | print("")
154 | 
155 | # Check reference implementations
156 | D0 = Evaluator.SchoolbookPolMul(A,B,q)
157 | D1 = Evaluator.SchoolbookPolMul(A_ntru,B_ntru,mq)
158 | DR0= Evaluator.PolRed(D0,pwc,q) # reduce with x^n-1
159 | DR1= Evaluator.PolRed(D0,nwc,q) # reduce with x^n+1
160 | DR2= Evaluator.PolRed(D1,ntru,mq)# reduce with x^n-x^(n/2)+1
161 | C0 = Evaluator.SchoolbookModPolMul_PWC(A,B,q)
162 | C1 = Evaluator.SchoolbookModPolMul_NWC(A,B,q)
163 | C2 = Evaluator.SchoolbookModPolMul_NTRU(A_ntru,B_ntru,mq)
164 | 
165 | print("SchoolbookModPolMul_PWC  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(DR0,C0)]) == 0) else "Wrong"))
166 | print("SchoolbookModPolMul_NWC  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(DR1,C1)]) == 0) else "Wrong"))
167 | print("SchoolbookModPolMul_NTRU --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(DR2,C2)]) == 0) else "Wrong"))
168 | print("")
169 | 
170 | # Check NTT-based polynomial multiplication methods
171 | N0 = Evaluator.NTTBasedPolMul(A,B,psi,psi_inv,q)
172 | N1 = Evaluator.NTTBasedModPolMul_PWC(A,B,w,w_inv,q)
173 | N2 = Evaluator.NTTBasedModPolMul_NWC_v1(A,B,w,w_inv,psi,psi_inv,q)
174 | N3 = Evaluator.NTTBasedModPolMul_NWC_v2(A,B,psi,psi_inv,q)
175 | 
176 | print("NTTBasedPolMul           --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(N0,D0)]) == 0) else "Wrong"))
177 | print("NTTBasedModPolMul_PWC    --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(N1,C0)]) == 0) else "Wrong"))
178 | print("NTTBasedModPolMul_NWC_v1 --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(N2,C1)]) == 0) else "Wrong"))
179 | print("NTTBasedModPolMul_NWC_v2 --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(N3,C1)]) == 0) else "Wrong"))
180 | print("")
181 | 
182 | # Check CRT-based polynomial multiplication methods
183 | T0 = Evaluator.CRTBasedModPolMul_PWC(A,B,w,w_inv,q)
184 | T1 = Evaluator.CRTBasedModPolMul_NWC_FD1(A,B,psi,psi_inv,q)
185 | T2 = Evaluator.CRTBasedModPolMul_NWC_FD2(A,B,w,w_inv,q)
186 | T3 = Evaluator.CRTBasedModPolMul_NWC_FDV(A,B,psi,psi_inv,q,findeg=1)
187 | T4 = Evaluator.CRTBasedModPolMul_NWC_FDV(A,B,w,w_inv,q,findeg=2)
188 | T5 = Evaluator.CRTBasedModPolMul_NWC_FDV(A,B,w**2 % q,w_inv**2 % q,q,findeg=4)
189 | T6 = Evaluator.CRTBasedModPolMul_NWC_FDV(A,B,w**4 % q,w_inv**4 % q,q,findeg=8)
190 | T7 = Evaluator.CRTBasedModPolMul_NTRU_FD3(A_ntru,B_ntru,mw,mw_inv,ntrupowersf,ntrupowersb,ntrupowersi,mq)
191 | T8 = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw,mw_inv,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=3)
192 | T9 = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw,mw_inv,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=6)
193 | T10= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw,mw_inv,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=12)
194 | T11= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw,mw_inv,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=24)
195 | # T9 = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**2 % q,mw_inv**2 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=6)
196 | # T10= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**4 % q,mw_inv**4 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=12)
197 | # T11= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**8 % q,mw_inv**8 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=24)
198 | 
199 | print("CRTBasedModPolMul_PWC                  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T0,C0)]) == 0) else "Wrong"))
200 | print("CRTBasedModPolMul_NWC_FD1              --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T1,C1)]) == 0) else "Wrong"))
201 | print("CRTBasedModPolMul_NWC_FD2              --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T2,C1)]) == 0) else "Wrong"))
202 | print("CRTBasedModPolMul_NWC_FDV  (findeg=1)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T3,C1)]) == 0) else "Wrong"))
203 | print("CRTBasedModPolMul_NWC_FDV  (findeg=2)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T4,C1)]) == 0) else "Wrong"))
204 | print("CRTBasedModPolMul_NWC_FDV  (findeg=4)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T5,C1)]) == 0) else "Wrong"))
205 | print("CRTBasedModPolMul_NWC_FDV  (findeg=8)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T6,C1)]) == 0) else "Wrong"))
206 | print("CRTBasedModPolMul_NTRU_FD3             --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T7,C2)]) == 0) else "Wrong"))
207 | print("CRTBasedModPolMul_NTRU_FDV (findeg=3)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T8,C2)]) == 0) else "Wrong"))
208 | print("CRTBasedModPolMul_NTRU_FDV (findeg=6)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T9,C2)]) == 0) else "Wrong"))
209 | print("CRTBasedModPolMul_NTRU_FDV (findeg=12) --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T10,C2)]) == 0) else "Wrong"))
210 | print("CRTBasedModPolMul_NTRU_FDV (findeg=24) --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(T11,C2)]) == 0) else "Wrong"))
211 | print("")
212 | 
213 | # Check CRT-based unified polynomial multiplication methods
214 | # ring: 0 --> NWC  (x^n + 1)
215 | # -- findeg: 1
216 | # -- findeg: 2
217 | # -- findeg: 4
218 | # --   ...
219 | # ring: 1 --> NTRU (x^n - x^(n/2) + 1)
220 | # -- findeg: 3
221 | # -- findeg: 6
222 | # -- findeg: 12
223 | # --
224 | ring,findeg = 0,1
225 | R0 = Evaluator.CRTBasedModPolMul_Unified(A,B,psi,psi_inv,q,ring,findeg) # NWC - findeg=2
226 | ring,findeg = 0,2
227 | R1 = Evaluator.CRTBasedModPolMul_Unified(A,B,w,w_inv,q,ring,findeg) # NWC - findeg=2
228 | ring,findeg = 0,4
229 | R2 = Evaluator.CRTBasedModPolMul_Unified(A,B,w**2 % q,w_inv**2 % q,q,ring,findeg) # NWC - findeg=4
230 | ring,findeg = 1,3
231 | R3 = Evaluator.CRTBasedModPolMul_Unified(A_ntru,B_ntru,mw,mw_inv,mq,ring,findeg,ntrupowersf,ntrupowersb,ntrupowersi) # NTRU - findeg=3
232 | ring,findeg = 1,6
233 | R4 = Evaluator.CRTBasedModPolMul_Unified(A_ntru,B_ntru,mw,mw_inv,mq,ring,findeg,ntrupowersf,ntrupowersb,ntrupowersi) # NTRU - findeg=6
234 | ring,findeg = 1,12
235 | R5 = Evaluator.CRTBasedModPolMul_Unified(A_ntru,B_ntru,mw,mw_inv,mq,ring,findeg,ntrupowersf,ntrupowersb,ntrupowersi) # NTRU - findeg=12
236 | 
237 | print("CRTBasedModPolMul_Unified (NWC  - findeg=1)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R0,C1)]) == 0) else "Wrong"))
238 | print("CRTBasedModPolMul_Unified (NWC  - findeg=2)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R1,C1)]) == 0) else "Wrong"))
239 | print("CRTBasedModPolMul_Unified (NWC  - findeg=4)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R2,C1)]) == 0) else "Wrong"))
240 | print("CRTBasedModPolMul_Unified (NTRU - findeg=3)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R3,C2)]) == 0) else "Wrong"))
241 | print("CRTBasedModPolMul_Unified (NTRU - findeg=6)  --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R4,C2)]) == 0) else "Wrong"))
242 | print("CRTBasedModPolMul_Unified (NTRU - findeg=12) --> " + ("Correct" if(sum([abs(x-y) for x,y in zip(R5,C2)]) == 0) else "Wrong"))
243 | 
244 | #
245 | # NOTE: We can have extra optimization by combining N_inv with last stage of INTT
246 | # NOTE: Later I can add PWC (x^n - 1)
247 | # NOTE: Later I can add pure NTT/INTT operations
248 | #
249 | 


--------------------------------------------------------------------------------
/hwmodel/FNTT_mem_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | *************************************************** stage=0
 2 | BRAM:	|0    1   |	|2    3   |
 3 |      	----------	----------
 4 | AD0:	|0    4   |	|1    5   |
 5 | AD1:	|2    6   |	|3    7   |
 6 | *************************************************** stage=1
 7 | BRAM:	|0    1   |	|2    3   |
 8 |      	----------	----------
 9 | AD0:	|0    2   |	|1    3   |
10 | AD1:	|4    6   |	|5    7   |
11 | *************************************************** stage=2
12 | BRAM:	|0    1   |	|2    3   |
13 |      	----------	----------
14 | AD0:	|0    1   |	|2    3   |
15 | AD1:	|4    5   |	|6    7   |
16 | 


--------------------------------------------------------------------------------
/hwmodel/FNTT_tw_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | *************************************************** stage=0
 2 | TWID:	|0    |	|1    |
 3 |      	------	------
 4 | AD0:	|0    |	|0    |
 5 | AD1:	|0    |	|0    |
 6 | *************************************************** stage=1
 7 | TWID:	|0    |	|1    |
 8 |      	------	------
 9 | AD0:	|0    |	|0    |
10 | AD1:	|2    |	|2    |
11 | *************************************************** stage=2
12 | TWID:	|0    |	|1    |
13 |      	------	------
14 | AD0:	|0    |	|2    |
15 | AD1:	|1    |	|3    |
16 | 


--------------------------------------------------------------------------------
/hwmodel/INTT_mem_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | *************************************************** stage=0
 2 | BRAM:	|0    1   |	|2    3   |
 3 |      	----------	----------
 4 | AD0:	|0    1   |	|2    3   |
 5 | AD1:	|4    5   |	|6    7   |
 6 | *************************************************** stage=1
 7 | BRAM:	|0    1   |	|2    3   |
 8 |      	----------	----------
 9 | AD0:	|0    2   |	|4    6   |
10 | AD1:	|1    3   |	|5    7   |
11 | *************************************************** stage=2
12 | BRAM:	|0    1   |	|2    3   |
13 |      	----------	----------
14 | AD0:	|0    4   |	|1    5   |
15 | AD1:	|2    6   |	|3    7   |
16 | 


--------------------------------------------------------------------------------
/hwmodel/INTT_tw_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | *************************************************** stage=0
 2 | TWID:	|0    |	|1    |
 3 |      	------	------
 4 | AD0:	|0    |	|2    |
 5 | AD1:	|1    |	|3    |
 6 | *************************************************** stage=1
 7 | TWID:	|0    |	|1    |
 8 |      	------	------
 9 | AD0:	|0    |	|2    |
10 | AD1:	|0    |	|2    |
11 | *************************************************** stage=2
12 | TWID:	|0    |	|1    |
13 |      	------	------
14 | AD0:	|0    |	|0    |
15 | AD1:	|0    |	|0    |
16 | 


--------------------------------------------------------------------------------
/hwmodel/NTRU3_mem_N24_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- First Reduction
 2 | ---------------------------------------------------------------------- Forward NTT (x2)
 3 | *************************************************** stage=0
 4 | BRAM:	|0    1   |	|2    3   |
 5 |      	----------	----------
 6 | AD0:	|0    6   |	|3    9   |
 7 | AD1:	|1    7   |	|4    10  |
 8 | AD2:	|2    8   |	|5    11  |
 9 | AD3:	|12   18  |	|15   21  |
10 | AD4:	|13   19  |	|16   22  |
11 | AD5:	|14   20  |	|17   23  |
12 | *************************************************** stage=1
13 | BRAM:	|0    1   |	|2    3   |
14 |      	----------	----------
15 | AD0:	|0    3   |	|6    9   |
16 | AD1:	|1    4   |	|7    10  |
17 | AD2:	|2    5   |	|8    11  |
18 | AD3:	|12   15  |	|18   21  |
19 | AD4:	|13   16  |	|19   22  |
20 | AD5:	|14   17  |	|20   23  |
21 | ---------------------------------------------------------------------- Degree-2 polynomial-wise multiplication
22 | ---------------------------------------------------------------------- Inverse NTT
23 | *************************************************** stage=0
24 | BRAM:	|0    1   |	|2    3   |
25 |      	----------	----------
26 | AD0:	|0    3   |	|6    9   |
27 | AD1:	|1    4   |	|7    10  |
28 | AD2:	|2    5   |	|8    11  |
29 | AD3:	|12   15  |	|18   21  |
30 | AD4:	|13   16  |	|19   22  |
31 | AD5:	|14   17  |	|20   23  |
32 | *************************************************** stage=1
33 | BRAM:	|0    1   |	|2    3   |
34 |      	----------	----------
35 | AD0:	|0    6   |	|3    9   |
36 | AD1:	|1    7   |	|4    10  |
37 | AD2:	|2    8   |	|5    11  |
38 | AD3:	|12   18  |	|15   21  |
39 | AD4:	|13   19  |	|16   22  |
40 | AD5:	|14   20  |	|17   23  |
41 | ---------------------------------------------------------------------- Final Reconstruction
42 | 


--------------------------------------------------------------------------------
/hwmodel/NTRU3_tw_N24_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- First Reduction
 2 | ---------------------------------------------------------------------- Forward NTT (x2)
 3 | *************************************************** stage=0
 4 | TWID:	|0    |	|1    |
 5 |      	------	------
 6 | AD0:	|2    |	|2    |
 7 | AD1:	|2    |	|2    |
 8 | AD2:	|2    |	|2    |
 9 | AD3:	|10   |	|10   |
10 | AD4:	|10   |	|10   |
11 | AD5:	|10   |	|10   |
12 | *************************************************** stage=1
13 | TWID:	|0    |	|1    |
14 |      	------	------
15 | AD0:	|1    |	|7    |
16 | AD1:	|1    |	|7    |
17 | AD2:	|1    |	|7    |
18 | AD3:	|5    |	|11   |
19 | AD4:	|5    |	|11   |
20 | AD5:	|5    |	|11   |
21 | ---------------------------------------------------------------------- Degree-2 polynomial-wise multiplication
22 | ---------------------------------------------------------------------- Inverse NTT
23 | *************************************************** stage=0
24 | TWID:	|0    |	|1    |
25 |      	------	------
26 | AD0:	|1    |	|7    |
27 | AD1:	|1    |	|7    |
28 | AD2:	|1    |	|7    |
29 | AD3:	|5    |	|11   |
30 | AD4:	|5    |	|11   |
31 | AD5:	|5    |	|11   |
32 | *************************************************** stage=1
33 | TWID:	|0    |	|1    |
34 |      	------	------
35 | AD0:	|2    |	|2    |
36 | AD1:	|2    |	|2    |
37 | AD2:	|2    |	|2    |
38 | AD3:	|10   |	|10   |
39 | AD4:	|10   |	|10   |
40 | AD5:	|10   |	|10   |
41 | ---------------------------------------------------------------------- Final Reconstruction
42 | 


--------------------------------------------------------------------------------
/hwmodel/NWC1_mem_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- Forward NTT (x2)
 2 | *************************************************** stage=0
 3 | BRAM:	|0    1   |	|2    3   |
 4 |      	----------	----------
 5 | AD0:	|0    4   |	|1    5   |
 6 | AD1:	|2    6   |	|3    7   |
 7 | *************************************************** stage=1
 8 | BRAM:	|0    1   |	|2    3   |
 9 |      	----------	----------
10 | AD0:	|0    2   |	|1    3   |
11 | AD1:	|4    6   |	|5    7   |
12 | *************************************************** stage=2
13 | BRAM:	|0    1   |	|2    3   |
14 |      	----------	----------
15 | AD0:	|0    1   |	|2    3   |
16 | AD1:	|4    5   |	|6    7   |
17 | ---------------------------------------------------------------------- Coefficient-wise multiplication
18 | ---------------------------------------------------------------------- Inverse NTT
19 | *************************************************** stage=0
20 | BRAM:	|0    1   |	|2    3   |
21 |      	----------	----------
22 | AD0:	|0    1   |	|2    3   |
23 | AD1:	|4    5   |	|6    7   |
24 | *************************************************** stage=1
25 | BRAM:	|0    1   |	|2    3   |
26 |      	----------	----------
27 | AD0:	|0    2   |	|1    3   |
28 | AD1:	|4    6   |	|5    7   |
29 | *************************************************** stage=2
30 | BRAM:	|0    1   |	|2    3   |
31 |      	----------	----------
32 | AD0:	|0    4   |	|1    5   |
33 | AD1:	|2    6   |	|3    7   |
34 | 


--------------------------------------------------------------------------------
/hwmodel/NWC1_tw_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- Forward NTT (x2)
 2 | *************************************************** stage=0
 3 | TWID:	|0    |	|1    |
 4 |      	------	------
 5 | AD0:	|4    |	|4    |
 6 | AD1:	|4    |	|4    |
 7 | *************************************************** stage=1
 8 | TWID:	|0    |	|1    |
 9 |      	------	------
10 | AD0:	|2    |	|2    |
11 | AD1:	|6    |	|6    |
12 | *************************************************** stage=2
13 | TWID:	|0    |	|1    |
14 |      	------	------
15 | AD0:	|1    |	|5    |
16 | AD1:	|3    |	|7    |
17 | ---------------------------------------------------------------------- Coefficient-wise multiplication
18 | ---------------------------------------------------------------------- Inverse NTT
19 | *************************************************** stage=0
20 | TWID:	|0    |	|1    |
21 |      	------	------
22 | AD0:	|1    |	|5    |
23 | AD1:	|3    |	|7    |
24 | *************************************************** stage=1
25 | TWID:	|0    |	|1    |
26 |      	------	------
27 | AD0:	|2    |	|2    |
28 | AD1:	|6    |	|6    |
29 | *************************************************** stage=2
30 | TWID:	|0    |	|1    |
31 |      	------	------
32 | AD0:	|4    |	|4    |
33 | AD1:	|4    |	|4    |
34 | 


--------------------------------------------------------------------------------
/hwmodel/NWC2_mem_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- Forward NTT (x2)
 2 | *************************************************** stage=0
 3 | BRAM:	|0    1   |	|2    3   |
 4 |      	----------	----------
 5 | AD0:	|0    4   |	|1    5   |
 6 | AD1:	|2    6   |	|3    7   |
 7 | *************************************************** stage=1
 8 | BRAM:	|0    1   |	|2    3   |
 9 |      	----------	----------
10 | AD0:	|0    2   |	|1    3   |
11 | AD1:	|4    6   |	|5    7   |
12 | ---------------------------------------------------------------------- Degree-2 polynomial-wise multiplication
13 | ---------------------------------------------------------------------- Inverse NTT
14 | *************************************************** stage=0
15 | BRAM:	|0    1   |	|2    3   |
16 |      	----------	----------
17 | AD0:	|0    2   |	|1    3   |
18 | AD1:	|4    6   |	|5    7   |
19 | *************************************************** stage=1
20 | BRAM:	|0    1   |	|2    3   |
21 |      	----------	----------
22 | AD0:	|0    4   |	|1    5   |
23 | AD1:	|2    6   |	|3    7   |
24 | 


--------------------------------------------------------------------------------
/hwmodel/NWC2_tw_N8_PE2.txt:
--------------------------------------------------------------------------------
 1 | ---------------------------------------------------------------------- Forward NTT (x2)
 2 | *************************************************** stage=0
 3 | TWID:	|0    |	|1    |
 4 |      	------	------
 5 | AD0:	|2    |	|2    |
 6 | AD1:	|2    |	|2    |
 7 | *************************************************** stage=1
 8 | TWID:	|0    |	|1    |
 9 |      	------	------
10 | AD0:	|1    |	|1    |
11 | AD1:	|3    |	|3    |
12 | ---------------------------------------------------------------------- Degree-2 polynomial-wise multiplication
13 | ---------------------------------------------------------------------- Inverse NTT
14 | *************************************************** stage=0
15 | TWID:	|0    |	|1    |
16 |      	------	------
17 | AD0:	|1    |	|1    |
18 | AD1:	|3    |	|3    |
19 | *************************************************** stage=1
20 | TWID:	|0    |	|1    |
21 |      	------	------
22 | AD0:	|2    |	|2    |
23 | AD1:	|2    |	|2    |
24 | 


--------------------------------------------------------------------------------
/hwmodel/generate_prime.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Pedro Alves
 2 | 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | 
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import random
16 | import math
17 | import sys
18 | 
def miller_rabin(p,s=11):
    """Miller-Rabin probabilistic primality test.

    p: integer to test
    s: number of random bases tried (each round that fails to expose p as
       composite lowers the chance of a false "prime" verdict)

    Returns False as soon as one base proves p composite, True otherwise.
    """
    # Small values need explicit handling: for p < 2 the decomposition loop
    # below never terminates / the base range is invalid, and for p in {2,3}
    # the base range {2,...,p-2} is empty.
    if p < 2:
        return False
    if p < 4:
        return True

    #computes p-1 decomposition in 2**u*r
    r = p-1
    u = 0
    while r&1 == 0:#true while the last bit of r is zero
        u += 1
        # exact integer shift; int(r/2) goes through a float and silently
        # corrupts r once p exceeds 2**53
        r >>= 1

    # apply miller_rabin primality test
    for _ in range(s):
        a = random.randrange(2,p-1) # choose random a in {2,3,...,p-2}
        z = pow(a,r,p)

        if z != 1 and z != p-1:
            # square up to u-1 times looking for p-1; reaching 1 first means
            # a non-trivial square root of 1 exists, hence p is composite
            for _ in range(u-1):
                if z != p-1:
                    z = pow(z,2,p)
                    if z == 1:
                        return False
                else:
                    break
            if z != p-1:
                return False
    return True
43 | 
44 | 
def is_prime(n,s=11):
    """Primality check: trial division by small primes, then Miller-Rabin.

    n: integer to test
    s: number of Miller-Rabin rounds used when trial division is inconclusive

    Returns True if n is (probably) prime, False otherwise.
    """
    # lowPrimes is all odd primes under 1000. Taking n modulo each lowPrime
    # removes a huge chunk of composite numbers from the candidate pool
    # without resorting to Miller-Rabin.
    lowPrimes = [
        3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
        101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,
        181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,
        271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367,
        373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461,
        463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571,
        577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661,
        673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773,
        787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883,
        887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,
    ]

    # 2 is the only even prime; the parity test below would reject it
    if n == 2:
        return True
    # everything below 3 and every other even number is not prime
    if n < 3 or n&1 == 0:
        return False

    for p in lowPrimes:
        if n == p:
            return True
        if n % p == 0:
            return False
    return miller_rabin(n,s)
68 | 
def generate_large_prime(k,s=11):
    """Draw random k-bit integers until one passes is_prime.

    k: desired bit length of the prime
    s: number of Miller-Rabin rounds forwarded to is_prime

    Returns a (probable) prime in [2**(k-1), 2**k).
    Raises Exception when no prime is found within the attempt budget.
    """
    max_attempts = int(100*(math.log(k,2)+1)) #number of max attempts

    for _ in range(max_attempts):
        # NOTE: randrange is a Mersenne Twister and is completely
        # deterministic -- unusable for serious crypto purposes
        n = random.randrange(2**(k-1),2**(k))
        if is_prime(n,s):
            return n

    # report the budget that was actually spent (the original formatted the
    # exhausted countdown, which is always 0 at this point)
    raise Exception("Failure after %d tries." % max_attempts)
85 | 


--------------------------------------------------------------------------------
/hwmodel/helper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def egcd(a, b):
 3 |     if a == 0:
 4 |         return (b, 0, 1)
 5 |     else:
 6 |         g, y, x = egcd(b % a, a)
 7 |         return (g, x - (b // a) * y, y)
 8 | 
 9 | def modinv(a, m):
10 |     g, x, y = egcd(a, m)
11 |     if g != 1:
12 |         raise Exception('Modular inverse does not exist')
13 |     else:
14 |         return x % m
15 | 
16 | # Bit-Reverse integer
def intReverse(a,n):
    """Reverse the binary representation of a, zero-padded to n digits.

    a: integer whose bits are reversed
    n: minimum number of binary digits (a is left-padded with zeros)

    Returns the integer read from the reversed digit string.
    """
    bits = format(a, '0'+str(n)+'b')
    return int(bits[::-1], 2)
20 | 
21 | # Bit-Reversed index
def indexReverse(a,r):
    """Return a copy of a permuted by bit-reversing every index.

    a: input sequence
    r: number of bits used when reversing each index

    The element at position i is written to position intReverse(i, r).
    """
    permuted = [0]*len(a)
    for src, value in enumerate(a):
        permuted[intReverse(src, r)] = value
    return permuted
29 | 
30 | # Check if input is m-th (could be n or 2n) primitive root of unity of q
def isrootofunity(w,m,q):
    """Check whether w is a primitive m-th root of unity modulo q.

    w: candidate root
    m: order to test (could be n or 2n)
    q: modulus

    Returns True only if w**m == 1, w**(m//2) == q-1 and none of the powers
    w**1 ... w**(m-1) equals 1 (all modulo q).
    """
    # cheap necessary conditions first
    if pow(w,m,q) != 1 or pow(w,m//2,q) != (q-1):
        return False

    # walk through w**1 .. w**(m-1); hitting 1 means the order is below m
    power = w
    for _ in range(m-1):
        if power == 1:
            return False
        power = (power*w) % q
    return True
44 | 


--------------------------------------------------------------------------------
/hwmodel/poly_hw_demo.py:
--------------------------------------------------------------------------------
  1 | from math import log
  2 | from random import randint
  3 | 
  4 | from generate_prime import *
  5 | from helper import *
  6 | 
  7 | # ----------------------------------------------------------
  8 | 
# Debug switches (1 enables, 0 disables).
# DEBUG_DIS is checked inside the NTT routines of this file: when set, every
# butterfly prints its twiddle exponent ("W") and operand indices ("A0"/"A1").
DEBUG_DIS = 1 # Memory Pattern
DEBUG_LAT = 1 # Expected Latency
DEBUG_TXT = 1 # Write Memory into TXT
DEBUG_TST = 0 # Generate Test Vector (I'll implement later)

# ----------------------------------------------------------

# Parallelism
# PE_NUMBER: number of processing elements being modelled.
PE_NUMBER = 2
# PE: number of BRAM lists allocated by the NTT routines (two per processing
# element); PE//2 lists are allocated for the twiddle bookkeeping.
PE = 2*PE_NUMBER
# NOTE(review): latency_pip / latency_stg are not referenced by the routines
# visible here; presumably pipeline and per-stage latencies -- confirm.
latency_pip = 6
latency_stg = 4
 21 | 
 22 | # ----------------------------------------------------------
 23 | 
# From paper: NTTU: An Area-Efficient Low-Power NTT-Uncoupled Architecture for NTT-Based Multiplication
# Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - NR
# A: input polynomial (standard order)
# W: twiddle factor
# q: modulus
# B: output polynomial (bit-reversed order)
# Additionally returns the access-pattern bookkeeping filled in per butterfly:
# BRAM[bank][stage][address] : index of the coefficient accessed
# BRTW[pe][stage][address]   : exponent of W used as twiddle factor
def Radix2_DIT_Iterative_NTT_NR(A,W,q):
    N = len(A)
    # work on a copy so the caller's list is left untouched
    B = [_ for _ in A]

    # ---------------------------------
    # v: number of stages, m: number of addresses per bank
    v = int(math.log(N, 2))
    m = N//PE

    BRAM = []
    BRTW = []

    # allocate PE banks x v stages x m addresses (placeholders are empty lists
    # that get overwritten by integers below)
    for i in range(PE):
        BRAM.append([])
        for j in range(v):
            BRAM[i].append([])
            for k in range(m):
                BRAM[i][j].append([])

    # same layout for the twiddle bookkeeping, one list per pair of banks
    for i in range(PE//2):
        BRTW.append([])
        for j in range(v):
            BRTW[i].append([])
            for k in range(m):
                BRTW[i][j].append([])

    # running index over all butterflies of all stages
    bram_counter = 0
    # ---------------------------------

    # stages run from the largest butterfly span (m = N) down to m = 2;
    # note that m is reused here as the block size of the current stage
    for s in range(int(log(N,2)),0,-1):
        m = 2**s
        for k in range(int(N/m)):
            # one twiddle per block: W^(bitrev(k) * m/2)
            TW = pow(W,intReverse(k,int(log(N,2))-s)*int(m/2),q)
            for j in range(int(m/2)):
                # butterfly: (u, x) -> (u + TW*x, u - TW*x)
                u = B[k*m+j]
                t = (TW*B[k*m+j+int(m/2)]) % q

                B[k*m+j]          = (u+t) % q
                B[k*m+j+int(m/2)] = (u-t) % q

                if DEBUG_DIS: print("W: "+str(intReverse(k,int(log(N,2))-s)*int(m/2)).ljust(5)+" A0: "+str(k*m+j).ljust(5)+" A1: "+str(k*m+j+int(m/2)).ljust(5))

                # ---------------------------------
                # bank  : (2*bram_counter) & (PE-1), +0 / +1 for the two operands
                # stage : log2(N)-s (0 for the first stage executed)
                # addr  : position of the butterfly within its stage, divided by PE//2
                BRAM[(2*(bram_counter >> 0) & (PE-1))+0][int(log(N,2))-s][(bram_counter & ((N//2)-1)) // (PE//2)] = k*m+j
                BRAM[(2*(bram_counter >> 0) & (PE-1))+1][int(log(N,2))-s][(bram_counter & ((N//2)-1)) // (PE//2)] = k*m+j+int(m/2)

                # twiddle exponent used by this butterfly
                BRTW[bram_counter & ((PE//2)-1)][int(log(N,2))-s][(bram_counter & ((N//2)-1)) // (PE//2)] = intReverse(k,int(log(N,2))-s)*int(m/2)

                bram_counter = bram_counter + 1
                # ---------------------------------

    return B,BRAM,BRTW
 81 | 
# Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - RN
# A: input polynomial (bit-reversed order)
# W: twiddle factor
# q: modulus
# B: output polynomial (standard order)
# Additionally returns the access-pattern bookkeeping filled in per butterfly:
# BRAM[bank][stage][address] : index of the coefficient accessed
# BRTW[pe][stage][address]   : exponent of W used as twiddle factor
def Radix2_DIF_Iterative_NTT_RN(A,W,q):
    N = len(A)
    # work on a copy so the caller's list is left untouched
    B = [_ for _ in A]

    # ---------------------------------
    # v: number of stages, m: number of addresses per bank
    v = int(math.log(N, 2))
    m = N//PE

    BRAM = []
    BRTW = []

    # allocate PE banks x v stages x m addresses (placeholders are empty lists
    # that get overwritten by integers below)
    for i in range(PE):
        BRAM.append([])
        for j in range(v):
            BRAM[i].append([])
            for k in range(m):
                BRAM[i][j].append([])

    # same layout for the twiddle bookkeeping, one list per pair of banks
    for i in range(PE//2):
        BRTW.append([])
        for j in range(v):
            BRTW[i].append([])
            for k in range(m):
                BRTW[i][j].append([])

    # running index over all butterflies of all stages
    bram_counter = 0
    # ---------------------------------

    # m and v are reused from here on:
    # m: number of interleaved butterfly groups in the current stage
    # v: halved every stage, the loop ends when it reaches 1
    # d: distance between the two operands of a butterfly
    m = 1
    v = N
    d = 1

    while v>1:
        for jf in range(m):
            j = jf
            jt = 0
            while j<(N-1):
                # bit-reversing jt
                TW = pow(W,intReverse(jt,int(log(N>>1,2))),q)

                temp = B[j]

                # butterfly: (x, y) -> (x + y, (x - y)*TW)
                B[j]   = (temp + B[j+d]) % q
                B[j+d] = (temp - B[j+d])*TW % q

                if DEBUG_DIS: print("W: "+str(intReverse(jt,int(log(N>>1,2)))).ljust(5)+" A0: "+str(j).ljust(5)+" A1: "+str(j+d).ljust(5))

                # ---------------------------------
                # bank  : (2*bram_counter) & (PE-1), +0 / +1 for the two operands
                # stage : bram_counter // (N//2), since every stage has N/2 butterflies
                # addr  : position of the butterfly within its stage, divided by PE//2
                BRAM[(2*(bram_counter >> 0) & (PE-1))+0][bram_counter // (N//2)][(bram_counter & ((N//2)-1)) // (PE//2)] = j
                BRAM[(2*(bram_counter >> 0) & (PE-1))+1][bram_counter // (N//2)][(bram_counter & ((N//2)-1)) // (PE//2)] = j+d

                # twiddle exponent used by this butterfly
                BRTW[bram_counter & ((PE//2)-1)][bram_counter // (N//2)][(bram_counter & ((N//2)-1)) // (PE//2)] = intReverse(jt,int(log(N>>1,2)))

                bram_counter = bram_counter + 1
                # ---------------------------------

                jt = jt+1
                j = j + 2*m

        m = 2*m
        v = int(v/2)
        d = 2*d

    return B,BRAM,BRTW
151 | 
def Radix2_DIF_Iterative_INTT_RN(A,W_inv,q):
    """Inverse transform built on top of Radix2_DIF_Iterative_NTT_RN.

    Runs the forward DIF routine with the inverse twiddle factor and then
    multiplies every output coefficient by len(A)^-1 modulo q.

    A     : input coefficients
    W_inv : inverse twiddle factor handed to the forward routine
    q     : coefficient modulus
    Returns (B, BRAM, BRTW): the scaled coefficients together with the two
    address tables produced by the forward routine, passed through unchanged.
    """
    # The inverse of the length is computed first, so an uninvertible length
    # fails before any transform work is done.
    scale = modinv(len(A),q)
    transformed,BRAM,BRTW = Radix2_DIF_Iterative_NTT_RN(A,W_inv,q)
    scaled = [(coeff*scale) % q for coeff in transformed]
    return scaled,BRAM,BRTW
157 | 
def PolWiseMult(A,B,wk,deg,q):
    """Multiply two polynomials modulo (x^deg - wk) with coefficients mod q.

    A, B : input polynomials as coefficient lists (lowest degree first)
    wk   : value substituted for x^deg during reduction (some power w^k)
    deg  : number of coefficients of the reduced result
    q    : coefficient modulus
    Returns a list of exactly `deg` coefficients, each in [0, q).

    The reduction always folds at `deg` (not at len(A)), so inputs shorter
    than `deg` are treated as zero-padded, and inputs longer than `deg`
    are reduced as well instead of raising IndexError.
    """
    # Schoolbook product; sized so that every index A[i]*B[j] fits and so
    # that the fold below can always read position i-deg.
    prod = [0] * max(2 * deg, len(A) + len(B) - 1)
    for indexA, elemA in enumerate(A):
        for indexB, elemB in enumerate(B):
            prod[indexA + indexB] = (prod[indexA + indexB] + elemA * elemB) % q

    # Fold from the top down using x^deg = wk; going downwards lets terms
    # of degree >= 2*deg be folded repeatedly until they land below deg.
    for i in range(len(prod) - 1, deg - 1, -1):
        prod[i - deg] = (prod[i - deg] + prod[i] * wk) % q

    return prod[:deg]
175 | 
# CRT-based unified structure for NWC and NTRU
# -- ring  : 0->NWC, 1->NTRU
# -- findeg: 1,2,4,... for NWC and 3,6,12,... for NTRU
# A: input polynomial (standard order)
# w: twiddle factor
# q: modulus
# powers: table of twiddle exponents, read once per butterfly group (only when ring != 0)
# Returns (B, BRAM, BRTW)
# -- B   : output polynomial (bit-reversed order)
# -- BRAM: 3-level table [PE][stage][N//PE] recording the coefficient indices of each butterfly
# -- BRTW: 3-level table [PE//2][stage][N//PE] recording the twiddle exponent of each butterfly
#          (slots that are never written keep their initial empty list)
def CRT_Iterative_Unified_NR(A,w,q,ring,findeg,powers):
    N = len(A)
    B = list(A)

    # ---------------------------------
    # Address tables: NTRU (ring != 0) uses one stage fewer than NWC.
    stage_count = int(math.log(N//findeg, 2))
    if ring != 0:
        stage_count = stage_count - 1
    depth = N//PE

    BRAM = [[[[] for _ in range(depth)] for _ in range(stage_count)] for _ in range(PE)]
    BRTW = [[[[] for _ in range(depth)] for _ in range(stage_count)] for _ in range(PE//2)]

    bram_counter = 0
    ntru_counter = 0   # only advanced on the NTRU path
    # ---------------------------------

    if ring == 0:
        # NWC
        k = 1
        lena = N//2
        rev_bits = int(log(N//findeg,2))
    else:
        # NTRU
        k = 0
        lena = N//4

    while lena >= findeg:
        start = 0
        while start < N:
            # One twiddle exponent per group of `lena` butterflies.
            if ring == 0:
                W_pow = intReverse(k,rev_bits)
            else:
                W_pow = powers[k]

            W = pow(w,W_pow,q)
            k += 1

            for j in range(start, start+lena):
                t = (W * B[j+lena]) % q

                B[j+lena] = (B[j] - t) % q
                B[j     ] = (B[j] + t) % q

                if DEBUG_DIS: print("W: "+str(W_pow).ljust(5)+" A0: "+str(j).ljust(5)+" A1: "+str(j+lena).ljust(5))

                # ---------------------------------
                # Record where this butterfly's operands and twiddle live.
                slot = (2*bram_counter) & (PE-1)
                if ring == 0:
                    stage = bram_counter // (N//2)
                    addr = (bram_counter & ((N//2)-1)) // (PE//2)

                    BRAM[slot  ][stage][addr] = j
                    BRAM[slot+1][stage][addr] = j+lena
                    BRTW[bram_counter & ((PE//2)-1)][stage][addr] = W_pow

                    bram_counter += 1
                else:
                    stage = bram_counter // (N//6)
                    addr = 3*((bram_counter & ((N//6)-1)) // (PE//2))+ntru_counter

                    BRAM[slot  ][stage][addr] = j
                    BRAM[slot+1][stage][addr] = j+lena
                    BRTW[bram_counter & ((PE//2)-1)][stage][addr] = W_pow

                    # bram_counter advances once per three butterflies.
                    if ntru_counter == 2:
                        bram_counter += 1
                        ntru_counter = 0
                    else:
                        ntru_counter += 1
                # ---------------------------------

            # Skip over the upper half that was just updated.
            start += 2*lena
        lena = lena//2

    return B,BRAM,BRTW
290 | 
# ICRT-based unified structure for NWC and NTRU
# -- ring  : 0->NWC, 1->NTRU
# -- findeg: 1,2,4,... for NWC and 3,6,12,... for NTRU
# A: input polynomial (bit-reversed order)
# w: twiddle factor (callers in this file pass the inverse root)
# q: modulus
# powers: table of twiddle exponents (only read when ring != 0)
# Returns (B, BRAM, BRTW)
# -- B   : output polynomial (standard order), scaled by (N//findeg)^-1 mod q
# -- BRAM: 3-level table [PE][stage][N//PE] recording the coefficient indices of each butterfly
# -- BRTW: 3-level table [PE//2][stage][N//PE] recording the twiddle exponent of each butterfly
#          (slots that are never written keep their initial empty list)
def ICRT_Iterative_Unified_RN(A,w,q,ring,findeg,powers):
    N = len(A)
    B = list(A)

    # ---------------------------------
    # Address tables: NTRU (ring != 0) uses one stage fewer than NWC.
    stage_count = int(math.log(N//findeg, 2))
    if ring != 0:
        stage_count = stage_count - 1
    depth = N//PE

    BRAM = [[[[] for _ in range(depth)] for _ in range(stage_count)] for _ in range(PE)]
    BRTW = [[[[] for _ in range(depth)] for _ in range(stage_count)] for _ in range(PE//2)]

    bram_counter = 0
    ntru_counter = 0   # only advanced on the NTRU path
    # ---------------------------------

    k = 0
    lena = findeg

    if ring == 0:
        # NWC
        rev_bits = int(log(N//findeg,2))
        lena_limit = N//2
    else:
        # NTRU: drop the leading table entries once per halving of findeg
        # (while it is still >= 6), doubling the divisor each time.
        powers_new = list(powers)
        remaining = findeg
        scale = 1
        while remaining >= 6:
            powers_new = powers_new[N//(6*scale):]
            remaining = remaining//2
            scale = 2*scale
        lena_limit = N//4

    while lena <= lena_limit:
        start = 0
        while start < N:
            if ring == 0:
                # NWC
                # Alternative kept from the original notes:
                #   W_pow = intReverse(k,v)+1
                #   TW = (-pow(w,W_pow,q)) % q  # here, "-" means an extra w^(n/2)
                W_pow = intReverse(k,rev_bits)+1
            else:
                # NTRU
                W_pow = powers_new[k]

            TW = pow(w,W_pow,q)
            k += 1

            for j in range(start, start+lena):
                lower = B[j]
                upper = B[j+lena]

                B[j       ] = (lower + upper) % q
                B[j + lena] = ((lower - upper) % q)*TW % q

                if DEBUG_DIS: print("W: "+str(W_pow).ljust(5)+" A0: "+str(j).ljust(5)+" A1: "+str(j+lena).ljust(5))

                # ---------------------------------
                # Record where this butterfly's operands and twiddle live.
                slot = (2*bram_counter) & (PE-1)
                if ring == 0:
                    stage = bram_counter // (N//2)
                    addr = (bram_counter & ((N//2)-1)) // (PE//2)

                    BRAM[slot  ][stage][addr] = j
                    BRAM[slot+1][stage][addr] = j+lena
                    BRTW[bram_counter & ((PE//2)-1)][stage][addr] = W_pow

                    bram_counter += 1
                else:
                    stage = bram_counter // (N//6)
                    addr = 3*((bram_counter & ((N//6)-1)) // (PE//2))+ntru_counter

                    BRAM[slot  ][stage][addr] = j
                    BRAM[slot+1][stage][addr] = j+lena
                    BRTW[bram_counter & ((PE//2)-1)][stage][addr] = W_pow

                    # bram_counter advances once per three butterflies.
                    if ntru_counter == 2:
                        bram_counter += 1
                        ntru_counter = 0
                    else:
                        ntru_counter += 1
                # ---------------------------------

            # Skip over the upper half that was just updated.
            start += 2*lena
        lena = 2*lena

    # Final scaling by the inverse of the number of residue blocks.
    N_inv = modinv(N//findeg,q)
    B = [(coeff * N_inv) % q for coeff in B]

    return B,BRAM,BRTW
421 | 
# A unified polynomial multiplication algorithm for all methods mentioned above
# ring: 0 --> NWC  (x^n + 1)
# -- findeg: 1
# -- findeg: 2
# -- findeg: 4
# --   ...
# ring: 1 --> NTRU (x^n - x^(n/2) + 1)
# -- findeg: 3
# -- findeg: 6
# -- findeg: 12
# --   ...
# NOTE: Later I can add PWC (x^n - 1)
# NOTE: Later I can add pure NTT/INTT operations
def CRTBasedModPolMul_Unified(A,B,w,w_inv,q,ring,findeg,ntrupowersf=None,ntrupowersb=None,ntrupowersi=None):
    """Multiply A and B in the selected ring and report the address tables.

    A, B        : input polynomials (coefficient lists of equal length)
    w, w_inv    : twiddle factor and its inverse modulo q
    q           : coefficient modulus
    ring        : 0 for NWC, anything else for NTRU
    findeg      : size of the final residue polynomials
    ntrupowersf : exponent table for the forward transform (NTRU only)
    ntrupowersb : exponent table for the base multiplications (NTRU only)
    ntrupowersi : exponent table for the inverse transform (NTRU only)

    Returns (C, ABR, ATW, CBR, CTW): the product, the BRAM/twiddle tables
    recorded while transforming A, and those recorded by the inverse
    transform. The tables recorded while transforming B are discarded.
    """
    # None sentinels replace the former shared mutable list defaults.
    if ntrupowersf is None:
        ntrupowersf = []
    if ntrupowersb is None:
        ntrupowersb = []
    if ntrupowersi is None:
        ntrupowersi = []

    # --------------------------------------------- Initial step
    # NWC needs no initial reduction; NTRU does.
    A_r = list(A)
    B_r = list(B)

    if ring != 0:
        if DEBUG_DIS: print("---- Pre-processing:")
        half_a = len(A)//2
        half_b = len(B)//2
        wk = pow(w,len(A)//6,q)
        for i in range(half_a):
            t1 = (wk*A[i+half_a]) % q
            t2 = (wk*B[i+half_b]) % q
            A_r[i+half_a] = (A[i]+A[i+half_a]-t1)%q
            A_r[i]        = (A[i]            +t1)%q
            B_r[i+half_b] = (B[i]+B[i+half_b]-t2)%q
            B_r[i]        = (B[i]            +t2)%q
            if DEBUG_DIS: print("W: "+str(len(A)//6).ljust(5)+" A0: "+str(i).ljust(5)+" A1: "+str(i+len(A)//2).ljust(5))

    # --------------------------------------------- NTT
    if DEBUG_DIS: print("---- NTT(A)")
    A_ntt,ABR,ATW = CRT_Iterative_Unified_NR(A_r,w,q,ring,findeg,ntrupowersf)
    if DEBUG_DIS: print("---- NTT(B)")
    B_ntt,_,_ = CRT_Iterative_Unified_NR(B_r,w,q,ring,findeg,ntrupowersf)

    # --------------------------------------------- Degree-findeg modular polynomial multiplications
    C_ntt = [0] * len(A)
    if DEBUG_DIS: print("---- Coefficient-wise multiplication:")
    for i in range(len(A)//findeg):
        if ring == 0:
            # NWC
            w_pow = 2*intReverse(i,int(log(len(A)//findeg,2)))+1
        else:
            # NTRU
            w_pow = ((findeg//3)*ntrupowersb[i*(findeg//3)])

        lo = findeg*i
        hi = lo+findeg

        if DEBUG_DIS:
            if findeg == 1:
                print("A: {}".format(i))
            else:
                print("W: "+str(w_pow).ljust(5)+" A: {}".format(range(lo,hi)))

        wk = pow(w,w_pow,q)
        C_ntt[lo:hi] = PolWiseMult(A_ntt[lo:hi],B_ntt[lo:hi],wk,findeg,q)

    # --------------------------------------------- INTT
    if DEBUG_DIS: print("---- INTT(C)")
    C,CBR,CTW = ICRT_Iterative_Unified_RN(C_ntt,w_inv,q,ring,findeg,ntrupowersi)

    # --------------------------------------------- Final step
    # NWC needs no final reconstruction; NTRU does.
    if ring != 0:
        if DEBUG_DIS: print("---- Post-processing:")
        half_c = len(C)//2
        wk = modinv((2*pow(w,(len(C)//6),q)-1)%q,q)

        for i in range(half_c):
            t = ((C[i]-C[i+half_c])*wk)%q   # t = f[i+N//2]
            C[i       ] = (C[i]+C[i+half_c]-t)%q
            C[i+half_c] = (2*t)%q
            if DEBUG_DIS: print("W: "+str("P").ljust(5)+" A0: "+str(i).ljust(5)+" A1: "+str(i+len(C)//2).ljust(5))

    return C,ABR,ATW,CBR,CTW
512 | 
# ----------------------------------------------------------

# Parameter generation
#
# Pick n and the bit-size of q, then search for a prime q with
# q = 1 (mod 2n) or q = 1 (mod n). The roots of unity used by the
# polynomial multiplications are derived from that n and q.

# Parameters
mod     = 2 # if 1 --> q = 1 (mod n), if 2 --> q = 1 (mod 2n)
n       = 8
q_bit   = 10

q = w = w_inv = psi = psi_inv = 0

# Generate parameters: keep drawing primes until one admits a root of unity
wfound = False
while not wfound:
    q = generate_large_prime(q_bit)

    # require q = 1 (mod n or 2n)
    while q % (mod*n) != 1:
        q = generate_large_prime(q_bit)

    # smallest candidate in [2, q-2] that passes the root-of-unity test
    for i in range(2,q-1):
        wfound = isrootofunity(i,mod*n,q)
        if not wfound:
            continue

        if mod == 1:
            # i is taken as w directly; no psi in this mode
            psi, psi_inv = 0, 0
            w = i
        else:
            # i is taken as psi, and w is its square
            psi     = i
            psi_inv = modinv(psi,q)
            w       = pow(psi,2,q)
        w_inv = modinv(w,q)
        break

# Print parameters
print("Parameters (NWC)")
print("n      : {}".format(n))
print("q      : {}".format(q))
print("w      : {}".format(w))
print("w_inv  : {}".format(w_inv))
print("psi    : {}".format(psi))
print("psi_inv: {}".format(psi_inv))
print("")
567 | 
# Parameters (NTRU)
m       = 3*n
mq_bit  = 10

mq = mw = mw_inv = 0

# Generate parameters: keep drawing primes until one admits an m-th root of unity
wfound = False
while not wfound:
    mq = generate_large_prime(mq_bit)

    # require mq = 1 (mod m)
    while mq % m != 1:
        mq = generate_large_prime(mq_bit)

    # smallest candidate in [2, mq-2] that passes the root-of-unity test
    for i in range(2,mq-1):
        wfound = isrootofunity(i,m,mq)
        if wfound:
            mw     = i
            mw_inv = modinv(mw,mq)
            break

# m,mq,mw,mw_inv = 192,769,4,577

# Exponents of the twiddle factors for NTRU (forward and inverse transform).
# Deriving them on the fly is awkward, so the exponent tables are built
# once here, before any transform runs.
nf = [0]*(m//3) # forward

nf[0] = 0
nf[1] = m//6
nf[2] = nf[1]//2
nf[3] = (5*nf[1])//2

# Each further level derives entries j and j+1 from entry j//2.
i = 2
while (1 << i) < (m//3):
    for j in range(1 << i, 1 << (i+1), 2):
        nf[j]   =  nf[j//2]//2
        nf[j+1] = (nf[j//2]+(m//2))//2
    i += 1

ntrupowersf = nf[2:]

# Inverse table: slices of the forward table taken from the back,
# the first m//6 wide and each following slice half as wide.
ntrupowersi = [] # inverse

idxe = len(ntrupowersf)
idxs = idxe-(m//6)
for i in range(int(log(m//6,2))):
    ntrupowersi = ntrupowersi + ntrupowersf[idxs:idxe]
    idxe = idxs
    idxs = idxs - ((m//12)>>i)

# Base-multiplication table: each inverse exponent followed by itself plus m//2.
ntrupowersb = [0]*(m//3) # basemul

for i in range(m//6):
    ntrupowersb[2*i]   = ntrupowersi[i]
    ntrupowersb[2*i+1] = ntrupowersi[i] + (m//2)

# print(ntrupowersf)
# print(ntrupowersb)
# print(ntrupowersi)

print("Parameters (NTRU)")
print("m      : {}".format(m))
print("mq     : {}".format(mq))
print("mw     : {}".format(mw))
print("mw_inv : {}".format(mw_inv))
print("")
638 | 
#NOTE: Comment Out Reference Method for Large Parameters

# Demo
# Random operands for the NWC runs (n coefficients below q)
A = [randint(0,q-1) for _ in range(n)]
B = [randint(0,q-1) for _ in range(n)]

# Random operands for the NTRU run (m coefficients below mq)
A_ntru = [randint(0,mq-1) for _ in range(m)]
B_ntru = [randint(0,mq-1) for _ in range(m)]

# Reduction polynomials, lowest degree first
pwc  = [-1] + [0]*(n-1) + [1]
nwc  = [1]  + [0]*(n-1) + [1]
ntru = [1]  + [0]*(int(m/2)-1) + [-1] + [0]*(int(m/2)-1) + [1]

# NTT
if DEBUG_DIS:
    print("\n-------- Addressing for NTT --------")
N0,N0BR,N0TW = Radix2_DIT_Iterative_NTT_NR(A,w,q)
if DEBUG_DIS:
    print("\n-------- Addressing for INTT --------")
N1,N1BR,N1TW = Radix2_DIF_Iterative_INTT_RN(N0,w_inv,q)

# POLMUL
# NWC - findeg=1
if DEBUG_DIS:
    print("\n-------- Addressing for NWC - findeg=1 --------")
R0,R0BRF,R0TWF,R0BRI,R0TWI = CRTBasedModPolMul_Unified(A,B,psi,psi_inv,q,ring=0,findeg=1)
# NWC - findeg=2
if DEBUG_DIS:
    print("\n-------- Addressing for NWC - findeg=2 --------")
R1,R1BRF,R1TWF,R1BRI,R1TWI = CRTBasedModPolMul_Unified(A,B,w,w_inv,q,ring=0,findeg=2)
# NTRU - findeg=3
if DEBUG_DIS:
    print("\n-------- Addressing for NTRU - findeg=3 --------")
ring, findeg = 1,3
R2,R2BRF,R2TWF,R2BRI,R2TWI = CRTBasedModPolMul_Unified(A_ntru,B_ntru,mw,mw_inv,mq,ring,findeg,ntrupowersf,ntrupowersb,ntrupowersi)
669 | 
# Print memory structure
def PrintBRAM(BRAM,ring=0,findeg=1):
    """Render a BRAM address table as text, one section per stage.

    BRAM   : table indexed as BRAM[column][stage][address]
    ring   : 0 sizes the table from n, anything else from 3*n
    findeg : final residue size used to derive the number of stages
    Returns the formatted string; columns are shown in pairs (2*i, 2*i+1).
    Reads the module-level n and PE.
    """
    size = n if ring == 0 else 3*n
    stage_count = int(math.log(size//findeg, 2))
    if ring != 0:
        stage_count = stage_count - 1
    depth = size//PE
    pairs = range(PE//2)

    parts = []
    for stage in range(stage_count):
        parts.append("*************************************************** stage="+str(stage)+"\n")

        # column header
        parts.append("BRAM:")
        parts.extend("\t|"+str(2*p).ljust(5)+str(2*p+1).ljust(4)+"|" for p in pairs)
        parts.append("\n")

        # underline
        parts.append("     ")
        parts.extend("\t----------" for _ in pairs)
        parts.append("\n")

        # one row per address
        for addr in range(depth):
            parts.append("AD"+str(addr)+":")
            parts.extend(
                "\t|"+str(BRAM[2*p][stage][addr]).ljust(5)+str(BRAM[2*p+1][stage][addr]).ljust(4)+"|"
                for p in pairs
            )
            parts.append("\n")

    return "".join(parts)
698 | 
def PrintBRTW(BRTW,ring=0,findeg=1):
    """Render a twiddle-exponent table as text, one section per stage.

    BRTW   : table indexed as BRTW[column][stage][address]
    ring   : 0 sizes the table from n, anything else from 3*n
    findeg : final residue size used to derive the number of stages
    Returns the formatted string with PE//2 columns.
    Reads the module-level n and PE.
    """
    size = n if ring == 0 else 3*n
    stage_count = int(math.log(size//findeg, 2))
    if ring != 0:
        stage_count = stage_count - 1
    depth = size//PE
    columns = range(PE//2)

    parts = []
    for stage in range(stage_count):
        parts.append("*************************************************** stage="+str(stage)+"\n")

        # column header
        parts.append("TWID:")
        parts.extend("\t|"+str(c).ljust(5)+"|" for c in columns)
        parts.append("\n")

        # underline
        parts.append("     ")
        parts.extend("\t------" for _ in columns)
        parts.append("\n")

        # one row per address
        for addr in range(depth):
            parts.append("AD"+str(addr)+":")
            parts.extend("\t|"+str(BRTW[c][stage][addr]).ljust(5)+"|" for c in columns)
            parts.append("\n")

    return "".join(parts)
726 | 
# Render every recorded table to text.
# Memory (BRAM) tables; the NTT/INTT and NWC findeg=1 runs use the default ring=0, findeg=1.
FNTT_BR   = PrintBRAM(N0BR)
INTT_BR   = PrintBRAM(N1BR)
NWC1F_BR  = PrintBRAM(R0BRF)
NWC1I_BR  = PrintBRAM(R0BRI)
NWC2F_BR  = PrintBRAM(R1BRF, ring=0, findeg=2)
NWC2I_BR  = PrintBRAM(R1BRI, ring=0, findeg=2)
NTRU3F_BR = PrintBRAM(R2BRF, ring=1, findeg=3)
NTRU3I_BR = PrintBRAM(R2BRI, ring=1, findeg=3)

# Twiddle-exponent tables, same parameter choices as above.
FNTT_TW   = PrintBRTW(N0TW)
INTT_TW   = PrintBRTW(N1TW)
NWC1F_TW  = PrintBRTW(R0TWF)
NWC1I_TW  = PrintBRTW(R0TWI)
NWC2F_TW  = PrintBRTW(R1TWF, ring=0, findeg=2)
NWC2I_TW  = PrintBRTW(R1TWI, ring=0, findeg=2)
NTRU3F_TW = PrintBRTW(R2TWF, ring=1, findeg=3)
NTRU3I_TW = PrintBRTW(R2TWI, ring=1, findeg=3)
744 | 
# Write to txt
# Each report is written inside a `with` block so the file is closed even
# when a write fails. File names are <KIND>_{mem,tw}_N<size>_PE<PE_NUMBER>.txt.
if DEBUG_TXT == 1:
    rule  = "----------------------------------------------------------------------"
    tag_n = "_N"+str(n)+"_PE"+str(PE_NUMBER)+".txt"   # NTT/INTT/NWC reports (size n)
    tag_m = "_N"+str(m)+"_PE"+str(PE_NUMBER)+".txt"   # NTRU reports (size m)

    # Data
    with open("FNTT_mem"+tag_n,"w") as FNTT_BR_TXT:
        FNTT_BR_TXT.write(FNTT_BR)

    with open("INTT_mem"+tag_n,"w") as INTT_BR_TXT:
        INTT_BR_TXT.write(INTT_BR)

    with open("NWC1_mem"+tag_n,"w") as NWC1_BR_TXT:
        NWC1_BR_TXT.write(rule+" Forward NTT (x2)\n")
        NWC1_BR_TXT.write(NWC1F_BR)
        NWC1_BR_TXT.write(rule+" Coefficient-wise multiplication\n")
        NWC1_BR_TXT.write(rule+" Inverse NTT\n")
        NWC1_BR_TXT.write(NWC1I_BR)

    with open("NWC2_mem"+tag_n,"w") as NWC2_BR_TXT:
        NWC2_BR_TXT.write(rule+" Forward NTT (x2)\n")
        NWC2_BR_TXT.write(NWC2F_BR)
        NWC2_BR_TXT.write(rule+" Degree-2 polynomial-wise multiplication\n")
        NWC2_BR_TXT.write(rule+" Inverse NTT\n")
        NWC2_BR_TXT.write(NWC2I_BR)

    with open("NTRU3_mem"+tag_m,"w") as NTRU3_BR_TXT:
        NTRU3_BR_TXT.write(rule+" First Reduction\n")
        NTRU3_BR_TXT.write(rule+" Forward NTT (x2)\n")
        NTRU3_BR_TXT.write(NTRU3F_BR)
        # The NTRU run uses findeg=3, so the base products are degree-3
        # (this label used to say "Degree-2", copied from the NWC2 report).
        NTRU3_BR_TXT.write(rule+" Degree-3 polynomial-wise multiplication\n")
        NTRU3_BR_TXT.write(rule+" Inverse NTT\n")
        NTRU3_BR_TXT.write(NTRU3I_BR)
        NTRU3_BR_TXT.write(rule+" Final Reconstruction\n")

    # Twiddle
    with open("FNTT_tw"+tag_n,"w") as FNTT_TW_TXT:
        FNTT_TW_TXT.write(FNTT_TW)

    with open("INTT_tw"+tag_n,"w") as INTT_TW_TXT:
        INTT_TW_TXT.write(INTT_TW)

    with open("NWC1_tw"+tag_n,"w") as NWC1_TW_TXT:
        NWC1_TW_TXT.write(rule+" Forward NTT (x2)\n")
        NWC1_TW_TXT.write(NWC1F_TW)
        NWC1_TW_TXT.write(rule+" Coefficient-wise multiplication\n")
        NWC1_TW_TXT.write(rule+" Inverse NTT\n")
        NWC1_TW_TXT.write(NWC1I_TW)

    with open("NWC2_tw"+tag_n,"w") as NWC2_TW_TXT:
        NWC2_TW_TXT.write(rule+" Forward NTT (x2)\n")
        NWC2_TW_TXT.write(NWC2F_TW)
        NWC2_TW_TXT.write(rule+" Degree-2 polynomial-wise multiplication\n")
        NWC2_TW_TXT.write(rule+" Inverse NTT\n")
        NWC2_TW_TXT.write(NWC2I_TW)

    with open("NTRU3_tw"+tag_m,"w") as NTRU3_TW_TXT:
        NTRU3_TW_TXT.write(rule+" First Reduction\n")
        NTRU3_TW_TXT.write(rule+" Forward NTT (x2)\n")
        NTRU3_TW_TXT.write(NTRU3F_TW)
        NTRU3_TW_TXT.write(rule+" Degree-3 polynomial-wise multiplication\n")
        NTRU3_TW_TXT.write(rule+" Inverse NTT\n")
        NTRU3_TW_TXT.write(NTRU3I_TW)
        NTRU3_TW_TXT.write(rule+" Final Reconstruction\n")

    # Summary of what was written
    print("")
    print("-------- Generated:")
    for report in ("FNTT_mem","FNTT_tw","INTT_mem","INTT_tw","NWC1_mem","NWC1_tw","NWC2_mem","NWC2_tw"):
        print("* "+report+tag_n)
    for report in ("NTRU3_mem","NTRU3_tw"):
        print("* "+report+tag_m)
830 | 
# Latency
# Cycle-count estimates. "Best" counts only the butterfly passes plus the
# extra per-coefficient passes; "Expected" adds latency_pip+latency_stg
# to every stage (and once more for the NWC2/NTRU3 multiplications).
if DEBUG_LAT:
    print("")
    print("-------- Latency (Cycle Counts) -------- N:{} M:{} PE:{}".format(n,m,PE_NUMBER))

    full_stages  = int(log(n,2))
    short_stages = int(log(n,2)-1)
    pass_n = n//PE_NUMBER
    pass_m = m//PE_NUMBER
    fill   = (latency_pip+latency_stg)

    fntt_best = full_stages*(n//PE)
    fntt_expt = full_stages*((n//PE)+latency_pip+latency_stg)
    intt_best = fntt_best + pass_n
    intt_expt = fntt_expt + pass_n
    nwc1_best = 2*full_stages*(n//PE) + 2*pass_n
    nwc1_expt = 2*full_stages*((n//PE)+latency_pip+latency_stg) + 2*pass_n
    nwc2_best = 2*short_stages*(n//PE) + pass_n + 5*pass_n
    nwc2_expt = 2*short_stages*((n//PE)+latency_pip+latency_stg) + pass_n + 5*pass_n + fill
    ntru3_best = 2*short_stages*(m//PE) + pass_m + 11*pass_m
    ntru3_expt = 2*short_stages*((m//PE)+latency_pip+latency_stg) + pass_m + 11*pass_m + fill

    print("NTT   -- Best: "+str(fntt_best).ljust(6)+" Expected: ~"+str(fntt_expt).ljust(6))
    print("INTT  -- Best: "+str(intt_best).ljust(6)+" Expected: ~"+str(intt_expt).ljust(6))
    print("NWC1  -- Best: "+str(nwc1_best).ljust(6)+" Expected: ~"+str(nwc1_expt).ljust(6))
    print("NWC2  -- Best: "+str(nwc2_best).ljust(6)+" Expected: ~"+str(nwc2_expt).ljust(6))
    print("NTRU3 -- Best: "+str(ntru3_best).ljust(6)+" Expected: ~"+str(ntru3_expt).ljust(6))
850 | 
851 | #
852 | 


--------------------------------------------------------------------------------
/stats/generate_prime.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Pedro Alves
 2 | 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | 
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import random
16 | import math
17 | import sys
18 | 
def miller_rabin(p,s=11):
    """Miller-Rabin probabilistic primality test.

    p : integer to test
    s : number of random witnesses to try
    Returns False when a witness proves p composite, True when all s
    witnesses pass (p is then prime with high probability).
    Values below 4 are answered directly, since no witness in {2,...,p-2}
    exists for them.
    """
    if p < 4:
        return p in (2, 3)

    # decompose p-1 as 2**u * r with r odd; integer shifts keep this exact
    # for arbitrarily large p (float division would lose the low bits)
    r = p-1
    u = 0
    while r&1 == 0: # true while the last bit of r is zero
        u += 1
        r >>= 1

    # apply miller_rabin primality test
    for _ in range(s):
        a = random.randrange(2,p-1) # choose random a in {2,3,...,p-2}
        z = pow(a,r,p)

        if z != 1 and z != p-1:
            # square up to u-1 times looking for p-1
            for _ in range(u-1):
                if z != p-1:
                    z = pow(z,2,p)
                    if z == 1:
                        # non-trivial square root of 1 => composite
                        return False
                else:
                    break
            if z != p-1:
                return False
    return True
43 | 
44 | 
def is_prime(n,s=11):
    """Return True when n is (very probably) prime.

    n : integer to test
    s : number of Miller-Rabin rounds used when trial division is inconclusive

    2 is answered directly, other even numbers and values below 2 are
    rejected, then n is trial-divided by every odd prime under 1000 before
    falling back to miller_rabin.
    """
    # lowPrimes is all primes under 1000 except 2 (even numbers are filtered
    # by the parity check below). Trial division by them removes a huge
    # chunk of composites without resorting to Rabin-Miller.
    lowPrimes =   [3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97
                  ,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179
                  ,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269
                  ,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367
                  ,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461
                  ,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571
                  ,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661
                  ,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773
                  ,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883
                  ,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997]

    # 2 is the only even prime; it used to be reported as composite
    if n == 2:
        return True
    if n < 3 or n&1 == 0:
        return False

    for p in lowPrimes:
        if n == p:
            return True
        if n % p == 0:
            return False
    return miller_rabin(n,s)
68 | 
def generate_large_prime(k,s=11):
    """Draw random k-bit integers until one passes is_prime.

    k : desired bit length; candidates are drawn from [2**(k-1), 2**k)
    s : Miller-Rabin security parameter forwarded to is_prime
        (with s=11 the error probability is stated as below 2**-80)
    Returns the first candidate accepted by is_prime.
    Raises Exception after int(100*(log2(k)+1)) unsuccessful attempts.
    """
    max_attempts = int(100*(math.log(k,2)+1)) # number of max attempts
    for _ in range(max_attempts):
        # randrange is mersenne twister and is completely deterministic
        # unusable for serious crypto purposes
        n = random.randrange(2**(k-1),2**(k))
        if is_prime(n,s):
            return n
    # report the number of attempts actually made (the old message always
    # printed the exhausted counter, i.e. 0)
    raise Exception("Failure after %d tries." % max_attempts)
85 | 


--------------------------------------------------------------------------------
/stats/helper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def egcd(a, b):
 3 |     if a == 0:
 4 |         return (b, 0, 1)
 5 |     else:
 6 |         g, y, x = egcd(b % a, a)
 7 |         return (g, x - (b // a) * y, y)
 8 | 
 9 | def modinv(a, m):
10 |     g, x, y = egcd(a, m)
11 |     if g != 1:
12 |         raise Exception('Modular inverse does not exist')
13 |     else:
14 |         return x % m
15 | 
16 | # Bit-Reverse integer
def intReverse(a,n):
    """Return the integer whose n-bit binary representation is that of a, reversed."""
    # Zero-padded binary string of width n (wider if a needs more bits).
    bits = format(a, '0' + str(n) + 'b')
    return int(bits[::-1], 2)
20 | 
21 | # Bit-Reversed index
def indexReverse(a,r):
    """Return a new list in which a[i] is stored at index intReverse(i, r).

    a: input sequence (left unmodified)
    r: bit width handed to intReverse for every index
    """
    permuted = [0]*len(a)
    for position, value in enumerate(a):
        permuted[intReverse(position, r)] = value
    return permuted
29 | 
30 | # Check if input is m-th (could be n or 2n) primitive root of unity of q
def isrootofunity(w,m,q):
    """Check whether w passes the m-th primitive-root-of-unity tests modulo q.

    Returns False unless w^m == 1 and w^(m//2) == q-1 (mod q), and also
    returns False if any of w^1 .. w^(m-1) equals 1.
    """
    # Cheap necessary conditions first.
    if pow(w,m,q) != 1 or pow(w,m//2,q) != (q-1):
        return False

    # Walk w^1, w^2, ..., w^(m-1); none of them may be 1.
    power = w
    for _ in range(m-1):
        if power == 1:
            return False
        power = (power*w) % q
    return True
44 | 


--------------------------------------------------------------------------------
/stats/ntt.py:
--------------------------------------------------------------------------------
   1 | from math import log,floor,ceil
   2 | from helper import *
   3 | 
   4 | matrix = lambda polynomial, col_length: list(zip(*[polynomial[i:i + col_length] for i in range(0, len(polynomial), col_length)]))
   5 | 
   6 | # Cooley-Tukey Butterfly Structure
   7 | # A0,A1: input coefficients
   8 | # W: twiddle factor
   9 | # q: modulus
  10 | # B0,B1: output coefficients
  11 | def CT_Butterfly(A0,A1,W,q):
  12 |     """
  13 |     A0 -------\--|+|-- B0
  14 |                \/
  15 |                /\
  16 |     A1 --|x|--/--|-|-- B1
  17 |     """
  18 |     M = (A1 * W) % q
  19 | 
  20 |     B0 = (A0 + M) % q
  21 |     B1 = (A0 - M) % q
  22 | 
  23 |     return B0,B1
  24 | 
  25 | # Gentleman-Sande Butterfly Structure
  26 | # A0,A1: input coefficients
  27 | # W: twiddle factor
  28 | # q: modulus
  29 | # B0,B1: output coefficients
  30 | def GS_Butterfly(A0,A1,W,q):
  31 |     """
  32 |     A0 --\--|+|------- B0
  33 |           \/
  34 |           /\
  35 |     A1 --/--|-|--|x|-- B1
  36 |     """
  37 |     M0 = (A0 + A1) % q
  38 |     M1 = (A0 - A1) % q
  39 | 
  40 |     B0 = M0
  41 |     B1 = (M1 * W) % q
  42 | 
  43 |     return B0,B1
  44 | 
  45 | class NTT:
  46 |     """
  47 |     - Definition of NTT:
  48 | 
  49 |     Existence condition: q = 1 (mod n) and w: n-th root of unity
  50 | 
  51 |     [a_0, a_1, ..., a_n-1] --> [A_0, A_1, ..., A_n-1]
  52 | 
  53 |     Forward NTT: A_i = sum{j from 0 to n-1}(a_j * w^ij mod q) for i from 0 to n-1
  54 |     Inverse NTT: a_i = sum{j from 0 to n-1}(A_j * w^-ij mod q) for i from 0 to n-1
  55 |     """
  56 | 
  57 |     """
  58 |     List of NTT Algorithms: (Inside the FFT Black Box, by Chu and George)
  59 |     -- Naive NTT (see Wikipedia definition of NTT operation)
  60 |     -- Radix-2 Decimation-in-Time (DIT) Recursive NTT (Cooley-Tukey)
  61 |     -- Radix-2 Decimation-in-Frequency (DIF) Recursive NTT (Gentleman-Sande)
  62 |     -- Radix-2 Decimation-in-Time (DIT) Iterative NTT
  63 |     ---- NR (N: Natural order, R: Reversed Order)
  64 |     ---- RN
  65 |     ---- NN
  66 |     -- Radix-2 Decimation-in-Frequency (DIF) Iterative NTT
  67 |     ---- NR
  68 |     ---- RN
  69 |     ---- NN
  70 |     """
  71 | 
  72 |     """
  73 |     Note: Any forward NTT function can be used for inverse NTT if you give input
  74 |     in proper order and w^-1 instead of w. Besides, INTT requires output
  75 |     coefficients to be multiplied with n^-1 mod q.
  76 |     """
  77 | 
  78 |     """
  79 |     - What is standard order? : 0, 1, ..., n-1
  80 |     - What is reversed/bit-reversed (scrambled) order? : 0, br(1), ..., br(n-1)
  81 |     where the br() function bit-reverses the integer using log2(n) bits
  82 |     """
  83 | 
  84 |     # Naive NTT
  85 |     # A: input polynomial (standard order)
  86 |     # W: twiddle factor
  87 |     # q: modulus
  88 |     # B: output polynomial (standard order)
  89 |     def NaiveNTT_NN(self,A,W,q):
  90 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
  91 |         """
  92 |         Very slow baseline implementation. Do not use for large parameter set.
  93 |         """
  94 |         N = len(A)
  95 |         B = [0]*N
  96 | 
  97 |         for i in range(N):
  98 |             for j in range(N):
  99 |                 B[i] = (B[i] + A[j]*(W**(i*j))) % q
 100 |                 MulCnt = MulCnt+1
 101 |                 AddCnt = AddCnt+1
 102 | 
 103 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 104 | 
 105 |     # Naive NTT
 106 |     # A: input polynomial (standard order)
 107 |     # W: twiddle factor
 108 |     # q: modulus
 109 |     # B: output polynomial (bit-reversed order)
 110 |     def NaiveNTT_NR(self,A,W,q):
 111 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 112 |         """
 113 |         Very slow baseline implementation. Do not use for large parameter set.
 114 |         """
 115 |         N = len(A)
 116 |         B = [0]*N
 117 | 
 118 |         v = int(log(N,2))
 119 | 
 120 |         for i in range(N):
 121 |             for j in range(N):
 122 |                 W_pow = intReverse(i,v)*j
 123 |                 B[i] = (B[i] + A[j]*(W**W_pow)) % q
 124 |                 MulCnt = MulCnt+1
 125 |                 AddCnt = AddCnt+1
 126 | 
 127 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 128 | 
 129 |     # Recursive Radix-2 Decimation-in-Time (DIT) (CT) NTT
 130 |     # A: input polynomial (standard order --> it becomes reversed after recursions)
 131 |     # W: twiddle factor
 132 |     # q: modulus
 133 |     # B: output polynomial (standard order)
 134 |     def Radix2_DIT_Recursive_NTT(self,A,W,q):
 135 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 136 |         N = len(A)
 137 |         B = [0]*N
 138 | 
 139 |         if N == 2:
 140 |             # When N is 2, perform butterfly operation with W = 1
 141 |             B[0] = (A[0] + A[1]) % q
 142 |             B[1] = (A[0] - A[1]) % q
 143 | 
 144 |             MulCnt = MulCnt+1 # This can be avoided in this and Iterative cases
 145 |             AddCnt = AddCnt+1
 146 |             SubCnt = SubCnt+1
 147 |             BtfCnt = BtfCnt+1
 148 | 
 149 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 150 |         else:
 151 |             # Divide input into two (even indices, odd indices)
 152 |             AE = [A[i] for i in range(0,N,2)]
 153 |             AO = [A[i] for i in range(1,N,2)]
 154 | 
 155 |             # Reduce twiddle factor for the next recursion
 156 |             W_new = pow(W,2,q)
 157 | 
 158 |             # Apply NTT operations to the even and odd indices of the input separately
 159 |             BE,Mbe,Abe,Sbe,Bbe = self.Radix2_DIT_Recursive_NTT(AE,W_new,q)
 160 |             BO,Mbo,Abo,Sbo,Bbo = self.Radix2_DIT_Recursive_NTT(AO,W_new,q)
 161 |             MulCnt = MulCnt+Mbe+Mbo
 162 |             AddCnt = AddCnt+Abe+Abo
 163 |             SubCnt = SubCnt+Sbe+Sbo
 164 |             BtfCnt = BtfCnt+Bbe+Bbo
 165 | 
 166 |             # Outputs of first and second NTT operations go to the first and second
 167 |             # half of the array (output array)
 168 |             B = BE+BO
 169 | 
 170 |             # Perform CT-Butterfly where first and second inputs of butterfly
 171 |             # operation are from first and second half of the output respectively
 172 |             # First and second outputs of the butterfly operation go to first and
 173 |             # second half of the array (input array) respectively
 174 |             for i in range(int(N/2)):
 175 |                 B[i], B[i+int(N/2)] = CT_Butterfly(B[i],B[i+int(N/2)],pow(W,i,q),q)
 176 |                 MulCnt = MulCnt+1
 177 |                 AddCnt = AddCnt+1
 178 |                 SubCnt = SubCnt+1
 179 |                 BtfCnt = BtfCnt+1
 180 | 
 181 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 182 | 
 183 |     # Recursive Radix-2 Decimation-in-Frequency (DIF) (GS) NTT
 184 |     # A: input polynomial (standard order)
 185 |     # W: twiddle factor
 186 |     # q: modulus
 187 |     # B: output polynomial (bit-reversed order)
 188 |     def Radix2_DIF_Recursive_NTT(self,A,W,q):
 189 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 190 |         N = len(A)
 191 |         B = [0]*N
 192 | 
 193 |         if N == 2:
 194 |             # When N is 2, perform butterfly operation with W = 1
 195 |             B[0] = (A[0] + A[1]) % q
 196 |             B[1] = (A[0] - A[1]) % q
 197 | 
 198 |             MulCnt = MulCnt+1 # This can be avoided in this and Iterative cases
 199 |             AddCnt = AddCnt+1
 200 |             SubCnt = SubCnt+1
 201 |             BtfCnt = BtfCnt+1
 202 | 
 203 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 204 |         else:
 205 |             # Divide input into two (first half, second half)
 206 | 
 207 |             # Perform GS-Butterfly where first and second inputs of butterfly
 208 |             # operation are from first and second half of the input respectively
 209 |             # First and second outputs of the butterfly operation go to first and
 210 |             # second half of the array (input array) respectively
 211 |             for i in range(int(N/2)):
 212 |                 B[i], B[i+int(N/2)] = GS_Butterfly(A[i],A[i+int(N/2)],pow(W,i,q),q)
 213 |                 MulCnt = MulCnt+1
 214 |                 AddCnt = AddCnt+1
 215 |                 SubCnt = SubCnt+1
 216 |                 BtfCnt = BtfCnt+1
 217 | 
 218 |             # Reduce twiddle factor for the next recursion
 219 |             W_new = pow(W,2,q)
 220 | 
 221 |             # Apply NTT operations to the first and second half of the input separately
 222 |             BE,Mbe,Abe,Sbe,Bbe = self.Radix2_DIF_Recursive_NTT(B[0:int(N/2)],W_new,q)
 223 |             BO,Mbo,Abo,Sbo,Bbo = self.Radix2_DIF_Recursive_NTT(B[int(N/2):N],W_new,q)
 224 |             MulCnt = MulCnt+Mbe+Mbo
 225 |             AddCnt = AddCnt+Abe+Abo
 226 |             SubCnt = SubCnt+Sbe+Sbo
 227 |             BtfCnt = BtfCnt+Bbe+Bbo
 228 | 
 229 |             # Outputs of first and second NTT operations go to the first and second
 230 |             # half of the array (output array)
 231 |             B = BE+BO
 232 | 
 233 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 234 | 
 235 |     # From paper: NTTU: An Area-Efficient Low-Power NTT-Uncoupled Architecture for NTT-Based Multiplication
 236 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - NR
 237 |     # A: input polynomial (standard order)
 238 |     # W: twiddle factor
 239 |     # q: modulus
 240 |     # B: output polynomial (bit-reversed order)
 241 |     def Radix2_DIT_Iterative_NTT_NR(self,A,W,q):
 242 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 243 | 
 244 |         N = len(A)
 245 |         B = [_ for _ in A]
 246 | 
 247 |         for s in range(int(log(N,2)),0,-1):
 248 |             m = 2**s
 249 |             for k in range(int(N/m)):
 250 |                 TW = pow(W,intReverse(k,int(log(N,2))-s)*int(m/2),q)
 251 |                 for j in range(int(m/2)):
 252 |                     u = B[k*m+j]
 253 |                     t = (TW*B[k*m+j+int(m/2)]) % q
 254 | 
 255 |                     B[k*m+j]          = (u+t) % q
 256 |                     B[k*m+j+int(m/2)] = (u-t) % q
 257 | 
 258 |                     MulCnt = MulCnt + 1
 259 |                     AddCnt = AddCnt + 1
 260 |                     SubCnt = SubCnt + 1
 261 |                     BtfCnt = BtfCnt + 1
 262 | 
 263 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 264 | 
 265 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - RN
 266 |     # A: input polynomial (bit-reversed order)
 267 |     # W: twiddle factor
 268 |     # q: modulus
 269 |     # B: output polynomial (standard order)
 270 |     def Radix2_DIT_Iterative_NTT_RN(self,A,W,q):
 271 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 272 | 
 273 |         N = len(A)
 274 |         B = [_ for _ in A]
 275 | 
 276 |         v = int(N/2)
 277 |         m = 1
 278 |         d = 1
 279 | 
 280 |         while m<N:
 281 |             np = 2*m
 282 |             lp = np*(v-1)
 283 |             for k in range(m):
 284 |                 j = k
 285 |                 jl = k + lp
 286 |                 jt = k*v
 287 |                 TW = pow(W,jt,q)
 288 |                 while j < (jl+1):
 289 |                     temp = (TW*B[j+d]) % q
 290 |                     B[j+d] = (B[j] - temp) % q
 291 |                     B[j]   = (B[j] + temp) % q
 292 |                     j = j+np
 293 | 
 294 |                     MulCnt = MulCnt + 1
 295 |                     AddCnt = AddCnt + 1
 296 |                     SubCnt = SubCnt + 1
 297 |                     BtfCnt = BtfCnt + 1
 298 |             v = int(v/2)
 299 |             m = 2*m
 300 |             d = 2*d
 301 | 
 302 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 303 | 
 304 |     # Iterative Radix-2 Decimation-in-Time (DIT) (CT) NTT - NN
 305 |     # A: input polynomial (standard order)
 306 |     # W: twiddle factor
 307 |     # q: modulus
 308 |     # B: output polynomial (standard order)
 309 |     def Radix2_DIT_Iterative_NTT_NN(self,A,W,q):
 310 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 311 | 
 312 |         N = len(A)
 313 |         B = [_ for _ in A]
 314 |         C = [_ for _ in A]
 315 |         # C = [0]*N
 316 | 
 317 |         v = int(N/2)
 318 |         m = 1
 319 |         d = int(N/2)
 320 | 
 321 |         if int(log(v))%2 == 0:
 322 |             nsi = True
 323 |         else:
 324 |             nsi = False
 325 | 
 326 |         while m<N:
 327 |             if nsi:
 328 |                 l = 0
 329 |                 for k in range(m):
 330 |                     jf = 2*k*v
 331 |                     jl = jf + v - 1
 332 |                     jt = k*v
 333 | 
 334 |                     TW = pow(W,jt,q)
 335 | 
 336 |                     for j in range(jf,jl+1):
 337 |                         temp = (TW*B[j+d]) % q
 338 | 
 339 |                         C[l]          = (B[j] + temp) % q
 340 |                         C[l+int(N/2)] = (B[j] - temp) % q
 341 | 
 342 |                         l = l+1
 343 | 
 344 |                         MulCnt = MulCnt + 1
 345 |                         AddCnt = AddCnt + 1
 346 |                         SubCnt = SubCnt + 1
 347 |                         BtfCnt = BtfCnt + 1
 348 |                 nsi = False
 349 |             else:
 350 |                 l = 0
 351 |                 for k in range(m):
 352 |                     jf = 2*k*v
 353 |                     jl = jf + v - 1
 354 |                     jt = k*v
 355 | 
 356 |                     TW = pow(W,jt,q)
 357 | 
 358 |                     for j in range(jf,jl+1):
 359 |                         temp = (TW*C[j+d]) % q
 360 | 
 361 |                         B[l]          = (C[j] + temp) % q
 362 |                         B[l+int(N/2)] = (C[j] - temp) % q
 363 | 
 364 |                         l = l+1
 365 | 
 366 |                         MulCnt = MulCnt + 1
 367 |                         AddCnt = AddCnt + 1
 368 |                         SubCnt = SubCnt + 1
 369 |                         BtfCnt = BtfCnt + 1
 370 |                 nsi = True
 371 |             v = int(v/2)
 372 |             m = 2*m
 373 |             d = int(d/2)
 374 | 
 375 |         return C,MulCnt,AddCnt,SubCnt,BtfCnt
 376 | 
 377 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - NR
 378 |     # A: input polynomial (standard order)
 379 |     # W: twiddle factor
 380 |     # q: modulus
 381 |     # B: output polynomial (bit-reversed order)
 382 |     def Radix2_DIF_Iterative_NTT_NR(self,A,W,q):
 383 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 384 | 
 385 |         N = len(A)
 386 |         B = [_ for _ in A]
 387 | 
 388 |         m = 1
 389 |         v = N
 390 | 
 391 |         while v>1:
 392 |             s = int(v/2)
 393 |             for k in range(m):
 394 |                 jf = k * v
 395 |                 jl = jf + s - 1
 396 |                 jt = 0
 397 |                 for j in range(jf,jl+1):
 398 |                     TW = pow(W,jt,q)
 399 | 
 400 |                     temp = B[j]
 401 | 
 402 |                     B[j  ] = (temp + B[j+s]) % q
 403 |                     B[j+s] = (temp - B[j+s])*TW % q
 404 | 
 405 |                     jt = jt + m
 406 | 
 407 |                     MulCnt = MulCnt + 1
 408 |                     AddCnt = AddCnt + 1
 409 |                     SubCnt = SubCnt + 1
 410 |                     BtfCnt = BtfCnt + 1
 411 |             m = 2*m
 412 |             v = s
 413 | 
 414 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 415 | 
 416 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - RN
 417 |     # A: input polynomial (reversed order)
 418 |     # W: twiddle factor
 419 |     # q: modulus
 420 |     # B: output polynomial (standard order)
 421 |     def Radix2_DIF_Iterative_NTT_RN(self,A,W,q):
 422 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 423 | 
 424 |         N = len(A)
 425 |         B = [_ for _ in A]
 426 | 
 427 |         m = 1
 428 |         v = N
 429 |         d = 1
 430 | 
 431 |         while v>1:
 432 |             for jf in range(m):
 433 |                 j = jf
 434 |                 jt = 0
 435 |                 while j<(N-1):
 436 |                     # bit-reversing jt
 437 |                     TW = pow(W,intReverse(jt,int(log(N>>1,2))),q)
 438 | 
 439 |                     temp = B[j]
 440 | 
 441 |                     B[j]   = (temp + B[j+d]) % q
 442 |                     B[j+d] = (temp - B[j+d])*TW % q
 443 | 
 444 |                     jt = jt+1
 445 |                     j = j + 2*m
 446 | 
 447 |                     MulCnt = MulCnt + 1
 448 |                     AddCnt = AddCnt + 1
 449 |                     SubCnt = SubCnt + 1
 450 |                     BtfCnt = BtfCnt + 1
 451 |             m = 2*m
 452 |             v = int(v/2)
 453 |             d = 2*d
 454 | 
 455 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 456 | 
 457 |     # Iterative Radix-2 Decimation-in-Frequency (DIF) (GS) NTT - NN
 458 |     # A: input polynomial (standard order)
 459 |     # W: twiddle factor
 460 |     # q: modulus
 461 |     # B: output polynomial (standard order)
 462 |     def Radix2_DIF_Iterative_NTT_NN(self,A,W,q):
 463 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 464 | 
 465 |         N = len(A)
 466 |         # requires two memory
 467 |         B = [_ for _ in A]
 468 |         C = [_ for _ in A]
 469 |         # C = [0] * N
 470 | 
 471 |         m = 1
 472 |         v = N
 473 |         d = 1
 474 | 
 475 |         if int(log(v//2))%2 == 0:
 476 |             nsi = True
 477 |         else:
 478 |             nsi = False
 479 | 
 480 |         while v>1:
 481 |             if nsi:
 482 |                 for jf in range(m):
 483 |                     j = jf
 484 |                     jt = 0
 485 |                     k = jf
 486 |                     while j<(N-1):
 487 |                         TW = pow(W,jt,q)
 488 | 
 489 |                         C[j]   = (B[k] + B[k+int(N/2)]) % q
 490 |                         C[j+d] = (B[k] - B[k+int(N/2)])*TW % q
 491 | 
 492 |                         jt = jt + m
 493 |                         j = j + 2*m
 494 |                         k = k + m
 495 | 
 496 |                         MulCnt = MulCnt + 1
 497 |                         AddCnt = AddCnt + 1
 498 |                         SubCnt = SubCnt + 1
 499 |                         BtfCnt = BtfCnt + 1
 500 |                 nsi = False
 501 |             else:
 502 |                 for jf in range(m):
 503 |                     j = jf
 504 |                     jt = 0
 505 |                     k = jf
 506 |                     while j<(N-1):
 507 |                         TW = pow(W,jt,q)
 508 | 
 509 |                         B[j]   = (C[k] + C[k+int(N/2)]) % q
 510 |                         B[j+d] = (C[k] - C[k+int(N/2)])*TW % q
 511 | 
 512 |                         jt = jt + m
 513 |                         j = j + 2*m
 514 |                         k = k + m
 515 | 
 516 |                         MulCnt = MulCnt + 1
 517 |                         AddCnt = AddCnt + 1
 518 |                         SubCnt = SubCnt + 1
 519 |                         BtfCnt = BtfCnt + 1
 520 |                 nsi = True
 521 |             m = 2*m
 522 |             v = int(v/2)
 523 |             d = 2*d
 524 | 
 525 |         return C,MulCnt,AddCnt,SubCnt,BtfCnt
 526 | 
 527 |     ######################################################################## (INTT)
 528 |     """
 529 |     List of INTT Algorithms: NTT algorithms with extra n^-1 mod q multiplication
 530 |     -- Naive NTT (see Wikipedia definition of NTT operation)
 531 |     -- Radix-2 Decimation-in-Time (DIT) Recursive NTT (Cooley-Tukey)
 532 |     -- Radix-2 Decimation-in-Frequency (DIF) Recursive NTT (Gentleman-Sande)
 533 |     -- Radix-2 Decimation-in-Time (DIT) Iterative NTT
 534 |     ---- NR (N: Natural order, R: Reversed Order)
 535 |     ---- RN
 536 |     ---- NN
 537 |     -- Radix-2 Decimation-in-Frequency (DIF) Iterative NTT
 538 |     ---- NR
 539 |     ---- RN
 540 |     ---- NN
 541 |     """
 542 | 
 543 |     def NaiveINTT_NN(self,A,W_inv,q):
 544 |         """
 545 |         Very slow baseline implementation. Do not use for large parameter set.
 546 |         """
 547 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.NaiveNTT_NN(A,W_inv,q)
 548 |         N_inv = modinv(len(A),q)
 549 |         B = [(x*N_inv) % q for x in P]
 550 |         MulCnt = MulCnt + len(P)
 551 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 552 | 
 553 |     def NaiveINTT_NR(self,A,W_inv,q):
 554 |         """
 555 |         Very slow baseline implementation. Do not use for large parameter set.
 556 |         """
 557 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.NaiveNTT_NR(A,W_inv,q)
 558 |         N_inv = modinv(len(A),q)
 559 |         B = [(x*N_inv) % q for x in P]
 560 |         MulCnt = MulCnt + len(P)
 561 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 562 | 
 563 |     def Radix2_DIT_Recursive_INTT(self,A,W_inv,q):
 564 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIT_Recursive_NTT(A,W_inv,q)
 565 |         N_inv = modinv(len(A),q)
 566 |         B = [(x*N_inv) % q for x in P]
 567 |         MulCnt = MulCnt + len(P)
 568 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 569 | 
 570 |     def Radix2_DIF_Recursive_INTT(self,A,W_inv,q):
 571 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIF_Recursive_NTT(A,W_inv,q)
 572 |         N_inv = modinv(len(A),q)
 573 |         B = [(x*N_inv) % q for x in P]
 574 |         MulCnt = MulCnt + len(P)
 575 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 576 | 
 577 |     def Radix2_DIT_Iterative_INTT_NR(self,A,W_inv,q):
 578 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIT_Iterative_NTT_NR(A,W_inv,q)
 579 |         N_inv = modinv(len(A),q)
 580 |         B = [(x*N_inv) % q for x in P]
 581 |         MulCnt = MulCnt + len(P)
 582 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 583 | 
 584 |     def Radix2_DIT_Iterative_INTT_RN(self,A,W_inv,q):
 585 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIT_Iterative_NTT_RN(A,W_inv,q)
 586 |         N_inv = modinv(len(A),q)
 587 |         B = [(x*N_inv) % q for x in P]
 588 |         MulCnt = MulCnt + len(P)
 589 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 590 | 
 591 |     def Radix2_DIT_Iterative_INTT_NN(self,A,W_inv,q):
 592 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIT_Iterative_NTT_NN(A,W_inv,q)
 593 |         N_inv = modinv(len(A),q)
 594 |         B = [(x*N_inv) % q for x in P]
 595 |         MulCnt = MulCnt + len(P)
 596 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 597 | 
 598 |     def Radix2_DIF_Iterative_INTT_NR(self,A,W_inv,q):
 599 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIT_Iterative_NTT_NR(A,W_inv,q)
 600 |         N_inv = modinv(len(A),q)
 601 |         B = [(x*N_inv) % q for x in P]
 602 |         MulCnt = MulCnt + len(P)
 603 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 604 | 
 605 |     def Radix2_DIF_Iterative_INTT_RN(self,A,W_inv,q):
 606 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIF_Iterative_NTT_RN(A,W_inv,q)
 607 |         N_inv = modinv(len(A),q)
 608 |         B = [(x*N_inv) % q for x in P]
 609 |         MulCnt = MulCnt + len(P)
 610 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 611 | 
 612 |     def Radix2_DIF_Iterative_INTT_NN(self,A,W_inv,q):
 613 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.Radix2_DIF_Iterative_NTT_NN(A,W_inv,q)
 614 |         N_inv = modinv(len(A),q)
 615 |         B = [(x*N_inv) % q for x in P]
 616 |         MulCnt = MulCnt + len(P)
 617 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 618 | 
 619 |     """
 620 |     CRT-based NTT (it is also used for polynomial multiplication in x^n-1)
 621 | 
 622 |     Example for 8-pt NTT (w -> 8th root of unity)
 623 |     Input  -> Standard Order
 624 |     Output -> Bit-reversed Order (We can make it standard order)
 625 | 
 626 |                                       x^8 - 1 --------------------------------------------- Stage #0
 627 |                                       /     \
 628 |                                      /       \
 629 |                                     /         \
 630 |                                    /           \
 631 |                                   /             \
 632 |                                  /               \
 633 |                                 /                 \
 634 |                                /                   \
 635 |                               /                     \
 636 |                              /                       \
 637 |                       x^4 - 1                         x^4 + 1 ----------------------------- Stage #1
 638 |                          ||                              ||
 639 |                       x^4 - 1                         x^4 - w^4
 640 |                      /  \                                   /  \
 641 |                     /    \                                 /    \
 642 |                    /      \                               /      \
 643 |             x^2 - 1        x^2 + 1               x^2 - w^2        x^2 + w^2 --------------- Stage #2
 644 |                ||             ||                     ||               ||
 645 |             x^2 - 1       x^2 - w^4              x^2 - w^2        x^2 - w^6
 646 |            / \               / \                   / \               / \
 647 |           /   \             /   \                 /   \             /   \
 648 |          /     \           /     \               /     \           /     \
 649 |     x - 1     x + 1   x - w^2   x + w^2       x - w   x + w   x - w^3   x + w^3 ----------- Stage #3
 650 |       ||        ||       ||       ||            ||      ||       ||       ||
 651 |     x - 1    x - w^4  x - w^2   x - w^6      x - w   x - w^5  x - w^3   x - w^7
 652 | 
 653 |     -- Recursive
 654 |     -- Full
 655 |     -- Iterative (converted to an optimized algorithm) --> Already presented above.
 656 |     ---- CT
 657 |     ---- GS
 658 |     """
 659 | 
 660 |     # CRT-based NTT (recursive)
 661 |     # A: input polynomial (standard order)
 662 |     # W: twiddle factor
 663 |     # q: modulus
 664 |     # B: output polynomial (bit-reversed order)
 665 |     def CRT_Recursive_NTT(self,A,W,q,pw=0,depth=1):
 666 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 667 |         N = len(A)
 668 | 
 669 |         if N == 1:
 670 |             B = [_ for _ in A]
 671 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 672 |         else:
 673 |             B = [0]*N
 674 | 
 675 |             W_N = pow(W,pw,q)
 676 | 
 677 |             # reduce
 678 |             B[0:int(N/2)] = [(A[i] + A[i+int(N/2)]*W_N) % q for i in range(int(N/2))]
 679 |             B[int(N/2):N] = [(A[i] - A[i+int(N/2)]*W_N) % q for i in range(int(N/2))]
 680 | 
 681 |             MulCnt = MulCnt + 1
 682 |             AddCnt = AddCnt + 1
 683 |             SubCnt = SubCnt + 1
 684 |             BtfCnt = BtfCnt + 1
 685 | 
 686 |             # recall functions
 687 |             B[0:int(N/2)],Mbe,Abe,Sbe,Bbe = self.CRT_Recursive_NTT(B[0:int(N/2)], W,q,int(pw/2)                 ,2*depth)
 688 |             B[int(N/2):N],Mbo,Abo,Sbo,Bbo = self.CRT_Recursive_NTT(B[int(N/2):N], W,q,int(pw/2)+int((N/4)*depth),2*depth)
 689 | 
 690 |             MulCnt = MulCnt + Mbe + Mbo
 691 |             AddCnt = AddCnt + Abe + Abo
 692 |             SubCnt = SubCnt + Sbe + Sbo
 693 |             BtfCnt = BtfCnt + Bbe + Bbo
 694 | 
 695 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 696 | 
 697 |     # CRT-based NTT (full)
 698 |     # A: input polynomial (standard order)
 699 |     # W: twiddle factor
 700 |     # q: modulus
 701 |     # B: output polynomial (standard order)
 702 |     def CRT_Full_NTT(self,A,W,q):
 703 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 704 | 
 705 |         N = len(A)
 706 |         B = [0]*N
 707 | 
 708 |         # If i or j is bit-reversed, output will be in bit-reversed order
 709 |         for j in range(N):
 710 |             C = [x*pow(W**j,i,q) % q for i,x in enumerate(A)]
 711 |             B[j] = sum(C) % q
 712 |             MulCnt = MulCnt + len(A)
 713 |             AddCnt = AddCnt + len(C)
 714 | 
 715 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 716 | 
 717 |     ######################################################################## (INTT)
 718 | 
 719 |     """
 720 |     CRT-based INTT (it is also used for polynomial multiplication in x^n-1)
 721 |     It is NTT algorithms with extra n^-1 mod q multiplication
 722 | 
 723 |     -- Recursive
 724 |     -- Full
 725 |     -- Iterative (converted to an optimized algorithm) --> Already stated algorithms above!
 726 |     ---- CT
 727 |     ---- GS
 728 |     """
 729 | 
 730 |     def CRT_Recursive_INTT(self,A,W_inv,q):
 731 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.CRT_Recursive_NTT(A,W_inv,q)
 732 |         N_inv = modinv(len(A),q)
 733 |         B = [(x*N_inv) % q for x in P]
 734 |         MulCnt = MulCnt + len(P)
 735 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 736 | 
 737 |     def CRT_Full_INTT(self,A,W_inv,q):
 738 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.CRT_Full_NTT(A,W_inv,q)
 739 |         N_inv = modinv(len(A),q)
 740 |         B = [(x*N_inv) % q for x in P]
 741 |         MulCnt = MulCnt + len(P)
 742 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 743 | 
 744 |     """
 745 |     List of NTT Algorithms: (from literature)
 746 |     -- Recursive Cooley-Tukey (CT) NTT (see http://people.scs.carleton.ca/~maheshwa/courses/5703COMP/16Fall/FFT_Report.pdf)
 747 |     -- Iterative NTT (see https://eprint.iacr.org/2019/109.pdf)
 748 |     -- Constant-Geometry NTT (see https://tches.iacr.org/index.php/TCHES/article/view/8344/7692 or https://eprint.iacr.org/2014/646.pdf)
 749 |        (NOTE: There are typos in the Algorithm presented in the papers)
 750 |     -- Stockham NTT (see https://ieeexplore.ieee.org/document/8717615)
 751 |     -- Four-Step NTT (see https://eprint.iacr.org/2015/818.pdf)
 752 |     """
 753 | 
 754 |     # Cooley-Tukey NTT
 755 |     # A: input polynomial (standard order)
 756 |     # W: twiddle factor
 757 |     # q: modulus
 758 |     # B: output polynomial (standard order)
 759 |     def CooleyTukeyNTT(self,A,W,q):
 760 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 761 | 
 762 |         N = len(A)
 763 | 
 764 |         if (N == 2):
 765 |             B = [0] * N
 766 | 
 767 |             B[0] = (A[0] + A[1]) % q
 768 |             B[1] = (A[0] - A[1]) % q
 769 | 
 770 |             MulCnt = MulCnt+1 # This can be avoided in this and Iterative cases
 771 |             AddCnt = AddCnt+1
 772 |             SubCnt = SubCnt+1
 773 |             BtfCnt = BtfCnt+1
 774 | 
 775 |             return B,MulCnt,AddCnt,SubCnt,BtfCnt
 776 |         else:
 777 |             B = [0] * N
 778 |             w = 1
 779 | 
 780 |             A_even = [0] * (N >> 1)
 781 |             A_odd  = [0] * (N >> 1)
 782 | 
 783 |             for i in range(N >> 1):
 784 |                 A_even[i] = A[2 * i]
 785 |                 A_odd[i]  = A[2 * i + 1]
 786 | 
 787 |             B_even,Mbe,Abe,Sbe,Bbe = self.CooleyTukeyNTT(A_even,(W * W % q),q)
 788 |             B_odd ,Mbo,Abo,Sbo,Bbo = self.CooleyTukeyNTT(A_odd, (W * W % q),q)
 789 | 
 790 |             MulCnt = MulCnt + Mbe + Mbo
 791 |             AddCnt = AddCnt + Abe + Abo
 792 |             SubCnt = SubCnt + Sbe + Sbo
 793 |             BtfCnt = BtfCnt + Bbe + Bbo
 794 | 
 795 |             for i in range(N >> 1):
 796 |                 B[i]            = (B_even[i] + w * B_odd[i]) % q
 797 |                 B[i + (N >> 1)] = (B_even[i] - w * B_odd[i]) % q
 798 | 
 799 |                 w = w * W
 800 | 
 801 |                 MulCnt = MulCnt + 1
 802 |                 AddCnt = AddCnt + 1
 803 |                 SubCnt = SubCnt + 1
 804 |                 BtfCnt = BtfCnt + 1
 805 | 
 806 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 807 | 
 808 |     # Iterative NTT
 809 |     # A: input polynomial (standard order)
 810 |     # W: twiddle factor
 811 |     # q: modulus
 812 |     # B: output polynomial (bit-reversed order)
 813 |     def IterativeNTT(self,A,W,q):
 814 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 815 | 
 816 |         N = len(A)
 817 |         B = [x for x in A]
 818 | 
 819 |         v = int(log(N, 2))
 820 | 
 821 |         for i in range(0, v):
 822 |             for j in range(0, (2 ** i)):
 823 |                 for k in range(0, (2 ** (v - i - 1))):
 824 |                     s = j * (2 ** (v - i)) + k
 825 |                     t = s + (2 ** (v - i - 1))
 826 | 
 827 |                     w = (W ** ((2 ** i) * k)) % q
 828 | 
 829 |                     as_temq = B[s]
 830 |                     at_temq = B[t]
 831 | 
 832 |                     B[s] = (as_temq + at_temq) % q
 833 |                     B[t] = ((as_temq - at_temq) * w) % q
 834 | 
 835 |                     MulCnt = MulCnt + 1
 836 |                     AddCnt = AddCnt + 1
 837 |                     SubCnt = SubCnt + 1
 838 |                     BtfCnt = BtfCnt + 1
 839 | 
 840 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
 841 | 
 842 |     # Four-Step NTT
 843 |     # A: input polynomial (standard order)
 844 |     # W: twiddle factor
 845 |     # q: modulus
 846 |     # size: input polynomial partition
 847 |     # B: output polynomial (standard order)
 848 |     def FourStepNTT(self,A,W,q,size):
 849 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 850 |         """
 851 |         This is a unified four-step NTT algorithm for both forward and inverse
 852 |         NTT operations. The coefficients of input polynomial should be given in
 853 |         standard order. The output is generated in standard order. Forward NTT
 854 |         uses twiddle factors and inverse NTT uses modular inverse of twiddle factors.
 855 | 
 856 |         This algorithm divides NTT operation into smaller parts. "size" input
 857 |         determines the size of these small NTT operations. For details of the
 858 |         algorithm, see the paper: https://eprint.iacr.org/2015/818.pdf
 859 |         """
 860 |         N = len(A)
 861 | 
 862 |         poly = [_ for _ in A]
 863 | 
 864 |         size0 = size[0]
 865 |         size1 = size[1]
 866 | 
 867 |         temp0 = 1
 868 |         # STEP.1
 869 |         if isinstance(size0, list):
 870 |             for i in size0:
 871 |                 temp0 = temp0 * i
 872 |             STEP_1 = matrix(poly, N/temp0)
 873 |             W_0 = (W ** (N/temp0)) % q
 874 |             for i in range(int(N/temp0)):
 875 |                 STEP_1[i],SM1,SA1,SS1,SB1 = self.FourStepNTT(STEP_1[i],W_0,q,size0)
 876 | 
 877 |                 MulCnt = MulCnt + SM1
 878 |                 AddCnt = AddCnt + SA1
 879 |                 SubCnt = SubCnt + SS1
 880 |                 BtfCnt = BtfCnt + SB1
 881 |         else:
 882 |             temp0 = size0
 883 |             STEP_1 = matrix(poly, int(N/temp0))
 884 |             W_0 = (W ** int(N/temp0)) % q
 885 |             for i in range(int(N/temp0)):
 886 |                 STEP_1[i],SM1,SA1,SS1,SB1 =  self.CooleyTukeyNTT(STEP_1[i],W_0,q)
 887 | 
 888 |                 MulCnt = MulCnt + SM1
 889 |                 AddCnt = AddCnt + SA1
 890 |                 SubCnt = SubCnt + SS1
 891 |                 BtfCnt = BtfCnt + SB1
 892 | 
 893 |         # STEP.2 - Transpose
 894 |         STEP_2 = [ [row[c] for row in STEP_1 if c < len(row)] for c in range(0, max([len(row) for row in STEP_1])) ]
 895 |         # STEP_2 = list(zip(*STEP_1))
 896 | 
 897 |         # STEP.3 - Multiply with twiddle factor of N-pt NTT
 898 |         STEP_3 = [[0]*int(N/temp0)]*size0
 899 |         for i in range(temp0):
 900 |             STEP_3[i] = [(STEP_2[i][k] * (W ** (i*k)) % q) for k in range(int(N/temp0))]
 901 |             MulCnt = MulCnt + int(N/temp0)
 902 | 
 903 |         temp1 = 1
 904 |         #STEP.4
 905 |         if isinstance(size1, list):
 906 |             for i in size1:
 907 |                 temp1 = temp0 * i
 908 |             W_1 = (W ** int(N/temp1)) % q
 909 |             for i in range(int(N/temp1)):
 910 |                 STEP_3[i],SM3,SA3,SS3,SB3 = self.FourStepNTT(STEP_3[i],W_1,q,size1)
 911 | 
 912 |                 MulCnt = MulCnt + SM3
 913 |                 AddCnt = AddCnt + SA3
 914 |                 SubCnt = SubCnt + SS3
 915 |                 BtfCnt = BtfCnt + SB3
 916 |         else:
 917 |             temp1 = size1
 918 |             W_1 = (W ** int(N/temp1)) % q
 919 |             for i in range(int(N/temp1)):
 920 |                 STEP_3[i],SM3,SA3,SS3,SB3 = self.CooleyTukeyNTT(STEP_3[i],W_1,q)
 921 | 
 922 |                 MulCnt = MulCnt + SM3
 923 |                 AddCnt = AddCnt + SA3
 924 |                 SubCnt = SubCnt + SS3
 925 |                 BtfCnt = BtfCnt + SB3
 926 | 
 927 |         # Final transpose
 928 |         STEP_4 = [ [row[c] for row in STEP_3 if c < len(row)] for c in range(0, max([len(row) for row in STEP_3])) ]
 929 |         # STEP_4 = list(zip(*STEP_3))
 930 | 
 931 |         # Convert matrix into array
 932 |         STEP_4 = [item for sublist in STEP_4 for item in sublist]
 933 | 
 934 |         return STEP_4,MulCnt,AddCnt,SubCnt,BtfCnt
 935 | 
 936 |     # Four-Step NTT v2
 937 |     # A: input polynomial (standard order)
 938 |     # W: twiddle factor
 939 |     # q: modulus
 940 |     # size: input polynomial partition
 941 |     # B: output polynomial (standard order)
 942 |     def FourStepNTTv2(self,A,W,q,size):
 943 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
 944 |         """
 945 |         This is a four-step NTT algorithm for both forward and inverse NTT
 946 |         operations. The coefficients of tnput polynomial should be given in
 947 |         standard order. The output is generated in standard order. Forward NTT
 948 |         uses modular inverse of twiddle factors and inverse NTT uses twiddle factors.
 949 | 
 950 |         This algorithm divides NTT operation into smaller parts. "size" input
 951 |         determines the size of these small NTT operations. For details of the
 952 |         algorithm, see the paper: https://eprint.iacr.org/2015/818.pdf
 953 |         """
 954 |         N = len(A)
 955 | 
 956 |         # If this is an inverse transform operation
 957 |         N_inv = modinv(N, q)
 958 |         # Re-order input
 959 |         poly = [A[0]] + list(reversed(A[1:]))
 960 | 
 961 |         size0 = size[0]
 962 |         size1 = size[1]
 963 | 
 964 |         temp0 = 1
 965 |         # STEP.1
 966 |         if isinstance(size0, list):
 967 |             for i in size0:
 968 |                 temp0 = temp0 * i
 969 |             STEP_1 = matrix(poly, N/temp0)
 970 |             W_0 = (W ** (N/temp0)) % q
 971 |             for i in range(int(N/temp0)):
 972 |                 STEP_1[i],SM1,SA1,SS1,SB1 = self.FourStepNTT(STEP_1[i],W_0,q,size0)
 973 | 
 974 |                 MulCnt = MulCnt + SM1
 975 |                 AddCnt = AddCnt + SA1
 976 |                 SubCnt = SubCnt + SS1
 977 |                 BtfCnt = BtfCnt + SB1
 978 |         else:
 979 |             temp0 = size0
 980 |             STEP_1 = matrix(poly, int(N/temp0))
 981 |             W_0 = (W ** int(N/temp0)) % q
 982 |             for i in range(int(N/temp0)):
 983 |                 STEP_1[i],SM1,SA1,SS1,SB1 =  self.CooleyTukeyNTT(STEP_1[i],W_0,q)
 984 | 
 985 |                 MulCnt = MulCnt + SM1
 986 |                 AddCnt = AddCnt + SA1
 987 |                 SubCnt = SubCnt + SS1
 988 |                 BtfCnt = BtfCnt + SB1
 989 | 
 990 |         # STEP.2 - Transpose
 991 |         STEP_2 = [ [row[c] for row in STEP_1 if c < len(row)] for c in range(0, max([len(row) for row in STEP_1])) ]
 992 |         # STEP_2 = list(zip(*STEP_1))
 993 | 
 994 |         # STEP.3 - Multiply with twiddle factor of N-pt NTT
 995 |         STEP_3 = [[0]*int(N/temp0)]*size0
 996 |         for i in range(temp0):
 997 |             STEP_3[i] = [(STEP_2[i][k] * (W ** (i*k)) % q) for k in range(int(N/temp0))]
 998 |             MulCnt = MulCnt + int(N/temp0)
 999 | 
1000 |         temp1 = 1
1001 |         #STEP.4
1002 |         if isinstance(size1, list):
1003 |             for i in size1:
1004 |                 temp1 = temp0 * i
1005 |             W_1 = (W ** int(N/temp1)) % q
1006 |             for i in range(int(N/temp1)):
1007 |                 STEP_3[i],SM3,SA3,SS3,SB3 = self.FourStepNTT(STEP_3[i],W_1,q,size1)
1008 | 
1009 |                 MulCnt = MulCnt + SM3
1010 |                 AddCnt = AddCnt + SA3
1011 |                 SubCnt = SubCnt + SS3
1012 |                 BtfCnt = BtfCnt + SB3
1013 |         else:
1014 |             temp1 = size1
1015 |             W_1 = (W ** int(N/temp1)) % q
1016 |             for i in range(int(N/temp1)):
1017 |                 STEP_3[i],SM3,SA3,SS3,SB3 = self.CooleyTukeyNTT(STEP_3[i],W_1,q)
1018 | 
1019 |                 MulCnt = MulCnt + SM3
1020 |                 AddCnt = AddCnt + SA3
1021 |                 SubCnt = SubCnt + SS3
1022 |                 BtfCnt = BtfCnt + SB3
1023 | 
1024 |         # Final transpose
1025 |         STEP_4 = [ [row[c] for row in STEP_3 if c < len(row)] for c in range(0, max([len(row) for row in STEP_3])) ]
1026 |         # STEP_4 = list(zip(*STEP_3))
1027 | 
1028 |         # Convert matrix into array
1029 |         STEP_4 = [item for sublist in STEP_4 for item in sublist]
1030 | 
1031 |         return STEP_4,MulCnt,AddCnt,SubCnt,BtfCnt
1032 | 
1033 |     # CT-Based Constant-Geometry NTT
1034 |     # A: input polynomial (Bit-reversed order)
1035 |     # W: twiddle factor
1036 |     # q: modulus
1037 |     # B: output polynomial (standard order)
1038 |     def CTBased_ConstantGeometryNTT(self,A,W,q):
1039 |         MulCnt,AddCnt,SubCnt,BtfCnt = 0,0,0,0
1040 | 
1041 |         N = len(A)
1042 |         v = int(log(N,2))
1043 | 
1044 |         #B = indexReverse(A,v)
1045 |         B = [_ for _ in A]
1046 |         C = [0 for _ in range(N)]
1047 | 
1048 |         for s in range(1,v+1):
1049 |             for j in range(int(N/2)):
1050 |                 k = int(floor(j/(2**(v-s)))*(2**(v-s)))
1051 | 
1052 |                 TW = pow(W,k,q)
1053 | 
1054 |                 C[j           ] = (B[2*j] + B[2*j+1]*TW) % q
1055 |                 C[j + int(N/2)] = (B[2*j] - B[2*j+1]*TW) % q
1056 | 
1057 |                 MulCnt = MulCnt + 1
1058 |                 AddCnt = AddCnt + 1
1059 |                 SubCnt = SubCnt + 1
1060 |                 BtfCnt = BtfCnt + 1
1061 | 
1062 |             if s != v:
1063 |                 B = [_ for _ in C]
1064 | 
1065 |         return C,MulCnt,AddCnt,SubCnt,BtfCnt
1066 | 
1067 |     ######################################################################## (INTT)
1068 |     """
1069 |     List of INTT Algorithms: (from literature): NTT algorithms with extra n^-1 mod q multiplication
1070 |     -- Recursive Cooley-Tukey (CT) NTT (see http://people.scs.carleton.ca/~maheshwa/courses/5703COMP/16Fall/FFT_Report.pdf)
1071 |     -- Iterative NTT (see https://eprint.iacr.org/2019/109.pdf)
1072 |     -- Constant-Geometry NTT (see https://tches.iacr.org/index.php/TCHES/article/view/8344/7692 or https://eprint.iacr.org/2014/646.pdf)
1073 |     -- Stockham NTT (see https://ieeexplore.ieee.org/document/8717615)
1074 |     -- Four-Step NTT (see https://eprint.iacr.org/2015/818.pdf)
1075 |     """
1076 | 
1077 |     def CooleyTukeyINTT(self,A,W_inv,q):
1078 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.CooleyTukeyNTT(A,W_inv,q)
1079 |         N_inv = modinv(len(A),q)
1080 |         B = [(x*N_inv) % q for x in P]
1081 |         MulCnt = MulCnt + len(P)
1082 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
1083 | 
1084 |     def IterativeINTT(self,A,W_inv,q):
1085 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.IterativeNTT(A,W_inv,q)
1086 |         N_inv = modinv(len(A),q)
1087 |         B = [(x*N_inv) % q for x in P]
1088 |         MulCnt = MulCnt + len(P)
1089 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
1090 | 
1091 |     def FourStepINTT(self,A,W_inv,q,size):
1092 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.FourStepNTT(A,W_inv,q,size)
1093 |         N_inv = modinv(len(A),q)
1094 |         B = [(x*N_inv) % q for x in P]
1095 |         MulCnt = MulCnt + len(P)
1096 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
1097 | 
1098 |     def FourStepINTTv2(self,A,W_inv,q,size):
1099 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.FourStepNTTv2(A,W_inv,q,size)
1100 |         N_inv = modinv(len(A),q)
1101 |         B = [(x*N_inv) % q for x in P]
1102 |         MulCnt = MulCnt + len(P)
1103 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
1104 | 
1105 |     def CTBased_ConstantGeometryINTT(self,A,W_inv,q):
1106 |         P,MulCnt,AddCnt,SubCnt,BtfCnt = self.CTBased_ConstantGeometryNTT(A,W_inv,q)
1107 |         N_inv = modinv(len(A),q)
1108 |         B = [(x*N_inv) % q for x in P]
1109 |         MulCnt = MulCnt + len(P)
1110 |         return B,MulCnt,AddCnt,SubCnt,BtfCnt
1111 | #
1112 | 


--------------------------------------------------------------------------------
/stats/ntt_demo.py:
--------------------------------------------------------------------------------
  1 | from math import log
  2 | from random import randint
  3 | 
  4 | from generate_prime import *
  5 | from helper import *
  6 | from ntt import *
  7 | 
  8 | # Parameter generation
  9 | 
 10 | # Determine n and bit-size of q, then find a q satisfying
 11 | # the condition: q = 1 (mod 2n) or q = 1 (mod n)
 12 | #
 13 | # Based on n and q, generate NTT parameters
 14 | 
 15 | mod     = 2 # if 1 --> q = 1 (mod n), if 2 --> q = 1 (mod 2n)
 16 | n       = 64
 17 | size    = [8,8]
 18 | q_bit   = 10
 19 | 
 20 | q       = 0
 21 | w       = 0
 22 | w_inv   = 0
 23 | psi     = 0
 24 | psi_inv = 0
 25 | 
 26 | # Generate parameters
 27 | wfound = False
 28 | while(not(wfound)):
 29 |     q = generate_large_prime(q_bit)
 30 | 
 31 |     # check q = 1 (mod n or 2n)
 32 |     while (not ((q % (mod*n)) == 1)):
 33 |         q = generate_large_prime(q_bit)
 34 | 
 35 |     # generate NTT parameters
 36 |     for i in range(2,q-1):
 37 |         wfound = isrootofunity(i,mod*n,q)
 38 |         if wfound:
 39 |             if mod == 1:
 40 |                 psi    = 0
 41 |                 psi_inv= 0
 42 |                 w      = i
 43 |                 w_inv  = modinv(w,q)
 44 |             else:
 45 |                 psi    = i
 46 |                 psi_inv= modinv(psi,q)
 47 |                 w      = pow(psi,2,q)
 48 |                 w_inv  = modinv(w,q)
 49 |             break
 50 | 
 51 | # Print parameters
 52 | print("Parameters")
 53 | print("n      : {}".format(n))
 54 | print("q      : {}".format(q))
 55 | print("w      : {}".format(w))
 56 | print("w_inv  : {}".format(w_inv))
 57 | print("psi    : {}".format(psi))
 58 | print("psi_inv: {}".format(psi_inv))
 59 | print("")
 60 | 
 61 | #NOTE: Comment Out Naive Method for Large Parameters
 62 | 
 63 | # Demo
 64 | # Random A
 65 | A = [randint(0,q-1) for x in range(n)]
 66 | 
 67 | # Reversed A
 68 | A_rev = indexReverse(A,int(log(n,2)))
 69 | 
 70 | # NTT operation
 71 | Evaluator = NTT()
 72 | 
 73 | # Reference NTT operation
 74 | REF,REFMul,REFAdd,REFSub,REFBtf = Evaluator.NaiveNTT_NN(A,w,q)
 75 | 
 76 | # Reversed N0
 77 | REF_rev = indexReverse(REF,int(log(n,2)))
 78 | 
 79 | # NTT operations
 80 | N0 ,N0Mul ,N0Add ,N0Sub ,N0Btf  = Evaluator.NaiveNTT_NR(A,w,q)
 81 | N1 ,N1Mul ,N1Add ,N1Sub ,N1Btf  = Evaluator.Radix2_DIT_Recursive_NTT(A,w,q)
 82 | N2 ,N2Mul ,N2Add ,N2Sub ,N2Btf  = Evaluator.Radix2_DIF_Recursive_NTT(A,w,q)
 83 | N3 ,N3Mul ,N3Add ,N3Sub ,N3Btf  = Evaluator.Radix2_DIF_Iterative_NTT_NR(A,w,q)
 84 | N4 ,N4Mul ,N4Add ,N4Sub ,N4Btf  = Evaluator.Radix2_DIF_Iterative_NTT_RN(A_rev,w,q)
 85 | N5 ,N5Mul ,N5Add ,N5Sub ,N5Btf  = Evaluator.Radix2_DIF_Iterative_NTT_NN(A,w,q)
 86 | N6 ,N6Mul ,N6Add ,N6Sub ,N6Btf  = Evaluator.Radix2_DIT_Iterative_NTT_NR(A,w,q)
 87 | N7 ,N7Mul ,N7Add ,N7Sub ,N7Btf  = Evaluator.Radix2_DIT_Iterative_NTT_RN(A_rev,w,q)
 88 | N8 ,N8Mul ,N8Add ,N8Sub ,N8Btf  = Evaluator.Radix2_DIT_Iterative_NTT_NN(A,w,q)
 89 | N9 ,N9Mul ,N9Add ,N9Sub ,N9Btf  = Evaluator.CRT_Recursive_NTT(A,w,q)
 90 | N10,N10Mul,N10Add,N10Sub,N10Btf = Evaluator.CRT_Full_NTT(A,w,q)
 91 | N11,N11Mul,N11Add,N11Sub,N11Btf = Evaluator.CooleyTukeyNTT(A,w,q)
 92 | N12,N12Mul,N12Add,N12Sub,N12Btf = Evaluator.IterativeNTT(A,w,q)
 93 | N13,N13Mul,N13Add,N13Sub,N13Btf = Evaluator.FourStepNTT(A,w,q,size)
 94 | N14,N14Mul,N14Add,N14Sub,N14Btf = Evaluator.FourStepNTTv2(A,w_inv,q,size)
 95 | N15,N15Mul,N15Add,N15Sub,N15Btf = Evaluator.CTBased_ConstantGeometryNTT(A_rev,w,q)
 96 | 
 97 | # Check NTT
 98 | print("-------- Sanity check for NTT operations --------")
 99 | # print("A         : {}".format(A))
100 | # print("br(A)     : {}".format(A_rev))
101 | # print("NTT(A)    : {}".format(REF))
102 | # print("br(NTT(A)): {}".format(REF_rev))
103 | print("")
104 | print("NaiveNTT_NR                    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N0)]) == 0) else "Wrong"))
105 | print("-- Mul:{}".format(N0Mul))
106 | # print("-- Add:{}".format(N0Add))
107 | # print("-- Sub:{}".format(N0Sub))
108 | # print("-- Btf:{}".format(N0Btf))
109 | print("Radix2_DIT_Recursive_NTT       -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N1)]) == 0) else "Wrong"))
110 | print("-- Mul:{}".format(N1Mul))
111 | # print("-- Add:{}".format(N1Add))
112 | # print("-- Sub:{}".format(N1Sub))
113 | # print("-- Btf:{}".format(N1Btf))
114 | print("Radix2_DIF_Recursive_NTT       -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N2)]) == 0) else "Wrong"))
115 | print("-- Mul:{}".format(N2Mul))
116 | # print("-- Add:{}".format(N2Add))
117 | # print("-- Sub:{}".format(N2Sub))
118 | # print("-- Btf:{}".format(N2Btf))
119 | print("Radix2_DIF_Iterative_NTT_NR    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N3)]) == 0) else "Wrong"))
120 | print("-- Mul:{}".format(N3Mul))
121 | # print("-- Add:{}".format(N3Add))
122 | # print("-- Sub:{}".format(N3Sub))
123 | # print("-- Btf:{}".format(N3Btf))
124 | print("Radix2_DIF_Iterative_NTT_RN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N4)]) == 0) else "Wrong"))
125 | print("-- Mul:{}".format(N4Mul))
126 | # print("-- Add:{}".format(N4Add))
127 | # print("-- Sub:{}".format(N4Sub))
128 | # print("-- Btf:{}".format(N4Btf))
129 | print("Radix2_DIF_Iterative_NTT_NN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N5)]) == 0) else "Wrong"))
130 | print("-- Mul:{}".format(N5Mul))
131 | # print("-- Add:{}".format(N5Add))
132 | # print("-- Sub:{}".format(N5Sub))
133 | # print("-- Btf:{}".format(N5Btf))
134 | print("Radix2_DIT_Iterative_NTT_NR    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N6)]) == 0) else "Wrong"))
135 | print("-- Mul:{}".format(N6Mul))
136 | # print("-- Add:{}".format(N6Add))
137 | # print("-- Sub:{}".format(N6Sub))
138 | # print("-- Btf:{}".format(N6Btf))
139 | print("Radix2_DIT_Iterative_NTT_RN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N7)]) == 0) else "Wrong"))
140 | print("-- Mul:{}".format(N7Mul))
141 | # print("-- Add:{}".format(N7Add))
142 | # print("-- Sub:{}".format(N7Sub))
143 | # print("-- Btf:{}".format(N7Btf))
144 | print("Radix2_DIT_Iterative_NTT_NN    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N8)]) == 0) else "Wrong"))
145 | print("-- Mul:{}".format(N8Mul))
146 | # print("-- Add:{}".format(N8Add))
147 | # print("-- Sub:{}".format(N8Sub))
148 | # print("-- Btf:{}".format(N8Btf))
149 | print("CRT_Recursive_NTT              -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N9)]) == 0) else "Wrong"))
150 | print("-- Mul:{}".format(N9Mul))
151 | # print("-- Add:{}".format(N9Add))
152 | # print("-- Sub:{}".format(N9Sub))
153 | # print("-- Btf:{}".format(N9Btf))
154 | print("CRT_Full_NTT                   -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N10)]) == 0) else "Wrong"))
155 | print("-- Mul:{}".format(N10Mul))
156 | # print("-- Add:{}".format(N10Add))
157 | # print("-- Sub:{}".format(N10Sub))
158 | # print("-- Btf:{}".format(N10Btf))
159 | print("CooleyTukeyNTT                 -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N11)]) == 0) else "Wrong"))
160 | print("-- Mul:{}".format(N11Mul))
161 | # print("-- Add:{}".format(N11Add))
162 | # print("-- Sub:{}".format(N11Sub))
163 | # print("-- Btf:{}".format(N11Btf))
164 | print("IterativeNTT                   -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF_rev,N12)]) == 0) else "Wrong"))
165 | print("-- Mul:{}".format(N12Mul))
166 | # print("-- Add:{}".format(N12Add))
167 | # print("-- Sub:{}".format(N12Sub))
168 | # print("-- Btf:{}".format(N12Btf))
169 | print("FourStepNTT                    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N13)]) == 0) else "Wrong"))
170 | print("-- Mul:{}".format(N13Mul))
171 | # print("-- Add:{}".format(N13Add))
172 | # print("-- Sub:{}".format(N13Sub))
173 | # print("-- Btf:{}".format(N13Btf))
174 | print("FourStepNTTv2                  -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N14)]) == 0) else "Wrong"))
175 | print("-- Mul:{}".format(N14Mul))
176 | # print("-- Add:{}".format(N14Add))
177 | # print("-- Sub:{}".format(N14Sub))
178 | # print("-- Btf:{}".format(N14Btf))
179 | print("CTBased_ConstantGeometryNTT    -->" + ("Correct" if(sum([abs(x-y) for x,y in zip(REF,N15)]) == 0) else "Wrong"))
180 | print("-- Mul:{}".format(N15Mul))
181 | # print("-- Add:{}".format(N15Add))
182 | # print("-- Sub:{}".format(N15Sub))
183 | # print("-- Btf:{}".format(N15Btf))
184 | print("")
185 | 
186 | # INTT operations
# INTT operations, one row per transform under test:
# (report label, method, input, twiddle factor, extra arguments, expected output)
intt_cases = [
    ("NaiveINTT_NR                    -->", Evaluator.NaiveINTT_NR                , REF    , w_inv, ()     , A_rev),
    ("Radix2_DIT_Recursive_INTT       -->", Evaluator.Radix2_DIT_Recursive_INTT   , REF    , w_inv, ()     , A    ),
    ("Radix2_DIF_Recursive_INTT       -->", Evaluator.Radix2_DIF_Recursive_INTT   , REF    , w_inv, ()     , A_rev),
    ("Radix2_DIF_Iterative_INTT_NR    -->", Evaluator.Radix2_DIF_Iterative_INTT_NR, REF    , w_inv, ()     , A_rev),
    ("Radix2_DIF_Iterative_INTT_RN    -->", Evaluator.Radix2_DIF_Iterative_INTT_RN, REF_rev, w_inv, ()     , A    ),
    ("Radix2_DIF_Iterative_INTT_NN    -->", Evaluator.Radix2_DIF_Iterative_INTT_NN, REF    , w_inv, ()     , A    ),
    ("Radix2_DIT_Iterative_INTT_NR    -->", Evaluator.Radix2_DIT_Iterative_INTT_NR, REF    , w_inv, ()     , A_rev),
    ("Radix2_DIT_Iterative_INTT_RN    -->", Evaluator.Radix2_DIT_Iterative_INTT_RN, REF_rev, w_inv, ()     , A    ),
    ("Radix2_DIT_Iterative_INTT_NN    -->", Evaluator.Radix2_DIT_Iterative_INTT_NN, REF    , w_inv, ()     , A    ),
    ("CRT_Recursive_INTT              -->", Evaluator.CRT_Recursive_INTT          , REF    , w_inv, ()     , A_rev),
    ("CRT_Full_INTT                   -->", Evaluator.CRT_Full_INTT               , REF    , w_inv, ()     , A    ),
    ("CooleyTukeyINTT                 -->", Evaluator.CooleyTukeyINTT             , REF    , w_inv, ()     , A    ),
    ("IterativeINTT                   -->", Evaluator.IterativeINTT               , REF    , w_inv, ()     , A_rev),
    ("FourStepINTT                    -->", Evaluator.FourStepINTT                , REF    , w_inv, (size,), A    ),
    ("FourStepINTTv2                  -->", Evaluator.FourStepINTTv2              , REF    , w    , (size,), A    ),
    ("CTBased_ConstantGeometryINTT    -->", Evaluator.CTBased_ConstantGeometryINTT, REF_rev, w_inv, ()     , A    ),
]

# Run every transform first; nothing is reported until all of them finished
intt_runs = [method(data,tw,q,*extra) for _,method,data,tw,extra,_ in intt_cases]

# Check INTT
print("-------- Sanity check for INTT operations --------")
# print("NTT(A)    : {}".format(REF))
# print("br(NTT(A)): {}".format(REF_rev))
# print("A         : {}".format(A))
# print("br(A)     : {}".format(A_rev))
print("")
for (label,_,_,_,_,expected),(out,mul,add,sub,btf) in zip(intt_cases,intt_runs):
    matches = all(x == y for x,y in zip(expected,out))
    print(label + ("Correct" if matches else "Wrong"))
    print("-- Mul:{}".format(mul))
    # print("-- Add:{}".format(add))
    # print("-- Sub:{}".format(sub))
    # print("-- Btf:{}".format(btf))
print("")
292 | 
293 | #
294 | 


--------------------------------------------------------------------------------
/stats/poly_demo.py:
--------------------------------------------------------------------------------
  1 | from math import log
  2 | from random import randint
  3 | 
  4 | from generate_prime import *
  5 | from helper import *
  6 | from ntt import *
  7 | from poly import *
  8 | 
  9 | # ------------------------------------------------------------------------------
 10 | 
 11 | # Parameter generation
 12 | 
 13 | # Determine n and bit-size of q, then find a q satisfying
 14 | # the condition: q = 1 (mod 2n) or q = 1 (mod n)
 15 | #
 16 | # Based on n and q, polynomial multiplication parameters
 17 | 
 18 | # Parameters
 19 | mod     = 2 # if 1 --> q = 1 (mod n), if 2 --> q = 1 (mod 2n)
 20 | n       = 256
 21 | q_bit   = 13
 22 | 
 23 | q       = 0
 24 | w       = 0
 25 | w_inv   = 0
 26 | psi     = 0
 27 | psi_inv = 0
 28 | 
 29 | # Generate parameters
 30 | wfound = False
 31 | while(not(wfound)):
 32 |     q = generate_large_prime(q_bit)
 33 | 
 34 |     # check q = 1 (mod n or 2n)
 35 |     while (not ((q % (mod*n)) == 1)):
 36 |         q = generate_large_prime(q_bit)
 37 | 
 38 |     # generate NTT parameters
 39 |     for i in range(2,q-1):
 40 |         wfound = isrootofunity(i,mod*n,q)
 41 |         if wfound:
 42 |             if mod == 1:
 43 |                 psi    = 0
 44 |                 psi_inv= 0
 45 |                 w      = i
 46 |                 w_inv  = modinv(w,q)
 47 |             else:
 48 |                 psi    = i
 49 |                 psi_inv= modinv(psi,q)
 50 |                 w      = pow(psi,2,q)
 51 |                 w_inv  = modinv(w,q)
 52 |             break
 53 | 
 54 | # Print parameters
 55 | print("Parameters (NWC)")
 56 | print("n      : {}".format(n))
 57 | print("q      : {}".format(q))
 58 | print("w      : {}".format(w))
 59 | print("w_inv  : {}".format(w_inv))
 60 | print("psi    : {}".format(psi))
 61 | print("psi_inv: {}".format(psi_inv))
 62 | print("")
 63 | 
 64 | # ------------------------------------------------------------------------------
 65 | 
 66 | # Parameters (NTRU)
 67 | m       = 3*n
 68 | mq_bit  = 14
 69 | 
 70 | mq      = 0
 71 | mw      = 0
 72 | mw_inv  = 0
 73 | 
 74 | # Generate parameters
 75 | wfound = False
 76 | while(not(wfound)):
 77 |     mq = generate_large_prime(mq_bit)
 78 | 
 79 |     # check q = 1 (mod n or 2n)
 80 |     while (not ((mq % m) == 1)):
 81 |         mq = generate_large_prime(mq_bit)
 82 | 
 83 |     # generate NTT parameters
 84 |     for i in range(2,mq-1):
 85 |         wfound = isrootofunity(i,m,mq)
 86 |         if wfound:
 87 |             mw      = i
 88 |             mw_inv  = modinv(mw,mq)
 89 |             break
 90 | 
 91 | # m,mq,mw,mw_inv = 192,769,4,577
 92 | 
 93 | # Powers of twiddle factors for NTRU (forward and inverse transform)
 94 | # Generating necessary powers of twiddle factors for NTRU on-the-fly is really hard.
 95 | # Therefore, we create table for powers of twiddle factors prior any operation
 96 | nf = [0]*(m//3) # forward
 97 | 
 98 | nf[0] = 0
 99 | nf[1] = m//6
100 | nf[2] = nf[1]//2
101 | nf[3] = (5*nf[1])//2
102 | 
103 | i = 2
104 | while (2**i) < (m//3):
105 |     for j in range(2**i, 2**(i+1), 2):
106 |         nf[j]   =  nf[j//2]//2
107 |         nf[j+1] = (nf[j//2]+(m//2))//2
108 |     i = i + 1
109 | 
110 | ntrupowersf = nf[2:]
111 | 
112 | ntrupowersi = [] # inverse
113 | 
114 | idxs, idxe = len(ntrupowersf)-(m//6) ,len(ntrupowersf)
115 | for i in range(int(log(m//6,2))):
116 |     ntrupowersi = ntrupowersi + ntrupowersf[idxs:idxe]
117 |     idxe = idxs
118 |     idxs = idxs - ((m//12)>>i)
119 | 
120 | ntrupowersb = [0]*(m//3) # basemul
121 | 
122 | for i in range(m//6):
123 |     ntrupowersb[2*i+0] = ntrupowersi[i]
124 |     ntrupowersb[2*i+1] = ntrupowersi[i] + (m//2)
125 | 
126 | # print(ntrupowersf)
127 | # print(ntrupowersb)
128 | # print(ntrupowersi)
129 | 
# Print the NTRU parameter set found above
print("Parameters (NTRU)")
for fmt, val in (
    ("m      : {}", m),
    ("mq     : {}", mq),
    ("mw     : {}", mw),
    ("mw_inv : {}", mw_inv),
):
    print(fmt.format(val))
print("")
136 | 
# ------------------------------------------------------------------------------

#NOTE: Comment Out Reference Method for Large Parameters

# Demo
# Random A,B: n coefficients each, drawn from [0, q-1]
A = [randint(0,q-1) for _ in range(n)]
B = [randint(0,q-1) for _ in range(n)]

# Random A,B (for ntru): m coefficients each, drawn from [0, mq-1]
A_ntru = [randint(0,mq-1) for _ in range(m)]
B_ntru = [randint(0,mq-1) for _ in range(m)]

# Evaluator
Evaluator = Poly()

# Reduction polynomials handed to PolRed, written as coefficient lists
# (list index = degree, consistent with the "reduce with ..." notes at the
# PolRed calls). m//2 is exact integer division; int(m/2) went through a float.
pwc  = [-1]+[0]*(n-1)+[1]                        # x^n - 1
nwc  =  [1]+[0]*(n-1)+[1]                        # x^n + 1
ntru =  [1]+[0]*(m//2-1)+[-1]+[0]*(m//2-1)+[1]   # x^m - x^(m/2) + 1
157 | 
# ------------------------------------------------------------------------------

print("-------- Sanity check for polynomial multiplication operations --------")
print("")

# Reference implementations
#   D*  : plain schoolbook products
#   DR* : those products reduced explicitly with PolRed
#   C*  : schoolbook routines for each ring, compared against DR* below
# Every call returns the result plus four extra values, kept here as
# *Mul, *Add, *Sub and *Btf (presumably operation counts; only Mul is printed).
D0,  D0Mul,  D0Add,  D0Sub,  D0Btf  = Evaluator.SchoolbookPolMul(A, B, q)
D1,  D1Mul,  D1Add,  D1Sub,  D1Btf  = Evaluator.SchoolbookPolMul(A_ntru, B_ntru, mq)
DR0, DR0Mul, DR0Add, DR0Sub, DR0Btf = Evaluator.PolRed(D0, pwc, q)    # reduce with x^n-1
DR1, DR1Mul, DR1Add, DR1Sub, DR1Btf = Evaluator.PolRed(D0, nwc, q)    # reduce with x^n+1
DR2, DR2Mul, DR2Add, DR2Sub, DR2Btf = Evaluator.PolRed(D1, ntru, mq)  # reduce with x^m-x^(m/2)+1
C0,  C0Mul,  C0Add,  C0Sub,  C0Btf  = Evaluator.SchoolbookModPolMul_PWC(A, B, q)
C1,  C1Mul,  C1Add,  C1Sub,  C1Btf  = Evaluator.SchoolbookModPolMul_NWC(A, B, q)
C2,  C2Mul,  C2Add,  C2Sub,  C2Btf  = Evaluator.SchoolbookModPolMul_NTRU(A_ntru, B_ntru, mq)

# Report: a result is "Correct" when it matches its reference coefficient-wise
for label, got, ref, n_mul, n_add, n_sub, n_btf in (
    ("SchoolbookModPolMul_PWC  --> ", C0, DR0, C0Mul, C0Add, C0Sub, C0Btf),
    ("SchoolbookModPolMul_NWC  --> ", C1, DR1, C1Mul, C1Add, C1Sub, C1Btf),
    ("SchoolbookModPolMul_NTRU --> ", C2, DR2, C2Mul, C2Add, C2Sub, C2Btf),
):
    matches = sum(abs(x-y) for x,y in zip(ref,got)) == 0
    print(label + ("Correct" if matches else "Wrong"))
    print("-- Mul:{}".format(n_mul))
    # print("-- Add:{}".format(n_add))
    # print("-- Sub:{}".format(n_sub))
    # print("-- Btf:{}".format(n_btf))
print("")
189 | 
# ------------------------------------------------------------------------------

# NTT-based polynomial multiplication methods, each compared against the
# reference result it has to reproduce
N0, N0Mul, N0Add, N0Sub, N0Btf = Evaluator.NTTBasedPolMul(A, B, psi, psi_inv, q)
N1, N1Mul, N1Add, N1Sub, N1Btf = Evaluator.NTTBasedModPolMul_PWC(A, B, w, w_inv, q)
N2, N2Mul, N2Add, N2Sub, N2Btf = Evaluator.NTTBasedModPolMul_NWC_v1(A, B, w, w_inv, psi, psi_inv, q)
N3, N3Mul, N3Add, N3Sub, N3Btf = Evaluator.NTTBasedModPolMul_NWC_v2(A, B, psi, psi_inv, q)

# Report: a result is "Correct" when it matches its reference coefficient-wise
for label, got, ref, n_mul, n_add, n_sub, n_btf in (
    ("NTTBasedPolMul           --> ", N0, D0, N0Mul, N0Add, N0Sub, N0Btf),
    ("NTTBasedModPolMul_PWC    --> ", N1, C0, N1Mul, N1Add, N1Sub, N1Btf),
    ("NTTBasedModPolMul_NWC_v1 --> ", N2, C1, N2Mul, N2Add, N2Sub, N2Btf),
    ("NTTBasedModPolMul_NWC_v2 --> ", N3, C1, N3Mul, N3Add, N3Sub, N3Btf),
):
    matches = sum(abs(x-y) for x,y in zip(got,ref)) == 0
    print(label + ("Correct" if matches else "Wrong"))
    print("-- Mul:{}".format(n_mul))
    # print("-- Add:{}".format(n_add))
    # print("-- Sub:{}".format(n_sub))
    # print("-- Btf:{}".format(n_btf))
print("")
219 | 
# ------------------------------------------------------------------------------

# CRT-based polynomial multiplication methods
# NWC variants are compared against C1, the PWC variant against C0 and the
# NTRU variants against C2.
T0,  T0Mul,  T0Add,  T0Sub,  T0Btf  = Evaluator.CRTBasedModPolMul_PWC(A, B, w, w_inv, q)
T1,  T1Mul,  T1Add,  T1Sub,  T1Btf  = Evaluator.CRTBasedModPolMul_NWC_FD1(A, B, psi, psi_inv, q)
T2,  T2Mul,  T2Add,  T2Sub,  T2Btf  = Evaluator.CRTBasedModPolMul_NWC_FD2(A, B, w, w_inv, q)
# FDV takes the final degree as a keyword; the root handed over is psi for
# findeg=1, w for findeg=2, w^2 for findeg=4 and w^4 for findeg=8
T3,  T3Mul,  T3Add,  T3Sub,  T3Btf  = Evaluator.CRTBasedModPolMul_NWC_FDV(A, B, psi, psi_inv, q, findeg=1)
T4,  T4Mul,  T4Add,  T4Sub,  T4Btf  = Evaluator.CRTBasedModPolMul_NWC_FDV(A, B, w, w_inv, q, findeg=2)
T5,  T5Mul,  T5Add,  T5Sub,  T5Btf  = Evaluator.CRTBasedModPolMul_NWC_FDV(A, B, pow(w, 2, q), pow(w_inv, 2, q), q, findeg=4)
T6,  T6Mul,  T6Add,  T6Sub,  T6Btf  = Evaluator.CRTBasedModPolMul_NWC_FDV(A, B, pow(w, 4, q), pow(w_inv, 4, q), q, findeg=8)
# The NTRU variants always receive mw/mw_inv together with the three tables
T7,  T7Mul,  T7Add,  T7Sub,  T7Btf  = Evaluator.CRTBasedModPolMul_NTRU_FD3(A_ntru, B_ntru, mw, mw_inv, ntrupowersf, ntrupowersb, ntrupowersi, mq)
T8,  T8Mul,  T8Add,  T8Sub,  T8Btf  = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru, B_ntru, mw, mw_inv, ntrupowersf, ntrupowersb, ntrupowersi, mq, findeg=3)
T9,  T9Mul,  T9Add,  T9Sub,  T9Btf  = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru, B_ntru, mw, mw_inv, ntrupowersf, ntrupowersb, ntrupowersi, mq, findeg=6)
T10, T10Mul, T10Add, T10Sub, T10Btf = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru, B_ntru, mw, mw_inv, ntrupowersf, ntrupowersb, ntrupowersi, mq, findeg=12)
T11, T11Mul, T11Add, T11Sub, T11Btf = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru, B_ntru, mw, mw_inv, ntrupowersf, ntrupowersb, ntrupowersi, mq, findeg=24)
# Disabled alternative calls that passed powers of mw instead of mw itself:
# T9 = Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**2 % q,mw_inv**2 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=6)
# T10= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**4 % q,mw_inv**4 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=12)
# T11= Evaluator.CRTBasedModPolMul_NTRU_FDV(A_ntru,B_ntru,mw**8 % q,mw_inv**8 % q,ntrupowersf,ntrupowersb,ntrupowersi,mq,findeg=24)

# Report: a result is "Correct" when it matches its reference coefficient-wise
for label, got, ref, n_mul, n_add, n_sub, n_btf in (
    ("CRTBasedModPolMul_PWC                  --> ", T0,  C0, T0Mul,  T0Add,  T0Sub,  T0Btf),
    ("CRTBasedModPolMul_NWC_FD1              --> ", T1,  C1, T1Mul,  T1Add,  T1Sub,  T1Btf),
    ("CRTBasedModPolMul_NWC_FD2              --> ", T2,  C1, T2Mul,  T2Add,  T2Sub,  T2Btf),
    ("CRTBasedModPolMul_NWC_FDV  (findeg=1)  --> ", T3,  C1, T3Mul,  T3Add,  T3Sub,  T3Btf),
    ("CRTBasedModPolMul_NWC_FDV  (findeg=2)  --> ", T4,  C1, T4Mul,  T4Add,  T4Sub,  T4Btf),
    ("CRTBasedModPolMul_NWC_FDV  (findeg=4)  --> ", T5,  C1, T5Mul,  T5Add,  T5Sub,  T5Btf),
    ("CRTBasedModPolMul_NWC_FDV  (findeg=8)  --> ", T6,  C1, T6Mul,  T6Add,  T6Sub,  T6Btf),
    ("CRTBasedModPolMul_NTRU_FD3             --> ", T7,  C2, T7Mul,  T7Add,  T7Sub,  T7Btf),
    ("CRTBasedModPolMul_NTRU_FDV (findeg=3)  --> ", T8,  C2, T8Mul,  T8Add,  T8Sub,  T8Btf),
    ("CRTBasedModPolMul_NTRU_FDV (findeg=6)  --> ", T9,  C2, T9Mul,  T9Add,  T9Sub,  T9Btf),
    ("CRTBasedModPolMul_NTRU_FDV (findeg=12) --> ", T10, C2, T10Mul, T10Add, T10Sub, T10Btf),
    ("CRTBasedModPolMul_NTRU_FDV (findeg=24) --> ", T11, C2, T11Mul, T11Add, T11Sub, T11Btf),
):
    matches = sum(abs(x-y) for x,y in zip(got,ref)) == 0
    print(label + ("Correct" if matches else "Wrong"))
    print("-- Mul:{}".format(n_mul))
    # print("-- Add:{}".format(n_add))
    # print("-- Sub:{}".format(n_sub))
    # print("-- Btf:{}".format(n_btf))
print("")
300 | 
# ------------------------------------------------------------------------------
# Parallelism?
# NTRU is best utilized with 3*power-of-2 PEs (can be parallelised up to m//2)
# NWC is best utilized with power-of-2 PEs (can be parallelised up to n//2)
# -- A unified architecture is best utilized with power-of-2 PEs
# All systems need 2*PE BRAMs for in-place computation

# CRT-based unified polynomial multiplication method
# ring: 0 --> NWC  (x^n + 1)            findeg: 1, 2, 4, ...
# ring: 1 --> NTRU (x^n - x^(n/2) + 1)  findeg: 3, 6, 12, ...
# The NWC calls pass psi, w or w^2 depending on findeg; the NTRU calls always
# pass mw together with the three power tables.
ring, findeg = 0, 1
R0, R0Mul, R0Add, R0Sub, R0Btf = Evaluator.CRTBasedModPolMul_Unified(A, B, psi, psi_inv, q, ring, findeg)  # NWC - findeg=1
ring, findeg = 0, 2
R1, R1Mul, R1Add, R1Sub, R1Btf = Evaluator.CRTBasedModPolMul_Unified(A, B, w, w_inv, q, ring, findeg)  # NWC - findeg=2
ring, findeg = 0, 4
R2, R2Mul, R2Add, R2Sub, R2Btf = Evaluator.CRTBasedModPolMul_Unified(A, B, pow(w, 2, q), pow(w_inv, 2, q), q, ring, findeg)  # NWC - findeg=4
ring, findeg = 1, 3
R3, R3Mul, R3Add, R3Sub, R3Btf = Evaluator.CRTBasedModPolMul_Unified(A_ntru, B_ntru, mw, mw_inv, mq, ring, findeg, ntrupowersf, ntrupowersb, ntrupowersi)  # NTRU - findeg=3
ring, findeg = 1, 6
R4, R4Mul, R4Add, R4Sub, R4Btf = Evaluator.CRTBasedModPolMul_Unified(A_ntru, B_ntru, mw, mw_inv, mq, ring, findeg, ntrupowersf, ntrupowersb, ntrupowersi)  # NTRU - findeg=6
ring, findeg = 1, 12
R5, R5Mul, R5Add, R5Sub, R5Btf = Evaluator.CRTBasedModPolMul_Unified(A_ntru, B_ntru, mw, mw_inv, mq, ring, findeg, ntrupowersf, ntrupowersb, ntrupowersi)  # NTRU - findeg=12

# Report: a result is "Correct" when it matches its reference coefficient-wise
for label, got, ref, n_mul, n_add, n_sub, n_btf in (
    ("CRTBasedModPolMul_Unified (NWC  - findeg=1)  --> ", R0, C1, R0Mul, R0Add, R0Sub, R0Btf),
    ("CRTBasedModPolMul_Unified (NWC  - findeg=2)  --> ", R1, C1, R1Mul, R1Add, R1Sub, R1Btf),
    ("CRTBasedModPolMul_Unified (NWC  - findeg=4)  --> ", R2, C1, R2Mul, R2Add, R2Sub, R2Btf),
    ("CRTBasedModPolMul_Unified (NTRU - findeg=3)  --> ", R3, C2, R3Mul, R3Add, R3Sub, R3Btf),
    ("CRTBasedModPolMul_Unified (NTRU - findeg=6)  --> ", R4, C2, R4Mul, R4Add, R4Sub, R4Btf),
    ("CRTBasedModPolMul_Unified (NTRU - findeg=12) --> ", R5, C2, R5Mul, R5Add, R5Sub, R5Btf),
):
    matches = sum(abs(x-y) for x,y in zip(got,ref)) == 0
    print(label + ("Correct" if matches else "Wrong"))
    print("-- Mul:{}".format(n_mul))
    # print("-- Add:{}".format(n_add))
    # print("-- Sub:{}".format(n_sub))
    # print("-- Btf:{}".format(n_btf))
print("")
363 | 
364 | # ------------------------------------------------------------------------------
365 | 
366 | #
367 | # NOTE: We can have extra optimization by combining N_inv with last stage of INTT
368 | # NOTE: Later I can add PWC (x^n - 1)
369 | # NOTE: Later I can add pure NTT/INTT operations
370 | #
371 | 


--------------------------------------------------------------------------------
/testgenerator/generate_prime.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Pedro Alves
 2 | 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | 
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import random
16 | import math
17 | import sys
18 | 
def miller_rabin(p,s=11):
    """Miller-Rabin probabilistic primality test.

    p -- integer to test
    s -- number of random bases to try

    Returns False as soon as one base proves p composite, and True when p
    passes all s rounds (p is then prime with high probability).
    """
    # Small and even inputs are answered directly: the random base below is
    # drawn from {2,...,p-2}, which is empty for p < 4, and the decomposition
    # loop would never terminate for p == 1 (r == 0).
    if p < 2:
        return False
    if p < 4:
        return True
    if p&1 == 0:
        return False

    #computes p-1 decomposition in 2**u*r
    r = p-1
    u = 0
    while r&1 == 0:#true while the last bit of r is zero
        u += 1
        # exact integer halving; int(r/2) goes through a float and corrupts r
        # once p exceeds 2**53
        r >>= 1

    # apply miller_rabin primality test
    for _ in range(s):
        a = random.randrange(2,p-1) # choose random a in {2,3,...,p-2}
        z = pow(a,r,p)

        if z != 1 and z != p-1:
            # square up to u-1 times looking for p-1; reaching 1 first exposes
            # a non-trivial square root of 1, so p is composite
            for _ in range(u-1):
                if z != p-1:
                    z = pow(z,2,p)
                    if z == 1:
                        return False
                else:
                    break
            if z != p-1:
                return False
    return True
43 | 
44 | 
def is_prime(n,s=11):
    """Return True if n is (probably) prime.

    n -- integer to test
    s -- number of Miller-Rabin rounds, forwarded to miller_rabin

    Candidates are first checked against every odd prime below 1000, which
    removes a large share of composites cheaply; only the survivors are handed
    to miller_rabin.
    """
    # all primes under 1000 except 2 (2 and the even numbers are handled below)
    lowPrimes =   (3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97
                  ,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179
                  ,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269
                  ,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367
                  ,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461
                  ,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571
                  ,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661
                  ,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773
                  ,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883
                  ,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997)
    # 2 is the only even prime; the parity test below would reject it
    if n == 2:
        return True
    if n < 3 or n&1 == 0:
        return False
    for p in lowPrimes:
        if n == p:
            return True
        if n % p == 0:
            return False
    return miller_rabin(n,s)
68 | 
def generate_large_prime(k,s=11):
    """Return a random (probable) prime that is exactly k bits long.

    k -- desired bit length
    s -- number of Miller-Rabin rounds, forwarded to is_prime; the original
         author's note says s=11 gives an error probability below 2**-80

    Raises Exception when no prime is found within the attempt budget of
    int(100*(log2(k)+1)) candidates.
    """
    max_attempts = int(100*(math.log(k,2)+1)) #number of max attempts
    for _ in range(max_attempts):
        #randrange is mersenne twister and is completely deterministic
        #unusable for serious crypto purposes
        n = random.randrange(2**(k-1),2**(k))
        if is_prime(n,s):
            return n
    # report the attempt budget; the countdown counter used before was always
    # 0 at this point, so the message claimed "0 tries"
    raise Exception("Failure after %d tries." % max_attempts)
85 | 


--------------------------------------------------------------------------------
/testgenerator/helper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def egcd(a, b):
 3 |     if a == 0:
 4 |         return (b, 0, 1)
 5 |     else:
 6 |         g, y, x = egcd(b % a, a)
 7 |         return (g, x - (b // a) * y, y)
 8 | 
 9 | def modinv(a, m):
10 |     g, x, y = egcd(a, m)
11 |     if g != 1:
12 |         raise Exception('Modular inverse does not exist')
13 |     else:
14 |         return x % m
15 | 
16 | # Bit-Reverse integer
def intReverse(a,n):
    """Reverse the bits of a, written as a zero-padded binary string of width n."""
    bits = format(a, '0{}b'.format(n))
    return int(''.join(reversed(bits)), 2)
20 | 
21 | # Bit-Reversed index
def indexReverse(a,r):
    """Return a copy of a with every element moved to its bit-reversed index.

    a -- input sequence
    r -- bit width used when reversing each index
    """
    permuted = [0]*len(a)
    for idx, item in enumerate(a):
        permuted[intReverse(idx,r)] = item
    return permuted
29 | 
30 | # Check if input is m-th (could be n or 2n) primitive root of unity of q
def isrootofunity(w,m,q):
    """Return True if w is a primitive m-th root of unity modulo q.

    w -- candidate root
    m -- order to check (e.g. n or 2n)
    q -- modulus

    w must satisfy w**m == 1 (mod q) while no smaller positive power of w is
    congruent to 1.
    """
    if pow(w,m,q) != 1:
        return False
    # For even m the half power has to be -1 (mod q). Odd m has no half power,
    # so applying this test there would reject every valid root.
    if m % 2 == 0 and pow(w,m//2,q) != (q-1):
        return False
    # Walk w**1 .. w**(m-1); hitting 1 early means the order of w is below m.
    # Start from the reduced value so that w >= q is compared correctly.
    v = w % q
    for _ in range(1,m):
        if v == 1:
            return False
        v = (v*w) % q
    return True
44 | 


--------------------------------------------------------------------------------