├── FA.v ├── ModMult.v ├── README.md ├── BRAM.v ├── CSA.v ├── ShiftReg.v ├── ModRed_sub.v ├── defines.v ├── NTT2.v ├── ModRed.v ├── test_generator ├── generate_prime.py ├── test_generator.py └── helper.py ├── intMult.v ├── NTTN_test.v ├── AddressGenerator.v └── NTTN.v /FA.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */

`include "defines.v"

// Single-bit full adder.
// Adds the three input bits x, y and z and returns the 2-bit total as
// c (carry, weight 2) and s (sum, weight 1).
module FA(input  x,y,z,
          output c,s);

    // 2-bit total of the three operand bits
    wire [1:0] total;

    assign total = x+y+z;
    assign c     = total[1];
    assign s     = total[0];

endmodule

-------------------------------------------------------------------------------- /ModMult.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`include "defines.v"

// Modular multiplier datapath.
// Chains the integer multiplier (intMult, produces the double-width product
// of A and B) into the reduction unit (ModRed, reduces that product with
// respect to the modulus q).  Both sub-blocks are clocked and share reset.
module ModMult(input clk,reset,
               input [`DATA_SIZE_ARB-1:0] A,B,
               input [`DATA_SIZE_ARB-1:0] q,
               output[`DATA_SIZE_ARB-1:0] C);

    // double-width product travelling from the multiplier to the reducer
    wire [(2*`DATA_SIZE_ARB)-1:0] prod;

    // stage 1: integer multiplication
    intMult im(.clk(clk), .reset(reset), .A(A), .B(B), .C(prod));

    // stage 2: reduction of the product with modulus q
    ModRed  mr(.clk(clk), .reset(reset), .q(q), .P(prod), .C(C));

endmodule

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parametric NTT/INTT Hardware 2 | 3 | This repository provides the baseline version of Verilog code for parametric NTT/INTT hardware published in "An Extensive Study of Flexible Design Methods for the Number Theoretic Transform". 4 | 5 | You have to set three parameters defined in `defines.v`: 6 | * `DATA_SIZE_ARB`: bit-size of coefficient modulus *q* (constrained to the values between 8-64 for practical implementations) 7 | * `RING_SIZE`: degree of ring polynomial, namely *n* in *x^n+1* (needs to be a power of 2) 8 | * `PE_NUMBER`: number of processing elements (*butterfly units*) (needs to be a power of 2 and `PE_NUMBER` <= `RING_SIZE`/2) 9 | 10 | Other versions of the hardware generator and documentation will be available soon. 11 | 12 | If you use this work in your research/study, please cite our work: 13 | 14 | ``` 15 | @ARTICLE{9171507, 16 | author={A. C. {Mert} and E. {Karabulut} and E. {Ozturk} and E. {Savas} and A. 
{Aysu}}, 17 | journal={IEEE Transactions on Computers}, 18 | title={An Extensive Study of Flexible Design Methods for the Number Theoretic Transform}, 19 | year={2020}, 20 | volume={}, 21 | number={}, 22 | pages={1-1}, 23 | doi={10.1109/TC.2020.3017930}} 24 | ``` 25 | -------------------------------------------------------------------------------- /BRAM.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | `timescale 1ns / 1ps 18 | 19 | module BRAM #(parameter DLEN = 32, HLEN=9) 20 | (input clk, 21 | input wen, 22 | input [HLEN-1:0] waddr, 23 | input [DLEN-1:0] din, 24 | input [HLEN-1:0] raddr, 25 | output reg [DLEN-1:0] dout); 26 | // bram 27 | (* ram_style="block" *) reg [DLEN-1:0] blockram [(1< 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`include "defines.v"

// Carry-save adder over (2*`DATA_SIZE)-bit operands.
// Every bit position is reduced by its own full adder.  The sum bits are
// passed through unchanged; the carry bits are moved one position to the
// left, so c[0] is always zero and the carry out of the top position is
// dropped.
module CSA(input      [(2*`DATA_SIZE)-1:0] x,y,z,
           output reg [(2*`DATA_SIZE)-1:0] c,s);

    // raw per-bit carry and sum produced by the full adders
    wire [(2*`DATA_SIZE)-1:0] c_t,s_t;

    genvar csa_idx;

    generate
        for(csa_idx=0; csa_idx<(2*`DATA_SIZE); csa_idx=csa_idx+1) begin: FA_LOOP
            FA fau(.x(x[csa_idx]),
                   .y(y[csa_idx]),
                   .z(z[csa_idx]),
                   .c(c_t[csa_idx]),
                   .s(s_t[csa_idx]));
        end
    endgenerate

    // align the carries with the weight they represent
    always @(*) begin: SHIFT_LOOP
        c = {c_t[(2*`DATA_SIZE)-2:0], 1'b0};
        s = s_t;
    end

endmodule

-------------------------------------------------------------------------------- /ShiftReg.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`timescale 1 ns / 1 ps

// Register delay line: data_out is data_in delayed by SHIFT clock cycles.
//   SHIFT : number of register stages.  SHIFT = 0 (the default) gives a
//           combinational pass-through; previously it declared the array as
//           [-1:0] and read an element that is never written, so data_out
//           was permanently X.
//   DATA  : width of the delayed word.
// Every stage is cleared by the asynchronous, active-high reset.
// Behaviour for SHIFT >= 1 is unchanged.
module ShiftReg #(parameter SHIFT = 0, DATA=32)
   (input             clk,reset,
    input  [DATA-1:0] data_in,
    output [DATA-1:0] data_out);

    // Keep at least one storage element so the array range stays
    // well-formed when SHIFT is 0 (that element is then unused).
    localparam DEPTH = (SHIFT > 0) ? SHIFT : 1;

    reg [DATA-1:0] shift_array [DEPTH-1:0];

    // first stage samples the input
    always @(posedge clk or posedge reset) begin
        if(reset)
            shift_array[0] <= 0;
        else
            shift_array[0] <= data_in;
    end

    genvar shft;

    // remaining stages: each one samples its predecessor
    generate
        for(shft=0; shft < DEPTH-1; shft=shft+1) begin: DELAY_BLOCK
            always @(posedge clk or posedge reset) begin
                if(reset)
                    shift_array[shft+1] <= 0;
                else
                    shift_array[shft+1] <= shift_array[shft];
            end
        end
    endgenerate

    // SHIFT is a constant, so this selects one of the two sources at elaboration
    assign data_out = (SHIFT == 0) ? data_in : shift_array[DEPTH-1];

endmodule

-------------------------------------------------------------------------------- /ModRed_sub.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`include "defines.v"

// One stage of the word-wise reduction used by ModRed.
// The low `W_SIZE bits of T1 are negated (modulo 2^`W_SIZE) and multiplied
// by qH; one cycle later that product is added to the remaining upper bits
// of T1 plus a one-bit carry, giving C.  Latency: two clock cycles.
//   CURR_DATA : width of the incoming word T1
//   NEXT_DATA : width of the outgoing word C
//   qH        : modulus bits above bit `W_SIZE-1 (as wired by ModRed)
module ModRed_sub #(parameter CURR_DATA = 0, NEXT_DATA = 0)
   (input                                  clk,reset,
    input [(`DATA_SIZE_ARB-`W_SIZE)-1:0]   qH,
    input [CURR_DATA-1:0]                  T1,
    output reg[NEXT_DATA-1:0]              C);

    // combinational: low word of T1 and its two's complement
    reg [(`W_SIZE)-1:0] low_word;
    reg [(`W_SIZE)-1:0] neg_low;

    // first register level
    reg [(CURR_DATA - `W_SIZE)-1:0] high_part;
    reg                             carry_bit;

    (* use_dsp = "yes" *) reg [`DATA_SIZE_ARB - 1:0] prod;

    always @(*) begin
        low_word = T1[(`W_SIZE)-1:0];
        neg_low  = (-low_word);
    end

    // Both register levels share one clocked process:
    //   level 1: upper part of T1, carry flag, qH * (-low word)
    //   level 2: final sum of the level-1 values
    always @(posedge clk or posedge reset) begin
        if(reset) begin
            high_part <= 0;
            carry_bit <= 0;
            prod      <= 0;
            C         <= 0;
        end
        else begin
            high_part <= (T1 >> (`W_SIZE));
            // set when the low word or its negation has its top bit set
            carry_bit <= (low_word[`W_SIZE-1] | neg_low[`W_SIZE-1]);
            prod      <= qH * neg_low;
            C         <= (prod+high_part)+carry_bit;
        end
    end

endmodule

-------------------------------------------------------------------------------- /defines.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`timescale 1 ns / 1 ps

// ------------------------------------------------
// User parameters
// -- K: DATA_SIZE_ARB (bit width of the modulus and of every coefficient)
// -- n: RING_SIZE
// -- B: PE_NUMBER

`define DATA_SIZE_ARB 14
`define RING_SIZE 512
`define PE_NUMBER 1

// ------------------------------------------------
// Parameters for integer multiplication
// DATA_SIZE       : K rounded up to a power of two
// DATA_SIZE_DEPTH : log2 of DATA_SIZE
// GENERIC         : DATA_SIZE/16, the number of 16-bit chunks per operand
// CSA_LEVEL       : number of carry-save adders in the multiplier (0 when there is one chunk)
// INTMUL_DELAY    : register levels inside intMult

`define DATA_SIZE (1 << ($clog2(`DATA_SIZE_ARB)))
`define DATA_SIZE_DEPTH ($clog2(`DATA_SIZE))

`define GENERIC (1 << (`DATA_SIZE_DEPTH - 4))
`define CSA_LEVEL ((`DATA_SIZE > 16) ? (`GENERIC*`GENERIC-2) : 0)

`define INTMUL_DELAY 3

// ------------------------------------------------
// Works for K between 9-bit to 64-bit
// Parameters for modular reduction
// W_SIZE : log2(n) + 1
// L_SIZE : ceil(K / W_SIZE), written as a comparison chain and limited to 8

`define RING_DEPTH ($clog2(`RING_SIZE))
`define W_SIZE ((`RING_DEPTH)+1)
`define L_SIZE ((`DATA_SIZE_ARB > `W_SIZE) ? ((`DATA_SIZE_ARB > (`W_SIZE * 2)) ? ((`DATA_SIZE_ARB > (`W_SIZE * 3)) ? ((`DATA_SIZE_ARB > (`W_SIZE * 4)) ? ((`DATA_SIZE_ARB > (`W_SIZE * 5)) ? ((`DATA_SIZE_ARB > (`W_SIZE * 6)) ? ((`DATA_SIZE_ARB > (`W_SIZE * 7)) ? 8 : 7) : 6) : 5) : 4) : 3) : 2) : 1)

// Real-valued form of the two macros above, kept for reference:
// `define W_SIZE ($rtoi((`RING_DEPTH)+1))
// `define L_SIZE ($rtoi($ceil((`DATA_SIZE_ARB*1.0)/(`W_SIZE*1.0))))

// two register levels per ModRed_sub stage plus the output register of ModRed
`define MODRED_DELAY ((`L_SIZE)*2 + 1)

// ------------------------------------------------
// System parameters

`define PE_DEPTH ($clog2(`PE_NUMBER))
`define STAGE_DELAY 5

`define R ($rtoi(`W_SIZE * `L_SIZE))

// ------------------------------------------------

-------------------------------------------------------------------------------- /NTT2.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */

`include "defines.v"

// Two-point butterfly.
//   sum path  : NTTin0 + NTTin1 with one conditional subtraction of q,
//               registered, then delayed by the multiplier latency so that
//               it leaves together with the product.
//   diff path : NTTin0 - NTTin1 with one conditional addition of q,
//               registered, then multiplied by the registered MULin through
//               ModMult.
// ADDout / NTToutEVEN carry the delayed sum, SUBout carries the ModMult
// result and NTToutODD is SUBout delayed by one more clock.
// The single correction step presumably assumes both inputs are already
// below q -- confirm against the callers.
module NTT2(input                           clk,reset,
            input      [`DATA_SIZE_ARB-1:0] q,
            input      [`DATA_SIZE_ARB-1:0] NTTin0,NTTin1,
            input      [`DATA_SIZE_ARB-1:0] MULin,
            output reg [`DATA_SIZE_ARB-1:0] ADDout,SUBout,
            output reg [`DATA_SIZE_ARB-1:0] NTToutEVEN,NTToutODD);

    // ------------------------------------------------ modular addition
    wire        [`DATA_SIZE_ARB  :0] sum_raw;
    wire signed [`DATA_SIZE_ARB+1:0] sum_minus_q;
    wire        [`DATA_SIZE_ARB-1:0] sum_mod;

    assign sum_raw     = NTTin0 + NTTin1;
    assign sum_minus_q = sum_raw - q;
    // top bit clear: the subtraction did not go negative, keep it
    assign sum_mod     = (sum_minus_q[`DATA_SIZE_ARB+1] == 1'b0) ? sum_minus_q[`DATA_SIZE_ARB-1:0]
                                                                 : sum_raw[`DATA_SIZE_ARB-1:0];

    // ------------------------------------------------ modular subtraction
    wire        [`DATA_SIZE_ARB  :0] diff_raw;
    wire signed [`DATA_SIZE_ARB+1:0] diff_plus_q;
    wire        [`DATA_SIZE_ARB-1:0] diff_mod;

    assign diff_raw    = NTTin0 - NTTin1;
    assign diff_plus_q = diff_raw + q;
    // top bit clear: no borrow, the raw difference is already the result
    assign diff_mod    = (diff_raw[`DATA_SIZE_ARB] == 1'b0) ? diff_raw[`DATA_SIZE_ARB-1:0]
                                                            : diff_plus_q[`DATA_SIZE_ARB-1:0];

    // ------------------------------------------------ first register level
    reg [`DATA_SIZE_ARB-1:0] twiddle_r;
    reg [`DATA_SIZE_ARB-1:0] diff_r;
    reg [`DATA_SIZE_ARB-1:0] sum_r;

    always @(posedge clk) begin
        if(reset) begin
            twiddle_r <= 0;
            diff_r    <= 0;
            sum_r     <= 0;
        end
        else begin
            twiddle_r <= MULin;
            diff_r    <= diff_mod;
            sum_r     <= sum_mod;
        end
    end

    // ------------------------------------------------ modular multiplication
    wire [`DATA_SIZE_ARB-1:0] prod_mod;

    ModMult mm(.clk(clk), .reset(reset),
               .A(twiddle_r), .B(diff_r),
               .q(q),
               .C(prod_mod));

    // sum path delayed to line up with the multiplier output
    wire [`DATA_SIZE_ARB-1:0] sum_delayed;

    ShiftReg #(.SHIFT(`INTMUL_DELAY + `MODRED_DELAY),.DATA(`DATA_SIZE_ARB))
        unit00(.clk(clk), .reset(reset), .data_in(sum_r), .data_out(sum_delayed));

    // ------------------------------------------------ combinational outputs
    always @(*) begin
        SUBout     = prod_mod;
        ADDout     = sum_delayed;
        NTToutEVEN = sum_delayed;
    end

    // ------------------------------------------------ second register level (output)
    always @(posedge clk) begin
        if(reset)
            NTToutODD <= 0;
        else
            NTToutODD <= SUBout;
    end

endmodule

-------------------------------------------------------------------------------- /ModRed.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */

`include "defines.v"

// Reduction pipeline.
// P runs through `L_SIZE ModRed_sub stages.  Every stage except the last
// shortens the word by (`W_SIZE-1) bits; the last stage emits a
// (`DATA_SIZE_ARB+2)-bit value.  A final registered compare-and-subtract
// with q produces the `DATA_SIZE_ARB-bit result C.
module ModRed (input                             clk,reset,
               input      [`DATA_SIZE_ARB-1:0]   q,
               input      [(2*`DATA_SIZE_ARB)-1:0] P,
               output reg [`DATA_SIZE_ARB-1:0]   C);

    // stage_bus[i] feeds stage i; stage_bus[`L_SIZE] is the last stage's output.
    // Only the low bits of each entry are driven (see the part-selects below).
    wire [(2*`DATA_SIZE_ARB)-1:0] stage_bus [`L_SIZE:0];

    assign stage_bus[0] = P;

    // ------------------------------------------------ all stages but the last
    genvar st;
    generate
        for(st=0; st < (`L_SIZE-1); st=st+1)
        begin
            ModRed_sub #(.CURR_DATA((2*`DATA_SIZE_ARB)-(st  )*(`W_SIZE-1)),
                         .NEXT_DATA((2*`DATA_SIZE_ARB)-(st+1)*(`W_SIZE-1)))
                mrs (.clk  (clk),
                     .reset(reset),
                     .qH   (q[`DATA_SIZE_ARB-1:`W_SIZE]),
                     .T1   (stage_bus[st]  [((2*`DATA_SIZE_ARB)-(st)  *(`W_SIZE-1))-1:0]),
                     .C    (stage_bus[st+1][((2*`DATA_SIZE_ARB)-(st+1)*(`W_SIZE-1))-1:0]));
        end
    endgenerate

    // ------------------------------------------------ last stage
    ModRed_sub #(.CURR_DATA((2*`DATA_SIZE_ARB)-(`L_SIZE-1)*(`W_SIZE-1)),
                 .NEXT_DATA(`DATA_SIZE_ARB+2))
        mrsl (.clk  (clk),
              .reset(reset),
              .qH   (q[`DATA_SIZE_ARB-1:`W_SIZE]),
              .T1   (stage_bus[`L_SIZE-1][((2*`DATA_SIZE_ARB)-(`L_SIZE-1)*(`W_SIZE-1))-1:0]),
              .C    (stage_bus[`L_SIZE  ][(`DATA_SIZE_ARB+2)-1:0]));

    // ------------------------------------------------ final subtraction
    wire [`DATA_SIZE_ARB+1:0] last_sum;
    wire [`DATA_SIZE_ARB+1:0] last_minus_q;

    assign last_sum     = stage_bus[`L_SIZE][(`DATA_SIZE_ARB+2)-1:0];
    assign last_minus_q = last_sum - q;

    // ------------------------------------------------ final comparison
    // top bit of the difference set: subtracting q went negative, keep last_sum
    always @(posedge clk or posedge reset)
    begin
        if(reset) begin
            C <= 0;
        end
        else begin
            if (last_minus_q[`DATA_SIZE_ARB+1])
                C <= last_sum;
            else
                C <= last_minus_q[`DATA_SIZE_ARB-1:0];
        end
    end

endmodule

-------------------------------------------------------------------------------- /test_generator/generate_prime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Pedro Alves 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import random 16 | import math 17 | import sys 18 | 19 | def miller_rabin(p,s=11): 20 | #computes p-1 decomposition in 2**u*r 21 | r = p-1 22 | u = 0 23 | while r&1 == 0:#true while the last bit of r is zero 24 | u += 1 25 | r = int(r/2) 26 | 27 | # apply miller_rabin primality test 28 | for i in range(s): 29 | a = random.randrange(2,p-1) # choose random a in {2,3,...,p-2} 30 | z = pow(a,r,p) 31 | 32 | if z != 1 and z != p-1: 33 | for j in range(u-1): 34 | if z != p-1: 35 | z = pow(z,2,p) 36 | if z == 1: 37 | return False 38 | else: 39 | break 40 | if z != p-1: 41 | return False 42 | return True 43 | 44 | 45 | def is_prime(n,s=11): 46 | #lowPrimes is all primes (sans 2, which is covered by the bitwise and operator) 47 | #under 1000. taking n modulo each lowPrime allows us to remove a huge chunk 48 | #of composite numbers from our potential pool without resorting to Rabin-Miller 49 | lowPrimes = [3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97 50 | ,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179 51 | ,181,191,193,197,199,211,223,227,229,233,239,241,251,257,263,269 52 | ,271,277,281,283,293,307,311,313,317,331,337,347,349,353,359,367 53 | ,373,379,383,389,397,401,409,419,421,431,433,439,443,449,457,461 54 | ,463,467,479,487,491,499,503,509,521,523,541,547,557,563,569,571 55 | ,577,587,593,599,601,607,613,617,619,631,641,643,647,653,659,661 56 | ,673,677,683,691,701,709,719,727,733,739,743,751,757,761,769,773 57 | ,787,797,809,811,821,823,827,829,839,853,857,859,863,877,881,883 58 | ,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997] 59 | if (n >= 3): 60 | if (n&1 != 0): 61 | for p in lowPrimes: 62 | if (n == p): 63 | return True 64 | if (n % p == 0): 65 | return False 66 | return miller_rabin(n,s) 67 | return False 68 | 69 | def generate_large_prime(k,s=11): 70 | #print "Generating prime of %d bits" % k 71 | #k is the desired bit length 72 | 73 | # using security parameter s=11, we have a error probability of 
less than 74 | # 2**-80 75 | 76 | r=int(100*(math.log(k,2)+1)) #number of max attempts 77 | while r>0: 78 | #randrange is mersenne twister and is completely deterministic 79 | #unusable for serious crypto purposes 80 | n = random.randrange(2**(k-1),2**(k)) 81 | r-=1 82 | if is_prime(n,s) == True: 83 | return n 84 | raise Exception("Failure after %d tries." % r) 85 | -------------------------------------------------------------------------------- /intMult.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`include "defines.v"

// Pipelined integer multiplier, three register levels deep.
//   level 1: A and B are cut into 16-bit chunks; every chunk pair is
//            multiplied and the product is stored already shifted to its
//            weight
//   level 2: the partial products are compressed by a chain of carry-save
//            adders into two words, which are registered
//   level 3: the two words are added
// When `DATA_SIZE is 16 there is a single partial product, the CSA chain is
// empty and that product is simply forwarded through levels 2 and 3.
module intMult(input                               clk,reset,
               input      [`DATA_SIZE_ARB-1:0]     A,B,
               output reg [(2*`DATA_SIZE_ARB)-1:0] C);

    // ------------------------------------------------ storage and nets
    // registered, pre-shifted partial products
    (* use_dsp = "yes" *) reg [(2*`DATA_SIZE)-1:0] output_dsp [`GENERIC*`GENERIC-1:0];
    // operand list of the CSA chain: partial products first, CSA outputs after
    wire[(2*`DATA_SIZE)-1:0] op_reg [(`CSA_LEVEL*3+2)-1:0];

    // 16-bit chunks of A and B
    reg [15:0] first_index_dsp  [`GENERIC-1:0];
    reg [15:0] second_index_dsp [`GENERIC-1:0];

    // declared but not connected anywhere in this module
    wire[(2*`DATA_SIZE)-1:0] csa_out_c;
    wire[(2*`DATA_SIZE)-1:0] csa_out_s;

    // the two words left after carry-save compression
    reg [(2*`DATA_SIZE)-1:0] C_out;
    reg [(2*`DATA_SIZE)-1:0] S_out;

    // ------------------------------------------------ split inputs into 16-bit chunks
    genvar chunk;

    generate
        for(chunk=0; chunk < `GENERIC; chunk=chunk+1)
        begin
            always @(*) begin
                first_index_dsp [chunk] = (A >> (chunk*16));
                second_index_dsp[chunk] = (B >> (chunk*16));
            end
        end
    endgenerate

    // ------------------------------------------------ multiply every chunk pair
    always @(posedge clk or posedge reset)
    begin: MULT_STAGE
        integer row;
        integer col;

        if(reset) begin
            for(row=0; row < `GENERIC; row=row+1)
                for(col=0; col < `GENERIC; col=col+1)
                    output_dsp[(row*`GENERIC)+col][(2*`DATA_SIZE)-1:0] <= 0;
        end
        else begin
            for(row=0; row < `GENERIC; row=row+1)
                for(col=0; col < `GENERIC; col=col+1)
                    output_dsp[(row*`GENERIC)+col][(2*`DATA_SIZE)-1:0] <= (first_index_dsp[row][15:0] * second_index_dsp[col][15:0])<<((row+col)*16);
        end
    end

    // ------------------------------------------------ carry-save adder chain
    // seed the operand list with the partial products
    generate
        genvar m;

        for(m=0; m<(`GENERIC*`GENERIC); m=m+1) begin: DUMMY
            assign op_reg[m] = output_dsp[m];
        end
    endgenerate

    // each CSA consumes three operands and appends two new ones to the list
    generate
        genvar k;

        for(k=0; k<(`CSA_LEVEL); k=k+1) begin: CSA_LOOP
            CSA csau(.x(op_reg[3*k+0]),
                     .y(op_reg[3*k+1]),
                     .z(op_reg[3*k+2]),
                     .c(op_reg[(`GENERIC*`GENERIC)+2*k+0]),
                     .s(op_reg[(`GENERIC*`GENERIC)+2*k+1]));
        end
    endgenerate

    // ------------------------------------------------ register the last two operands, then add them
    always @(posedge clk or posedge reset) begin
        if(reset) begin
            C_out <= 0;
            S_out <= 0;
            C     <= 0;
        end
        else begin
            C_out <= (`DATA_SIZE > 16) ? op_reg[(`CSA_LEVEL*3+2)-1] : op_reg[0];
            S_out <= (`DATA_SIZE > 16) ? op_reg[(`CSA_LEVEL*3+2)-2] : op_reg[0];
            C     <= (`DATA_SIZE > 16) ? (C_out + S_out) : S_out;
        end
    end

endmodule

-------------------------------------------------------------------------------- /NTTN_test.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */

`include "defines.v"

// Self-checking testbench for NTTN.
// Sequence: reset, stream both twiddle tables and two PARAM.txt words, load
// the NTT input and start it, capture `RING_SIZE output words after done,
// repeat for the inverse transform, then compare both captures with the
// expected vectors and print a verdict for each.
module NTTN_test();

parameter HP = 5;          // half clock period
parameter FP = (2*HP);     // full clock period

// number of words in each twiddle table (W.txt / WINV.txt)
localparam TW_WORDS = ((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH);

reg                       clk,reset;
reg                       load_w;
reg                       load_data;
reg                       start;
reg                       start_intt;
reg  [`DATA_SIZE_ARB-1:0] din;
wire                      done;
wire [`DATA_SIZE_ARB-1:0] dout;

// ---------------------------------------------------------------- CLK

always #HP clk = ~clk;

// ---------------------------------------------------------------- TXT data

reg [`DATA_SIZE_ARB-1:0] params    [0:7];
reg [`DATA_SIZE_ARB-1:0] w         [0:TW_WORDS-1];
reg [`DATA_SIZE_ARB-1:0] winv      [0:TW_WORDS-1];
reg [`DATA_SIZE_ARB-1:0] ntt_pin   [0:`RING_SIZE-1];
reg [`DATA_SIZE_ARB-1:0] ntt_pout  [0:`RING_SIZE-1];
reg [`DATA_SIZE_ARB-1:0] intt_pin  [0:`RING_SIZE-1];
reg [`DATA_SIZE_ARB-1:0] intt_pout [0:`RING_SIZE-1];

initial begin
    // parameters, twiddle tables, stimuli and expected results
    $readmemh("test/PARAM.txt"    , params);
    $readmemh("test/W.txt"        , w);
    $readmemh("test/WINV.txt"     , winv);
    $readmemh("test/NTT_DIN.txt"  , ntt_pin);
    $readmemh("test/NTT_DOUT.txt" , ntt_pout);
    $readmemh("test/INTT_DIN.txt" , intt_pin);
    $readmemh("test/INTT_DOUT.txt", intt_pout);
end

// ---------------------------------------------------------------- TEST case

integer k;

initial begin: CLK_RESET_INIT
    // clk & reset: 1500 time units in total (150 clock cycles)
    clk   = 0;
    reset = 0;

    #200;
    reset = 1;
    #200;
    reset = 0;
    #100;

    #1000;
end

initial begin: LOAD_DATA
    load_w    = 0;
    load_data = 0;
    start     = 0;
    start_intt= 0;
    din       = 0;

    // wait until the reset sequence above has finished
    #1500;

    // ---------- load w: one-cycle pulse, then both tables back to back
    load_w = 1;
    #FP;
    load_w = 0;

    for(k=0; k<TW_WORDS; k=k+1) begin
        din = w[k];
        #FP;
    end
    for(k=0; k<TW_WORDS; k=k+1) begin
        din = winv[k];
        #FP;
    end
    // words 1 and 6 of PARAM.txt (q and n_inv*R mod q in the generator's order)
    din = params[1];
    #FP;
    din = params[6];
    #FP;

    #(5*FP);

    // ---------- load data (ntt)
    load_data = 1;
    #FP;
    load_data = 0;

    for(k=0; k<(`RING_SIZE); k=k+1) begin
        din = ntt_pin[k];
        #FP;
    end

    #(5*FP);

    // start (ntt)
    start = 1;
    #FP;
    start = 0;
    #FP;

    while(done == 0)
        #FP;
    #FP;

    // leave time for the result read-out before loading again
    #(FP*(`RING_SIZE+10));

    // ---------- load data (intt)
    load_data = 1;
    #FP;
    load_data = 0;

    for(k=0; k<(`RING_SIZE); k=k+1) begin
        din = intt_pin[k];
        #FP;
    end

    #(5*FP);

    // start (intt)
    start_intt = 1;
    #FP;
    start_intt = 0;
    #FP;

    while(done == 0)
        #FP;
    #FP;

    #(FP*(`RING_SIZE+10));

end

// ---------------------------------------------------------------- TEST control

reg [`DATA_SIZE_ARB-1:0] ntt_nout  [0:`RING_SIZE-1];
reg [`DATA_SIZE_ARB-1:0] intt_nout [0:`RING_SIZE-1];

integer m;
integer ntt_ok,intt_ok;    // number of matching words per transform

initial begin: CHECK_RESULT
    ntt_ok  = 0;
    intt_ok = 0;
    #1500;

    // wait result (ntt)
    while(done == 0)
        #FP;
    #FP;

    // Store output (ntt), one word per clock
    for(m=0; m<(`RING_SIZE); m=m+1) begin
        ntt_nout[m] = dout;
        #FP;
    end

    #FP;

    // wait result (intt)
    while(done == 0)
        #FP;
    #FP;

    // Store output (intt), one word per clock
    for(m=0; m<(`RING_SIZE); m=m+1) begin
        intt_nout[m] = dout;
        #FP;
    end

    // Compare output with expected result (ntt)
    for(m=0; m<(`RING_SIZE); m=m+1) begin
        if(ntt_nout[m] == ntt_pout[m]) begin
            ntt_ok = ntt_ok+1;
        end
        else begin
            $display("NTT: Index-%d -- Calculated:%d, Expected:%d",m,ntt_nout[m],ntt_pout[m]);
        end
    end

    // Compare output with expected result (intt)
    for(m=0; m<(`RING_SIZE); m=m+1) begin
        if(intt_nout[m] == intt_pout[m]) begin
            intt_ok = intt_ok+1;
        end
        else begin
            $display("INTT: Index-%d -- Calculated:%d, Expected:%d",m,intt_nout[m],intt_pout[m]);
        end
    end

    #FP;

    if(ntt_ok == (`RING_SIZE))
        $display("NTT: Correct");
    else
        $display("NTT: Incorrect");

    if(intt_ok == (`RING_SIZE))
        $display("INTT: Correct");
    else
        $display("INTT: Incorrect");

    $stop();

end

// ---------------------------------------------------------------- UUT

NTTN uut (clk,reset,
          load_w,
          load_data,
          start,
          start_intt,
          din,
          done,
          dout);

endmodule

-------------------------------------------------------------------------------- /test_generator/test_generator.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright 2020 3 | # Ahmet Can Mert 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
from math import log,ceil
from random import randint

from generate_prime import *
from helper import *

# Test Generator for N-pt NTT/INTT with P Processing Element
#
# This first part of the script selects (or searches for) the NTT parameters
# and writes them to PARAM.txt. The remaining part of the script reuses the
# names defined here (N, K, P, PE, q, w, w_inv, R and the open file handles).

# -------------------------------------------------------------------------- TXT
# Output files. They are intentionally left open: the rest of the script keeps
# writing to them.
PRM_TXT       = open("PARAM.txt","w")
NTT_DIN_TXT   = open("NTT_DIN.txt","w")
NTT_DOUT_TXT  = open("NTT_DOUT.txt","w")
INTT_DIN_TXT  = open("INTT_DIN.txt","w")
INTT_DOUT_TXT = open("INTT_DOUT.txt","w")
W_TXT         = open("W.txt","w")
WINV_TXT      = open("WINV.txt","w")
# -------------------------------------------------------------------------- TXT

# Pre-defined parameter set
PC = 0 # 0: generate parameters / 1: use pre-defined parameter set

# Number of Processing Elements
P = 8

# Generate parameters
q       = 0
psi     = 0
psi_inv = 0
w       = 0
w_inv   = 0
n_inv   = 0

if PC:
    N, K, q, psi = 1024, 19, 520193, 98
    #N, K, q, psi = 1024, 27, 132120577, 73993
    #N, K, q, psi = 1024, 29, 463128577, 61961
    #N, K, q, psi = 2048, 30, 618835969, 327404
    #N, K, q, psi = 2048, 37, 137438691329, 22157790
    #N, K, q, psi = 4096, 25, 33349633, 8131
    #N, K, q, psi = 4096, 36, 68719230977, 29008497
    #N, K, q, psi = 4096, 55, 36028797009985537, 5947090524825
    #N, K, q, psi = 8192, 43, 8796092858369, 1734247217
    #N, K, q, psi = 16384, 49, 562949951881217, 45092463253
    #N, K, q, psi = 16384, 50, 1125899903500289, 68423600398
    #N, K, q, psi = 32768, 55, 36028797009985537, 5947090524825

    psi_inv = modinv(psi,q)
    w       = pow(psi,2,q)
    w_inv   = modinv(w,q)
else:
    # Input parameters
    N, K = 256, 13
    #N, K = 256, 23
    #N, K = 512, 14
    #N, K = 1024, 14
    #N, K = 1024, 29
    #N, K = 2048, 30
    #N, K = 4096, 60

    while True:
        # Draw primes until q = 1 (mod 2N).
        # NOTE(review): generate_large_prime(K) presumably returns a K-bit
        # prime -- confirm against generate_prime.py.
        q = generate_large_prime(K)
        while q % (2*N) != 1:
            q = generate_large_prime(K)

        # generate NTT parameters: take the smallest i with i^(2N) = 1,
        # i^N = -1 and no smaller positive power equal to 1 (mod q).
        for i in range(2,q-1):
            if pow(i,2*N,q) != 1 or pow(i,N,q) != (q-1):
                continue
            # Three-argument pow() reduces modulo q while exponentiating, so
            # the (up to) 2N-1 candidate powers are never built as huge
            # integers; all() also stops at the first power equal to 1.
            if all(pow(i,x,q) != 1 for x in range(1,2*N)):
                psi     = i
                psi_inv = modinv(i,q)
                w       = pow(psi,2,q)
                w_inv   = modinv(w,q)
                break
        else:
            # No suitable root found for this prime: try another one.
            continue
        break

# R = 2^(m*ceil(K/m)) with m = floor(log2(N))+1. N.bit_length() and integer
# ceiling division give the same values as int(log(N,2))+1 and ceil(K/m)
# without depending on floating-point rounding.
LOG_N_PLUS_1 = N.bit_length()
LOG_R        = LOG_N_PLUS_1 * ((K + LOG_N_PLUS_1 - 1) // LOG_N_PLUS_1)
R            = 2**LOG_R
n_inv        = modinv(N,q)
PE           = P*2

# Print parameters
print("-----------------------")
print("N : {}".format(N))
print("K : {}".format(K))
print("PE : {}".format(P))
print("q : {}".format(q))
print("psi : {}".format(psi))
print("psi_inv: {}".format(psi_inv))
print("w : {}".format(w))
print("w_inv : {}".format(w_inv))
print("n_inv : {}".format(n_inv))
print("log(R) : {}".format(LOG_R))
print("-----------------------")

# --------------------------------------------------------------------------

# One hexadecimal value per line, left-justified to 20 characters. The
# replace("L","") strips the long-integer suffix that Python 2's hex() adds.
# The seventh entry is n_inv multiplied by R modulo q, not plain n_inv.
for value in (N, q, w, w_inv, psi, psi_inv, (n_inv*R)%q, R):
    PRM_TXT.write(hex(value).replace("L","")[2:].ljust(20)+"\n")

# Legend describing the order of the values written above.
PRM_TXT.write("// Input order:\n")

PRM_TXT.write("// N\n")
PRM_TXT.write("// q\n")
PRM_TXT.write("// w\n")
PRM_TXT.write("// w_inv\n")
PRM_TXT.write("// psi\n")
PRM_TXT.write("// psi_inv\n") 144 | PRM_TXT.write("// n_inv\n") 145 | PRM_TXT.write("// R\n") 146 | PRM_TXT.write("// \n") 147 | PRM_TXT.write("// K :"+str(K)+"\n") 148 | PRM_TXT.write("// PE:"+str(P)+"\n") 149 | 150 | # -------------------------------------------------------------------------- 151 | 152 | # NTT/INTT operation 153 | A = [randint(0,q-1) for _ in range(N)] 154 | 155 | A_ntt = IterativeForwardNTT(A,q,w,R) 156 | A_rev = indexReverse(A_ntt,int(log(N,2))) 157 | A_rec = IterativeInverseNTT(A_rev,q,w_inv,R) 158 | A_res = indexReverse(A_rec,int(log(N,2))) 159 | 160 | # Sanity Check 161 | if sum([abs(x-y) for x,y in zip(A,A_res)]) == 0: 162 | print("Sanity Check: NTT operation is correct.") 163 | else: 164 | print("Sanity Check: Check your math with NTT/INTT operation.") 165 | 166 | # Print input/output to txt (normal input - bit-reversed output) 167 | for i in range(N): 168 | NTT_DIN_TXT.write(hex(A[i]).replace("L","")[2:]+"\n") 169 | NTT_DOUT_TXT.write(hex(A_ntt[i]).replace("L","")[2:]+"\n") 170 | 171 | for i in range(N): 172 | INTT_DIN_TXT.write(hex(A_rev[i]).replace("L","")[2:]+"\n") 173 | INTT_DOUT_TXT.write(hex(A_rec[i]).replace("L","")[2:]+"\n") 174 | 175 | # Print TWs to txt 176 | for j in range(int(log(N, 2))): 177 | for k in range(1 if (((N//PE)>>j) < 1) else ((N//PE)>>j)): 178 | for i in range(P): 179 | w_pow = (((P< 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
import math

DEBUG_MODE_NTT = 0
DEBUG_MODE_INTT = 0

# Modular inverse (https://stackoverflow.com/questions/4798654/modular-multiplicative-inverse-function-in-python)
def egcd(a, b):
    """Extended Euclid: return (g, x, y) with g = gcd(a, b) and a*x + b*y == g."""
    if a == 0:
        return (b, 0, 1)
    else:
        g, y, x = egcd(b % a, a)
        return (g, x - (b // a) * y, y)

def modinv(a, m):
    """Return the inverse of a modulo m, reduced into [0, m).

    Raises Exception when gcd(a, m) != 1, i.e. when no inverse exists.
    """
    g, x, y = egcd(a, m)
    if g != 1:
        raise Exception('Modular inverse does not exist')
    else:
        return x % m

# Bit-Reverse integer
def intReverse(a,n):
    """Return a with its n-bit, zero-padded binary representation reversed."""
    b = ('{:0'+str(n)+'b}').format(a)
    return int(b[::-1],2)

# Bit-Reversed index
def indexReverse(a,r):
    """Return a new list in which a[i] is stored at the r-bit reversal of i."""
    n = len(a)
    b = [0]*n
    for i in range(n):
        rev_idx = intReverse(i,r)
        b[rev_idx] = a[i]
    return b

def _debug_banner(text, *streams):
    """Write one section-header line (text includes its newline) to each stream."""
    for stream in streams:
        stream.write("------------------------------ "+text)

def _debug_dump(values, *streams):
    """Write every element of values on its own line to each stream."""
    for value in values:
        for stream in streams:
            stream.write(str(value)+"\n")

def _butterfly_stages(arrayIn, P, W, R, log_result, log_btf):
    """Run the log2(N) butterfly stages shared by the forward and inverse NTT.

    arrayIn is copied, never modified. P is the modulus and W the root of
    unity whose powers are the twiddle factors. R only affects the debug
    output, where each twiddle is printed as (w*R) % P. log_result and
    log_btf are open text files, or both None when debugging is disabled.
    Returns the transformed list.
    """
    arrayOut = list(arrayIn)
    N = len(arrayOut)
    debug = log_result is not None

    if debug:
        _debug_banner("input: \n", log_result, log_btf)
        _debug_dump(arrayOut, log_result, log_btf)

    # floor(log2(N)) computed exactly; for N == 0 this is -1 and no stage runs.
    v = N.bit_length() - 1

    for i in range(0, v):
        if debug:
            _debug_banner("stage: "+str(i)+"\n", log_result, log_btf)

        groups = 2 ** i            # number of butterfly groups in this stage
        half   = 2 ** (v - i - 1)  # distance between the two butterfly inputs
        for j in range(0, groups):
            for k in range(0, half):
                s = j * (2 * half) + k
                t = s + half

                # Modular exponentiation: same value as (W ** e) % P without
                # materialising the full, possibly enormous, power first.
                w = pow(W, groups * k, P)

                as_temp = arrayOut[s]
                at_temp = arrayOut[t]

                arrayOut[s] = (as_temp + at_temp) % P
                arrayOut[t] = ((as_temp - at_temp) * w) % P

                if debug:
                    log_btf.write((str(s)+" "+str(t)+" "+str(((2 ** i) * k))).ljust(16)+"("+str(as_temp).ljust(12)+" "+str(at_temp).ljust(12)+" "+str((w*R) % P).ljust(12)+") -> ("+str(arrayOut[s]).ljust(12)+" "+str(arrayOut[t]).ljust(12)+")"+"\n")

        # NOTE(review): the per-stage dump is placed inside the stage loop
        # (directly under its "stage:" header); confirm against the original
        # file's indentation.
        if debug:
            _debug_dump(arrayOut, log_result)

    if debug:
        _debug_banner("result: \n", log_result, log_btf)
        _debug_dump(arrayOut, log_result, log_btf)

    return arrayOut

def _inverse_stages(arrayIn, P, W, R, log_result, log_btf):
    """Butterfly stages followed by multiplication of every element by N^-1 mod P."""
    arrayOut = _butterfly_stages(arrayIn, P, W, R, log_result, log_btf)

    # An empty input has no length to invert; return it unchanged.
    if arrayOut:
        N_inv = modinv(len(arrayOut), P)
        arrayOut = [(value * N_inv) % P for value in arrayOut]

    if log_result is not None:
        _debug_banner("result (with N_inv): \n", log_result, log_btf)
        _debug_dump(arrayOut, log_result, log_btf)

    return arrayOut

# forward ntt (takes input in normal order, produces output in bit-reversed order)
def IterativeForwardNTT(arrayIn, P, W, R):
    """Forward NTT of arrayIn modulo P using root of unity W.

    Input is in normal order, output in bit-reversed order. R is only used
    for the debug trace. When DEBUG_MODE_NTT is set, intermediate values are
    written to NTT_DIN_DEBUG_1.txt (stage results) and NTT_DIN_DEBUG_2.txt
    (butterfly inputs/outputs); the files are closed even if an error occurs.
    """
    if not DEBUG_MODE_NTT:
        return _butterfly_stages(arrayIn, P, W, R, None, None)

    with open("NTT_DIN_DEBUG_1.txt","w") as log_result, \
         open("NTT_DIN_DEBUG_2.txt","w") as log_btf:
        return _butterfly_stages(arrayIn, P, W, R, log_result, log_btf)

# inverse ntt (takes input in normal order, produces output in bit-reversed order)
def IterativeInverseNTT(arrayIn, P, W, R):
    """Inverse NTT of arrayIn modulo P; W is the inverse root of unity.

    Same butterfly network as the forward transform, followed by scaling with
    N^-1 mod P. Input is in normal order, output in bit-reversed order. R is
    only used for the debug trace. When DEBUG_MODE_INTT is set, intermediate
    values are written to test/INTT_DIN_DEBUG_1.txt and
    test/INTT_DIN_DEBUG_2.txt (the "test" directory must already exist).
    """
    if not DEBUG_MODE_INTT:
        return _inverse_stages(arrayIn, P, W, R, None, None)

    with open("test/INTT_DIN_DEBUG_1.txt","w") as log_result, \
         open("test/INTT_DIN_DEBUG_2.txt","w") as log_btf:
        return _inverse_stages(arrayIn, P, W, R, log_result, log_btf)
/*
Copyright 2020, Ahmet Can Mert

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

`include "defines.v"

// AddressGenerator
// Sequencer for the NTT datapath. After `start` it runs `RING_DEPTH stages
// (c_stage = 0 .. `RING_DEPTH-1). Each stage issues
// (`RING_SIZE >> (`PE_DEPTH+1)) loop iterations (c_loop) and is followed by a
// wait phase counted by c_wait. From these counters it derives:
//   raddr0          : data read address           ({raddr_m, raddr}, registered)
//   raddr_tw        : twiddle-factor read address (c_tw, delayed)
//   waddr0/waddr1   : write addresses             ({waddr_m, waddre/waddro}, delayed)
//   wen*, brsel*,
//   brselen*        : write enable / select signals, delayed like the addresses
//   brscramble0     : one (`PE_DEPTH+1)-bit index per lane (2*`PE_NUMBER lanes)
//   stage_count     : delayed copy of c_stage
//   ntt_finished    : delayed pulse raised when the last stage's wait ends
// NOTE(review): the ShiftReg instances are presumably SHIFT-cycle delay lines
// of DATA bits; ShiftReg.v is not visible here -- confirm.
module AddressGenerator (input                                       clk,reset,
                         input                                       start,
                         output reg [`RING_DEPTH-`PE_DEPTH+1:0]      raddr0,
                         output reg [`RING_DEPTH-`PE_DEPTH+1:0]      waddr0,waddr1,
                         output reg                                  wen0 ,wen1 ,
                         output reg                                  brsel0,brsel1,
                         output reg                                  brselen0,brselen1,
                         output reg [2*`PE_NUMBER*(`PE_DEPTH+1)-1:0] brscramble0,
                         output reg [`RING_DEPTH-`PE_DEPTH+2:0]      raddr_tw,
                         output reg [4:0]                            stage_count,
                         output reg                                  ntt_finished);
// ---------------------------------------------------------------------------
// Control signals
// Limits are latched on `start`; the counters run against them.
reg [4:0]                       c_stage_limit;
reg [`RING_DEPTH-`PE_DEPTH:0]   c_loop_limit;
reg [`RING_DEPTH-`PE_DEPTH+2:0] c_tw_limit;    // declared but not used in this module

reg [4:0]                       c_stage;       // current stage
reg [`RING_DEPTH-`PE_DEPTH:0]   c_loop;        // iteration inside the current stage
reg [`RING_DEPTH-`PE_DEPTH+2:0] c_tw;          // twiddle-factor address

reg [8:0] c_wait_limit;
reg [8:0] c_wait;                              // cycle counter of the wait phase

// Read address: low part (raddr) and 2-bit upper part (raddr_m)
reg [`RING_DEPTH-`PE_DEPTH-1:0] raddr;
reg [1:0]                       raddr_m;

// Write addresses: low parts (waddre / waddro) and shared 2-bit upper part
reg [`RING_DEPTH-`PE_DEPTH-1:0] waddre,waddro;
reg [1:0]                       waddr_m;

reg wen;
reg brsel;
reg brselen;
reg finished;
reg [2*`PE_NUMBER*(`PE_DEPTH+1)-1:0] brscramble;

// ---------------------------------------------------------------------------
// FSM
reg [1:0] state;
// 0 --> IDLE
// 1 --> NTT
// 2 --> NTT (WAIT between stages)

always @(posedge clk or posedge reset) begin
    if(reset)
        state <= 0;
    else begin
        case(state)
        2'd0: begin
            state <= (start) ? 1 : 0;
        end
        2'd1: begin
            // leave the NTT state once the last loop iteration has been issued
            state <= (c_loop == c_loop_limit) ? 2 : 1;
        end
        2'd2: begin
            if((c_stage == c_stage_limit) && (c_wait == c_wait_limit)) // operation is finished
                state <= 0;
            else if(c_wait == c_wait_limit) // to next NTT stage
                state <= 1;
            else // wait
                state <= 2;
        end
        default: state <= 0;
        endcase
    end
end

// --------------------------------------------------------------------------- WAIT OPERATION
// c_wait_limit is loaded with 15 on start; c_wait counts from 0 up to the
// limit while the FSM is in state 2 and is held at 0 otherwise.

always @(posedge clk or posedge reset) begin
    if(reset) begin
        c_wait_limit <= 0;
        c_wait       <= 0;
    end
    else begin
        c_wait_limit <= (start) ? 8'd15 : c_wait_limit;

        if(state == 2'd2)
            c_wait <= (c_wait < c_wait_limit) ? (c_wait + 1) : 0;
        else
            c_wait <= 0;
    end
end

// --------------------------------------------------------------------------- c_stage & c_loop
// Limits: last stage index and last loop index of a stage, latched on start.

always @(posedge clk or posedge reset) begin
    if(reset) begin
        c_stage_limit <= 0;
        c_loop_limit  <= 0;
    end
    else begin
        if(start) begin
            c_stage_limit <= (`RING_DEPTH-1);
            c_loop_limit  <= ((`RING_SIZE >> (`PE_DEPTH+1))-1);
        end
        else begin
            c_stage_limit <= c_stage_limit;
            c_loop_limit  <= c_loop_limit;
        end
    end
end

// Counters: c_loop advances every cycle in state 1 and is cleared when a wait
// phase ends; c_stage advances when a wait phase ends and wraps to 0 after
// the last stage.
always @(posedge clk or posedge reset) begin
    if(reset) begin
        c_stage <= 0;
        c_loop  <= 0;
    end
    else begin
        if(start) begin
            c_stage <= 0;
            c_loop  <= 0;
        end
        else begin
            // ---------------------------- c_stage
            if((state == 2'd2) && (c_wait == c_wait_limit) && (c_stage == c_stage_limit))
                c_stage <= 0;
            else if((state == 2'd2) && (c_wait == c_wait_limit))
                c_stage <= c_stage + 1;
            else
                c_stage <= c_stage;

            // ---------------------------- c_loop
            if((state == 2'd2) && (c_wait == c_wait_limit))
                c_loop <= 0;
            else if((state == 2'd1) && (c_loop < c_loop_limit))
                c_loop <= c_loop + 1;
            else
                c_loop <= c_loop;
        end
    end
end

// --------------------------------------------------------------------------- twiddle factors
// c_tw is updated once per loop iteration (state 1, except on the last
// iteration of a stage). The update rule depends on the stage:
//   stage 0                               : alternating +stride / +1-stride step, masked with c_loop_limit
//   stage >= `RING_DEPTH-`PE_DEPTH-1      : address is held
//   otherwise                             : same alternating step plus a correction
//                                           whenever (c_loop & c_tw_temp) == c_tw_temp
// where stride = (1 << (`RING_DEPTH-`PE_DEPTH-2)) >> c_stage.
// At the end of each wait phase c_tw is incremented (or cleared after the
// last stage).
wire [`RING_DEPTH-`PE_DEPTH+2:0] c_tw_temp;
assign c_tw_temp = (c_loop_limit>>c_stage);

always @(posedge clk or posedge reset) begin
    if(reset) begin
        c_tw <= 0;
    end
    else begin
        if(start) begin
            c_tw <= 0;
        end
        else begin
            if((state == 2'd1) && (c_loop != c_loop_limit)) begin
                if(c_stage == 0) begin
                    if(c_loop[0] == 0)
                        c_tw <= (((c_tw + ((1 << (`RING_DEPTH-`PE_DEPTH-2))>>c_stage))) & c_loop_limit);
                    else
                        c_tw <= (((c_tw + 1 - ((1 << (`RING_DEPTH-`PE_DEPTH-2))>>c_stage))) & c_loop_limit);
                end
                else if(c_stage >= (`RING_DEPTH-`PE_DEPTH-1)) begin
                    c_tw <= c_tw;
                end
                else begin
                    if(c_loop[0] == 0) begin
                        c_tw <= c_tw + ((1 << (`RING_DEPTH-`PE_DEPTH-2))>>c_stage)
                                     - (((c_loop & c_tw_temp) == c_tw_temp) ? (((c_loop & c_tw_temp)>>1)+1) : 0);
                    end
                    else begin
                        c_tw <= (c_tw + 1) - ((1 << (`RING_DEPTH-`PE_DEPTH-2))>>c_stage)
                                           - (((c_loop & c_tw_temp) == c_tw_temp) ? (((c_loop & c_tw_temp)>>1)+1) : 0);
                    end
                end
            end
            else if((state == 2'd2) && (c_wait == c_wait_limit) && (c_stage == c_stage_limit))
                c_tw <= 0;
            else if((state == 2'd2) && (c_wait == c_wait_limit)) begin
                c_tw <= c_tw+1;
            end
            else begin
                c_tw <= c_tw;
            end
        end
    end
end

// --------------------------------------------------------------------------- raddr (1 cc delayed)
// raddr_temp is the stage-dependent shift amount used to build the read
// address from c_loop while c_stage < `RING_DEPTH-`PE_DEPTH-1; in later
// stages the read address is simply c_loop.

wire [`RING_DEPTH-`PE_DEPTH-1:0] raddr_temp;
assign raddr_temp = ((`RING_DEPTH-`PE_DEPTH-1) - (c_stage+1));

always @ (posedge clk or posedge reset) begin
    if(reset) begin
        raddr   <= 0;
        raddr_m <= 0;
    end
    else begin
        if(start) begin
            raddr   <= 0;
            raddr_m <= 0;
        end
        else begin
            // ---------------------------- raddr
            if((state == 2'd2) && (c_wait == c_wait_limit))
                raddr <= 0;
            else if((state == 2'd1) && (c_loop <= c_loop_limit)) begin
                if(c_stage < (`RING_DEPTH-`PE_DEPTH-1)) begin
                    // even iterations read the lower half of a group, odd
                    // iterations the same offset plus (1 << raddr_temp)
                    if(~c_loop[0])
                        raddr <= (c_loop >> 1) + ((c_loop >> (raddr_temp+1)) << raddr_temp);
                    else
                        raddr <= (1 << raddr_temp) + (c_loop >> 1) + ((c_loop >> (raddr_temp+1)) << raddr_temp);
                end
                else
                    raddr <= c_loop;
            end
            else
                raddr <= raddr;

            // ---------------------------- raddr_m
            // bit 0 of the upper read-address part toggles at the end of
            // every wait phase, i.e. once per stage
            if((state == 2'd2) && (c_wait == c_wait_limit))
                raddr_m <= {raddr_m[1],~raddr_m[0]};
            else
                raddr_m <= raddr_m;
        end
    end
end

// --------------------------------------------------------------------------- waddr (1 cc delayed)
// Same address construction as for raddr, but the even (waddre) and odd
// (waddro) addresses are produced together on every iteration.

wire [`RING_DEPTH-`PE_DEPTH-1:0] waddr_temp;
assign waddr_temp = ((`RING_DEPTH-`PE_DEPTH-1) - (c_stage+1));

always @ (posedge clk or posedge reset) begin
    if(reset) begin
        waddre  <= 0;
        waddro  <= 0;
        waddr_m <= 0;
    end
    else begin
        if(start) begin
            waddre  <= 0;
            waddro  <= (1 << (`RING_DEPTH-`PE_DEPTH-1));
            waddr_m <= 1;
        end
        else begin
            // ---------------------------- waddre / waddro
            if((state == 2'd2) && (c_wait == c_wait_limit)) begin
                waddre <= 0;
                waddro <= 0;
            end
            else if((state == 2'd1) && (c_loop <= c_loop_limit)) begin
                if(c_stage < (`RING_DEPTH-`PE_DEPTH-1)) begin
                    waddre <= (c_loop >> 1) + ((c_loop >> (waddr_temp+1)) << waddr_temp);
                    waddro <= (c_loop >> 1) + ((c_loop >> (waddr_temp+1)) << waddr_temp) + (1 << waddr_temp);
                end
                else begin
                    waddre <= c_loop;
                    waddro <= c_loop;
                end
            end
            else begin
                waddre <= waddre;
                waddro <= waddro;
            end

            // ---------------------------- waddr_m
            // Toggles bit 0 once per stage like raddr_m, except that it is
            // forced to 2'b10 when the second-to-last stage finishes, so the
            // last stage writes with upper address bits 2'b10.
            if((state == 2'd2) && (c_wait == c_wait_limit) && (c_stage == (c_stage_limit-1)))
                waddr_m <= 2'b10;
            else if((state == 2'd2) && (c_wait == c_wait_limit))
                waddr_m <= {waddr_m[1],~waddr_m[0]};
            else
                waddr_m <= waddr_m;
        end
    end
end

// --------------------------------------------------------------------------- wen,brsel,brselen (1 cc delayed)
// Active only while the FSM is in state 1; brsel follows the parity of c_loop.

always @(posedge clk or posedge reset) begin
    if(reset) begin
        wen     <= 0;
        brsel   <= 0;
        brselen <= 0;
    end
    else begin
        if(state == 2'd1) begin
            wen     <= 1;
            brsel   <= c_loop[0];
            brselen <= 1;
        end
        else begin
            wen     <= 0;
            brsel   <= 0;
            brselen <= 0;
        end
    end
end

// --------------------------------------------------------------------------- brscramble
// For stages >= `RING_DEPTH-`PE_DEPTH-1 every lane n (0 .. 2*`PE_NUMBER-1)
// gets a (`PE_DEPTH+1)-bit index computed from n and the stage offset
// c_stage-(`RING_DEPTH-`PE_DEPTH-1); in earlier stages all indices are 0.

wire [`PE_DEPTH:0] brscrambled_temp;
wire [`PE_DEPTH:0] brscrambled_temp2;
wire [`PE_DEPTH:0] brscrambled_temp3;
assign brscrambled_temp  = (`PE_NUMBER >> (c_stage-(`RING_DEPTH-`PE_DEPTH-1)));
assign brscrambled_temp2 = (`PE_DEPTH - (c_stage-(`RING_DEPTH-`PE_DEPTH-1)));
assign brscrambled_temp3 = ((`PE_DEPTH+1) - (c_stage-(`RING_DEPTH-`PE_DEPTH-1)));

always @(posedge clk or posedge reset) begin: B_BLOCK
    integer n;
    for(n=0; n < (2*`PE_NUMBER); n=n+1) begin: LOOP_1
        if(reset) begin
            brscramble[(`PE_DEPTH+1)*n+:(`PE_DEPTH+1)] <= 0;
        end
        else begin
            if(c_stage >= (`RING_DEPTH-`PE_DEPTH-1)) begin
                brscramble[(`PE_DEPTH+1)*n+:(`PE_DEPTH+1)] <= (brscrambled_temp*n[0]) +
                                                              (((n>>1)<<1) & (brscrambled_temp-1)) +
                                                              ((n>>(brscrambled_temp2+1))<<(brscrambled_temp3)) +
                                                              ((n>>brscrambled_temp2) & 1);
            end
            else begin
                brscramble[(`PE_DEPTH+1)*n+:(`PE_DEPTH+1)] <= 0;
            end
        end
    end
end

// --------------------------------------------------------------------------- ntt_finished
// One-cycle pulse on the cycle after the wait phase of the last stage ends.

always @(posedge clk or posedge reset) begin
    if(reset) begin
        finished <= 0;
    end
    else begin
        if((state == 2'd2) && (c_wait == c_wait_limit) && (c_stage == c_stage_limit))
            finished <= 1;
        else
            finished <= 0;
    end
end

// --------------------------------------------------------------------------- delays

// -------------------- read signals
// c_tw passes through sr00 (SHIFT = 1) and one more register below; the data
// read address {raddr_m,raddr} is only registered once.
wire [`RING_DEPTH-`PE_DEPTH+2:0] c_tw_w;

ShiftReg #(.SHIFT(1),.DATA(`RING_DEPTH-`PE_DEPTH+3)) sr00(clk,reset,c_tw,c_tw_w);

always @(posedge clk or posedge reset) begin
    if(reset) begin
        raddr0   <= 0;
        raddr_tw <= 0;
    end
    else begin
        raddr0   <= {raddr_m,raddr};
        raddr_tw <= c_tw_w;
    end
end

// -------------------- write signals (waddr0/1, wen0/1, brsel0/1, brselen0/1)
// All "0" outputs use SHIFT = `INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY and all
// "1" outputs use one more.
// waddr0/1
wire [`RING_DEPTH-`PE_DEPTH+1:0] waddre_w,waddro_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY  ),.DATA(`RING_DEPTH-`PE_DEPTH+2)) sr01(clk,reset,{waddr_m,waddre},waddre_w);
ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY+1),.DATA(`RING_DEPTH-`PE_DEPTH+2)) sr02(clk,reset,{waddr_m,waddro},waddro_w);

always @(*) begin
    waddr0 = waddre_w;
    waddr1 = waddro_w;
end

// wen0/1
wire [0:0] wen0_w,wen1_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY  ),.DATA(1)) sr03(clk,reset,wen,wen0_w);
ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY+1),.DATA(1)) sr04(clk,reset,wen,wen1_w);

always @(*) begin
    wen0 = wen0_w;
    wen1 = wen1_w;
end

// brsel
wire [0:0] brsel0_w,brsel1_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY  ),.DATA(1)) sr05(clk,reset,brsel,brsel0_w);
ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY+1),.DATA(1)) sr06(clk,reset,brsel,brsel1_w);

always @(*) begin
    brsel0 = brsel0_w;
    brsel1 = brsel1_w;
end

// brselen
wire [0:0] brselen0_w,brselen1_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY  ),.DATA(1)) sr07(clk,reset,brselen,brselen0_w);
ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY+1),.DATA(1)) sr08(clk,reset,brselen,brselen1_w);

always @(*) begin
    brselen0 = brselen0_w;
    brselen1 = brselen1_w;
end

// stage count
wire [4:0] c_stage_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY+1),.DATA(5)) sr09(clk,reset,c_stage,c_stage_w);

always @(*) begin
    stage_count = c_stage_w;
end

// brscramble
wire [2*`PE_NUMBER*(`PE_DEPTH+1)-1:0] brscramble_w;

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY),.DATA(2*`PE_NUMBER*(`PE_DEPTH+1))) sr10(clk,reset,brscramble,brscramble_w);

always @(*) begin
    brscramble0 = brscramble_w;
end

// ntt finished
wire finished_w;

ShiftReg #(.SHIFT(4),.DATA(1)) sr11(clk,reset,finished,finished_w);

always @(*) begin
    ntt_finished = finished_w;
end

endmodule
441 | end 442 | 443 | endmodule 444 | -------------------------------------------------------------------------------- /NTTN.v: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020, Ahmet Can Mert 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | `include "defines.v" 18 | 19 | // start: HIGH for 1 cc (after 1 cc, data starts going in) 20 | // done : HIGH for 1 cc (after 1 cc, data starts going out) 21 | 22 | // Input: standard order 23 | // output: scrambled order 24 | 25 | // --- Baseline Version 26 | // * address bit-lengts are set according to worst-case 27 | // * supports up-to 2^15-pt NTT/INTT 28 | // * integer multiplier is not optimized 29 | // * modular reduction is not optimized 30 | // * wait state is not optimized 31 | 32 | module NTTN (input clk,reset, 33 | input load_w, 34 | input load_data, 35 | input start, 36 | input start_intt, 37 | input [`DATA_SIZE_ARB-1:0] din, 38 | output reg done, 39 | output reg [`DATA_SIZE_ARB-1:0] dout 40 | ); 41 | // ---------------------------------------------------------------- connections 42 | 43 | // parameters & control 44 | reg [2:0] state; 45 | // 0: IDLE 46 | // 1: load twiddle factors + q + n_inv 47 | // 2: load data 48 | // 3: performs ntt 49 | // 4: output data 50 | // 5: last stage of intt 51 | 52 | reg [`RING_DEPTH+3:0] sys_cntr; 53 | 54 | reg [`DATA_SIZE_ARB-1:0]q; 55 | reg [`DATA_SIZE_ARB-1:0]n_inv; 56 | 57 | // data tw brams 
(datain,dataout,waddr,raddr,wen) 58 | reg [`DATA_SIZE_ARB-1:0] pi [(2*`PE_NUMBER)-1:0]; 59 | wire[`DATA_SIZE_ARB-1:0] po [(2*`PE_NUMBER)-1:0]; 60 | reg [`RING_DEPTH-`PE_DEPTH+1:0] pw [(2*`PE_NUMBER)-1:0]; 61 | reg [`RING_DEPTH-`PE_DEPTH+1:0] pr [(2*`PE_NUMBER)-1:0]; 62 | reg [0:0] pe [(2*`PE_NUMBER)-1:0]; 63 | 64 | reg [`DATA_SIZE_ARB-1:0] ti [`PE_NUMBER-1:0]; 65 | wire[`DATA_SIZE_ARB-1:0] to [`PE_NUMBER-1:0]; 66 | reg [`RING_DEPTH-`PE_DEPTH+3:0] tw [`PE_NUMBER-1:0]; 67 | reg [`RING_DEPTH-`PE_DEPTH+3:0] tr [`PE_NUMBER-1:0]; 68 | reg [0:0] te [`PE_NUMBER-1:0]; 69 | 70 | // control signals 71 | wire [`RING_DEPTH-`PE_DEPTH+1:0] raddr; 72 | wire [`RING_DEPTH-`PE_DEPTH+1:0] waddr0,waddr1; 73 | wire wen0 ,wen1 ; 74 | wire brsel0,brsel1; 75 | wire brselen0,brselen1; 76 | wire [2*`PE_NUMBER*(`PE_DEPTH+1)-1:0] brscramble; 77 | wire [`RING_DEPTH-`PE_DEPTH+2:0] raddr_tw; 78 | 79 | wire [4:0] stage_count; 80 | wire ntt_finished; 81 | 82 | reg ntt_intt; // ntt:0 -- intt:1 83 | 84 | // pu 85 | reg [`DATA_SIZE_ARB-1:0] NTTin [(2*`PE_NUMBER)-1:0]; 86 | reg [`DATA_SIZE_ARB-1:0] MULin [`PE_NUMBER-1:0]; 87 | wire[`DATA_SIZE_ARB-1:0] ASout [(2*`PE_NUMBER)-1:0]; // ADD-SUB out (no extra delay after odd) 88 | wire[`DATA_SIZE_ARB-1:0] EOout [(2*`PE_NUMBER)-1:0]; // EVEN-ODD out 89 | 90 | // ---------------------------------------------------------------- BRAMs 91 | // 2*PE BRAMs for input-output polynomial 92 | // PE BRAMs for storing twiddle factors 93 | 94 | generate 95 | genvar k; 96 | 97 | for(k=0; k<`PE_NUMBER ;k=k+1) begin: BRAM_GEN_BLOCK 98 | BRAM #(.DLEN(`DATA_SIZE_ARB),.HLEN(`RING_DEPTH-`PE_DEPTH+2)) bd00(clk,pe[2*k+0],pw[2*k+0],pi[2*k+0],pr[2*k+0],po[2*k+0]); 99 | BRAM #(.DLEN(`DATA_SIZE_ARB),.HLEN(`RING_DEPTH-`PE_DEPTH+2)) bd01(clk,pe[2*k+1],pw[2*k+1],pi[2*k+1],pr[2*k+1],po[2*k+1]); 100 | BRAM #(.DLEN(`DATA_SIZE_ARB),.HLEN(`RING_DEPTH-`PE_DEPTH+4)) bt00(clk,te[k],tw[k],ti[k],tr[k],to[k]); 101 | end 102 | endgenerate 103 | 104 | // 
// ---------------------------------------------------------------- NTT2 units
// One NTT2 instance per processing element. Instance m is wired positionally
// to clk, reset, q, the pair NTTin[2m]/NTTin[2m+1], MULin[m], and the pairs
// ASout[2m]/ASout[2m+1] and EOout[2m]/EOout[2m+1].

generate
    genvar m;

    for (m = 0; m < `PE_NUMBER; m = m + 1) begin: NTT2_GEN_BLOCK
        NTT2 nttu(clk, reset,
                  q,
                  NTTin[2*m+0], NTTin[2*m+1],
                  MULin[m],
                  ASout[2*m+0], ASout[2*m+1],
                  EOout[2*m+0], EOout[2*m+1]);
    end
endgenerate

// ---------------------------------------------------------------- control unit
// The address generator receives (start | start_intt); its other connections
// are the read/write addresses, write enables, bit-reverse selects, twiddle
// read address, stage counter and finish flag consumed by the logic below.

AddressGenerator ag(clk, reset,
                    (start | start_intt),
                    raddr,
                    waddr0, waddr1,
                    wen0, wen1,
                    brsel0, brsel1,
                    brselen0, brselen1,
                    brscramble,
                    raddr_tw,
                    stage_count,
                    ntt_finished
                    );

// ---------------------------------------------------------------- ntt/intt
// Operation flag: cleared by `start`, set by `start_intt` (`start` has
// priority), and left unchanged on every other cycle.

always @(posedge clk or posedge reset) begin
    if (reset)
        ntt_intt <= 0;
    else if (start)
        ntt_intt <= 0;
    else if (start_intt)
        ntt_intt <= 1;
end

// ---------------------------------------------------------------- state machine & sys_cntr
// state 0 : wait for load_w / load_data / start / start_intt (in that priority)
// state 1 : load twiddle factors, then q, then n_inv
// state 2 : load input data
// state 3 : NTT operations, left when ntt_finished is seen
// state 4 : output data
// state 5 : last stage of INTT, followed by state 4
// sys_cntr counts cycles inside states 1, 2, 4 and 5; it is held at zero in
// states 0 and 3. A state that does not assign `state` simply stays put.

always @(posedge clk or posedge reset) begin
    if (reset) begin
        sys_cntr <= 0;
        state    <= 3'd0;
    end
    else begin
        case (state)
        3'd0: begin
            sys_cntr <= 0;
            if (load_w)
                state <= 3'd1;
            else if (load_data)
                state <= 3'd2;
            else if (start | start_intt)
                state <= 3'd3;
        end
        3'd1: begin
            if (sys_cntr == ((((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)<<1)+2-1)) begin
                sys_cntr <= 0;
                state    <= 3'd0;
            end
            else begin
                sys_cntr <= sys_cntr + 1;
            end
        end
        3'd2: begin
            if (sys_cntr == (`RING_SIZE-1)) begin
                sys_cntr <= 0;
                state    <= 3'd0;
            end
            else begin
                sys_cntr <= sys_cntr + 1;
            end
        end
        3'd3: begin
            sys_cntr <= 0;
            if (ntt_finished) begin
                // forward transform goes straight to output, inverse needs state 5 first
                if (ntt_intt == 0)
                    state <= 3'd4;
                else if (ntt_intt == 1)
                    state <= 3'd5;
            end
        end
        3'd4: begin
            if (sys_cntr == (`RING_SIZE+1)) begin
                sys_cntr <= 0;
                state    <= 3'd0;
            end
            else begin
                sys_cntr <= sys_cntr + 1;
            end
        end
        3'd5: begin
            if (sys_cntr == (((`RING_SIZE >> (`PE_DEPTH+1))<<1) + `INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY)) begin
                sys_cntr <= 0;
                state    <= 3'd4;
            end
            else begin
                sys_cntr <= sys_cntr + 1;
            end
        end
        default: begin
            sys_cntr <= 0;
            state    <= 3'd0;
        end
        endcase
    end
end

// ---------------------------------------------------------------- load twiddle factor + q + n_inv & other operations
// Drives te/tw/ti/tr for every processing element.
// NOTE(review): these look like write-enable / write-address / write-data /
// read-address of the twiddle memories declared earlier - confirm there.
// In state 1 the first half of the count writes din with the top tw bit 0 and
// the second half writes din with the top tw bit 1; the low bits of the count
// pick the element whose te is raised and the remaining bits form the address.

always @(posedge clk or posedge reset) begin: TW_BLOCK
    integer t;
    for (t = 0; t < (`PE_NUMBER); t = t + 1) begin: LOOP_1
        if (reset) begin
            te[t] <= 0;
            tw[t] <= 0;
            ti[t] <= 0;
            tr[t] <= 0;
        end
        else if (state == 3'd1) begin
            if (sys_cntr < ((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)) begin
                // first half of the load
                tr[t] <= 0;
                ti[t] <= din;
                tw[t][`RING_DEPTH-`PE_DEPTH+3]   <= 0;
                tw[t][`RING_DEPTH-`PE_DEPTH+2:0] <= (sys_cntr >> `PE_DEPTH);
                te[t] <= (t == (sys_cntr & ((1 << `PE_DEPTH)-1)));
            end
            else if (sys_cntr < (((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)<<1)) begin
                // second half of the load: same pattern on the rebased count
                tr[t] <= 0;
                ti[t] <= din;
                tw[t][`RING_DEPTH-`PE_DEPTH+3]   <= 1;
                tw[t][`RING_DEPTH-`PE_DEPTH+2:0] <= ((sys_cntr-((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)) >> `PE_DEPTH);
                te[t] <= (t == ((sys_cntr-((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)) & ((1 << `PE_DEPTH)-1)));
            end
            else begin
                // the two trailing counts of state 1 carry q and n_inv, not twiddles
                te[t] <= 0;
                tw[t] <= 0;
                ti[t] <= 0;
                tr[t] <= 0;
            end
        end
        else if (state == 3'd3) begin // NTT operations
            te[t] <= 0;
            tw[t] <= 0;
            ti[t] <= 0;
            tr[t] <= {ntt_intt,raddr_tw};
        end
        else begin
            te[t] <= 0;
            tw[t] <= 0;
            ti[t] <= 0;
            tr[t] <= 0;
        end
    end
end

// q and n_inv are taken from din on the last two counts of state 1
// (q first, n_inv one cycle later) and hold their value otherwise.

always @(posedge clk or posedge reset) begin
    if (reset) begin
        n_inv <= 0;
        q     <= 0;
    end
    else if (state == 3'd1) begin
        if (sys_cntr == ((((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)<<1)+2-2))
            q <= din;
        if (sys_cntr == ((((((1<<(`RING_DEPTH-`PE_DEPTH))-1)+`PE_DEPTH)<<`PE_DEPTH)<<1)+2-1))
            n_inv <= din;
    end
end

// ---------------------------------------------------------------- load data & other data operations

// read address used while streaming the result out (state 4)
wire [`RING_DEPTH-`PE_DEPTH-1:0] addrout = (sys_cntr >> (`PE_DEPTH+1));

// sys_cntr wrapped to RING_SIZE >> (PE_DEPTH+1) entries, used in state 5
wire [`RING_DEPTH-`PE_DEPTH-1:0] inttlast = (sys_cntr & ((`RING_SIZE >> (`PE_DEPTH+1))-1));

// copies of sys_cntr / inttlast taken from the ShiftReg instances at the end of the module
wire [`RING_DEPTH+3:0]           sys_cntr_d;
wire [`RING_DEPTH-`PE_DEPTH-1:0] inttlast_d;

// Drives pe/pw/pi/pr for each of the 2*PE_NUMBER data lanes.
// NOTE(review): these look like write-enable / write-address / write-data /
// read-address of the data memories declared earlier - confirm there.

always @(posedge clk or posedge reset) begin: DT_BLOCK
    integer d;
    for (d = 0; d < (2*`PE_NUMBER); d = d + 1) begin: LOOP_1
        if (reset) begin
            pe[d] <= 0;
            pw[d] <= 0;
            pi[d] <= 0;
            pr[d] <= 0;
        end
        else if (state == 3'd2) begin // input data
            pi[d] <= din;
            pr[d] <= 0;
            if (sys_cntr < (`RING_SIZE >> 1)) begin
                // first half of the input stream is steered to the even lanes
                pe[d] <= (d == ((sys_cntr & ((1 << `PE_DEPTH)-1)) << 1));
                pw[d] <= (sys_cntr >> `PE_DEPTH);
            end
            else begin
                // second half is steered to the odd lanes
                pe[d] <= (d == (((sys_cntr & ((1 << `PE_DEPTH)-1)) << 1)+1));
                pw[d] <= ((sys_cntr-(`RING_SIZE >> 1)) >> `PE_DEPTH);
            end
        end
        else if (state == 3'd3) begin // NTT operations
            pr[d] <= raddr;
            if (stage_count < (`RING_DEPTH - `PE_DEPTH - 1)) begin
                // Even lanes follow brselen0/brsel0, odd lanes brselen1/brsel1.
                // A lane whose select-enable is low only drops pe; pw and pi hold.
                if (d[0] == 0) begin
                    if (brselen0) begin
                        if (brsel0 == 0) begin
                            pe[d] <= wen0;
                            pw[d] <= waddr0;
                            pi[d] <= EOout[d];
                        end
                        else begin // brsel0 == 1
                            pe[d] <= wen1;
                            pw[d] <= waddr1;
                            pi[d] <= EOout[d+1];
                        end
                    end
                    else begin
                        pe[d] <= 0;
                    end
                end
                else begin
                    if (brselen1) begin
                        if (brsel1 == 0) begin
                            pe[d] <= wen0;
                            pw[d] <= waddr0;
                            pi[d] <= EOout[d-1];
                        end
                        else begin // brsel1 == 1
                            pe[d] <= wen1;
                            pw[d] <= waddr1;
                            pi[d] <= EOout[d];
                        end
                    end
                    else begin
                        pe[d] <= 0;
                    end
                end
            end
            else begin
                pe[d] <= wen0;
                pw[d] <= waddr0;
                if (stage_count < (`RING_DEPTH - 1))
                    // source lane comes from this lane's (PE_DEPTH+1)-bit slice of brscramble
                    pi[d] <= ASout[brscramble[(`PE_DEPTH+1)*d+:(`PE_DEPTH+1)]];
                else
                    pi[d] <= ASout[d];
            end
        end
        else if (state == 3'd4) begin // output data
            pe[d] <= 0;
            pw[d] <= 0;
            pi[d] <= 0;
            pr[d] <= {2'b10,addrout};
        end
        else if (state == 3'd5) begin // last stage of intt
            pr[d] <= {2'b10,inttlast};

            // default: no write on this lane; overridden below for the active lane
            pe[d] <= 0;
            pw[d] <= 0;
            pi[d] <= 0;
            if (sys_cntr_d < (`RING_SIZE >> (`PE_DEPTH+1))) begin
                // first pass: even lanes store the odd ASout output of their pair
                if (d[0] == 0) begin
                    pe[d] <= 1;
                    pw[d] <= {2'b10,inttlast_d};
                    pi[d] <= ASout[d+1];
                end
            end
            else if (sys_cntr_d < (`RING_SIZE >> (`PE_DEPTH))) begin
                // second pass: odd lanes store their own ASout output
                if (d[0] == 1) begin
                    pe[d] <= 1;
                    pw[d] <= {2'b10,inttlast_d};
                    pi[d] <= ASout[d];
                end
            end
        end
        else begin
            pe[d] <= 0;
            pw[d] <= 0;
            pi[d] <= 0;
            pr[d] <= 0;
        end
    end
end

// done signal & output data
// In state 4, dout follows po[coefout] and done is high for the single cycle
// after sys_cntr == 1; outside state 4 both are zero.
wire [`PE_DEPTH:0] coefout = (sys_cntr-2);

always @(posedge clk or posedge reset) begin
    if (reset) begin
        dout <= 0;
        done <= 0;
    end
    else if (state == 3'd4) begin
        dout <= po[coefout];
        done <= (sys_cntr == 1);
    end
    else begin
        dout <= 0;
        done <= 0;
    end
end

// ---------------------------------------------------------------- PU control
// Registers the inputs of every NTT2 unit. Outside state 5 each unit gets its
// po pair and to[p] as multiplier input. In state 5 the multiplier input is
// n_inv and the second operand is zeroed while first the even and then the odd
// po word is passed through the first operand.

always @(posedge clk or posedge reset) begin: NT_BLOCK
    integer p;
    for (p = 0; p < (`PE_NUMBER); p = p + 1) begin: LOOP_1
        if (reset) begin
            MULin[p]     <= 0;
            NTTin[2*p+0] <= 0;
            NTTin[2*p+1] <= 0;
        end
        else if (state == 3'd5) begin
            MULin[p] <= n_inv;
            if (sys_cntr < (2+(`RING_SIZE >> (`PE_DEPTH+1)))) begin
                NTTin[2*p+0] <= po[2*p+0];
                NTTin[2*p+1] <= 0;
            end
            else if (sys_cntr < (2+(`RING_SIZE >> (`PE_DEPTH)))) begin
                NTTin[2*p+0] <= po[2*p+1];
                NTTin[2*p+1] <= 0;
            end
            else begin
                NTTin[2*p+0] <= po[2*p+0];
                NTTin[2*p+1] <= po[2*p+1];
            end
        end
        else begin
            MULin[p]     <= to[p];
            NTTin[2*p+0] <= po[2*p+0];
            NTTin[2*p+1] <= po[2*p+1];
        end
    end
end

// --------------------------------------------------------------------------- delays
// sys_cntr and inttlast are passed through ShiftReg instances parameterised
// with SHIFT = INTMUL_DELAY+MODRED_DELAY+STAGE_DELAY-1; the outputs are the
// *_d signals used for the state-5 write-back in DT_BLOCK.
// NOTE(review): presumably SHIFT is the delay in clock cycles - confirm in ShiftReg.v.

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY-1),
           .DATA (`RING_DEPTH+4))
         sr00(clk, reset, sys_cntr, sys_cntr_d);

ShiftReg #(.SHIFT(`INTMUL_DELAY+`MODRED_DELAY+`STAGE_DELAY-1),
           .DATA (`RING_DEPTH-`PE_DEPTH))
         sr01(clk, reset, inttlast, inttlast_d);

// ---------------------------------------------------------------------------

endmodule
--------------------------------------------------------------------------------