├── 2D_filter
    └── conv.sv
├── CNN_TB.sv
├── Fully connected layer
    ├── flat.sv
    └── fully_connected_layer.sv
├── README.md
├── activation_func
    └── relu.sv
├── blocks
    ├── conv_block.sv
    └── max_pooling_block.sv
├── cnn_behind.py
├── pooling
    └── maxpooling.sv
└── top
    ├── CNN.sv
    └── CNN.svh


/2D_filter/conv.sv:
--------------------------------------------------------------------------------
  1 | // 2D Convolution module. For sim purposes: Image width and height are not changeable dynamically
  2 | //
  3 | // -----------------------------------------------------------------------------
  4 | // Copyright (c) 2014-2024 All rights reserved
  5 | // -----------------------------------------------------------------------------
  6 | // Author : Maksim Ananev mananev086@gmail.com
  7 | // 
  8 | // Create : 2024-05-13 11:30:23
  9 | // Revise : 2024-10-22 12:20:46
 10 | // Editor : sublime text4, tab size (4)
 11 | // -----------------------------------------------------------------------------
 12 | 
 13 | 
 14 | 
 15 | // `define RELU
 16 | 
 17 | module conv #(
 18 |     parameter PIX_WIDTH          = 8  ,
 19 |     parameter WEIGHT_WIDTH       = 10 ,
 20 |     parameter WEIGHT_FRACT_WIDTH = 5  ,
 21 |     parameter KERNEL_DIMENSION   = 3  ,
 22 |     parameter TRUNK = "TRUE",
 23 |     parameter logic [         11:0] img_width  = 28,
 24 |     parameter logic [         11:0] img_height = 28
 25 |     
 26 | ) (
 27 |     input                        clk       , // Clock
 28 |     input                        clk_en    , // Clock Enable
 29 |     input                        rst_n     , // Asynchronous reset active low
 30 |     //input pixels
 31 |     input        [PIX_WIDTH-1:0] i_data    ,
 32 |     input                        i_valid   ,
 33 |     input                        i_sop     ,
 34 |     input                        i_eop     ,
 35 |     // output pixels
 36 |     output       [((TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] o_data    ,
 37 |     output                       o_valid   ,
 38 |     output                       o_sop     ,
 39 |     output                       o_eop     ,
 40 |     ///
 41 |     input [KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH-1:0] kernel    ,
 42 |     // input        [         11:0] img_width ,
 43 |     // input        [         11:0] img_height,
 44 |     output logic                 ready     ,
 45 |     output logic [         11:0] cols_cntr ,
 46 |     output logic [         11:0] rows_cntr
 47 | );
 48 | 
 49 | 
 50 | 
 51 | /*
 52 |     Pixels Delay scheme. if Kernel 3*3
 53 | 
 54 |     pixel_input----------->--------------------                                   --pix[2][2]-->     --pix[2][1]-->
 55 |                                 /-------\     |                                  |                  |
 56 |                          ---<--| FIFO_0 |--<------>----delayed_line[0]------->-----|FF|---------->------|FF|--->-----------pix[2][0]-->--
 57 |                          |     \-------/
 58 |                          |                                                        --pix[1][2]-->     --pix[1][1]-->
 59 |                          |                                                       |                  |
 60 |                          ------------------------->---delayed_line[1]------->------|FF|---------->------|FF|--->-----------pix[1][0]-->--
 61 |                          |
 62 |                          |                                                        --pix[0][2]-->     --pix[0][1]-->
 63 |                          |      /-------\                                        |                  |
 64 |                          --->--| FIFO_1 |--------->---delayed_line[2]------->------|FF|---------->------|FF|--->-----------pix[0][0]-->--
 65 |                                \-------/
 66 | */
 67 |  localparam MAX_DEPTH = 1920;
 68 | 
 69 |     logic [img_width-1:0][PIX_WIDTH-1:0] fifo[KERNEL_DIMENSION-1]; /// !!!!!!!!!!!!!!!!!!!!!!! only for simulation, MUST BE REPLACED by a regular FIFO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 70 | 
 71 |     logic [PIX_WIDTH-1:0] delayed_line[KERNEL_DIMENSION];
 72 | 
 73 |     bit [KERNEL_DIMENSION-2:0][PIX_WIDTH-1:0] after_fifos_ffs[KERNEL_DIMENSION];
 74 | 
 75 |     logic [PIX_WIDTH-1:0] delayed_pix[KERNEL_DIMENSION][KERNEL_DIMENSION];
 76 | 
 77 |     always_comb begin // Builds the combinational tap window delayed_pix from the incoming pixel, the line buffers and the tap flip-flops
 78 |         foreach (delayed_line[i]) begin
 79 |             delayed_line[i] = (i == 0) ? i_data : fifo[i-1][img_width-1]; // line 0 is the incoming pixel; line i>0 is entry img_width-1 of line buffer i-1
 80 |         end
 81 | 
 82 |         foreach (delayed_pix[i,y]) begin
 83 |             delayed_pix[i][y] = (y==0)?delayed_line[i] : after_fifos_ffs[i][y-1]; // tap 0 is the line value itself; tap y>0 is entry y-1 of that line's flip-flop chain
 84 |         end
 85 |     end
 86 | 
 87 |     always_ff @(posedge clk) begin // Advances the line buffers and the tap flip-flops; this storage has no reset branch
 88 |         if(clk_en && (i_valid || !ready))begin // shift on every valid input pixel, and also on every enabled cycle while ready is low
 89 |             foreach (fifo[i]) begin
 90 |                 fifo[i] <= {fifo[i][img_width-2:0],( (i == 0) ? i_data : fifo[i-1][img_width-1] )}; // new value enters at index 0; buffer 0 takes i_data, buffer i>0 takes entry img_width-1 of buffer i-1
 91 |             end
 92 | 
 93 |             foreach (after_fifos_ffs[i]) begin
 94 |                 after_fifos_ffs[i] <= {after_fifos_ffs[i],delayed_line[i]}; // delayed_line[i] enters at index 0; the concatenation is one element wider than the target, so the top element is dropped
 95 |             end
 96 |         end
 97 |     end
 98 | 
 99 | 
100 | 
101 | /*
102 |     Convolution's maths. if Kernel 3*3
103 | 
104 | 
105 |     kernel table                     image lines                                          multiplication table
106 |     -------------------       -----------------------------------------      -----------------------------------------------------
107 |     | a11 | a12 | a13 |       | pix[i][y] | pix[i][y+1] | pix[i][y+2] |      | a11*pix[i][y+2] | a12*pix[i][y+1] | a13*pix[i][y] |
108 |     -------------------       -----------------------------------------      -----------------------------------------------------
109 |     | a21 | a22 | a23 |   X   | pix[i][y] | pix[i][y+1] | pix[i][y+2] |   =  | a21*pix[i][y+2] | a22*pix[i][y+1] | a23*pix[i][y] |   --------> Sum(multiplication table)
110 |     -------------------       -----------------------------------------      -----------------------------------------------------
111 |     | a31 | a32 | a33 |       | pix[i][y] | pix[i][y+1] | pix[i][y+2] |      | a31*pix[i][y+2] | a32*pix[i][y+1] | a33*pix[i][y] |
112 |     -------------------       -----------------------------------------      -----------------------------------------------------
113 | */
114 |     logic signed [KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH+PIX_WIDTH-1:0] mult_result;
115 | 
116 |     always_ff @(posedge clk) begin : proc_multiplying // First register stage: one product per kernel coefficient
117 |         if(clk_en)begin
118 |             foreach (mult_result[i,y]) begin
119 |                 mult_result[i][y] <= $signed({1'b0, delayed_pix[(KERNEL_DIMENSION-1)-i][(KERNEL_DIMENSION-1)-y]}) * $signed(kernel[i][y]); // pixel gets a leading 0 bit so it is a non-negative signed operand; window indices are mirrored relative to the kernel indices
120 |             end
121 |         end
122 |     end
123 | 
124 |     logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_result,3)-1:0]mult_intermed_sum_1dim[KERNEL_DIMENSION];
125 |     logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_result,3)-1:0]mult_sum_1dim[KERNEL_DIMENSION];
126 |     logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_sum_1dim,2)-1:0]mult_intermed_sum_2dim;
127 | 
128 |     always_comb begin // Combinational adders: per-row sums of the registered products, and the total of the registered row sums
129 |         mult_intermed_sum_2dim = '0;
130 |         foreach (mult_intermed_sum_1dim[i]) begin
131 |             mult_intermed_sum_1dim[i] = '0;
132 |             foreach (mult_intermed_sum_1dim[y]) begin // y runs over 0..KERNEL_DIMENSION-1 (taken from the size of mult_intermed_sum_1dim) and indexes the second dimension of mult_result
133 |                 mult_intermed_sum_1dim[i] += $signed(mult_result[i][y]);
134 |             end
135 | 
136 |             mult_intermed_sum_2dim += mult_sum_1dim[i]; // adds mult_sum_1dim (the registered copy written in proc_mult_sum), not mult_intermed_sum_1dim
137 | 
138 |         end
139 |     end
140 | 
141 | 
142 |     logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_sum_1dim,2)-1:0]mult_sum_out;
143 |     always_ff @(posedge clk) begin : proc_mult_sum // Registers the row sums and the (optionally shifted) total
144 |         if(clk_en)begin
145 |             foreach (mult_sum_1dim[i]) begin
146 |                 mult_sum_1dim[i] <= mult_intermed_sum_1dim[i];
147 |             end            
148 |             mult_sum_out <= (TRUNK == "TRUE") ? (mult_intermed_sum_2dim>>>WEIGHT_FRACT_WIDTH) : mult_intermed_sum_2dim; // with TRUNK == "TRUE" the total is arithmetically shifted right by WEIGHT_FRACT_WIDTH bits; otherwise it is stored unshifted
149 |         end
150 | 
151 |     end
152 | 
153 | 
154 | /*
155 |     normalize( Sum(multiplication table) ) -----> pixel output
156 | 
157 |     normalize(a, min = 0, max = 255){
158 |         if(a < min)
159 |             a = min;
160 |         if(a > max)
161 |             a = max;
162 | 
163 |     }
164 | */
165 | `ifdef RELU
166 |     assign o_data = ( mult_sum_out < 0 ) ? '0 : ( ( |mult_sum_out[$size( mult_sum_out )-1:PIX_WIDTH] ) ? ( 2**PIX_WIDTH - 1 ) : mult_sum_out);
167 | `else 
168 |     assign o_data = mult_sum_out;
169 | `endif
170 | /*
171 |     Latency
172 | */
173 |     logic [2:0] valid_delay   = '0            ; // 3-bit shift register of accepted-input flags
174 |     wire        valid_delayed = valid_delay[2]; // flag after three clock-enabled shifts
175 |     always_ff @(posedge clk or negedge rst_n) begin
176 |         if(~rst_n) begin
177 |             valid_delay <= 0;
178 |         end else begin
179 |             if(clk_en)
180 |                 valid_delay <= {valid_delay, i_valid && ready}; // shifts in (i_valid && ready); the 4-bit concatenation is truncated to 3 bits, dropping the oldest flag
181 |         end
182 |     end
183 | 
184 | 
185 | /*
186 |     counters
187 | */
188 |     always_ff @(posedge clk or negedge rst_n) begin // Column/row position counters, asynchronously cleared by rst_n
189 |         if(~rst_n) begin
190 |             cols_cntr <= 0;
191 |             rows_cntr <= 0;
192 |         end else begin
193 |             if(clk_en)begin
194 |                 if(valid_delayed || (!ready && (rows_cntr == img_height)))begin // advance on each delayed-valid pixel, or while ready is low and rows_cntr equals img_height
195 |                     cols_cntr <= (cols_cntr == img_width-1) ? '0 : (cols_cntr + 'd1); // column wraps to 0 after img_width-1
196 |                     if(cols_cntr == img_width-1)
197 |                         rows_cntr <= (rows_cntr == img_height) ? '0 : (rows_cntr + 'd1); // row advances when the column wraps; it returns to 0 from img_height (not img_height-1)
198 |                 end
199 |                 else if(i_sop)begin // i_sop clears both counters only when the advance condition above is false
200 |                     cols_cntr <= '0;
201 |                     rows_cntr <= '0;
202 |                 end
203 |             end
204 |         end
205 |     end
206 | 
207 | /*
208 |     video control signals
209 | */
210 |     always_ff @(posedge clk or negedge rst_n) begin // ready flag: set by reset, cleared by i_eop, set again when rows_cntr is 0
211 |         if(~rst_n) begin
212 |             ready <= 1;
213 |         end else if(clk_en) begin
214 |             if (i_eop) begin // i_eop takes priority over the rows_cntr == 0 condition
215 |                 ready <= 1'b0;
216 |             end
217 |             else if(rows_cntr == 0)
218 |                 ready <= 1'b1;
219 |         end
220 |     end
221 | 
222 | 
223 |     assign o_valid = valid_delayed && (rows_cntr > 1) && (rows_cntr < img_height) && (cols_cntr > 1) && (cols_cntr < img_width); // valid only when both counters are above 1 and below the image dimensions
224 | 
225 |     assign o_eop = valid_delayed && (cols_cntr == img_width-1) && (rows_cntr == img_height-1); // asserted with the delayed-valid pixel at column img_width-1 of row img_height-1
226 | 
227 |     assign o_sop = valid_delayed && (rows_cntr == 2) && (cols_cntr == 2); // asserted with the delayed-valid pixel at row 2, column 2
228 | 
229 | 
230 | 
231 | 
232 | 
233 | endmodule : conv
234 | 


--------------------------------------------------------------------------------
/CNN_TB.sv:
--------------------------------------------------------------------------------
  1 | // Convolutional Neural Network TB. 
  2 | //
  3 | // The images are taken from MNIST digits dataset. 
  4 | // 
  5 | // All weights and reference values are calculated by "cnn_behind.py"
  6 | //
  7 | // -----------------------------------------------------------------------------
  8 | // Copyright (c) 2014-2024 All rights reserved
  9 | // -----------------------------------------------------------------------------
 10 | // Author : Maksim Ananev mananev086@gmail.com
 11 | // 
 12 | // Create : 2024-05-13 11:30:23
 13 | // Revise : 2024-10-22 12:20:46
 14 | // Editor : sublime text4, tab size (4)
 15 | // -----------------------------------------------------------------------------
 16 | 
 17 | `include "./top/CNN.svh"
 18 | 
 19 | `timescale 1ns/1ns
 20 | 
 21 | 
 22 | `define DATAFLOW_CHECK
 23 | 
 24 | module CNN_TB ();
 25 | 
 26 |     parameter CLASSES_QNT = 10;
 27 |     parameter IMG_WIDTH   = 28;
 28 |     parameter IMG_HEIGHT  = 28;
 29 | 
 30 |     real image_7[IMG_HEIGHT][IMG_WIDTH] =
 31 |         '{
 32 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 33 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 34 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 35 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 36 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 37 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 38 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0},
 39 |             '{0,   0,   0,   0,   0,   0,  84, 185, 159, 151,  60,  36,   0,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 40 |             '{0,   0,   0,   0,   0,   0, 222, 254, 254, 254, 254, 241, 198, 198, 198, 198, 198, 198, 198, 198, 170,  52,   0,   0,   0,   0,   0,   0},
 41 |             '{0,   0,   0,   0,   0,   0,  67, 114,  72, 114, 163, 227, 254, 225, 254, 254, 254, 250, 229, 254, 254, 140,   0,   0,   0,   0,   0,   0},
 42 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  17,  66,  14, 67,  67,  67,  59,  21, 236, 254, 106,   0,   0,   0,   0,   0,   0},
 43 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0,  83, 253, 209,  18,   0,   0,   0,   0,   0,   0},
 44 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,  22, 233, 255,  83,   0,   0,   0,   0,   0,   0,   0},
 45 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0, 129, 254, 238,  44,   0,   0,   0,   0,   0,   0,   0},
 46 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,  59, 249, 254,  62,   0,   0,   0,   0,   0,   0,   0,   0},
 47 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0, 133, 254, 187,   5,   0,   0,   0,   0,   0,   0,   0,   0},
 48 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   9, 205, 248,  58,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 49 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0, 126, 254, 182,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 50 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 75, 251, 240,  57,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 51 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  19, 221, 254, 166,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 52 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3, 203, 254, 219,  35,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 53 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  38, 254, 254,  77,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 54 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  31, 224, 254, 115,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 55 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 133, 254, 254,  52,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 56 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  61, 242, 254, 254,  52,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 57 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 121, 254, 254, 219,  40,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 58 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 121, 254, 207,  18,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 59 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0}
 60 |         };
 61 | 
 62 |     real image_2[IMG_HEIGHT][IMG_WIDTH] =
 63 |         '{
 64 | 
 65 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
 66 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
 67 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
 68 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   116, 125, 171, 255,   255, 150,  93,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 69 | 
 70 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   169, 253, 253, 253, 253,   253, 253, 218,  30,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 71 | 
 72 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   169, 253, 253, 253, 213, 142,   176, 253, 253, 122,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 73 | 
 74 |             '{  0,   0,   0,   0,   0,   0,   0,  52, 250, 253, 210,  32,  12,   0, 6, 206, 253, 140,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 75 | 
 76 |             '{  0,   0,   0,   0,   0,   0,   0,  77, 251, 210,  25,   0,   0,   0, 122, 248, 253,  65,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 77 | 
 78 |             '{  0,   0,   0,   0,   0,   0,   0,   0,  31,  18,   0,   0,   0,   0, 209, 253, 253,  65,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 79 | 
 80 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   117,   247, 253, 198,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 81 | 
 82 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  76, 247, 253, 231,  63,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 83 | 
 84 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   128, 253,   253, 144,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 85 | 
 86 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   176, 246, 253,   159,  12,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 87 | 
 88 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  25, 234, 253, 233, 35,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 89 | 
 90 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   198, 253, 253, 141,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 91 | 
 92 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,  78, 248, 253, 189,  12, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 93 | 
 94 |             '{  0,   0,   0,   0,   0,   0,   0,   0,  19, 200, 253, 253, 141,   0, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 95 | 
 96 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   134, 253, 253, 173,  12,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 97 | 
 98 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   248, 253, 253,  25,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
 99 | 
100 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   248, 253, 253,  43,  20,  20,   20,  20,   5,   0,   5,  20,  20,  37, 150, 150, 150, 147,  10,   0},
101 | 
102 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   248, 253, 253, 253, 253, 253,   253, 253, 168, 143, 166, 253, 253, 253, 253, 253, 253, 253, 123,   0},
103 | 
104 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   174, 253, 253, 253, 253, 253,   253, 253, 253, 253, 253, 253, 249, 247, 247, 169, 117, 117,  57,   0},
105 | 
106 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   118, 123, 123, 123, 166,   253, 253, 253, 155, 123, 123,  41,   0,   0,   0,   0,   0,   0,   0},
107 | 
108 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
109 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
110 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
111 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
112 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0}
113 | 
114 |         };
115 |     real image_1[IMG_HEIGHT][IMG_WIDTH] =
116 |         '{  '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
117 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
118 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
119 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
120 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,  38, 254, 109,   0,   0,   0,   0,   0,   0,   0,   0,   0},
121 | 
122 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,  87, 252,  82,   0,   0,   0,   0,   0,   0,   0,   0,   0},
123 | 
124 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   135, 241,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
125 | 
126 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,  45, 244, 150,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
127 | 
128 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,  84, 254,  63,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
129 | 
130 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   202, 223,  11,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
131 | 
132 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 32, 254, 216,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
133 | 
134 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 95, 254, 195,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
135 | 
136 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 140, 254,  77,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
137 | 
138 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  57, 237, 205,   8,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
139 | 
140 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   124, 255, 165,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
141 | 
142 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   171,   254,  81,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
143 | 
144 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  24, 232, 215,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
145 | 
146 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   120, 254,   159,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
147 | 
148 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   151, 254,   142,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
149 | 
150 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   228, 254,   66,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
151 | 
152 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  61, 251, 254, 66,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
153 | 
154 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   141, 254, 205,   3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
155 | 
156 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  10, 215, 254, 121, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
157 | 
158 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   5, 198, 176,  10, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
159 | 
160 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
161 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
162 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0},
163 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0,   0,   0,   0}
164 |         };
165 | 
166 | 
167 |     real image_0[IMG_HEIGHT][IMG_WIDTH] =
168 |         '{
169 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
170 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
171 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
172 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
173 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  11, 150, 253, 202,  31,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
174 | 
175 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  37, 251, 251, 253, 107,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
176 | 
177 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  21, 197, 251, 251, 253, 107,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
178 | 
179 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   110, 190, 251, 251, 251, 253, 169, 109,  62,   0,   0,   0,   0,   0,   0,   0,   0,   0},
180 | 
181 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   253, 251, 251, 251, 251, 253, 251, 251, 220,  51,   0,   0,   0,   0,   0,   0,   0,   0},
182 | 
183 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   182, 255, 253, 253, 253, 253, 234, 222, 253, 253, 253,   0,   0,   0,   0,   0,   0,   0,   0},
184 | 
185 |             '{  0,   0,   0,   0,   0,   0,   0,   0,  63, 221, 253, 251, 251, 251, 147, 77,  62, 128, 251, 251, 105,   0,   0,   0,   0,   0,   0,   0},
186 | 
187 |             '{  0,   0,   0,   0,   0,   0,   0,  32, 231, 251, 253, 251, 220, 137, 10,  0,   0,  31, 230, 251, 243, 113,   5,   0,   0,   0,   0,   0},
188 | 
189 |             '{  0,   0,   0,   0,   0,   0,   0,  37, 251, 251, 253, 188,  20,   0, 0,  0,   0,   0,   109, 251, 253, 251,  35,   0,   0,   0,   0,   0},
190 | 
191 |             '{  0,   0,   0,   0,   0,   0,   0,  37, 251, 251, 201,  30,   0,   0, 0,  0,   0,   0,  31, 200, 253, 251,  35,   0,   0,   0,   0,   0},
192 | 
193 |             '{  0,   0,   0,   0,   0,   0,   0,  37, 253, 253,   0,   0,   0,   0, 0,  0,   0,   0,  32, 202, 255, 253, 164,   0,   0,   0,   0,   0},
194 | 
195 |             '{  0,   0,   0,   0,   0,   0,   0,   140, 251, 251,   0,   0,   0,   0,  0,   0,   0,   0,   109, 251, 253, 251,  35,   0,   0,   0,   0,   0},
196 | 
197 |             '{  0,   0,   0,   0,   0,   0,   0,   217, 251, 251,   0,   0,   0,   0,  0,   0,  21,  63, 231, 251, 253, 230,  30,   0,   0,   0,   0,   0},
198 | 
199 |             '{  0,   0,   0,   0,   0,   0,   0,   217, 251, 251,   0,   0,   0,   0,  0,   0,   144, 251, 251, 251, 221,  61,   0,   0,   0,   0,   0,   0},
200 | 
201 |             '{  0,   0,   0,   0,   0,   0,   0,   217, 251, 251,   0,   0,   0,   0,  0,   182, 221, 251, 251, 251, 180,   0,   0,   0,   0,   0,   0,   0},
202 | 
203 |             '{  0,   0,   0,   0,   0,   0,   0,   218, 253, 253,  73,  73, 228, 253,  253, 255, 253, 253, 253, 253,   0,   0,   0,   0,   0,   0,   0,   0},
204 | 
205 |             '{  0,   0,   0,   0,   0,   0,   0,   113, 251, 251, 253, 251, 251, 251,  251, 253, 251, 251, 251, 147,   0,   0,   0,   0,   0,   0,   0,   0},
206 | 
207 |             '{  0,   0,   0,   0,   0,   0,   0,  31, 230, 251, 253, 251, 251, 251,  251, 253, 230, 189,  35,  10,   0,   0,   0,   0,   0,   0,   0,   0},
208 | 
209 |             '{  0,   0,   0,   0,   0,   0,   0,   0,  62, 142, 253, 251, 251, 251,  251, 253, 107,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
210 | 
211 |             '{  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  72, 174, 251, 173,  71,  72,  30,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
212 | 
213 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
214 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
215 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0},
216 |             '{0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0}
217 |         };
218 | 
219 | 
220 |     parameter PIX_WIDTH    = 16;
221 |     parameter WEIGHT_WIDTH = 16;
222 |     parameter FRACT_WIDTH  = 12;
223 |     parameter CONV_NUMB    = 2 ;
224 |     parameter FLAT_NUMB    = 2 ;
225 | 
226 |     parameter logic [CONV_NUMB-1:0][ 1:0][7:0] CONV_DIMENSION   = {{8'd8, 8'd4}, {8'd4, 8'd1}};
227 |     parameter logic [CONV_NUMB-1:0][ 3:0]      KERNEL_DIMENSION = {4'd3  , 4'd3}              ;
228 |     parameter logic [FLAT_NUMB :0][15:0]      FLAT_DIMENSION   = {CLASSES_QNT, 16'd64, 16'd200}           ;
229 | 
230 | 
231 | 
232 |     logic clk    = 0;
233 |     logic clk_en = 1;
234 |     logic rst_n  = 0;
235 | 
236 |     logic [PIX_WIDTH-1:0] i_data;
237 | 
238 |     logic i_valid = 0;
239 |     logic i_sop   = 0;
240 |     logic i_eop   = 0;
241 | 
242 |     logic o_valid;
243 | 
244 |     logic [CLASSES_QNT-1:0][31:0] classes;
245 | 
246 |     int weights_mem_in_data;
247 |     int weights_mem_in_addr;
248 |     int weights_mem_sel_addr;
249 | 
250 |     logic [CONV_NUMB-1:0]weights_mem_in_kernel_wr=0;
251 |     logic [FLAT_NUMB-1:0]weights_mem_in_fc_wr = 0;
252 | 
253 |     localparam R2I_COEF = 2**FRACT_WIDTH;
254 | 
255 |     CNN #( // device under test; every parameter is forwarded by name from the testbench
256 |         .PIX_WIDTH       (PIX_WIDTH       ),
257 |         .WEIGHT_WIDTH    (WEIGHT_WIDTH    ),
258 |         .FRACT_WIDTH     (FRACT_WIDTH     ),
259 |         .CONV_NUMB       (CONV_NUMB       ),
260 |         .CONV_DIMENSION  (CONV_DIMENSION  ),
261 |         .KERNEL_DIMENSION(KERNEL_DIMENSION),
262 |         .FLAT_NUMB       (FLAT_NUMB       ),
263 |         .FLAT_DIMENSION  (FLAT_DIMENSION  ),
264 |         .CLASSES_QNT     (CLASSES_QNT     ),
265 |         .IMG_WIDTH       (IMG_WIDTH       ),
266 |         .IMG_HEIGHT      (IMG_HEIGHT      )
267 |     ) inst_CNN ( // the monitors below probe this instance hierarchically (inst_CNN.*)
268 |         .clk                     (clk                     ),
269 |         .clk_en                  (clk_en                  ),
270 |         .rst_n                   (rst_n                   ),
271 |         .i_data                  (i_data                  ), // streamed image pixels
272 |         .i_valid                 (i_valid                 ),
273 |         .i_sop                   (i_sop                   ),
274 |         .i_eop                   (i_eop                   ),
275 |         .o_valid                 (o_valid                 ),
276 |         .classes                 (classes                 ),
277 |         .weights_mem_in_data     (weights_mem_in_data     ), // weight-loading interface driven by the stimulus block
278 |         .weights_mem_in_addr     (weights_mem_in_addr     ),
279 |         .weights_mem_sel_addr    (weights_mem_sel_addr    ),
280 |         .weights_mem_in_kernel_wr(weights_mem_in_kernel_wr),
281 |         .weights_mem_in_fc_wr    (weights_mem_in_fc_wr    )
282 |     );
283 | 
284 | 
285 |     // Free-running testbench clock: inverts clk every 10 time units.
286 |     initial begin : clk_gen
287 |         forever
288 |             #10 clk = ~clk;
289 |     end : clk_gen
290 | 
291 | 
292 | 
293 | 
294 | 
295 |     initial begin // Main stimulus: release reset, load all weights and biases, then stream four images
296 |         #100;
297 | 
298 |         rst_n = 1; // release the active-low reset
299 | 
300 |         #100;
301 |         /////////////
302 |         /// WEIGHTS INITIALIZATION
303 |         //////////////
304 |         @(posedge clk); // NOTE(review): everything below uses blocking assignments right after a clock edge; whether the DUT sees the old or new value on that edge may depend on simulator event ordering - confirm
305 |         foreach (kernel_1_re[dim2, dim1, row, col]) begin // conv layer 0 kernels, one word per clock
306 |             weights_mem_in_data = R2I_COEF*kernel_1_re[dim2][dim1][row][col]; // scale by 2**FRACT_WIDTH
307 |             weights_mem_in_kernel_wr[0] = 1'b1;
308 |             @(posedge clk);
309 |             weights_mem_in_addr++; 
310 |             weights_mem_in_kernel_wr[0] = 1'b0;
311 |         end
312 |         foreach (conv_1_bias_re[x]) begin // layer 0 biases follow the kernels (address keeps incrementing)
313 |             weights_mem_in_data = R2I_COEF*conv_1_bias_re[x];
314 |             weights_mem_in_kernel_wr[0] = 1'b1;
315 |             @(posedge clk);
316 |             weights_mem_in_addr++; 
317 |             weights_mem_in_kernel_wr[0] = 1'b0;
318 |         end
319 |         weights_mem_in_kernel_wr[0] = 1'b0;
320 |         weights_mem_in_addr = 0; // restart addressing for the next layer
321 | 
322 |         @(posedge clk);
323 | 
324 |         foreach (kernel_2_re[dim2, dim1, row, col]) begin // conv layer 1 kernels; the strobe is left high for the whole loop
325 |             weights_mem_in_data = R2I_COEF*kernel_2_re[dim2][dim1][row][col];
326 |             weights_mem_in_kernel_wr[1] = 1'b1;
327 |             @(posedge clk);
328 |             weights_mem_in_addr++; 
329 |         end
330 |         foreach (conv_2_bias_re[x]) begin // layer 1 biases follow the kernels
331 |             weights_mem_in_data = R2I_COEF*conv_2_bias_re[x];
332 |             weights_mem_in_kernel_wr[1] = 1'b1;
333 |             @(posedge clk);
334 |             weights_mem_in_addr++; 
335 |             weights_mem_in_kernel_wr[1] = 1'b0;
336 |         end
337 |         weights_mem_in_kernel_wr[1] = 1'b0;
338 |         weights_mem_in_addr = 0; 
339 | 
340 | 
341 |         foreach (fc1_weights_re[x,y]) begin // first FC layer: x goes to the select address, y to the word address
342 |             weights_mem_in_data = R2I_COEF*fc1_weights_re[x][y];
343 |             weights_mem_sel_addr = x;
344 | 
345 |             weights_mem_in_addr = y;
346 |             weights_mem_in_fc_wr[0] = 1'b1;
347 |             @(posedge clk);
348 |             weights_mem_in_fc_wr[0] = 1'b0;
349 |         end
350 |         foreach (fc1_bias_re[x]) begin // first FC layer biases
351 |             weights_mem_in_data = R2I_COEF*fc1_bias_re[x];
352 |             weights_mem_sel_addr = FLAT_DIMENSION[1]; // select value FLAT_DIMENSION[1] (64) is used for the bias writes
353 | 
354 |             weights_mem_in_addr = x;
355 |             weights_mem_in_fc_wr[0] = 1'b1;
356 |             @(posedge clk);
357 |             weights_mem_in_fc_wr[0] = 1'b0;
358 |         end
359 | 
360 |         foreach (fc2_weights_re[x,y]) begin // second FC layer weights, same scheme as the first
361 |             weights_mem_in_data = R2I_COEF*fc2_weights_re[x][y];
362 |             weights_mem_sel_addr = x;
363 | 
364 |         weights_mem_in_addr = y; 
365 |         weights_mem_in_fc_wr[1] = 1'b1;
366 |         @(posedge clk);
367 |         weights_mem_in_fc_wr[1] = 1'b0;
368 |         end
369 |         foreach (fc2_bias_re[x]) begin // second FC layer biases
370 |             weights_mem_in_data = R2I_COEF*fc2_bias_re[x];
371 |             weights_mem_sel_addr = CLASSES_QNT; // select value CLASSES_QNT is used for the bias writes
372 | 
373 |             weights_mem_in_addr = x;
374 |             weights_mem_in_fc_wr[1] = 1'b1;
375 |             @(posedge clk);
376 |             weights_mem_in_fc_wr[1] = 1'b0;
377 |         end
378 | /////////////////////////////////////////////////////////
379 | 
380 | 
381 | 
382 | 
383 | ///////////////////////////////////////////////
384 | ///////////////////// IMAGE STREAMING
385 |         foreach (image_0[row,col]) begin // image_0 only supplies the loop bounds; the pixels come from image_7
386 |             @(posedge clk);
387 |             i_data = (image_7[row][col]/255) * R2I_COEF; // NOTE(review): if the image arrays are integer-typed, /255 truncates before scaling - confirm the element type
388 |             i_valid = 1;
389 |             i_sop = (row == 0) && (col == 0);
390 |             i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1);
391 |         end
392 | 
393 |         @(posedge clk);
394 |         i_valid = 0; // close the packet
395 |         i_sop = 0;
396 |         i_eop = 0;
397 | 
398 | 
399 |         wait(inst_CNN.o_valid); // hold the next image until the CNN raises o_valid
400 |         @(posedge clk);
401 |         foreach (image_0[row,col]) begin // second image: image_2
402 |             @(posedge clk);
403 |             i_data = (image_2[row][col]/255) * R2I_COEF;
404 |             i_valid = 1;
405 |             i_sop = (row == 0) && (col == 0);
406 |             i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1);
407 |         end
408 | 
409 |         @(posedge clk);
410 |         i_valid = 0;
411 |         i_sop = 0;
412 |         i_eop = 0;
413 | 
414 | 
415 |         wait(inst_CNN.o_valid);
416 |         @(posedge clk);
417 |         foreach (image_0[row,col]) begin // third image: image_1
418 |             @(posedge clk);
419 |             i_data = (image_1[row][col]/255) * R2I_COEF;
420 |             i_valid = 1;
421 |             i_sop = (row == 0) && (col == 0);
422 |             i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1);
423 |         end
424 | 
425 |         @(posedge clk);
426 |         i_valid = 0;
427 |         i_sop = 0;
428 |         i_eop = 0;
429 | 
430 | 
431 |         wait(inst_CNN.o_valid);
432 |         @(posedge clk);
433 |         foreach (image_0[row,col]) begin // fourth image: image_0
434 |             @(posedge clk);
435 |             i_data = (image_0[row][col]/255) * R2I_COEF;
436 |             i_valid = 1;
437 |             i_sop = (row == 0) && (col == 0);
438 |             i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1);
439 |         end
440 | 
441 |         @(posedge clk);
442 |         i_valid = 0;
443 |         i_sop = 0;
444 |         i_eop = 0;
445 |     end
446 | 
447 | 
448 | /////////////////////////////////////////////
449 | //////DATAFLOW CHECK
450 | /////////////////////////////////////////////
451 | `ifdef DATAFLOW_CHECK
452 |     int first_conv_rows_cntr = 0; // write position inside first_conv_data / first_relu_data
453 |     int first_conv_cols_cntr = 0;
454 |     int first_pool_rows_cntr = 0; // write position inside first_pool_data
455 |     int first_pool_cols_cntr = 0;
456 | 
457 |     int second_conv_rows_cntr = 0; // write position inside second_conv_data / second_relu_data
458 |     int second_conv_cols_cntr = 0;
459 |     int second_pool_rows_cntr = 0; // write position inside second_pool_data
460 |     int second_pool_cols_cntr = 0;
461 | 
462 |     int flat_cntr = 0; // write position inside flat_data
463 | 
464 |     real first_conv_data[CONV_DIMENSION[0][1]][IMG_HEIGHT-2][IMG_WIDTH-2]; // [channel][row][col]; the -2 presumably reflects an unpadded 3x3 kernel
465 |     real first_relu_data[CONV_DIMENSION[0][1]][IMG_HEIGHT-2][IMG_WIDTH-2];
466 |     real first_pool_data[CONV_DIMENSION[0][1]][(IMG_HEIGHT-2)/2][(IMG_WIDTH-2)/2]; // each spatial size halved after pooling
467 | 
468 |     real second_conv_data[CONV_DIMENSION[1][1]][(IMG_HEIGHT-2)/2-2][(IMG_WIDTH-2)/2-2];
469 |     real second_relu_data[CONV_DIMENSION[1][1]][(IMG_HEIGHT-2)/2-2][(IMG_WIDTH-2)/2-2];
470 |     real second_pool_data[CONV_DIMENSION[1][1]][((IMG_HEIGHT-2)/2-2)/2][((IMG_WIDTH-2)/2-2)/2];
471 | 
472 |     real flat_data [CONV_DIMENSION[1][1]*(((IMG_HEIGHT-2)/2-2)/2)*(((IMG_WIDTH-2)/2-2)/2)]; // channels * rows * cols of second_pool_data (cols now derived from IMG_WIDTH, not IMG_HEIGHT)
473 | `endif
474 | 
475 |     int first_fc_cntr  = 0; // write index into first_fc_data
476 |     int second_fc_cntr = 0; // write index into second_fc_data
477 | 
478 | 
479 |     real first_fc_data[FLAT_DIMENSION[1]]; // captured outputs of the first fully connected layer, rescaled by R2I_COEF
480 |     real second_fc_data[CLASSES_QNT]; // captured per-class outputs of the last layer; read by the result check
481 | 
482 | `ifdef DATAFLOW_CHECK
483 |     // Layer-0 convolution monitor: stores every valid conv/ReLU beat as a real value.
484 |     initial begin
485 |         forever begin
486 |             @(posedge clk);
487 |             if (inst_CNN.conv_valid[0]) begin
488 |                 foreach (first_conv_data[ch]) begin
489 |                     first_conv_data[ch][first_conv_rows_cntr][first_conv_cols_cntr] <= $itor($signed(inst_CNN.conv_data[0][ch]))/R2I_COEF;
490 |                     first_relu_data[ch][first_conv_rows_cntr][first_conv_cols_cntr] <= $itor(inst_CNN.relu_data[0][ch])/R2I_COEF;
491 |                 end
492 |                 if (inst_CNN.conv_eop[0]) begin // end of packet: restart at the top-left corner
493 |                     first_conv_cols_cntr = 0;
494 |                     first_conv_rows_cntr = 0;
495 |                 end else if (first_conv_cols_cntr + 1 == IMG_WIDTH-2) begin // row complete
496 |                     first_conv_cols_cntr = 0;
497 |                     first_conv_rows_cntr++;
498 |                 end else first_conv_cols_cntr++;
499 |             end
500 |         end
501 |     end
502 | 
503 |     initial begin // Layer-0 pooling monitor: stores every valid pooled beat as a real value
504 |         forever
505 |             @(posedge clk)
506 |                 if(inst_CNN.pool_valid[0])begin
507 |                     foreach (first_pool_data[i]) begin
508 |                         first_pool_data[i][first_pool_rows_cntr][first_pool_cols_cntr] <= $itor(inst_CNN.pool_data[0][i])/R2I_COEF;
509 |                     end
510 |                     first_pool_cols_cntr++;
511 |                     if(first_pool_cols_cntr == (IMG_WIDTH-2)/2)begin // wrap on the row width (was IMG_HEIGHT, wrong for non-square images)
512 |                         first_pool_cols_cntr = 0;
513 |                         first_pool_rows_cntr++;
514 |                     end
515 |                     if(inst_CNN.pool_eop[0])begin // end of packet: restart at the top-left corner
516 |                         first_pool_cols_cntr = 0;
517 |                         first_pool_rows_cntr = 0;
518 |                     end
519 |                 end
520 |     end
521 | 
522 |     initial begin // Layer-1 convolution monitor: stores every valid conv/ReLU beat as a real value
523 |         forever
524 |             @(posedge clk)
525 |                 if(inst_CNN.conv_valid[1])begin
526 |                     foreach (second_conv_data[i]) begin
527 |                         second_conv_data[i][second_conv_rows_cntr][second_conv_cols_cntr] <= $itor($signed(inst_CNN.conv_data[1][i]))/R2I_COEF; // layer index 1 (was 0, which re-captured layer 0)
528 |                         second_relu_data[i][second_conv_rows_cntr][second_conv_cols_cntr] <= $itor(inst_CNN.relu_data[1][i])/R2I_COEF;
529 |                     end
530 |                     second_conv_cols_cntr++;
531 |                     if(second_conv_cols_cntr == (IMG_WIDTH-2)/2-2)begin // wrap on the row width (was IMG_HEIGHT)
532 |                         second_conv_cols_cntr = 0;
533 |                         second_conv_rows_cntr++;
534 |                     end
535 |                     if(inst_CNN.conv_eop[1])begin // end of packet: restart at the top-left corner
536 |                         second_conv_cols_cntr = 0;
537 |                         second_conv_rows_cntr = 0;
538 |                     end
539 |                 end
540 |     end
541 | 
542 |     initial begin // Layer-1 pooling monitor: stores every valid pooled beat as a real value
543 |         forever
544 |             @(posedge clk)
545 |                 if(inst_CNN.pool_valid[1])begin
546 |                     foreach (second_pool_data[i]) begin // iterate the array that is actually written
547 |                         second_pool_data[i][second_pool_rows_cntr][second_pool_cols_cntr] <= $itor(inst_CNN.pool_data[1][i])/R2I_COEF; // layer index 1 (was 0, which re-captured layer 0)
548 |                     end
549 |                     second_pool_cols_cntr++;
550 |                     if(second_pool_cols_cntr == ((IMG_WIDTH-2)/2-2)/2)begin // wrap on the row width (was IMG_HEIGHT)
551 |                         second_pool_cols_cntr = 0;
552 |                         second_pool_rows_cntr++;
553 |                     end
554 |                     if(inst_CNN.pool_eop[1])begin // end of packet: restart at the top-left corner
555 |                         second_pool_cols_cntr = 0;
556 |                         second_pool_rows_cntr = 0;
557 |                     end
558 |                 end
559 |     end
560 | 
561 |     // Flatten-stage monitor: records each valid flat_data beat as a real value.
562 |     initial begin
563 |         forever begin
564 |             @(posedge clk);
565 |             if (inst_CNN.flat_valid) begin
566 |                 // The store uses the index value from before the counter update below.
567 |                 flat_data[flat_cntr] <= $itor(inst_CNN.flat_data)/R2I_COEF;
568 |                 if (inst_CNN.flat_eop) flat_cntr = 0; // end of packet restarts the index
569 |                 else                   flat_cntr++;
570 |             end
571 |         end
572 |     end
573 | 
574 | 
575 |     // First FC layer monitor: records each valid fc_relu_data[0] beat as a signed real value.
576 |     initial begin
577 |         forever begin
578 |             @(posedge clk);
579 |             if (inst_CNN.fc_valid[0]) begin
580 |                 // The store uses the index value from before the counter update below.
581 |                 first_fc_data[first_fc_cntr] <= $itor($signed(inst_CNN.fc_relu_data[0]))/R2I_COEF;
582 |                 if (inst_CNN.fc_eop[0]) first_fc_cntr = 0; // end of packet restarts the index
583 |                 else                    first_fc_cntr++;
584 |             end
585 |         end
586 |     end
587 | 
588 | `endif
589 |     // Last FC layer monitor: records each valid fc_data[1] beat as a signed real value.
590 |     initial begin
591 |         forever begin
592 |             @(posedge clk);
593 |             if (inst_CNN.fc_valid[1]) begin
594 |                 // The store uses the index value from before the counter update below.
595 |                 second_fc_data[second_fc_cntr] <= $itor($signed(inst_CNN.fc_data[1]))/R2I_COEF;
596 |                 if (inst_CNN.fc_eop[1]) second_fc_cntr = 0; // end of packet restarts the index
597 |                 else                    second_fc_cntr++;
598 |             end
599 |         end
600 |     end
601 | 
602 | 
603 | 
604 | 
605 | 
606 | 
607 | ///////////////////////////////////////////
608 | /////////// CNN RESULTS CHECK
609 | ///////////////////////////////////////////
610 |  
611 |     typedef enum { // digit labels, so the detected class shows up by name in the waveform
612 |         ZERO  = 0,
613 |         ONE   = 1,
614 |         TWO   = 2,
615 |         THREE = 3,
616 |         FOUR  = 4,
617 |         FIVE  = 5,
618 |         SIX   = 6,
619 |         SEVEN = 7,
620 |         EIGHT = 8,
621 |         NINE  = 9,
622 | 
623 |         NONE  = 999 // value assigned before any result has been evaluated
624 |     } e_number;
625 | 
626 |     e_number detected_class; // latest classification result as an enum
627 | 
628 | 
629 |     int  detected = 0  ; // index of the winning class
630 |     real det_max  = 0.0; // score of the winning class
630 |     real det_max  = 0.0;
631 |     // Result check: once a frame's last FC output has been captured, pick the class with the largest score.
632 |     initial begin
633 |         detected_class = NONE;
634 |         forever begin
635 |             wait (inst_CNN.fc_eop[1]);
636 |             // The capture process stores the final score with a non-blocking assignment on the
637 |             // first edge after fc_eop[1] rises (assuming fc_eop[1] accompanies the last
638 |             // fc_valid[1] beat), so second_fc_data is complete only after a second edge.
639 |             @(posedge clk);
640 |             @(posedge clk);
641 | 
642 |             // Seed the search with class 0 so that all-negative scores are still ranked correctly.
643 |             detected = 0;
644 |             det_max  = second_fc_data[0];
645 |             for (int i = 1; i < CLASSES_QNT; i++) begin
646 |                 if (second_fc_data[i] >= det_max) begin // ties resolve to the highest index, as before
647 |                     detected = i;
648 |                     det_max  = second_fc_data[i];
649 |                 end
650 |             end
651 | 
652 |             // Translate the winning index into the enum by stepping from ZERO.
653 |             detected_class = ZERO;
654 |             repeat (detected) detected_class = detected_class.next();
655 | 
656 |             // One more edge before re-arming the wait; the loop still spans three
657 |             // clock edges per result, exactly as the original did.
658 |             @(posedge clk);
659 |         end
660 |     end
661 | 
662 | 
663 | 
664 | 
665 | endmodule : CNN_TB
666 | 
667 | 
668 | 
669 | 


--------------------------------------------------------------------------------
/Fully connected layer/flat.sv:
--------------------------------------------------------------------------------
  1 | // For sim purposes: Image width and height are not changeable dynamically
  2 | //
  3 | // -----------------------------------------------------------------------------
  4 | // Copyright (c) 2014-2024 All rights reserved
  5 | // -----------------------------------------------------------------------------
  6 | // Author : Maksim Ananev mananev086@gmail.com
  7 | // 
  8 | // Create : 2024-05-13 11:30:23
  9 | // Revise : 2024-10-22 12:20:46
 10 | // Editor : sublime text4, tab size (4)
 11 | // -----------------------------------------------------------------------------
 12 | 
 13 | module flat #(
 14 |     parameter              PIX_WIDTH  = 8 , // bits per pixel word
 15 |     parameter              DIMENSION  = 8 , // number of parallel input channels
 16 |     parameter logic [11:0] img_width  = 7,  // per-channel frame width in pixels
 17 |     parameter logic [11:0] img_height = 7   // per-channel frame height in pixels
 18 | ) (
 19 |     input                                       clk    , // Clock
 20 |     input                                       clk_en , // Clock Enable
 21 |     input                                       rst_n  , // Synchronous reset, active low (sampled on posedge clk)
 22 |     //input pixels
 23 |     input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data ,
 24 |     input                                       i_valid,
 25 |     input                                       i_sop  ,
 26 |     input                                       i_eop  ,
 27 |     // output pixels
 28 |     output logic [PIX_WIDTH-1:0]                o_data ,
 29 |     output logic                                o_valid,
 30 |     output logic                                o_sop  ,
 31 |     output logic                                o_eop  ,
 32 |     ///
 33 |     output logic                                o_ready
 34 | );
 35 | 
 36 | // Frame store: one img_height x img_width pixel plane per channel, plus a flat view for serial read-out.
 37 | logic [DIMENSION-1:0][img_height-1:0][img_width-1:0][PIX_WIDTH-1:0] img_buf;
 38 | wire [DIMENSION*img_height*img_width-1:0][PIX_WIDTH-1:0] img_buf_plain = img_buf;
 39 | logic [$clog2(DIMENSION*img_width*img_height+1)-1:0] o_cntr;
 40 | // o_cntr must be able to hold the terminal value DIMENSION*img_width*img_height itself, hence the +1.
 41 | typedef enum logic [2:0] {
 42 |     IDLE    = 'd1,
 43 |     FILL    = 'd2,
 44 |     RELEASE = 'd4
 45 | } e_state;
 46 | 
 47 | e_state state;
 48 | 
 49 | // Control FSM: IDLE waits for a start of packet, FILL runs to end of packet, RELEASE streams the buffer out.
 50 | always_ff @(posedge clk) begin
 51 |     if(clk_en) begin
 52 |         // Defaults: no output beat, ready for input.
 53 |         o_valid <= 1'd0;
 54 | 
 55 |         o_ready <= 1'b1;
 56 |         case (state)
 57 |             IDLE: begin
 58 |                 if (i_valid && i_sop && o_ready) begin
 59 |                     state <= FILL;
 60 |                 end
 61 |             end
 62 |             FILL: begin
 63 |                 if(i_valid && i_eop)begin
 64 |                     state <= RELEASE;
 65 |                     o_cntr <= '0;
 66 |                     o_ready <= 1'b0;
 67 |                 end
 68 |             end
 69 |             RELEASE: begin
 70 |                 // One word per enabled clock until every stored pixel has been sent.
 71 |                 if(o_cntr == DIMENSION * img_width * img_height)begin
 72 |                     state <= IDLE;
 73 |                 end
 74 |                 else begin
 75 |                     o_valid <= 1'b1;
 76 |                     o_data <= img_buf_plain[o_cntr];
 77 |                     o_cntr <= o_cntr + 'd1;
 78 |                 end
 79 |                 // Packet markers track the word index; input stays blocked while releasing.
 80 |                 o_sop <= o_cntr == 'd0;
 81 |                 o_eop <= o_cntr == (DIMENSION * img_width * img_height - 1);
 82 |                 o_ready <= 1'b0;
 83 |             end
 84 |             default : state <= IDLE;
 85 |         endcase
 86 |     end
 87 |     // Reset is checked last so it overrides the assignments above, even when clk_en is low.
 88 |     if(~rst_n) begin
 89 |         o_cntr <= 0;
 90 |         state <= IDLE;
 91 |     end  
 92 | end
 93 | // Per-channel shift register: the new pixel enters at the top and the oldest pixel ([0][0]) drops out.
 94 | always_ff @(posedge clk) begin
 95 |     if(clk_en) begin
 96 |         if (i_valid && o_ready) begin
 97 |             foreach (img_buf[i]) begin
 98 |                 img_buf[i] <= {i_data[i], img_buf[i][img_height-1:1], img_buf[i][0][img_width-1:1]};
 99 |             end
100 |         end
101 |     end
102 | end
103 | 
104 | endmodule : flat


--------------------------------------------------------------------------------
/Fully connected layer/fully_connected_layer.sv:
--------------------------------------------------------------------------------
  1 | // Fully connected layer of CNN. For sim purposes: 
  2 | // Image width and height are not changeable dynamically
  3 | // To simplify simulation all weights initialize from "CNN.svh"  -- Temporal
  4 | // -----------------------------------------------------------------------------
  5 | // Copyright (c) 2014-2024 All rights reserved
  6 | // -----------------------------------------------------------------------------
  7 | // Author : Maksim Ananev mananev086@gmail.com
  8 | // 
  9 | // Create : 2024-05-13 11:30:23
 10 | // Revise : 2024-10-22 12:20:46
 11 | // Editor : sublime text4, tab size (4)
 12 | // -----------------------------------------------------------------------------
 13 | 
 14 | module fully_connected_layer #(
 15 |     //data width parameters
 16 |     parameter PIX_WIDTH          = 16  ,
 17 |     parameter WEIGHT_WIDTH       = 16 ,
 18 |     parameter WEIGHT_FRACT_WIDTH = 10  ,
 19 |     //array_parameter
 20 |     parameter IN_DIMENSION       = 200,
 21 |     parameter OUT_DIMENSION      = 64
 22 | ) (
 23 |     input                                             clk                 , // Clock
 24 |     input                                             clk_en              , // Clock Enable
 25 |     input                                             rst_n               , // Synchronous reset, active low (sampled on posedge clk)
 26 |     //input pixels
 27 |     input        [                     PIX_WIDTH-1:0] i_data              ,
 28 |     input                                             i_valid             ,
 29 |     input                                             i_sop               ,
 30 |     input                                             i_eop               ,
 31 |     // output pixels
 32 |     output logic [PIX_WIDTH+$clog2(IN_DIMENSION)-1:0] o_data              ,
 33 |     output logic                                      o_valid             ,
 34 |     output logic                                      o_sop               ,
 35 |     output logic                                      o_eop               ,
 36 |     /// weight / bias loading
 37 |     input  int                                        weights_mem_in_data ,
 38 |     input  [$clog2(IN_DIMENSION)-1:0]                 weights_mem_in_addr ,
 39 |     input  [$clog2(OUT_DIMENSION):0]                  weights_mem_sel_addr,
 40 |     input                                             weights_mem_in_fc_wr,
 41 |     ///
 42 |     output logic                                      o_ready
 43 | );
 44 | 
 45 | 
 46 | // col_cntr is the weight read address while accumulating and the output index while releasing.
 47 | // weight_wr is a one-hot write enable: bits 0..OUT_DIMENSION-1 pick a weight memory, bit OUT_DIMENSION the bias array.
 48 | int col_cntr;
 49 | logic [OUT_DIMENSION :0] weight_wr;
 50 | 
 51 | always_comb begin 
 52 |    weight_wr = '0;
 53 |    weight_wr[weights_mem_sel_addr] = weights_mem_in_fc_wr;
 54 | end
 55 | 
 56 | genvar y;
 57 | 
 58 | logic [WEIGHT_WIDTH-1:0]weights[OUT_DIMENSION];
 59 | // One weight memory per output; all of them are read at col_cntr in parallel.
 60 | generate 
 61 | for(y = 0; y < OUT_DIMENSION; y++)begin
 62 |     single_port_rom #(
 63 |     .ADDR_WIDTH($clog2(IN_DIMENSION)),
 64 |     .DATA_WIDTH(WEIGHT_WIDTH)
 65 |     )
 66 |     weight_rom(
 67 |      .clk(clk),
 68 |      .w_addr(weights_mem_in_addr),
 69 |      .r_addr(col_cntr),
 70 |      .data(weights_mem_in_data),
 71 |      .o(weights[y]),
 72 |      .we(weight_wr[y])
 73 |     
 74 |     );
 75 | end
 76 | 
 77 | endgenerate
 78 | 
 79 | 
 80 | // Bias storage plus a one-clock delay line for the input beat, which lines the data up
 81 | // with the registered read of the weight memories.
 82 | logic [WEIGHT_WIDTH-1:0]bias[OUT_DIMENSION];
 83 | 
 84 | logic [PIX_WIDTH-1:0] i_data_ff;
 85 | logic i_sop_ff;
 86 | logic o_ready_ff;
 87 | logic i_valid_ff;
 88 | always_ff @(posedge clk) begin
 89 | 
 90 |     if(weight_wr[OUT_DIMENSION])
 91 |         bias[weights_mem_in_addr] <= weights_mem_in_data;
 92 | 
 93 |     i_data_ff <= i_data;
 94 |     i_valid_ff <= i_valid;
 95 |     i_sop_ff <= i_sop;
 96 |     o_ready_ff <= o_ready;
 97 | 
 98 |     if(~rst_n) begin
 99 |         i_data_ff <= '0;
100 |         i_sop_ff <= '0;
101 |         o_ready_ff <= '0;
102 |         i_valid_ff <= '0;
103 |     end
104 | end
105 | 
106 | // Accumulators: a signed PIX_WIDTH x WEIGHT_WIDTH product needs PIX_WIDTH+WEIGHT_WIDTH bits, and summing
107 | // IN_DIMENSION of them adds $clog2(IN_DIMENSION) bits (was PIX_WIDTH*WEIGHT_FRACT_WIDTH+$clog2(OUT_DIMENSION)).
108 | logic [PIX_WIDTH+WEIGHT_WIDTH+$clog2(IN_DIMENSION)-1:0] integrators[OUT_DIMENSION];
109 | 
110 | 
111 | typedef enum logic [2:0] {
112 |     IDLE    = 'd1,
113 |     FILL    = 'd2,
114 |     RELEASE = 'd4
115 | } e_state;
116 | 
117 | e_state state;
118 | 
119 | logic fill_delay;
120 | // Control FSM: IDLE waits for a start of packet, FILL steps the weight address, RELEASE emits the outputs.
121 | always_ff @(posedge clk) begin
122 |     if(clk_en) begin
123 |         // Defaults for this cycle.
124 |         o_valid    <= 1'd0;
125 |         o_ready    <= 1'b1;
126 |         fill_delay <= 1'b0;
127 | 
128 |         case (state)
129 |             IDLE: begin
130 | 
131 |                 col_cntr <= '0;
132 | 
133 |                 if (i_valid && i_sop && o_ready) begin
134 |                     state <= FILL;
135 |                     col_cntr <= col_cntr + 'd1;
136 |                 end
137 |             end
138 |             FILL: begin
139 |                 if (i_valid) begin
140 |                     col_cntr <= col_cntr + 'd1;
141 | 
142 |                     if(i_eop)begin
143 |                         state <= RELEASE;
144 |                         col_cntr <= '0;
145 |                         o_ready <= 1'b0;
146 |                         fill_delay <= 1'b1; // hold the first output one cycle while the delayed last beat is accumulated
147 |                     end
148 |                 end
149 | 
150 |             end
151 |             RELEASE: begin
152 | 
153 |                 if(col_cntr == OUT_DIMENSION)begin
154 |                     state <= IDLE;
155 |                 end
156 |                 else begin
157 |                     o_data <= $signed(integrators[col_cntr])/(2**WEIGHT_FRACT_WIDTH) + $signed(bias[col_cntr]);
158 |                     o_valid <= !fill_delay;
159 |                     col_cntr <= col_cntr + $size(col_cntr)'(!fill_delay);
160 |                 end
161 | 
162 |                 o_sop <= col_cntr == 'd0;
163 |                 o_eop <= col_cntr == (OUT_DIMENSION- 1);
164 |                 o_ready <= 1'b0;
165 | 
166 |             end
167 | 
168 |             default : state <= IDLE;
169 |         endcase
170 |     end
171 |     // Reset is checked last so it overrides the assignments above, even when clk_en is low.
172 |     if(~rst_n) begin
173 |         col_cntr <= 0;
174 |         state <= IDLE;
175 |     end 
176 | end
177 | // Multiply-accumulate on the delayed input beat; a start of packet restarts every accumulator.
178 | always_ff @(posedge clk) begin
179 |     if(clk_en) begin
180 |         if(i_valid_ff && o_ready_ff)
181 | 
182 |             foreach (integrators[x]) begin
183 |                 if(i_sop_ff)
184 |                     integrators[x] <= $signed(weights[x])*$signed(i_data_ff);
185 |                 else
186 |                     integrators[x] <= $signed(weights[x])*$signed(i_data_ff) + $signed(integrators[x]);
187 |             end
188 |     end
189 | end
190 | 
191 | 
192 | endmodule : fully_connected_layer
193 | 
194 | 
195 | // Clocked memory with separate read and write addresses (writable, despite the "rom" name).
196 | module single_port_rom #(
197 |     parameter ADDR_WIDTH = 4,  // address bits; the array holds 2**ADDR_WIDTH words
198 |     parameter DATA_WIDTH = 32  // bits per word
199 | ) (
200 |     input                         clk,
201 |     input        [ADDR_WIDTH-1:0] r_addr,
202 |     input        [ADDR_WIDTH-1:0] w_addr,
203 |     input        [DATA_WIDTH-1:0] data,
204 |     output logic [DATA_WIDTH-1:0] o,
205 |     input                         we
206 | );
207 | 
208 |     reg [DATA_WIDTH-1:0] mem [2**ADDR_WIDTH];
209 | 
210 |     // Registered read: o takes the word stored at r_addr before this edge.
211 |     always @(posedge clk)
212 |         o <= mem[r_addr];
213 | 
214 |     // Write port: data is stored at w_addr when we is high.
215 |     always @(posedge clk)
216 |         if (we) mem[w_addr] <= data;
217 | 
218 | endmodule
219 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Convolutional-Neural-Network-using-SystemVerilog
 2 | Synthesizable, RTL-based video-stream Convolutional Neural Network (non-HLS).
 3 | 
 4 | The testbench's images are taken from the MNIST digits dataset.
 5 | 
 6 | ![image](https://github.com/user-attachments/assets/447386e3-ac5d-4a59-b600-3b1323c77b01)
 7 | 
 8 | 
 9 | 
10 | All weights and reference values are calculated by "cnn_behind.py".
11 | 
12 | The number and size of the convolution and fully connected layers are parameterizable.
13 | 
14 | The testbench uses the following CNN structure:
15 | 
16 |         conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0)
17 |         conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0)
18 |         fc1 = nn.Linear(200, 64)
19 |         fc2 = nn.Linear(64, 10)
20 | 
21 | ![image](https://github.com/user-attachments/assets/13c7ffff-05bd-4ebd-9d6c-612513aa67b7)
22 | 
23 | 
24 | 
25 | Simulation Results: 
26 | ![image](https://github.com/user-attachments/assets/6dc68ca5-9127-4118-94c8-77288268b2d6)
27 | 


--------------------------------------------------------------------------------
/activation_func/relu.sv:
--------------------------------------------------------------------------------
 1 | // Combinational ReLU over a packed vector: elements whose top bit is set become zero.
 2 | module relu #(
 3 |     parameter PIX_WIDTH = 8, // bits per element; the MSB is treated as the sign
 4 |     parameter DIMENSION = 4  // number of elements handled in parallel
 5 | ) (
 6 |     //input pixels
 7 |     input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data,
 8 |     // output pixels
 9 |     output logic [DIMENSION-1:0][PIX_WIDTH-1:0] o_data
10 | );
11 | 
12 |     always_comb begin
13 |         for (int ch = 0; ch < DIMENSION; ch++) begin
14 |             // Pass the element through unless its sign bit is set.
15 |             o_data[ch] = i_data[ch][PIX_WIDTH-1] ? '0
16 |                                                  : i_data[ch];
17 |         end
18 |     end
19 | 
20 | 
21 | endmodule : relu


--------------------------------------------------------------------------------
/blocks/conv_block.sv:
--------------------------------------------------------------------------------
  1 | 
  2 | module conv_block #(
  3 |     //data width parameters
  4 |     parameter PIX_WIDTH          = 8     ,
  5 |     parameter WEIGHT_WIDTH       = 10    ,
  6 |     parameter WEIGHT_FRACT_WIDTH = 5     ,
  7 |     parameter TRUNK              = "TRUE",
  8 |     //resolution
  9 |     parameter IMG_WIDTH          = 28    ,
 10 |     parameter IMG_HEIGHT         = 28    ,
 11 |     //conv_array_parameter
 12 |     parameter KERNEL_DIMENSION   = 3     ,
 13 |     parameter IN_DIMENSION       = 1     ,
 14 |     parameter OUT_DIMENSION      = 4
 15 | ) (
 16 |     input                                              clk                              ,
 17 |     input                                              clk_en                           ,
 18 |     input                                              rst_n                            ,
 19 |     //input pixels
 20 |     input        [ IN_DIMENSION-1:0][   PIX_WIDTH-1:0] i_data                           ,
 21 |     input                                              i_valid                          ,
 22 |     input                                              i_sop                            ,
 23 |     input                                              i_eop                            ,
 24 |     // output pixels
 25 |     output                                             logic [OUT_DIMENSION-1:0][((TRUNK  == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] o_data ,
 26 |     output logic                                       o_valid                          ,
 27 |     output logic                                       o_sop                            ,
 28 |     output logic                                       o_eop                            ,
 29 |     ///
 30 |     input  int                                         weights_mem_in_data              ,
 31 |     input  int                                         weights_mem_in_addr              ,
 32 |     input                                              weights_mem_in_kernel_wr         ,
 33 |     ///
 34 |     // input        [OUT_DIMENSION-1:0][WEIGHT_WIDTH-1:0] bias                             ,
 35 |     ///
 36 |     output logic                                       o_ready
 37 | );
 38 | 
 39 | 
 40 |     logic [OUT_DIMENSION + OUT_DIMENSION * IN_DIMENSION * KERNEL_DIMENSION * KERNEL_DIMENSION - 1 : 0][WEIGHT_WIDTH-1:0] kernel_plain;
 41 |     wire [OUT_DIMENSION-1:0][IN_DIMENSION-1:0][KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH-1:0] kernel;
 42 |     wire        [OUT_DIMENSION-1:0][WEIGHT_WIDTH-1:0] bias;
 43 |     assign {bias,kernel} = kernel_plain;
 44 | 
 45 | always_ff @(posedge clk) begin 
 46 |     if(weights_mem_in_kernel_wr) 
 47 |        kernel_plain[weights_mem_in_addr] <= weights_mem_in_data;  
 48 | 
 49 |     if(~rst_n) begin
 50 |     end
 51 | end
 52 | 
 53 |     logic signed [((TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] conv_outputs[OUT_DIMENSION][IN_DIMENSION];
 54 | 
 55 |     logic valid[OUT_DIMENSION][IN_DIMENSION];
 56 |     logic sop  [OUT_DIMENSION][IN_DIMENSION];
 57 |     logic eop  [OUT_DIMENSION][IN_DIMENSION];
 58 |     logic ready[OUT_DIMENSION][IN_DIMENSION];
 59 |     
 60 |     
 61 |     genvar row,col;
 62 |     generate
 63 |         for (row = 0; row < OUT_DIMENSION; row++) begin
 64 |             for (col = 0; col < IN_DIMENSION; col++) begin
 65 | 
 66 |                 conv #(
 67 |                     .PIX_WIDTH         (PIX_WIDTH         ),
 68 |                     .WEIGHT_WIDTH      (WEIGHT_WIDTH      ),
 69 |                     .WEIGHT_FRACT_WIDTH(WEIGHT_FRACT_WIDTH),
 70 |                     .TRUNK             (TRUNK             ),
 71 |                     .KERNEL_DIMENSION  (KERNEL_DIMENSION  ),
 72 |                     .img_width         (IMG_WIDTH         ),
 73 |                     .img_height        (IMG_HEIGHT        )
 74 |                 ) inst_conv (
 75 |                     .clk      (clk                   ),
 76 |                     .clk_en   (clk_en                ),
 77 |                     .rst_n    (rst_n                 ),
 78 |                     .i_data   (i_data[col]           ),
 79 |                     .i_valid  (i_valid               ),
 80 |                     .i_sop    (i_sop                 ),
 81 |                     .i_eop    (i_eop                 ),
 82 |                     .o_data   (conv_outputs[row][col]),
 83 |                     .o_valid  (valid[row][col]       ),
 84 |                     .o_sop    (sop  [row][col]       ),
 85 |                     .o_eop    (eop  [row][col]       ),
 86 |                     .kernel   (kernel[row][col]      ),
 87 |                     .ready    (ready[row][col]       ),
 88 |                     .cols_cntr(                      ),
 89 |                     .rows_cntr(                      )
 90 |                 );
 91 | 
 92 |             end
 93 |         end
 94 |     endgenerate
 95 | 
 96 | 
 97 |     logic [((TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] sum[OUT_DIMENSION];
 98 | 
 99 |     always_comb begin
100 | 
101 |         foreach (sum[x]) begin
102 |             sum[x] = '0;
103 |         end
104 | 
105 |         foreach (conv_outputs[x,z]) begin
106 |             sum[x] += $signed(conv_outputs[x][z]);
107 |         end
108 |     end
109 | 
110 | 
111 |     always_ff @(posedge clk or negedge rst_n) begin
112 |         if(~rst_n) begin
113 |             o_valid <= 0;
114 |             o_sop   <= 0;
115 |             o_eop   <= 0;
116 |         end else if(clk_en) begin
117 |             o_valid <= valid[0][0];
118 |             o_sop   <= sop[0][0];
119 |             o_eop   <= eop[0][0];
120 | 
121 |             foreach (o_data[x]) begin
122 |                 o_data[x] <= $signed(sum[x]) + $signed(bias[x]);
123 |             end
124 | 
125 | 
126 |         end
127 |     end
128 | 
129 | assign o_ready = ready[0][0];
130 | 
131 | 
132 | endmodule : conv_block


--------------------------------------------------------------------------------
/blocks/max_pooling_block.sv:
--------------------------------------------------------------------------------
 1 | module max_pooling_block #( // DIMENSION parallel maxpooling instances driven by one shared control stream
 2 |     //data width parameters
 3 |     parameter PIX_WIDTH      = 8 , // bits per pixel in each i_data / o_data element
 4 |     //resolution
 5 |     parameter IMG_WIDTH      = 28, // forwarded to maxpooling.WIDTH
 6 |     parameter IMG_HEIGHT     = 28, // forwarded to maxpooling.HEIGHT
 7 |     parameter POOL_DIMENSION = 2 , // forwarded to maxpooling.POOL_DIMENSION
 8 |     //pool_array_parameter
 9 |     parameter DIMENSION      = 4   // number of maxpooling instances (one per i_data element)
10 | ) (
11 |     input                                       clk    ,
12 |     input                                       clk_en ,
13 |     input                                       rst_n  ,
14 |     //input pixels
15 |     input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data , // element [row] feeds instance row
16 |     input                                       i_valid, // shared by every instance
17 |     input                                       i_sop  , // shared by every instance
18 |     input                                       i_eop  , // shared by every instance
19 |     // output pixels
20 |     output logic [DIMENSION-1:0][PIX_WIDTH-1:0] o_data , // element [row] is driven by instance row
21 |     output logic                                o_valid, // taken from instance 0 only
22 |     output logic                                o_sop  , // taken from instance 0 only
23 |     output logic                                o_eop  , // taken from instance 0 only
24 |     ///
25 |     output logic                                o_ready  // taken from instance 0 only
26 | );
27 |     // Per-instance status outputs; only index 0 is forwarded to the module ports below.
28 |     logic valid[DIMENSION];
29 |     logic sop  [DIMENSION];
30 |     logic eop  [DIMENSION];
31 |     logic ready[DIMENSION];
32 | 
33 |     genvar row,col; // col is declared but never used in this module
34 | 
35 |     // One maxpooling instance per element of i_data. All instances receive the same
36 |     // clk, clk_en, rst_n, i_valid, i_sop and i_eop.
37 |     generate
38 |         for (row = 0; row < DIMENSION; row++) begin
39 | 
40 |             maxpooling #(
41 |                 .PIX_WIDTH     (PIX_WIDTH     ),
42 |                 .POOL_DIMENSION(POOL_DIMENSION),
43 |                 .WIDTH         (IMG_WIDTH     ),
44 |                 .HEIGHT        (IMG_HEIGHT    )
45 |             ) inst_maxpooling (
46 |                 .clk      (clk        ),
47 |                 .clk_en   (clk_en     ),
48 |                 .rst_n    (rst_n      ),
49 |                 .i_data   (i_data[row]),
50 |                 .i_valid  (i_valid    ),
51 |                 .i_sop    (i_sop      ),
52 |                 .i_eop    (i_eop      ),
53 |                 .o_data   (o_data[row]),
54 |                 .o_valid  (valid[row] ),
55 |                 .o_sop    (sop  [row] ),
56 |                 .o_eop    (eop  [row] ),
57 |                 .ready    (ready[row] ),
58 |                 .cols_cntr(           ), // position counters are left unconnected
59 |                 .rows_cntr(           )
60 |             );
61 | 
62 | 
63 |         end
64 |     endgenerate
65 | 
66 |     // The instances share identical control inputs, so instance 0 is used as the
67 |     // representative for the block-level handshake / framing outputs.
68 | assign o_valid = valid[0];
69 | assign o_sop = sop[0];
70 | assign o_eop   = eop[0];
71 | assign o_ready = ready[0];
72 | 
73 | 
74 | 
75 | 
76 | endmodule : max_pooling_block


--------------------------------------------------------------------------------
/cnn_behind.py:
--------------------------------------------------------------------------------
  1 | # Numerical and plotting imports (third-party)
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | # PyTorch imports
  6 | import torch
  7 | from torch import nn, optim
  8 | from torch.nn import functional as F
  9 | from torch.utils.data import DataLoader
 10 | from torch.utils.data.dataset import random_split
 11 | from torch.optim.lr_scheduler import ReduceLROnPlateau
 12 | 
 13 | # torchvision imports for datasets and transforms
 14 | from torchvision import datasets, transforms
 15 | 
 16 | 
 17 | load_model = 1
 18 | 
 19 | 
 20 | # Define the CNN model
 21 | class MyCNN(nn.Module):
 22 |     """
 23 |     Simple CNN model with 2 convolutional layers and 2 fully connected layers.
 24 | 
 25 |     Args:
 26 |     -----
 27 |         nn.Module: Base class for all neural network modules in PyTorch.
 28 |     """
 29 |     def __init__(self):
 30 |         super(MyCNN, self).__init__()
 31 |         self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0)
 32 |         self.conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0)
 33 |         self.fc1 = nn.Linear(200, 64)
 34 |         self.fc2 = nn.Linear(64, 10)
 35 | 
 36 |         self._initialize_weights()
 37 | 
 38 |     a = 0
 39 | 
 40 |     def forward(self, x):
 41 |         """
 42 |         Forward pass of the model.
 43 |         
 44 |         Args:
 45 |         -----
 46 |             x (torch.Tensor): Input tensor.
 47 |         
 48 |         Returns:
 49 |         --------
 50 |             torch.Tensor: Output tensor.
 51 |         """
 52 |         x = F.relu(self.conv1(x))
 53 |         x = F.max_pool2d(x, 2, 2)
 54 |         x = F.relu(self.conv2(x))
 55 |         x = F.max_pool2d(x, 2, 2)
 56 |         x = x.view(x.size(0), -1)  # Flatten the tensor
 57 |         x = F.relu(self.fc1(x))
 58 |         x = self.fc2(x)
 59 |         self.a = 0
 60 |         return x
 61 |     
 62 |     # Xavier weight initialization
 63 |     def _initialize_weights(self):
 64 |         """
 65 |         Initialize the weights of the model.
 66 |         """
 67 |         for m in self.modules():
 68 |             if isinstance(m, nn.Conv2d):
 69 |                 nn.init.normal_(m.weight, 0, 0.01)
 70 |                 if m.bias is not None:
 71 |                     nn.init.constant_(m.bias, 0)
 72 |             elif isinstance(m, nn.Linear):
 73 |                 nn.init.xavier_uniform_(m.weight)
 74 |                 if m.bias is not None:
 75 |                     nn.init.constant_(m.bias, 0)
 76 | class EarlyStopping:
 77 |     """
 78 |     Early stopping to stop the training when the loss does not improve after
 79 | 
 80 |     Args:
 81 |     -----
 82 |         patience (int): Number of epochs to wait before stopping the training.
 83 |         verbose (bool): If True, prints a message for each epoch where the loss
 84 |                         does not improve.
 85 |         delta (float): Minimum change in the monitored quantity to qualify as an improvement.
 86 |     """
 87 |     def __init__(self, patience=7, verbose=False, delta=0):
 88 |         self.patience = patience
 89 |         self.verbose = verbose
 90 |         self.counter = 0
 91 |         self.best_score = None
 92 |         self.early_stop = False
 93 |         self.delta = delta
 94 | 
 95 |     def __call__(self, val_loss):
 96 |         score = -val_loss
 97 | 
 98 |         if self.best_score is None:
 99 |             self.best_score = score
100 |         elif score < self.best_score + self.delta:
101 |             self.counter += 1
102 |             if self.counter >= self.patience:
103 |                 self.early_stop = True
104 |         else:
105 |             self.best_score = score
106 |             self.counter = 0
107 | class Trainer:
108 |     """
109 |     Trainer class to train the model.
110 | 
111 |     Args:
112 |     -----
113 |         model (nn.Module): Neural network model.
114 |         criterion (torch.nn.modules.loss): Loss function.
115 |         optimizer (torch.optim): Optimizer.
116 |         device (torch.device): Device to run the model on.
117 |         patience (int): Number of epochs to wait before stopping the training.
118 |     """
119 |     def __init__(self, model, criterion, optimizer, device, patience=7):
120 |         self.model = model
121 |         self.criterion = criterion
122 |         self.optimizer = optimizer
123 |         self.device = device
124 |         self.early_stopping = EarlyStopping(patience=patience)
125 |         self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=3, verbose=True, factor=0.5, min_lr=1e-6)
126 |         self.train_losses = []
127 |         self.val_losses = []
128 |         self.gradient_norms = []
129 | 
130 |     def train(self, train_loader, val_loader, epochs):
131 |         """
132 |         Train the model.
133 |         
134 |         Args:
135 |         -----
136 |             train_loader (torch.utils.data.DataLoader): DataLoader for training dataset.
137 |             val_loader (torch.utils.data.DataLoader): DataLoader for validation dataset.
138 |             epochs (int): Number of epochs to train the model.
139 |         """
140 |         for epoch in range(epochs):
141 |             self.model.train()
142 |             for images, labels in train_loader:
143 |                 images, labels = images.to(self.device), labels.to(self.device)
144 | 
145 |                 self.optimizer.zero_grad()
146 |                 outputs = self.model(images)
147 |                 loss = self.criterion(outputs, labels)
148 |                 loss.backward()
149 |                 self.optimizer.step()
150 |             
151 |             self.train_losses.append(loss.item())
152 | 
153 |             val_loss = self.evaluate(val_loader)
154 |             self.val_losses.append(val_loss)
155 |             self.scheduler.step(val_loss)
156 |             self.early_stopping(val_loss)
157 | 
158 |             # Log the training and validation loss
159 |             print(f'Epoch {epoch+1}, Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')
160 | 
161 |             if self.early_stopping.early_stop:
162 |                 print("Early stopping")
163 |                 break
164 | 
165 |     def evaluate(self, test_loader):
166 |         """
167 |         Evaluate the model on the test dataset.
168 | 
169 |         Args:
170 |         -----
171 |             test_loader (torch.utils.data.DataLoader): DataLoader for test dataset.
172 | 
173 |         Returns:
174 |         --------
175 |             float: Average loss on the test dataset.
176 |         """
177 |         self.model.eval()
178 |         total_loss = 0
179 |         with torch.no_grad():
180 |             for images, labels in test_loader:
181 |                 images, labels = images.to(self.device), labels.to(self.device)
182 | 
183 |                 outputs = self.model(images)
184 |                 loss = self.criterion(outputs, labels)
185 |                 total_loss += loss.item()
186 | 
187 |         return total_loss / len(test_loader)
188 |     
189 |     def accuracy(self, test_loader):
190 |         """
191 |         Calculate the accuracy of the model on the test dataset.
192 | 
193 |         Args:
194 |         -----
195 |             test_loader (torch.utils.data.DataLoader): DataLoader for test dataset.
196 | 
197 |         Returns:
198 |         --------
199 |             float: Accuracy of the model on the test dataset.
200 |         """
201 |         self.model.eval()
202 |         correct = 0
203 |         total = 0
204 |         with torch.no_grad():
205 |             for images, labels in test_loader:
206 |                 images, labels = images.to(self.device), labels.to(self.device)
207 | 
208 |                 outputs = self.model(images)
209 |                 _, predicted = torch.max(outputs.data, 1)
210 |                 total += labels.size(0)
211 |                 correct += (predicted == labels).sum().item()
212 | 
213 |         return correct / total
214 | 
215 |     def plot_losses(self, window_size=100):
216 |         # Compute moving averages
217 |         train_losses_smooth = self.moving_average(self.train_losses, window_size)
218 |         val_losses_smooth = self.moving_average(self.val_losses, window_size)
219 | 
220 |         # Plot
221 |         plt.plot(train_losses_smooth, label='Train Loss')
222 |         plt.plot(val_losses_smooth, label='Validation Loss')
223 |         plt.legend()
224 |         plt.grid()
225 |         plt.title('Losses')
226 | 
227 |     def moving_average(self, data, window_size):
228 |         return np.convolve(data, np.ones(window_size)/window_size, mode='valid')
229 | # Data loading and transformation
230 | transform = transforms.Compose([
231 |     transforms.ToTensor(),
232 |     transforms.Normalize((0.0,), (1.0,))  # Normalize for grayscale image
233 | ])
234 | 
235 | mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
236 | test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)
237 | 
238 | # Plot a sample image
239 | image, label = mnist_dataset[0]
240 | # plt.imshow(image.squeeze().numpy(), cmap='gray')
241 | # plt.title(f'Label: {label}')
242 | # plt.show()
243 | 
244 | # Split the dataset into training and validation sets
245 | train_split = 0.8
246 | 
247 | train_size = int(train_split * len(mnist_dataset))
248 | val_size = len(mnist_dataset) - train_size
249 | 
250 | # Split the dataset
251 | train_dataset, val_dataset = random_split(mnist_dataset, [train_size, val_size])
252 | 
253 | # Create DataLoaders for each dataset
254 | train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
255 | val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
256 | test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
257 | # Model instantiation
258 | model = MyCNN()
259 | 
260 | 
261 | path = "C:/projects/DSP_modules/2D/soft/lstmmodelgpu.pth"
262 | 
263 | # Move model to GPU if available
264 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
265 | 
266 | model = model.to(device)
267 | 
268 | #Loss function and optimizer
269 | criterion = nn.CrossEntropyLoss()
270 | optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5, amsgrad=True, eps=1e-8,  betas=(0.9, 0.999))
271 | 
272 | 
273 | if load_model == 1:
274 |     # Load:
275 |     model.load_state_dict(torch.load(path))
276 | 
277 | 
278 | 
279 | # # Trainer instantiation
280 | trainer = Trainer(model, criterion, optimizer, device, patience=10)
281 | 
282 | if load_model == 0:
283 |     # Training
284 |     print('train started')
285 |     trainer.train(train_loader, val_loader, epochs=10)
286 | 
287 |     torch.save(model.state_dict(), path)
288 |     
289 | 
290 |     with open('output.txt', 'w') as f:
291 |         f.write('real fc1_weights_re[64][200] = ')
292 | 
293 |     torch.set_printoptions(threshold=15_000)
294 | 
295 |     print(model.fc1.weight, file=open('output.txt', 'a'))
296 | 
297 | #Evaluation
298 | print("evaluation")
299 | model.a = 1
300 | loss = trainer.evaluate(test_loader)
301 | accuracy = trainer.accuracy(test_loader)
302 | print(f'Accuracy: {accuracy:.2%}')
303 | 
304 | print('FINISHED!!!')
305 | print('FINISHED!!!')


--------------------------------------------------------------------------------
/pooling/maxpooling.sv:
--------------------------------------------------------------------------------
  1 | // For sim purposes: Image width and height are not changeable dynamically
  2 | //
  3 | // -----------------------------------------------------------------------------
  4 | // Copyright (c) 2014-2024 All rights reserved
  5 | // -----------------------------------------------------------------------------
  6 | // Author : Maksim Ananev mananev086@gmail.com
  7 | // 
  8 | // Create : 2024-05-13 11:30:23
  9 | // Revise : 2024-10-22 12:20:46
 10 | // Editor : sublime text4, tab size (4)
 11 | // -----------------------------------------------------------------------------
 12 | 
 13 | 
 14 | module maxpooling #(
 15 |     parameter PIX_WIDTH      = 8 , // bits per pixel (width of i_data / o_data)
 16 |     parameter POOL_DIMENSION = 2 , // pooling window is POOL_DIMENSION x POOL_DIMENSION pixels
 17 |     parameter WIDTH          = 28, // pixels per input line; also the depth of each line buffer
 18 |     parameter HEIGHT         = 28  // input lines per frame; only used in the o_eop expression
 19 | ) (
 20 |     input                        clk      , // Clock
 21 |     input                        clk_en   , // Clock Enable
 22 |     input                        rst_n    , // Asynchronous reset active low
 23 |     //input pixels
 24 |     input        [PIX_WIDTH-1:0] i_data   ,
 25 |     input                        i_valid  ,
 26 |     input                        i_sop    , // clears the position counters while valid_delayed is low
 27 |     input                        i_eop    , // not referenced inside this module
 28 |     // output pixels
 29 |     output       [PIX_WIDTH-1:0] o_data   , // registered maximum of the current window
 30 |     output                       o_valid  ,
 31 |     output                       o_sop    ,
 32 |     output                       o_eop    ,
 33 |     ///
 34 |     output logic                 ready    , // driven directly from clk_en
 35 |     output logic [         11:0] cols_cntr, // counts delayed valid pixels within a line, wraps at WIDTH-1
 36 |     output logic [         11:0] rows_cntr  // incremented each time cols_cntr wraps
 37 | );
 38 |     // Streaming max-pooling: line buffers and per-line pixel registers expose a
 39 |     // POOL_DIMENSION x POOL_DIMENSION neighbourhood of the input stream; its maximum is
 40 |     // computed in two registered steps (per-row maxima, then the maximum of those).
 41 | /*
 42 |     Pixels Delay scheme. if maxpooling 3*3
 43 | 
 44 |     pixel_input----------->--------------------                                   --pix[2][2]-->     --pix[2][1]-->
 45 |                                 /-------\     |                                  |                  |
 46 |                          ---<--| FIFO_0 |--<------>----delayed_line[0]------->-----|FF|---------->------|FF|--->-----------pix[2][0]-->--
 47 |                          |     \-------/
 48 |                          |                                                        --pix[1][2]-->     --pix[1][1]-->
 49 |                          |                                                       |                  |
 50 |                          ------------------------->---delayed_line[1]------->------|FF|---------->------|FF|--->-----------pix[1][0]-->--
 51 |                          |
 52 |                          |                                                        --pix[0][2]-->     --pix[0][1]-->
 53 |                          |      /-------\                                        |                  |
 54 |                          --->--| FIFO_1 |--------->---delayed_line[2]------->------|FF|---------->------|FF|--->-----------pix[0][0]-->--
 55 |                                \-------/
 56 | */
 57 |     // Line buffers: stage i keeps the last WIDTH accepted samples of delayed_line[i].
 58 |     logic [WIDTH-1:0][PIX_WIDTH-1:0] fifo[POOL_DIMENSION-1]; /// !!!!!!!!!!!!!!!!!!!!!!! only for simulation, MUST BE REPLACED by a regular FIFO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 59 |     // delayed_line[0] is the live input; delayed_line[i] is the oldest entry of fifo[i-1].
 60 |     logic [PIX_WIDTH-1:0] delayed_line[POOL_DIMENSION];
 61 |     // Per-line pixel delay registers (POOL_DIMENSION-1 per line); declared as 2-state 'bit'.
 62 |     bit [POOL_DIMENSION-2:0][PIX_WIDTH-1:0] after_fifos_ffs[POOL_DIMENSION];
 63 |     // delayed_pix[i][y]: line tap i delayed by y accepted pixels (y == 0 is the undelayed tap).
 64 |     logic [PIX_WIDTH-1:0] delayed_pix[POOL_DIMENSION][POOL_DIMENSION];
 65 | 
 66 |     always_comb begin
 67 |         foreach (delayed_line[i]) begin
 68 |             delayed_line[i] = (i == 0) ? i_data : fifo[i-1][WIDTH-1];
 69 |         end
 70 | 
 71 |         foreach (delayed_pix[i,y]) begin
 72 |             delayed_pix[i][y] = (y==0)?delayed_line[i] : after_fifos_ffs[i][y-1];
 73 |         end
 74 |     end
 75 | 
 76 |     always_ff @(posedge clk) begin // data-path storage: this block has no reset term
 77 |         if(clk_en && (i_valid || !ready))begin // ready is assigned clk_en below, so this reduces to clk_en && i_valid
 78 |             foreach (fifo[i]) begin
 79 |                 fifo[i] <= {fifo[i][WIDTH-2:0],( (i == 0) ? i_data : fifo[i-1][WIDTH-1] )};
 80 |             end
 81 | 
 82 |             foreach (after_fifos_ffs[i]) begin
 83 |                 after_fifos_ffs[i] <= {after_fifos_ffs[i],delayed_line[i]}; // shift in the newest tap; the assignment truncates the oldest (MSB) entry
 84 |             end
 85 |         end
 86 |     end
 87 | 
 88 | 
 89 | 
 90 | /*
 91 |     maths. if maxpooling 3*3
 92 | 
 93 | 
 94 |                  image lines          
 95 |      /--      -------------------     --\
 96 |      |        | a11 | a12 | a13 |       |
 97 |      |        -------------------       |
 98 |   max|        | a21 | a22 | a23 |       |   --------> 
 99 |      |        -------------------       |
100 |      |        | a31 | a32 | a33 |       |
101 |      \__      -------------------     --/ 
102 | */
103 | logic [PIX_WIDTH-1:0] max_detected   ; // maximum over the registered row maxima
104 | logic [PIX_WIDTH-1:0] max_detected_ff; // registered copy, drives o_data
105 | // Stage 1 results: one maximum per window row (combinational and registered).
106 | logic [POOL_DIMENSION-1:0][PIX_WIDTH-1:0] max_row_detected   ;
107 | logic [POOL_DIMENSION-1:0][PIX_WIDTH-1:0] max_row_detected_ff;
108 | 
109 | always_comb begin
110 |     // Stage 1: seed every row maximum with the undelayed tap of that row ...
111 |     foreach (max_row_detected[i]) begin
112 |         max_row_detected[i] = delayed_pix[i][0];        
113 |     end
114 |     // ... then scan the remaining taps. Operands are not declared signed, so '<' compares unsigned.
115 |     for (int y = 0; y < POOL_DIMENSION; y++) begin
116 |         for (int i = 1; i < POOL_DIMENSION; i++) begin
117 |             if(max_row_detected[y] < delayed_pix[y][i])
118 |                 max_row_detected[y] = delayed_pix[y][i];
119 |         end        
120 |     end
121 |     // Stage 2: maximum over the row maxima registered on the previous enabled clock.
122 |     max_detected = max_row_detected_ff[0];
123 |     for (int i = 1; i < POOL_DIMENSION; i++) begin
124 |         if(max_detected < max_row_detected_ff[i])
125 |                 max_detected = max_row_detected_ff[i];
126 |     end
127 | 
128 | end
129 | 
130 | always_ff @(posedge clk) begin // pipeline registers for both stages; no reset term
131 |     if(clk_en) begin
132 |         max_row_detected_ff <= max_row_detected;
133 |         max_detected_ff <= max_detected;
134 |     end
135 | end
136 | 
137 | 
138 | /*
139 |     output data
140 | */
141 | 
142 |     assign o_data = max_detected_ff;
143 | 
144 | /*
145 |     Latency
146 | */
147 |     logic [2:0] valid_delay   = '0            ; // shift register of accepted-input strobes (i_valid && ready)
148 |     wire        valid_delayed = valid_delay[1]; // strobe delayed by two enabled clock edges
149 |     always_ff @(posedge clk or negedge rst_n) begin
150 |         if(~rst_n) begin
151 |             valid_delay <= 0;
152 |         end else begin
153 |             if(clk_en)
154 |                 valid_delay <= $size(valid_delay)'( {valid_delay, i_valid && ready} ); // shift left by one; the cast drops the old MSB
155 |         end
156 |     end
157 |     // NOTE(review): the two-edge delay presumably matches the two register stages
158 |     // max_row_detected_ff and max_detected_ff -- confirm against the testbench.
159 | /*
160 |     counters
161 | */
162 | 
163 | logic [$clog2(POOL_DIMENSION)-1:0]valid_col,valid_row; // position inside the current pooling window (0 .. POOL_DIMENSION-1)
164 | 
165 |     always_ff @(posedge clk or negedge rst_n) begin
166 |         if(~rst_n) begin
167 |             cols_cntr <= 0;
168 |             rows_cntr <= 0;
169 |             valid_col   <= 0;
170 |             valid_row   <= 0;
171 |         end else begin
172 |             if(clk_en)begin
173 |                 if(valid_delayed)begin
174 |                     cols_cntr <= (cols_cntr == WIDTH-1) ? '0 : (cols_cntr + 'd1);
175 | 
176 |                     if(valid_col == POOL_DIMENSION-1)
177 |                         valid_col <= '0;
178 |                     else
179 |                         valid_col <= valid_col + 'd1;
180 | 
181 |                     if(cols_cntr == WIDTH-1)begin // end of line: the assignments below override the valid_col update above
182 |                         rows_cntr <= rows_cntr + 'd1;
183 |                         valid_col   <= '0;
184 |                         valid_row   <= valid_row + 'd1;
185 |                         if (valid_row == POOL_DIMENSION-1) begin
186 |                             valid_row   <= 0; // wrap the in-window row index (overrides the increment above)
187 |                         end
188 |                     end
189 |                 end
190 |                 else if(i_sop)begin // only evaluated while valid_delayed is low
191 |                     cols_cntr <= '0;
192 |                     rows_cntr <= '0;
193 |                     valid_col   <= 0;
194 |                     valid_row   <= 0;
195 |                 end
196 |             end
197 |         end
198 |     end
199 | 
200 | /*
201 |     video control signals
202 | */
203 |     assign ready = clk_en;
204 | 
205 |     // One output sample per window: asserted on the last column and last row of the window.
206 |     assign o_valid = valid_delayed && (valid_col == POOL_DIMENSION-1) && (valid_row == POOL_DIMENSION-1) ; 
207 |     // WIDTH[0], HEIGHT[0] and POOL_DIMENSION[0] select bit 0 of the parameters (set when the value is odd).
208 |     assign o_eop = valid_delayed && (valid_col == POOL_DIMENSION-1) && (cols_cntr == WIDTH-(WIDTH[0]+POOL_DIMENSION[0]+1)) && (rows_cntr == HEIGHT-(HEIGHT[0]+POOL_DIMENSION[0]+1));
209 |     // o_sop fires when both position counters equal POOL_DIMENSION-1.
210 |     assign o_sop = valid_delayed && (valid_col == POOL_DIMENSION-1)  && (rows_cntr == POOL_DIMENSION-1) && (cols_cntr == POOL_DIMENSION-1);
211 | 
212 |     // NOTE(review): o_eop / o_sop are not qualified with valid_row, unlike o_valid;
213 |     // presumably the rows_cntr terms make that redundant for the supported sizes -- confirm.
214 | 
215 | 
216 | endmodule : maxpooling
217 | 


--------------------------------------------------------------------------------
/top/CNN.sv:
--------------------------------------------------------------------------------
  1 | // // Convolutional Neural Network  module. For sim purposes:
  2 | // // No back pressure in design
  3 | // // The math of the module replicates the python script
  4 | // //        self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0)
  5 | // //        self.conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0)
  6 | // //        self.fc1 = nn.Linear(200, 64)
  7 | // //        self.fc2 = nn.Linear(64, 10)
  8 | // //
  9 | // // To simplify simulation all weights are initialized from "CNN.svh"
 10 | // //
 11 | // // -----------------------------------------------------------------------------
 12 | // // Copyright (c) 2014-2024 All rights reserved
 13 | // // -----------------------------------------------------------------------------
 14 | // // Author : Maksim Ananev mananev086@gmail.com
 15 | // // 
 16 | // // Create : 2024-05-13 11:30:23
 17 | // // Revise : 2024-10-22 12:20:46
 18 | // // Editor : sublime text4, tab size (4)
 19 | // // -----------------------------------------------------------------------------
 20 | 
 21 | 
 22 | module CNN #(
 23 |     parameter                             PIX_WIDTH          = 16     ,
 24 |     parameter                             WEIGHT_WIDTH       = 10     ,
 25 |     parameter                             FRACT_WIDTH        = 5      ,
 26 |     parameter                             CONV_NUMB          = 2      ,
 27 |     parameter logic [CONV_NUMB-1:0][1:0][7:0]  CONV_DIMENSION     = {{8'd8, 8'd4}, {8'd4, 8'd1}},   
 28 |     parameter logic [CONV_NUMB-1:0][ 3:0] KERNEL_DIMENSION   = {4'd3  , 4'd3},
 29 |     parameter                             FLAT_NUMB          = 2      ,
 30 |     parameter                             CLASSES_QNT        = 10     ,
 31 |     parameter logic [FLAT_NUMB :0][15:0] FLAT_DIMENSION     = {16'd10, 16'd64, 16'd200},
 32 |     parameter                             IMG_WIDTH          = 28     ,
 33 |     parameter                             IMG_HEIGHT         = 28
 34 | ) (
 35 |     input                                clk                     , // Clock
 36 |     input                                clk_en                  , // Clock Enable
 37 |     input                                rst_n                   , // Asynchronous reset active low
 38 |     //input pixels
 39 |     input        [  PIX_WIDTH-1:0]       i_data                  ,
 40 |     input                                i_valid                 ,
 41 |     input                                i_sop                   ,
 42 |     input                                i_eop                   ,
 43 |     ///
 44 |     output logic                         o_valid                 ,
 45 |     output logic [CLASSES_QNT-1:0][31:0] classes                 ,
 46 |     ///
 47 |     input  int                           weights_mem_in_data     ,
 48 |     input  int                           weights_mem_in_addr     ,
 49 |     input  int                           weights_mem_sel_addr    ,
 50 |     input        [  CONV_NUMB-1:0]       weights_mem_in_kernel_wr,
 51 |     input        [  FLAT_NUMB-1:0]       weights_mem_in_fc_wr
 52 | 
 53 | );
 54 | 
 55 | 
 56 | logic [63:0][PIX_WIDTH-1:0] conv_data[CONV_NUMB];
 57 | logic [63:0][PIX_WIDTH-1:0] relu_data[CONV_NUMB];
 58 | logic [63:0][PIX_WIDTH-1:0] pool_data[CONV_NUMB];
 59 | 
 60 | logic conv_valid[CONV_NUMB];
 61 | logic conv_sop  [CONV_NUMB];
 62 | logic conv_eop  [CONV_NUMB];
 63 | 
 64 | logic pool_valid[CONV_NUMB];
 65 | logic pool_sop  [CONV_NUMB];
 66 | logic pool_eop  [CONV_NUMB];
 67 | 
 68 | 
 69 | genvar numb;
 70 | generate
 71 |     for (numb = 0; numb < CONV_NUMB; numb++) begin:conv_genloop
 72 |         conv_block #(
 73 |             .PIX_WIDTH         (PIX_WIDTH                               ),
 74 |             .WEIGHT_WIDTH      (WEIGHT_WIDTH                            ),
 75 |             .WEIGHT_FRACT_WIDTH(FRACT_WIDTH                             ),
 76 |             .TRUNK             ("TRUE"                                  ),
 77 |             .IMG_WIDTH         ((IMG_WIDTH - (2**(numb+1)-2))/(2**numb) ),
 78 |             .IMG_HEIGHT        ((IMG_HEIGHT - (2**(numb+1)-2))/(2**numb)),
 79 |             .KERNEL_DIMENSION  (KERNEL_DIMENSION[numb]                  ),
 80 |             .IN_DIMENSION      (CONV_DIMENSION[numb][0]                 ),
 81 |             .OUT_DIMENSION     (CONV_DIMENSION[numb][1]                 )
 82 |         ) conv_block (
 83 |             .clk                     (clk                                       ),
 84 |             .clk_en                  (1                                         ),
 85 |             .rst_n                   (rst_n                                     ),
 86 |             .i_data                  ((numb == 0) ? i_data  : pool_data [numb-1]),
 87 |             .i_valid                 ((numb == 0) ? i_valid : pool_valid[numb-1]),
 88 |             .i_sop                   ((numb == 0) ? i_sop   : pool_sop  [numb-1]),
 89 |             .i_eop                   ((numb == 0) ? i_eop   : pool_eop  [numb-1]),
 90 |             .o_data                  (conv_data [numb]                          ),
 91 |             .o_valid                 (conv_valid[numb]                          ),
 92 |             .o_sop                   (conv_sop  [numb]                          ),
 93 |             .o_eop                   (conv_eop  [numb]                          ),
 94 |             .weights_mem_in_data     (weights_mem_in_data                       ),
 95 |             .weights_mem_in_addr     (weights_mem_in_addr                       ),
 96 |             .weights_mem_in_kernel_wr(weights_mem_in_kernel_wr[numb]            ),
 97 |             .o_ready                 (                                          )
 98 |         );
 99 | 
100 | 
101 |         relu #( // activation stage placed between conv_block and max_pooling_block of this loop iteration
102 |             .PIX_WIDTH(PIX_WIDTH              ),
103 |             .DIMENSION(CONV_DIMENSION[numb][1]) // same value that conv_block receives as OUT_DIMENSION
104 |         ) conv_relu (
105 |             .i_data(conv_data[numb]), // raw conv_block output of this stage
106 |             .o_data(relu_data[numb])  // feeds max_pooling_block.i_data; NOTE(review): only data ports are connected (no clk/valid), so relu is presumably combinational - confirm in relu.sv
107 |         );
108 | 
109 | 
110 |         max_pooling_block #( // pooling stage of this loop iteration; its output feeds the next conv_block (numb+1) or inst_flat
111 |             .PIX_WIDTH     (PIX_WIDTH                                 ),
112 |             .IMG_WIDTH     ((IMG_WIDTH - (2**(numb+1)-2))/(2**numb) -2), // conv_block's IMG_WIDTH for this stage minus 2; NOTE(review): the 2 is hard-coded, presumably KERNEL_DIMENSION-1 for a 3x3 kernel - confirm
113 |             .IMG_HEIGHT    ((IMG_HEIGHT - (2**(numb+1)-2))/(2**numb)-2), // same expression as IMG_WIDTH, applied to the height
114 |             .POOL_DIMENSION(2                                         ), // fixed pooling window parameter
115 |             .DIMENSION     (CONV_DIMENSION[numb][1]                   )  // same value as conv_block's OUT_DIMENSION
116 |         ) max_pooling_block (
117 |             .clk    (clk             ),
118 |             .clk_en (clk_en          ), // NOTE(review): conv_block in this loop is tied to clk_en = 1 while this block follows the module clk_en - confirm this is intended
119 |             .rst_n  (rst_n           ),
120 |             .i_data (relu_data [numb]), // data comes from conv_relu ...
121 |             .i_valid(conv_valid[numb]), // ... while valid/sop/eop are taken directly from conv_block
122 |             .i_sop  (conv_sop  [numb]),
123 |             .i_eop  (conv_eop  [numb]),
124 |             .o_data (pool_data [numb]),
125 |             .o_valid(pool_valid[numb]),
126 |             .o_sop  (pool_sop  [numb]),
127 |             .o_eop  (pool_eop  [numb]),
128 |             .o_ready(                ) // left unconnected
129 |         );
130 |     end
131 |     endgenerate
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | // Flatten stage: takes the stream of the last pooling block and drives fully connected layer 0.
140 | logic [PIX_WIDTH-1:0] flat_data;
141 | logic                 flat_valid, flat_sop, flat_eop;
142 | logic                 flat_ready;
143 | 
144 | // Image size parameters reuse the per-stage size expression, evaluated at CONV_NUMB.
145 |     flat #(
146 |         .PIX_WIDTH  (PIX_WIDTH),
147 |         .DIMENSION  (CONV_DIMENSION[CONV_NUMB-1][1]),
148 |         .img_width  ((IMG_WIDTH - (2**(CONV_NUMB+1)-2))/(2**CONV_NUMB)),
149 |         .img_height ((IMG_HEIGHT - (2**(CONV_NUMB+1)-2))/(2**CONV_NUMB))
150 |     ) inst_flat (
151 |         .clk     (clk),
152 |         .clk_en  (clk_en),
153 |         .rst_n   (rst_n),
154 |         .i_data  (pool_data [CONV_NUMB-1]),  // input stream: last pooling stage
155 |         .i_valid (pool_valid[CONV_NUMB-1]),
156 |         .i_sop   (pool_sop  [CONV_NUMB-1]),
157 |         .i_eop   (pool_eop  [CONV_NUMB-1]),
158 |         .o_data  (flat_data),                // output stream: flattened data
159 |         .o_valid (flat_valid),
160 |         .o_sop   (flat_sop),
161 |         .o_eop   (flat_eop),
162 |         .o_ready (flat_ready)
163 |     );
164 | 
165 | logic [PIX_WIDTH+$clog2(FLAT_DIMENSION[0])-1:0]fc_data[FLAT_NUMB]; // per-layer FC outputs; every entry uses one width derived from FLAT_DIMENSION[0]
166 | logic fc_valid[FLAT_NUMB];
167 | logic fc_sop[FLAT_NUMB];
168 | logic fc_eop[FLAT_NUMB];
169 | logic fc_ready[FLAT_NUMB];
170 | // relu-filtered copies of fc_data; entry numb-1 is the data input of layer numb (numb > 0).
171 | logic [PIX_WIDTH+$clog2(FLAT_DIMENSION[0])-1:0] fc_relu_data[FLAT_NUMB];
172 | // Chain of FLAT_NUMB fully connected layers, each followed by a relu; layer 0 is fed by inst_flat.
173 | generate
174 |     for (numb = 0; numb < FLAT_NUMB; numb++) begin:fc_genloop
175 |         fully_connected_layer #(
176 |             .PIX_WIDTH         (PIX_WIDTH + ((numb == 0) ? 0 : $clog2(FLAT_DIMENSION[0]))), // layer 0 uses the flat_data width, later layers the fc_relu_data width
177 |             .WEIGHT_WIDTH      (WEIGHT_WIDTH                                             ),
178 |             .WEIGHT_FRACT_WIDTH(FRACT_WIDTH                                              ),
179 |             .IN_DIMENSION      (FLAT_DIMENSION[numb]                                     ),
180 |             .OUT_DIMENSION     (FLAT_DIMENSION[numb+1]                                   ) // FLAT_DIMENSION is indexed up to FLAT_NUMB here
181 |         ) inst_fully_connected_layer1 (
182 |             .clk                 (clk                                           ),
183 |             .clk_en              (clk_en                                        ),
184 |             .rst_n               (rst_n                                         ),
185 |             .i_data              ((numb == 0) ? flat_data : fc_relu_data[numb-1]), // NOTE(review): for numb == 0 the unselected branch indexes [numb-1] = [-1] and the two operands differ in width; some tools warn - confirm
186 |             .i_valid             ((numb == 0) ? flat_valid : fc_valid[numb-1]   ), // valid/sop/eop of later layers come from the previous layer, not from relu
187 |             .i_sop               ((numb == 0) ? flat_sop : fc_sop[numb-1]       ),
188 |             .i_eop               ((numb == 0) ? flat_eop : fc_eop[numb-1]       ),
189 |             .o_data              (fc_data[numb]                                 ),
190 |             .o_valid             (fc_valid[numb]                                ),
191 |             .o_sop               (fc_sop[numb]                                  ),
192 |             .o_eop               (fc_eop[numb]                                  ),
193 |             .o_ready             (fc_ready[numb]                                ),
194 |             .weights_mem_in_data (weights_mem_in_data                           ), // data/addr/sel buses are shared by all layers ...
195 |             .weights_mem_in_addr (weights_mem_in_addr                           ),
196 |             .weights_mem_sel_addr(weights_mem_sel_addr                          ),
197 |             .weights_mem_in_fc_wr(weights_mem_in_fc_wr[numb]                    )  // ... while this signal is indexed per layer
198 |         );
199 |         // Activation on the raw layer output; the result is the data input of the next layer.
200 |         relu #(
201 |             .PIX_WIDTH(PIX_WIDTH+$clog2(FLAT_DIMENSION[0])), // matches the declared width of fc_data / fc_relu_data
202 |             .DIMENSION(1                )
203 |         ) fc_relu (
204 |             .i_data(fc_data[numb]     ),
205 |             .o_data(fc_relu_data[numb])
206 |         );
207 |     end
208 | endgenerate
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 
215 | // Output stage: stores the stream of the last fully connected layer into classes[], one
216 | // entry per valid beat, and raises o_valid for one clk_en-qualified cycle after the eop beat.
217 | // The raw fc_data (not fc_relu_data) of the last layer is stored, through $signed().
218 | // Reset is asynchronous, active low, and covers every register written by this block.
219 | int classes_cntr = 0; // write index into classes[]
220 | always_ff @(posedge clk or negedge rst_n) begin
221 |     if (~rst_n) begin
222 |         // Reset has priority, so a falling rst_n edge can never execute the clocked branch;
223 |         // o_valid is cleared as well, so all flops follow the async-reset template.
224 |         classes      <= '0;
225 |         classes_cntr <= '0;
226 |         o_valid      <= '0;
227 |     end
228 |     else if (clk_en) begin
229 |         o_valid <= 'd0; // default: o_valid is a pulse
230 |         if (fc_valid[FLAT_NUMB-1]) begin
231 |             classes[classes_cntr] <= $signed(fc_data[FLAT_NUMB-1]);
232 |             if (fc_eop[FLAT_NUMB-1]) begin
233 |                 classes_cntr <= '0;  // last beat: rewind the index for the next packet
234 |                 o_valid      <= 'd1; // all entries written: flag the result
235 |             end
236 |             else begin
237 |                 classes_cntr <= classes_cntr + 'd1;
238 |             end
239 |         end
240 |     end
241 | end
242 | 
243 | endmodule : CNN
244 | 
245 | 


--------------------------------------------------------------------------------