// Repository layout (from the dump header):
// ├── 2D_filter
// │   └── conv.sv
// ├── CNN_TB.sv
// ├── Fully connected layer
// │   ├── flat.sv
// │   └── fully_connected_layer.sv
// ├── README.md
// ├── activation_func
// │   └── relu.sv
// ├── blocks
// │   ├── conv_block.sv
// │   └── max_pooling_block.sv
// ├── cnn_behind.py
// ├── pooling
// │   └── maxpooling.sv
// └── top
//     ├── CNN.sv
//     └── CNN.svh
//
// --------------------------------------------------------------------------------
// /2D_filter/conv.sv:
// --------------------------------------------------------------------------------
// 2D Convolution module. For sim purposes: Image width and height are not changeable dynamically
//
// -----------------------------------------------------------------------------
// Copyright (c) 2014-2024 All rights reserved
// -----------------------------------------------------------------------------
// Author   : Maksim Ananev mananev086@gmail.com
//
// Create   : 2024-05-13 11:30:23
// Revise   : 2024-10-22 12:20:46
// Editor   : sublime text4, tab size (4)
// -----------------------------------------------------------------------------
//
// Streaming KxK filter over a pixel stream (one pixel per enabled clock).
//   * Pixels are treated as unsigned, kernel weights as signed fixed point with
//     WEIGHT_FRACT_WIDTH fractional bits.
//   * Only positions where the whole KxK window lies inside the image are
//     flagged with o_valid, so an img_width x img_height frame yields
//     (img_width-K+1) x (img_height-K+1) results.
//   * Data latency from i_data to o_data is 3 enabled clocks
//     (products -> row sums -> total).
//   * TRUNK == "TRUE" drops the fractional bits of the sum before output.
//   * Defining RELU clamps the output to [0, 2**PIX_WIDTH-1]; otherwise the sum
//     is simply truncated to the o_data width.
// KERNEL_DIMENSION is expected to be >= 2 (the line-buffer declarations below
// do not support a 1x1 kernel).

// `define RELU

module conv #(
    parameter              PIX_WIDTH          = 8     ,
    parameter              WEIGHT_WIDTH       = 10    ,
    parameter              WEIGHT_FRACT_WIDTH = 5     ,
    parameter              KERNEL_DIMENSION   = 3     ,
    parameter              TRUNK              = "TRUE",
    parameter logic [11:0] img_width          = 28    ,
    parameter logic [11:0] img_height         = 28
) (
    input                                                                   clk      , // Clock
    input                                                                   clk_en   , // Clock Enable
    input                                                                   rst_n    , // Asynchronous reset active low
    // input pixels
    input        [PIX_WIDTH-1:0]                                            i_data   ,
    input                                                                   i_valid  ,
    input                                                                   i_sop    ,
    input                                                                   i_eop    ,
    // output pixels
    output       [((TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] o_data ,
    output                                                                  o_valid  ,
    output                                                                  o_sop    ,
    output                                                                  o_eop    ,
    // kernel[i][y]: weight for window row i, column y (kernel[K-1][K-1] meets the newest pixel)
    input        [KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH-1:0] kernel ,
    // input        [ 11:0]                                                 img_width ,
    // input        [ 11:0]                                                 img_height,
    output logic                                                            ready    , // low while the frame is being flushed
    output logic [ 11:0]                                                    cols_cntr, // column of the pixel currently at o_data
    output logic [ 11:0]                                                    rows_cntr  // row of the pixel currently at o_data
);

    /*
        Pixel delay scheme (shown for a 3x3 kernel).
        delayed_pix[r][c] is the pixel r rows above and c columns left of i_data.

        i_data --+--> delayed_line[0] --+--|FF|--+--|FF|--+
                 |                      |        |        |
                 |               pix[0][0]  pix[0][1]  pix[0][2]
             [fifo[0]]  (img_width pixels)
                 |
                 +--> delayed_line[1] --+--|FF|--+--|FF|--+
                 |                      |        |        |
                 |               pix[1][0]  pix[1][1]  pix[1][2]
             [fifo[1]]  (img_width pixels)
                 |
                 +--> delayed_line[2] --+--|FF|--+--|FF|--+
                                        |        |        |
                                 pix[2][0]  pix[2][1]  pix[2][2]
    */

    // Index of the first row/column for which a complete KxK window exists.
    localparam logic [11:0] FIRST_FULL_IDX = KERNEL_DIMENSION - 1;

    // Line buffers modelled as shift registers.
    // !!! only for simulation, MUST BE REPLACED by a regular FIFO !!!
    logic [img_width-1:0][PIX_WIDTH-1:0] fifo[KERNEL_DIMENSION-1];

    logic [PIX_WIDTH-1:0] delayed_line[KERNEL_DIMENSION];

    // Kept 2-state (`bit`) as in the original so the horizontal taps start at 0.
    bit [KERNEL_DIMENSION-2:0][PIX_WIDTH-1:0] after_fifos_ffs[KERNEL_DIMENSION];

    logic [PIX_WIDTH-1:0] delayed_pix[KERNEL_DIMENSION][KERNEL_DIMENSION];

    always_comb begin
        // Row taps: row 0 is the live input, row i is the output of line buffer i-1.
        // if/else (rather than ?:) so fifo[-1] is never evaluated.
        foreach (delayed_line[i]) begin
            if (i == 0) delayed_line[i] = i_data;
            else        delayed_line[i] = fifo[i-1][img_width-1];
        end

        // Column taps: column 0 is the row tap itself, column y is y registers later.
        foreach (delayed_pix[i,y]) begin
            if (y == 0) delayed_pix[i][y] = delayed_line[i];
            else        delayed_pix[i][y] = after_fifos_ffs[i][y-1];
        end
    end

    // The delay network advances on every accepted pixel and keeps running while
    // the module is not ready (frame flush).
    always_ff @(posedge clk) begin
        if (clk_en && (i_valid || !ready)) begin
            foreach (fifo[i]) begin
                if (i == 0) fifo[i] <= {fifo[i][img_width-2:0], i_data};
                else        fifo[i] <= {fifo[i][img_width-2:0], fifo[i-1][img_width-1]};
            end

            foreach (after_fifos_ffs[i]) begin
                // Shift left by one pixel; the oldest pixel falls off the top
                // (the concatenation is intentionally one pixel wider than the target).
                after_fifos_ffs[i] <= {after_fifos_ffs[i], delayed_line[i]};
            end
        end
    end

    /*
        Convolution maths (3x3 example): every kernel weight is multiplied with the
        window pixel at the mirrored delay position and all products are summed.

            mult_result[i][y] = kernel[i][y] * delayed_pix[K-1-i][K-1-y]
            result            = sum over i, y of mult_result[i][y]
    */
    // Elements of a packed array are unsigned when selected, hence the $signed() below.
    logic signed [KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH+PIX_WIDTH-1:0] mult_result;

    // Stage 1: products. The pixel gets a leading 0 so it stays non-negative.
    always_ff @(posedge clk) begin : proc_multiplying
        if (clk_en) begin
            foreach (mult_result[i,y]) begin
                mult_result[i][y] <= $signed({1'b0, delayed_pix[(KERNEL_DIMENSION-1)-i][(KERNEL_DIMENSION-1)-y]})
                                   * $signed(kernel[i][y]);
            end
        end
    end

    logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_result,3)-1:0]   mult_intermed_sum_1dim[KERNEL_DIMENSION];
    logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_result,3)-1:0]   mult_sum_1dim[KERNEL_DIMENSION];
    logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_sum_1dim,2)-1:0] mult_intermed_sum_2dim;

    always_comb begin
        mult_intermed_sum_2dim = '0;
        foreach (mult_intermed_sum_1dim[i]) begin
            // Row sum of the registered products (input of stage 2).
            mult_intermed_sum_1dim[i] = '0;
            for (int y = 0; y < KERNEL_DIMENSION; y++) begin
                mult_intermed_sum_1dim[i] += $signed(mult_result[i][y]);
            end

            // Total of the *registered* row sums (input of stage 3).
            mult_intermed_sum_2dim += mult_sum_1dim[i];
        end
    end

    logic signed [$clog2(KERNEL_DIMENSION)+$size(mult_sum_1dim,2)-1:0] mult_sum_out;

    // Stage 2: row sums; stage 3: total, optionally with the fractional bits dropped.
    always_ff @(posedge clk) begin : proc_mult_sum
        if (clk_en) begin
            foreach (mult_sum_1dim[i]) begin
                mult_sum_1dim[i] <= mult_intermed_sum_1dim[i];
            end
            mult_sum_out <= (TRUNK == "TRUE") ? (mult_intermed_sum_2dim >>> WEIGHT_FRACT_WIDTH)
                                              : mult_intermed_sum_2dim;
        end
    end

    /*
        Output stage.
        With RELU defined: negative sums become 0 and sums that do not fit into
        PIX_WIDTH bits saturate to 2**PIX_WIDTH-1.
        Without RELU: the sum is truncated to the o_data width.
    */
`ifdef RELU
    assign o_data = ( mult_sum_out < 0 ) ? '0 : ( ( |mult_sum_out[$size( mult_sum_out )-1:PIX_WIDTH] ) ? ( 2**PIX_WIDTH - 1 ) : mult_sum_out);
`else
    assign o_data = mult_sum_out;
`endif

    /*
        Latency matching: i_valid travels through the same three stages as the data.
    */
    logic [2:0] valid_delay = '0;
    wire        valid_delayed = valid_delay[2];

    always_ff @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            valid_delay <= '0;
        end else if (clk_en) begin
            valid_delay <= {valid_delay[1:0], i_valid && ready};
        end
    end

    /*
        Position counters of the pixel that is currently at the output.
        They also keep counting for one extra row after the frame while the
        module is not ready, then wrap to zero.
    */
    always_ff @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            cols_cntr <= '0;
            rows_cntr <= '0;
        end else if (clk_en) begin
            if (valid_delayed || (!ready && (rows_cntr == img_height))) begin
                if (cols_cntr == img_width-1) begin
                    cols_cntr <= '0;
                    rows_cntr <= (rows_cntr == img_height) ? '0 : (rows_cntr + 'd1);
                end else begin
                    cols_cntr <= cols_cntr + 'd1;
                end
            end
            else if (i_sop) begin
                cols_cntr <= '0;
                rows_cntr <= '0;
            end
        end
    end

    /*
        Video control signals
    */
    // ready drops at the end of an input frame and returns once the row counter
    // has wrapped back to zero.
    always_ff @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            ready <= 1'b1;
        end else if (clk_en) begin
            if (i_eop)
                ready <= 1'b0;
            else if (rows_cntr == 0)
                ready <= 1'b1;
        end
    end

    // A result is valid only when the full window is inside the image.
    assign o_valid = valid_delayed
                   && (rows_cntr >= FIRST_FULL_IDX) && (rows_cntr < img_height)
                   && (cols_cntr >= FIRST_FULL_IDX) && (cols_cntr < img_width);

    assign o_eop   = valid_delayed && (cols_cntr == img_width-1) && (rows_cntr == img_height-1);

    assign o_sop   = valid_delayed && (rows_cntr == FIRST_FULL_IDX) && (cols_cntr == FIRST_FULL_IDX);

endmodule : conv

// --------------------------------------------------------------------------------
// /CNN_TB.sv:
// --------------------------------------------------------------------------------
// Convolutional Neural Network TB.
//
// The images are taken from MNIST digits dataset.
4 | // 5 | // All weights and reference calculated by "cnn_behind.py" 6 | // 7 | // ----------------------------------------------------------------------------- 8 | // Copyright (c) 2014-2024 All rights reserved 9 | // ----------------------------------------------------------------------------- 10 | // Author : Maksim Ananev mananev086@gmail.com 11 | // 12 | // Create : 2024-05-13 11:30:23 13 | // Revise : 2024-10-22 12:20:46 14 | // Editor : sublime text4, tab size (4) 15 | // ----------------------------------------------------------------------------- 16 | 17 | `include "./top/CNN.svh" 18 | 19 | `timescale 1ns/1ns 20 | 21 | 22 | `define DATAFLOW_CHECK 23 | 24 | module CNN_TB (); 25 | 26 | parameter CLASSES_QNT = 10; 27 | parameter IMG_WIDTH = 28; 28 | parameter IMG_HEIGHT = 28; 29 | 30 | real image_7[IMG_HEIGHT][IMG_WIDTH] = 31 | '{ 32 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 33 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 34 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 35 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 36 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 37 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 38 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 39 | '{0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 40 | '{0, 0, 0, 0, 0, 0, 222, 254, 254, 254, 254, 241, 198, 198, 198, 198, 198, 198, 198, 198, 170, 52, 0, 0, 0, 0, 0, 0}, 41 | '{0, 0, 0, 0, 0, 0, 67, 114, 72, 114, 163, 227, 254, 225, 254, 254, 254, 250, 229, 254, 254, 140, 0, 0, 0, 0, 0, 0}, 42 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 66, 14, 67, 67, 67, 59, 21, 236, 254, 106, 0, 0, 0, 0, 0, 0}, 43 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 83, 253, 209, 18, 0, 0, 0, 0, 0, 0}, 44 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 233, 255, 83, 0, 0, 0, 0, 0, 0, 0}, 45 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129, 254, 238, 44, 0, 0, 0, 0, 0, 0, 0}, 46 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 249, 254, 62, 0, 0, 0, 0, 0, 0, 0, 0}, 47 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 133, 254, 187, 5, 0, 0, 0, 0, 0, 0, 0, 0}, 48 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 205, 248, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 49 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 254, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 50 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 251, 240, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 51 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 221, 254, 166, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 52 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 203, 254, 219, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 53 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 254, 254, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 54 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 224, 254, 115, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 55 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 133, 254, 254, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 56 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 242, 254, 254, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 57 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 254, 254, 219, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 58 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 254, 207, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 59 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 60 | }; 61 | 62 | real image_2[IMG_HEIGHT][IMG_WIDTH] = 63 | '{ 64 | 65 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 66 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 67 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 68 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
116, 125, 171, 255, 255, 150, 93, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 69 | 70 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 169, 253, 253, 253, 253, 253, 253, 218, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 71 | 72 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 169, 253, 253, 253, 213, 142, 176, 253, 253, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 73 | 74 | '{ 0, 0, 0, 0, 0, 0, 0, 52, 250, 253, 210, 32, 12, 0, 6, 206, 253, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 75 | 76 | '{ 0, 0, 0, 0, 0, 0, 0, 77, 251, 210, 25, 0, 0, 0, 122, 248, 253, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 77 | 78 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 31, 18, 0, 0, 0, 0, 209, 253, 253, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 79 | 80 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 247, 253, 198, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 81 | 82 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 247, 253, 231, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 83 | 84 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 253, 253, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 85 | 86 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 246, 253, 159, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 87 | 88 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 234, 253, 233, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 89 | 90 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 198, 253, 253, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 91 | 92 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 78, 248, 253, 189, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 93 | 94 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 19, 200, 253, 253, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 95 | 96 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 134, 253, 253, 173, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 97 | 98 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 248, 253, 253, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 99 | 100 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 248, 253, 253, 43, 20, 20, 20, 20, 5, 0, 5, 20, 20, 37, 150, 150, 150, 147, 10, 0}, 101 | 102 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 248, 253, 253, 253, 253, 253, 253, 253, 168, 143, 166, 253, 253, 253, 253, 253, 253, 253, 123, 0}, 103 | 104 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 174, 
253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 249, 247, 247, 169, 117, 117, 57, 0}, 105 | 106 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 123, 123, 123, 166, 253, 253, 253, 155, 123, 123, 41, 0, 0, 0, 0, 0, 0, 0}, 107 | 108 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 109 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 110 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 111 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 112 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 113 | 114 | }; 115 | real image_1[IMG_HEIGHT][IMG_WIDTH] = 116 | '{ '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 117 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 118 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 119 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 120 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 254, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 121 | 122 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 252, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 123 | 124 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 135, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 125 | 126 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 244, 150, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 127 | 128 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 254, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 129 | 130 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 202, 223, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 131 | 132 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 254, 216, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 133 | 134 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 254, 195, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 135 | 136 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 140, 254, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 137 | 138 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 237, 205, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 139 | 140 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 124, 255, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 141 | 142 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171, 254, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 143 | 144 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 232, 215, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 145 | 146 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 254, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 147 | 148 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 151, 254, 142, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 149 | 150 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 228, 254, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 151 | 152 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 251, 254, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 153 | 154 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 141, 254, 205, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 155 | 156 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 215, 254, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 157 | 158 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 198, 176, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 159 | 160 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 161 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 162 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 163 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 164 | }; 165 | 166 | 167 | real image_0[IMG_HEIGHT][IMG_WIDTH] = 168 | '{ 169 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 170 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 171 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 172 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 173 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 150, 253, 202, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 174 | 175 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 251, 251, 253, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 176 | 177 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 197, 251, 251, 253, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 178 | 179 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 190, 251, 251, 251, 253, 169, 109, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 180 | 181 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 253, 251, 251, 251, 251, 253, 251, 251, 220, 51, 0, 0, 0, 0, 0, 0, 0, 0}, 182 | 183 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 182, 255, 253, 253, 253, 253, 234, 222, 253, 253, 253, 0, 0, 0, 0, 0, 0, 0, 0}, 184 | 185 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 63, 221, 253, 251, 251, 251, 147, 77, 62, 128, 251, 251, 105, 0, 0, 0, 0, 0, 0, 0}, 186 | 187 | '{ 0, 0, 0, 0, 0, 0, 0, 32, 231, 251, 253, 251, 220, 137, 10, 0, 0, 31, 230, 251, 243, 113, 5, 0, 0, 0, 0, 0}, 188 | 189 | '{ 0, 0, 0, 0, 0, 0, 0, 37, 251, 251, 253, 188, 20, 0, 0, 0, 0, 0, 109, 251, 253, 251, 35, 0, 0, 0, 0, 0}, 190 | 191 | '{ 0, 0, 0, 0, 0, 0, 0, 37, 251, 251, 201, 30, 0, 0, 0, 0, 0, 0, 31, 200, 253, 251, 35, 0, 0, 0, 0, 0}, 192 | 193 | '{ 0, 0, 0, 0, 0, 0, 0, 37, 253, 253, 0, 0, 0, 0, 0, 0, 0, 0, 32, 202, 255, 253, 164, 0, 0, 0, 0, 0}, 194 | 195 | '{ 0, 0, 0, 0, 0, 0, 0, 140, 251, 251, 0, 0, 0, 0, 0, 0, 0, 0, 109, 251, 253, 251, 35, 0, 0, 0, 0, 0}, 196 | 197 | '{ 0, 0, 0, 0, 0, 0, 0, 217, 251, 251, 0, 0, 0, 0, 0, 0, 21, 63, 231, 251, 253, 230, 30, 0, 0, 0, 0, 0}, 198 | 199 | '{ 0, 0, 0, 0, 0, 0, 0, 217, 251, 251, 0, 0, 0, 0, 0, 0, 144, 251, 251, 251, 221, 61, 0, 0, 0, 0, 0, 0}, 200 | 201 | '{ 0, 0, 0, 0, 0, 0, 0, 217, 251, 251, 0, 0, 0, 0, 0, 182, 221, 251, 251, 251, 180, 0, 0, 0, 0, 0, 0, 0}, 202 | 203 | '{ 0, 0, 0, 0, 0, 0, 0, 218, 253, 253, 73, 73, 228, 253, 253, 255, 253, 253, 253, 253, 0, 0, 0, 0, 0, 0, 0, 0}, 204 | 205 | '{ 0, 0, 0, 0, 0, 0, 0, 113, 251, 251, 253, 251, 251, 251, 251, 253, 251, 251, 
251, 147, 0, 0, 0, 0, 0, 0, 0, 0}, 206 | 207 | '{ 0, 0, 0, 0, 0, 0, 0, 31, 230, 251, 253, 251, 251, 251, 251, 253, 230, 189, 35, 10, 0, 0, 0, 0, 0, 0, 0, 0}, 208 | 209 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 62, 142, 253, 251, 251, 251, 251, 253, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 210 | 211 | '{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 174, 251, 173, 71, 72, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 212 | 213 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 214 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 215 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 216 | '{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} 217 | }; 218 | 219 | 220 | parameter PIX_WIDTH = 16; 221 | parameter WEIGHT_WIDTH = 16; 222 | parameter FRACT_WIDTH = 12; 223 | parameter CONV_NUMB = 2 ; 224 | parameter FLAT_NUMB = 2 ; 225 | 226 | parameter logic [CONV_NUMB-1:0][ 1:0][7:0] CONV_DIMENSION = {{8'd8, 8'd4}, {8'd4, 8'd1}}; 227 | parameter logic [CONV_NUMB-1:0][ 3:0] KERNEL_DIMENSION = {4'd3 , 4'd3} ; 228 | parameter logic [FLAT_NUMB :0][15:0] FLAT_DIMENSION = {CLASSES_QNT, 16'd64, 16'd200} ; 229 | 230 | 231 | 232 | logic clk = 0; 233 | logic clk_en = 1; 234 | logic rst_n = 0; 235 | 236 | logic [PIX_WIDTH-1:0] i_data; 237 | 238 | logic i_valid = 0; 239 | logic i_sop = 0; 240 | logic i_eop = 0; 241 | 242 | logic o_valid; 243 | 244 | logic [CLASSES_QNT-1:0][31:0] classes; 245 | 246 | int weights_mem_in_data; 247 | int weights_mem_in_addr; 248 | int weights_mem_sel_addr; 249 | 250 | logic [CONV_NUMB-1:0]weights_mem_in_kernel_wr=0; 251 | logic [FLAT_NUMB-1:0]weights_mem_in_fc_wr = 0; 252 | 253 | localparam R2I_COEF = 2**FRACT_WIDTH; 254 | 255 | CNN #( 256 | .PIX_WIDTH (PIX_WIDTH ), 257 | .WEIGHT_WIDTH (WEIGHT_WIDTH ), 258 | .FRACT_WIDTH (FRACT_WIDTH ), 259 | .CONV_NUMB (CONV_NUMB ), 260 | .CONV_DIMENSION (CONV_DIMENSION ), 261 | 
.KERNEL_DIMENSION(KERNEL_DIMENSION), 262 | .FLAT_NUMB (FLAT_NUMB ), 263 | .FLAT_DIMENSION (FLAT_DIMENSION ), 264 | .CLASSES_QNT (CLASSES_QNT ), 265 | .IMG_WIDTH (IMG_WIDTH ), 266 | .IMG_HEIGHT (IMG_HEIGHT ) 267 | ) inst_CNN ( 268 | .clk (clk ), 269 | .clk_en (clk_en ), 270 | .rst_n (rst_n ), 271 | .i_data (i_data ), 272 | .i_valid (i_valid ), 273 | .i_sop (i_sop ), 274 | .i_eop (i_eop ), 275 | .o_valid (o_valid ), 276 | .classes (classes ), 277 | .weights_mem_in_data (weights_mem_in_data ), 278 | .weights_mem_in_addr (weights_mem_in_addr ), 279 | .weights_mem_sel_addr (weights_mem_sel_addr ), 280 | .weights_mem_in_kernel_wr(weights_mem_in_kernel_wr), 281 | .weights_mem_in_fc_wr (weights_mem_in_fc_wr ) 282 | ); 283 | 284 | 285 | initial begin 286 | forever begin 287 | #10 clk = !clk; 288 | end 289 | end 290 | 291 | 292 | 293 | 294 | 295 | initial begin 296 | #100; 297 | 298 | rst_n = 1; 299 | 300 | #100; 301 | ///////////// 302 | ///WEIGHTS INITIALIZATION 303 | ////////////// 304 | @(posedge clk); 305 | foreach (kernel_1_re[dim2, dim1, row, col]) begin 306 | weights_mem_in_data = R2I_COEF*kernel_1_re[dim2][dim1][row][col]; 307 | weights_mem_in_kernel_wr[0] = 1'b1; 308 | @(posedge clk); 309 | weights_mem_in_addr++; 310 | weights_mem_in_kernel_wr[0] = 1'b0; 311 | end 312 | foreach (conv_1_bias_re[x]) begin 313 | weights_mem_in_data = R2I_COEF*conv_1_bias_re[x]; 314 | weights_mem_in_kernel_wr[0] = 1'b1; 315 | @(posedge clk); 316 | weights_mem_in_addr++; 317 | weights_mem_in_kernel_wr[0] = 1'b0; 318 | end 319 | weights_mem_in_kernel_wr[0] = 1'b0; 320 | weights_mem_in_addr = 0; 321 | 322 | @(posedge clk); 323 | 324 | foreach (kernel_2_re[dim2, dim1, row, col]) begin 325 | weights_mem_in_data = R2I_COEF*kernel_2_re[dim2][dim1][row][col]; 326 | weights_mem_in_kernel_wr[1] = 1'b1; 327 | @(posedge clk); 328 | weights_mem_in_addr++; 329 | end 330 | foreach (conv_2_bias_re[x]) begin 331 | weights_mem_in_data = R2I_COEF*conv_2_bias_re[x]; 332 | weights_mem_in_kernel_wr[1] = 
1'b1; 333 | @(posedge clk); 334 | weights_mem_in_addr++; 335 | weights_mem_in_kernel_wr[1] = 1'b0; 336 | end 337 | weights_mem_in_kernel_wr[1] = 1'b0; 338 | weights_mem_in_addr = 0; 339 | 340 | 341 | foreach (fc1_weights_re[x,y]) begin 342 | weights_mem_in_data = R2I_COEF*fc1_weights_re[x][y]; 343 | weights_mem_sel_addr = x; 344 | 345 | weights_mem_in_addr = y; 346 | weights_mem_in_fc_wr[0] = 1'b1; 347 | @(posedge clk); 348 | weights_mem_in_fc_wr[0] = 1'b0; 349 | end 350 | foreach (fc1_bias_re[x]) begin 351 | weights_mem_in_data = R2I_COEF*fc1_bias_re[x]; 352 | weights_mem_sel_addr = FLAT_DIMENSION[1]; 353 | 354 | weights_mem_in_addr = x; 355 | weights_mem_in_fc_wr[0] = 1'b1; 356 | @(posedge clk); 357 | weights_mem_in_fc_wr[0] = 1'b0; 358 | end 359 | 360 | foreach (fc2_weights_re[x,y]) begin 361 | weights_mem_in_data = R2I_COEF*fc2_weights_re[x][y]; 362 | weights_mem_sel_addr = x; 363 | 364 | weights_mem_in_addr = y; 365 | weights_mem_in_fc_wr[1] = 1'b1; 366 | @(posedge clk); 367 | weights_mem_in_fc_wr[1] = 1'b0; 368 | end 369 | foreach (fc2_bias_re[x]) begin 370 | weights_mem_in_data = R2I_COEF*fc2_bias_re[x]; 371 | weights_mem_sel_addr = CLASSES_QNT; 372 | 373 | weights_mem_in_addr = x; 374 | weights_mem_in_fc_wr[1] = 1'b1; 375 | @(posedge clk); 376 | weights_mem_in_fc_wr[1] = 1'b0; 377 | end 378 | ///////////////////////////////////////////////////////// 379 | 380 | 381 | 382 | 383 | /////////////////////////////////////////////// 384 | ///////////////////// 385 | foreach (image_0[row,col]) begin 386 | @(posedge clk); 387 | i_data = (image_7[row][col]/255) * R2I_COEF; 388 | i_valid = 1; 389 | i_sop = (row == 0) && (col == 0); 390 | i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1); 391 | end 392 | 393 | @(posedge clk); 394 | i_valid = 0; 395 | i_sop = 0; 396 | i_eop = 0; 397 | 398 | 399 | wait(inst_CNN.o_valid); 400 | @(posedge clk); 401 | foreach (image_0[row,col]) begin 402 | @(posedge clk); 403 | i_data = (image_2[row][col]/255) * R2I_COEF; 404 | i_valid 
= 1; 405 | i_sop = (row == 0) && (col == 0); 406 | i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1); 407 | end 408 | 409 | @(posedge clk); 410 | i_valid = 0; 411 | i_sop = 0; 412 | i_eop = 0; 413 | 414 | 415 | wait(inst_CNN.o_valid); 416 | @(posedge clk); 417 | foreach (image_0[row,col]) begin 418 | @(posedge clk); 419 | i_data = (image_1[row][col]/255) * R2I_COEF; 420 | i_valid = 1; 421 | i_sop = (row == 0) && (col == 0); 422 | i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1); 423 | end 424 | 425 | @(posedge clk); 426 | i_valid = 0; 427 | i_sop = 0; 428 | i_eop = 0; 429 | 430 | 431 | wait(inst_CNN.o_valid); 432 | @(posedge clk); 433 | foreach (image_0[row,col]) begin 434 | @(posedge clk); 435 | i_data = (image_0[row][col]/255) * R2I_COEF; 436 | i_valid = 1; 437 | i_sop = (row == 0) && (col == 0); 438 | i_eop = (row == IMG_HEIGHT-1) && (col == IMG_WIDTH-1); 439 | end 440 | 441 | @(posedge clk); 442 | i_valid = 0; 443 | i_sop = 0; 444 | i_eop = 0; 445 | end 446 | 447 | 448 | ///////////////////////////////////////////// 449 | //////DATAFLOW CHECK 450 | ///////////////////////////////////////////// 451 | `ifdef DATAFLOW_CHECK 452 | int first_conv_rows_cntr = 0; 453 | int first_conv_cols_cntr = 0; 454 | int first_pool_rows_cntr = 0; 455 | int first_pool_cols_cntr = 0; 456 | 457 | int second_conv_rows_cntr = 0; 458 | int second_conv_cols_cntr = 0; 459 | int second_pool_rows_cntr = 0; 460 | int second_pool_cols_cntr = 0; 461 | 462 | int flat_cntr = 0; 463 | 464 | real first_conv_data[CONV_DIMENSION[0][1]][IMG_HEIGHT-2][IMG_WIDTH-2]; 465 | real first_relu_data[CONV_DIMENSION[0][1]][IMG_HEIGHT-2][IMG_WIDTH-2]; 466 | real first_pool_data[CONV_DIMENSION[0][1]][(IMG_HEIGHT-2)/2][(IMG_WIDTH-2)/2]; 467 | 468 | real second_conv_data[CONV_DIMENSION[1][1]][(IMG_HEIGHT-2)/2-2][(IMG_WIDTH-2)/2-2]; 469 | real second_relu_data[CONV_DIMENSION[1][1]][(IMG_HEIGHT-2)/2-2][(IMG_WIDTH-2)/2-2]; 470 | real 
second_pool_data[CONV_DIMENSION[1][1]][((IMG_HEIGHT-2)/2-2)/2][((IMG_WIDTH-2)/2-2)/2];

    // Flattened output of the second pooling stage: channels * rows * cols.
    // (The column term now uses IMG_WIDTH; it used IMG_HEIGHT twice.)
    real flat_data [CONV_DIMENSION[1][1]*(((IMG_HEIGHT-2)/2-2)/2)*(((IMG_WIDTH-2)/2-2)/2)];
`endif

    int first_fc_cntr  = 0;
    int second_fc_cntr = 0;


    real first_fc_data [FLAT_DIMENSION[1]];
    real second_fc_data[CLASSES_QNT];

`ifdef DATAFLOW_CHECK
    // ---- Layer 0 convolution / ReLU monitor ---------------------------------
    // Converts every valid DUT sample to a real (fixed-point value / R2I_COEF)
    // and stores it at the current row/column position.
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.conv_valid[0])begin
                    foreach (first_conv_data[i]) begin
                        first_conv_data[i][first_conv_rows_cntr][first_conv_cols_cntr] <= $itor($signed(inst_CNN.conv_data[0][i]))/R2I_COEF;
                        first_relu_data[i][first_conv_rows_cntr][first_conv_cols_cntr] <= $itor(inst_CNN.relu_data[0][i])/R2I_COEF;
                    end
                    first_conv_cols_cntr++;
                    if(first_conv_cols_cntr == IMG_WIDTH-2)begin
                        first_conv_cols_cntr = 0;
                        first_conv_rows_cntr++;
                    end
                    // End of packet: rewind for the next frame.
                    if(inst_CNN.conv_eop[0])begin
                        first_conv_cols_cntr = 0;
                        first_conv_rows_cntr = 0;
                    end
                end
    end

    // ---- Layer 0 max-pooling monitor ----------------------------------------
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.pool_valid[0])begin
                    foreach (first_pool_data[i]) begin
                        first_pool_data[i][first_pool_rows_cntr][first_pool_cols_cntr] <= $itor(inst_CNN.pool_data[0][i])/R2I_COEF;
                    end
                    first_pool_cols_cntr++;
                    // Column wrap is derived from the image WIDTH (was IMG_HEIGHT).
                    if(first_pool_cols_cntr == (IMG_WIDTH-2)/2)begin
                        first_pool_cols_cntr = 0;
                        first_pool_rows_cntr++;
                    end
                    if(inst_CNN.pool_eop[0])begin
                        first_pool_cols_cntr = 0;
                        first_pool_rows_cntr = 0;
                    end
                end
    end

    // ---- Layer 1 convolution / ReLU monitor ---------------------------------
    // Fixed: this monitor is gated by conv_valid[1]/conv_eop[1] but used to
    // capture conv_data[0]/relu_data[0], i.e. the first layer's outputs.
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.conv_valid[1])begin
                    foreach (second_conv_data[i]) begin
                        second_conv_data[i][second_conv_rows_cntr][second_conv_cols_cntr] <= $itor($signed(inst_CNN.conv_data[1][i]))/R2I_COEF;
                        second_relu_data[i][second_conv_rows_cntr][second_conv_cols_cntr] <= $itor(inst_CNN.relu_data[1][i])/R2I_COEF;
                    end
                    second_conv_cols_cntr++;
                    if(second_conv_cols_cntr == (IMG_WIDTH-2)/2-2)begin
                        second_conv_cols_cntr = 0;
                        second_conv_rows_cntr++;
                    end
                    if(inst_CNN.conv_eop[1])begin
                        second_conv_cols_cntr = 0;
                        second_conv_rows_cntr = 0;
                    end
                end
    end

    // ---- Layer 1 max-pooling monitor ----------------------------------------
    // Fixed: iterates over second_pool_data (was second_conv_data) and reads
    // pool_data[1] (was pool_data[0]).
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.pool_valid[1])begin
                    foreach (second_pool_data[i]) begin
                        second_pool_data[i][second_pool_rows_cntr][second_pool_cols_cntr] <= $itor(inst_CNN.pool_data[1][i])/R2I_COEF;
                    end
                    second_pool_cols_cntr++;
                    if(second_pool_cols_cntr == ((IMG_WIDTH-2)/2-2)/2)begin
                        second_pool_cols_cntr = 0;
                        second_pool_rows_cntr++;
                    end
                    if(inst_CNN.pool_eop[1])begin
                        second_pool_cols_cntr = 0;
                        second_pool_rows_cntr = 0;
                    end
                end
    end

    // ---- Flatten stage monitor ----------------------------------------------
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.flat_valid)begin
                    flat_data[flat_cntr] <= $itor(inst_CNN.flat_data)/R2I_COEF;
                    flat_cntr++;

                    if(inst_CNN.flat_eop)begin
                        flat_cntr = 0;
                    end
                end
    end


    // ---- First fully connected layer monitor (post-ReLU values) -------------
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.fc_valid[0])begin
                    first_fc_data[first_fc_cntr] <= $itor($signed(inst_CNN.fc_relu_data[0]))/R2I_COEF;
                    first_fc_cntr++;

                    if(inst_CNN.fc_eop[0])begin
                        first_fc_cntr = 0;
                    end
                end
    end

`endif
    // ---- Second fully connected layer monitor (always compiled) -------------
    // Its captured values feed the class detection further below.
    initial begin
        forever
            @(posedge clk)
                if(inst_CNN.fc_valid[1])begin
                    second_fc_data[second_fc_cntr] <= $itor($signed(inst_CNN.fc_data[1]))/R2I_COEF;
                    second_fc_cntr++;

                    if(inst_CNN.fc_eop[1])begin
                        second_fc_cntr = 0;
                    end
                end
    end






    ///////////////////////////////////////////
    /////////// CNN RESULTS CHECK
    ///////////////////////////////////////////

    typedef enum {
        ZERO  = 0,
        ONE   = 1,
        TWO   = 2,
        THREE = 3,
        FOUR  = 4,
        FIVE  = 5,
        SIX   = 6,
        SEVEN = 7,
        EIGHT = 8,
        NINE  = 9,

        NONE  = 999
    } e_number;

    e_number detected_class;


    int  detected = 0  ;
    real det_max  = 0.0;

    // Arg-max over the captured output-layer values after each end-of-packet.
    // NOTE(review): second_fc_data is written with non-blocking assignments by
    // the monitor above on the same clock edge this block resumes on, so the
    // last class value read here may still be the previous frame's - confirm.
    initial begin
        detected_class = NONE;
        forever wait (inst_CNN.fc_eop[1]) begin
            @(posedge clk);
            detected_class = ZERO;
            // Seed the search with class 0 instead of the constant 0.0 so the
            // result is still the true maximum when every output is negative.
            detected = 0;
            det_max  = second_fc_data[0];
            for (int i = 1; i < CLASSES_QNT; i++) begin
                // ">=" keeps the original tie-break: the highest index wins.
                if(second_fc_data[i] >= det_max)begin
                    detected = i;
                    det_max  = second_fc_data[i];
                end
            end

            // Step the enum from ZERO up to the detected index.
            for (int i = 0; i < CLASSES_QNT; i++) begin
                if(detected == i)begin
                    break;
                end

                detected_class = detected_class.next();

            end

            @(posedge clk);
            @(posedge clk);

        end
    end




endmodule : CNN_TB



--------------------------------------------------------------------------------
/Fully connected layer/flat.sv:
--------------------------------------------------------------------------------
// For sim purposes: Image width and height are not changeable dynamically
//
// -----------------------------------------------------------------------------
// Copyright (c) 2014-2024 All rights reserved
// -----------------------------------------------------------------------------
// Author : Maksim Ananev mananev086@gmail.com
//
// Create : 2024-05-13 11:30:23
// Revise : 2024-10-22 12:20:46
// Editor : sublime text4, tab size (4)
// -----------------------------------------------------------------------------

module flat #(
    parameter              PIX_WIDTH  = 8 ,
    parameter              DIMENSION  = 8 ,
    parameter logic [11:0] img_width  = 7,
    parameter logic [11:0] img_height = 7
) (
    input                                       clk    , // Clock
    input                                       clk_en , // Clock Enable
    input                                       rst_n  , // 
Asynchronous reset active low
    //input pixels
    input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data ,
    input                                       i_valid,
    input                                       i_sop  ,
    input                                       i_eop  ,
    // output pixels
    output logic                [PIX_WIDTH-1:0] o_data ,
    output logic                                o_valid,
    output logic                                o_sop  ,
    output logic                                o_eop  ,
    ///
    output logic                                o_ready
);

    // Flatten stage: buffers one complete multi-channel frame (FILL), then
    // streams it out one pixel per enabled clock (RELEASE).
    // NOTE(review): rst_n is sampled inside a posedge-only always_ff below,
    // i.e. it behaves as a synchronous reset despite the port comment.

    // Number of pixels streamed out per frame.
    localparam int TOTAL_PIX = DIMENSION * img_width * img_height;

    logic [DIMENSION-1:0][img_height-1:0][img_width-1:0][PIX_WIDTH-1:0] img_buf;
    // Same storage viewed as a flat vector of pixels, indexed by o_cntr.
    wire  [DIMENSION*img_height*img_width-1:0][PIX_WIDTH-1:0] img_buf_plain = img_buf;
    // The counter has to reach TOTAL_PIX itself (terminal value of RELEASE),
    // so it needs $clog2(TOTAL_PIX+1) bits. With $clog2(TOTAL_PIX) bits the
    // terminal value is unreachable whenever TOTAL_PIX is a power of two.
    logic [$clog2(TOTAL_PIX+1)-1:0] o_cntr;

    typedef enum logic [2:0] {
        IDLE    = 'd1,
        FILL    = 'd2,
        RELEASE = 'd4
    } e_state;

    e_state state;


    // Control FSM and output stream generation.
    always_ff @(posedge clk) begin
        if(clk_en) begin

            // Defaults, overridden by the active state below.
            o_valid <= 1'd0;

            o_ready <= 1'b1;
            case (state)
                IDLE: begin
                    // An accepted start-of-packet begins a new frame.
                    if (i_valid && i_sop && o_ready) begin
                        state <= FILL;
                    end
                end
                FILL: begin
                    // End-of-packet: stop accepting input and start read-out.
                    if(i_valid && i_eop)begin
                        state   <= RELEASE;
                        o_cntr  <= '0;
                        o_ready <= 1'b0;
                    end
                end
                RELEASE: begin

                    if(o_cntr == TOTAL_PIX)begin
                        state <= IDLE;
                    end
                    else begin
                        o_valid <= 1'b1;
                        o_data  <= img_buf_plain[o_cntr];
                        o_cntr  <= o_cntr + 'd1;
                    end

                    // sop/eop mark the first/last pixel of the output packet.
                    o_sop   <= o_cntr == 'd0;
                    o_eop   <= o_cntr == (TOTAL_PIX - 1);
                    o_ready <= 1'b0;
                end
                default : state <= IDLE;
            endcase
        end

        // Reset has priority over clk_en.
        if(~rst_n) begin
            o_cntr <= 0;
            state  <= IDLE;
        end
    end

    // Frame buffer: each accepted input shifts every channel's buffer by one
    // pixel and inserts the new pixel at the most significant position.
    always_ff @(posedge clk) begin
        if(clk_en) begin
            if (i_valid && o_ready) begin
                foreach (img_buf[i]) begin
                    img_buf[i] <= {i_data[i], img_buf[i][img_height-1:1], img_buf[i][0][img_width-1:1]};
                end
            end
        end
    end

endmodule : flat
--------------------------------------------------------------------------------
/Fully connected layer/fully_connected_layer.sv:
--------------------------------------------------------------------------------
// Fully connected layer of CNN. For sim purposes:
// Image width and height are not changeable dynamically
// To simplify simulation all weights initialize from "CNN.svh" -- Temporal
// -----------------------------------------------------------------------------
// Copyright (c) 2014-2024 All rights reserved
// -----------------------------------------------------------------------------
// Author : Maksim Ananev mananev086@gmail.com
//
// Create : 2024-05-13 11:30:23
// Revise : 2024-10-22 12:20:46
// Editor : sublime text4, tab size (4)
// -----------------------------------------------------------------------------

module fully_connected_layer #(
    //data width parameters
    parameter PIX_WIDTH          = 16 ,
    parameter WEIGHT_WIDTH       = 16 ,
    parameter WEIGHT_FRACT_WIDTH = 10 ,
    //array_parameter
    parameter IN_DIMENSION       = 200,
    parameter OUT_DIMENSION      = 64
) (
    input                                             clk                 , // Clock
    input                                             clk_en              , // Clock Enable
    input                                             rst_n               , // Asynchronous reset active low
    //input pixels
    input        [                     PIX_WIDTH-1:0] i_data              ,
    input                                             i_valid             ,
    input                                             i_sop               ,
    input                                             i_eop               ,
    // output pixels
    output logic [PIX_WIDTH+$clog2(IN_DIMENSION)-1:0] o_data              ,
    output logic                                      o_valid             ,
    output logic                                      o_sop               ,
    output logic                                      o_eop               ,
    ///
    input  int                                        weights_mem_in_data ,
    input        [          $clog2(IN_DIMENSION)-1:0] weights_mem_in_addr ,
    input        [           $clog2(OUT_DIMENSION):0] weights_mem_sel_addr,
    input                                             weights_mem_in_fc_wr,
    ///
    output logic                                      o_ready
);

    // Operation: input samples stream in (IDLE -> FILL) while OUT_DIMENSION
    // multiply-accumulators run in parallel, one per output neuron; after the
    // input end-of-packet the accumulated results are streamed out (RELEASE).
    // NOTE(review): rst_n is sampled in posedge-only always_ff blocks, i.e. it
    // behaves as a synchronous reset despite the port comment.

    // Input index while filling, output index while releasing. It also
    // addresses the weight memories.
    int col_cntr;
    // One-hot write strobes: bit y selects neuron y's weight memory, bit
    // OUT_DIMENSION selects the bias table.
    logic [OUT_DIMENSION :0] weight_wr;

    always_comb begin
        weight_wr                       = '0;
        weight_wr[weights_mem_sel_addr] = weights_mem_in_fc_wr;
    end

    genvar y;

    // Weight of every neuron for the input currently addressed by col_cntr
    // (registered memory output, one clock of read latency).
    logic [WEIGHT_WIDTH-1:0]weights[OUT_DIMENSION];

    generate
        for(y = 0; y < OUT_DIMENSION; y++)begin
            single_port_rom #(
                .ADDR_WIDTH($clog2(IN_DIMENSION)),
                .DATA_WIDTH(WEIGHT_WIDTH)
            )
            weight_rom(
                .clk(clk),
                .w_addr(weights_mem_in_addr),
                .r_addr(col_cntr),
                .data(weights_mem_in_data),
                .o(weights[y]),
                .we(weight_wr[y])

            );
        end

    endgenerate




    logic [WEIGHT_WIDTH-1:0]bias[OUT_DIMENSION];

    // Input stream delayed by one clock to line up with the registered
    // weight-memory output.
    logic [PIX_WIDTH-1:0] i_data_ff;
    logic                 i_sop_ff;
    logic                 o_ready_ff;
    logic                 i_valid_ff;
    always_ff @(posedge clk) begin

        if(weight_wr[OUT_DIMENSION])
            bias[weights_mem_in_addr] <= weights_mem_in_data;

        i_data_ff  <= i_data;
        i_valid_ff <= i_valid;
        i_sop_ff   <= i_sop;
        o_ready_ff <= o_ready;

        if(~rst_n) begin
            i_data_ff  <= '0;
            i_sop_ff   <= '0;
            o_ready_ff <= '0;
            i_valid_ff <= '0;
        end
    end



    // Accumulator width: a PIX_WIDTH x WEIGHT_WIDTH product summed over
    // IN_DIMENSION terms needs PIX_WIDTH + WEIGHT_WIDTH + clog2(IN_DIMENSION)
    // bits. The previous PIX_WIDTH*WEIGHT_FRACT_WIDTH + clog2(OUT_DIMENSION)
    // was several times too wide with the default parameters and too narrow
    // for small WEIGHT_FRACT_WIDTH values.
    localparam int ACC_WIDTH = PIX_WIDTH + WEIGHT_WIDTH + $clog2(IN_DIMENSION);

    logic [ACC_WIDTH-1:0] integrators[OUT_DIMENSION];


    typedef enum logic [2:0] {
        IDLE    = 'd1,
        FILL    = 'd2,
        RELEASE = 'd4
    } e_state;

    e_state state;

    // High for the first RELEASE cycle: the last input sample is still being
    // accumulated, so no output is produced yet.
    logic fill_delay;

    always_ff @(posedge clk) begin
        if(clk_en) begin

            // Defaults, overridden by the active state below.
            o_valid    <= 1'd0;
            o_ready    <= 1'b1;
            fill_delay <= 1'b0;

            case (state)
                IDLE: begin

                    col_cntr <= '0;

                    if (i_valid && i_sop && o_ready) begin
                        state    <= FILL;
                        col_cntr <= col_cntr + 'd1;
                    end
                end
                FILL: begin
                    if (i_valid) begin
                        col_cntr <= col_cntr + 'd1;

                        if(i_eop)begin
                            state      <= RELEASE;
                            col_cntr   <= '0;
                            o_ready    <= 1'b0;
                            fill_delay <= 1'b1;
                        end
                    end

                end
                RELEASE: begin

                    if(col_cntr == OUT_DIMENSION)begin
                        state <= IDLE;
                    end
                    else begin
                        // Drop the fractional bits of the fixed-point sum,
                        // then add the bias.
                        o_data   <= $signed(integrators[col_cntr])/(2**WEIGHT_FRACT_WIDTH) + $signed(bias[col_cntr]);
                        o_valid  <= !fill_delay;
                        col_cntr <= col_cntr + $size(col_cntr)'(!fill_delay);
                    end

                    o_sop   <= col_cntr == 'd0;
                    o_eop   <= col_cntr == (OUT_DIMENSION- 1);
                    o_ready <= 1'b0;

                end

                default : state <= IDLE;
            endcase
        end

        // Reset has priority over clk_en.
        if(~rst_n) begin
            col_cntr <= 0;
            state    <= IDLE;
        end
    end

    // Multiply-accumulate: the first sample of a packet (sop) loads the
    // accumulator, every following sample adds to it.
    always_ff @(posedge clk) begin
        if(clk_en) begin
            if(i_valid_ff && o_ready_ff)

                foreach (integrators[x]) begin
                    if(i_sop_ff)
                        integrators[x] <= $signed(weights[x])*$signed(i_data_ff);
                    else
                        integrators[x] <= $signed(weights[x])*$signed(i_data_ff) + $signed(integrators[x]);
                end
        end
    end


endmodule : fully_connected_layer


// Simple memory with independent read and write addresses and a registered
// read output. Despite the "rom" name it is writable through we/w_addr/data.
module single_port_rom
    # (parameter ADDR_WIDTH = 4,
       parameter DATA_WIDTH = 32
      )

    (   input                         clk,
        input        [ADDR_WIDTH-1:0] r_addr,
        input        [ADDR_WIDTH-1:0] w_addr,
        input        [DATA_WIDTH-1:0] data,
        output logic [DATA_WIDTH-1:0] o,
        input                         we
    );

    reg [DATA_WIDTH-1:0] mem [2**ADDR_WIDTH];

    always @ (posedge clk) begin
        if (we)
            mem[w_addr] <= data;

        // Read result is available one clock after r_addr.
        o <= mem[r_addr];
    end


endmodule
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Convolutional-Neural-Network-using-SystemVerilog
Synthesizable RTL-Based video stream Convolutional Neural Network ( non HLS )

The testbench's images are taken from MNIST digits dataset.

![image](https://github.com/user-attachments/assets/447386e3-ac5d-4a59-b600-3b1323c77b01)



All weights and reference calculated by "cnn_behind.py"

Numbers and size of Convolution and Fully Connected layers are parameterizable.

In testbench I used this CNN structure:

    conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0)
    conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0)
    fc1 = nn.Linear(200, 64)
    fc2 = nn.Linear(64, 10)

![image](https://github.com/user-attachments/assets/13c7ffff-05bd-4ebd-9d6c-612513aa67b7)



Simulation Results:
![image](https://github.com/user-attachments/assets/6dc68ca5-9127-4118-94c8-77288268b2d6)
--------------------------------------------------------------------------------
/activation_func/relu.sv:
--------------------------------------------------------------------------------
// Element-wise ReLU over DIMENSION parallel channels (purely combinational):
// a value whose MSB (sign bit) is set is replaced by zero.
module relu #(
    parameter PIX_WIDTH = 8,
    parameter DIMENSION = 4
) (
    //input pixels
    input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data ,
    // output pixels
    output logic [DIMENSION-1:0][PIX_WIDTH-1:0] o_data
);


    always_comb begin
        foreach (o_data[ch]) begin
            if (i_data[ch][PIX_WIDTH-1])
                o_data[ch] = '0;
            else
                o_data[ch] = i_data[ch];
        end
    end



endmodule : relu
--------------------------------------------------------------------------------
/blocks/conv_block.sv:
--------------------------------------------------------------------------------

// One convolution layer: an OUT_DIMENSION x IN_DIMENSION array of 2D `conv`
// filters. For every output channel the filter results of all input channels
// are summed and the channel bias is added.
module conv_block #(
    //data width parameters
    parameter PIX_WIDTH          = 8     ,
    parameter WEIGHT_WIDTH       = 10    ,
    parameter WEIGHT_FRACT_WIDTH = 5     ,
    parameter TRUNK              = "TRUE",
    //resolution
    parameter IMG_WIDTH          = 28    ,
    parameter IMG_HEIGHT         = 28    ,
    //conv_array_parameter
    parameter KERNEL_DIMENSION   = 3     ,
    parameter IN_DIMENSION       = 1     ,
    parameter OUT_DIMENSION      = 4
) (
    input                                                     clk                      ,
    input                                                     clk_en                   ,
    input                                                     rst_n                    ,
    //input pixels
    input        [ IN_DIMENSION-1:0][          PIX_WIDTH-1:0] i_data                   ,
    input                                                     i_valid                  ,
    input                                                     i_sop                    ,
    input                                                     i_eop                    ,
    // output pixels
    output logic [OUT_DIMENSION-1:0][((TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH))-1:0] o_data ,
    output logic                                              o_valid                  ,
    output logic                                              o_sop                    ,
    output logic                                              o_eop                    ,
    ///
    input  int                                                weights_mem_in_data      ,
    input  int                                                weights_mem_in_addr      ,
    input                                                     weights_mem_in_kernel_wr ,
    ///
    // input [OUT_DIMENSION-1:0][WEIGHT_WIDTH-1:0] bias ,
    ///
    output logic                                              o_ready
);

    // Width of one filter result (same expression as used for o_data).
    localparam int OUT_W = (TRUNK == "TRUE") ? PIX_WIDTH : (PIX_WIDTH+WEIGHT_FRACT_WIDTH);

    // Word-addressable coefficient store: kernels in the low words, the
    // OUT_DIMENSION biases in the topmost words.
    logic [OUT_DIMENSION + OUT_DIMENSION * IN_DIMENSION * KERNEL_DIMENSION * KERNEL_DIMENSION - 1 : 0][WEIGHT_WIDTH-1:0] kernel_plain;
    wire  [OUT_DIMENSION-1:0][IN_DIMENSION-1:0][KERNEL_DIMENSION-1:0][KERNEL_DIMENSION-1:0][WEIGHT_WIDTH-1:0] kernel;
    wire  [OUT_DIMENSION-1:0][WEIGHT_WIDTH-1:0] bias;
    assign {bias,kernel} = kernel_plain;

    // Coefficient write port (no reset: contents persist until rewritten).
    always_ff @(posedge clk) begin
        if(weights_mem_in_kernel_wr)
            kernel_plain[weights_mem_in_addr] <= weights_mem_in_data;
    end

    logic signed [OUT_W-1:0] conv_outputs[OUT_DIMENSION][IN_DIMENSION];

    logic valid[OUT_DIMENSION][IN_DIMENSION];
    logic sop  [OUT_DIMENSION][IN_DIMENSION];
    logic eop  [OUT_DIMENSION][IN_DIMENSION];
    logic ready[OUT_DIMENSION][IN_DIMENSION];


    // One filter per (output channel, input channel) pair.
    genvar o_ch, i_ch;
    generate
        for (o_ch = 0; o_ch < OUT_DIMENSION; o_ch++) begin
            for (i_ch = 0; i_ch < IN_DIMENSION; i_ch++) begin

                conv #(
                    .PIX_WIDTH         (PIX_WIDTH         ),
                    .WEIGHT_WIDTH      (WEIGHT_WIDTH      ),
                    .WEIGHT_FRACT_WIDTH(WEIGHT_FRACT_WIDTH),
                    .TRUNK             (TRUNK             ),
                    .KERNEL_DIMENSION  (KERNEL_DIMENSION  ),
                    .img_width         (IMG_WIDTH         ),
                    .img_height        (IMG_HEIGHT        )
                ) inst_conv (
                    .clk      (clk                     ),
                    .clk_en   (clk_en                  ),
                    .rst_n    (rst_n                   ),
                    .i_data   (i_data[i_ch]            ),
                    .i_valid  (i_valid                 ),
                    .i_sop    (i_sop                   ),
                    .i_eop    (i_eop                   ),
                    .o_data   (conv_outputs[o_ch][i_ch]),
                    .o_valid  (valid[o_ch][i_ch]       ),
                    .o_sop    (sop  [o_ch][i_ch]       ),
                    .o_eop    (eop  [o_ch][i_ch]       ),
                    .kernel   (kernel[o_ch][i_ch]      ),
                    .ready    (ready[o_ch][i_ch]       ),
                    .cols_cntr(                        ),
                    .rows_cntr(                        )
                );

            end
        end
    endgenerate


    logic [OUT_W-1:0] sum[OUT_DIMENSION];

    // Per output channel: add up the filter results of all input channels.
    always_comb begin
        for (int oc = 0; oc < OUT_DIMENSION; oc++) begin
            sum[oc] = '0;
        end

        for (int oc = 0; oc < OUT_DIMENSION; oc++) begin
            for (int ic = 0; ic < IN_DIMENSION; ic++) begin
                sum[oc] = sum[oc] + $signed(conv_outputs[oc][ic]);
            end
        end
    end


    // Output register stage. Framing is taken from filter [0][0]; all filters
    // receive the same input control signals.
    always_ff @(posedge clk or negedge rst_n) begin
        if(~rst_n) begin
            o_valid <= 0;
            o_sop   <= 0;
            o_eop   <= 0;
        end else if(clk_en) begin
            o_valid <= valid[0][0];
            o_sop   <= sop[0][0];
            o_eop   <= eop[0][0];

            for (int oc = 0; oc < OUT_DIMENSION; oc++) begin
                o_data[oc] <= $signed(sum[oc]) + $signed(bias[oc]);
            end
        end
    end

    assign o_ready = ready[0][0];


endmodule : conv_block
--------------------------------------------------------------------------------
/blocks/max_pooling_block.sv:
--------------------------------------------------------------------------------
module max_pooling_block #(
    //data width parameters
    parameter PIX_WIDTH      = 8 ,
    //resolution
    parameter IMG_WIDTH      = 28,
    parameter IMG_HEIGHT     = 28,
    parameter POOL_DIMENSION = 2 ,
    //pool_array_parameter
    parameter DIMENSION      = 4
) (
    input                                       clk    ,
    input                                       clk_en ,
    input                                       rst_n  ,
    //input pixels
    input        [DIMENSION-1:0][PIX_WIDTH-1:0] i_data ,
    input                                       i_valid,
    input                                       i_sop  ,
    input                                       i_eop  ,
    // output pixels
    output logic [DIMENSION-1:0][PIX_WIDTH-1:0] o_data ,
    output logic                                o_valid,
    output logic                                o_sop  ,
    output logic                                o_eop  ,
    ///
    output logic                                o_ready
);

    logic valid[DIMENSION];
    logic sop  [DIMENSION];
    logic eop  [DIMENSION];
    logic ready[DIMENSION];

    genvar row,col;



    generate
        for (row = 0; row < DIMENSION; row++) begin

maxpooling #( 41 | .PIX_WIDTH (PIX_WIDTH ), 42 | .POOL_DIMENSION(POOL_DIMENSION), 43 | .WIDTH (IMG_WIDTH ), 44 | .HEIGHT (IMG_HEIGHT ) 45 | ) inst_maxpooling ( 46 | .clk (clk ), 47 | .clk_en (clk_en ), 48 | .rst_n (rst_n ), 49 | .i_data (i_data[row]), 50 | .i_valid (i_valid ), 51 | .i_sop (i_sop ), 52 | .i_eop (i_eop ), 53 | .o_data (o_data[row]), 54 | .o_valid (valid[row] ), 55 | .o_sop (sop [row] ), 56 | .o_eop (eop [row] ), 57 | .ready (ready[row] ), 58 | .cols_cntr( ), 59 | .rows_cntr( ) 60 | ); 61 | 62 | 63 | end 64 | endgenerate 65 | 66 | 67 | 68 | assign o_valid = valid[0]; 69 | assign o_sop = sop[0]; 70 | assign o_eop = eop[0]; 71 | assign o_ready = ready[0]; 72 | 73 | 74 | 75 | 76 | endmodule : max_pooling_block -------------------------------------------------------------------------------- /cnn_behind.py: -------------------------------------------------------------------------------- 1 | # Standard library imports 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | # PyTorch imports 6 | import torch 7 | from torch import nn, optim 8 | from torch.nn import functional as F 9 | from torch.utils.data import DataLoader 10 | from torch.utils.data.dataset import random_split 11 | from torch.optim.lr_scheduler import ReduceLROnPlateau 12 | 13 | # torchvision imports for datasets and transforms 14 | from torchvision import datasets, transforms 15 | 16 | 17 | load_model = 1 18 | 19 | 20 | # Define the CNN model 21 | class MyCNN(nn.Module): 22 | """ 23 | Simple CNN model with 2 convolutional layers and 2 fully connected layers. 24 | 25 | Args: 26 | ----- 27 | nn.Module: Base class for all neural network modules in PyTorch. 
28 | """ 29 | def __init__(self): 30 | super(MyCNN, self).__init__() 31 | self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0) 32 | self.conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0) 33 | self.fc1 = nn.Linear(200, 64) 34 | self.fc2 = nn.Linear(64, 10) 35 | 36 | self._initialize_weights() 37 | 38 | a = 0 39 | 40 | def forward(self, x): 41 | """ 42 | Forward pass of the model. 43 | 44 | Args: 45 | ----- 46 | x (torch.Tensor): Input tensor. 47 | 48 | Returns: 49 | -------- 50 | torch.Tensor: Output tensor. 51 | """ 52 | x = F.relu(self.conv1(x)) 53 | x = F.max_pool2d(x, 2, 2) 54 | x = F.relu(self.conv2(x)) 55 | x = F.max_pool2d(x, 2, 2) 56 | x = x.view(x.size(0), -1) # Flatten the tensor 57 | x = F.relu(self.fc1(x)) 58 | x = self.fc2(x) 59 | self.a = 0 60 | return x 61 | 62 | # Xavier weight initialization 63 | def _initialize_weights(self): 64 | """ 65 | Initialize the weights of the model. 66 | """ 67 | for m in self.modules(): 68 | if isinstance(m, nn.Conv2d): 69 | nn.init.normal_(m.weight, 0, 0.01) 70 | if m.bias is not None: 71 | nn.init.constant_(m.bias, 0) 72 | elif isinstance(m, nn.Linear): 73 | nn.init.xavier_uniform_(m.weight) 74 | if m.bias is not None: 75 | nn.init.constant_(m.bias, 0) 76 | class EarlyStopping: 77 | """ 78 | Early stopping to stop the training when the loss does not improve after 79 | 80 | Args: 81 | ----- 82 | patience (int): Number of epochs to wait before stopping the training. 83 | verbose (bool): If True, prints a message for each epoch where the loss 84 | does not improve. 85 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 
86 | """ 87 | def __init__(self, patience=7, verbose=False, delta=0): 88 | self.patience = patience 89 | self.verbose = verbose 90 | self.counter = 0 91 | self.best_score = None 92 | self.early_stop = False 93 | self.delta = delta 94 | 95 | def __call__(self, val_loss): 96 | score = -val_loss 97 | 98 | if self.best_score is None: 99 | self.best_score = score 100 | elif score < self.best_score + self.delta: 101 | self.counter += 1 102 | if self.counter >= self.patience: 103 | self.early_stop = True 104 | else: 105 | self.best_score = score 106 | self.counter = 0 107 | class Trainer: 108 | """ 109 | Trainer class to train the model. 110 | 111 | Args: 112 | ----- 113 | model (nn.Module): Neural network model. 114 | criterion (torch.nn.modules.loss): Loss function. 115 | optimizer (torch.optim): Optimizer. 116 | device (torch.device): Device to run the model on. 117 | patience (int): Number of epochs to wait before stopping the training. 118 | """ 119 | def __init__(self, model, criterion, optimizer, device, patience=7): 120 | self.model = model 121 | self.criterion = criterion 122 | self.optimizer = optimizer 123 | self.device = device 124 | self.early_stopping = EarlyStopping(patience=patience) 125 | self.scheduler = ReduceLROnPlateau(self.optimizer, 'min', patience=3, verbose=True, factor=0.5, min_lr=1e-6) 126 | self.train_losses = [] 127 | self.val_losses = [] 128 | self.gradient_norms = [] 129 | 130 | def train(self, train_loader, val_loader, epochs): 131 | """ 132 | Train the model. 133 | 134 | Args: 135 | ----- 136 | train_loader (torch.utils.data.DataLoader): DataLoader for training dataset. 137 | val_loader (torch.utils.data.DataLoader): DataLoader for validation dataset. 138 | epochs (int): Number of epochs to train the model. 
139 | """ 140 | for epoch in range(epochs): 141 | self.model.train() 142 | for images, labels in train_loader: 143 | images, labels = images.to(self.device), labels.to(self.device) 144 | 145 | self.optimizer.zero_grad() 146 | outputs = self.model(images) 147 | loss = self.criterion(outputs, labels) 148 | loss.backward() 149 | self.optimizer.step() 150 | 151 | self.train_losses.append(loss.item()) 152 | 153 | val_loss = self.evaluate(val_loader) 154 | self.val_losses.append(val_loss) 155 | self.scheduler.step(val_loss) 156 | self.early_stopping(val_loss) 157 | 158 | # Log the training and validation loss 159 | print(f'Epoch {epoch+1}, Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}') 160 | 161 | if self.early_stopping.early_stop: 162 | print("Early stopping") 163 | break 164 | 165 | def evaluate(self, test_loader): 166 | """ 167 | Evaluate the model on the test dataset. 168 | 169 | Args: 170 | ----- 171 | test_loader (torch.utils.data.DataLoader): DataLoader for test dataset. 172 | 173 | Returns: 174 | -------- 175 | float: Average loss on the test dataset. 176 | """ 177 | self.model.eval() 178 | total_loss = 0 179 | with torch.no_grad(): 180 | for images, labels in test_loader: 181 | images, labels = images.to(self.device), labels.to(self.device) 182 | 183 | outputs = self.model(images) 184 | loss = self.criterion(outputs, labels) 185 | total_loss += loss.item() 186 | 187 | return total_loss / len(test_loader) 188 | 189 | def accuracy(self, test_loader): 190 | """ 191 | Calculate the accuracy of the model on the test dataset. 192 | 193 | Args: 194 | ----- 195 | test_loader (torch.utils.data.DataLoader): DataLoader for test dataset. 196 | 197 | Returns: 198 | -------- 199 | float: Accuracy of the model on the test dataset. 
200 | """ 201 | self.model.eval() 202 | correct = 0 203 | total = 0 204 | with torch.no_grad(): 205 | for images, labels in test_loader: 206 | images, labels = images.to(self.device), labels.to(self.device) 207 | 208 | outputs = self.model(images) 209 | _, predicted = torch.max(outputs.data, 1) 210 | total += labels.size(0) 211 | correct += (predicted == labels).sum().item() 212 | 213 | return correct / total 214 | 215 | def plot_losses(self, window_size=100): 216 | # Compute moving averages 217 | train_losses_smooth = self.moving_average(self.train_losses, window_size) 218 | val_losses_smooth = self.moving_average(self.val_losses, window_size) 219 | 220 | # Plot 221 | plt.plot(train_losses_smooth, label='Train Loss') 222 | plt.plot(val_losses_smooth, label='Validation Loss') 223 | plt.legend() 224 | plt.grid() 225 | plt.title('Losses') 226 | 227 | def moving_average(self, data, window_size): 228 | return np.convolve(data, np.ones(window_size)/window_size, mode='valid') 229 | # Data loading and transformation 230 | transform = transforms.Compose([ 231 | transforms.ToTensor(), 232 | transforms.Normalize((0.0,), (1.0,)) # Normalize for grayscale image 233 | ]) 234 | 235 | mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform) 236 | test_dataset = datasets.MNIST(root='./data', train=False, transform=transform) 237 | 238 | # Plot a sample image 239 | image, label = mnist_dataset[0] 240 | # plt.imshow(image.squeeze().numpy(), cmap='gray') 241 | # plt.title(f'Label: {label}') 242 | # plt.show() 243 | 244 | # Split the dataset into training and validation sets 245 | train_split = 0.8 246 | 247 | train_size = int(train_split * len(mnist_dataset)) 248 | val_size = len(mnist_dataset) - train_size 249 | 250 | # Split the dataset 251 | train_dataset, val_dataset = random_split(mnist_dataset, [train_size, val_size]) 252 | 253 | # Create DataLoaders for each dataset 254 | train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) 
255 | val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False) 256 | test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False) 257 | # Model instantiation 258 | model = MyCNN() 259 | 260 | 261 | path = "C:/projects/DSP_modules/2D/soft/lstmmodelgpu.pth" 262 | 263 | # Move model to GPU if available 264 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 265 | 266 | model = model.to(device) 267 | 268 | #Loss function and optimizer 269 | criterion = nn.CrossEntropyLoss() 270 | optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5, amsgrad=True, eps=1e-8, betas=(0.9, 0.999)) 271 | 272 | 273 | if load_model == 1: 274 | # Load: 275 | model.load_state_dict(torch.load(path)) 276 | 277 | 278 | 279 | # # Trainer instantiation 280 | trainer = Trainer(model, criterion, optimizer, device, patience=10) 281 | 282 | if load_model == 0: 283 | # Training 284 | print('train started') 285 | trainer.train(train_loader, val_loader, epochs=10) 286 | 287 | torch.save(model.state_dict(), path) 288 | 289 | 290 | with open('output.txt', 'w') as f: 291 | f.write('real fc1_weights_re[64][200] = ') 292 | 293 | torch.set_printoptions(threshold=15_000) 294 | 295 | print(model.fc1.weight, file=open('output.txt', 'a')) 296 | 297 | #Evaluation 298 | print("evaluation") 299 | model.a = 1 300 | loss = trainer.evaluate(test_loader) 301 | accuracy = trainer.accuracy(test_loader) 302 | print(f'Accuracy: {accuracy:.2%}') 303 | 304 | print('FINISHED!!!') 305 | print('FINISHED!!!') -------------------------------------------------------------------------------- /pooling/maxpooling.sv: -------------------------------------------------------------------------------- 1 | // For sim purposes: Image width and height are not changeable dynamically 2 | // 3 | // ----------------------------------------------------------------------------- 4 | // Copyright (c) 2014-2024 All rights reserved 5 | // 
----------------------------------------------------------------------------- 6 | // Author : Maksim Ananev mananev086@gmail.com 7 | // 8 | // Create : 2024-05-13 11:30:23 9 | // Revise : 2024-10-22 12:20:46 10 | // Editor : sublime text4, tab size (4) 11 | // ----------------------------------------------------------------------------- 12 | 13 | 14 | module maxpooling #( 15 | parameter PIX_WIDTH = 8 , 16 | parameter POOL_DIMENSION = 2 , 17 | parameter WIDTH = 28, 18 | parameter HEIGHT = 28 19 | ) ( 20 | input clk , // Clock 21 | input clk_en , // Clock Enable 22 | input rst_n , // Asynchronous reset active low 23 | //input pixels 24 | input [PIX_WIDTH-1:0] i_data , 25 | input i_valid , 26 | input i_sop , 27 | input i_eop , 28 | // output pixels 29 | output [PIX_WIDTH-1:0] o_data , 30 | output o_valid , 31 | output o_sop , 32 | output o_eop , 33 | /// 34 | output logic ready , 35 | output logic [ 11:0] cols_cntr, 36 | output logic [ 11:0] rows_cntr 37 | ); 38 | 39 | 40 | 41 | /* 42 | Pixels Delay scheme. if maxpooling 3*3 43 | 44 | pixel_input----------->-------------------- --pix[2][2]--> --pix[2][1]--> 45 | /-------\ | | | 46 | ---<--| FIFO_0 |--<------>----delayed_line[0]------->-----|FF|---------->------|FF|--->-----------pix[2][0]-->-- 47 | | \-------/ 48 | | --pix[1][2]--> --pix[1][1]--> 49 | | | | 50 | ------------------------->---delayed_line[1]------->------|FF|---------->------|FF|--->-----------pix[1][0]-->-- 51 | | 52 | | --pix[0][2]--> --pix[0][2]--> 53 | | /-------\ | | 54 | --->--| FIFO_1 |--------->---delayed_line[2]------->------|FF|---------->------|FF|--->-----------pix[0][0]-->-- 55 | \-------/ 56 | */ 57 | 58 | logic [WIDTH-1:0][PIX_WIDTH-1:0] fifo[POOL_DIMENSION-1]; /// !!!!!!!!!!!!!!!!!!!!!!! only for simulation, MUST BE REPLACED by a regular FIFO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
59 | 60 | logic [PIX_WIDTH-1:0] delayed_line[POOL_DIMENSION]; 61 | 62 | bit [POOL_DIMENSION-2:0][PIX_WIDTH-1:0] after_fifos_ffs[POOL_DIMENSION]; 63 | 64 | logic [PIX_WIDTH-1:0] delayed_pix[POOL_DIMENSION][POOL_DIMENSION]; 65 | 66 | always_comb begin 67 | foreach (delayed_line[i]) begin 68 | delayed_line[i] = (i == 0) ? i_data : fifo[i-1][WIDTH-1]; 69 | end 70 | 71 | foreach (delayed_pix[i,y]) begin 72 | delayed_pix[i][y] = (y==0)?delayed_line[i] : after_fifos_ffs[i][y-1]; 73 | end 74 | end 75 | 76 | always_ff @(posedge clk) begin 77 | if(clk_en && (i_valid || !ready))begin 78 | foreach (fifo[i]) begin 79 | fifo[i] <= {fifo[i][WIDTH-2:0],( (i == 0) ? i_data : fifo[i-1][WIDTH-1] )}; 80 | end 81 | 82 | foreach (after_fifos_ffs[i]) begin 83 | after_fifos_ffs[i] <= {after_fifos_ffs[i],delayed_line[i]}; 84 | end 85 | end 86 | end 87 | 88 | 89 | 90 | /* 91 | maths. if maxpooling 3*3 92 | 93 | 94 | image lines 95 | /-- ------------------- --\ 96 | | | a11 | a12 | a13 | | 97 | | ------------------- | 98 | max| | a21 | a22 | a23 | | --------> 99 | | ------------------- | 100 | | | a31 | a32 | a33 | | 101 | \__ ------------------- --/ 102 | */ 103 | logic [PIX_WIDTH-1:0] max_detected ; 104 | logic [PIX_WIDTH-1:0] max_detected_ff; 105 | 106 | logic [POOL_DIMENSION-1:0][PIX_WIDTH-1:0] max_row_detected ; 107 | logic [POOL_DIMENSION-1:0][PIX_WIDTH-1:0] max_row_detected_ff; 108 | 109 | always_comb begin 110 | 111 | foreach (max_row_detected[i]) begin 112 | max_row_detected[i] = delayed_pix[i][0]; 113 | end 114 | 115 | for (int y = 0; y < POOL_DIMENSION; y++) begin 116 | for (int i = 1; i < POOL_DIMENSION; i++) begin 117 | if(max_row_detected[y] < delayed_pix[y][i]) 118 | max_row_detected[y] = delayed_pix[y][i]; 119 | end 120 | end 121 | 122 | max_detected = max_row_detected_ff[0]; 123 | for (int i = 1; i < POOL_DIMENSION; i++) begin 124 | if(max_detected < max_row_detected_ff[i]) 125 | max_detected = max_row_detected_ff[i]; 126 | end 127 | 128 | end 129 | 130 | always_ff @(posedge 
clk) begin 131 | if(clk_en) begin 132 | max_row_detected_ff <= max_row_detected; 133 | max_detected_ff <= max_detected; 134 | end 135 | end 136 | 137 | 138 | /* 139 | 140 | */ 141 | 142 | assign o_data = max_detected_ff; 143 | 144 | /* 145 | Latency 146 | */ 147 | logic [2:0] valid_delay = '0 ; 148 | wire valid_delayed = valid_delay[1]; 149 | always_ff @(posedge clk or negedge rst_n) begin 150 | if(~rst_n) begin 151 | valid_delay <= 0; 152 | end else begin 153 | if(clk_en) 154 | valid_delay <= $size(valid_delay)'( {valid_delay, i_valid && ready} ); 155 | end 156 | end 157 | 158 | 159 | /* 160 | counters 161 | */ 162 | 163 | logic [$clog2(POOL_DIMENSION)-1:0]valid_col,valid_row; 164 | 165 | always_ff @(posedge clk or negedge rst_n) begin 166 | if(~rst_n) begin 167 | cols_cntr <= 0; 168 | rows_cntr <= 0; 169 | valid_col <= 0; 170 | valid_row <= 0; 171 | end else begin 172 | if(clk_en)begin 173 | if(valid_delayed)begin 174 | cols_cntr <= (cols_cntr == WIDTH-1) ? '0 : (cols_cntr + 'd1); 175 | 176 | if(valid_col == POOL_DIMENSION-1) 177 | valid_col <= '0; 178 | else 179 | valid_col <= valid_col + 'd1; 180 | 181 | if(cols_cntr == WIDTH-1)begin 182 | rows_cntr <= rows_cntr + 'd1; 183 | valid_col <= '0; 184 | valid_row <= valid_row + 'd1; 185 | if (valid_row == POOL_DIMENSION-1) begin 186 | valid_row <= 0; 187 | end 188 | end 189 | end 190 | else if(i_sop)begin 191 | cols_cntr <= '0; 192 | rows_cntr <= '0; 193 | valid_col <= 0; 194 | valid_row <= 0; 195 | end 196 | end 197 | end 198 | end 199 | 200 | /* 201 | video control signals 202 | */ 203 | assign ready = clk_en; 204 | 205 | 206 | assign o_valid = valid_delayed && (valid_col == POOL_DIMENSION-1) && (valid_row == POOL_DIMENSION-1) ; 207 | 208 | assign o_eop = valid_delayed && (valid_col == POOL_DIMENSION-1) && (cols_cntr == WIDTH-(WIDTH[0]+POOL_DIMENSION[0]+1)) && (rows_cntr == HEIGHT-(HEIGHT[0]+POOL_DIMENSION[0]+1)); 209 | 210 | assign o_sop = valid_delayed && (valid_col == POOL_DIMENSION-1) && (rows_cntr == 
POOL_DIMENSION-1) && (cols_cntr == POOL_DIMENSION-1); 211 | 212 | 213 | 214 | 215 | 216 | endmodule : maxpooling 217 | -------------------------------------------------------------------------------- /top/CNN.sv: --------------------------------------------------------------------------------
// // Convolutional Neural Network module. For sim purposes:
// //   No back pressure in design
// //   The math of the module replicates the python script
// //       self.conv1 = nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=0)
// //       self.conv2 = nn.Conv2d(4, 8, kernel_size=3, stride=1, padding=0)
// //       self.fc1 = nn.Linear(200, 64)
// //       self.fc2 = nn.Linear(64, 10)
// //
// // To simplify simulation all weights are initialized from "CNN.svh"
// //
// // -----------------------------------------------------------------------------
// // Copyright (c) 2014-2024 All rights reserved
// // -----------------------------------------------------------------------------
// // Author : Maksim Ananev mananev086@gmail.com
// //
// // Create : 2024-05-13 11:30:23
// // Revise : 2024-10-22 12:20:46
// // Editor : sublime text4, tab size (4)
// // -----------------------------------------------------------------------------


// Top level of the network: CONV_NUMB stages of (conv_block -> relu ->
// max_pooling_block), followed by `flat` and FLAT_NUMB stages of
// (fully_connected_layer -> relu). The outputs of the last fully connected
// layer are collected one by one into `classes`; `o_valid` pulses for one
// enabled clock after the sample flagged with end-of-packet has been stored.
//
// Parameters
//   PIX_WIDTH        - width of one pixel / feature value
//   WEIGHT_WIDTH     - forwarded to conv_block and fully_connected_layer
//   FRACT_WIDTH      - forwarded as WEIGHT_FRACT_WIDTH
//   CONV_NUMB        - number of conv/relu/pool stages
//   CONV_DIMENSION   - per stage {OUT_DIMENSION, IN_DIMENSION}
//   KERNEL_DIMENSION - per stage kernel size forwarded to conv_block
//   FLAT_NUMB        - number of fully connected stages
//   CLASSES_QNT      - number of 32-bit entries in `classes`
//   FLAT_DIMENSION   - layer sizes; stage n uses [n] as input and [n+1] as output size
//   IMG_WIDTH/HEIGHT - size of the input image
module CNN #(
    parameter                                 PIX_WIDTH        = 16                          ,
    parameter                                 WEIGHT_WIDTH     = 10                          ,
    parameter                                 FRACT_WIDTH      = 5                           ,
    parameter                                 CONV_NUMB        = 2                           ,
    parameter logic [CONV_NUMB-1:0][1:0][7:0] CONV_DIMENSION   = {{8'd8, 8'd4}, {8'd4, 8'd1}},
    parameter logic [CONV_NUMB-1:0][     3:0] KERNEL_DIMENSION = {4'd3 , 4'd3}               ,
    parameter                                 FLAT_NUMB        = 2                           ,
    parameter                                 CLASSES_QNT      = 10                          ,
    parameter logic [FLAT_NUMB  :0][15:0]     FLAT_DIMENSION   = {16'd10, 16'd64, 16'd200}   ,
    parameter                                 IMG_WIDTH        = 28                          ,
    parameter                                 IMG_HEIGHT       = 28
) (
    input                                clk                     , // Clock
    input                                clk_en                  , // Clock Enable
    input                                rst_n                   , // Asynchronous reset active low
    // input pixels
    input        [        PIX_WIDTH-1:0] i_data                  ,
    input                                i_valid                 ,
    input                                i_sop                   ,
    input                                i_eop                   ,
    // collected outputs of the last fully connected layer
    output logic                         o_valid                 ,
    output logic [CLASSES_QNT-1:0][31:0] classes                 ,
    // weight loading ports, forwarded unchanged to the conv / fully connected blocks
    input  int                           weights_mem_in_data     ,
    input  int                           weights_mem_in_addr     ,
    input  int                           weights_mem_sel_addr    ,
    input        [        CONV_NUMB-1:0] weights_mem_in_kernel_wr,
    input        [        FLAT_NUMB-1:0] weights_mem_in_fc_wr
);


    // ------------------------------------------------------------------------
    // Convolution stages
    // ------------------------------------------------------------------------

    // Per-stage data buses. Every bus has room for 64 values of PIX_WIDTH bits.
    logic [63:0][PIX_WIDTH-1:0] conv_data[CONV_NUMB];
    logic [63:0][PIX_WIDTH-1:0] relu_data[CONV_NUMB];
    logic [63:0][PIX_WIDTH-1:0] pool_data[CONV_NUMB];

    logic conv_valid[CONV_NUMB];
    logic conv_sop  [CONV_NUMB];
    logic conv_eop  [CONV_NUMB];

    logic pool_valid[CONV_NUMB];
    logic pool_sop  [CONV_NUMB];
    logic pool_eop  [CONV_NUMB];

    // Input of every stage: the module inputs for stage 0, the previous pooling
    // output otherwise. Driving these from a generate-if (instead of a ternary
    // with `pool_*[numb-1]` in the port connection) keeps the index -1 from ever
    // being elaborated for stage 0.
    logic [63:0][PIX_WIDTH-1:0] stage_data [CONV_NUMB];
    logic                       stage_valid[CONV_NUMB];
    logic                       stage_sop  [CONV_NUMB];
    logic                       stage_eop  [CONV_NUMB];

    genvar numb;
    generate
        for (numb = 0; numb < CONV_NUMB; numb++) begin:conv_genloop

            // Image size seen by the convolution of this stage.
            // With the default 28x28 image: 28 for stage 0, 13 for stage 1.
            localparam int STAGE_IMG_WIDTH  = (IMG_WIDTH  - (2**(numb+1)-2))/(2**numb);
            localparam int STAGE_IMG_HEIGHT = (IMG_HEIGHT - (2**(numb+1)-2))/(2**numb);

            if (numb == 0) begin:stage_in_external
                assign stage_data [numb] = i_data;   // zero-extended to the bus width
                assign stage_valid[numb] = i_valid;
                assign stage_sop  [numb] = i_sop;
                assign stage_eop  [numb] = i_eop;
            end
            else begin:stage_in_chained
                assign stage_data [numb] = pool_data [numb-1];
                assign stage_valid[numb] = pool_valid[numb-1];
                assign stage_sop  [numb] = pool_sop  [numb-1];
                assign stage_eop  [numb] = pool_eop  [numb-1];
            end

            // NOTE(review): clk_en is tied high here while every other block
            // receives the module's clk_en - confirm this is intentional.
            conv_block #(
                .PIX_WIDTH         (PIX_WIDTH               ),
                .WEIGHT_WIDTH      (WEIGHT_WIDTH            ),
                .WEIGHT_FRACT_WIDTH(FRACT_WIDTH             ),
                .TRUNK             ("TRUE"                  ),
                .IMG_WIDTH         (STAGE_IMG_WIDTH         ),
                .IMG_HEIGHT        (STAGE_IMG_HEIGHT        ),
                .KERNEL_DIMENSION  (KERNEL_DIMENSION[numb]  ),
                .IN_DIMENSION      (CONV_DIMENSION[numb][0] ),
                .OUT_DIMENSION     (CONV_DIMENSION[numb][1] )
            ) conv_block (
                .clk                     (clk                           ),
                .clk_en                  (1'b1                          ),
                .rst_n                   (rst_n                         ),
                .i_data                  (stage_data [numb]             ),
                .i_valid                 (stage_valid[numb]             ),
                .i_sop                   (stage_sop  [numb]             ),
                .i_eop                   (stage_eop  [numb]             ),
                .o_data                  (conv_data [numb]              ),
                .o_valid                 (conv_valid[numb]              ),
                .o_sop                   (conv_sop  [numb]              ),
                .o_eop                   (conv_eop  [numb]              ),
                .weights_mem_in_data     (weights_mem_in_data           ),
                .weights_mem_in_addr     (weights_mem_in_addr           ),
                .weights_mem_in_kernel_wr(weights_mem_in_kernel_wr[numb]),
                .o_ready                 (                              )
            );


            relu #(
                .PIX_WIDTH(PIX_WIDTH              ),
                .DIMENSION(CONV_DIMENSION[numb][1])
            ) conv_relu (
                .i_data(conv_data[numb]),
                .o_data(relu_data[numb])
            );


            // The pooling block is configured with an image 2 pixels smaller in
            // each direction than the convolution input (26 and 11 for the
            // defaults). NOTE(review): the constant 2 presumably corresponds to
            // a 3x3 kernel without padding; it does not follow KERNEL_DIMENSION.
            max_pooling_block #(
                .PIX_WIDTH     (PIX_WIDTH              ),
                .IMG_WIDTH     (STAGE_IMG_WIDTH  - 2   ),
                .IMG_HEIGHT    (STAGE_IMG_HEIGHT - 2   ),
                .POOL_DIMENSION(2                      ),
                .DIMENSION     (CONV_DIMENSION[numb][1])
            ) max_pooling_block (
                .clk    (clk             ),
                .clk_en (clk_en          ),
                .rst_n  (rst_n           ),
                .i_data (relu_data [numb]),
                .i_valid(conv_valid[numb]),
                .i_sop  (conv_sop  [numb]),
                .i_eop  (conv_eop  [numb]),
                .o_data (pool_data [numb]),
                .o_valid(pool_valid[numb]),
                .o_sop  (pool_sop  [numb]),
                .o_eop  (pool_eop  [numb]),
                .o_ready(                )
            );
        end
    endgenerate


    // ------------------------------------------------------------------------
    // Flatten: output of the last pooling stage -> single PIX_WIDTH stream
    // ------------------------------------------------------------------------

    logic [PIX_WIDTH-1:0] flat_data ;
    logic                 flat_valid;
    logic                 flat_sop  ;
    logic                 flat_eop  ;
    logic                 flat_ready;

    flat #(
        .PIX_WIDTH (PIX_WIDTH                                        ),
        .DIMENSION (CONV_DIMENSION[CONV_NUMB-1][1]                   ),
        // 5x5 for the default 28x28 image and CONV_NUMB = 2
        .img_width ((IMG_WIDTH  - (2**(CONV_NUMB+1)-2))/(2**CONV_NUMB)),
        .img_height((IMG_HEIGHT - (2**(CONV_NUMB+1)-2))/(2**CONV_NUMB))
    ) inst_flat (
        .clk    (clk                    ),
        .clk_en (clk_en                 ),
        .rst_n  (rst_n                  ),
        .i_data (pool_data [CONV_NUMB-1]),
        .i_valid(pool_valid[CONV_NUMB-1]),
        .i_sop  (pool_sop  [CONV_NUMB-1]),
        .i_eop  (pool_eop  [CONV_NUMB-1]),
        .o_data (flat_data              ),
        .o_valid(flat_valid             ),
        .o_sop  (flat_sop               ),
        .o_eop  (flat_eop               ),
        .o_ready(flat_ready             )
    );


    // ------------------------------------------------------------------------
    // Fully connected stages
    // ------------------------------------------------------------------------

    // Width of every fully connected output: PIX_WIDTH widened by
    // $clog2 of the first layer's input size.
    localparam int FC_DATA_WIDTH = PIX_WIDTH + $clog2(FLAT_DIMENSION[0]);

    logic [FC_DATA_WIDTH-1:0] fc_data [FLAT_NUMB];
    logic                     fc_valid[FLAT_NUMB];
    logic                     fc_sop  [FLAT_NUMB];
    logic                     fc_eop  [FLAT_NUMB];
    logic                     fc_ready[FLAT_NUMB];

    logic [FC_DATA_WIDTH-1:0] fc_relu_data[FLAT_NUMB];

    // Input of every fully connected stage: the flatten output for stage 0,
    // the previous stage otherwise (generate-if avoids elaborating index -1).
    logic [FC_DATA_WIDTH-1:0] fc_in_data [FLAT_NUMB];
    logic                     fc_in_valid[FLAT_NUMB];
    logic                     fc_in_sop  [FLAT_NUMB];
    logic                     fc_in_eop  [FLAT_NUMB];

    generate
        for (numb = 0; numb < FLAT_NUMB; numb++) begin:fc_genloop

            if (numb == 0) begin:fc_in_flat
                assign fc_in_data [numb] = flat_data;   // zero-extended to FC_DATA_WIDTH
                assign fc_in_valid[numb] = flat_valid;
                assign fc_in_sop  [numb] = flat_sop;
                assign fc_in_eop  [numb] = flat_eop;
            end
            else begin:fc_in_chained
                // data comes through the relu, control signals straight from the layer
                assign fc_in_data [numb] = fc_relu_data[numb-1];
                assign fc_in_valid[numb] = fc_valid    [numb-1];
                assign fc_in_sop  [numb] = fc_sop      [numb-1];
                assign fc_in_eop  [numb] = fc_eop      [numb-1];
            end

            fully_connected_layer #(
                // the first layer consumes PIX_WIDTH data, later layers the widened data
                .PIX_WIDTH         (PIX_WIDTH + ((numb == 0) ? 0 : $clog2(FLAT_DIMENSION[0]))),
                .WEIGHT_WIDTH      (WEIGHT_WIDTH                                             ),
                .WEIGHT_FRACT_WIDTH(FRACT_WIDTH                                              ),
                .IN_DIMENSION      (FLAT_DIMENSION[numb]                                     ),
                .OUT_DIMENSION     (FLAT_DIMENSION[numb+1]                                   )
            ) inst_fully_connected_layer1 (
                .clk                 (clk                       ),
                .clk_en              (clk_en                    ),
                .rst_n               (rst_n                     ),
                .i_data              (fc_in_data [numb]         ),
                .i_valid             (fc_in_valid[numb]         ),
                .i_sop               (fc_in_sop  [numb]         ),
                .i_eop               (fc_in_eop  [numb]         ),
                .o_data              (fc_data[numb]             ),
                .o_valid             (fc_valid[numb]            ),
                .o_sop               (fc_sop[numb]              ),
                .o_eop               (fc_eop[numb]              ),
                .o_ready             (fc_ready[numb]            ),
                .weights_mem_in_data (weights_mem_in_data       ),
                .weights_mem_in_addr (weights_mem_in_addr       ),
                .weights_mem_sel_addr(weights_mem_sel_addr      ),
                .weights_mem_in_fc_wr(weights_mem_in_fc_wr[numb])
            );

            relu #(
                .PIX_WIDTH(FC_DATA_WIDTH),
                .DIMENSION(1            )
            ) fc_relu (
                .i_data(fc_data[numb]     ),
                .o_data(fc_relu_data[numb])
            );
        end
    endgenerate


    // ------------------------------------------------------------------------
    // Class collection
    // ------------------------------------------------------------------------

    // Index of the `classes` entry written by the next valid sample.
    int classes_cntr = 0;

    // Reset is checked first so that all three registers (including o_valid)
    // are cleared asynchronously regardless of clk_en.
    always_ff @(posedge clk or negedge rst_n) begin
        if(~rst_n) begin
            classes      <= '0;
            classes_cntr <= '0;
            o_valid      <= 1'b0;
        end
        else if(clk_en) begin

            // default: o_valid is a single-cycle pulse
            o_valid <= 1'b0;

            if(fc_valid[FLAT_NUMB-1]) begin

                // sign-extend the last layer's output into the 32-bit entry
                classes[classes_cntr] <= $signed(fc_data[FLAT_NUMB-1]);

                if(fc_eop[FLAT_NUMB-1]) begin
                    // last sample of the packet: restart the index and flag the result
                    classes_cntr <= '0;
                    o_valid      <= 1'b1;
                end
                else
                    classes_cntr <= classes_cntr + 'd1;

            end

        end
    end

endmodule : CNN

--------------------------------------------------------------------------------