├── .gitignore
├── BitFusion_column.v
├── Buffer_32bit.v
├── Input_MUX_REG.v
├── PE.v
├── PE_adder.v
├── PE_register.v
├── Weight_MUX_REG.v
├── accumulator.v
├── bitbrick.v
├── bitbrick_shift.v
├── signed3bit_MUL.v
└── top.v

/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.log
3 | *.swp
4 | *.sh
5 | *.tcl
6 | *.svf
7 | *.ptcl
8 | *.txt
9 | *.out
10 | *.key
11 | 
12 | alib-52/
13 | designs/
14 | final_reports/
15 | inputs/
16 | csrc/
17 | output/
18 | reports/
19 | simv
20 | simv.daidir/
21 | WORK/
22 | 
--------------------------------------------------------------------------------
/BitFusion_column.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps

// BitFusion_column
// ----------------
// One column of 16 processing stages followed by an accumulator.
//
// Per stage i (1..16):
//   WBUF_data_in_i --> BUF_32bit --> Weight_MUX_REG --> PE --> PE_register
// The PE of stage i multiplies sorted_input_i with the sorted weight word and
// adds the registered sum coming out of stage i-1 (stage 1 adds zero).  The
// registered sum of stage 16 is fed to the ACC instance, which drives
// total_output.
//
// Ports
//   clk, reset          : forwarded unchanged to every sub-module
//   sorted_input_1..16  : 32-bit activation word per stage; these come from the
//                         input buffer outside this module, already sorted
//   WBUF_data_in_1..16  : 32-bit raw weight word per stage
//   sign_x, sign_y      : global sign flags, shared by all 16 PEs
//   input_bitwidth      : global mode select handed to every Weight_MUX_REG
//   signal              : global 48-bit shift control, shared by all 16 PEs
//   total_output        : 28-bit accumulator output
//
// Note: the stages are generated in a loop, so instance paths are
// stage[0..15].u_wbuf / u_wmux / u_pe / u_preg.
module BitFusion_column(
    input clk,
    input reset,

    input [31:0] sorted_input_1, sorted_input_2, sorted_input_3, sorted_input_4,
                 sorted_input_5, sorted_input_6, sorted_input_7, sorted_input_8,
                 sorted_input_9, sorted_input_10, sorted_input_11, sorted_input_12,
                 sorted_input_13, sorted_input_14, sorted_input_15, sorted_input_16,

    input [31:0] WBUF_data_in_1, WBUF_data_in_2, WBUF_data_in_3, WBUF_data_in_4,
                 WBUF_data_in_5, WBUF_data_in_6, WBUF_data_in_7, WBUF_data_in_8,
                 WBUF_data_in_9, WBUF_data_in_10, WBUF_data_in_11, WBUF_data_in_12,
                 WBUF_data_in_13, WBUF_data_in_14, WBUF_data_in_15, WBUF_data_in_16,

    // Global control, identical for every stage
    input [3:0] sign_x,
    input [3:0] sign_y,
    input [1:0] input_bitwidth,
    input [47:0] signal,

    output [27:0] total_output
    );

    localparam NUM_STAGES = 16;

    // Pack the individually named ports so a stage can pick its word by index:
    // bits [32*i +: 32] belong to stage i+1.
    wire [32*NUM_STAGES-1:0] act_bus = {
        sorted_input_16, sorted_input_15, sorted_input_14, sorted_input_13,
        sorted_input_12, sorted_input_11, sorted_input_10, sorted_input_9,
        sorted_input_8,  sorted_input_7,  sorted_input_6,  sorted_input_5,
        sorted_input_4,  sorted_input_3,  sorted_input_2,  sorted_input_1 };

    wire [32*NUM_STAGES-1:0] wgt_bus = {
        WBUF_data_in_16, WBUF_data_in_15, WBUF_data_in_14, WBUF_data_in_13,
        WBUF_data_in_12, WBUF_data_in_11, WBUF_data_in_10, WBUF_data_in_9,
        WBUF_data_in_8,  WBUF_data_in_7,  WBUF_data_in_6,  WBUF_data_in_5,
        WBUF_data_in_4,  WBUF_data_in_3,  WBUF_data_in_2,  WBUF_data_in_1 };

    // psum_chain[i] is the registered partial sum entering stage i+1;
    // psum_chain[NUM_STAGES] is what the accumulator receives.
    wire [19:0] psum_chain [0:NUM_STAGES];
    assign psum_chain[0] = 20'd0;   // first stage has nothing to add

    genvar g;
    generate
        for (g = 0; g < NUM_STAGES; g = g + 1) begin : stage
            wire [31:0] wbuf_q;         // weight word after the buffer
            wire [31:0] weight_sorted;  // weight word after Weight_MUX_REG
            wire [19:0] psum_next;      // PE result before PE_register

            BUF_32bit u_wbuf(
                .clk(clk), .reset(reset),
                .data_in(wgt_bus[32*g +: 32]),
                .data_out(wbuf_q));

            Weight_MUX_REG u_wmux(
                .clk(clk), .reset(reset),
                .buffer(wbuf_q),
                .input_bitwidth(input_bitwidth),
                .sorted_data(weight_sorted));

            PE u_pe(
                .clk(clk), .reset(reset),
                .x(act_bus[32*g +: 32]),
                .y(weight_sorted),
                .sign_x(sign_x), .sign_y(sign_y),
                .signal(signal),
                .previous_sum(psum_chain[g]),
                .PE_sum(psum_next));

            PE_register u_preg(
                .clk(clk), .reset(reset),
                .PE_sum(psum_next),
                .PE_sum_out(psum_chain[g + 1]));
        end
    endgenerate

    ACC accumulator(
        .clk(clk), .reset(reset),
        .PE_sum(psum_chain[NUM_STAGES]),
        .total_output(total_output));

endmodule

--------------------------------------------------------------------------------
/Buffer_32bit.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps

// BUF_32bit
// ---------
// 32 independent `buf` primitives: data_out[i] follows data_in[i].
// clk and reset are part of the port list but are not used inside the module.
module BUF_32bit(
    input clk,
    input reset,
    input [31:0] data_in,

    output [31:0] data_out
    );

    // Array of instances: BUF[i] connects data_in[i] to data_out[i].
    buf BUF [31:0] (data_out, data_in);

endmodule

--------------------------------------------------------------------------------
/Input_MUX_REG.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps

// Input_MUX_REG
// -------------
// Takes a 32-bit word from the buffer and re-arranges ("sorts") it according
// to the bit-width of the network.  The output is always 32 bits wide: in the
// wider modes a slice of the word is replicated to fill all 32 bits.
// Weight_MUX_REG is the separate module that does the same job for weights
// (with a different replication pattern).
//
// The MUX_REG on the input side is steered by the *weight* bit-width
// (and the one on the weight side by the input bit-width).
//
// weight_bitwidth
//   2'b00        : pass-through, sorted_data <= buffer every cycle
//   2'b01        : the word is consumed in two steps (low half, then high half);
//                  every 2-bit field of the selected half is duplicated, and
//                  the two middle fields of each byte are swapped
//   2'b10, 2'b11 : the word is consumed in four steps (byte 0 .. byte 3);
//                  every 2-bit field of the selected byte is repeated 4 times
//
// sorted_data is registered and cleared by the synchronous reset.
//
// Fix relative to the previous revision: `state` used to keep its value while
// in pass-through mode, and the two-step mode fell into the four-step branches
// when `state` was 2 or 3.  After a mode change without a reset the module
// could therefore emit slices in the wrong format.  `state` is now cleared in
// pass-through mode, and the two-step mode looks at state[0] only.  Behaviour
// is unchanged whenever the mode is constant after reset.
//
// The alternative mappings that used to sit in this file as commented-out
// code were removed.
module Input_MUX_REG(
    input clk,
    input reset,

    input [1:0] weight_bitwidth,

    input [31:0] buffer,
    output reg [31:0] sorted_data
    );

    reg [1:0] state;   // step counter inside one buffer word

    // Two-step mode: duplicate each 2-bit field of a 16-bit half.
    // Within each byte the order is {f3, f1, f2, f0}.
    function [31:0] spread_half;
        input [15:0] h;
        begin
            spread_half = { {2{h[15:14]}}, {2{h[11:10]}}, {2{h[13:12]}}, {2{h[9:8]}},
                            {2{h[7:6]}},   {2{h[3:2]}},   {2{h[5:4]}},   {2{h[1:0]}} };
        end
    endfunction

    // Four-step mode: repeat each 2-bit field of one byte four times.
    function [31:0] spread_byte;
        input [7:0] b;
        begin
            spread_byte = { {4{b[7:6]}}, {4{b[5:4]}}, {4{b[3:2]}}, {4{b[1:0]}} };
        end
    endfunction

    always @ (posedge clk)
        if (reset) begin
            sorted_data <= 32'd0;
            state       <= 2'b00;
        end
        else if (weight_bitwidth == 2'b00) begin
            sorted_data <= buffer;
            state       <= 2'b00;          // do not carry a stale step into the next mode
        end
        else if (weight_bitwidth == 2'b01) begin
            if (!state[0]) begin
                sorted_data <= spread_half(buffer[15:0]);
                state       <= 2'b01;
            end
            else begin
                sorted_data <= spread_half(buffer[31:16]);
                state       <= 2'b00;
            end
        end
        else begin
            case (state)
                2'b00:   sorted_data <= spread_byte(buffer[7:0]);
                2'b01:   sorted_data <= spread_byte(buffer[15:8]);
                2'b10:   sorted_data <= spread_byte(buffer[23:16]);
                default: sorted_data <= spread_byte(buffer[31:24]);
            endcase
            state <= state + 2'b01;        // 3 wraps back to 0
        end

endmodule

--------------------------------------------------------------------------------
/PE.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps

// PE  [Fusion Unit]
// -----------------
// 16 bitbricks (4 x 4), each followed by a bitbrick_shift, and one PE_adder
// that sums the 16 shifted products together with previous_sum.
//
// Brick k (k = 0..15) is wired as follows:
//   x[2k+1:2k] * y[2k+1:2k]          2-bit operands
//   sign_x[k / 4], sign_y[k % 4]     sign flags
//   signal[3k+2:3k]                  3-bit shift control for its product
//
// PE_sum is 20 bits wide.  clk and reset are only forwarded to the bitbricks.
module PE(
    input clk,
    input reset,
    input [31:0] x,
    input [31:0] y,
    input [3:0] sign_x,
    input [3:0] sign_y,

    input [47:0] signal,

    input [19:0] previous_sum,

    output [19:0] PE_sum
    );

    wire [15:0] shifted [0:15];   // shifted product of each brick

    genvar k;
    generate
        for (k = 0; k < 16; k = k + 1) begin : brick
            wire [5:0] raw_product;

            bitbrick BB(
                .clk(clk), .reset(reset),
                .x(x[2*k +: 2]), .y(y[2*k +: 2]),
                .sign_x(sign_x[k / 4]), .sign_y(sign_y[k % 4]),
                .p(raw_product));

            bitbrick_shift BBS(
                .p(raw_product),
                .signal(signal[3*k +: 3]),
                .product(shifted[k]));
        end
    endgenerate

    PE_adder ADDER_TREE(
        .p_shift_0(shifted[0]),   .p_shift_1(shifted[1]),
        .p_shift_2(shifted[2]),   .p_shift_3(shifted[3]),
        .p_shift_4(shifted[4]),   .p_shift_5(shifted[5]),
        .p_shift_6(shifted[6]),   .p_shift_7(shifted[7]),
        .p_shift_8(shifted[8]),   .p_shift_9(shifted[9]),
        .p_shift_10(shifted[10]), .p_shift_11(shifted[11]),
        .p_shift_12(shifted[12]), .p_shift_13(shifted[13]),
        .p_shift_14(shifted[14]), .p_shift_15(shifted[15]),
        .previous_sum(previous_sum),

        .PE_sum(PE_sum)
    );

endmodule

--------------------------------------------------------------------------------
/PE_adder.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps

// PE_adder
// --------
// Combinational 17-input adder: the sixteen 16-bit products are sign-extended
// to 20 bits and added to the 20-bit previous_sum.  The result is truncated to
// 20 bits (wraps modulo 2^20).
module PE_adder(
    input [15:0] p_shift_0,
    input [15:0] p_shift_1,
    input [15:0] p_shift_2,
    input [15:0] p_shift_3,
    input [15:0] p_shift_4,
    input [15:0] p_shift_5,
    input [15:0] p_shift_6,
    input [15:0] p_shift_7,
    input [15:0] p_shift_8,
    input [15:0] p_shift_9,
    input [15:0] p_shift_10,
    input [15:0] p_shift_11,
    input [15:0] p_shift_12,
    input [15:0] p_shift_13,
    input [15:0] p_shift_14,
    input [15:0] p_shift_15,
    input [19:0] previous_sum,

    output [19:0] PE_sum
    );

    // Sign-extend a 16-bit value to 20 bits.
    function [19:0] sext20;
        input [15:0] v;
        begin
            sext20 = { {4{v[15]}}, v };
        end
    endfunction

    // Four partial sums of four products each, then the final sum.
    wire [19:0] group_0 = sext20(p_shift_0)  + sext20(p_shift_1)  + sext20(p_shift_2)  + sext20(p_shift_3);
    wire [19:0] group_1 = sext20(p_shift_4)  + sext20(p_shift_5)  + sext20(p_shift_6)  + sext20(p_shift_7);
    wire [19:0] group_2 = sext20(p_shift_8)  + sext20(p_shift_9)  + sext20(p_shift_10) + sext20(p_shift_11);
    wire [19:0] group_3 = sext20(p_shift_12) + sext20(p_shift_13) + sext20(p_shift_14) + sext20(p_shift_15);

    assign PE_sum = group_0 + group_1 + group_2 + group_3 + previous_sum;

endmodule

--------------------------------------------------------------------------------
/PE_register.v:
--------------------------------------------------------------------------------
1 | `timescale 1ns / 1ps
2 | 3 | // Because it's register(small buffer), 4 | // we have set the PE_sum_out as reg(registerd) 5 | 6 | module PE_register( 7 | input clk, 8 | input reset, 9 | 10 | input [19:0] PE_sum, 11 | output reg [19:0] PE_sum_out 12 | ); 13 | 14 | // reg temp; 15 | // always @ (posedge clk) 16 | // if (reset) begin 17 | // temp <= 0; 18 | // PE_sum_out <= 0; 19 | // end 20 | // else if (temp == 0) begin 21 | // temp <= 1; 22 | // PE_sum_out <= 0; 23 | // end 24 | // else 25 | // PE_sum_out <= PE_sum; 26 | 27 | always @ (posedge clk) 28 | if (reset) 29 | PE_sum_out <= 0; 30 | else 31 | PE_sum_out <= PE_sum; 32 | 33 | endmodule 34 | -------------------------------------------------------------------------------- /Weight_MUX_REG.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | // Get 32 bit data from buffer, and 4 | // sort them depending on the bitwidth of Network. 5 | // The thing is that, in all cases output is 32bit! 6 | // ex) 8bit, 8bit case, 8bit value is copied and becomes 32bit!! 7 | 8 | // Iprobably have to make different MUX_REG module for Input, Weight.. 9 | 10 | module Weight_MUX_REG( 11 | input clk, 12 | input reset, 13 | 14 | // bit_mode --> reflects total 9 scenarios (2, 4, 8) X (2, 4, 8) 15 | // However, for 2, 4, 8 bit, each behavior is the same, so 3 cases is enough 16 | // !!!! for Weight Buffer MUX_REG, it uses the information of Input, 17 | // !!!! for Input Buffer MUX_REG, it uses the information of Weight 18 | input [1:0] input_bitwidth, 19 | 20 | input [31:0] buffer, 21 | // output reg [1:0] input_bitwidth, 22 | output reg [31:0] sorted_data 23 | ); 24 | 25 | reg [1:0] state; 26 | 27 | // 00 : 2bit, 01 : 4bit, 10 : 8bit 28 | // This is the case of using front part of buffer. 29 | // has to be divided more by cycles. 
30 | always @ (posedge clk) 31 | if (reset) begin 32 | sorted_data <= 0; 33 | state <= 2'b00; 34 | end 35 | else if (input_bitwidth == 2'b00) 36 | sorted_data <= buffer; 37 | else if (state == 2'b00) begin 38 | if(input_bitwidth == 2'b01) begin 39 | sorted_data <= { buffer[15:8], buffer[15:8], buffer[7:0], buffer[7:0] }; 40 | state <= state + 1; 41 | end 42 | else begin 43 | sorted_data <= { buffer[7:0], buffer[7:0], buffer[7:0], buffer[7:0] }; 44 | state <= state + 1; 45 | end 46 | end 47 | 48 | else if (state == 2'b01) begin 49 | if(input_bitwidth == 2'b01) begin 50 | sorted_data <= { buffer[31:24], buffer[31:24], buffer[23:16], buffer[23:16] }; 51 | state <= 0; 52 | end 53 | else begin 54 | sorted_data <= { buffer[15:8], buffer[15:8], buffer[15:8], buffer[15:8] }; 55 | state <= state + 1; 56 | end 57 | end 58 | 59 | else if (state == 2'b10) begin 60 | sorted_data <= { buffer[23:16], buffer[23:16], buffer[23:16], buffer[23:16] }; 61 | state <= state + 1; 62 | end 63 | 64 | else begin 65 | sorted_data <= { buffer[31:24], buffer[31:24], buffer[31:24], buffer[31:24] }; 66 | state <= 0; 67 | end 68 | 69 | 70 | endmodule 71 | -------------------------------------------------------------------------------- /accumulator.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | // [Fusion Unit] 4 | // 4 X 4 bitbricks + (4 x 4 shift) 5 | // 17-way adder tree 6 | // output = p_5 p_4 p_3 p_2 p_1 p_0 (6bit) 7 | 8 | module ACC( 9 | input clk, 10 | input reset, 11 | input [19:0] PE_sum, 12 | 13 | output reg [27:0] total_output 14 | ); 15 | 16 | // wire [27:0] temp; 17 | // assign temp = { {8{PE_sum[19]}}, PE_sum }; 18 | 19 | // always @ (posedge clk) 20 | // if (reset) 21 | // temp = 0; 22 | // else 23 | // temp = PE_sum + temp; 24 | 25 | 26 | //// Is it the only way?????? 
27 | // assign total_output = temp; 28 | 29 | always @ (posedge clk) 30 | if (reset) 31 | total_output <= 0; 32 | else 33 | total_output <= { {8{PE_sum[19]}}, PE_sum } + total_output; 34 | 35 | endmodule -------------------------------------------------------------------------------- /bitbrick.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | // [BitBrick] 4 | // input : x_0 x_1 (2bit) * y_1 y_0 (2bit) 5 | // add sign and MUL (signed 3bit * 3bit) 6 | // output = p_5 p_4 p_3 p_2 p_1 p_0 (6bit) 7 | 8 | module bitbrick( 9 | input clk, 10 | input reset, 11 | input [1:0] x, 12 | input [1:0] y, 13 | input sign_x, 14 | input sign_y, 15 | output [5:0] p 16 | ); 17 | 18 | wire [2:0] in_x; 19 | wire [2:0] in_y; 20 | 21 | assign in_x[1:0] = x[1:0]; 22 | assign in_y[1:0] = y[1:0]; 23 | assign in_x[2] = sign_x & x[1]; 24 | assign in_y[2] = sign_y & y[1]; 25 | 26 | signed_3bit_MUL MUL_3bit(.in_x(in_x), .in_y(in_y), .p(p)); 27 | 28 | endmodule 29 | 30 | 31 | -------------------------------------------------------------------------------- /bitbrick_shift.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | module bitbrick_shift( 4 | input [5:0] p, 5 | input [2:0] signal, 6 | output [15:0] product 7 | ); 8 | 9 | // output is 6bit and the maximum shift value is 12bit 10 | // But, in this architecture, output only needs 4 the LSB bit(2 MSB bit is just sign extension) 11 | // So, I can reduce here. 
12 | // ex ) Not MSB part, input = 000, 001, 010, 011 -> possible output = 0 ~ +9 : 4bit is enough
13 | // MSB Part, input = 000, 001, 110, 111 -> possible output = -2 ~ +4 : 4bit is enough
14 | // Therefore, here, the number of bit of product is just 16bit which is 12bit(shift) + 4bit(output bit)
15 |
16 | //sign extension and shift it
17 | assign product = { {10{p[5]}}, p } << (signal*2);
18 |
19 | endmodule
--------------------------------------------------------------------------------
/signed3bit_MUL.v:
--------------------------------------------------------------------------------
1 | `timescale 1ns / 1ps
2 |
3 | // Signed 3-bit x 3-bit multiplier.
4 | // All three ports are declared signed, so the product is formed in signed
5 | // arithmetic at the 6-bit width of p.
6 |
7 | module signed_3bit_MUL(
8 |     input signed [2:0] in_x,
9 |     input signed [2:0] in_y,
10 |     output signed [5:0] p
11 | );
12 |
13 |     assign p = in_x * in_y;
14 |
15 | endmodule
--------------------------------------------------------------------------------
/top.v:
--------------------------------------------------------------------------------
1 | `timescale 1ns / 1ps
2 |
3 | // [Bit Fusion Architecture]
4 | // 16 X 16 PEs
5 | // Accumulator
6 | // Buffer (+ Reg, MUX)
7 | //
8 | // Top-level wiring:
9 | //  - 16 input lanes: IBUF_data_in_k -> BUF_32bit -> Input_MUX_REG -> sorted input k
10 | //  - 16 columns: every BitFusion_column receives all 16 sorted inputs, its own 16 weight
11 | //    words WBUF_data_in_<column>_1..16, and drives total_output_<column>
12 | // The repeated instances are built with generate loops over packed buses
13 | // (element k-1 of a bus corresponds to the port whose suffix is k).
14 |
15 | module BitFusion(
16 |     input clk,
17 |     input reset,
18 |
19 |     input [31:0] IBUF_data_in_1, IBUF_data_in_2, IBUF_data_in_3, IBUF_data_in_4, IBUF_data_in_5, IBUF_data_in_6, IBUF_data_in_7, IBUF_data_in_8,
20 |                  IBUF_data_in_9, IBUF_data_in_10, IBUF_data_in_11, IBUF_data_in_12, IBUF_data_in_13, IBUF_data_in_14, IBUF_data_in_15, IBUF_data_in_16,
21 |
22 |     input [31:0] WBUF_data_in_1_1, WBUF_data_in_1_2, WBUF_data_in_1_3, WBUF_data_in_1_4, WBUF_data_in_1_5, WBUF_data_in_1_6, WBUF_data_in_1_7, WBUF_data_in_1_8, WBUF_data_in_1_9, WBUF_data_in_1_10, WBUF_data_in_1_11, WBUF_data_in_1_12, WBUF_data_in_1_13, WBUF_data_in_1_14, WBUF_data_in_1_15, WBUF_data_in_1_16,
23 |     input [31:0] WBUF_data_in_2_1, WBUF_data_in_2_2, WBUF_data_in_2_3, WBUF_data_in_2_4, WBUF_data_in_2_5, WBUF_data_in_2_6, WBUF_data_in_2_7, WBUF_data_in_2_8, WBUF_data_in_2_9, WBUF_data_in_2_10, WBUF_data_in_2_11, WBUF_data_in_2_12, WBUF_data_in_2_13, WBUF_data_in_2_14, WBUF_data_in_2_15, WBUF_data_in_2_16,
24 |     input [31:0] WBUF_data_in_3_1, WBUF_data_in_3_2, WBUF_data_in_3_3, WBUF_data_in_3_4, WBUF_data_in_3_5, WBUF_data_in_3_6, WBUF_data_in_3_7, WBUF_data_in_3_8, WBUF_data_in_3_9, WBUF_data_in_3_10, WBUF_data_in_3_11, WBUF_data_in_3_12, WBUF_data_in_3_13, WBUF_data_in_3_14, WBUF_data_in_3_15, WBUF_data_in_3_16,
25 |     input [31:0] WBUF_data_in_4_1, WBUF_data_in_4_2, WBUF_data_in_4_3, WBUF_data_in_4_4, WBUF_data_in_4_5, WBUF_data_in_4_6, WBUF_data_in_4_7, WBUF_data_in_4_8, WBUF_data_in_4_9, WBUF_data_in_4_10, WBUF_data_in_4_11, WBUF_data_in_4_12, WBUF_data_in_4_13, WBUF_data_in_4_14, WBUF_data_in_4_15, WBUF_data_in_4_16,
26 |     input [31:0] WBUF_data_in_5_1, WBUF_data_in_5_2, WBUF_data_in_5_3, WBUF_data_in_5_4, WBUF_data_in_5_5, WBUF_data_in_5_6, WBUF_data_in_5_7, WBUF_data_in_5_8, WBUF_data_in_5_9, WBUF_data_in_5_10, WBUF_data_in_5_11, WBUF_data_in_5_12, WBUF_data_in_5_13, WBUF_data_in_5_14, WBUF_data_in_5_15, WBUF_data_in_5_16,
27 |     input [31:0] WBUF_data_in_6_1, WBUF_data_in_6_2, WBUF_data_in_6_3, WBUF_data_in_6_4, WBUF_data_in_6_5, WBUF_data_in_6_6, WBUF_data_in_6_7, WBUF_data_in_6_8, WBUF_data_in_6_9, WBUF_data_in_6_10, WBUF_data_in_6_11, WBUF_data_in_6_12, WBUF_data_in_6_13, WBUF_data_in_6_14, WBUF_data_in_6_15, WBUF_data_in_6_16,
28 |     input [31:0] WBUF_data_in_7_1, WBUF_data_in_7_2, WBUF_data_in_7_3, WBUF_data_in_7_4, WBUF_data_in_7_5, WBUF_data_in_7_6, WBUF_data_in_7_7, WBUF_data_in_7_8, WBUF_data_in_7_9, WBUF_data_in_7_10, WBUF_data_in_7_11, WBUF_data_in_7_12, WBUF_data_in_7_13, WBUF_data_in_7_14, WBUF_data_in_7_15, WBUF_data_in_7_16,
29 |     input [31:0] WBUF_data_in_8_1, WBUF_data_in_8_2, WBUF_data_in_8_3, WBUF_data_in_8_4, WBUF_data_in_8_5, WBUF_data_in_8_6, WBUF_data_in_8_7, WBUF_data_in_8_8, WBUF_data_in_8_9, WBUF_data_in_8_10, WBUF_data_in_8_11, WBUF_data_in_8_12, WBUF_data_in_8_13, WBUF_data_in_8_14, WBUF_data_in_8_15, WBUF_data_in_8_16,
30 |     input [31:0] WBUF_data_in_9_1, WBUF_data_in_9_2, WBUF_data_in_9_3, WBUF_data_in_9_4, WBUF_data_in_9_5, WBUF_data_in_9_6, WBUF_data_in_9_7, WBUF_data_in_9_8, WBUF_data_in_9_9, WBUF_data_in_9_10, WBUF_data_in_9_11, WBUF_data_in_9_12, WBUF_data_in_9_13, WBUF_data_in_9_14, WBUF_data_in_9_15, WBUF_data_in_9_16,
31 |     input [31:0] WBUF_data_in_10_1, WBUF_data_in_10_2, WBUF_data_in_10_3, WBUF_data_in_10_4, WBUF_data_in_10_5, WBUF_data_in_10_6, WBUF_data_in_10_7, WBUF_data_in_10_8, WBUF_data_in_10_9, WBUF_data_in_10_10, WBUF_data_in_10_11, WBUF_data_in_10_12, WBUF_data_in_10_13, WBUF_data_in_10_14, WBUF_data_in_10_15, WBUF_data_in_10_16,
32 |     input [31:0] WBUF_data_in_11_1, WBUF_data_in_11_2, WBUF_data_in_11_3, WBUF_data_in_11_4, WBUF_data_in_11_5, WBUF_data_in_11_6, WBUF_data_in_11_7, WBUF_data_in_11_8, WBUF_data_in_11_9, WBUF_data_in_11_10, WBUF_data_in_11_11, WBUF_data_in_11_12, WBUF_data_in_11_13, WBUF_data_in_11_14, WBUF_data_in_11_15, WBUF_data_in_11_16,
33 |     input [31:0] WBUF_data_in_12_1, WBUF_data_in_12_2, WBUF_data_in_12_3, WBUF_data_in_12_4, WBUF_data_in_12_5, WBUF_data_in_12_6, WBUF_data_in_12_7, WBUF_data_in_12_8, WBUF_data_in_12_9, WBUF_data_in_12_10, WBUF_data_in_12_11, WBUF_data_in_12_12, WBUF_data_in_12_13, WBUF_data_in_12_14, WBUF_data_in_12_15, WBUF_data_in_12_16,
34 |     input [31:0] WBUF_data_in_13_1, WBUF_data_in_13_2, WBUF_data_in_13_3, WBUF_data_in_13_4, WBUF_data_in_13_5, WBUF_data_in_13_6, WBUF_data_in_13_7, WBUF_data_in_13_8, WBUF_data_in_13_9, WBUF_data_in_13_10, WBUF_data_in_13_11, WBUF_data_in_13_12, WBUF_data_in_13_13, WBUF_data_in_13_14, WBUF_data_in_13_15, WBUF_data_in_13_16,
35 |     input [31:0] WBUF_data_in_14_1, WBUF_data_in_14_2, WBUF_data_in_14_3, WBUF_data_in_14_4, WBUF_data_in_14_5, WBUF_data_in_14_6, WBUF_data_in_14_7, WBUF_data_in_14_8, WBUF_data_in_14_9, WBUF_data_in_14_10, WBUF_data_in_14_11, WBUF_data_in_14_12, WBUF_data_in_14_13, WBUF_data_in_14_14, WBUF_data_in_14_15, WBUF_data_in_14_16,
36 |     input [31:0] WBUF_data_in_15_1, WBUF_data_in_15_2, WBUF_data_in_15_3, WBUF_data_in_15_4, WBUF_data_in_15_5, WBUF_data_in_15_6, WBUF_data_in_15_7, WBUF_data_in_15_8, WBUF_data_in_15_9, WBUF_data_in_15_10, WBUF_data_in_15_11, WBUF_data_in_15_12, WBUF_data_in_15_13, WBUF_data_in_15_14, WBUF_data_in_15_15, WBUF_data_in_15_16,
37 |     input [31:0] WBUF_data_in_16_1, WBUF_data_in_16_2, WBUF_data_in_16_3, WBUF_data_in_16_4, WBUF_data_in_16_5, WBUF_data_in_16_6, WBUF_data_in_16_7, WBUF_data_in_16_8, WBUF_data_in_16_9, WBUF_data_in_16_10, WBUF_data_in_16_11, WBUF_data_in_16_12, WBUF_data_in_16_13, WBUF_data_in_16_14, WBUF_data_in_16_15, WBUF_data_in_16_16,
38 |
39 |     /// First thing to do!! --> Sign designation!!!!!
40 |     input [3:0] sign_x,
41 |     input [3:0] sign_y,
42 |     input [1:0] weight_bitwidth,
43 |     input [1:0] input_bitwidth,
44 |
45 |     input [47:0] signal,
46 |
47 |     // only output works, because it's going to be used as REG!!
48 |     output [27:0] total_output_1, total_output_2, total_output_3, total_output_4, total_output_5, total_output_6, total_output_7, total_output_8,
49 |                   total_output_9, total_output_10, total_output_11, total_output_12, total_output_13, total_output_14, total_output_15, total_output_16
50 | );
51 |
52 |     // Input words packed LSB-first: bits [32*i +: 32] carry IBUF_data_in_(i+1).
53 |     wire [16*32-1:0] ibuf_in_bus;
54 |     assign ibuf_in_bus = { IBUF_data_in_16, IBUF_data_in_15, IBUF_data_in_14, IBUF_data_in_13, IBUF_data_in_12, IBUF_data_in_11, IBUF_data_in_10, IBUF_data_in_9,
55 |                            IBUF_data_in_8, IBUF_data_in_7, IBUF_data_in_6, IBUF_data_in_5, IBUF_data_in_4, IBUF_data_in_3, IBUF_data_in_2, IBUF_data_in_1 };
56 |
57 |     // Per-lane BUF_32bit outputs and Input_MUX_REG outputs, same packing as ibuf_in_bus.
58 |     wire [16*32-1:0] ibuf_out_bus;
59 |     wire [16*32-1:0] sorted_bus;
60 |
61 |     // x from input buffer, y from weight buffer
62 |     genvar lane;
63 |     generate
64 |         for (lane = 0; lane < 16; lane = lane + 1) begin : input_lane
65 |             BUF_32bit IBUF(.clk(clk), .reset(reset), .data_in(ibuf_in_bus[32*lane +: 32]), .data_out(ibuf_out_bus[32*lane +: 32]));
66 |             Input_MUX_REG mux_reg(.clk(clk), .reset(reset), .weight_bitwidth(weight_bitwidth), .buffer(ibuf_out_bus[32*lane +: 32]), .sorted_data(sorted_bus[32*lane +: 32]) );
67 |         end
68 |     endgenerate
69 |
70 |     // Weight words packed per column: bits [512*c + 32*r +: 32] carry WBUF_data_in_(c+1)_(r+1).
71 |     wire [16*16*32-1:0] wbuf_bus;
72 |     assign wbuf_bus[512*0 +: 512] = { WBUF_data_in_1_16, WBUF_data_in_1_15, WBUF_data_in_1_14, WBUF_data_in_1_13, WBUF_data_in_1_12, WBUF_data_in_1_11, WBUF_data_in_1_10, WBUF_data_in_1_9, WBUF_data_in_1_8, WBUF_data_in_1_7, WBUF_data_in_1_6, WBUF_data_in_1_5, WBUF_data_in_1_4, WBUF_data_in_1_3, WBUF_data_in_1_2, WBUF_data_in_1_1 };
73 |     assign wbuf_bus[512*1 +: 512] = { WBUF_data_in_2_16, WBUF_data_in_2_15, WBUF_data_in_2_14, WBUF_data_in_2_13, WBUF_data_in_2_12, WBUF_data_in_2_11, WBUF_data_in_2_10, WBUF_data_in_2_9, WBUF_data_in_2_8, WBUF_data_in_2_7, WBUF_data_in_2_6, WBUF_data_in_2_5, WBUF_data_in_2_4, WBUF_data_in_2_3, WBUF_data_in_2_2, WBUF_data_in_2_1 };
74 |     assign wbuf_bus[512*2 +: 512] = { WBUF_data_in_3_16, WBUF_data_in_3_15, WBUF_data_in_3_14, WBUF_data_in_3_13, WBUF_data_in_3_12, WBUF_data_in_3_11, WBUF_data_in_3_10, WBUF_data_in_3_9, WBUF_data_in_3_8, WBUF_data_in_3_7, WBUF_data_in_3_6, WBUF_data_in_3_5, WBUF_data_in_3_4, WBUF_data_in_3_3, WBUF_data_in_3_2, WBUF_data_in_3_1 };
75 |     assign wbuf_bus[512*3 +: 512] = { WBUF_data_in_4_16, WBUF_data_in_4_15, WBUF_data_in_4_14, WBUF_data_in_4_13, WBUF_data_in_4_12, WBUF_data_in_4_11, WBUF_data_in_4_10, WBUF_data_in_4_9, WBUF_data_in_4_8, WBUF_data_in_4_7, WBUF_data_in_4_6, WBUF_data_in_4_5, WBUF_data_in_4_4, WBUF_data_in_4_3, WBUF_data_in_4_2, WBUF_data_in_4_1 };
76 |     assign wbuf_bus[512*4 +: 512] = { WBUF_data_in_5_16, WBUF_data_in_5_15, WBUF_data_in_5_14, WBUF_data_in_5_13, WBUF_data_in_5_12, WBUF_data_in_5_11, WBUF_data_in_5_10, WBUF_data_in_5_9, WBUF_data_in_5_8, WBUF_data_in_5_7, WBUF_data_in_5_6, WBUF_data_in_5_5, WBUF_data_in_5_4, WBUF_data_in_5_3, WBUF_data_in_5_2, WBUF_data_in_5_1 };
77 |     assign wbuf_bus[512*5 +: 512] = { WBUF_data_in_6_16, WBUF_data_in_6_15, WBUF_data_in_6_14, WBUF_data_in_6_13, WBUF_data_in_6_12, WBUF_data_in_6_11, WBUF_data_in_6_10, WBUF_data_in_6_9, WBUF_data_in_6_8, WBUF_data_in_6_7, WBUF_data_in_6_6, WBUF_data_in_6_5, WBUF_data_in_6_4, WBUF_data_in_6_3, WBUF_data_in_6_2, WBUF_data_in_6_1 };
78 |     assign wbuf_bus[512*6 +: 512] = { WBUF_data_in_7_16, WBUF_data_in_7_15, WBUF_data_in_7_14, WBUF_data_in_7_13, WBUF_data_in_7_12, WBUF_data_in_7_11, WBUF_data_in_7_10, WBUF_data_in_7_9, WBUF_data_in_7_8, WBUF_data_in_7_7, WBUF_data_in_7_6, WBUF_data_in_7_5, WBUF_data_in_7_4, WBUF_data_in_7_3, WBUF_data_in_7_2, WBUF_data_in_7_1 };
79 |     assign wbuf_bus[512*7 +: 512] = { WBUF_data_in_8_16, WBUF_data_in_8_15, WBUF_data_in_8_14, WBUF_data_in_8_13, WBUF_data_in_8_12, WBUF_data_in_8_11, WBUF_data_in_8_10, WBUF_data_in_8_9, WBUF_data_in_8_8, WBUF_data_in_8_7, WBUF_data_in_8_6, WBUF_data_in_8_5, WBUF_data_in_8_4, WBUF_data_in_8_3, WBUF_data_in_8_2, WBUF_data_in_8_1 };
80 |     assign wbuf_bus[512*8 +: 512] = { WBUF_data_in_9_16, WBUF_data_in_9_15, WBUF_data_in_9_14, WBUF_data_in_9_13, WBUF_data_in_9_12, WBUF_data_in_9_11, WBUF_data_in_9_10, WBUF_data_in_9_9, WBUF_data_in_9_8, WBUF_data_in_9_7, WBUF_data_in_9_6, WBUF_data_in_9_5, WBUF_data_in_9_4, WBUF_data_in_9_3, WBUF_data_in_9_2, WBUF_data_in_9_1 };
81 |     assign wbuf_bus[512*9 +: 512] = { WBUF_data_in_10_16, WBUF_data_in_10_15, WBUF_data_in_10_14, WBUF_data_in_10_13, WBUF_data_in_10_12, WBUF_data_in_10_11, WBUF_data_in_10_10, WBUF_data_in_10_9, WBUF_data_in_10_8, WBUF_data_in_10_7, WBUF_data_in_10_6, WBUF_data_in_10_5, WBUF_data_in_10_4, WBUF_data_in_10_3, WBUF_data_in_10_2, WBUF_data_in_10_1 };
82 |     assign wbuf_bus[512*10 +: 512] = { WBUF_data_in_11_16, WBUF_data_in_11_15, WBUF_data_in_11_14, WBUF_data_in_11_13, WBUF_data_in_11_12, WBUF_data_in_11_11, WBUF_data_in_11_10, WBUF_data_in_11_9, WBUF_data_in_11_8, WBUF_data_in_11_7, WBUF_data_in_11_6, WBUF_data_in_11_5, WBUF_data_in_11_4, WBUF_data_in_11_3, WBUF_data_in_11_2, WBUF_data_in_11_1 };
83 |     assign wbuf_bus[512*11 +: 512] = { WBUF_data_in_12_16, WBUF_data_in_12_15, WBUF_data_in_12_14, WBUF_data_in_12_13, WBUF_data_in_12_12, WBUF_data_in_12_11, WBUF_data_in_12_10, WBUF_data_in_12_9, WBUF_data_in_12_8, WBUF_data_in_12_7, WBUF_data_in_12_6, WBUF_data_in_12_5, WBUF_data_in_12_4, WBUF_data_in_12_3, WBUF_data_in_12_2, WBUF_data_in_12_1 };
84 |     assign wbuf_bus[512*12 +: 512] = { WBUF_data_in_13_16, WBUF_data_in_13_15, WBUF_data_in_13_14, WBUF_data_in_13_13, WBUF_data_in_13_12, WBUF_data_in_13_11, WBUF_data_in_13_10, WBUF_data_in_13_9, WBUF_data_in_13_8, WBUF_data_in_13_7, WBUF_data_in_13_6, WBUF_data_in_13_5, WBUF_data_in_13_4, WBUF_data_in_13_3, WBUF_data_in_13_2, WBUF_data_in_13_1 };
85 |     assign wbuf_bus[512*13 +: 512] = { WBUF_data_in_14_16, WBUF_data_in_14_15, WBUF_data_in_14_14, WBUF_data_in_14_13, WBUF_data_in_14_12, WBUF_data_in_14_11, WBUF_data_in_14_10, WBUF_data_in_14_9, WBUF_data_in_14_8, WBUF_data_in_14_7, WBUF_data_in_14_6, WBUF_data_in_14_5, WBUF_data_in_14_4, WBUF_data_in_14_3, WBUF_data_in_14_2, WBUF_data_in_14_1 };
86 |     assign wbuf_bus[512*14 +: 512] = { WBUF_data_in_15_16, WBUF_data_in_15_15, WBUF_data_in_15_14, WBUF_data_in_15_13, WBUF_data_in_15_12, WBUF_data_in_15_11, WBUF_data_in_15_10, WBUF_data_in_15_9, WBUF_data_in_15_8, WBUF_data_in_15_7, WBUF_data_in_15_6, WBUF_data_in_15_5, WBUF_data_in_15_4, WBUF_data_in_15_3, WBUF_data_in_15_2, WBUF_data_in_15_1 };
87 |     assign wbuf_bus[512*15 +: 512] = { WBUF_data_in_16_16, WBUF_data_in_16_15, WBUF_data_in_16_14, WBUF_data_in_16_13, WBUF_data_in_16_12, WBUF_data_in_16_11, WBUF_data_in_16_10, WBUF_data_in_16_9, WBUF_data_in_16_8, WBUF_data_in_16_7, WBUF_data_in_16_6, WBUF_data_in_16_5, WBUF_data_in_16_4, WBUF_data_in_16_3, WBUF_data_in_16_2, WBUF_data_in_16_1 };
88 |
89 |     // Column results packed LSB-first: bits [28*c +: 28] drive total_output_(c+1).
90 |     wire [16*28-1:0] total_bus;
91 |     assign { total_output_16, total_output_15, total_output_14, total_output_13, total_output_12, total_output_11, total_output_10, total_output_9,
92 |              total_output_8, total_output_7, total_output_6, total_output_5, total_output_4, total_output_3, total_output_2, total_output_1 } = total_bus;
93 |
94 |     // One BitFusion_column per output; all columns share the 16 sorted inputs and the control signals.
95 |     genvar col;
96 |     generate
97 |         for (col = 0; col < 16; col = col + 1) begin : column
98 |             BitFusion_column u_column(
99 |                 .clk(clk), .reset(reset),
100 |                 .sorted_input_1(sorted_bus[32*0 +: 32]), .sorted_input_2(sorted_bus[32*1 +: 32]), .sorted_input_3(sorted_bus[32*2 +: 32]), .sorted_input_4(sorted_bus[32*3 +: 32]),
101 |                 .sorted_input_5(sorted_bus[32*4 +: 32]), .sorted_input_6(sorted_bus[32*5 +: 32]), .sorted_input_7(sorted_bus[32*6 +: 32]), .sorted_input_8(sorted_bus[32*7 +: 32]),
102 |                 .sorted_input_9(sorted_bus[32*8 +: 32]), .sorted_input_10(sorted_bus[32*9 +: 32]), .sorted_input_11(sorted_bus[32*10 +: 32]), .sorted_input_12(sorted_bus[32*11 +: 32]),
103 |                 .sorted_input_13(sorted_bus[32*12 +: 32]), .sorted_input_14(sorted_bus[32*13 +: 32]), .sorted_input_15(sorted_bus[32*14 +: 32]), .sorted_input_16(sorted_bus[32*15 +: 32]),
104 |                 .WBUF_data_in_1(wbuf_bus[512*col + 32*0 +: 32]), .WBUF_data_in_2(wbuf_bus[512*col + 32*1 +: 32]), .WBUF_data_in_3(wbuf_bus[512*col + 32*2 +: 32]), .WBUF_data_in_4(wbuf_bus[512*col + 32*3 +: 32]),
105 |                 .WBUF_data_in_5(wbuf_bus[512*col + 32*4 +: 32]), .WBUF_data_in_6(wbuf_bus[512*col + 32*5 +: 32]), .WBUF_data_in_7(wbuf_bus[512*col + 32*6 +: 32]), .WBUF_data_in_8(wbuf_bus[512*col + 32*7 +: 32]),
106 |                 .WBUF_data_in_9(wbuf_bus[512*col + 32*8 +: 32]), .WBUF_data_in_10(wbuf_bus[512*col + 32*9 +: 32]), .WBUF_data_in_11(wbuf_bus[512*col + 32*10 +: 32]), .WBUF_data_in_12(wbuf_bus[512*col + 32*11 +: 32]),
107 |                 .WBUF_data_in_13(wbuf_bus[512*col + 32*12 +: 32]), .WBUF_data_in_14(wbuf_bus[512*col + 32*13 +: 32]), .WBUF_data_in_15(wbuf_bus[512*col + 32*14 +: 32]), .WBUF_data_in_16(wbuf_bus[512*col + 32*15 +: 32]),
108 |                 .sign_x(sign_x), .sign_y(sign_y), .input_bitwidth(input_bitwidth), .signal(signal), .total_output(total_bus[28*col +: 28])
109 |             );
110 |         end
111 |     endgenerate
112 |
113 | endmodule
114 |
115 |
--------------------------------------------------------------------------------