├── LICENSE ├── OPT1 ├── cube │ ├── array_mac_based │ │ ├── get_pipline_mulwidth.v │ │ ├── pe.v │ │ ├── sim │ │ │ ├── filelist.f │ │ │ ├── makefile │ │ │ ├── test_mac_tc_array.sv │ │ │ └── timescale.sv │ │ ├── syn │ │ │ ├── dc.tcl │ │ │ ├── filelist.f │ │ │ ├── outputs │ │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ │ ├── top_cube_8_area_report_5.0.txt │ │ │ │ │ ├── top_cube_8_area_report_5.5.txt │ │ │ │ │ ├── top_cube_8_area_report_6.0.txt │ │ │ │ │ ├── top_cube_8_area_report_6.3.txt │ │ │ │ │ ├── top_cube_8_area_report_6.5.txt │ │ │ │ │ ├── top_cube_8_power_report_5.0.txt │ │ │ │ │ ├── top_cube_8_power_report_5.5.txt │ │ │ │ │ ├── top_cube_8_power_report_6.0.txt │ │ │ │ │ ├── top_cube_8_power_report_6.3.txt │ │ │ │ │ ├── top_cube_8_power_report_6.5.txt │ │ │ │ │ ├── top_cube_8_timing_report_5.0.txt │ │ │ │ │ ├── top_cube_8_timing_report_5.5.txt │ │ │ │ │ ├── top_cube_8_timing_report_6.0.txt │ │ │ │ │ ├── top_cube_8_timing_report_6.3.txt │ │ │ │ │ └── top_cube_8_timing_report_6.5.txt │ │ │ └── run.sh │ │ └── top.v │ └── array_opt1_based │ │ ├── DW02_tree.sv │ │ ├── PE.v │ │ ├── booth_partial_product_generator.v │ │ ├── booth_partial_product_generator_pp1.v │ │ ├── booth_pp_gen.v │ │ ├── get_pipline_mulwidth.v │ │ ├── inv_conveter_8.v │ │ ├── inv_unit.v │ │ ├── inv_unit_nor_out.v │ │ ├── opt1_mac.v │ │ ├── sim │ │ ├── filelist.f │ │ ├── makefile │ │ ├── test_opt1_cube.sv │ │ └── timescale.sv │ │ ├── syn │ │ ├── dc.tcl │ │ ├── filelist.f │ │ ├── outputs │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ ├── top_opt1_cube_area_report_3.5.txt │ │ │ │ ├── top_opt1_cube_area_report_4.0.txt │ │ │ │ ├── top_opt1_cube_power_report_3.5.txt │ │ │ │ ├── top_opt1_cube_power_report_4.0.txt │ │ │ │ ├── top_opt1_cube_timing_report_3.5.txt │ │ │ │ └── top_opt1_cube_timing_report_4.0.txt │ │ └── run.sh │ │ └── top.v ├── systolic_array_os │ ├── array_mac_based │ │ ├── get_pipline_mulwidth.v │ │ ├── pe.v │ │ ├── sim │ │ │ ├── filelist.f │ │ │ ├── makefile │ │ │ ├── test_mac_os_array.sv │ │ │ └── 
timescale.sv │ │ ├── syn │ │ │ ├── dc.tcl │ │ │ ├── filelist.f │ │ │ ├── outputs │ │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ │ ├── top_array_16_area_report_5.0.txt │ │ │ │ │ ├── top_array_16_area_report_5.5.txt │ │ │ │ │ ├── top_array_16_area_report_6.0.txt │ │ │ │ │ ├── top_array_16_area_report_6.5.txt │ │ │ │ │ ├── top_array_16_power_report_5.0.txt │ │ │ │ │ ├── top_array_16_power_report_5.5.txt │ │ │ │ │ ├── top_array_16_power_report_6.0.txt │ │ │ │ │ ├── top_array_16_power_report_6.5.txt │ │ │ │ │ ├── top_array_16_timing_report_5.0.txt │ │ │ │ │ ├── top_array_16_timing_report_5.5.txt │ │ │ │ │ ├── top_array_16_timing_report_6.0.txt │ │ │ │ │ └── top_array_16_timing_report_6.5.txt │ │ │ └── run.sh │ │ └── top.v │ ├── array_opt1_based │ │ ├── DW02_tree.sv │ │ ├── booth_partial_product_generator.v │ │ ├── booth_partial_product_generator_pp1.v │ │ ├── booth_pp_gen.v │ │ ├── get_pipline_mulwidth.v │ │ ├── inv_conveter_8.v │ │ ├── inv_unit.v │ │ ├── inv_unit_nor_out.v │ │ ├── opt1_mac.v │ │ ├── pe.v │ │ ├── sim │ │ │ ├── filelist.f │ │ │ ├── makefile │ │ │ ├── test_opt1_os_array.sv │ │ │ └── timescale.sv │ │ ├── syn │ │ │ ├── dc.tcl │ │ │ ├── filelist.f │ │ │ ├── outputs │ │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ │ ├── top_opt1_array_16_area_report_3.0.txt │ │ │ │ │ ├── top_opt1_array_16_area_report_3.1.txt │ │ │ │ │ ├── top_opt1_array_16_area_report_4.0.txt │ │ │ │ │ ├── top_opt1_array_16_area_report_5.0.txt │ │ │ │ │ ├── top_opt1_array_16_power_report_3.0.txt │ │ │ │ │ ├── top_opt1_array_16_power_report_3.1.txt │ │ │ │ │ ├── top_opt1_array_16_power_report_4.0.txt │ │ │ │ │ ├── top_opt1_array_16_power_report_5.0.txt │ │ │ │ │ ├── top_opt1_array_16_timing_report_3.0.txt │ │ │ │ │ ├── top_opt1_array_16_timing_report_3.1.txt │ │ │ │ │ ├── top_opt1_array_16_timing_report_4.0.txt │ │ │ │ │ └── top_opt1_array_16_timing_report_5.0.txt │ │ │ └── run.sh │ │ └── top.v │ ├── mac_pe │ │ ├── mac.v │ │ └── syn │ │ │ ├── dc.tcl │ │ │ ├── filelist.f │ │ │ ├── outputs │ │ │ └── 
saed32rvt_tt0p85v25c │ │ │ │ ├── mac_area_report_1.2.txt │ │ │ │ ├── mac_area_report_1.3.txt │ │ │ │ ├── mac_area_report_1.5.txt │ │ │ │ ├── mac_area_report_1.667.txt │ │ │ │ ├── mac_area_report_2.0.txt │ │ │ │ ├── mac_power_report_1.2.txt │ │ │ │ ├── mac_power_report_1.3.txt │ │ │ │ ├── mac_power_report_1.5.txt │ │ │ │ ├── mac_power_report_1.667.txt │ │ │ │ ├── mac_power_report_2.0.txt │ │ │ │ ├── mac_timing_report_1.2.txt │ │ │ │ ├── mac_timing_report_1.3.txt │ │ │ │ ├── mac_timing_report_1.5.txt │ │ │ │ ├── mac_timing_report_1.667.txt │ │ │ │ └── mac_timing_report_2.0.txt │ │ │ └── run.sh │ └── opt1_pe │ │ ├── DW02_tree.sv │ │ ├── booth_partial_product_generator.v │ │ ├── booth_partial_product_generator_pp1.v │ │ ├── booth_pp_gen.v │ │ ├── dff_async.v │ │ ├── inv_conveter_8.v │ │ ├── inv_unit.v │ │ ├── inv_unit_nor_out.v │ │ ├── opt1_mac.v │ │ ├── power │ │ ├── pt.sh │ │ └── ptpx.tcl │ │ ├── sim │ │ ├── filelist.f │ │ ├── makefile │ │ ├── test_opt1_mac.sv │ │ └── timescale.sv │ │ └── syn │ │ ├── dc.tcl │ │ ├── filelist.f │ │ └── outputs │ │ └── saed32rvt_tt0p85v25c │ │ ├── opt1_mac_area_report_1.11.txt │ │ ├── opt1_mac_area_report_1.15.txt │ │ ├── opt1_mac_area_report_1.2.txt │ │ ├── opt1_mac_area_report_1.3.txt │ │ ├── opt1_mac_area_report_1.5.txt │ │ ├── opt1_mac_power_report_1.10.txt │ │ ├── opt1_mac_power_report_1.11.txt │ │ ├── opt1_mac_power_report_1.15.txt │ │ ├── opt1_mac_power_report_1.2.txt │ │ ├── opt1_mac_power_report_1.3.txt │ │ ├── opt1_mac_power_report_1.5.txt │ │ └── opt1_mac_timing_report_1.11.txt └── systolic_array_ws │ ├── array_mac_based │ ├── PE.v │ ├── get_pipline_mulwidth.v │ ├── sim │ │ ├── filelist.f │ │ ├── makefile │ │ ├── test_ws_array.sv │ │ └── timescale.sv │ ├── syn │ │ ├── dc.tcl │ │ ├── filelist.f │ │ ├── outputs │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ ├── top_array_16_area_report_4.5.txt │ │ │ │ ├── top_array_16_area_report_5.0.txt │ │ │ │ ├── top_array_16_area_report_5.5.txt │ │ │ │ ├── top_array_16_power_report_4.5.txt │ │ │ │ 
├── top_array_16_power_report_5.0.txt │ │ │ │ ├── top_array_16_power_report_5.5.txt │ │ │ │ ├── top_array_16_timing_report_4.5.txt │ │ │ │ ├── top_array_16_timing_report_5.0.txt │ │ │ │ └── top_array_16_timing_report_5.5.txt │ │ └── run.sh │ └── top.v │ └── array_opt1_based │ ├── DW02_tree.sv │ ├── PE.v │ ├── booth_partial_product_generator.v │ ├── booth_partial_product_generator_pp1.v │ ├── booth_pp_gen.v │ ├── get_pipline_mulwidth.v │ ├── inv_conveter_8.v │ ├── inv_unit.v │ ├── inv_unit_nor_out.v │ ├── opt1_mac.v │ ├── sim │ ├── filelist.f │ ├── makefile │ ├── test_opt1_ws_array.sv │ └── timescale.sv │ └── syn │ ├── dc.tcl │ ├── filelist.f │ ├── outputs │ └── saed32rvt_tt0p85v25c │ │ ├── top_opt1_array_16_area_report_3.1.txt │ │ ├── top_opt1_array_16_area_report_3.3.txt │ │ ├── top_opt1_array_16_area_report_3.5.txt │ │ ├── top_opt1_array_16_area_report_4.0.txt │ │ ├── top_opt1_array_16_area_report_4.5.txt │ │ ├── top_opt1_array_16_power_report_3.1.txt │ │ ├── top_opt1_array_16_power_report_3.3.txt │ │ ├── top_opt1_array_16_power_report_3.5.txt │ │ ├── top_opt1_array_16_power_report_4.0.txt │ │ ├── top_opt1_array_16_power_report_4.5.txt │ │ ├── top_opt1_array_16_timing_report_3.1.txt │ │ ├── top_opt1_array_16_timing_report_3.3.txt │ │ ├── top_opt1_array_16_timing_report_3.5.txt │ │ ├── top_opt1_array_16_timing_report_4.0.txt │ │ └── top_opt1_array_16_timing_report_4.5.txt │ └── run.sh ├── OPT2 ├── DW02_tree.sv ├── get_pipline_mulwidth.v ├── partial_product_select.sv ├── sim │ ├── filelist.f │ ├── makefile │ ├── test_opt2ws_array.sv │ └── timescale.sv ├── syn │ ├── dc_array.tcl │ ├── filelist.f │ ├── outputs_array │ │ └── saed32rvt_tt0p85v25c │ │ │ ├── top_tpe_n16_area_report_1.45.txt │ │ │ ├── top_tpe_n16_power_report_1.45.txt │ │ │ ├── top_tpe_n16_timing_report_1.45.txt │ │ │ ├── top_tpe_n32_area_report_1.50.txt │ │ │ ├── top_tpe_n32_power_report_1.50.txt │ │ │ ├── top_tpe_n32_timing_report_1.50.txt │ │ │ ├── top_tpe_n4_area_report_1.35.txt │ │ │ ├── 
top_tpe_n4_power_report_1.35.txt │ │ │ ├── top_tpe_n4_timing_report_1.35.txt │ │ │ ├── top_tpe_n8_area_report_1.35.txt │ │ │ ├── top_tpe_n8_power_report_1.35.txt │ │ │ └── top_tpe_n8_timing_report_1.35.txt │ └── run.sh ├── top_pe_tile.sv ├── top_tpe.sv ├── tree_full_sum.sv ├── vector_encoder.sv └── weight_rf.sv ├── OPT3_OPT4C ├── array │ ├── sim │ │ ├── filelist.f │ │ ├── makefile │ │ └── test_opt4c_column_array.sv │ ├── syn │ │ ├── dc_array.tcl │ │ ├── filelist.f │ │ ├── outputs_array │ │ │ └── saed32rvt_tt0p85v25c │ │ │ │ ├── top_pe_column_n16_area_report_0.58.txt │ │ │ │ ├── top_pe_column_n16_area_report_1.0.txt │ │ │ │ ├── top_pe_column_n16_area_report_1.4.txt │ │ │ │ ├── top_pe_column_n16_timing_report_0.58.txt │ │ │ │ ├── top_pe_column_n16_timing_report_1.0.txt │ │ │ │ ├── top_pe_column_n16_timing_report_1.4.txt │ │ │ │ ├── top_pe_column_n32_area_report_0.59.txt │ │ │ │ ├── top_pe_column_n32_area_report_0.6.txt │ │ │ │ ├── top_pe_column_n32_area_report_0.8.txt │ │ │ │ ├── top_pe_column_n32_area_report_1.0.txt │ │ │ │ ├── top_pe_column_n32_area_report_1.4.txt │ │ │ │ ├── top_pe_column_n32_timing_report_0.59.txt │ │ │ │ ├── top_pe_column_n32_timing_report_0.6.txt │ │ │ │ ├── top_pe_column_n32_timing_report_0.8.txt │ │ │ │ ├── top_pe_column_n32_timing_report_1.0.txt │ │ │ │ └── top_pe_column_n32_timing_report_1.4.txt │ │ └── run.sh │ └── top_pe_column.v └── pe │ ├── DW02_tree.sv │ ├── encoder_multi_bit.v │ ├── get_negedge.sv │ ├── get_pipline_mulwidth.v │ ├── pe.v │ ├── sim │ ├── filelist.f │ ├── makefile │ ├── test_opt3_pe_inner_product_vectors.sv │ └── timescale.sv │ ├── sparse_encoder.v │ └── top_pe.v ├── README.md ├── assets ├── 64709dc13fe00c9e794af29ce991901-20250429185133-a8iy6xm.png ├── image-20250423170332-07aeb6b.png ├── image-20250424204435-5fldpy3.png ├── image-20250425153743-bfn0w5o.png ├── image-20250518164949-c177kdo.png ├── image-20250518165300-lx9r183.png └── image-20250518171735-9upne4o.png └── library └── saed32rvt_tt0p85v25c.db /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 QIZHE WU 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/get_pipline_mulwidth.v: -------------------------------------------------------------------------------- 1 | module get_pipeline_mulwidth #( /* Delay line: `signal` passes through N WIDTH-bit registers in series; pipeline_signal is the last register. */ 2 | parameter N = 4, 3 | parameter WIDTH = 8 4 | )( 5 | input wire clk, 6 | input wire rst_n, 7 | input wire [WIDTH-1:0] signal, 8 | output wire [WIDTH-1:0] pipeline_signal 9 | ); 10 | 11 | 12 | reg [WIDTH-1:0] pipeline_regs [N-1:0]; 13 | 14 | 15 | genvar i; 16 | generate 17 | for (i = 0; i < N; i = i + 1) begin : pipeline_stage 18 | always @(posedge clk) begin /* clocked on clk only, so the active-low rst_n clear is synchronous */ 19 | if(~rst_n) 20 | pipeline_regs[i] <= 0; 21 | else begin 22 | if (i == 0) begin /* stage 0 samples the module input; every later stage samples the previous stage */ 23 | pipeline_regs[i] <= signal; 24 | end else begin 25 | pipeline_regs[i] <= pipeline_regs[i-1]; 26 | end 27 | end 28 | end 29 | end 30 | endgenerate 31 | 32 | 33 | assign pipeline_signal = pipeline_regs[N-1]; 34 | 35 | endmodule 36 | 37 | 38 | // // Instantiate the get_pipeline_mulwidth module 39 | // get_pipeline_mulwidth #( 40 | // .N(4), // set the pipeline depth to 4 41 | // .WIDTH(8) // set the signal width to 8 bits 42 | // ) pipeline_inst ( 43 | // .clk(clk), // connect the clock signal 44 | // .rst_n(rst_n), // connect the reset signal 45 | // .signal(input_signal), // connect the input signal 46 | // .pipeline_signal(output_signal) // connect the output signal 47 | // ); 48 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/pe.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | // PE: o_y_p <= i_y_p + i_z_a * i_x_b (registered multiply-accumulate); i_z_a and i_x_b are also forwarded through one register each 4 | module PE # 5 | ( 6 | parameter WIDTH = 8, 7 | parameter ACC_WIDTH = 24 8 | ) 9 | ( 10 | input wire rst_n, 11 | input wire clk, 12 | input wire signed [WIDTH-1 :0] i_z_a, 13 | input wire signed [WIDTH-1 :0] i_x_b, 14 | input wire signed [ACC_WIDTH-1 :0] i_y_p, 15 | 16 | output reg signed [WIDTH-1 :0] o_z_a, 17 | output reg signed [WIDTH-1 :0] o_x_b, 18 | output reg signed [ACC_WIDTH-1 :0] o_y_p 19 | ); 20 | 21 | always @(posedge clk or negedge rst_n) begin 22 | if(!rst_n) begin 
23 | o_y_p <= 0; 24 | end 25 | else begin 26 | o_y_p <= i_y_p + i_z_a*i_x_b; /* signed product of the two operands added to the incoming partial sum */ 27 | end 28 | end 29 | always @(posedge clk or negedge rst_n) begin /* pass both operands on, delayed by one register; asynchronous active-low reset clears them */ 30 | if(!rst_n) begin 31 | o_z_a <= 0; 32 | o_x_b <= 0; 33 | end 34 | else begin 35 | o_z_a <= i_z_a; 36 | o_x_b <= i_x_b; 37 | end 38 | end 39 | 40 | endmodule 41 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/sim/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/sim/test_mac_tc_array.sv 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/get_pipline_mulwidth.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/pe.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/top.v -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | 
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/get_pipline_mulwidth.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/pe.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_mac_based/top.v -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_area_report_5.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 15:04:16 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 2242 14 | Number of nets: 176282 15 | Number of cells: 158330 16 | Number of combinational cells: 135812 17 | Number of sequential cells: 22518 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 18939 20 | Number of references: 61 21 | 22 | Combinational area: 364099.145599 23 | Buf/Inv area: 25153.140203 24 | Noncombinational area: 160248.468827 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 288290.688231 27 | 28 | Total cell area: 524347.614426 29 | Total area: 812638.302657 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 524347.6144 100.0 364099.1456 160248.4688 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 364099.1456 160248.4688 0.0000 42 | 43 | 1 
44 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_area_report_5.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 15:03:14 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 2242 14 | Number of nets: 173938 15 | Number of cells: 155827 16 | Number of combinational cells: 134526 17 | Number of sequential cells: 21301 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 18823 20 | Number of references: 60 21 | 22 | Combinational area: 359446.277265 23 | Buf/Inv area: 24840.543061 24 | Noncombinational area: 151577.075397 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 282457.359956 27 | 28 | Total cell area: 511023.352662 29 | Total area: 793480.712618 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 511023.3527 100.0 359446.2773 151577.0754 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 359446.2773 151577.0754 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_area_report_6.0.txt: -------------------------------------------------------------------------------- 1 | 
2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 15:01:57 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 2242 14 | Number of nets: 172558 15 | Number of cells: 154052 16 | Number of combinational cells: 133456 17 | Number of sequential cells: 20596 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 18762 20 | Number of references: 49 21 | 22 | Combinational area: 355401.575711 23 | Buf/Inv area: 24580.807870 24 | Noncombinational area: 146568.913646 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 277591.824793 27 | 28 | Total cell area: 501970.489357 29 | Total area: 779562.314150 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 501970.4894 100.0 355401.5757 146568.9136 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 355401.5757 146568.9136 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_area_report_6.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 16:30:00 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: 
/home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 2242 14 | Number of nets: 169812 15 | Number of cells: 150451 16 | Number of combinational cells: 128955 17 | Number of sequential cells: 21496 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 18524 20 | Number of references: 50 21 | 22 | Combinational area: 345042.665789 23 | Buf/Inv area: 24108.608295 24 | Noncombinational area: 152969.784542 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 280464.082969 27 | 28 | Total cell area: 498012.450331 29 | Total area: 778476.533299 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 498012.4503 100.0 345042.6658 152969.7845 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 345042.6658 152969.7845 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_area_report_6.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 16:29:53 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 2242 14 | Number of nets: 168880 15 | Number of cells: 148730 16 | Number of combinational cells: 126794 17 | Number of sequential cells: 21936 18 | 
Number of macros/black boxes: 0 19 | Number of buf/inv: 18535 20 | Number of references: 45 21 | 22 | Combinational area: 338651.961035 23 | Buf/Inv area: 23931.978190 24 | Noncombinational area: 156093.214358 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 279650.345222 27 | 28 | Total cell area: 494745.175393 29 | Total area: 774395.520614 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 494745.1754 100.0 338651.9610 156093.2144 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 338651.9610 156093.2144 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_power_report_5.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 15:04:17 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time 
Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 26.6757 mW (93%) 34 | Net Switching Power = 1.9664 mW (7%) 35 | --------- 36 | Total Dynamic Power = 28.6421 mW (100%) 37 | 38 | Cell Leakage Power = 13.1521 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 2.1310e+04 442.8031 5.5410e+09 2.7294e+04 ( 65.31%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 5.3655e+03 1.5236e+03 7.6111e+09 1.4500e+04 ( 34.69%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 2.6676e+04 uW 1.9664e+03 uW 1.3152e+10 pW 4.1794e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_power_report_5.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 15:03:15 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 
0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 23.2528 mW (93%) 34 | Net Switching Power = 1.8356 mW (7%) 35 | --------- 36 | Total Dynamic Power = 25.0884 mW (100%) 37 | 38 | Cell Leakage Power = 12.7093 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 1.8301e+04 409.9053 5.2520e+09 2.3963e+04 ( 63.40%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 4.9520e+03 1.4257e+03 7.4573e+09 1.3835e+04 ( 36.60%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 2.3253e+04 uW 1.8356e+03 uW 1.2709e+10 pW 3.7798e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_power_report_6.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 15:01:58 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | 
------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 20.4053 mW (93%) 34 | Net Switching Power = 1.6114 mW (7%) 35 | --------- 36 | Total Dynamic Power = 22.0167 mW (100%) 37 | 38 | Cell Leakage Power = 12.3190 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 1.6152e+04 360.5312 5.0649e+09 2.1578e+04 ( 62.84%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 4.2531e+03 1.2508e+03 7.2540e+09 1.2758e+04 ( 37.16%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 2.0405e+04 uW 1.6114e+03 uW 1.2319e+10 pW 3.4336e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_power_report_6.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 16:30:01 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c 
Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 20.2884 mW (93%) 34 | Net Switching Power = 1.5658 mW (7%) 35 | --------- 36 | Total Dynamic Power = 21.8541 mW (100%) 37 | 38 | Cell Leakage Power = 12.2804 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 1.6172e+04 364.0454 5.3036e+09 2.1840e+04 ( 63.98%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 4.1163e+03 1.2017e+03 6.9769e+09 1.2295e+04 ( 36.02%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 2.0288e+04 uW 1.5658e+03 uW 1.2280e+10 pW 3.4135e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_cube_8_power_report_6.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 16:29:54 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: 
/home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 19.7638 mW (93%) 34 | Net Switching Power = 1.5017 mW (7%) 35 | --------- 36 | Total Dynamic Power = 21.2654 mW (100%) 37 | 38 | Cell Leakage Power = 12.1763 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 1.5926e+04 352.0675 5.4070e+09 2.1686e+04 ( 64.85%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 3.8374e+03 1.1496e+03 6.7692e+09 1.1756e+04 ( 35.15%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 1.9764e+04 uW 1.5016e+03 uW 1.2176e+10 pW 3.3442e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_mac_based/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1 -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/PE.v: 
-------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | 3 | // b = w * a 4 | module PE # 5 | ( 6 | parameter WIDTH = 8, 7 | parameter ACC_WIDTH = 24 8 | ) 9 | ( 10 | input wire rst_n, 11 | input wire clk, 12 | input wire signed [WIDTH-1 :0] i_z_a, 13 | input wire signed [WIDTH-1 :0] i_x_b, 14 | input wire signed [2*ACC_WIDTH-1 :0] i_y_p, 15 | 16 | output reg signed [WIDTH-1 :0] o_z_a, 17 | output reg signed [WIDTH-1 :0] o_x_b, 18 | output reg signed [2*ACC_WIDTH-1 :0] o_y_p 19 | ); 20 | 21 | always @(posedge clk or negedge rst_n) begin 22 | if(!rst_n) begin 23 | o_z_a <= 0; 24 | o_x_b <= 0; 25 | end 26 | else begin 27 | o_z_a <= i_z_a; 28 | o_x_b <= i_x_b; 29 | end 30 | end 31 | 32 | wire [ACC_WIDTH-1:0] acc_sum; 33 | wire [ACC_WIDTH-1:0] acc_carry; 34 | always @(*) begin 35 | if(!rst_n) begin 36 | o_y_p = 0; 37 | end 38 | else begin 39 | o_y_p = {acc_sum, acc_carry}; // partial_result + weight * a; 40 | end 41 | end 42 | opt1_mac #( 43 | .ACC_WIDTH(ACC_WIDTH), 44 | .INPUT_PIP(0) 45 | ) opt1_mac_test ( 46 | .clk(clk), 47 | .rst_n(rst_n), 48 | .operand_a_in(i_z_a), 49 | .operand_b_in(i_x_b), 50 | .partial_result(i_y_p), 51 | .acc_sum(acc_sum), 52 | .acc_carry(acc_carry) 53 | ); 54 | 55 | endmodule 56 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/booth_partial_product_generator_pp1.v: -------------------------------------------------------------------------------- 1 | module booth_partial_product_generator_pp1( 2 | input wire [1:0] operand_slice_a, 3 | input wire [7:0] operand_b, 4 | input wire [8:0] operand_b_neg, 5 | output wire [9:0] pp_out 6 | ); 7 | 8 | wire [8:0] pp_source; 9 | wire not_code0; 10 | //---------------------------------------------------- 11 | //| pp | flag_2x | flag_s1 | flag_s2 | 12 | //---------------------------------------------------- 13 | //| operand_b | 0 | 0 | 1 | 14 | //| -operand_b | 0 | 1 | 0 | 15 | //| 2operand_b | 1 
| 0 | 1 | 16 | //| -2operand_b | 1 | 1 | 0 | 17 | //| 0 | x | 0 | 0 | 18 | //---------------------------------------------------- 19 | 20 | //---------------------------------------------------- 21 | //operand_slice_a[1] operand_slice_a[0] operand_slice_a[-1] | pp 22 | //---------------------------------------------------- 23 | // 0 0 0 | 0 24 | // 0 1 0 | operand_b 25 | // 1 0 0 | 2operand_b 26 | // 1 1 0 | -operand_b 27 | //---------------------------------------------------- 28 | 29 | wire flag_2x; 30 | wire flag_s1; 31 | wire flag_s2; 32 | assign not_code0 = ~operand_slice_a[0]; 33 | assign flag_2x = not_code0; 34 | assign flag_s1 = operand_slice_a[1]; // 取反 35 | assign flag_s2 = ~(operand_slice_a[1] | not_code0); // A 36 | wire flag_not_2x = operand_slice_a[0]; 37 | assign pp_source = (({{operand_b[7]}, operand_b} & {9{flag_s2}}) | (operand_b_neg & {9{flag_s1}})); // A or ~A or 0 38 | assign pp_out[0] = (!flag_2x & pp_source[0]);// x2 (<<1) pp_out[0]=0 39 | assign pp_out[8:1] = (({8{flag_2x}} & pp_source[7:0]) | ({8{flag_not_2x}} & pp_source[8:1])); 40 | assign pp_out[9] = pp_source[8]; 41 | 42 | endmodule -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/booth_pp_gen.v: -------------------------------------------------------------------------------- 1 | module booth_pp_gen( 2 | input wire [7:0] operand_a, 3 | input wire [7:0] operand_b, 4 | output wire [9:0] pp1, 5 | output wire [9:0] pp2, 6 | output wire [9:0] pp3, 7 | output wire [9:0] pp4 8 | ); 9 | 10 | wire [1:0] operand_slice_a1 ; 11 | wire [2:0] operand_slice_a2 ; 12 | wire [2:0] operand_slice_a3 ; 13 | wire [2:0] operand_slice_a4 ; 14 | wire [8:0] operand_b_neg ; 15 | 16 | inv_converter_8 inv_converter_8_inst( 17 | .data_i (operand_b), 18 | .inv_o (operand_b_neg) 19 | ); 20 | 21 | assign operand_slice_a1 = operand_a[1:0] ; 22 | assign operand_slice_a2 = operand_a[3:1] ; 23 | assign operand_slice_a3 = operand_a[5:3] ; 24 | assign 
operand_slice_a4 = operand_a[7:5] ; 25 | 26 | booth_partial_product_generator_pp1 ppg_1 ( 27 | .operand_slice_a (operand_slice_a1), 28 | .operand_b (operand_b), 29 | .operand_b_neg (operand_b_neg), 30 | .pp_out (pp1) 31 | ); 32 | 33 | booth_partial_product_generator ppg_2 ( 34 | .operand_slice_a (operand_slice_a2), 35 | .operand_b (operand_b), 36 | .operand_b_neg (operand_b_neg), 37 | .pp_out (pp2) 38 | ); 39 | 40 | booth_partial_product_generator ppg_3 ( 41 | .operand_slice_a (operand_slice_a3), 42 | .operand_b (operand_b), 43 | .operand_b_neg (operand_b_neg), 44 | .pp_out (pp3) 45 | ); 46 | 47 | booth_partial_product_generator ppg_4 ( 48 | .operand_slice_a (operand_slice_a4), 49 | .operand_b (operand_b), 50 | .operand_b_neg (operand_b_neg), 51 | .pp_out (pp4) 52 | ); 53 | 54 | endmodule 55 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/get_pipline_mulwidth.v: -------------------------------------------------------------------------------- 1 | module get_pipeline_mulwidth #( 2 | parameter N = 4, 3 | parameter WIDTH = 8 4 | )( 5 | input wire clk, 6 | input wire rst_n, 7 | input wire [WIDTH-1:0] signal, 8 | output wire [WIDTH-1:0] pipeline_signal 9 | ); 10 | 11 | 12 | reg [WIDTH-1:0] pipeline_regs [N-1:0]; 13 | 14 | 15 | genvar i; 16 | generate 17 | for (i = 0; i < N; i = i + 1) begin : pipeline_stage 18 | always @(posedge clk) begin 19 | if(~rst_n) 20 | pipeline_regs[i] <= 0; 21 | else begin 22 | if (i == 0) begin 23 | pipeline_regs[i] <= signal; 24 | end else begin 25 | pipeline_regs[i] <= pipeline_regs[i-1]; 26 | end 27 | end 28 | end 29 | end 30 | endgenerate 31 | 32 | 33 | assign pipeline_signal = pipeline_regs[N-1]; 34 | 35 | endmodule 36 | 37 | 38 | // // 实例化 get_pipeline_mulwidth 模块 39 | // get_pipeline_mulwidth #( 40 | // .N(4), // 设置流水线深度为 4 41 | // .WIDTH(8) // 设置信号宽度为 8 位 42 | // ) pipeline_inst ( 43 | // .clk(clk), // 连接时钟信号 44 | // .rst_n(rst_n), // 连接复位信号 45 | // .signal(input_signal), // 
连接输入信号 46 | // .pipeline_signal(output_signal) // 连接输出信号 47 | // ); 48 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/inv_conveter_8.v: -------------------------------------------------------------------------------- 1 | // 取反 加1 (-1*A、-2*A 都要进行取反加1) 2 | module inv_converter_8( 3 | input wire [7:0] data_i , 4 | output wire[8:0] inv_o 5 | ); 6 | 7 | wire [5:0] wire_cout ; 8 | wire not_o ; 9 | 10 | assign inv_o[0] = data_i[0]; 11 | 12 | inv_unit inv_unit_bit1( 13 | .a (data_i[1] ), 14 | .b (data_i[0] ), 15 | .xor_o (inv_o[1] ), 16 | .or_o (wire_cout[0] ) 17 | ); 18 | 19 | genvar i; 20 | generate 21 | for(i=2;i<=5;i=i+1) begin 22 | inv_unit inv_unit_inst( 23 | .a (data_i[i] ), 24 | .b (wire_cout[i-2] ), 25 | .xor_o (inv_o[i] ), 26 | .or_o (wire_cout[i-1] ) 27 | ); 28 | end 29 | endgenerate 30 | 31 | inv_unit_nor_out inv_unit_nor_out_inst_6( 32 | .a (data_i[6] ), 33 | .b (wire_cout[4] ), 34 | .xor_o (inv_o[6] ), 35 | .nor_o (wire_cout[5] ) 36 | ); 37 | 38 | inv_unit_nor_out inv_unit_nor_out_inst_7( 39 | .a (data_i[7] ), 40 | .b (not_o ), 41 | .xor_o (inv_o[7] ), 42 | .nor_o ( ) 43 | ); 44 | 45 | 46 | 47 | assign not_o = ~wire_cout[5] ; 48 | 49 | 50 | assign inv_o[8] = ~(wire_cout[5] | data_i[7]); 51 | 52 | endmodule -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/inv_unit.v: -------------------------------------------------------------------------------- 1 | module inv_unit( 2 | input wire a , 3 | input wire b , 4 | output wire xor_o , 5 | output wire or_o 6 | ); 7 | 8 | wire aORb ; 9 | wire aNANDb ; 10 | 11 | assign aORb = a | b; 12 | assign aNANDb = ~(a & b); 13 | assign xor_o = (aORb & aNANDb); 14 | assign or_o = aORb; 15 | 16 | endmodule -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/inv_unit_nor_out.v: 
-------------------------------------------------------------------------------- 1 | module inv_unit_nor_out( 2 | input wire a , 3 | input wire b , 4 | 5 | output wire xor_o , 6 | output wire nor_o 7 | ); 8 | 9 | 10 | wire a_AND_b ; 11 | wire a_NOR_b ; 12 | 13 | 14 | assign a_AND_b = a & b; 15 | 16 | 17 | assign a_NOR_b = ~(a | b); 18 | 19 | 20 | assign xor_o = ~(a_AND_b | a_NOR_b); 21 | assign nor_o = a_NOR_b; 22 | 23 | endmodule -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/sim/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_partial_product_generator_pp1.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_partial_product_generator.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_pp_gen.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/DW02_tree.sv 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/get_pipline_mulwidth.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_conveter_8.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_unit_nor_out.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_unit.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/opt1_mac.v 11 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/PE.v 12 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/top.v 13 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/sim/test_opt1_cube.sv -------------------------------------------------------------------------------- 
/OPT1/cube/array_opt1_based/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_partial_product_generator_pp1.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_partial_product_generator.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/booth_pp_gen.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/DW02_tree.sv 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/get_pipline_mulwidth.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_conveter_8.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_unit_nor_out.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/inv_unit.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/opt1_mac.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/PE.v 11 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/cube/array_opt1_based/top.v 
-------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_cube_area_report_3.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 17:38:12 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 3458 14 | Number of nets: 182196 15 | Number of cells: 155226 16 | Number of combinational cells: 118270 17 | Number of sequential cells: 36956 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 8957 20 | Number of references: 53 21 | 22 | Combinational area: 320796.315136 23 | Buf/Inv area: 11469.010488 24 | Noncombinational area: 250917.902843 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 435113.750099 27 | 28 | Total cell area: 571714.217979 29 | Total area: 1006827.968078 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 571714.2180 100.0 320796.3151 250917.9028 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 320796.3151 250917.9028 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_cube_area_report_4.0.txt: -------------------------------------------------------------------------------- 
1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 17:29:19 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 3458 14 | Number of nets: 172546 15 | Number of cells: 145442 16 | Number of combinational cells: 113737 17 | Number of sequential cells: 31705 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 8533 20 | Number of references: 41 21 | 22 | Combinational area: 309579.414996 23 | Buf/Inv area: 10845.849386 24 | Noncombinational area: 215145.609035 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 339342.081341 27 | 28 | Total cell area: 524725.024031 29 | Total area: 864067.105371 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 524725.0240 100.0 309579.4150 215145.6090 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 309579.4150 215145.6090 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_cube_power_report_3.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | Date : Tue Apr 29 17:38:13 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | 
saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 1000000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 55.5638 mW (95%) 34 | Net Switching Power = 2.8361 mW (5%) 35 | --------- 36 | Total Dynamic Power = 58.3999 mW (100%) 37 | 38 | Cell Leakage Power = 15.3234 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 4.8903e+04 739.3984 9.5022e+09 5.9146e+04 ( 80.23%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 6.6606e+03 2.0966e+03 5.8212e+09 1.4578e+04 ( 19.77%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 5.5564e+04 uW 2.8360e+03 uW 1.5323e+10 pW 7.3724e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_cube_power_report_4.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : top 6 | Version: L-2016.03-SP1 7 | 
Date : Tue Apr 29 17:29:20 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | top 540000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 42.2340 mW (95%) 34 | Net Switching Power = 2.3983 mW (5%) 35 | --------- 36 | Total Dynamic Power = 44.6323 mW (100%) 37 | 38 | Cell Leakage Power = 13.7406 mW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 3.6503e+04 568.2457 8.1674e+09 4.5239e+04 ( 77.50%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 5.7307e+03 1.8300e+03 5.5732e+09 1.3134e+04 ( 22.50%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 4.2234e+04 uW 2.3982e+03 uW 1.3741e+10 pW 5.8373e+04 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/cube/array_opt1_based/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell 
-64bit -f dc.tcl > logs/dc.log 2>&1 -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/get_pipline_mulwidth.v: -------------------------------------------------------------------------------- 1 | module get_pipeline_mulwidth #( 2 | parameter N = 4, 3 | parameter WIDTH = 8 4 | )( 5 | input wire clk, 6 | input wire rst_n, 7 | input wire [WIDTH-1:0] signal, 8 | output wire [WIDTH-1:0] pipeline_signal 9 | ); 10 | 11 | 12 | reg [WIDTH-1:0] pipeline_regs [N-1:0]; 13 | 14 | 15 | genvar i; 16 | generate 17 | for (i = 0; i < N; i = i + 1) begin : pipeline_stage 18 | always @(posedge clk) begin 19 | if(~rst_n) 20 | pipeline_regs[i] <= 0; 21 | else begin 22 | if (i == 0) begin 23 | pipeline_regs[i] <= signal; 24 | end else begin 25 | pipeline_regs[i] <= pipeline_regs[i-1]; 26 | end 27 | end 28 | end 29 | end 30 | endgenerate 31 | 32 | 33 | assign pipeline_signal = pipeline_regs[N-1]; 34 | 35 | endmodule 36 | 37 | 38 | // // 实例化 get_pipeline_mulwidth 模块 39 | // get_pipeline_mulwidth #( 40 | // .N(4), // 设置流水线深度为 4 41 | // .WIDTH(8) // 设置信号宽度为 8 位 42 | // ) pipeline_inst ( 43 | // .clk(clk), // 连接时钟信号 44 | // .rst_n(rst_n), // 连接复位信号 45 | // .signal(input_signal), // 连接输入信号 46 | // .pipeline_signal(output_signal) // 连接输出信号 47 | // ); 48 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/pe.v: -------------------------------------------------------------------------------- 1 | module PE # 2 | ( 3 | parameter WIDTH = 8, 4 | parameter ACC_WIDTH = 32 5 | ) 6 | ( 7 | input wire rst_n, 8 | input wire clk, 9 | input wire signed [WIDTH-1 :0] a, 10 | input wire signed [WIDTH-1 :0] b, 11 | input wire clc, // clean_result_cache, 12 | 13 | output reg signed [WIDTH-1 :0] row, 14 | output reg signed [WIDTH-1 :0] col, 15 | output reg signed [ACC_WIDTH-1 :0] result 16 | ); 17 | 18 | always @(posedge clk or negedge rst_n) begin 19 | if(!rst_n) begin 20 | 
result <= 0; 21 | end 22 | else begin 23 | if(clc) 24 | result <= 0; 25 | else 26 | result <= result + a * b ; 27 | end 28 | end 29 | 30 | always @(posedge clk or negedge rst_n) begin 31 | if(!rst_n) begin 32 | row <= 0; 33 | col <= 0; 34 | end 35 | else begin 36 | row <= a; 37 | col <= b; 38 | end 39 | end 40 | 41 | endmodule 42 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/sim/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/get_pipline_mulwidth.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/pe.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/top.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/sim/test_mac_os_array.sv -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- 
/OPT1/systolic_array_os/array_mac_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/pe.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_mac_based/top.v -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_5.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 22:34:10 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 8451 14 | Number of nets: 128311 15 | Number of cells: 112726 16 | Number of combinational cells: 84190 17 | Number of sequential cells: 28536 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 5805 20 | Number of references: 50 21 | 22 | Combinational area: 239275.559873 23 | Buf/Inv area: 7667.778681 24 | Noncombinational area: 203064.617542 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 275201.167973 27 | 28 | Total cell area: 442340.177415 29 | Total area: 717541.345389 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 442340.1774 100.0 239275.5599 203064.6175 0.0000 top 40 | -------------------------------- ----------- ------- ----------- 
----------- ------ --------- 41 | Total 239275.5599 203064.6175 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_5.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 09:05:13 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 8451 14 | Number of nets: 142454 15 | Number of cells: 129717 16 | Number of combinational cells: 102527 17 | Number of sequential cells: 27190 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 19750 20 | Number of references: 68 21 | 22 | Combinational area: 245219.225256 23 | Buf/Inv area: 26220.036707 24 | Noncombinational area: 193487.454879 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 299650.902472 27 | 28 | Total cell area: 438706.680136 29 | Total area: 738357.582608 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 438706.6801 100.0 245219.2253 193487.4549 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 245219.2253 193487.4549 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- 
/OPT1/systolic_array_os/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_6.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 02:01:06 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 8451 14 | Number of nets: 142025 15 | Number of cells: 128981 16 | Number of combinational cells: 101112 17 | Number of sequential cells: 27869 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 19836 20 | Number of references: 64 21 | 22 | Combinational area: 240515.019886 23 | Buf/Inv area: 26108.213326 24 | Noncombinational area: 198318.732403 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 302296.527989 27 | 28 | Total cell area: 438833.752290 29 | Total area: 741130.280279 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 438833.7523 100.0 240515.0199 198318.7324 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 240515.0199 198318.7324 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_6.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report 
: area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 21:20:10 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 8450 14 | Number of nets: 121193 15 | Number of cells: 107816 16 | Number of combinational cells: 87253 17 | Number of sequential cells: 20563 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 13472 20 | Number of references: 41 21 | 22 | Combinational area: 230356.120978 23 | Buf/Inv area: 17334.908192 24 | Noncombinational area: 146326.968557 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 219054.618540 27 | 28 | Total cell area: 376683.089535 29 | Total area: 595737.708075 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 376683.0895 100.0 230356.1210 146326.9686 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 230356.1210 146326.9686 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_mac_based/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1 -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/booth_partial_product_generator_pp1.v: -------------------------------------------------------------------------------- 1 | 
module booth_partial_product_generator_pp1( 2 | input wire [1:0] operand_slice_a, 3 | input wire [7:0] operand_b, 4 | input wire [8:0] operand_b_neg, 5 | output wire [9:0] pp_out 6 | ); 7 | /* Booth partial product for the least-significant slice: operand_slice_a[-1] is implicitly 0, so the 10-bit pp_out is one of 0, +operand_b, operand_b_neg or 2 x operand_b_neg. operand_b_neg is expected to carry the 9-bit negation of operand_b. */ 8 | wire [8:0] pp_source; 9 | wire not_code0; 10 | //---------------------------------------------------- 11 | //| pp | flag_2x | flag_s1 | flag_s2 | 12 | //---------------------------------------------------- 13 | //| operand_b | 0 | 0 | 1 | 14 | //| -operand_b | 0 | 1 | 0 | 15 | //| 2operand_b | 1 | 0 | 1 | 16 | //| -2operand_b | 1 | 1 | 0 | 17 | //| 0 | x | 0 | 0 | 18 | //---------------------------------------------------- 19 | 20 | //---------------------------------------------------- 21 | //operand_slice_a[1] operand_slice_a[0] operand_slice_a[-1] | pp 22 | //---------------------------------------------------- 23 | // 0 0 0 | 0 24 | // 0 1 0 | operand_b 25 | // 1 0 0 | -2operand_b 26 | // 1 1 0 | -operand_b 27 | //---------------------------------------------------- 28 | 29 | wire flag_2x; 30 | wire flag_s1; 31 | wire flag_s2; 32 | assign not_code0 = ~operand_slice_a[0]; 33 | assign flag_2x = not_code0; 34 | assign flag_s1 = operand_slice_a[1]; // select the negated operand (operand_b_neg) 35 | assign flag_s2 = ~(operand_slice_a[1] | not_code0); // select +operand_b (A) 36 | wire flag_not_2x = operand_slice_a[0]; 37 | assign pp_source = (({{operand_b[7]}, operand_b} & {9{flag_s2}}) | (operand_b_neg & {9{flag_s1}})); // sign-extended A, operand_b_neg or 0 38 | assign pp_out[0] = (!flag_2x & pp_source[0]);// when x2 (<<1) is selected, pp_out[0]=0 39 | assign pp_out[8:1] = (({8{flag_2x}} & pp_source[7:0]) | ({8{flag_not_2x}} & pp_source[8:1])); 40 | assign pp_out[9] = pp_source[8]; 41 | 42 | endmodule -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/booth_pp_gen.v: -------------------------------------------------------------------------------- 1 | module booth_pp_gen( 2 | input wire [7:0] operand_a, 3 | input wire [7:0] operand_b, 4 | output wire [9:0] pp1, 5 | output wire [9:0] pp2, 6 | output wire
[9:0] pp3, 7 | output wire [9:0] pp4 8 | ); 9 | /* Splits operand_a into four overlapping slices (a[1:0], a[3:1], a[5:3], a[7:5]) and produces one 10-bit partial product per slice. inv_converter_8 is instantiated once and its operand_b_neg output is shared by all four generators. */ 10 | wire [1:0] operand_slice_a1 ; 11 | wire [2:0] operand_slice_a2 ; 12 | wire [2:0] operand_slice_a3 ; 13 | wire [2:0] operand_slice_a4 ; 14 | wire [8:0] operand_b_neg ; 15 | 16 | inv_converter_8 inv_converter_8_inst( 17 | .data_i (operand_b), 18 | .inv_o (operand_b_neg) 19 | ); 20 | 21 | assign operand_slice_a1 = operand_a[1:0] ; 22 | assign operand_slice_a2 = operand_a[3:1] ; 23 | assign operand_slice_a3 = operand_a[5:3] ; 24 | assign operand_slice_a4 = operand_a[7:5] ; 25 | 26 | booth_partial_product_generator_pp1 ppg_1 ( 27 | .operand_slice_a (operand_slice_a1), 28 | .operand_b (operand_b), 29 | .operand_b_neg (operand_b_neg), 30 | .pp_out (pp1) 31 | ); 32 | 33 | booth_partial_product_generator ppg_2 ( 34 | .operand_slice_a (operand_slice_a2), 35 | .operand_b (operand_b), 36 | .operand_b_neg (operand_b_neg), 37 | .pp_out (pp2) 38 | ); 39 | 40 | booth_partial_product_generator ppg_3 ( 41 | .operand_slice_a (operand_slice_a3), 42 | .operand_b (operand_b), 43 | .operand_b_neg (operand_b_neg), 44 | .pp_out (pp3) 45 | ); 46 | 47 | booth_partial_product_generator ppg_4 ( 48 | .operand_slice_a (operand_slice_a4), 49 | .operand_b (operand_b), 50 | .operand_b_neg (operand_b_neg), 51 | .pp_out (pp4) 52 | ); 53 | 54 | endmodule 55 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/get_pipline_mulwidth.v: -------------------------------------------------------------------------------- 1 | module get_pipeline_mulwidth #( 2 | parameter N = 4, 3 | parameter WIDTH = 8 4 | )( 5 | input wire clk, 6 | input wire rst_n, 7 | input wire [WIDTH-1:0] signal, 8 | output wire [WIDTH-1:0] pipeline_signal 9 | ); 10 | /* N-stage shift register of WIDTH-bit words: stage 0 samples signal, stage i samples stage i-1, and pipeline_signal is stage N-1, i.e. signal delayed by N clock edges. rst_n is active low and is only evaluated on the rising clock edge (synchronous reset). NOTE(review): presumably N >= 1 is required - with N = 0 no stage is generated and pipeline_signal is never driven; confirm with callers. */ 11 | 12 | reg [WIDTH-1:0] pipeline_regs [N-1:0]; 13 | 14 | 15 | genvar i; 16 | generate 17 | for (i = 0; i < N; i = i + 1) begin : pipeline_stage 18 | always @(posedge clk) begin 19 | if(~rst_n) 20 | pipeline_regs[i] <= 0; 21 | else begin 22 | if (i == 0)
begin 23 | pipeline_regs[i] <= signal; 24 | end else begin 25 | pipeline_regs[i] <= pipeline_regs[i-1]; 26 | end 27 | end 28 | end 29 | end 30 | endgenerate 31 | 32 | 33 | assign pipeline_signal = pipeline_regs[N-1]; 34 | 35 | endmodule 36 | 37 | 38 | // // Instantiate the get_pipeline_mulwidth module 39 | // get_pipeline_mulwidth #( 40 | // .N(4), // pipeline depth: 4 stages 41 | // .WIDTH(8) // signal width: 8 bits 42 | // ) pipeline_inst ( 43 | // .clk(clk), // clock 44 | // .rst_n(rst_n), // reset 45 | // .signal(input_signal), // input signal 46 | // .pipeline_signal(output_signal) // output signal 47 | // ); 48 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/inv_conveter_8.v: -------------------------------------------------------------------------------- 1 | // Invert and add 1 (needed for both -1*A and -2*A): inv_o is the 9-bit two's-complement negation of the sign-extended data_i 2 | module inv_converter_8( 3 | input wire [7:0] data_i , 4 | output wire[8:0] inv_o 5 | ); 6 | 7 | wire [5:0] wire_cout ; 8 | wire not_o ; 9 | /* Bit 0 passes through; every higher bit is data_i[k] XOR (OR of all lower data_i bits). wire_cout[0..4] carry that running OR of data_i[k+1:0]; wire_cout[5] is its inverted form (NOR over data_i[6:0]). */ 10 | assign inv_o[0] = data_i[0]; 11 | 12 | inv_unit inv_unit_bit1( 13 | .a (data_i[1] ), 14 | .b (data_i[0] ), 15 | .xor_o (inv_o[1] ), 16 | .or_o (wire_cout[0] ) 17 | ); 18 | 19 | genvar i; 20 | generate 21 | for(i=2;i<=5;i=i+1) begin 22 | inv_unit inv_unit_inst( 23 | .a (data_i[i] ), 24 | .b (wire_cout[i-2] ), 25 | .xor_o (inv_o[i] ), 26 | .or_o (wire_cout[i-1] ) 27 | ); 28 | end 29 | endgenerate 30 | 31 | inv_unit_nor_out inv_unit_nor_out_inst_6( 32 | .a (data_i[6] ), 33 | .b (wire_cout[4] ), 34 | .xor_o (inv_o[6] ), 35 | .nor_o (wire_cout[5] ) 36 | ); 37 | 38 | inv_unit_nor_out inv_unit_nor_out_inst_7( 39 | .a (data_i[7] ), 40 | .b (not_o ), 41 | .xor_o (inv_o[7] ), 42 | .nor_o ( ) 43 | ); 44 | 45 | 46 | 47 | assign not_o = ~wire_cout[5] ; 48 | 49 | 50 | assign inv_o[8] = ~(wire_cout[5] | data_i[7]); 51 | 52 | endmodule -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/inv_unit.v:
-------------------------------------------------------------------------------- 1 | module inv_unit( 2 | input wire a , 3 | input wire b , 4 | output wire xor_o , 5 | output wire or_o 6 | ); 7 | /* xor_o = a ^ b, formed as (a | b) & ~(a & b); or_o exposes the shared a | b term. */ 8 | wire aORb ; 9 | wire aNANDb ; 10 | 11 | assign aORb = a | b; 12 | assign aNANDb = ~(a & b); 13 | assign xor_o = (aORb & aNANDb); 14 | assign or_o = aORb; 15 | 16 | endmodule -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/inv_unit_nor_out.v: -------------------------------------------------------------------------------- 1 | module inv_unit_nor_out( 2 | input wire a , 3 | input wire b , 4 | 5 | output wire xor_o , 6 | output wire nor_o 7 | ); 8 | /* xor_o = a ^ b, formed as ~((a & b) | ~(a | b)); nor_o exposes the shared ~(a | b) term. */ 9 | 10 | wire a_AND_b ; 11 | wire a_NOR_b ; 12 | 13 | 14 | assign a_AND_b = a & b; 15 | 16 | 17 | assign a_NOR_b = ~(a | b); 18 | 19 | 20 | assign xor_o = ~(a_AND_b | a_NOR_b); 21 | assign nor_o = a_NOR_b; 22 | 23 | endmodule -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/pe.v: -------------------------------------------------------------------------------- 1 | module PE # 2 | ( 3 | parameter WIDTH = 8, 4 | parameter ACC_WIDTH = 32 5 | ) 6 | ( 7 | input wire rst_n, 8 | input wire clk, 9 | input wire signed [WIDTH-1 :0] a, 10 | input wire signed [WIDTH-1 :0] b, 11 | input wire clc, // clean_result_cache 12 | 13 | output reg signed [WIDTH-1 :0] row, 14 | output reg signed [WIDTH-1 :0] col, 15 | output signed [2*ACC_WIDTH-1 :0] result 16 | ); 17 | /* Wraps one opt1_mac and registers a/b onto row/col. result is the raw concatenation {acc_sum, acc_carry}, hence 2*ACC_WIDTH bits; presumably a carry-save pair that the consumer still has to add - confirm against opt1_mac. NOTE(review): WIDTH is not forwarded to opt1_mac. */ 18 | 19 | wire [ACC_WIDTH-1:0] acc_sum; 20 | wire [ACC_WIDTH-1:0] acc_carry; 21 | opt1_mac #( 22 | .ACC_WIDTH(ACC_WIDTH), 23 | .INPUT_PIP(0) 24 | ) opt1_mac_test ( 25 | .clk(clk), 26 | .rst_n(rst_n), 27 | .operand_a_in(a), 28 | .operand_b_in(b), 29 | .clc(clc), // input wire clean_result_cache 30 | .acc_sum(acc_sum), 31 | .acc_carry(acc_carry) 32 | ); 33 | assign result = {acc_sum, acc_carry}; 34 | 35 | always @(posedge clk or negedge
rst_n) begin 36 | if(!rst_n) begin 37 | row <= 0; 38 | col <= 0; 39 | end 40 | else begin 41 | row <= a; 42 | col <= b; 43 | end 44 | end 45 | /* row/col are a/b delayed by one clock; both are cleared asynchronously while rst_n is low. */ 46 | endmodule 47 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/sim/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_partial_product_generator_pp1.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_partial_product_generator.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_pp_gen.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/DW02_tree.sv 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/get_pipline_mulwidth.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_conveter_8.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_unit_nor_out.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_unit.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/opt1_mac.v 11 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/pe.v 12 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/top.v 13 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/sim/test_opt1_os_array.sv -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/sim/makefile:
-------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_partial_product_generator_pp1.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_partial_product_generator.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/booth_pp_gen.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/get_pipline_mulwidth.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_conveter_8.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_unit_nor_out.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/inv_unit.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/opt1_mac.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/pe.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/array_opt1_based/top.v 
-------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_3.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 22:20:27 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 16643 14 | Number of nets: 112293 15 | Number of cells: 89819 16 | Number of combinational cells: 62411 17 | Number of sequential cells: 27408 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 3642 20 | Number of references: 44 21 | 22 | Combinational area: 180352.527108 23 | Buf/Inv area: 4965.211362 24 | Noncombinational area: 184527.864258 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 214887.355961 27 | 28 | Total cell area: 364880.391366 29 | Total area: 579767.747327 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 364880.3914 100.0 180352.5271 184527.8643 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 180352.5271 184527.8643 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_3.1.txt: 
-------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 22:25:38 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 16643 14 | Number of nets: 112169 15 | Number of cells: 89606 16 | Number of combinational cells: 62511 17 | Number of sequential cells: 27095 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 3801 20 | Number of references: 46 21 | 22 | Combinational area: 180041.454863 23 | Buf/Inv area: 5188.858085 24 | Noncombinational area: 182441.850241 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 213063.218085 27 | 28 | Total cell area: 362483.305104 29 | Total area: 575546.523189 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 362483.3051 100.0 180041.4549 182441.8502 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 180041.4549 182441.8502 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_4.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 22:14:57 2025 7 | 
**************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 16643 14 | Number of nets: 106921 15 | Number of cells: 84128 16 | Number of combinational cells: 62175 17 | Number of sequential cells: 21953 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 3159 20 | Number of references: 41 21 | 22 | Combinational area: 178797.419843 23 | Buf/Inv area: 4437.354282 24 | Noncombinational area: 147788.806561 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 198388.423749 27 | 28 | Total cell area: 326586.226403 29 | Total area: 524974.650152 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 326586.2264 100.0 178797.4198 147788.8066 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 178797.4198 147788.8066 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_5.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Mon Apr 28 22:11:11 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 16643 14 | Number of nets: 
110172 15 | Number of cells: 87753 16 | Number of combinational cells: 66681 17 | Number of sequential cells: 21072 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 7882 20 | Number of references: 45 21 | 22 | Combinational area: 182889.137771 23 | Buf/Inv area: 10241.494951 24 | Noncombinational area: 141605.482917 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 192544.112125 27 | 28 | Total cell area: 324494.620688 29 | Total area: 517038.732813 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 324494.6207 100.0 182889.1378 141605.4829 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 182889.1378 141605.4829 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/array_opt1_based/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1 -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/mac.v: -------------------------------------------------------------------------------- 1 | module mac # 2 | ( 3 | parameter WIDTH = 8, 4 | parameter ACC_WIDTH = 32 5 | ) 6 | ( 7 | input wire rst_n, 8 | input wire clk, 9 | input wire signed [WIDTH-1 :0] a, 10 | input wire signed [WIDTH-1 :0] b, 11 | output reg signed [ACC_WIDTH-1 :0] result 12 | ); 13 | /* Signed multiply-accumulate: a and b are registered, multiplied, and the product is added into result on every clock while rst_n is high. NOTE(review): result_p and the operand registers below are hard-coded to 16 and 8 bits instead of 2*WIDTH and WIDTH, so a WIDTH other than 8 is presumably unsupported - confirm before reuse. */ 14 | wire signed [15:0] result_p; 15 | reg signed [7:0] operand_a; 16 | reg signed [7:0] operand_b; 17 | 18 |
assign result_p = $signed(operand_a) * $signed(operand_b) ; 19 | 20 | always @(posedge clk) begin 21 | operand_a <= a; 22 | operand_b <= b; 23 | end 24 | /* NOTE(review): operand_a/operand_b have no reset, unlike result (asynchronous, active-low rst_n); result only starts from a known product if a clock edge occurs while rst_n is still low - confirm the reset sequence guarantees this. */ 25 | always @(posedge clk or negedge rst_n) begin 26 | if(!rst_n) begin 27 | result <= 0; 28 | end 29 | else begin 30 | result <= result + $signed(result_p); 31 | end 32 | end 33 | 34 | 35 | endmodule 36 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array/mac_pe/mac.v 2 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_area_report_1.2.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 22:02:25 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 50 14 | Number of nets: 648 15 | Number of cells: 566 16 | Number of combinational cells: 517 17 | Number of sequential cells: 49 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 76 20 | Number of references: 29 21 | 22 | Combinational area: 1327.394108 23 | Buf/Inv area: 96.574720 24 | Noncombinational area: 338.011527 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 255.053295 27 | 28 | Total cell area: 1665.405634 29 | Total area: 1920.458929 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total
Total national national boxes Design 38 | -------------------------------- --------- ------- --------- --------- ------ --------- 39 | mac 1665.4056 100.0 1327.3941 338.0115 0.0000 mac 40 | -------------------------------- --------- ------- --------- --------- ------ --------- 41 | Total 1327.3941 338.0115 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_area_report_1.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 22:02:11 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 50 14 | Number of nets: 629 15 | Number of cells: 548 16 | Number of combinational cells: 500 17 | Number of sequential cells: 48 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 78 20 | Number of references: 25 21 | 22 | Combinational area: 1310.620601 23 | Buf/Inv area: 100.641025 24 | Noncombinational area: 338.011527 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 249.784443 27 | 28 | Total cell area: 1648.632128 29 | Total area: 1898.416571 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- --------- --------- ------ --------- 39 | mac 1648.6321 100.0 1310.6206 338.0115 0.0000 mac 40 | -------------------------------- --------- ------- --------- --------- ------ --------- 41 | Total 1310.6206 338.0115 0.0000 42 | 43 | 1 
44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_area_report_1.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 22:01:49 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 50 14 | Number of nets: 624 15 | Number of cells: 537 16 | Number of combinational cells: 489 17 | Number of sequential cells: 48 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 64 20 | Number of references: 32 21 | 22 | Combinational area: 1242.764157 23 | Buf/Inv area: 82.088512 24 | Noncombinational area: 336.994951 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 244.120786 27 | 28 | Total cell area: 1579.759108 29 | Total area: 1823.879893 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- --------- --------- ------ --------- 39 | mac 1579.7591 100.0 1242.7642 336.9950 0.0000 mac 40 | -------------------------------- --------- ------- --------- --------- ------ --------- 41 | Total 1242.7642 336.9950 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_area_report_1.667.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 
| Design : mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 23:56:20 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 50 14 | Number of nets: 553 15 | Number of cells: 464 16 | Number of combinational cells: 416 17 | Number of sequential cells: 48 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 48 20 | Number of references: 25 21 | 22 | Combinational area: 1115.438011 23 | Buf/Inv area: 60.994560 24 | Noncombinational area: 333.945223 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 216.743615 27 | 28 | Total cell area: 1449.383234 29 | Total area: 1666.126849 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- --------- --------- ------ --------- 39 | mac 1449.3832 100.0 1115.4380 333.9452 0.0000 mac 40 | -------------------------------- --------- ------- --------- --------- ------ --------- 41 | Total 1115.4380 333.9452 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_area_report_2.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 23:56:08 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 50 14 | Number of nets: 485 15 | 
Number of cells: 392 16 | Number of combinational cells: 344 17 | Number of sequential cells: 48 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 38 20 | Number of references: 18 21 | 22 | Combinational area: 954.310710 23 | Buf/Inv area: 48.287360 24 | Noncombinational area: 333.436935 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 194.075856 27 | 28 | Total cell area: 1287.747646 29 | Total area: 1481.823502 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- ------ --------- 39 | mac 1287.7476 100.0 954.3107 333.4369 0.0000 mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 954.3107 333.4369 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_power_report_1.2.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 22:02:25 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | 
Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 315.2414 uW (88%) 34 | Net Switching Power = 43.1800 uW (12%) 35 | --------- 36 | Total Dynamic Power = 358.4213 uW (100%) 37 | 38 | Cell Leakage Power = 41.7637 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 192.2402 4.9935 1.3096e+07 210.3301 ( 52.56%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 123.0011 38.1865 2.8667e+07 189.8550 ( 47.44%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 315.2413 uW 43.1800 uW 4.1764e+07 pW 400.1851 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_power_report_1.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 22:02:11 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global 
Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 289.7189 uW (88%) 34 | Net Switching Power = 38.7602 uW (12%) 35 | --------- 36 | Total Dynamic Power = 328.4791 uW (100%) 37 | 38 | Cell Leakage Power = 41.2265 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 177.6110 4.5924 1.3206e+07 195.4097 ( 52.86%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 112.1078 34.1678 2.8020e+07 174.2959 ( 47.14%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 289.7188 uW 38.7602 uW 4.1226e+07 pW 369.7056 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_power_report_1.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 22:01:49 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | 
------------------------------------------------ 21 | mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 247.0607 uW (88%) 34 | Net Switching Power = 32.9918 uW (12%) 35 | --------- 36 | Total Dynamic Power = 280.0525 uW (100%) 37 | 38 | Cell Leakage Power = 40.3249 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 154.6854 4.0334 1.3042e+07 171.7608 ( 53.61%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 92.3753 28.9584 2.7283e+07 148.6165 ( 46.39%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 247.0607 uW 32.9918 uW 4.0325e+07 pW 320.3774 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_power_report_1.667.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 23:56:20 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: 
saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 216.4097 uW (89%) 34 | Net Switching Power = 26.7427 uW (11%) 35 | --------- 36 | Total Dynamic Power = 243.1525 uW (100%) 37 | 38 | Cell Leakage Power = 36.5528 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 138.3570 3.6077 1.2556e+07 154.5203 ( 55.24%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 78.0527 23.1350 2.3997e+07 125.1850 ( 44.76%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 216.4098 uW 26.7427 uW 3.6553e+07 pW 279.7053 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/outputs/saed32rvt_tt0p85v25c/mac_power_report_2.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 23:56:08 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: 
/home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 170.6189 uW (89%) 34 | Net Switching Power = 20.3627 uW (11%) 35 | --------- 36 | Total Dynamic Power = 190.9816 uW (100%) 37 | 38 | Cell Leakage Power = 32.5461 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 113.5433 3.1163 1.2486e+07 129.1459 ( 57.78%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 57.0756 17.2464 2.0060e+07 94.3818 ( 42.22%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 170.6189 uW 20.3627 uW 3.2546e+07 pW 223.5277 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/mac_pe/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1 -------------------------------------------------------------------------------- 
/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator.v:
--------------------------------------------------------------------------------
// Booth partial-product generator for one 3-bit slice of operand A.
//
// Ports
//   operand_slice_a : three consecutive bits of operand A, {c2, c1, c0}.
//   operand_b       : 8-bit multiplicand; sign-extended to 9 bits internally.
//   operand_b_neg   : 9-bit value selected for the negative cases
//                     (booth_pp_gen drives it from inv_converter_8).
//   pp_out          : 10-bit partial product.
//
// Selection implemented by the flag logic below ({c2,c1,c0} -> pp_out):
//   000, 111 -> 0
//   001, 010 -> +operand_b
//   011      -> +2*operand_b
//   100      -> -2*operand_b   (operand_b_neg shifted left by one)
//   101, 110 -> -operand_b     (operand_b_neg)
module booth_partial_product_generator(
    input  wire [2:0] operand_slice_a,
    input  wire [7:0] operand_b,
    input  wire [8:0] operand_b_neg,
    output wire [9:0] pp_out
);

    // 9-bit source operand: +B (sign-extended), -B, or all zeros.
    wire [8:0] pp_source;

    //----------------------------------------------------
    //|     pp       | flag_2x | flag_s1 | flag_s2 |
    //----------------------------------------------------
    //|  operand_b   |    0    |    0    |    1    |
    //| -operand_b   |    0    |    1    |    0    |
    //|  2operand_b  |    1    |    0    |    1    |
    //| -2operand_b  |    1    |    1    |    0    |
    //|  0           |    x    |    0    |    0    |
    //----------------------------------------------------
    wire not_c2;
    wire c1_and_c0;
    wire c1_nor_c0;
    wire nor_o2;

    wire flag_2x;
    wire flag_not_2x;
    wire flag_s1;
    wire flag_s2;

    assign not_c2    = ~operand_slice_a[2];
    assign c1_and_c0 = operand_slice_a[1] & operand_slice_a[0];
    assign c1_nor_c0 = ~(operand_slice_a[1] | operand_slice_a[0]);
    // High when c1 != c0 (i.e. c1 XOR c0): the "times one" cases.
    assign nor_o2    = ~(c1_and_c0 | c1_nor_c0);

    // flag_2x is high when c1 == c0; for 000/111 both sign flags are low,
    // so pp_source is zero and the shift has no effect.
    assign flag_2x     = ~nor_o2;
    assign flag_not_2x = nor_o2;
    // Negative multiple: c2 set and not {c1,c0} == 11.
    assign flag_s1     = ~(not_c2 | c1_and_c0);
    // Positive multiple: c2 clear and not {c1,c0} == 00.
    assign flag_s2     = ~(operand_slice_a[2] | c1_nor_c0);

    // AND-OR select between sign-extended +B and the supplied -B.
    assign pp_source = (({operand_b[7], operand_b} & {9{flag_s2}}) |
                        (operand_b_neg & {9{flag_s1}}));

    // 1x: pp_out = sign-extend(pp_source); 2x: pp_out = pp_source << 1.
    assign pp_out[0]   = (flag_not_2x & pp_source[0]);
    assign pp_out[8:1] = (({8{flag_2x}} & pp_source[7:0]) |
                          ({8{flag_not_2x}} & pp_source[8:1]));
    assign pp_out[9]   = pp_source[8];

endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator_pp1.v:
--------------------------------------------------------------------------------
// Booth partial-product generator for the least-significant slice.
//
// Takes only two bits of operand A. The flag equations are the ones of
// booth_partial_product_generator with its lowest slice bit tied to 0.
//
// Selection ({a1,a0} -> pp_out):
//   00 -> 0
//   01 -> +operand_b
//   10 -> -2*operand_b
//   11 -> -operand_b
//
// Ports
//   operand_slice_a : two bits of operand A, {a1, a0}.
//   operand_b       : 8-bit multiplicand; sign-extended to 9 bits internally.
//   operand_b_neg   : 9-bit value selected for the negative cases.
//   pp_out          : 10-bit partial product.
module booth_partial_product_generator_pp1(
    input  wire [1:0] operand_slice_a,
    input  wire [7:0] operand_b,
    input  wire [8:0] operand_b_neg,
    output wire [9:0] pp_out
);

    wire [8:0] pp_source;
    wire       not_code0;
    //----------------------------------------------------
    //|     pp       | flag_2x | flag_s1 | flag_s2 |
    //----------------------------------------------------
    //|  operand_b   |    0    |    0    |    1    |
    //| -operand_b   |    0    |    1    |    0    |
    //|  2operand_b  |    1    |    0    |    1    |
    //| -2operand_b  |    1    |    1    |    0    |
    //|  0           |    x    |    0    |    0    |
    //----------------------------------------------------
    wire flag_2x;
    wire flag_not_2x;
    wire flag_s1;
    wire flag_s2;

    assign not_code0   = ~operand_slice_a[0];
    assign flag_2x     = not_code0;
    assign flag_not_2x = operand_slice_a[0];
    assign flag_s1     = operand_slice_a[1];
    assign flag_s2     = ~(operand_slice_a[1] | not_code0);

    // AND-OR select between sign-extended +B and the supplied -B.
    assign pp_source = (({operand_b[7], operand_b} & {9{flag_s2}}) |
                        (operand_b_neg & {9{flag_s1}}));

    // 1x: pp_out = sign-extend(pp_source); 2x: pp_out = pp_source << 1.
    assign pp_out[0]   = (flag_not_2x & pp_source[0]);
    assign pp_out[8:1] = (({8{flag_2x}} & pp_source[7:0]) |
                          ({8{flag_not_2x}} & pp_source[8:1]));
    assign pp_out[9]   = pp_source[8];

endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/booth_pp_gen.v:
--------------------------------------------------------------------------------
// Generates the four Booth partial products of operand_a * operand_b.
//
// operand_a is cut into overlapping slices [1:0], [3:1], [5:3], [7:5];
// each slice selects 0, +/-B or +/-2B in its own generator. The negated
// multiplicand is computed once by inv_converter_8 and shared by all four.
//
// NOTE(review): the consumer of pp1..pp4 is not in this file; the slice
// positions imply weights 1, 4, 16 and 64 respectively - confirm in opt1_mac.
module booth_pp_gen(
    input  wire [7:0] operand_a,
    input  wire [7:0] operand_b,
    output wire [9:0] pp1,
    output wire [9:0] pp2,
    output wire [9:0] pp3,
    output wire [9:0] pp4
);

    wire [1:0] operand_slice_a1;
    wire [2:0] operand_slice_a2;
    wire [2:0] operand_slice_a3;
    wire [2:0] operand_slice_a4;
    wire [8:0] operand_b_neg;

    // Shared 9-bit negation of operand_b.
    inv_converter_8 inv_converter_8_inst(
        .data_i (operand_b),
        .inv_o  (operand_b_neg)
    );

    // Adjacent slices overlap by one bit.
    assign operand_slice_a1 = operand_a[1:0];
    assign operand_slice_a2 = operand_a[3:1];
    assign operand_slice_a3 = operand_a[5:3];
    assign operand_slice_a4 = operand_a[7:5];

    booth_partial_product_generator_pp1 ppg_1 (
        .operand_slice_a (operand_slice_a1),
        .operand_b       (operand_b),
        .operand_b_neg   (operand_b_neg),
        .pp_out          (pp1)
    );

    booth_partial_product_generator ppg_2 (
        .operand_slice_a (operand_slice_a2),
        .operand_b       (operand_b),
        .operand_b_neg   (operand_b_neg),
        .pp_out          (pp2)
    );

    booth_partial_product_generator ppg_3 (
        .operand_slice_a (operand_slice_a3),
        .operand_b       (operand_b),
        .operand_b_neg   (operand_b_neg),
        .pp_out          (pp3)
    );

    booth_partial_product_generator ppg_4 (
        .operand_slice_a (operand_slice_a4),
        .operand_b       (operand_b),
        .operand_b_neg   (operand_b_neg),
        .pp_out          (pp4)
    );

endmodule

--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/dff_async.v:
--------------------------------------------------------------------------------
// WIDTH-bit D register with asynchronous active-low reset.
//
// Parameters
//   WIDTH : register width in bits (default 8).
// Ports
//   clk   : q captures d on the rising edge.
//   rst_n : when low, q is cleared to zero without waiting for clk.
module dff_async #(
    parameter WIDTH = 8
)(
    input  wire             clk,
    input  wire             rst_n,
    input  wire [WIDTH-1:0] d,
    output reg  [WIDTH-1:0] q
);

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            // Sized constant: same value as the unsized 0, no width-mismatch lint.
            q <= {WIDTH{1'b0}};
        end
        else begin
            q <= d;
        end
    end
endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/inv_conveter_8.v:
--------------------------------------------------------------------------------
// 8-bit to 9-bit negator built from an OR chain instead of an adder.
//
// inv_o[0] = data_i[0]; for bits 1..7, inv_o[i] = data_i[i] XOR
// (data_i[i-1] | ... | data_i[0]); inv_o[8] is high only when data_i[7] is 0
// and at least one of data_i[6:0] is 1. This equals the 9-bit two's-complement
// negation of data_i read as a signed 8-bit value, so -(-128) = +128 is
// representable.
//
// NOTE: the file is named inv_conveter_8.v (sic) and the filelists reference
// it under that name; the module name is inv_converter_8.
module inv_converter_8(
    input  wire [7:0] data_i,
    output wire [8:0] inv_o
);

    // wire_cout[k] for k = 0..4 is the OR of data_i[k+1:0];
    // wire_cout[5] is the NOR of data_i[6:0] (inverted polarity).
    wire [5:0] wire_cout;
    wire       not_o;

    assign inv_o[0] = data_i[0];

    inv_unit inv_unit_bit1(
        .a     (data_i[1]),
        .b     (data_i[0]),
        .xor_o (inv_o[1]),
        .or_o  (wire_cout[0])
    );

    genvar i;
    generate
        // Bits 2..5: extend the OR chain by one bit per stage.
        // The block is labelled because Verilog-2001 requires a name on a
        // generate-for block, and it gives stable hierarchical instance names.
        for (i = 2; i <= 5; i = i + 1) begin : gen_inv_unit
            inv_unit inv_unit_inst(
                .a     (data_i[i]),
                .b     (wire_cout[i-2]),
                .xor_o (inv_o[i]),
                .or_o  (wire_cout[i-1])
            );
        end
    endgenerate

    // Bit 6: same XOR, but the chain output is produced as a NOR.
    inv_unit_nor_out inv_unit_nor_out_inst_6(
        .a     (data_i[6]),
        .b     (wire_cout[4]),
        .xor_o (inv_o[6]),
        .nor_o (wire_cout[5])
    );

    // Restore OR polarity of the chain for the bit-7 XOR.
    assign not_o = ~wire_cout[5];

    // Bit 7: only the XOR output is used; nor_o is intentionally left open.
    inv_unit_nor_out inv_unit_nor_out_inst_7(
        .a     (data_i[7]),
        .b     (not_o),
        .xor_o (inv_o[7]),
        .nor_o ( )
    );

    // Bit 8 of the result: 1 only if data_i[7] == 0 and data_i[6:0] != 0.
    assign inv_o[8] = ~(wire_cout[5] | data_i[7]);

endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/inv_unit.v:
--------------------------------------------------------------------------------
// One stage of the negator chain: outputs a XOR b and a OR b.
// The XOR is formed as (a | b) & ~(a & b) so the OR term is shared.
module inv_unit(
    input  wire a,
    input  wire b,
    output wire xor_o,
    output wire or_o
);

    wire aORb;
    wire aNANDb;

    assign aORb   = a | b;
    assign aNANDb = ~(a & b);
    assign xor_o  = (aORb & aNANDb);
    assign or_o   = aORb;

endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/inv_unit_nor_out.v:
--------------------------------------------------------------------------------
// Negator stage with inverted chain output: outputs a XOR b and a NOR b.
// The XOR is formed as ~((a & b) | ~(a | b)) so the NOR term is shared.
module inv_unit_nor_out(
    input  wire a,
    input  wire b,

    output wire xor_o,
    output wire nor_o
);

    wire a_AND_b;
    wire a_NOR_b;

    assign a_AND_b = a & b;
    assign a_NOR_b = ~(a | b);

    assign xor_o = ~(a_AND_b | a_NOR_b);
    assign nor_o = a_NOR_b;

endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/power/pt.sh:
--------------------------------------------------------------------------------
#!/bin/bash

mkdir -p logs

rm -f logs/ptpx.log

pt_shell -f ptpx.tcl > logs/ptpx.log 2>&1

--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/sim/filelist.f:
--------------------------------------------------------------------------------
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/sim/timescale.sv
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator_pp1.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_pp_gen.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_conveter_8.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_unit_nor_out.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_unit.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/opt1_mac.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/DW02_tree.sv
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/dff_async.v
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/sim/test_opt1_mac.sv
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/sim/makefile:
--------------------------------------------------------------------------------
# Simulation helpers for opt1_pe. All targets are commands, not files, so
# they are declared phony: a stray file or directory named vcs, vd or cl
# must not stop them from running.
.PHONY: vcs vd cl

# Invoke vcs on the sources listed in filelist.f; the log goes to run.log.
vcs:
	vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed"

# Open verdi on the same filelist together with the *.fsdb files in this directory.
vd:
	verdi -sv -f filelist.f -ssf *.fsdb -nologo

# Remove generated simulation and debug artifacts.
cl:
	rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi*
--------------------------------------------------------------------------------
/OPT1/systolic_array_os/opt1_pe/sim/timescale.sv:
-------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator_pp1.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_partial_product_generator.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/booth_pp_gen.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_conveter_8.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_unit_nor_out.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/inv_unit.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/opt1_mac.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_os/opt1_pe/dff_async.v -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_area_report_1.11.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : opt1_mac 5 | Version: L-2016.03-SP1 6 | Date : Fri Apr 25 00:01:29 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 82 14 | Number of nets: 493 15 | Number of cells: 423 16 | Number of combinational cells: 340 17 | Number of sequential cells: 83 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 18 20 | Number of references: 26 21 | 
22 | Combinational area: 968.542792 23 | Buf/Inv area: 24.397824 24 | Noncombinational area: 530.144401 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 281.689216 27 | 28 | Total cell area: 1498.687193 29 | Total area: 1780.376408 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- ------ --------- 39 | opt1_mac 1498.6872 100.0 968.5428 530.1444 0.0000 opt1_mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 968.5428 530.1444 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_area_report_1.15.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : opt1_mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 23:58:39 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 82 14 | Number of nets: 470 15 | Number of cells: 381 16 | Number of combinational cells: 297 17 | Number of sequential cells: 84 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 9 20 | Number of references: 29 21 | 22 | Combinational area: 866.122755 23 | Buf/Inv area: 11.436480 24 | Noncombinational area: 531.669265 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 270.887913 27 | 28 | Total cell area: 1397.792019 29 | Total area: 1668.679932 30 | 31 | Hierarchical area distribution 32 | 
------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- ------ --------- 39 | opt1_mac 1397.7920 100.0 866.1228 531.6693 0.0000 opt1_mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 866.1228 531.6693 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_area_report_1.2.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : opt1_mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 21:51:17 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 82 14 | Number of nets: 443 15 | Number of cells: 355 16 | Number of combinational cells: 272 17 | Number of sequential cells: 83 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 9 20 | Number of references: 27 21 | 22 | Combinational area: 817.581251 23 | Buf/Inv area: 12.198912 24 | Noncombinational area: 530.144401 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 262.022116 27 | 28 | Total cell area: 1347.725652 29 | Total area: 1609.747768 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- 
------ --------- 39 | opt1_mac 1347.7257 100.0 817.5813 530.1444 0.0000 opt1_mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 817.5813 530.1444 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_area_report_1.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : opt1_mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 21:50:52 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 82 14 | Number of nets: 429 15 | Number of cells: 325 16 | Number of combinational cells: 241 17 | Number of sequential cells: 84 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 10 20 | Number of references: 21 21 | 22 | Combinational area: 706.012033 23 | Buf/Inv area: 12.707200 24 | Noncombinational area: 530.652689 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 246.156770 27 | 28 | Total cell area: 1236.664721 29 | Total area: 1482.821492 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- ------ --------- 39 | opt1_mac 1236.6647 100.0 706.0120 530.6527 0.0000 opt1_mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 706.0120 530.6527 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- 
/OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_area_report_1.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : opt1_mac 5 | Version: L-2016.03-SP1 6 | Date : Thu Apr 24 22:15:46 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 82 14 | Number of nets: 420 15 | Number of cells: 314 16 | Number of combinational cells: 230 17 | Number of sequential cells: 84 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 10 20 | Number of references: 20 21 | 22 | Combinational area: 673.227455 23 | Buf/Inv area: 12.707200 24 | Noncombinational area: 530.652689 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 242.291177 27 | 28 | Total cell area: 1203.880144 29 | Total area: 1446.171321 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------ --------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- --------- ------- -------- --------- ------ --------- 39 | opt1_mac 1203.8801 100.0 673.2275 530.6527 0.0000 opt1_mac 40 | -------------------------------- --------- ------- -------- --------- ------ --------- 41 | Total 673.2275 530.6527 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.10.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: 
L-2016.03-SP1 7 | Date : Fri Apr 25 00:00:34 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 472.4020 uW (92%) 34 | Net Switching Power = 39.0248 uW (8%) 35 | --------- 36 | Total Dynamic Power = 511.4268 uW (100%) 37 | 38 | Cell Leakage Power = 43.3907 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 351.0157 10.0630 2.1473e+07 382.5519 ( 68.95%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 121.3864 28.9618 2.1918e+07 172.2658 ( 31.05%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 472.4021 uW 39.0248 uW 4.3391e+07 pW 554.8176 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.11.txt: -------------------------------------------------------------------------------- 1 | 2 | 
**************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: L-2016.03-SP1 7 | Date : Fri Apr 25 00:01:29 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 470.2067 uW (92%) 34 | Net Switching Power = 39.8879 uW (8%) 35 | --------- 36 | Total Dynamic Power = 510.0946 uW (100%) 37 | 38 | Cell Leakage Power = 43.6436 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 348.9735 10.2215 2.1472e+07 380.6671 ( 68.74%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 121.2332 29.6664 2.2172e+07 173.0711 ( 31.26%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 470.2067 uW 39.8879 uW 4.3644e+07 pW 553.7382 uW 53 | 1 54 | -------------------------------------------------------------------------------- 
/OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.15.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 23:58:39 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 483.3620 uW (92%) 34 | Net Switching Power = 43.4941 uW (8%) 35 | --------- 36 | Total Dynamic Power = 526.8561 uW (100%) 37 | 38 | Cell Leakage Power = 41.7000 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 356.5389 14.7027 2.1571e+07 392.8125 ( 69.09%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 126.8231 28.7914 2.0129e+07 175.7437 ( 30.91%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 483.3620 uW 43.4941 uW 4.1700e+07 
pW 568.5562 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.2.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 21:51:17 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 456.7748 uW (92%) 34 | Net Switching Power = 41.3373 uW (8%) 35 | --------- 36 | Total Dynamic Power = 498.1122 uW (100%) 37 | 38 | Cell Leakage Power = 40.6293 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 339.3490 14.1701 2.1449e+07 374.9679 ( 69.60%) 49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 117.4258 27.1672 1.9180e+07 163.7735 ( 30.40%) 51 | 
-------------------------------------------------------------------------------------------------- 52 | Total 456.7748 uW 41.3373 uW 4.0629e+07 pW 538.7414 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 21:50:52 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 404.6984 uW (92%) 34 | Net Switching Power = 35.5794 uW (8%) 35 | --------- 36 | Total Dynamic Power = 440.2777 uW (100%) 37 | 38 | Cell Leakage Power = 37.0940 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 48 | register 314.6726 12.4211 2.1412e+07 348.5062 ( 73.01%) 49 | sequential 
0.0000 0.0000 0.0000 0.0000 ( 0.00%) 50 | combinational 90.0258 23.1583 1.5682e+07 128.8656 ( 26.99%) 51 | -------------------------------------------------------------------------------------------------- 52 | Total 404.6984 uW 35.5794 uW 3.7094e+07 pW 477.3718 uW 53 | 1 54 | -------------------------------------------------------------------------------- /OPT1/systolic_array_os/opt1_pe/syn/outputs/saed32rvt_tt0p85v25c/opt1_mac_power_report_1.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : power 4 | -analysis_effort low 5 | Design : opt1_mac 6 | Version: L-2016.03-SP1 7 | Date : Thu Apr 24 22:15:46 2025 8 | **************************************** 9 | 10 | 11 | Library(s) Used: 12 | 13 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 14 | 15 | 16 | Operating Conditions: tt0p85v25c Library: saed32rvt_tt0p85v25c 17 | Wire Load Model Mode: enclosed 18 | 19 | Design Wire Load Model Library 20 | ------------------------------------------------ 21 | opt1_mac 8000 saed32rvt_tt0p85v25c 22 | 23 | 24 | Global Operating Voltage = 0.85 25 | Power-specific unit information : 26 | Voltage Units = 1V 27 | Capacitance Units = 1.000000ff 28 | Time Units = 1ns 29 | Dynamic Power Units = 1uW (derived from V,C,T units) 30 | Leakage Power Units = 1pW 31 | 32 | 33 | Cell Internal Power = 346.3265 uW (92%) 34 | Net Switching Power = 30.0518 uW (8%) 35 | --------- 36 | Total Dynamic Power = 376.3783 uW (100%) 37 | 38 | Cell Leakage Power = 36.1296 uW 39 | 40 | 41 | Internal Switching Leakage Total 42 | Power Group Power Power Power Power ( % ) Attrs 43 | -------------------------------------------------------------------------------------------------- 44 | io_pad 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 45 | memory 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 46 | black_box 0.0000 0.0000 0.0000 0.0000 ( 0.00%) 47 | clock_network 0.0000 
0.0000 0.0000 0.0000 ( 0.00%)
48 | register 272.3762 10.5279 2.1416e+07 304.3204 ( 73.77%)
49 | sequential 0.0000 0.0000 0.0000 0.0000 ( 0.00%)
50 | combinational 73.9502 19.5239 1.4713e+07 108.1875 ( 26.23%)
51 | --------------------------------------------------------------------------------------------------
52 | Total 346.3264 uW 30.0518 uW 3.6130e+07 pW 412.5079 uW
53 | 1
54 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_mac_based/PE.v:
--------------------------------------------------------------------------------
1 | // b = w * a
2 | //
3 | // Processing element with a locally stored weight.
4 | // Every clock it registers  row = partial_result + weight * a  and forwards
5 | // either the activation or the weight being loaded on `col`.
6 | //
7 | // Parameters:
8 | //   WIDTH     - bit width of weight_din, a and col
9 | //   ACC_WIDTH - bit width of partial_result and row
10 | // Ports:
11 | //   rst_n          - active-low asynchronous reset for weight, col and row
12 | //   clk            - clock
13 | //   weight_wen     - when high, weight_din is captured into the weight register
14 | //   weight_din     - weight value to load
15 | //   a              - signed activation operand
16 | //   partial_result - signed incoming partial sum
17 | //   col            - registered copy of weight_din (while weight_wen) or of a
18 | //   row            - registered partial_result + weight * a
19 | module PE #
20 | (
21 |     parameter WIDTH = 8,
22 |     parameter ACC_WIDTH = 24
23 | )
24 | (
25 |     input wire rst_n ,
26 |     input wire clk ,
27 |     input wire weight_wen ,
28 |     input [WIDTH-1 :0] weight_din ,
29 |     input wire signed [WIDTH-1 :0] a ,
30 |     input wire signed [ACC_WIDTH-1 :0] partial_result ,
31 | 
32 |     output reg signed [WIDTH-1 :0] col ,
33 |     output reg signed [ACC_WIDTH-1 :0] row
34 | );
35 | 
36 | 
37 | reg signed [WIDTH-1 :0] weight;
38 | 
39 | // Weight register: loaded while weight_wen is high, held otherwise.
40 | always @(posedge clk or negedge rst_n) begin
41 |     if(!rst_n) begin
42 |         weight <= 0;
43 |     end
44 |     else begin
45 |         if(weight_wen) begin
46 |             weight <= weight_din;
47 |         end
48 |         else begin
49 |             weight <= weight;
50 |         end
51 |     end
52 | end
53 | 
54 | // col forwards the weight being loaded during weight_wen, otherwise the activation.
55 | always @(posedge clk or negedge rst_n) begin
56 |     if(!rst_n) begin
57 |         col <= 0;
58 |     end
59 |     else begin
60 |         if(weight_wen) begin
61 |             col <= weight_din;
62 |         end
63 |         else begin
64 |             col <= a;
65 |         end
66 |     end
67 | end
68 | 
69 | // Multiply-accumulate: all three operands are signed, so the product is
70 | // evaluated as a signed value at the width of row.
71 | always @(posedge clk or negedge rst_n) begin
72 |     if(!rst_n) begin
73 |         row <= 0;
74 |     end
75 |     else begin
76 |         row <= partial_result + weight * a;
77 |     end
78 | end
79 | 
80 | endmodule
81 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_mac_based/get_pipline_mulwidth.v:
--------------------------------------------------------------------------------
1 | // N-stage register delay line for a WIDTH-bit signal.
2 | //
3 | // Parameters:
4 | //   N     - number of register stages; N <= 0 connects signal straight through
5 | //   WIDTH - bit width of the delayed signal
6 | // Ports:
7 | //   clk             - clock; every stage updates on the rising edge
8 | //   rst_n           - active-low synchronous reset, clears every stage to 0
9 | //   signal          - value fed into stage 0
10 | //   pipeline_signal - output of stage N-1 (or signal itself when N <= 0)
11 | module get_pipeline_mulwidth #(
12 |     parameter N = 4,
13 |     parameter WIDTH = 8
14 | )(
15 |     input wire clk,
16 |     input wire rst_n,
17 |     input wire [WIDTH-1:0] signal,
18 |     output wire [WIDTH-1:0] pipeline_signal
19 | );
20 | 
21 | 
22 | // Upper bound is clamped so the declaration stays well-formed when N <= 0.
23 | reg [WIDTH-1:0] pipeline_regs [((N > 0) ? N-1 : 0):0];
24 | 
25 | 
26 | genvar i;
27 | generate
28 |     for (i = 0; i < N; i = i + 1) begin : pipeline_stage
29 |         // The stage-0 / stage-i choice is made at elaboration time, so stage 0
30 |         // never contains a reference to the non-existent pipeline_regs[-1].
31 |         if (i == 0) begin : first_stage
32 |             always @(posedge clk) begin
33 |                 if(~rst_n)
34 |                     pipeline_regs[i] <= 0;
35 |                 else
36 |                     pipeline_regs[i] <= signal;
37 |             end
38 |         end else begin : later_stage
39 |             always @(posedge clk) begin
40 |                 if(~rst_n)
41 |                     pipeline_regs[i] <= 0;
42 |                 else
43 |                     pipeline_regs[i] <= pipeline_regs[i-1];
44 |             end
45 |         end
46 |     end
47 | endgenerate
48 | 
49 | 
50 | generate
51 |     if (N > 0) begin : last_stage_tap
52 |         assign pipeline_signal = pipeline_regs[N-1];
53 |     end else begin : passthrough
54 |         // No stages requested: drive the output instead of leaving it undriven.
55 |         assign pipeline_signal = signal;
56 |     end
57 | endgenerate
58 | 
59 | endmodule
60 | 
61 | 
62 | // // Instantiate the get_pipeline_mulwidth module
63 | // get_pipeline_mulwidth #(
64 | // .N(4), // set the pipeline depth to 4
65 | // .WIDTH(8) // set the signal width to 8 bits
66 | // ) pipeline_inst (
67 | // .clk(clk), // connect the clock
68 | // .rst_n(rst_n), // connect the reset
69 | // .signal(input_signal), // connect the input signal
70 | // .pipeline_signal(output_signal) // connect the output signal
71 | // );
72 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_mac_based/sim/filelist.f:
--------------------------------------------------------------------------------
1 | 
2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/sim/timescale.sv
3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/get_pipline_mulwidth.v
4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/top.v
5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/PE.v
6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/sim/test_ws_array.sv
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_mac_based/sim/makefile:
--------------------------------------------------------------------------------
1 | 
2 | vcs:
3
| vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_mac_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_mac_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/top.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_mac_based/PE.v -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_4.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 06:55:01 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 579 14 | Number of nets: 94259 15 | Number of cells: 85765 16 | Number of combinational cells: 73765 17 | Number of sequential cells: 12000 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 10093 20 | Number of references: 61 21 | 22 | Combinational area: 197292.494716 23 | Buf/Inv area: 13707.256804 24 | Noncombinational area: 85380.694857 25 | Macro/Black Box area: 0.000000 26 | 
Net Interconnect area: 163055.856111 27 | 28 | Total cell area: 282673.189573 29 | Total area: 445729.045683 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- ------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 39 | top 282673.1896 100.0 197292.4947 85380.6949 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 41 | Total 197292.4947 85380.6949 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_5.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 06:55:40 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 579 14 | Number of nets: 97354 15 | Number of cells: 88658 16 | Number of combinational cells: 76023 17 | Number of sequential cells: 12635 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 10092 20 | Number of references: 65 21 | 22 | Combinational area: 200751.902704 23 | Buf/Inv area: 13444.980162 24 | Noncombinational area: 89917.673623 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 168397.455475 27 | 28 | Total cell area: 290669.576327 29 | Total area: 459067.031802 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- 
------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 39 | top 290669.5763 100.0 200751.9027 89917.6736 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 41 | Total 200751.9027 89917.6736 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_mac_based/syn/outputs/saed32rvt_tt0p85v25c/top_array_16_area_report_5.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 06:55:03 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 579 14 | Number of nets: 93434 15 | Number of cells: 84442 16 | Number of combinational cells: 72553 17 | Number of sequential cells: 11889 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 9611 20 | Number of references: 44 21 | 22 | Combinational area: 191943.017875 23 | Buf/Inv area: 12536.415322 24 | Noncombinational area: 84598.947900 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 138851.762307 27 | 28 | Total cell area: 276541.965775 29 | Total area: 415393.728082 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- ------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 39 | top 
276541.9658 100.0 191943.0179 84598.9479 0.0000 top
40 | -------------------------------- ----------- ------- ----------- ---------- ------ ---------
41 | Total 191943.0179 84598.9479 0.0000
42 | 
43 | 1
44 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_mac_based/syn/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | mkdir -p logs
3 | 
4 | rm -f logs/dc.log
5 | 
6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/PE.v:
--------------------------------------------------------------------------------
1 | // b = w * a
2 | //
3 | // Processing element built around opt1_mac. The accumulated value travels
4 | // between PEs as two ACC_WIDTH-bit halves packed into one 2*ACC_WIDTH bus:
5 | // row = {acc_sum, acc_carry}.
6 | // NOTE(review): the halves look like a carry-save pair that is resolved
7 | // outside this module - confirm against opt1_mac and top.
8 | //
9 | // Parameters:
10 | //   WIDTH     - bit width of weight_din, a and col
11 | //   ACC_WIDTH - bit width of each of acc_sum / acc_carry
12 | // Ports:
13 | //   rst_n          - active-low reset (asynchronous for weight/col, also gates row)
14 | //   clk            - clock
15 | //   weight_wen     - when high, weight_din is captured into the weight register
16 | //   weight_din     - weight value to load
17 | //   a              - signed activation operand
18 | //   partial_result - incoming packed partial result, passed to opt1_mac
19 | //   col            - registered copy of weight_din (while weight_wen) or of a
20 | //   row            - {acc_sum, acc_carry} from opt1_mac, forced to 0 while rst_n is low
21 | module PE #
22 | (
23 |     parameter WIDTH = 8,
24 |     parameter ACC_WIDTH = 24
25 | )
26 | (
27 |     input wire rst_n ,
28 |     input wire clk ,
29 |     input wire weight_wen ,
30 |     input [WIDTH-1 :0] weight_din ,
31 |     input wire signed [WIDTH-1 :0] a ,
32 |     input wire signed [2*ACC_WIDTH-1 :0] partial_result ,
33 | 
34 |     output reg signed [WIDTH-1 :0] col ,
35 |     output reg signed [2*ACC_WIDTH-1 :0] row
36 | );
37 | 
38 | 
39 | reg signed [WIDTH-1 :0] weight;
40 | 
41 | // Weight register: loaded while weight_wen is high, held otherwise.
42 | always @(posedge clk or negedge rst_n) begin
43 |     if(!rst_n) begin
44 |         weight <= 0;
45 |     end
46 |     else begin
47 |         if(weight_wen) begin
48 |             weight <= weight_din;
49 |         end
50 |         else begin
51 |             weight <= weight;
52 |         end
53 |     end
54 | end
55 | 
56 | // col forwards the weight being loaded during weight_wen, otherwise the activation.
57 | always @(posedge clk or negedge rst_n) begin
58 |     if(!rst_n) begin
59 |         col <= 0;
60 |     end
61 |     else begin
62 |         if(weight_wen) begin
63 |             col <= weight_din;
64 |         end
65 |         else begin
66 |             col <= a;
67 |         end
68 |     end
69 | end
70 | 
71 | wire [ACC_WIDTH-1:0] acc_sum;
72 | wire [ACC_WIDTH-1:0] acc_carry;
73 | // Combinational output stage: blocking assignments are used because this
74 | // block has no clock; nonblocking assignments are reserved for the
75 | // sequential blocks above.
76 | always @(*) begin
77 |     if(!rst_n) begin
78 |         row = 0;
79 |     end
80 |     else begin
81 |         row = {acc_sum, acc_carry}; // partial_result + weight * a;
82 |     end
83 | end
84 | opt1_mac #(
85 |     .ACC_WIDTH(ACC_WIDTH),
86 |     .INPUT_PIP(0)
87 | ) opt1_mac_test (
88 |     .clk(clk),
89 |     .rst_n(rst_n),
90 |     .operand_a_in(a),
91 |     .operand_b_in(weight),
92 |     .partial_result(partial_result),
93 |     .acc_sum(acc_sum),
94 |     .acc_carry(acc_carry)
95 | );
96 | 
97 | 
98 | endmodule
99 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/booth_partial_product_generator_pp1.v:
--------------------------------------------------------------------------------
1 | // Booth partial-product generator for the least-significant 2-bit slice of
2 | // operand A (the bit below the slice, operand_slice_a[-1], is implicitly 0).
3 | //
4 | // Ports:
5 | //   operand_slice_a - A[1:0]
6 | //   operand_b       - 8-bit multiplicand B
7 | //   operand_b_neg   - 9-bit negated B supplied by the caller
8 | //   pp_out          - 10-bit partial product: 0, +B, -B or -2B
9 | module booth_partial_product_generator_pp1(
10 |     input wire [1:0] operand_slice_a,
11 |     input wire [7:0] operand_b,
12 |     input wire [8:0] operand_b_neg,
13 |     output wire [9:0] pp_out
14 | );
15 | 
16 | wire [8:0] pp_source;
17 | wire not_code0;
18 | //----------------------------------------------------
19 | //| pp | flag_2x | flag_s1 | flag_s2 |
20 | //----------------------------------------------------
21 | //| operand_b | 0 | 0 | 1 |
22 | //| -operand_b | 0 | 1 | 0 |
23 | //| 2operand_b | 1 | 0 | 1 |
24 | //| -2operand_b | 1 | 1 | 0 |
25 | //| 0 | x | 0 | 0 |
26 | //----------------------------------------------------
27 | 
28 | //----------------------------------------------------
29 | //operand_slice_a[1] operand_slice_a[0] operand_slice_a[-1] | pp
30 | //----------------------------------------------------
31 | // 0 0 0 | 0
32 | // 0 1 0 | operand_b
33 | // 1 0 0 | -2operand_b
34 | // 1 1 0 | -operand_b
35 | //----------------------------------------------------
36 | 
37 | wire flag_2x;
38 | wire flag_s1;
39 | wire flag_s2;
40 | assign not_code0 = ~operand_slice_a[0];
41 | assign flag_2x = not_code0;
42 | assign flag_s1 = operand_slice_a[1]; // select the negated operand (operand_b_neg)
43 | assign flag_s2 = ~(operand_slice_a[1] | not_code0); // select the positive operand (sign-extended operand_b)
44 | wire flag_not_2x = operand_slice_a[0];
45 | assign pp_source = (({{operand_b[7]}, operand_b} & {9{flag_s2}}) | (operand_b_neg & {9{flag_s1}})); // B, -B or 0
46 | assign pp_out[0] = (!flag_2x & pp_source[0]);// x2 (<<1) pp_out[0]=0
47 | assign pp_out[8:1] = (({8{flag_2x}} & pp_source[7:0]) | ({8{flag_not_2x}} & pp_source[8:1]));
48 | assign pp_out[9] = pp_source[8];
49 | 
50 | endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/booth_pp_gen.v:
--------------------------------------------------------------------------------
1 | // Generates the four 10-bit Booth partial products of an 8x8 multiply.
2 | // operand_a is cut into windows that overlap by one bit
3 | // (A[1:0], A[3:1], A[5:3], A[7:5]); each window drives one generator.
4 | // operand_b is negated once by inv_converter_8 and shared by all generators.
5 | //
6 | // Ports:
7 | //   operand_a  - 8-bit operand that is Booth-recoded
8 | //   operand_b  - 8-bit multiplicand
9 | //   pp1..pp4   - partial products for the windows from least to most significant
10 | module booth_pp_gen(
11 |     input wire [7:0] operand_a,
12 |     input wire [7:0] operand_b,
13 |     output wire [9:0] pp1,
14 |     output wire [9:0] pp2,
15 |     output wire [9:0] pp3,
16 |     output wire [9:0] pp4
17 | );
18 | 
19 | wire [1:0] operand_slice_a1 ;
20 | wire [2:0] operand_slice_a2 ;
21 | wire [2:0] operand_slice_a3 ;
22 | wire [2:0] operand_slice_a4 ;
23 | wire [8:0] operand_b_neg ;
24 | 
25 | // Shared 9-bit negation of operand_b.
26 | inv_converter_8 inv_converter_8_inst(
27 |     .data_i (operand_b),
28 |     .inv_o (operand_b_neg)
29 | );
30 | 
31 | assign operand_slice_a1 = operand_a[1:0] ;
32 | assign operand_slice_a2 = operand_a[3:1] ;
33 | assign operand_slice_a3 = operand_a[5:3] ;
34 | assign operand_slice_a4 = operand_a[7:5] ;
35 | 
36 | // The lowest window has only two real bits, so it uses the dedicated pp1 generator.
37 | booth_partial_product_generator_pp1 ppg_1 (
38 |     .operand_slice_a (operand_slice_a1),
39 |     .operand_b (operand_b),
40 |     .operand_b_neg (operand_b_neg),
41 |     .pp_out (pp1)
42 | );
43 | 
44 | booth_partial_product_generator ppg_2 (
45 |     .operand_slice_a (operand_slice_a2),
46 |     .operand_b (operand_b),
47 |     .operand_b_neg (operand_b_neg),
48 |     .pp_out (pp2)
49 | );
50 | 
51 | booth_partial_product_generator ppg_3 (
52 |     .operand_slice_a (operand_slice_a3),
53 |     .operand_b (operand_b),
54 |     .operand_b_neg (operand_b_neg),
55 |     .pp_out (pp3)
56 | );
57 | 
58 | booth_partial_product_generator ppg_4 (
59 |     .operand_slice_a (operand_slice_a4),
60 |     .operand_b (operand_b),
61 |     .operand_b_neg (operand_b_neg),
62 |     .pp_out (pp4)
63 | );
64 | 
65 | endmodule
66 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/get_pipline_mulwidth.v:
--------------------------------------------------------------------------------
1 | // N-stage register delay line for a WIDTH-bit signal.
2 | //
3 | // Parameters:
4 | //   N     - number of register stages; N <= 0 connects signal straight through
5 | //   WIDTH - bit width of the delayed signal
6 | // Ports:
7 | //   clk             - clock; every stage updates on the rising edge
8 | //   rst_n           - active-low synchronous reset, clears every stage to 0
9 | //   signal          - value fed into stage 0
10 | //   pipeline_signal - output of stage N-1 (or signal itself when N <= 0)
11 | module get_pipeline_mulwidth #(
12 |     parameter N = 4,
13 |     parameter WIDTH = 8
14 | )(
15 |     input wire clk,
16 |     input wire rst_n,
17 |     input wire [WIDTH-1:0] signal,
18 |     output wire [WIDTH-1:0] pipeline_signal
19 | );
20 | 
21 | 
22 | // Upper bound is clamped so the declaration stays well-formed when N <= 0.
23 | reg [WIDTH-1:0] pipeline_regs [((N > 0) ? N-1 : 0):0];
24 | 
25 | 
26 | genvar i;
27 | generate
28 |     for (i = 0; i < N; i = i + 1) begin : pipeline_stage
29 |         // The stage-0 / stage-i choice is made at elaboration time, so stage 0
30 |         // never contains a reference to the non-existent pipeline_regs[-1].
31 |         if (i == 0) begin : first_stage
32 |             always @(posedge clk) begin
33 |                 if(~rst_n)
34 |                     pipeline_regs[i] <= 0;
35 |                 else
36 |                     pipeline_regs[i] <= signal;
37 |             end
38 |         end else begin : later_stage
39 |             always @(posedge clk) begin
40 |                 if(~rst_n)
41 |                     pipeline_regs[i] <= 0;
42 |                 else
43 |                     pipeline_regs[i] <= pipeline_regs[i-1];
44 |             end
45 |         end
46 |     end
47 | endgenerate
48 | 
49 | 
50 | generate
51 |     if (N > 0) begin : last_stage_tap
52 |         assign pipeline_signal = pipeline_regs[N-1];
53 |     end else begin : passthrough
54 |         // No stages requested: drive the output instead of leaving it undriven.
55 |         assign pipeline_signal = signal;
56 |     end
57 | endgenerate
58 | 
59 | endmodule
60 | 
61 | 
62 | // // Instantiate the get_pipeline_mulwidth module
63 | // get_pipeline_mulwidth #(
64 | // .N(4), // set the pipeline depth to 4
65 | // .WIDTH(8) // set the signal width to 8 bits
66 | // ) pipeline_inst (
67 | // .clk(clk), // connect the clock
68 | // .rst_n(rst_n), // connect the reset
69 | // .signal(input_signal), // connect the input signal
70 | // .pipeline_signal(output_signal) // connect the output signal
71 | // );
72 | 
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/inv_conveter_8.v:
--------------------------------------------------------------------------------
1 | // Invert and add 1 (both -1*A and -2*A need the invert-and-add-1 value).
2 | //
3 | // Produces the 9-bit two's-complement negation of the 8-bit input without a
4 | // carry-propagate adder: output bit i is data_i[i] XOR (OR of all lower
5 | // input bits), and bit 0 passes through unchanged.
6 | //
7 | // Ports:
8 | //   data_i - 8-bit value to negate
9 | //   inv_o  - 9-bit negated value
10 | module inv_converter_8(
11 |     input wire [7:0] data_i ,
12 |     output wire[8:0] inv_o
13 | );
14 | 
15 | // wire_cout[0..4] hold the running OR of data_i[1:0] .. data_i[5:0];
16 | // wire_cout[5] is the NOR of data_i[6:0] (note the inverted polarity).
17 | wire [5:0] wire_cout ;
18 | wire not_o ;
19 | 
20 | assign inv_o[0] = data_i[0];
21 | 
22 | inv_unit inv_unit_bit1(
23 |     .a (data_i[1] ),
24 |     .b (data_i[0] ),
25 |     .xor_o (inv_o[1] ),
26 |     .or_o (wire_cout[0] )
27 | );
28 | 
29 | genvar i;
30 | generate
31 |     for(i=2;i<=5;i=i+1) begin
32 |         inv_unit inv_unit_inst(
33 |             .a (data_i[i] ),
34 |             .b (wire_cout[i-2] ),
35 |             .xor_o (inv_o[i] ),
36 |             .or_o (wire_cout[i-1] )
37 |         );
38 |     end
39 | endgenerate
40 | 
41 | inv_unit_nor_out inv_unit_nor_out_inst_6(
42 |     .a (data_i[6] ),
43 |     .b (wire_cout[4] ),
44 |     .xor_o (inv_o[6] ),
45 |     .nor_o (wire_cout[5] )
46 | );
47 | 
48 | // nor_o is intentionally left unconnected for the top input bit.
49 | inv_unit_nor_out inv_unit_nor_out_inst_7(
50 |     .a (data_i[7] ),
51 |     .b (not_o ),
52 |     .xor_o (inv_o[7] ),
53 |     .nor_o ( )
54 | );
55 | 
56 | 
57 | 
58 | // Recover the OR of data_i[6:0] from its NOR.
59 | assign not_o = ~wire_cout[5] ;
60 | 
61 | 
62 | // Extension bit: 1 only when data_i[7] is 0 and some lower bit is set.
63 | assign inv_o[8] = ~(wire_cout[5] | data_i[7]);
64 | 
65 | endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/inv_unit.v:
--------------------------------------------------------------------------------
1 | // One-bit cell of the negation chain.
2 | //   xor_o = a ^ b  (built as (a | b) & ~(a & b))
3 | //   or_o  = a | b
4 | module inv_unit(
5 |     input wire a ,
6 |     input wire b ,
7 |     output wire xor_o ,
8 |     output wire or_o
9 | );
10 | 
11 | wire aORb ;
12 | wire aNANDb ;
13 | 
14 | assign aORb = a | b;
15 | assign aNANDb = ~(a & b);
16 | assign xor_o = (aORb & aNANDb);
17 | assign or_o = aORb;
18 | 
19 | endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/inv_unit_nor_out.v:
--------------------------------------------------------------------------------
1 | // One-bit cell of the negation chain with an inverted chain output.
2 | //   xor_o = a ^ b  (built as ~((a & b) | ~(a | b)))
3 | //   nor_o = ~(a | b)
4 | module inv_unit_nor_out(
5 |     input wire a ,
6 |     input wire b ,
7 | 
8 |     output wire xor_o ,
9 |     output wire nor_o
10 | );
11 | 
12 | 
13 | wire a_AND_b ;
14 | wire a_NOR_b ;
15 | 
16 | 
17 | assign a_AND_b = a & b;
18 | 
19 | 
20 | assign a_NOR_b = ~(a | b);
21 | 
22 | 
23 | assign xor_o = ~(a_AND_b | a_NOR_b);
24 | assign nor_o = a_NOR_b;
25 | 
26 | endmodule
--------------------------------------------------------------------------------
/OPT1/systolic_array_ws/array_opt1_based/sim/filelist.f:
--------------------------------------------------------------------------------
1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/sim/timescale.sv
2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_partial_product_generator_pp1.v
3 |
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_partial_product_generator.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_pp_gen.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/DW02_tree.sv 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/get_pipline_mulwidth.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_conveter_8.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_unit_nor_out.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_unit.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/opt1_mac.v 11 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/PE.v 12 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/top.v 13 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/sim/test_opt1_ws_array.sv -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 
-------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_partial_product_generator_pp1.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_partial_product_generator.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/booth_pp_gen.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/DW02_tree.sv 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/get_pipline_mulwidth.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_conveter_8.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_unit_nor_out.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/inv_unit.v 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/opt1_mac.v 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/PE.v 11 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT1/systolic_array_ws/array_opt1_based/top.v -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_3.1.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 14:34:43 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: 
/home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 899 14 | Number of nets: 99282 15 | Number of cells: 86297 16 | Number of combinational cells: 64965 17 | Number of sequential cells: 21332 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 5527 20 | Number of references: 58 21 | 22 | Combinational area: 179732.416711 23 | Buf/Inv area: 7275.126202 24 | Noncombinational area: 145814.107156 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 221035.364630 27 | 28 | Total cell area: 325546.523867 29 | Total area: 546581.888497 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 325546.5239 100.0 179732.4167 145814.1072 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 179732.4167 145814.1072 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_3.3.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 14:36:07 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 899 14 | Number of nets: 96684 15 | Number of cells: 83957 16 | Number of combinational cells: 64062 17 | Number of sequential cells: 
19895 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 5679 20 | Number of references: 45 21 | 22 | Combinational area: 175403.836065 23 | Buf/Inv area: 7295.457700 24 | Noncombinational area: 135854.203567 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 212913.820041 27 | 28 | Total cell area: 311258.039632 29 | Total area: 524171.859673 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 311258.0396 100.0 175403.8361 135854.2036 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 175403.8361 135854.2036 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_3.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 14:33:26 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 899 14 | Number of nets: 95640 15 | Number of cells: 82243 16 | Number of combinational cells: 63154 17 | Number of sequential cells: 19089 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 5672 20 | Number of references: 44 21 | 22 | Combinational area: 168755.174453 23 | Buf/Inv area: 7222.518364 24 | Noncombinational area: 130421.621271 25 | Macro/Black Box area: 
0.000000 26 | Net Interconnect area: 208509.334690 27 | 28 | Total cell area: 299176.795724 29 | Total area: 507686.130413 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 299176.7957 100.0 168755.1745 130421.6213 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 168755.1745 130421.6213 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_4.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 14:32:12 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 899 14 | Number of nets: 105295 15 | Number of cells: 95394 16 | Number of combinational cells: 76940 17 | Number of sequential cells: 18454 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 11106 20 | Number of references: 43 21 | 22 | Combinational area: 189567.026846 23 | Buf/Inv area: 14131.423031 24 | Noncombinational area: 125557.305097 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 207152.444105 27 | 28 | Total cell area: 315124.331943 29 | Total area: 522276.776049 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 
35 | -------------------- -------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 39 | top 315124.3319 100.0 189567.0268 125557.3051 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ----------- ------ --------- 41 | Total 189567.0268 125557.3051 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/outputs/saed32rvt_tt0p85v25c/top_opt1_array_16_area_report_4.5.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top 5 | Version: L-2016.03-SP1 6 | Date : Tue Apr 29 14:18:23 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 899 14 | Number of nets: 101594 15 | Number of cells: 91239 16 | Number of combinational cells: 77069 17 | Number of sequential cells: 14170 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 9757 20 | Number of references: 48 21 | 22 | Combinational area: 192323.218738 23 | Buf/Inv area: 12414.934450 24 | Noncombinational area: 95757.903859 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 185994.997408 27 | 28 | Total cell area: 288081.122597 29 | Total area: 474076.120005 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | -------------------- ------------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ----------- ------- ----------- 
---------- ------ --------- 39 | top 288081.1226 100.0 192323.2187 95757.9039 0.0000 top 40 | -------------------------------- ----------- ------- ----------- ---------- ------ --------- 41 | Total 192323.2187 95757.9039 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT1/systolic_array_ws/array_opt1_based/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc.tcl > logs/dc.log 2>&1
--------------------------------------------------------------------------------
/OPT2/get_pipline_mulwidth.v:
--------------------------------------------------------------------------------
1 | // get_pipeline_mulwidth
2 | // Delay line: moves a WIDTH-bit bus through N back-to-back registers.
3 | //
4 | //   clk             - clock; every stage updates on its rising edge
5 | //   rst_n           - asynchronous active-low reset; clears every stage to 0
6 | //   signal          - value captured by stage 0
7 | //   pipeline_signal - contents of the last stage (index N-1)
8 | module get_pipeline_mulwidth #(
9 |     parameter N     = 4,   // number of register stages
10 |     parameter WIDTH = 8    // width of the delayed bus in bits
11 | )(
12 |     input  wire             clk,
13 |     input  wire             rst_n,
14 |     input  wire [WIDTH-1:0] signal,
15 |     output wire [WIDTH-1:0] pipeline_signal
16 | );
17 |
18 |     reg [WIDTH-1:0] pipeline_regs [N-1:0];
19 |
20 |     integer stage;
21 |
22 |     // A single process drives the whole chain. The non-blocking
23 |     // assignments make each stage sample the value its predecessor held
24 |     // before the clock edge.
25 |     always @(posedge clk or negedge rst_n) begin
26 |         if (!rst_n) begin
27 |             for (stage = 0; stage < N; stage = stage + 1)
28 |                 pipeline_regs[stage] <= 0;
29 |         end else begin
30 |             pipeline_regs[0] <= signal;
31 |             for (stage = 1; stage < N; stage = stage + 1)
32 |                 pipeline_regs[stage] <= pipeline_regs[stage-1];
33 |         end
34 |     end
35 |
36 |     assign pipeline_signal = pipeline_regs[N-1];
37 |
38 | endmodule
39 |
--------------------------------------------------------------------------------
/OPT2/partial_product_select.sv:
--------------------------------------------------------------------------------
1 | // partial_product_select
2 | // Registers four partial products derived from the signed 8-bit weight.
3 | // Lane i (0..3) looks at the 2-bit field partial_product_select[2*i+1:2*i]:
4 | //     2'b00 -> -2 * weight
5 | //     2'b01 ->      weight
6 | //     2'b10 ->  2 * weight
7 | //     2'b11 ->     -weight
8 | // A lane whose bit_enable[i] is 0 registers 0 instead.
9 | // Every lane output is updated on the rising edge of clk; there is no reset.
10 | module partial_product_select(
11 |     input               clk,
12 |     input        [7:0]  weight,
13 |     input        [3:0]  bit_enable,
14 |     input        [7:0]  partial_product_select,
15 |     output signed [9:0] partial_product [0:3]
16 | );
17 |
18 |     // Signed multiples of the weight. The widths grow by one bit where
19 |     // negation or doubling needs the extra range.
20 |     wire signed [7:0] w_pos;      // weight, reinterpreted as signed
21 |     wire signed [8:0] w_inv;      // ~weight, sign-extended to 9 bits
22 |     wire signed [8:0] w_neg;      // -weight  (two's complement: ~weight + 1)
23 |     wire signed [8:0] w_pos_x2;   // 2 * weight
24 |     wire signed [9:0] w_neg_x2;   // -2 * weight
25 |
26 |     reg signed [9:0] pp_reg [0:3];
27 |
28 |     assign w_pos    = $signed(weight);
29 |     assign w_inv    = $signed(~w_pos);
30 |     assign w_neg    = w_inv + 1'b1;
31 |     assign w_pos_x2 = {w_pos, 1'b0};
32 |     assign w_neg_x2 = {w_neg, 1'b0};
33 |
34 |     assign partial_product = pp_reg;
35 |
36 |     // All four lanes are handled by one clocked process.
37 |     always @(posedge clk) begin
38 |         for (int lane = 0; lane < 4; lane++) begin
39 |             if (bit_enable[lane]) begin
40 |                 case (partial_product_select[2*lane +: 2])
41 |                     2'b00: pp_reg[lane] <= $signed(w_neg_x2);
42 |                     2'b01: pp_reg[lane] <= $signed(w_pos);
43 |                     2'b10: pp_reg[lane] <= $signed(w_pos_x2);
44 |                     2'b11: pp_reg[lane] <= $signed(w_neg);
45 |                 endcase
46 |             end else begin
47 |                 pp_reg[lane] <= 0;
48 |             end
49 |         end
50 |     end
51 |
52 | endmodule
53 |
--------------------------------------------------------------------------------
/OPT2/sim/filelist.f:
--------------------------------------------------------------------------------
1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/sim/test_opt2ws_array.sv 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/DW02_tree.sv 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/partial_product_select.sv 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/top_pe_tile.sv 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/top_tpe.sv 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/tree_full_sum.sv 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/vector_encoder.sv 9 |
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/weight_rf.sv 10 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/get_pipline_mulwidth.v -------------------------------------------------------------------------------- /OPT2/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT2/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps -------------------------------------------------------------------------------- /OPT2/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/partial_product_select.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/top_pe_tile.sv 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/top_tpe.sv 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/tree_full_sum.sv 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/vector_encoder.sv 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/weight_rf.sv 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT2/get_pipline_mulwidth.v 8 | -------------------------------------------------------------------------------- /OPT2/syn/outputs_array/saed32rvt_tt0p85v25c/top_tpe_n4_area_report_1.35.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_tpe 5 | Version: L-2016.03-SP1 6 | Date : Sat Apr 26 20:52:32 2025 7 | 
**************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1898 14 | Number of nets: 24489 15 | Number of cells: 19466 16 | Number of combinational cells: 15298 17 | Number of sequential cells: 4164 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 1214 20 | Number of references: 19 21 | 22 | Combinational area: 39657.137716 23 | Buf/Inv area: 1555.869575 24 | Noncombinational area: 27514.646919 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 18506.190658 27 | 28 | Total cell area: 67171.784636 29 | Total area: 85677.975294 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- ---------- ------ ------------- 39 | top_tpe 67171.7846 100.0 1512.6651 4123.2324 0.0000 top_tpe 40 | genblk1[0].genblk1.pe_tile 15471.7785 23.0 9538.0242 5933.7543 0.0000 top_pe_tile_3 41 | genblk1[1].genblk1.pe_tile 15471.7785 23.0 9538.0242 5933.7543 0.0000 top_pe_tile_2 42 | genblk1[2].genblk1.pe_tile 15478.3863 23.0 9538.0242 5940.3621 0.0000 top_pe_tile_1 43 | genblk1[3].genblk1.pe_tile 15113.9438 22.5 9530.3999 5583.5439 0.0000 top_pe_tile_0 44 | -------------------------------- ---------- ------- ---------- ---------- ------ ------------- 45 | Total 39657.1377 27514.6469 0.0000 46 | 47 | 1 48 | -------------------------------------------------------------------------------- /OPT2/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc_array.tcl > logs/dc.log 2>&1 
--------------------------------------------------------------------------------
/OPT2/top_tpe.sv:
--------------------------------------------------------------------------------
1 | // top_tpe
2 | // A chain of N top_pe_tile instances fed by one vector_encoder.
3 | //   - vector_encoder derives 16 bit_enable / partial_product_select words
4 | //     from the 128-bit operand_a.
5 | //   - Tile 0 receives those words directly; every later tile receives the
6 | //     *_pip outputs of the tile before it.
7 | //   - All tiles share weight_wen. Tile i is given weight_din[8*i +: 8] and
8 | //     drives result[RESULT_WIDTH*i +: RESULT_WIDTH].
9 | module top_tpe #(
10 |     parameter  N            = 32,
11 |     localparam RESULT_WIDTH = 20
12 | )(
13 |     input                       clk,
14 |     input  [127:0]              operand_a,
15 |     input                       weight_wen,
16 |     input  [8*N-1:0]            weight_din,
17 |     output [RESULT_WIDTH*N-1:0] result
18 | );
19 |
20 |     // Control words produced by the encoder (inputs of tile 0).
21 |     wire [3:0] bit_enable             [0:15];
22 |     wire [7:0] partial_product_select [0:15];
23 |
24 |     // Control words forwarded by each tile (inputs of the next tile).
25 |     wire [3:0] bit_enable_pip             [0:N-1][0:15];
26 |     wire [7:0] partial_product_select_pip [0:N-1][0:15];
27 |
28 |     vector_encoder encoder (
29 |         .clk                    (clk),
30 |         .operand_a              (operand_a),
31 |         .bit_enable             (bit_enable),
32 |         .partial_product_select (partial_product_select)
33 |     );
34 |
35 |     genvar i;
36 |     generate
37 |         // The loop and the conditional stay unlabeled so the tool-assigned
38 |         // scope names (genblk1[i].genblk1.pe_tile) do not change.
39 |         for (i = 0; i < N; i = i + 1) begin
40 |             if (i != 0) begin
41 |                 // Later tiles: control words come from the previous tile.
42 |                 top_pe_tile pe_tile (
43 |                     .clk                        (clk),
44 |                     .weight_wen                 (weight_wen),
45 |                     .weight_din                 (weight_din[8*i +: 8]),
46 |                     .bit_enable                 (bit_enable_pip[i-1]),
47 |                     .partial_product_select     (partial_product_select_pip[i-1]),
48 |                     .result                     (result[RESULT_WIDTH*i +: RESULT_WIDTH]),
49 |                     .bit_enable_pip             (bit_enable_pip[i]),
50 |                     .partial_product_select_pip (partial_product_select_pip[i])
51 |                 );
52 |             end
53 |             else begin
54 |                 // First tile: control words come straight from the encoder.
55 |                 top_pe_tile pe_tile (
56 |                     .clk                        (clk),
57 |                     .weight_wen                 (weight_wen),
58 |                     .weight_din                 (weight_din[8*i +: 8]),
59 |                     .bit_enable                 (bit_enable),
60 |                     .partial_product_select     (partial_product_select),
61 |                     .result                     (result[RESULT_WIDTH*i +: RESULT_WIDTH]),
62 |                     .bit_enable_pip             (bit_enable_pip[i]),
63 |                     .partial_product_select_pip (partial_product_select_pip[i])
64 |                 );
65 |             end
66 |         end
67 |     endgenerate
68 |
69 | endmodule
--------------------------------------------------------------------------------
/OPT2/tree_full_sum.sv:
--------------------------------------------------------------------------------
1 | // tree_full_sum
2 | // Adds K signed WIDTH-bit operands packed in csa_input (operand k occupies
3 | // csa_input[k*WIDTH +: WIDTH]) and registers the WIDTH-bit sum in full_sum
4 | // on the rising edge of clk. The sum is kept at WIDTH bits; there is no reset.
5 | //   K == 4 : the four operands are added with a plain adder expression.
6 | //   other K: DW02_tree reduces the operands to two vectors (OUT0/OUT1),
7 | //            which are then added.
8 | module tree_full_sum #(
9 |     parameter K     = 16,
10 |     parameter WIDTH = 13
11 | )(
12 |     input                          clk,
13 |     input      [K*WIDTH-1:0]       csa_input,
14 |     output reg signed [WIDTH-1:0]  full_sum
15 | );
16 |
17 |     // Outputs of the reduction tree; only driven when K != 4.
18 |     wire signed [WIDTH-1:0] acc_sum;
19 |     wire signed [WIDTH-1:0] acc_carry;
20 |
21 |     generate
22 |         if (K != 4) begin
23 |             DW02_tree #(K, WIDTH, 1)
24 |             ins_tree ( .INPUT(csa_input), .OUT0(acc_sum), .OUT1(acc_carry));
25 |
26 |             always @(posedge clk) begin
27 |                 full_sum <= $signed(acc_sum) + $signed(acc_carry);
28 |             end
29 |         end
30 |         else begin
31 |             always @(posedge clk) begin
32 |                 full_sum <= $signed(csa_input[0*WIDTH +: WIDTH])
33 |                           + $signed(csa_input[1*WIDTH +: WIDTH])
34 |                           + $signed(csa_input[2*WIDTH +: WIDTH])
35 |                           + $signed(csa_input[3*WIDTH +: WIDTH]);
36 |             end
37 |         end
38 |     endgenerate
39 |
40 | endmodule
41 |
--------------------------------------------------------------------------------
/OPT2/weight_rf.sv:
--------------------------------------------------------------------------------
1 | // weight_rf
2 | // 16-entry, 8-bit shift register exposed as an unpacked array.
3 | // While wen is 1, each rising edge of clk writes din into entry 15 and moves
4 | // every other entry one position towards index 0 (entry k takes entry k+1).
5 | // While wen is 0 the contents are held. There is no reset.
6 | module weight_rf (
7 |     input        clk,
8 |     input        wen,
9 |     input  [7:0] din,
10 |     output [7:0] weight [0:15]
11 | );
12 |
13 |     reg [7:0] buffer [0:15];
14 |
15 |     assign weight = buffer;
16 |
17 |     always @(posedge clk) begin
18 |         if (wen) begin
19 |             buffer[15] <= din;
20 |             for (int k = 0; k < 15; k++)
21 |                 buffer[k] <= buffer[k+1];
22 |         end
23 |     end
24 |
25 | endmodule
--------------------------------------------------------------------------------
/OPT3_OPT4C/array/sim/filelist.f:
--------------------------------------------------------------------------------
1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/DW02_tree.sv 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/encoder_multi_bit.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/pe.v 5 |
/home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sparse_encoder.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/get_pipline_mulwidth.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/array/top_pe_column.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/array/sim/test_opt4c_column_array.sv -------------------------------------------------------------------------------- /OPT3_OPT4C/array/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/pe.v 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sparse_encoder.v 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/array/top_pe_column.v 4 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n16_area_report_0.58.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 23:19:54 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 981 14 | Number of nets: 4310 15 | Number of 
cells: 3698 16 | Number of combinational cells: 2238 17 | Number of sequential cells: 1460 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 135 20 | Number of references: 39 21 | 22 | Combinational area: 6069.721180 23 | Buf/Inv area: 193.657731 24 | Noncombinational area: 9784.544291 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 4059.694270 27 | 28 | Total cell area: 15854.265472 29 | Total area: 19913.959742 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- --------- --------- ------ --------- 39 | top_pe_column 15854.2655 100.0 6069.7212 9784.5443 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- --------- --------- ------ --------- 41 | Total 6069.7212 9784.5443 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n16_area_report_1.0.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 23:22:34 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 981 14 | Number of nets: 3666 15 | Number of cells: 3058 16 | Number of combinational cells: 1980 17 | Number of sequential cells: 1078 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 129 20 | Number of references: 34 21 | 22 | Combinational area: 5732.980391 23 | Buf/Inv area: 201.790339 24 | 
Noncombinational area: 7222.772694 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 3590.210165 27 | 28 | Total cell area: 12955.753085 29 | Total area: 16545.963250 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- --------- --------- ------ --------- 39 | top_pe_column 12955.7531 100.0 5732.9804 7222.7727 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- --------- --------- ------ --------- 41 | Total 5732.9804 7222.7727 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n16_area_report_1.4.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 23:22:45 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 981 14 | Number of nets: 3565 15 | Number of cells: 2875 16 | Number of combinational cells: 1896 17 | Number of sequential cells: 979 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 102 20 | Number of references: 28 21 | 22 | Combinational area: 5245.532186 23 | Buf/Inv area: 138.000192 24 | Noncombinational area: 6543.191623 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 3329.934668 27 | 28 | Total cell area: 11788.723808 29 | Total area: 15118.658476 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | 
Global cell area Local cell area 35 | ------------------- ---------------------------- 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- --------- --------- ------ --------- 39 | top_pe_column 11788.7238 100.0 5245.5322 6543.1916 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- --------- --------- ------ --------- 41 | Total 5245.5322 6543.1916 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n32_area_report_0.59.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 23:10:04 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1941 14 | Number of nets: 8230 15 | Number of cells: 7156 16 | Number of combinational cells: 4312 17 | Number of sequential cells: 2844 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 212 20 | Number of references: 34 21 | 22 | Combinational area: 11836.248563 23 | Buf/Inv area: 292.773890 24 | Noncombinational area: 19041.485627 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 9988.049709 27 | 28 | Total cell area: 30877.734190 29 | Total area: 40865.783899 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- 
---------- ------ --------- 39 | top_pe_column 30877.7342 100.0 11836.2486 19041.4856 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 41 | Total 11836.2486 19041.4856 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n32_area_report_0.6.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 22:58:32 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1941 14 | Number of nets: 8165 15 | Number of cells: 7074 16 | Number of combinational cells: 4233 17 | Number of sequential cells: 2841 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 204 20 | Number of references: 33 21 | 22 | Combinational area: 11668.513517 23 | Buf/Inv area: 280.320834 24 | Noncombinational area: 19021.662395 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 9948.613315 27 | 28 | Total cell area: 30690.175912 29 | Total area: 40638.789227 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 39 | top_pe_column 30690.1759 100.0 11668.5135 19021.6624 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 41 | Total 11668.5135 19021.6624 0.0000 42 | 43 | 1 44 | 
-------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n32_area_report_0.8.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 22:57:36 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1941 14 | Number of nets: 9162 15 | Number of cells: 8142 16 | Number of combinational cells: 5869 17 | Number of sequential cells: 2273 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 912 20 | Number of references: 36 21 | 22 | Combinational area: 14709.092379 23 | Buf/Inv area: 1192.951943 24 | Noncombinational area: 15205.435979 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 9643.641456 27 | 28 | Total cell area: 29914.528358 29 | Total area: 39558.169814 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 39 | top_pe_column 29914.5284 100.0 14709.0924 15205.4360 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 41 | Total 14709.0924 15205.4360 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n32_area_report_1.0.txt: -------------------------------------------------------------------------------- 
1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 22:55:27 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c (File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1941 14 | Number of nets: 7553 15 | Number of cells: 6242 16 | Number of combinational cells: 3889 17 | Number of sequential cells: 2353 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 303 20 | Number of references: 35 21 | 22 | Combinational area: 10801.628351 23 | Buf/Inv area: 427.470212 24 | Noncombinational area: 15746.762712 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 9272.088700 27 | 28 | Total cell area: 26548.391063 29 | Total area: 35820.479763 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 39 | top_pe_column 26548.3911 100.0 10801.6284 15746.7627 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 41 | Total 10801.6284 15746.7627 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/outputs_array/saed32rvt_tt0p85v25c/top_pe_column_n32_area_report_1.4.txt: -------------------------------------------------------------------------------- 1 | 2 | **************************************** 3 | Report : area 4 | Design : top_pe_column 5 | Version: L-2016.03-SP1 6 | Date : Sun May 18 23:24:44 2025 7 | **************************************** 8 | 9 | Library(s) Used: 10 | 11 | saed32rvt_tt0p85v25c 
(File: /home/vcs/workspace/wqz/code/experiment/HPCA2025/library/saed32rvt_tt0p85v25c.db) 12 | 13 | Number of ports: 1941 14 | Number of nets: 7009 15 | Number of cells: 5699 16 | Number of combinational cells: 3760 17 | Number of sequential cells: 1939 18 | Number of macros/black boxes: 0 19 | Number of buf/inv: 167 20 | Number of references: 30 21 | 22 | Combinational area: 10719.539834 23 | Buf/Inv area: 255.414720 24 | Noncombinational area: 12951.178635 25 | Macro/Black Box area: 0.000000 26 | Net Interconnect area: 8190.330654 27 | 28 | Total cell area: 23670.718469 29 | Total area: 31861.049122 30 | 31 | Hierarchical area distribution 32 | ------------------------------ 33 | 34 | Global cell area Local cell area 35 | ------------------- ------------------------------ 36 | Hierarchical cell Absolute Percent Combi- Noncombi- Black- 37 | Total Total national national boxes Design 38 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 39 | top_pe_column 23670.7185 100.0 10719.5398 12951.1786 0.0000 top_pe_column 40 | -------------------------------- ---------- ------- ---------- ---------- ------ --------- 41 | Total 10719.5398 12951.1786 0.0000 42 | 43 | 1 44 | -------------------------------------------------------------------------------- /OPT3_OPT4C/array/syn/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p logs 3 | 4 | rm -f logs/dc.log 5 | 6 | dc_shell -64bit -f dc_array.tcl > logs/dc.log 2>&1
--------------------------------------------------------------------------------
/OPT3_OPT4C/array/top_pe_column.v:
--------------------------------------------------------------------------------
1 | // top_pe_column
2 | // One sparse_encoder shared by a column of N pe instances.
3 | //   - The encoder receives the operand-a inputs (en_multiplicand,
4 | //     sign_en_multiplicand, encode_valid) and drives partial_product_index,
5 | //     position and cal_cycle.
6 | //   - All N pe instances receive the same partial_product_index and clr.
7 | //     PE i gets the byte operand_b[8*i +: 8] and drives
8 | //     pe_result[2*ACC_WIDTH*i +: 2*ACC_WIDTH].
9 | //
10 | // Parameters
11 | //   N         - number of pe instances in the column
12 | //   ACC_WIDTH - accumulator width forwarded to every pe; each pe returns
13 | //               2*ACC_WIDTH bits (52 bits at the default of 26)
14 | module top_pe_column #(
15 |     parameter N         = 32,
16 |     parameter ACC_WIDTH = 26
17 |
18 | )(
19 |     input                       clk,
20 |     input                       rst_n,
21 |     input                       clr,
22 |     input  [7:0]                en_multiplicand,       // operand a
23 |     input  [3:0]                sign_en_multiplicand,
24 |     input                       encode_valid,
25 |     input  [8*N-1:0]            operand_b,
26 |     output [1:0]                position,              // to prefetch operand b
27 |     output [2:0]                cal_cycle,             // to prefetch operand a
28 |     output [2*ACC_WIDTH*N-1:0]  pe_result
29 | );
30 |
31 |     // Width of one pe result slice.
32 |     localparam PE_RESULT_WIDTH = 2*ACC_WIDTH;
33 |
34 |     wire [1:0] partial_product_index;
35 |
36 |     genvar i;
37 |     sparse_encoder sp_encoder(
38 |         .clk(clk),
39 |         .rst_n(rst_n),
40 |         .en_multiplicand(en_multiplicand),
41 |         .sign_en_multiplicand(sign_en_multiplicand),
42 |         .encode_valid(encode_valid),
43 |         .partial_product_index(partial_product_index), // to sparse_pe generate partial product
44 |         .position_0(position),                         // to prefetch operand b
45 |         .cal_cycle(cal_cycle)                          // to prefetch operand a
46 |     );
47 |
48 |     generate
49 |         // Loop left unlabeled so the tool-assigned scope names stay as before.
50 |         for (i = 0 ; i < N ; i = i + 1) begin
51 |             pe #(
52 |                 // Forward the module parameter instead of a literal 26 so an
53 |                 // ACC_WIDTH override actually reaches the PEs.
54 |                 .ACC_WIDTH(ACC_WIDTH)
55 |             ) sparse_pe (
56 |                 .clk(clk),
57 |                 .rst_n(rst_n),
58 |                 .clr(clr),
59 |                 .encoder_position_ins(partial_product_index),
60 |                 .operand_b_ins(operand_b[8*i +: 8]),
61 |                 .result(pe_result[PE_RESULT_WIDTH*i +: PE_RESULT_WIDTH])
62 |             );
63 |         end
64 |     endgenerate
65 |
66 |
67 | endmodule
--------------------------------------------------------------------------------
/OPT3_OPT4C/pe/get_negedge.sv:
--------------------------------------------------------------------------------
1 | // get_negedge
2 | // Falling-edge detector. `save` holds the value `signal` had at the last
3 | // rising edge of clk; negedge_signal is 1 while `signal` is 0 and `save`
4 | // is still 1. The output is combinational in `signal`, and `save` has
5 | // no reset.
6 | module get_negedge(
7 |     input clk,
8 |     input signal,
9 |     output negedge_signal
10 | );
11 |
12 |     reg save;   // `signal` as sampled at the previous rising clock edge
13 |
14 |     always @(posedge clk) begin
15 |         save <= signal;
16 |     end
17 |
18 |     assign negedge_signal = ~signal & save;
19 |
20 | endmodule
21 |
22 | // Instantiation example:
23 | // get_negedge get_hit(
24 | //     .clk(clk),
25 | //     .signal(cache_hit),
26 | //     .negedge_signal(negedge_cache_hit)
27 | // );
--------------------------------------------------------------------------------
/OPT3_OPT4C/pe/get_pipline_mulwidth.v:
--------------------------------------------------------------------------------
1 | // get_pipeline_mulwidth
2 | // Delay line: moves a WIDTH-bit bus through N back-to-back registers.
3 | //
4 | //   clk             - clock; every stage updates on its rising edge
5 | //   rst_n           - asynchronous active-low reset; clears every stage to 0
6 | //   signal          - value captured by stage 0
7 | //   pipeline_signal - contents of the last stage (index N-1)
8 | module get_pipeline_mulwidth #(
9 |     parameter N     = 4,   // number of register stages
10 |     parameter WIDTH = 8    // width of the delayed bus in bits
11 | )(
12 |     input  wire             clk,
13 |     input  wire             rst_n,
14 |     input  wire [WIDTH-1:0] signal,
15 |     output wire [WIDTH-1:0] pipeline_signal
16 | );
17 |
18 |     reg [WIDTH-1:0] pipeline_regs [N-1:0];
19 |
20 |     integer stage;
21 |
22 |     // A single process drives the whole chain; stage 0 is written outside
23 |     // the loop so no stage ever refers to index -1. The non-blocking
24 |     // assignments make each stage sample its predecessor's pre-edge value.
25 |     always @(posedge clk or negedge rst_n) begin
26 |         if (!rst_n) begin
27 |             for (stage = 0; stage < N; stage = stage + 1)
28 |                 pipeline_regs[stage] <= 0;
29 |         end else begin
30 |             pipeline_regs[0] <= signal;
31 |             for (stage = 1; stage < N; stage = stage + 1)
32 |                 pipeline_regs[stage] <= pipeline_regs[stage-1];
33 |         end
34 |     end
35 |
36 |     assign pipeline_signal = pipeline_regs[N-1];
37 |
38 | endmodule
39 |
--------------------------------------------------------------------------------
/OPT3_OPT4C/pe/pe.v:
--------------------------------------------------------------------------------
1 | // pe
2 | // Accumulating processing element that keeps its running total as two
3 | // vectors (acc_sum, acc_carry) instead of a single resolved sum.
4 | //
5 | // Each cycle:
6 | //   1. encoder_position_ins and operand_b_ins are registered
7 | //      (asynchronous active-low reset rst_n clears both to 0).
8 | //   2. The registered 2-bit code selects a signed multiple of operand_b:
9 | //          0 -> -2*b     1 -> +b     2 -> +2*b     3 -> -b
10 | //   3. DW02_tree combines that multiple with the fed-back acc_sum and
11 | //      acc_carry into a new sum/carry pair, which is registered.
12 | //
13 | // clr gates the feedback: while clr is 0 the fed-back sum and carry are
14 | // replaced by 0, so the next registered pair holds only the current
15 | // multiple; while clr is 1 accumulation continues. acc_sum / acc_carry have
16 | // no reset of their own.
17 | //
18 | // result = {acc_sum, acc_carry}, 2*ACC_WIDTH bits (52 at the default of 26).
19 | // The two halves are not added inside this module.
20 | module pe #(
21 |     parameter ACC_WIDTH = 26 // due to the same bit-weight reduction
22 | )(
23 |     input clk,
24 |     input rst_n,
25 |     input clr,
26 |     input wire [1:0] encoder_position_ins,
27 |     input wire [7:0] operand_b_ins,
28 |     // Sized from ACC_WIDTH so an override neither truncates acc_sum nor
29 |     // pads the bus; identical to the former [51:0] when ACC_WIDTH is 26.
30 |     output wire [2*ACC_WIDTH-1:0] result
31 | );
32 |
33 |     wire signed [ACC_WIDTH-1:0] sum;          // tree output OUT0
34 |     wire signed [ACC_WIDTH-1:0] carry;        // tree output OUT1
35 |     wire signed [ACC_WIDTH-1:0] sum_input;    // fed-back sum, gated by clr
36 |     wire signed [ACC_WIDTH-1:0] carry_input;  // fed-back carry, gated by clr
37 |     reg signed [ACC_WIDTH-1:0] acc_sum;
38 |     reg signed [ACC_WIDTH-1:0] acc_carry;
39 |     reg [1:0] encoder_position;
40 |     reg signed [7:0] operand_b;
41 |
42 |
43 |     wire signed [8:0] b;         // operand_b sign-extended to 9 bits
44 |     wire signed [8:0] b_2;       // 2*b
45 |     wire signed [8:0] neg_b;     // -b (two's complement)
46 |     wire signed [9:0] neg_b_2;   // -2*b
47 |     reg signed [9:0] mux_select_b;
48 |     wire signed [ACC_WIDTH-1:0] mux_extend_b;  // selected multiple, sign-extended
49 |     wire [3*ACC_WIDTH-1:0] csa_input;
50 |
51 |     assign b = $signed(operand_b);
52 |     assign b_2 = {operand_b,1'b0};
53 |     assign neg_b = ~b + 1'b1;
54 |     assign neg_b_2 = {neg_b,1'b0};
55 |     assign mux_extend_b = mux_select_b;
56 |     assign sum_input = !clr ? 0 : acc_sum;
57 |     assign carry_input = !clr ? 0 : acc_carry;
58 |     assign csa_input = {mux_extend_b,sum_input,carry_input};
59 |
60 |     // Partial-product multiplexer; all four 2-bit codes are covered.
61 |     always @(*) begin
62 |         case (encoder_position)
63 |             2'd0: mux_select_b = $signed(neg_b_2);
64 |             2'd1: mux_select_b = $signed(b);
65 |             2'd2: mux_select_b = $signed(b_2);
66 |             2'd3: mux_select_b = $signed(neg_b);
67 |         endcase
68 |     end
69 |
70 |     // Three ACC_WIDTH-bit operands in, two vectors out.
71 |     // NOTE(review): the third parameter (1) is presumably the DesignWare
72 |     // verif_en setting - confirm against DW02_tree.sv.
73 |     DW02_tree #(3,ACC_WIDTH, 1)
74 |     U1 ( .INPUT(csa_input), .OUT0(sum), .OUT1(carry) );
75 |
76 |     // Input registers.
77 |     always @(posedge clk or negedge rst_n) begin
78 |         if (!rst_n) begin
79 |             encoder_position <=0;
80 |             operand_b <=0;
81 |         end
82 |         else begin
83 |             encoder_position <= encoder_position_ins;
84 |             operand_b <= operand_b_ins;
85 |         end
86 |     end
87 |
88 |     // Accumulator registers (no reset; cleared through clr, see above).
89 |     always @(posedge clk) begin
90 |         acc_sum <= sum ;
91 |         acc_carry <= carry;
92 |     end
93 |
94 |     assign result = {acc_sum,acc_carry};
95 |
96 | endmodule
-------------------------------------------------------------------------------- /OPT3_OPT4C/pe/sim/filelist.f: -------------------------------------------------------------------------------- 1 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sim/timescale.sv 2 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/DW02_tree.sv 3 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/encoder_multi_bit.v 4 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/pe.v 5 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sparse_encoder.v 6 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/get_pipline_mulwidth.v 7 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/top_pe.v 8 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/get_negedge.sv 9 | /home/vcs/workspace/wqz/code/experiment/HPCA2025/OPT3_OPT4C/pe/sim/test_opt3_pe_inner_product_vectors.sv -------------------------------------------------------------------------------- /OPT3_OPT4C/pe/sim/makefile: -------------------------------------------------------------------------------- 1 | 2 | vcs: 3 | vcs -R -j24 -full64 +v2k +vc -cpp g++-4.8 -cc gcc-4.8 -fsdb +neg_tchk -negdelay +notimingcheck +nospecify +define+FSDB -sverilog -l run.log -f filelist.f -LDFLAGS "-Wl,--no-as-needed" 4 | vd: 5 | verdi -sv -f filelist.f -ssf *.fsdb -nologo 6 | 7 | cl: 8 | rm -rf *.fsdb *.rc *.key *.log *.conf simv* csrc verdi* -------------------------------------------------------------------------------- /OPT3_OPT4C/pe/sim/timescale.sv: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps
--------------------------------------------------------------------------------
/OPT3_OPT4C/pe/top_pe.v:
--------------------------------------------------------------------------------
1 | // top_pe
2 | // Single-PE wrapper: one sparse_encoder driving one pe (ACC_WIDTH = 26).
3 | // The encoder's partial_product_index selects the pe's partial product;
4 | // position and cal_cycle are passed straight out of the encoder.
5 | // pe_result is the pe's 52-bit {acc_sum, acc_carry} output.
6 | module top_pe(
7 |     input clk,
8 |     input rst_n,
9 |     input clr,
10 |     input [7:0] en_multiplicand,       // operand a
11 |     input [3:0] sign_en_multiplicand,
12 |     input encode_valid,
13 |     input [7:0] operand_b,
14 |     output [1:0] position,             // to prefetch operand b
15 |     output [2:0] cal_cycle,            // to prefetch operand a
16 |     output [51:0] pe_result
17 | );
18 |
19 |     wire [1:0] partial_product_index;
20 |
21 |     sparse_encoder sp_encoder(
22 |         .clk(clk),
23 |         .rst_n(rst_n),
24 |         .en_multiplicand(en_multiplicand),
25 |         .sign_en_multiplicand(sign_en_multiplicand),
26 |         .encode_valid(encode_valid),
27 |         .partial_product_index(partial_product_index), // to sparse_pe generate partial product
28 |         .position_0(position),                         // to prefetch operand b
29 |         .cal_cycle(cal_cycle)                          // to prefetch operand a
30 |     );
31 |
32 |
33 |     pe #(
34 |         .ACC_WIDTH(26)
35 |     ) sparse_pe (
36 |         .clk(clk),
37 |         .rst_n(rst_n),
38 |         .clr(clr),
39 |         .encoder_position_ins(partial_product_index),
40 |         .operand_b_ins(operand_b),
41 |         .result(pe_result)
42 |     );
43 |
44 | endmodule
--------------------------------------------------------------------------------
/assets/64709dc13fe00c9e794af29ce991901-20250429185133-a8iy6xm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/64709dc13fe00c9e794af29ce991901-20250429185133-a8iy6xm.png -------------------------------------------------------------------------------- /assets/image-20250423170332-07aeb6b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250423170332-07aeb6b.png -------------------------------------------------------------------------------- /assets/image-20250424204435-5fldpy3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250424204435-5fldpy3.png -------------------------------------------------------------------------------- /assets/image-20250425153743-bfn0w5o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250425153743-bfn0w5o.png -------------------------------------------------------------------------------- /assets/image-20250518164949-c177kdo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250518164949-c177kdo.png -------------------------------------------------------------------------------- /assets/image-20250518165300-lx9r183.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250518165300-lx9r183.png -------------------------------------------------------------------------------- /assets/image-20250518171735-9upne4o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/assets/image-20250518171735-9upne4o.png -------------------------------------------------------------------------------- /library/saed32rvt_tt0p85v25c.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wqzustc/High-Performance-Tensor-Processing-Engines/ebe4db7d2d3c36d10c47683d7689f65f5c4ca3e4/library/saed32rvt_tt0p85v25c.db --------------------------------------------------------------------------------