├── vitis └── src │ ├── helloworld.c │ ├── platform_config.h │ ├── platform.h │ ├── platform.c │ └── lscript.ld ├── verilog ├── sources_1 │ ├── new │ │ ├── binary_QKV.v │ │ ├── layer_norm_1.v │ │ ├── layer_norm_3.v │ │ ├── layer_norm_2.v │ │ ├── binary_intermediate_1.v │ │ ├── encoder_hidden_state_1.v │ │ ├── binary_intermediate_2.v │ │ ├── binary_score.v │ │ ├── binary_query.v │ │ ├── output_2.v │ │ ├── encoder_hidden_state_2.v │ │ ├── attention.v │ │ ├── encoder.v │ │ ├── output_1.v │ │ └── transformer.v │ └── bd │ │ └── design_1 │ │ ├── ui │ │ └── bd_1f5defd0.ui │ │ ├── design_1.bda │ │ └── ip │ │ └── design_1_transformer_0_0 │ │ └── design_1_transformer_0_0.xci ├── param_1 │ ├── output_2_rom.coe │ ├── alpha_rom_1.coe │ ├── alpha_rom_2.coe │ ├── alpha_rom_3.coe │ ├── beta_rom_1.coe │ ├── beta_rom_2.coe │ ├── beta_rom_3.coe │ ├── qkv_w_rom.coe │ ├── attention_intermediate_1.coe │ ├── attention_intermediate_2.coe │ ├── encoder_hidden_state_1.coe │ └── encoder_hidden_state_2.coe ├── ip_repo │ └── PS_PL_1.0 │ │ ├── drivers │ │ └── PS_PL_v1_0 │ │ │ ├── data │ │ │ ├── PS_PL.tcl │ │ │ └── PS_PL.mdd │ │ │ └── src │ │ │ ├── PS_PL.c │ │ │ ├── Makefile │ │ │ ├── PS_PL_selftest.c │ │ │ └── PS_PL.h │ │ ├── example_designs │ │ ├── debug_hw_design │ │ │ ├── PS_PL_v1_0_hw_test.tcl │ │ │ └── design.tcl │ │ └── bfm_design │ │ │ ├── design.tcl │ │ │ └── PS_PL_v1_0_tb.sv │ │ ├── xgui │ │ └── PS_PL_v1_0.tcl │ │ ├── hdl │ │ ├── PS_PL_v1_0.v │ │ └── PS_PL_v1_0_S00_AXI.v │ │ └── bd │ │ └── bd.tcl └── sim_1 │ └── new │ ├── tb_QKV.v │ ├── tb_attention.v │ ├── tb_transformer.v │ └── tb_encoder.v └── README.md /vitis/src/helloworld.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShixiangLi/Transformer_FPGA/HEAD/vitis/src/helloworld.c -------------------------------------------------------------------------------- /verilog/sources_1/new/binary_QKV.v: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShixiangLi/Transformer_FPGA/HEAD/verilog/sources_1/new/binary_QKV.v -------------------------------------------------------------------------------- /verilog/param_1/output_2_rom.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 4 | -------------------------------------------------------------------------------- /vitis/src/platform_config.h: -------------------------------------------------------------------------------- 1 | #ifndef __PLATFORM_CONFIG_H_ 2 | #define __PLATFORM_CONFIG_H_ 3 | 4 | #define STDOUT_IS_PSU_UART 5 | #define UART_DEVICE_ID 0 6 | #endif 7 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/data/PS_PL.tcl: -------------------------------------------------------------------------------- 1 | 2 | 3 | proc generate {drv_handle} { 4 | xdefine_include_file $drv_handle "xparameters.h" "PS_PL" "NUM_INSTANCES" "DEVICE_ID" "C_S00_AXI_BASEADDR" "C_S00_AXI_HIGHADDR" 5 | } 6 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/src/PS_PL.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | /***************************** Include Files *******************************/ 4 | #include "PS_PL.h" 5 | 6 | /************************** Function Definitions ***************************/ 7 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/data/PS_PL.mdd: -------------------------------------------------------------------------------- 1 | 2 | 3 | OPTION psf_version = 2.1; 4 | 5 | BEGIN DRIVER PS_PL 6 | OPTION 
supported_peripherals = (PS_PL); 7 | OPTION copyfiles = all; 8 | OPTION VERSION = 1.0; 9 | OPTION NAME = PS_PL; 10 | END DRIVER 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transformer_FPGA 2 | Transformer FPGA Accelerator 3 | 4 | ## Introduction 5 | 基于Xilinx Vivado实现一个二值化Transformer加速器 6 | 目前只是纯实现,卷积操作实现位宽并行化,整体网络计算未实现 7 | 8 | ## Network Architecture 9 | 普通的Transformer去掉Decoder,小BERT 10 | 11 | ## Document Structure 12 | 13 | |---verilog 14 | 15 | |-ip_repo(AXI-LITE) 16 | 17 | |-param_1(coe权重文件) 18 | 19 | |-sim_1 20 | 21 | |-new(仿真文件) 22 | 23 | |-source_1 24 | 25 | |-new(各模块实现文件) 26 | -------------------------------------------------------------------------------- /verilog/param_1/alpha_rom_1.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/param_1/alpha_rom_2.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/param_1/alpha_rom_3.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/param_1/beta_rom_1.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | 000000000000000000000000000000000000000000000000000000000000, 4 | 000000000000000000000000000000000000000000000000000000000000, 5 | 000000000000000000000000000000000000000000000000000000000000, 6 | 000000000000000000000000000000000000000000000000000000000000, 7 | 000000000000000000000000000000000000000000000000000000000000, 8 | 000000000000000000000000000000000000000000000000000000000000; 9 | -------------------------------------------------------------------------------- /verilog/param_1/beta_rom_2.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | 000000000000000000000000000000000000000000000000000000000000, 4 | 000000000000000000000000000000000000000000000000000000000000, 5 | 000000000000000000000000000000000000000000000000000000000000, 6 | 000000000000000000000000000000000000000000000000000000000000, 7 | 000000000000000000000000000000000000000000000000000000000000, 8 | 
000000000000000000000000000000000000000000000000000000000000; 9 | -------------------------------------------------------------------------------- /verilog/param_1/beta_rom_3.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | 000000000000000000000000000000000000000000000000000000000000, 4 | 000000000000000000000000000000000000000000000000000000000000, 5 | 000000000000000000000000000000000000000000000000000000000000, 6 | 000000000000000000000000000000000000000000000000000000000000, 7 | 000000000000000000000000000000000000000000000000000000000000, 8 | 000000000000000000000000000000000000000000000000000000000000; 9 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/src/Makefile: -------------------------------------------------------------------------------- 1 | COMPILER= 2 | ARCHIVER= 3 | CP=cp 4 | COMPILER_FLAGS= 5 | EXTRA_COMPILER_FLAGS= 6 | LIB=libxil.a 7 | 8 | RELEASEDIR=../../../lib 9 | INCLUDEDIR=../../../include 10 | INCLUDES=-I./. -I${INCLUDEDIR} 11 | 12 | INCLUDEFILES=$(wildcard *.h) 13 | LIBSOURCES=$(wildcard *.c) 14 | OUTS = $(wildcard *.o) 15 | 16 | libs: 17 | echo "Compiling PS_PL..." 
18 | $(COMPILER) $(COMPILER_FLAGS) $(EXTRA_COMPILER_FLAGS) $(INCLUDES) $(LIBSOURCES) 19 | $(ARCHIVER) -r ${RELEASEDIR}/${LIB} ${OUTS} 20 | make clean 21 | 22 | include: 23 | ${CP} $(INCLUDEFILES) $(INCLUDEDIR) 24 | 25 | clean: 26 | rm -rf ${OUTS} 27 | -------------------------------------------------------------------------------- /verilog/param_1/qkv_w_rom.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/sources_1/bd/design_1/ui/bd_1f5defd0.ui: -------------------------------------------------------------------------------- 1 | { 2 | "ActiveEmotionalView":"Default View", 3 | "Default 
View_ScaleFactor":"0.927273", 4 | "Default View_TopLeft":"0,-33", 5 | "ExpandedHierarchyInLayout":"", 6 | "guistr":"# # String gsaved with Nlview 7.0r6 2020-01-29 bk=1.5227 VDI=41 GEI=36 GUI=JA:10.0 non-TLS 7 | # -string -flagsOSRD 8 | preplace inst zynq_ultra_ps_e_0 -pg 1 -lvl 1 -x 330 -y 300 -defaultsOSRD 9 | preplace inst transformer_0 -pg 1 -lvl 2 -x 820 -y 90 -defaultsOSRD 10 | preplace inst ps8_0_axi_periph -pg 1 -lvl 2 -x 820 -y 340 -defaultsOSRD 11 | preplace inst rst_ps8_0_96M -pg 1 -lvl 1 -x 330 -y 100 -defaultsOSRD 12 | preplace inst PS_PL_0 -pg 1 -lvl 3 -x 1150 -y 180 -defaultsOSRD 13 | preplace netloc zynq_ultra_ps_e_0_pl_clk0 1 0 3 20 200 640 200 N 14 | preplace netloc zynq_ultra_ps_e_0_pl_resetn0 1 0 2 30 210 630 15 | preplace netloc rst_ps8_0_96M_peripheral_aresetn 1 1 2 650 220 N 16 | preplace netloc PS_PL_0_raw_data 1 1 3 660 180 980J 280 1300 17 | preplace netloc PS_PL_0_raw_data_valid 1 1 3 670 190 970J 290 1290 18 | preplace netloc transformer_0_data_out 1 2 1 1010 80n 19 | preplace netloc transformer_0_data_out_valid 1 2 1 1000 100n 20 | preplace netloc zynq_ultra_ps_e_0_M_AXI_HPM0_LPD 1 1 1 N 280 21 | preplace netloc ps8_0_axi_periph_M00_AXI 1 2 1 990 140n 22 | levelinfo -pg 1 0 330 820 1150 1320 23 | pagesize -pg 1 -db -bbox -sgen 0 0 1320 460 24 | " 25 | } 26 | { 27 | "da_axi4_cnt":"1", 28 | "da_clkrst_cnt":"1" 29 | } 30 | -------------------------------------------------------------------------------- /verilog/sim_1/new/tb_QKV.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/09 11:25:03 7 | // Design Name: 8 | // Module Name: tb_QKV 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // 
Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// Testbench for binary_QKV.
//
// Generates a clock that toggles every 5 ns, drives rst_n high after 100 ns,
// then streams up to 30 binary words from test_data.txt into the DUT, one word
// per clock, qualified by data_in_valid. Only clk, rst_n, block_sel, data_in
// and data_in_valid are connected; no DUT output is brought out to the
// testbench.
module tb_QKV;

    reg clk;
    reg rst_n;

    reg [16-1:0] data_in;
    reg data_in_valid;
    integer file,i;

    // NOTE(review): tb_attention drives a 3-bit block_sel while this bench
    // uses 2 bits - confirm against the binary_QKV port declaration.
    wire [2-1:0] block_sel;
    assign block_sel = 0;

    // Free-running clock: 5 ns half period.
    initial begin
        clk = 1'b0;
        forever begin
            #5 clk = ~clk;
        end
    end

    // Reset sequence followed by the input stream.
    initial begin
        rst_n = 0;
        data_in_valid = 0;
        #100;
        rst_n = 1;
        #100;
        file = $fopen("test_data.txt", "rb");
        if (file==0) begin
            $display("[-]error");
            $stop;
        end

        begin : feed
            reg [16-1:0] word;  // word parsed from the file, staged before driving
            integer      got;   // number of items $fscanf converted (1 on success)

            for (i=0;i<30;i=i+1) begin
                got = $fscanf(file, "%b", word);
                if (got != 1) begin
                    // Stop instead of replaying the previous word as if it were new data.
                    $display("[-]test_data.txt: only %0d of 30 words read", i);
                    disable feed;
                end
                @(posedge clk);
                // Drive data and valid with nonblocking assignments so both change
                // together after the edge; a blocking write here would race with
                // the DUT sampling data_in on the same edge.
                data_in       <= word;
                data_in_valid <= 1;
            end
        end

        // Hold valid for one full cycle after the last word. Dropping it in the
        // same time step as the last "<= 1" would cancel that assignment and the
        // final word would never be seen with data_in_valid high.
        @(posedge clk);
        data_in_valid <= 0;
        $fclose(file);
    end

    binary_QKV binary_QKV(
        .clk(clk),
        .rst_n(rst_n),
        .block_sel(block_sel),

        .data_in(data_in),
        .data_in_valid(data_in_valid)
    );

endmodule

--------------------------------------------------------------------------------
/verilog/sim_1/new/tb_attention.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 2023/11/10 17:01:30
// Design Name:
// Module Name: tb_attention
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


module tb_attention;

    reg clk;
    reg rst_n;

    reg [16-1:0] data_in;
    reg data_in_valid;
    integer file,i;

    wire [3-1:0] block_sel;
    assign block_sel = 'd0;

    initial begin
clk = 1'b0;
        // Free-running clock: 5 ns half period.
        forever begin
            #5 clk = ~clk;
        end
    end

    // Reset sequence followed by the input stream: drives rst_n high after
    // 100 ns, then feeds up to 30 binary words from test_data.txt, one per
    // clock, qualified by data_in_valid.
    initial begin
        rst_n = 0;
        data_in_valid = 0;
        #100;
        rst_n = 1;
        #100;
        file = $fopen("test_data.txt", "rb");
        if (file==0) begin
            $display("[-]error");
            $stop;
        end

        begin : feed
            reg [16-1:0] word;  // word parsed from the file, staged before driving
            integer      got;   // number of items $fscanf converted (1 on success)

            for (i=0;i<30;i=i+1) begin
                got = $fscanf(file, "%b", word);
                if (got != 1) begin
                    // Stop instead of replaying the previous word as if it were new data.
                    $display("[-]test_data.txt: only %0d of 30 words read", i);
                    disable feed;
                end
                @(posedge clk);
                // Drive data and valid with nonblocking assignments so both change
                // together after the edge; a blocking write here would race with
                // the DUT sampling data_in on the same edge.
                data_in       <= word;
                data_in_valid <= 1;
            end
        end

        // Hold valid for one full cycle after the last word. Dropping it in the
        // same time step as the last "<= 1" would cancel that assignment and the
        // final word would never be seen with data_in_valid high.
        @(posedge clk);
        data_in_valid <= 0;
        $fclose(file);
    end

    // Device under test. Only the ports listed here are connected; no DUT
    // output is brought out to the testbench.
    attention attention(
        .clk (clk),
        .rst_n (rst_n),

        .data_in (data_in),
        .data_in_valid (data_in_valid),
        .block_sel (block_sel)
    );

endmodule

--------------------------------------------------------------------------------
/verilog/ip_repo/PS_PL_1.0/example_designs/debug_hw_design/PS_PL_v1_0_hw_test.tcl:
--------------------------------------------------------------------------------
# Runtime Tcl commands to interact with - PS_PL_v1_0

# Sourcing design address info tcl
set bd_path [get_property DIRECTORY [current_project]]/[current_project].srcs/[current_fileset]/bd
source ${bd_path}/PS_PL_v1_0_include.tcl

# jtag axi master interface hardware name, change as per your design.
set jtag_axi_master hw_axi_1
set ec 0

# hw test script
# Delete all previous axis transactions
if { [llength [get_hw_axi_txns -quiet]] } {
	delete_hw_axi_txn [get_hw_axi_txns -quiet]
}


# Test all lite slaves.
19 | set wdata_1 abcd1234 20 | 21 | # Test: S00_AXI 22 | # Create a write transaction at s00_axi_addr address 23 | create_hw_axi_txn w_s00_axi_addr [get_hw_axis $jtag_axi_master] -type write -address $s00_axi_addr -data $wdata_1 24 | # Create a read transaction at s00_axi_addr address 25 | create_hw_axi_txn r_s00_axi_addr [get_hw_axis $jtag_axi_master] -type read -address $s00_axi_addr 26 | # Initiate transactions 27 | run_hw_axi r_s00_axi_addr 28 | run_hw_axi w_s00_axi_addr 29 | run_hw_axi r_s00_axi_addr 30 | set rdata_tmp [get_property DATA [get_hw_axi_txn r_s00_axi_addr]] 31 | # Compare read data 32 | if { $rdata_tmp == $wdata_1 } { 33 | puts "Data comparison test pass for - S00_AXI" 34 | } else { 35 | puts "Data comparison test fail for - S00_AXI, expected-$wdata_1 actual-$rdata_tmp" 36 | inc ec 37 | } 38 | 39 | # Check error flag 40 | if { $ec == 0 } { 41 | puts "PTGEN_TEST: PASSED!" 42 | } else { 43 | puts "PTGEN_TEST: FAILED!" 44 | } 45 | 46 | -------------------------------------------------------------------------------- /verilog/param_1/attention_intermediate_1.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/param_1/attention_intermediate_2.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 4 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 | 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 6 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 7 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 8 | FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF; 9 | -------------------------------------------------------------------------------- /verilog/sim_1/new/tb_transformer.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/13 15:23:43 7 | // Design Name: 8 | // Module Name: tb_transformer 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module tb_transformer; 24 | 25 | reg clk; 26 | reg rst_n; 27 | 28 | reg [16-1:0] data_in; 29 | reg data_in_valid; 30 | integer file,i; 31 | 32 | wire [3-1:0] block_sel; 33 | assign 
block_sel = 'd0;

    // Free-running clock: 5 ns half period.
    initial begin
        clk = 1'b0;
        forever begin
            #5 clk = ~clk;
        end
    end

    // Reset sequence followed by the input stream: drives rst_n high after
    // 100 ns, then feeds up to 30 binary words from test_data.txt, one per
    // clock, qualified by data_in_valid.
    initial begin
        rst_n = 0;
        data_in_valid = 0;
        #100;
        rst_n = 1;
        #100;
        file = $fopen("test_data.txt", "rb");
        if (file==0) begin
            $display("[-]error");
            $stop;
        end

        begin : feed
            reg [16-1:0] word;  // word parsed from the file, staged before driving
            integer      got;   // number of items $fscanf converted (1 on success)

            for (i=0;i<30;i=i+1) begin
                got = $fscanf(file, "%b", word);
                if (got != 1) begin
                    // Stop instead of replaying the previous word as if it were new data.
                    $display("[-]test_data.txt: only %0d of 30 words read", i);
                    disable feed;
                end
                @(posedge clk);
                // Drive data and valid with nonblocking assignments so both change
                // together after the edge; a blocking write here would race with
                // the DUT sampling data_in on the same edge.
                data_in       <= word;
                data_in_valid <= 1;
            end
        end

        // Hold valid for one full cycle after the last word. Dropping it in the
        // same time step as the last "<= 1" would cancel that assignment and the
        // final word would never be seen with data_in_valid high.
        @(posedge clk);
        data_in_valid <= 0;
        $fclose(file);
    end

    wire [16-1:0] data_out;
    wire data_out_valid;
    wire done;  // declared but not connected to anything in this testbench

    // Device under test. block_sel is declared in this bench but is not among
    // the connected ports.
    transformer transformer(
        .clk (clk),
        .rst_n (rst_n),

        .data_in (data_in),
        .data_in_valid (data_in_valid),

        .data_out (data_out),
        .data_out_valid (data_out_valid)
    );
endmodule

--------------------------------------------------------------------------------
/verilog/sim_1/new/tb_encoder.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 2023/11/12 22:31:35
// Design Name:
// Module Name: tb_encoder
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


module tb_encoder;

    reg clk;
    reg rst_n;

    reg [16-1:0] data_in;
    reg data_in_valid;
    integer file,i;

    wire [3-1:0] block_sel;
    assign block_sel = 'd0;

    initial begin
        clk = 1'b0;
        forever begin
            #5 clk = ~clk;
        end
    end

    initial begin
        rst_n = 0;
        data_in_valid = 0;
        #100;
        rst_n = 1;
        #100;
        file =
$fopen("test_data.txt", "rb");
        if (file==0) begin
            $display("[-]error");
            $stop;
        end

        // Stream up to 30 binary words from test_data.txt, one per clock,
        // qualified by data_in_valid.
        begin : feed
            reg [16-1:0] word;  // word parsed from the file, staged before driving
            integer      got;   // number of items $fscanf converted (1 on success)

            for (i=0;i<30;i=i+1) begin
                got = $fscanf(file, "%b", word);
                if (got != 1) begin
                    // Stop instead of replaying the previous word as if it were new data.
                    $display("[-]test_data.txt: only %0d of 30 words read", i);
                    disable feed;
                end
                @(posedge clk);
                // Drive data and valid with nonblocking assignments so both change
                // together after the edge; a blocking write here would race with
                // the DUT sampling data_in on the same edge.
                data_in       <= word;
                data_in_valid <= 1;
            end
        end

        // Hold valid for one full cycle after the last word. Dropping it in the
        // same time step as the last "<= 1" would cancel that assignment and the
        // final word would never be seen with data_in_valid high.
        @(posedge clk);
        data_in_valid <= 0;
        $fclose(file);
    end

    wire [16-1:0] data_out;
    wire data_out_valid;
    wire done;  // declared but not connected to anything in this testbench

    // Device under test.
    encoder encoder(
        .clk (clk),
        .rst_n (rst_n),

        .data_in (data_in),
        .data_in_valid (data_in_valid),
        .block_sel (block_sel),

        .data_out (data_out),
        .data_out_valid (data_out_valid)
    );

endmodule

--------------------------------------------------------------------------------
/vitis/src/platform.h:
--------------------------------------------------------------------------------
/******************************************************************************
*
* Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* Use of the Software is limited solely to applications:
* (a) running on a Xilinx device, or
* (b) that interact with a Xilinx device through a bus or interconnect.
18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 | * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 24 | * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | * Except as contained in this notice, the name of the Xilinx shall not be used 28 | * in advertising or otherwise to promote the sale, use or other dealings in 29 | * this Software without prior written authorization from Xilinx. 30 | * 31 | ******************************************************************************/ 32 | 33 | #ifndef __PLATFORM_H_ 34 | #define __PLATFORM_H_ 35 | 36 | #include "platform_config.h" 37 | 38 | void init_platform(); 39 | void cleanup_platform(); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/src/PS_PL_selftest.c: -------------------------------------------------------------------------------- 1 | 2 | /***************************** Include Files *******************************/ 3 | #include "PS_PL.h" 4 | #include "xparameters.h" 5 | #include "stdio.h" 6 | #include "xil_io.h" 7 | 8 | /************************** Constant Definitions ***************************/ 9 | #define READ_WRITE_MUL_FACTOR 0x10 10 | 11 | /************************** Function Definitions ***************************/ 12 | /** 13 | * 14 | * Run a self-test on the driver/device. Note this may be a destructive test if 15 | * resets of the device are performed. 16 | * 17 | * If the hardware system is not built correctly, this function may never 18 | * return to the caller. 19 | * 20 | * @param baseaddr_p is the base address of the PS_PLinstance to be worked on. 
21 | * 22 | * @return 23 | * 24 | * - XST_SUCCESS if all self-test code passed 25 | * - XST_FAILURE if any self-test code failed 26 | * 27 | * @note Caching must be turned off for this function to work. 28 | * @note Self test may fail if data memory and device are not on the same bus. 29 | * 30 | */ 31 | XStatus PS_PL_Reg_SelfTest(void * baseaddr_p) 32 | { 33 | u32 baseaddr; 34 | int write_loop_index; 35 | int read_loop_index; 36 | int Index; 37 | 38 | baseaddr = (u32) baseaddr_p; 39 | 40 | xil_printf("******************************\n\r"); 41 | xil_printf("* User Peripheral Self Test\n\r"); 42 | xil_printf("******************************\n\n\r"); 43 | 44 | /* 45 | * Write to user logic slave module register(s) and read back 46 | */ 47 | xil_printf("User logic slave module test...\n\r"); 48 | 49 | for (write_loop_index = 0 ; write_loop_index < 4; write_loop_index++) 50 | PS_PL_mWriteReg (baseaddr, write_loop_index*4, (write_loop_index+1)*READ_WRITE_MUL_FACTOR); 51 | for (read_loop_index = 0 ; read_loop_index < 4; read_loop_index++) 52 | if ( PS_PL_mReadReg (baseaddr, read_loop_index*4) != (read_loop_index+1)*READ_WRITE_MUL_FACTOR){ 53 | xil_printf ("Error reading register value at address %x\n", (int)baseaddr + read_loop_index*4); 54 | return XST_FAILURE; 55 | } 56 | 57 | xil_printf(" - slave register write/read passed\n\n\r"); 58 | 59 | return XST_SUCCESS; 60 | } 61 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/drivers/PS_PL_v1_0/src/PS_PL.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef PS_PL_H 3 | #define PS_PL_H 4 | 5 | 6 | /****************** Include Files ********************/ 7 | #include "xil_types.h" 8 | #include "xstatus.h" 9 | 10 | #define PS_PL_S00_AXI_SLV_REG0_OFFSET 0 11 | #define PS_PL_S00_AXI_SLV_REG1_OFFSET 4 12 | #define PS_PL_S00_AXI_SLV_REG2_OFFSET 8 13 | #define PS_PL_S00_AXI_SLV_REG3_OFFSET 12 14 | 15 | 16 | 
/**************************** Type Definitions *****************************/
/**
 *
 * Write a value to a PS_PL register. A 32 bit write is performed.
 * If the component is implemented in a smaller width, only the least
 * significant data is written.
 *
 * Expands to Xil_Out32() on BaseAddress + RegOffset, with Data cast to u32.
 *
 * @param   BaseAddress is the base address of the PS_PL device.
 * @param   RegOffset is the register offset from the base to write to.
 * @param   Data is the data written to the register.
 *
 * @return  None.
 *
 * @note
 * C-style signature:
 * 	void PS_PL_mWriteReg(u32 BaseAddress, unsigned RegOffset, u32 Data)
 *
 * NOTE(review): the signature above shows BaseAddress as u32; the macro itself
 * does not cast the address, so the effective width is whatever the caller
 * passes and Xil_Out32() accepts - confirm for 64-bit targets.
 *
 */
#define PS_PL_mWriteReg(BaseAddress, RegOffset, Data) \
  	Xil_Out32((BaseAddress) + (RegOffset), (u32)(Data))

/**
 *
 * Read a value from a PS_PL register. A 32 bit read is performed.
 * If the component is implemented in a smaller width, only the least
 * significant data is read from the register. The most significant data
 * will be read as 0.
 *
 * Expands to Xil_In32() on BaseAddress + RegOffset.
 *
 * @param   BaseAddress is the base address of the PS_PL device.
 * @param   RegOffset is the register offset from the base to read from.
 *
 * @return  Data is the data from the register.
 *
 * @note
 * C-style signature:
 * 	u32 PS_PL_mReadReg(u32 BaseAddress, unsigned RegOffset)
 *
 * NOTE(review): as with PS_PL_mWriteReg, the u32 BaseAddress in this signature
 * is documentation only; the macro does not cast the address.
 *
 */
#define PS_PL_mReadReg(BaseAddress, RegOffset) \
    Xil_In32((BaseAddress) + (RegOffset))

/************************** Function Prototypes ****************************/
/**
 *
 * Run a self-test on the driver/device. Note this may be a destructive test if
 * resets of the device are performed.
 *
 * If the hardware system is not built correctly, this function may never
 * return to the caller.
 *
 * @param   baseaddr_p is the base address of the PS_PL instance to be worked on.
67 | * 68 | * @return 69 | * 70 | * - XST_SUCCESS if all self-test code passed 71 | * - XST_FAILURE if any self-test code failed 72 | * 73 | * @note Caching must be turned off for this function to work. 74 | * @note Self test may fail if data memory and device are not on the same bus. 75 | * 76 | */ 77 | XStatus PS_PL_Reg_SelfTest(void * baseaddr_p); 78 | 79 | #endif // PS_PL_H 80 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/xgui/PS_PL_v1_0.tcl: -------------------------------------------------------------------------------- 1 | # Definitional proc to organize widgets for parameters. 2 | proc init_gui { IPINST } { 3 | ipgui::add_param $IPINST -name "Component_Name" 4 | #Adding Page 5 | set Page_0 [ipgui::add_page $IPINST -name "Page 0"] 6 | ipgui::add_param $IPINST -name "C_S00_AXI_DATA_WIDTH" -parent ${Page_0} -widget comboBox 7 | ipgui::add_param $IPINST -name "C_S00_AXI_ADDR_WIDTH" -parent ${Page_0} 8 | ipgui::add_param $IPINST -name "C_S00_AXI_BASEADDR" -parent ${Page_0} 9 | ipgui::add_param $IPINST -name "C_S00_AXI_HIGHADDR" -parent ${Page_0} 10 | 11 | 12 | } 13 | 14 | proc update_PARAM_VALUE.C_S00_AXI_DATA_WIDTH { PARAM_VALUE.C_S00_AXI_DATA_WIDTH } { 15 | # Procedure called to update C_S00_AXI_DATA_WIDTH when any of the dependent parameters in the arguments change 16 | } 17 | 18 | proc validate_PARAM_VALUE.C_S00_AXI_DATA_WIDTH { PARAM_VALUE.C_S00_AXI_DATA_WIDTH } { 19 | # Procedure called to validate C_S00_AXI_DATA_WIDTH 20 | return true 21 | } 22 | 23 | proc update_PARAM_VALUE.C_S00_AXI_ADDR_WIDTH { PARAM_VALUE.C_S00_AXI_ADDR_WIDTH } { 24 | # Procedure called to update C_S00_AXI_ADDR_WIDTH when any of the dependent parameters in the arguments change 25 | } 26 | 27 | proc validate_PARAM_VALUE.C_S00_AXI_ADDR_WIDTH { PARAM_VALUE.C_S00_AXI_ADDR_WIDTH } { 28 | # Procedure called to validate C_S00_AXI_ADDR_WIDTH 29 | return true 30 | } 31 | 32 | proc update_PARAM_VALUE.C_S00_AXI_BASEADDR { 
PARAM_VALUE.C_S00_AXI_BASEADDR } { 33 | # Procedure called to update C_S00_AXI_BASEADDR when any of the dependent parameters in the arguments change 34 | } 35 | 36 | proc validate_PARAM_VALUE.C_S00_AXI_BASEADDR { PARAM_VALUE.C_S00_AXI_BASEADDR } { 37 | # Procedure called to validate C_S00_AXI_BASEADDR 38 | return true 39 | } 40 | 41 | proc update_PARAM_VALUE.C_S00_AXI_HIGHADDR { PARAM_VALUE.C_S00_AXI_HIGHADDR } { 42 | # Procedure called to update C_S00_AXI_HIGHADDR when any of the dependent parameters in the arguments change 43 | } 44 | 45 | proc validate_PARAM_VALUE.C_S00_AXI_HIGHADDR { PARAM_VALUE.C_S00_AXI_HIGHADDR } { 46 | # Procedure called to validate C_S00_AXI_HIGHADDR 47 | return true 48 | } 49 | 50 | 51 | proc update_MODELPARAM_VALUE.C_S00_AXI_DATA_WIDTH { MODELPARAM_VALUE.C_S00_AXI_DATA_WIDTH PARAM_VALUE.C_S00_AXI_DATA_WIDTH } { 52 | # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value 53 | set_property value [get_property value ${PARAM_VALUE.C_S00_AXI_DATA_WIDTH}] ${MODELPARAM_VALUE.C_S00_AXI_DATA_WIDTH} 54 | } 55 | 56 | proc update_MODELPARAM_VALUE.C_S00_AXI_ADDR_WIDTH { MODELPARAM_VALUE.C_S00_AXI_ADDR_WIDTH PARAM_VALUE.C_S00_AXI_ADDR_WIDTH } { 57 | # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value 58 | set_property value [get_property value ${PARAM_VALUE.C_S00_AXI_ADDR_WIDTH}] ${MODELPARAM_VALUE.C_S00_AXI_ADDR_WIDTH} 59 | } 60 | 61 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/hdl/PS_PL_v1_0.v: -------------------------------------------------------------------------------- 1 | 2 | `timescale 1 ns / 1 ps 3 | 4 | module PS_PL_v1_0 # 5 | ( 6 | // Users to add parameters here 7 | 8 | // User parameters ends 9 | // Do not modify the parameters beyond this line 10 | 11 | 12 | // Parameters of Axi Slave Bus Interface S00_AXI 13 | parameter integer C_S00_AXI_DATA_WIDTH = 32, 14 | parameter integer 
C_S00_AXI_ADDR_WIDTH = 4 15 | ) 16 | ( 17 | // Users to add ports here 18 | input [9-1:0] pre_result, 19 | input pre_result_valid, 20 | output [16-1:0] raw_data, 21 | output raw_data_valid, 22 | // User ports ends 23 | // Do not modify the ports beyond this line 24 | 25 | 26 | // Ports of Axi Slave Bus Interface S00_AXI 27 | input wire s00_axi_aclk, 28 | input wire s00_axi_aresetn, 29 | input wire [C_S00_AXI_ADDR_WIDTH-1 : 0] s00_axi_awaddr, 30 | input wire [2 : 0] s00_axi_awprot, 31 | input wire s00_axi_awvalid, 32 | output wire s00_axi_awready, 33 | input wire [C_S00_AXI_DATA_WIDTH-1 : 0] s00_axi_wdata, 34 | input wire [(C_S00_AXI_DATA_WIDTH/8)-1 : 0] s00_axi_wstrb, 35 | input wire s00_axi_wvalid, 36 | output wire s00_axi_wready, 37 | output wire [1 : 0] s00_axi_bresp, 38 | output wire s00_axi_bvalid, 39 | input wire s00_axi_bready, 40 | input wire [C_S00_AXI_ADDR_WIDTH-1 : 0] s00_axi_araddr, 41 | input wire [2 : 0] s00_axi_arprot, 42 | input wire s00_axi_arvalid, 43 | output wire s00_axi_arready, 44 | output wire [C_S00_AXI_DATA_WIDTH-1 : 0] s00_axi_rdata, 45 | output wire [1 : 0] s00_axi_rresp, 46 | output wire s00_axi_rvalid, 47 | input wire s00_axi_rready 48 | ); 49 | // Instantiation of Axi Bus Interface S00_AXI 50 | PS_PL_v1_0_S00_AXI # ( 51 | .C_S_AXI_DATA_WIDTH(C_S00_AXI_DATA_WIDTH), 52 | .C_S_AXI_ADDR_WIDTH(C_S00_AXI_ADDR_WIDTH) 53 | ) PS_PL_v1_0_S00_AXI_inst ( 54 | .pre_result(pre_result), 55 | .pre_result_valid(pre_result_valid), 56 | .raw_data(raw_data), 57 | .raw_data_valid(raw_data_valid), 58 | .S_AXI_ACLK(s00_axi_aclk), 59 | .S_AXI_ARESETN(s00_axi_aresetn), 60 | .S_AXI_AWADDR(s00_axi_awaddr), 61 | .S_AXI_AWPROT(s00_axi_awprot), 62 | .S_AXI_AWVALID(s00_axi_awvalid), 63 | .S_AXI_AWREADY(s00_axi_awready), 64 | .S_AXI_WDATA(s00_axi_wdata), 65 | .S_AXI_WSTRB(s00_axi_wstrb), 66 | .S_AXI_WVALID(s00_axi_wvalid), 67 | .S_AXI_WREADY(s00_axi_wready), 68 | .S_AXI_BRESP(s00_axi_bresp), 69 | .S_AXI_BVALID(s00_axi_bvalid), 70 | .S_AXI_BREADY(s00_axi_bready), 71 | 
.S_AXI_ARADDR(s00_axi_araddr), 72 | .S_AXI_ARPROT(s00_axi_arprot), 73 | .S_AXI_ARVALID(s00_axi_arvalid), 74 | .S_AXI_ARREADY(s00_axi_arready), 75 | .S_AXI_RDATA(s00_axi_rdata), 76 | .S_AXI_RRESP(s00_axi_rresp), 77 | .S_AXI_RVALID(s00_axi_rvalid), 78 | .S_AXI_RREADY(s00_axi_rready) 79 | ); 80 | 81 | // Add user logic here 82 | 83 | // User logic ends 84 | 85 | endmodule 86 | -------------------------------------------------------------------------------- /verilog/sources_1/new/layer_norm_1.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/12 17:17:16 7 | // Design Name: 8 | // Module Name: layer_norm 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module layer_norm_1( 24 | input clk, 25 | input rst_n, 26 | 27 | input [16-1:0] data_in, 28 | input data_in_valid, 29 | input [2:0] block_sel, 30 | 31 | output [16-1:0] data_out, 32 | output reg data_out_valid, 33 | output reg done 34 | ); 35 | 36 | reg [4:0] time_step_pre; 37 | reg [4:0] time_step; // 添加一个寄存器用于延迟 38 | 39 | always @(posedge clk or negedge rst_n) begin 40 | if (~rst_n) begin 41 | time_step_pre <= 0; 42 | end 43 | else if (time_step_pre == 'd29) 44 | time_step_pre <= 'd29; 45 | else if (data_in_valid) begin 46 | time_step_pre <= time_step_pre + 1'b1; 47 | end 48 | end 49 | 50 | always @(posedge clk or negedge rst_n) begin 51 | if (~rst_n) begin 52 | time_step <= 0; 53 | end 54 | else if (time_step_pre <= 'd29) begin 55 | time_step <= time_step_pre; 56 | end 57 | end 58 | 59 | wire [30*8-1:0] alpha; 60 | wire [30*8-1:0] beta; 61 | 
wire [2:0] sel = block_sel; 62 | 63 | alpha_rom_1 alpha_rom_1 ( 64 | .clka(clk), // input wire clka 65 | .ena(data_in_valid), // input wire ena 66 | .addra(sel), // input wire [2 : 0] addra 67 | .douta(alpha) // output wire [719 : 0] douta 68 | ); 69 | 70 | beta_rom_1 beta_rom_1 ( 71 | .clka(clk), // input wire clka 72 | .ena(data_in_valid), // input wire ena 73 | .addra(sel), // input wire [2 : 0] addra 74 | .douta(beta) // output wire [719 : 0] douta 75 | ); 76 | 77 | genvar i; 78 | generate 79 | for (i = 0; i < 16; i = i + 1) begin 80 | wire signed [8-1:0] alpha_i = alpha[time_step*8 +: 8]; 81 | wire signed [8-1:0] beta_i = beta[time_step*8 +: 8]; 82 | 83 | // always@(posedge clk or negedge rst_n) begin 84 | // if (~rst_n) begin 85 | // data_out[i] <= 0; 86 | // end 87 | // else if (data_in_valid) begin 88 | // data_out[i] <= (data_in[i] * alpha_i + beta_i) > 0 ? 1 : 0; 89 | // end 90 | // end 91 | assign data_out[i] = (data_in[i] * alpha_i + beta_i) > 0 ? 1 : 0; 92 | end 93 | endgenerate 94 | 95 | always@(posedge clk,negedge rst_n)begin 96 | if(~rst_n) 97 | done <= 0; 98 | else if (time_step == 5'd29) 99 | done <= 1; 100 | end 101 | 102 | always@(posedge clk,negedge rst_n)begin 103 | if(~rst_n) 104 | data_out_valid <= 0; 105 | else if (data_in_valid) 106 | data_out_valid <= 1; 107 | else 108 | data_out_valid <= 0; 109 | end 110 | 111 | endmodule 112 | -------------------------------------------------------------------------------- /verilog/sources_1/new/layer_norm_3.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/13 14:59:41 7 | // Design Name: 8 | // Module Name: layer_norm_3 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File 
Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module layer_norm_3( 24 | input clk, 25 | input rst_n, 26 | 27 | input [16-1:0] data_in, 28 | input data_in_valid, 29 | input [2:0] block_sel, 30 | 31 | output [16-1:0] data_out, 32 | output reg data_out_valid, 33 | output reg done 34 | ); 35 | 36 | reg [4:0] time_step_pre; 37 | reg [4:0] time_step; // 添加一个寄存器用于延迟 38 | 39 | always @(posedge clk or negedge rst_n) begin 40 | if (~rst_n) begin 41 | time_step_pre <= 0; 42 | end 43 | else if (time_step_pre == 'd29) 44 | time_step_pre <= 'd29; 45 | else if (data_in_valid) begin 46 | time_step_pre <= time_step_pre + 1'b1; 47 | end 48 | end 49 | 50 | always @(posedge clk or negedge rst_n) begin 51 | if (~rst_n) begin 52 | time_step <= 0; 53 | end 54 | else if (time_step_pre <= 'd29) begin 55 | time_step <= time_step_pre; 56 | end 57 | end 58 | 59 | wire [30*8-1:0] alpha; 60 | wire [30*8-1:0] beta; 61 | wire [2:0] sel = block_sel; 62 | 63 | alpha_rom_3 alpha_rom_3 ( 64 | .clka(clk), // input wire clka 65 | .ena(data_in_valid), // input wire ena 66 | .addra(sel), // input wire [2 : 0] addra 67 | .douta(alpha) // output wire [719 : 0] douta 68 | ); 69 | 70 | beta_rom_3 beta_rom_3 ( 71 | .clka(clk), // input wire clka 72 | .ena(data_in_valid), // input wire ena 73 | .addra(sel), // input wire [2 : 0] addra 74 | .douta(beta) // output wire [719 : 0] douta 75 | ); 76 | 77 | genvar i; 78 | generate 79 | for (i = 0; i < 16; i = i + 1) begin 80 | wire signed [8-1:0] alpha_i = alpha[time_step*8 +: 8]; 81 | wire signed [8-1:0] beta_i = beta[time_step*8 +: 8]; 82 | 83 | // always@(posedge clk or negedge rst_n) begin 84 | // if (~rst_n) begin 85 | // data_out[i] <= 0; 86 | // end 87 | // else if (data_in_valid) begin 88 | // data_out[i] <= (data_in[i] * alpha_i + beta_i) > 0 ? 1 : 0; 89 | // end 90 | // end 91 | assign data_out[i] = (data_in[i] * alpha_i + beta_i) > 0 ? 
1 : 0; 92 | end 93 | endgenerate 94 | 95 | always@(posedge clk,negedge rst_n)begin 96 | if(~rst_n) 97 | done <= 0; 98 | else if (time_step == 5'd29) 99 | done <= 1; 100 | end 101 | 102 | always@(posedge clk,negedge rst_n)begin 103 | if(~rst_n) 104 | data_out_valid <= 0; 105 | else if (data_in_valid) 106 | data_out_valid <= 1; 107 | else 108 | data_out_valid <= 0; 109 | end 110 | 111 | endmodule 112 | -------------------------------------------------------------------------------- /verilog/sources_1/new/layer_norm_2.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/12 23:44:24 7 | // Design Name: 8 | // Module Name: layer_norm_2 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module layer_norm_2( 24 | input clk, 25 | input rst_n, 26 | 27 | input [16-1:0] data_in, 28 | input data_in_valid, 29 | input [2:0] block_sel, 30 | 31 | output [16-1:0] data_out, 32 | output reg data_out_valid, 33 | output reg done 34 | ); 35 | 36 | reg [4:0] time_step_pre; 37 | reg [4:0] time_step; // 添加一个寄存器用于延迟 38 | 39 | always @(posedge clk or negedge rst_n) begin 40 | if (~rst_n) begin 41 | time_step_pre <= 0; 42 | end 43 | else if (time_step_pre == 'd29) 44 | time_step_pre <= 'd29; 45 | else if (data_in_valid) begin 46 | time_step_pre <= time_step_pre + 1'b1; 47 | end 48 | end 49 | 50 | always @(posedge clk or negedge rst_n) begin 51 | if (~rst_n) begin 52 | time_step <= 0; 53 | end 54 | else if (time_step_pre <= 'd29) begin 55 | time_step <= time_step_pre; 56 | end 57 | end 58 | 59 | wire [30*8-1:0] alpha; 60 
| wire [30*8-1:0] beta; 61 | wire [2:0] sel = block_sel; 62 | 63 | alpha_rom_2 alpha_rom_2 ( 64 | .clka(clk), // input wire clka 65 | .ena(data_in_valid), // input wire ena 66 | .addra(sel), // input wire [2 : 0] addra 67 | .douta(alpha) // output wire [719 : 0] douta 68 | ); 69 | 70 | beta_rom_2 beta_rom_2 ( 71 | .clka(clk), // input wire clka 72 | .ena(data_in_valid), // input wire ena 73 | .addra(sel), // input wire [2 : 0] addra 74 | .douta(beta) // output wire [719 : 0] douta 75 | ); 76 | 77 | genvar i; 78 | generate 79 | for (i = 0; i < 16; i = i + 1) begin 80 | wire signed [8-1:0] alpha_i = alpha[time_step*8 +: 8]; 81 | wire signed [8-1:0] beta_i = beta[time_step*8 +: 8]; 82 | 83 | // always@(posedge clk or negedge rst_n) begin 84 | // if (~rst_n) begin 85 | // data_out[i] <= 0; 86 | // end 87 | // else if (data_in_valid) begin 88 | // data_out[i] <= (data_in[i] * alpha_i + beta_i) > 0 ? 1 : 0; 89 | // end 90 | // end 91 | assign data_out[i] = (data_in[i] * alpha_i + beta_i) > 0 ? 
1 : 0; 92 | end 93 | endgenerate 94 | 95 | always@(posedge clk,negedge rst_n)begin 96 | if(~rst_n) 97 | done <= 0; 98 | else if (time_step == 5'd29) 99 | done <= 1; 100 | end 101 | 102 | always@(posedge clk,negedge rst_n)begin 103 | if(~rst_n) 104 | data_out_valid <= 0; 105 | else if (data_in_valid) 106 | data_out_valid <= 1; 107 | else 108 | data_out_valid <= 0; 109 | end 110 | 111 | endmodule 112 | 113 | -------------------------------------------------------------------------------- /verilog/sources_1/bd/design_1/design_1.bda: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 0x0080000000 25 | C_S00_AXI_BASEADDR 26 | 0x008000FFFF 27 | C_S00_AXI_HIGHADDR 28 | Data 29 | /zynq_ultra_ps_e_0 30 | M_AXI_HPM0_LPD 31 | SEG_PS_PL_0_S00_AXI_reg 32 | xilinx.com:ip:zynq_ultra_ps_e:3.4 33 | both 34 | /PS_PL_0 35 | S00_AXI 36 | S00_AXI_reg 37 | xilinx.com:user:PS_PL:1.0 38 | register 39 | AC 40 | 41 | 42 | active 43 | 2 44 | PM 45 | 46 | 47 | design_1 48 | BC 49 | 50 | 51 | 2 52 | design_1 53 | VR 54 | 55 | 56 | 57 | 58 | 2 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /verilog/sources_1/new/binary_intermediate_1.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/12 10:17:56 7 | // Design Name: 8 | // Module Name: binary_intermediate_1 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module 
binary_intermediate_1( 24 | input clk, 25 | input rst_n, 26 | 27 | input [16-1:0] data_in, 28 | input data_in_valid, 29 | input wire [2-1:0] block_sel, 30 | 31 | output reg [64-1:0] data_out, 32 | output reg data_out_valid, 33 | output reg done 34 | ); 35 | 36 | reg [4:0] time_step_pre; 37 | reg [4:0] time_step; // 添加一个寄存器用于延迟 38 | 39 | always @(posedge clk or negedge rst_n) begin 40 | if (~rst_n) begin 41 | time_step_pre <= 0; 42 | end 43 | else if (time_step_pre == 'd29) begin 44 | time_step_pre <= 'd29; 45 | end 46 | else if (data_in_valid) begin 47 | time_step_pre <= time_step_pre + 1'b1; 48 | end 49 | end 50 | 51 | always @(posedge clk or negedge rst_n) begin 52 | if (~rst_n) begin 53 | time_step <= 0; 54 | end 55 | else if (time_step_pre <= 'd29) begin 56 | time_step <= time_step_pre; 57 | end 58 | end 59 | 60 | wire [16*64-1:0] inter_w_data; 61 | wire [2:0] sel = block_sel; 62 | attention_intermediate_1 attention_intermediate_1 ( 63 | .clka(clk), // input wire clka 64 | .ena(data_in_valid), // input wire ena 65 | .addra(sel), // input wire [2 : 0] addra 66 | .douta(inter_w_data) // output wire [1023 : 0] douta 67 | ); 68 | 69 | genvar i; 70 | integer j; 71 | 72 | generate 73 | for (i = 0; i < 64; i = i + 1) begin 74 | wire [15:0] inter_xor_result = ~(inter_w_data[i*16 +: 16] ^ data_in); 75 | 76 | wire [4:0] inter_popcount_out = inter_xor_result[0] + inter_xor_result[1] + inter_xor_result[2] + inter_xor_result[3] + inter_xor_result[4] + inter_xor_result[5] + inter_xor_result[6] + inter_xor_result[7] + inter_xor_result[8] + inter_xor_result[9] + inter_xor_result[10] + inter_xor_result[11] + inter_xor_result[12] + inter_xor_result[13] + inter_xor_result[14] + inter_xor_result[15]; 77 | 78 | // always@(posedge clk or negedge rst_n) begin 79 | // if (~rst_n) begin 80 | // inter_popcount_out <= 0; 81 | // end 82 | // else if (data_in_valid) begin 83 | // for (j = 0; j < 16; j = j + 1) begin 84 | // inter_popcount_out <= inter_popcount_out + inter_xor_result[j]; 85 
| // end 86 | // end 87 | // end 88 | 89 | always@(posedge clk or negedge rst_n) begin 90 | if (~rst_n) begin 91 | data_out[i] <= 0; 92 | end 93 | else if (data_in_valid) begin 94 | data_out[i] <= (2*inter_popcount_out-16) > 0 ? 1 : 0; 95 | end 96 | end 97 | end 98 | endgenerate 99 | 100 | 101 | 102 | always@(posedge clk,negedge rst_n)begin 103 | if(~rst_n) 104 | done <= 0; 105 | else if (time_step == 5'd29) 106 | done <= 1; 107 | end 108 | 109 | always@(posedge clk,negedge rst_n)begin 110 | if(~rst_n) 111 | data_out_valid <= 0; 112 | else if (data_in_valid) 113 | data_out_valid <= 1; 114 | else 115 | data_out_valid <= 0; 116 | end 117 | 118 | 119 | endmodule 120 | -------------------------------------------------------------------------------- /verilog/sources_1/new/encoder_hidden_state_1.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // Engineer: 5 | // 6 | // Create Date: 2023/11/13 09:43:10 7 | // Design Name: 8 | // Module Name: encoder_hidden_state_1 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 | module encoder_hidden_state_1( 24 | input clk, 25 | input rst_n, 26 | 27 | input [16-1:0] data_in, 28 | input data_in_valid, 29 | input wire [2-1:0] block_sel, 30 | 31 | output reg [256-1:0] data_out, 32 | output reg data_out_valid, 33 | output reg done 34 | ); 35 | 36 | reg [4:0] time_step_pre; 37 | reg [4:0] time_step; // 添加一个寄存器用于延迟 38 | 39 | always @(posedge clk or negedge rst_n) begin 40 | if (~rst_n) begin 41 | time_step_pre <= 0; 42 | end 43 | else if (time_step_pre == 'd29) begin 44 | time_step_pre <= 'd29; 45 | end 46 | 
else if (data_in_valid) begin 47 | time_step_pre <= time_step_pre + 1'b1; 48 | end 49 | end 50 | 51 | always @(posedge clk or negedge rst_n) begin 52 | if (~rst_n) begin 53 | time_step <= 0; 54 | end 55 | else if (time_step_pre <= 'd29) begin 56 | time_step <= time_step_pre; 57 | end 58 | end 59 | 60 | 61 | wire [16*256-1:0] inter_w_data; 62 | wire [2:0] sel = block_sel; 63 | encoder_hidden_state_1_rom encoder_hidden_state_1_rom ( 64 | .clka(clk), // input wire clka 65 | .ena(data_in_valid), // input wire ena 66 | .addra(sel), // input wire [2 : 0] addra 67 | .douta(inter_w_data) // output wire [4095 : 0] douta 68 | ); 69 | 70 | genvar i; 71 | integer j; 72 | 73 | generate 74 | for (i = 0; i < 256; i = i + 1) begin 75 | wire [15:0] inter_xor_result = ~(inter_w_data[i*16 +: 16] ^ data_in); 76 | 77 | wire [4:0] inter_popcount_out = inter_xor_result[0] + inter_xor_result[1] + inter_xor_result[2] + inter_xor_result[3] + inter_xor_result[4] + inter_xor_result[5] + inter_xor_result[6] + inter_xor_result[7] + inter_xor_result[8] + inter_xor_result[9] + inter_xor_result[10] + inter_xor_result[11] + inter_xor_result[12] + inter_xor_result[13] + inter_xor_result[14] + inter_xor_result[15]; 78 | 79 | // reg [4:0] inter_popcount_out; 80 | // 81 | // always@(posedge clk or negedge rst_n) begin 82 | // if (~rst_n) begin 83 | // inter_popcount_out <= 0; 84 | // end 85 | // else if (data_in_valid) begin 86 | // for (j = 0; j < 16; j = j + 1) begin 87 | // inter_popcount_out <= inter_popcount_out + inter_xor_result[j]; 88 | // end 89 | // end 90 | // end 91 | 92 | always@(posedge clk or negedge rst_n) begin 93 | if (~rst_n) begin 94 | data_out[i] <= 0; 95 | end 96 | else if (data_in_valid) begin 97 | data_out[i] <= (2*inter_popcount_out-16) > 0 ? 
1 : 0; 98 | end 99 | end 100 | end 101 | endgenerate 102 | 103 | 104 | 105 | always@(posedge clk,negedge rst_n)begin 106 | if(~rst_n) 107 | done <= 0; 108 | else if (time_step == 5'd29) 109 | done <= 1; 110 | end 111 | 112 | always@(posedge clk,negedge rst_n)begin 113 | if(~rst_n) 114 | data_out_valid <= 0; 115 | else if (data_in_valid) 116 | data_out_valid <= 1; 117 | else 118 | data_out_valid <= 0; 119 | end 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | endmodule 134 | -------------------------------------------------------------------------------- /vitis/src/platform.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * 3 | * Copyright (C) 2010 - 2015 Xilinx, Inc. All rights reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in 13 | * all copies or substantial portions of the Software. 14 | * 15 | * Use of the Software is limited solely to applications: 16 | * (a) running on a Xilinx device, or 17 | * (b) that interact with a Xilinx device through a bus or interconnect. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 22 | * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 24 | * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | * SOFTWARE. 26 | * 27 | * Except as contained in this notice, the name of the Xilinx shall not be used 28 | * in advertising or otherwise to promote the sale, use or other dealings in 29 | * this Software without prior written authorization from Xilinx. 30 | * 31 | ******************************************************************************/ 32 | 33 | #include "xparameters.h" 34 | #include "xil_cache.h" 35 | 36 | #include "platform_config.h" 37 | 38 | /* 39 | * Uncomment one of the following two lines, depending on the target, 40 | * if ps7/psu init source files are added in the source directory for 41 | * compiling example outside of SDK. 42 | */ 43 | /*#include "ps7_init.h"*/ 44 | /*#include "psu_init.h"*/ 45 | 46 | #ifdef STDOUT_IS_16550 47 | #include "xuartns550_l.h" 48 | 49 | #define UART_BAUD 9600 50 | #endif 51 | 52 | void 53 | enable_caches() 54 | { 55 | #ifdef __PPC__ 56 | Xil_ICacheEnableRegion(CACHEABLE_REGION_MASK); 57 | Xil_DCacheEnableRegion(CACHEABLE_REGION_MASK); 58 | #elif __MICROBLAZE__ 59 | #ifdef XPAR_MICROBLAZE_USE_ICACHE 60 | Xil_ICacheEnable(); 61 | #endif 62 | #ifdef XPAR_MICROBLAZE_USE_DCACHE 63 | Xil_DCacheEnable(); 64 | #endif 65 | #endif 66 | } 67 | 68 | void 69 | disable_caches() 70 | { 71 | #ifdef __MICROBLAZE__ 72 | #ifdef XPAR_MICROBLAZE_USE_DCACHE 73 | Xil_DCacheDisable(); 74 | #endif 75 | #ifdef XPAR_MICROBLAZE_USE_ICACHE 76 | Xil_ICacheDisable(); 77 | #endif 78 | #endif 79 | } 80 | 81 | void 82 | init_uart() 83 | { 84 | #ifdef STDOUT_IS_16550 85 | XUartNs550_SetBaud(STDOUT_BASEADDR, XPAR_XUARTNS550_CLOCK_HZ, UART_BAUD); 86 | XUartNs550_SetLineControlReg(STDOUT_BASEADDR, XUN_LCR_8_DATA_BITS); 87 | #endif 88 | /* Bootrom/BSP configures PS7/PSU UART to 115200 bps */ 89 | } 90 | 91 | void 
92 | init_platform() 93 | { 94 | /* 95 | * If you want to run this example outside of SDK, 96 | * uncomment one of the following two lines and also #include "ps7_init.h" 97 | * or #include "ps7_init.h" at the top, depending on the target. 98 | * Make sure that the ps7/psu_init.c and ps7/psu_init.h files are included 99 | * along with this example source files for compilation. 100 | */ 101 | /* ps7_init();*/ 102 | /* psu_init();*/ 103 | enable_caches(); 104 | init_uart(); 105 | } 106 | 107 | void 108 | cleanup_platform() 109 | { 110 | disable_caches(); 111 | } 112 | -------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/example_designs/bfm_design/design.tcl: -------------------------------------------------------------------------------- 1 | proc create_ipi_design { offsetfile design_name } { 2 | create_bd_design $design_name 3 | open_bd_design $design_name 4 | 5 | # Create Clock and Reset Ports 6 | set ACLK [ create_bd_port -dir I -type clk ACLK ] 7 | set_property -dict [ list CONFIG.FREQ_HZ {100000000} CONFIG.PHASE {0.000} CONFIG.CLK_DOMAIN "${design_name}_ACLK" ] $ACLK 8 | set ARESETN [ create_bd_port -dir I -type rst ARESETN ] 9 | set_property -dict [ list CONFIG.POLARITY {ACTIVE_LOW} ] $ARESETN 10 | set_property CONFIG.ASSOCIATED_RESET ARESETN $ACLK 11 | 12 | # Create instance: PS_PL_0, and set properties 13 | set PS_PL_0 [ create_bd_cell -type ip -vlnv xilinx.com:user:PS_PL:1.0 PS_PL_0] 14 | 15 | # Create instance: master_0, and set properties 16 | set master_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_vip master_0] 17 | set_property -dict [ list CONFIG.PROTOCOL {AXI4LITE} CONFIG.INTERFACE_MODE {MASTER} ] $master_0 18 | 19 | # Create interface connections 20 | connect_bd_intf_net [get_bd_intf_pins master_0/M_AXI ] [get_bd_intf_pins PS_PL_0/S00_AXI] 21 | 22 | # Create port connections 23 | connect_bd_net -net aclk_net [get_bd_ports ACLK] [get_bd_pins master_0/ACLK] [get_bd_pins PS_PL_0/S00_AXI_ACLK] 
24 | connect_bd_net -net aresetn_net [get_bd_ports ARESETN] [get_bd_pins master_0/ARESETN] [get_bd_pins PS_PL_0/S00_AXI_ARESETN] 25 | set_property target_simulator XSim [current_project] 26 | set_property -name {xsim.simulate.runtime} -value {100ms} -objects [get_filesets sim_1] 27 | 28 | # Auto assign address 29 | assign_bd_address 30 | 31 | # Copy all address to interface_address.vh file 32 | set bd_path [file dirname [get_property NAME [get_files ${design_name}.bd]]] 33 | upvar 1 $offsetfile offset_file 34 | set offset_file "${bd_path}/PS_PL_v1_0_tb_include.svh" 35 | set fp [open $offset_file "w"] 36 | puts $fp "`ifndef PS_PL_v1_0_tb_include_vh_" 37 | puts $fp "`define PS_PL_v1_0_tb_include_vh_\n" 38 | puts $fp "//Configuration current bd names" 39 | puts $fp "`define BD_NAME ${design_name}" 40 | puts $fp "`define BD_INST_NAME ${design_name}_i" 41 | puts $fp "`define BD_WRAPPER ${design_name}_wrapper\n" 42 | puts $fp "//Configuration address parameters" 43 | 44 | puts $fp "`endif" 45 | close $fp 46 | } 47 | 48 | set ip_path [file dirname [file normalize [get_property XML_FILE_NAME [ipx::get_cores xilinx.com:user:PS_PL:1.0]]]] 49 | set test_bench_file ${ip_path}/example_designs/bfm_design/PS_PL_v1_0_tb.sv 50 | set interface_address_vh_file "" 51 | 52 | # Set IP Repository and Update IP Catalogue 53 | set repo_paths [get_property ip_repo_paths [current_fileset]] 54 | if { [lsearch -exact -nocase $repo_paths $ip_path ] == -1 } { 55 | set_property ip_repo_paths "$ip_path [get_property ip_repo_paths [current_fileset]]" [current_fileset] 56 | update_ip_catalog 57 | } 58 | 59 | set design_name "" 60 | set all_bd {} 61 | set all_bd_files [get_files *.bd -quiet] 62 | foreach file $all_bd_files { 63 | set file_name [string range $file [expr {[string last "/" $file] + 1}] end] 64 | set bd_name [string range $file_name 0 [expr {[string last "." 
$file_name] -1}]] 65 | lappend all_bd $bd_name 66 | } 67 | 68 | for { set i 1 } { 1 } { incr i } { 69 | set design_name "PS_PL_v1_0_bfm_${i}" 70 | if { [lsearch -exact -nocase $all_bd $design_name ] == -1 } { 71 | break 72 | } 73 | } 74 | 75 | create_ipi_design interface_address_vh_file ${design_name} 76 | validate_bd_design 77 | 78 | set wrapper_file [make_wrapper -files [get_files ${design_name}.bd] -top -force] 79 | import_files -force -norecurse $wrapper_file 80 | 81 | set_property SOURCE_SET sources_1 [get_filesets sim_1] 82 | import_files -fileset sim_1 -norecurse -force $test_bench_file 83 | remove_files -quiet -fileset sim_1 PS_PL_v1_0_tb_include.vh 84 | import_files -fileset sim_1 -norecurse -force $interface_address_vh_file 85 | set_property top PS_PL_v1_0_tb [get_filesets sim_1] 86 | set_property top_lib {} [get_filesets sim_1] 87 | set_property top_file {} [get_filesets sim_1] 88 | launch_simulation -simset sim_1 -mode behavioral 89 |
--------------------------------------------------------------------------------
/verilog/sources_1/new/binary_intermediate_2.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Module Name: binary_intermediate_2
//
// XNOR/popcount layer with 16 output bits.
//
// For every output bit i, the 64-bit input word is XNOR-ed with the i-th
// 64-bit slice of the 1024-bit word read from the attention_intermediate_2
// ROM (row chosen by block_sel).  The output bit is 1 when MORE than half of
// the 64 positions agree, i.e. when 2*popcount - 64 is strictly positive.
//
// Ports
//   clk, rst_n      clock / asynchronous active-low reset
//   data_in         64-bit input word
//   data_in_valid   qualifies data_in; also used as the ROM enable
//   block_sel       ROM row select (zero-extended to the 3-bit ROM address)
//   data_out        16 result bits, registered on data_in_valid
//   data_out_valid  data_in_valid delayed by one clock
//   done            sticky flag, set once the delayed sample counter is 29
//
// NOTE(review): the ROM is enabled by data_in_valid and clocked by clk, so if
// the IP has a synchronous read its output presumably lags the address by at
// least one cycle while data_in is consumed in the same cycle - confirm the
// alignment against the ROM configuration and the caller.
//////////////////////////////////////////////////////////////////////////////////


module binary_intermediate_2(
    input                   clk,
    input                   rst_n,

    input       [64-1:0]    data_in,
    input                   data_in_valid,
    input  wire [2-1:0]     block_sel,

    output reg  [16-1:0]    data_out,
    output reg              data_out_valid,
    output reg              done
    );

    // Number of accepted samples, saturating at 29.
    reg [4:0] time_step_pre;
    // One-cycle delayed copy of time_step_pre (drives `done`).
    reg [4:0] time_step;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step_pre <= 5'd0;
        else if (data_in_valid && time_step_pre != 5'd29)
            time_step_pre <= time_step_pre + 1'b1;
    end

    // time_step_pre never exceeds 29, so the original "<= 29" guard was
    // always true; the copy is therefore unconditional.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step <= 5'd0;
        else
            time_step <= time_step_pre;
    end

    // Weight ROM: one 16 x 64-bit row per address.
    wire [16*64-1:0] inter_w_data;
    wire [2:0]       sel = {1'b0, block_sel};   // explicit zero-extension
    attention_intermediate_2 attention_intermediate_2 (
        .clka (clk),            // input wire clka
        .ena  (data_in_valid),  // input wire ena
        .addra(sel),            // input wire [2 : 0] addra
        .douta(inter_w_data)    // output wire [1023 : 0] douta
    );

    // Number of set bits in a 64-bit vector (0..64 fits in 7 bits).
    function [6:0] popcount64;
        input [63:0] bits;
        integer      k;
        begin
            popcount64 = 7'd0;
            for (k = 0; k < 64; k = k + 1)
                popcount64 = popcount64 + bits[k];
        end
    endfunction

    genvar i;
    generate
        for (i = 0; i < 16; i = i + 1) begin : gen_neuron
            wire [63:0] inter_xor_result   = ~(inter_w_data[i*64 +: 64] ^ data_in);
            wire [6:0]  inter_popcount_out = popcount64(inter_xor_result);

            always @(posedge clk or negedge rst_n) begin
                if (~rst_n)
                    data_out[i] <= 1'b0;
                else if (data_in_valid)
                    // sign(2*popcount - 64) > 0  <=>  popcount > 32.
                    // The former "(2*popcount-64) > 0" was evaluated unsigned,
                    // so it wrapped and was true for every count except 32.
                    data_out[i] <= (inter_popcount_out > 7'd32);
            end
        end
    endgenerate

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            done <= 1'b0;
        else if (time_step == 5'd29)
            done <= 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out_valid <= 1'b0;
        else
            data_out_valid <= data_in_valid;
    end

endmodule

--------------------------------------------------------------------------------
/verilog/sources_1/new/binary_score.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Module Name: binary_score
//
// Binary attention score for 4 heads x 30 key tokens.
//
// query_in holds four 4-bit head slices; key_in holds 30 tokens of 16 bits,
// each token again split into four 4-bit head slices.  For head h and token t
// the score bit is 1 when data_in_valid is high and 2*popcount(XNOR(q, k)) is
// strictly greater than THRESHOLD (4), i.e. at least 3 of the 4 bit pairs
// agree.
//
// All four heads use the same combinational XNOR/popcount datapath.  Heads
// 2-4 previously used XOR without the inversion and a clocked accumulator
// that was never cleared (and wrapped in 3 bits), so their outputs depended
// on history rather than on the current inputs; they also shared a single
// module-level loop integer between 30 generated always blocks.
// NOTE(review): heads 2-4 are now combinational like head 1, i.e. one cycle
// earlier than the old registered version - confirm the consumer expects this.
//
// Ports
//   query_in        {head4, head3, head2, head1} 4-bit query slices
//   key_in          30 x 16-bit key tokens, token t at key_in[t*16 +: 16]
//   data_in_valid   gates every score bit and advances the sample counter
//   data_out_1..4   30 score bits per head (bit t = token t)
//   data_out_valid  sticky, set once the sample counter equals 29
//   done            sticky, set once the sample counter equals 29
//////////////////////////////////////////////////////////////////////////////////


module binary_score(
    input                   clk,
    input                   rst_n,

    input  [16-1:0]         query_in,
    input  [30*16-1:0]      key_in,
    input                   data_in_valid,

    output [30-1:0]         data_out_1,
    output [30-1:0]         data_out_2,
    output [30-1:0]         data_out_3,
    output [30-1:0]         data_out_4,
    output reg              data_out_valid,
    output reg              done
    );

    // Bool-function threshold: score = (2*popcount > THRESHOLD).
    localparam [2:0] THRESHOLD = 3'd4;

    // Counts cycles in which data_in_valid is high (5-bit, wraps at 32).
    reg [4:0] time_step;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step <= 5'd0;
        else if (data_in_valid)
            time_step <= time_step + 1'b1;
    end

    // 1 when more than THRESHOLD/2 of the four q/k bit pairs agree.
    // The comparison is done on zero-extended unsigned values; the earlier
    // "(2*popcount - threshold) > 0" form wrapped for small counts and was
    // true for every popcount except exactly 2.
    function head_score;
        input [3:0] q_bits;
        input [3:0] k_bits;
        reg   [3:0] agree;
        reg   [2:0] matches;
        begin
            agree      = ~(q_bits ^ k_bits);
            matches    = agree[0] + agree[1] + agree[2] + agree[3];
            head_score = ({matches, 1'b0} > {1'b0, THRESHOLD});
        end
    endfunction

    genvar tok;
    generate
        for (tok = 0; tok < 30; tok = tok + 1) begin : gen_token
            // Head 1: bits [3:0] of query and of each key token
            assign data_out_1[tok] = (head_score(query_in[ 3: 0], key_in[tok*16 +  0 +: 4]) && data_in_valid) ? 1'b1 : 1'b0;
            // Head 2: bits [7:4]
            assign data_out_2[tok] = (head_score(query_in[ 7: 4], key_in[tok*16 +  4 +: 4]) && data_in_valid) ? 1'b1 : 1'b0;
            // Head 3: bits [11:8]
            assign data_out_3[tok] = (head_score(query_in[11: 8], key_in[tok*16 +  8 +: 4]) && data_in_valid) ? 1'b1 : 1'b0;
            // Head 4: bits [15:12]
            assign data_out_4[tok] = (head_score(query_in[15:12], key_in[tok*16 + 12 +: 4]) && data_in_valid) ?
1 : 0; 158 | end 159 | endgenerate 160 | 161 | always@(posedge clk,negedge rst_n)begin 162 | if(~rst_n) 163 | done <= 0; 164 | else if (time_step == 5'd29) 165 | done <= 1; 166 | end 167 | 168 | always@(posedge clk,negedge rst_n)begin 169 | if(~rst_n) 170 | data_out_valid <= 0; 171 | else if (time_step == 5'd29) 172 | data_out_valid <= 1; 173 | end 174 | 175 | endmodule 176 | -------------------------------------------------------------------------------- /verilog/param_1/encoder_hidden_state_1.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 ||||||| -------------------------------------------------------------------------------- /verilog/param_1/encoder_hidden_state_2.coe: -------------------------------------------------------------------------------- 1 | memory_initialization_radix=16; 2 | memory_initialization_vector= 3 || FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, 5 ||||| 
-------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/example_designs/debug_hw_design/design.tcl: -------------------------------------------------------------------------------- 1 | 2 | proc create_ipi_design { offsetfile design_name } { 3 | 4 | create_bd_design $design_name 5 | open_bd_design $design_name 6 | 7 | # Create and configure Clock/Reset 8 | create_bd_cell -type ip -vlnv xilinx.com:ip:clk_wiz sys_clk_0 9 | create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset sys_reset_0 10 | 11 | #Constraints will be provided manually while pin planning. 12 | create_bd_port -dir I -type rst reset_rtl 13 | set_property CONFIG.POLARITY [get_property CONFIG.POLARITY [get_bd_pins sys_clk_0/reset]] [get_bd_ports reset_rtl] 14 | connect_bd_net [get_bd_pins sys_reset_0/ext_reset_in] [get_bd_ports reset_rtl] 15 | connect_bd_net [get_bd_ports reset_rtl] [get_bd_pins sys_clk_0/reset] 16 | set external_reset_port reset_rtl 17 | create_bd_port -dir I -type clk clock_rtl 18 | connect_bd_net [get_bd_pins sys_clk_0/clk_in1] [get_bd_ports clock_rtl] 19 | set external_clock_port clock_rtl 20 | 21 | #Avoid IPI DRC, make clock port synchronous to reset 22 | if { $external_clock_port ne "" && $external_reset_port ne "" } { 23 | set_property CONFIG.ASSOCIATED_RESET $external_reset_port [get_bd_ports $external_clock_port] 24 | } 25 | 26 | # Connect other sys_reset pins 27 | connect_bd_net [get_bd_pins sys_reset_0/slowest_sync_clk] [get_bd_pins sys_clk_0/clk_out1] 28 | connect_bd_net [get_bd_pins sys_clk_0/locked] [get_bd_pins sys_reset_0/dcm_locked] 29 | 30 | # Create instance: PS_PL_0, and set properties 31 | set PS_PL_0 [ create_bd_cell -type ip -vlnv xilinx.com:user:PS_PL:1.0 PS_PL_0 ] 32 | 33 | # Create instance: jtag_axi_0, and set properties 34 | set jtag_axi_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:jtag_axi jtag_axi_0 ] 35 | set_property -dict [list CONFIG.PROTOCOL {0}] [get_bd_cells jtag_axi_0] 36 | connect_bd_net 
[get_bd_pins jtag_axi_0/aclk] [get_bd_pins sys_clk_0/clk_out1] 37 | connect_bd_net [get_bd_pins jtag_axi_0/aresetn] [get_bd_pins sys_reset_0/peripheral_aresetn] 38 | 39 | # Create instance: axi_peri_interconnect, and set properties 40 | set axi_peri_interconnect [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect axi_peri_interconnect ] 41 | connect_bd_net [get_bd_pins axi_peri_interconnect/ACLK] [get_bd_pins sys_clk_0/clk_out1] 42 | connect_bd_net [get_bd_pins axi_peri_interconnect/ARESETN] [get_bd_pins sys_reset_0/interconnect_aresetn] 43 | set_property -dict [ list CONFIG.NUM_SI {1} ] $axi_peri_interconnect 44 | connect_bd_net [get_bd_pins axi_peri_interconnect/S00_ACLK] [get_bd_pins sys_clk_0/clk_out1] 45 | connect_bd_net [get_bd_pins axi_peri_interconnect/S00_ARESETN] [get_bd_pins sys_reset_0/peripheral_aresetn] 46 | connect_bd_intf_net [get_bd_intf_pins jtag_axi_0/M_AXI] [get_bd_intf_pins axi_peri_interconnect/S00_AXI] 47 | 48 | set_property -dict [ list CONFIG.NUM_MI {1} ] $axi_peri_interconnect 49 | connect_bd_net [get_bd_pins axi_peri_interconnect/M00_ACLK] [get_bd_pins sys_clk_0/clk_out1] 50 | connect_bd_net [get_bd_pins axi_peri_interconnect/M00_ARESETN] [get_bd_pins sys_reset_0/peripheral_aresetn] 51 | 52 | # Connect all clock & reset of PS_PL_0 slave interfaces.. 
53 | connect_bd_intf_net [get_bd_intf_pins axi_peri_interconnect/M00_AXI] [get_bd_intf_pins PS_PL_0/S00_AXI] 54 | connect_bd_net [get_bd_pins PS_PL_0/s00_axi_aclk] [get_bd_pins sys_clk_0/clk_out1] 55 | connect_bd_net [get_bd_pins PS_PL_0/s00_axi_aresetn] [get_bd_pins sys_reset_0/peripheral_aresetn] 56 | 57 | 58 | # Auto assign address 59 | assign_bd_address 60 | 61 | # Copy all address to PS_PL_v1_0_include.tcl file 62 | set bd_path [get_property DIRECTORY [current_project]]/[current_project].srcs/[current_fileset]/bd 63 | upvar 1 $offsetfile offset_file 64 | set offset_file "${bd_path}/PS_PL_v1_0_include.tcl" 65 | set fp [open $offset_file "w"] 66 | puts $fp "# Configuration address parameters" 67 | 68 | set offset [get_property OFFSET [get_bd_addr_segs /jtag_axi_0/Data/SEG_PS_PL_0_S00_AXI_* ]] 69 | puts $fp "set s00_axi_addr ${offset}" 70 | 71 | close $fp 72 | } 73 | 74 | # Set IP Repository and Update IP Catalogue 75 | set ip_path [file dirname [file normalize [get_property XML_FILE_NAME [ipx::get_cores xilinx.com:user:PS_PL:1.0]]]] 76 | set hw_test_file ${ip_path}/example_designs/debug_hw_design/PS_PL_v1_0_hw_test.tcl 77 | 78 | set repo_paths [get_property ip_repo_paths [current_fileset]] 79 | if { [lsearch -exact -nocase $repo_paths $ip_path ] == -1 } { 80 | set_property ip_repo_paths "$ip_path [get_property ip_repo_paths [current_fileset]]" [current_fileset] 81 | update_ip_catalog 82 | } 83 | 84 | set design_name "" 85 | set all_bd {} 86 | set all_bd_files [get_files *.bd -quiet] 87 | foreach file $all_bd_files { 88 | set file_name [string range $file [expr {[string last "/" $file] + 1}] end] 89 | set bd_name [string range $file_name 0 [expr {[string last "." 
$file_name] -1}]] 90 | lappend all_bd $bd_name 91 | } 92 | 93 | for { set i 1 } { 1 } { incr i } { 94 | set design_name "PS_PL_v1_0_hw_${i}" 95 | if { [lsearch -exact -nocase $all_bd $design_name ] == -1 } { 96 | break 97 | } 98 | } 99 | 100 | set intf_address_include_file "" 101 | create_ipi_design intf_address_include_file ${design_name} 102 | save_bd_design 103 | validate_bd_design 104 | 105 | set wrapper_file [make_wrapper -files [get_files ${design_name}.bd] -top -force] 106 | import_files -force -norecurse $wrapper_file 107 | 108 | puts "-------------------------------------------------------------------------------------------------" 109 | puts "INFO NEXT STEPS : Until this stage, debug hardware design has been created, " 110 | puts " please perform following steps to test design in targeted board." 111 | puts "1. Generate bitstream" 112 | puts "2. Setup your targeted board, open hardware manager and open new(or existing) hardware target" 113 | puts "3. Download generated bitstream" 114 | puts "4. 
Run generated hardware test using below command, this invokes basic read/write operation" 115 | puts " to every interface present in the peripheral : xilinx.com:user:myip:1.0" 116 | puts " : source -notrace ${hw_test_file}" 117 | puts "-------------------------------------------------------------------------------------------------" 118 | 119 |
--------------------------------------------------------------------------------
/verilog/sources_1/new/binary_query.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Module Name: binary_query
//
// Combines binary attention scores with the value matrix.
//
// value_in is read as 16 slices of 30 bits (slice c at value_in[c*30 +: 30]).
// Slices 0-3 are combined with score_in_1, 4-7 with score_in_2, 8-11 with
// score_in_3 and 12-15 with score_in_4.  Output bit c is 1 when more than 15
// of the 30 XOR-ed positions are set (2*popcount - 30 > 0).
//
// Fixes relative to the previous revision:
//   * The popcount was written as a for-loop of NON-blocking assignments to
//     the same register, so only the last iteration (bit 29) ever took
//     effect, and the register was never cleared between samples.  The
//     popcount is now a pure function of the current inputs.
//   * "(2*popcount-30) > 0" was evaluated unsigned and therefore true for
//     every count except exactly 15; it is now "popcount > 15".
// NOTE(review): data_out is now computed from the inputs of the same cycle in
// which data_in_valid is high, so it lines up with data_out_valid; the old
// code went through an extra popcount register.  Confirm against the caller.
// NOTE(review): the combination uses XOR here while the other layers in this
// design use XNOR - kept as written; confirm which is intended.
//
// Ports
//   value_in        16 x 30-bit value slices
//   score_in_1..4   30 score bits per head
//   data_in_valid   qualifies the inputs and advances the sample counter
//   data_out        16 result bits, registered on data_in_valid
//   data_out_valid  data_in_valid delayed by one clock
//   done            sticky, set once the sample counter equals 29
//////////////////////////////////////////////////////////////////////////////////


module binary_query(
    input                   clk,
    input                   rst_n,

    input  [16*30-1:0]      value_in,
    input  [30-1:0]         score_in_1,
    input  [30-1:0]         score_in_2,
    input  [30-1:0]         score_in_3,
    input  [30-1:0]         score_in_4,
    input                   data_in_valid,

    output reg [16-1:0]     data_out,
    output reg              data_out_valid,
    output reg              done
    );

    // Counts cycles in which data_in_valid is high (5-bit, wraps at 32).
    reg [4:0] time_step;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step <= 5'd0;
        else if (data_in_valid)
            time_step <= time_step + 1'b1;
    end

    // Number of set bits in a 30-bit vector (0..30 fits in 5 bits).
    function [4:0] popcount30;
        input [29:0] bits;
        integer      k;
        begin
            popcount30 = 5'd0;
            for (k = 0; k < 30; k = k + 1)
                popcount30 = popcount30 + bits[k];
        end
    endfunction

    // Head h (0-based) occupies score_all[h*30 +: 30].
    wire [4*30-1:0] score_all = {score_in_4, score_in_3, score_in_2, score_in_1};

    genvar c;
    generate
        for (c = 0; c < 16; c = c + 1) begin : gen_channel
            // Slice c belongs to head c/4 (four slices per head).
            wire [29:0] value_xor_result   = value_in[c*30 +: 30] ^ score_all[(c/4)*30 +: 30];
            wire [4:0]  value_popcount_out = popcount30(value_xor_result);

            always @(posedge clk or negedge rst_n) begin
                if (~rst_n)
                    data_out[c] <= 1'b0;
                else if (data_in_valid)
                    // sign(2*popcount - 30) > 0  <=>  popcount > 15
                    data_out[c] <= (value_popcount_out > 5'd15);
            end
        end
    endgenerate

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out_valid <= 1'b0;
        else
            data_out_valid <= data_in_valid;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            done <= 1'b0;
        else if (time_step == 5'd29)
            done <= 1'b1;
    end

endmodule

--------------------------------------------------------------------------------
/verilog/sources_1/new/output_2.v:
--------------------------------------------------------------------------------
// output_2: final XNOR/popcount stage.
//
// XNORs the 256-bit input with the 256-bit word read from output_2_rom and
// outputs the raw number of agreeing bit positions (0..256) - no threshold is
// applied here.
//
// Ports
//   clk, rst_n      clock / asynchronous active-low reset
//   data_in         256-bit input word
//   data_in_valid   qualifies data_in; also used as the ROM enable
//   data_out        9-bit agreement count, registered on data_in_valid
//   data_out_valid  set on the first data_in_valid and then held until reset
//
// NOTE(review): unlike the other layers, data_out_valid is sticky rather than
// a one-cycle pulse - behaviour kept as found; confirm it is intended.
module output_2(
    input                   clk,
    input                   rst_n,

    input  [256-1:0]        data_in,
    input                   data_in_valid,

    output reg [9-1:0]      data_out,
    output reg              data_out_valid
    );

    wire [256-1:0] inter_w_data;
    wire           sel = 1'b0;          // ROM address is tied to 0
    output_2_rom output_2_rom (
        .clka (clk),            // input wire clka
        .ena  (data_in_valid),  // input wire ena
        .addra(sel),            // input wire addra (1 bit, constant 0)
        .douta(inter_w_data)    // output wire [255 : 0] douta
    );

    // Number of set bits in a 256-bit vector (0..256 fits in 9 bits).
    function [8:0] popcount256;
        input [255:0] bits;
        integer       k;
        begin
            popcount256 = 9'd0;
            for (k = 0; k < 256; k = k + 1)
                popcount256 = popcount256 + bits[k];
        end
    endfunction

    wire [256-1:0] inter_xor_result   = ~(inter_w_data ^ data_in);
    wire [8:0]     inter_popcount_out = popcount256(inter_xor_result);

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out <= 9'd0;
        else if (data_in_valid)
            data_out <= inter_popcount_out;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out_valid <= 1'b0;
        else if (data_in_valid)
            data_out_valid <= 1'b1;
    end

endmodule

-------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/bd/bd.tcl: -------------------------------------------------------------------------------- 1 | 2 | proc init { cellpath otherInfo } { 3 | 4 | set cell_handle [get_bd_cells $cellpath] 5 | set all_busif [get_bd_intf_pins $cellpath/*] 6 | set axi_standard_param_list [list ID_WIDTH AWUSER_WIDTH ARUSER_WIDTH WUSER_WIDTH RUSER_WIDTH BUSER_WIDTH] 7 | set full_sbusif_list [list ] 8 | 9 | foreach busif $all_busif { 10 | if { [string equal -nocase [get_property MODE $busif] "slave"] == 1 } { 11 | set busif_param_list [list] 12 | set busif_name [get_property NAME $busif] 13 | if { [lsearch -exact -nocase $full_sbusif_list $busif_name ] == -1 } { 14 | continue 15 | } 16 | foreach tparam $axi_standard_param_list { 17 | lappend busif_param_list "C_${busif_name}_${tparam}" 18 | } 19 | bd::mark_propagate_only $cell_handle $busif_param_list 20 | } 21 | } 22 | } 23 | 24 | 25 | proc pre_propagate {cellpath otherInfo } { 26
| 27 | set cell_handle [get_bd_cells $cellpath] 28 | set all_busif [get_bd_intf_pins $cellpath/*] 29 | set axi_standard_param_list [list ID_WIDTH AWUSER_WIDTH ARUSER_WIDTH WUSER_WIDTH RUSER_WIDTH BUSER_WIDTH] 30 | 31 | foreach busif $all_busif { 32 | if { [string equal -nocase [get_property CONFIG.PROTOCOL $busif] "AXI4"] != 1 } { 33 | continue 34 | } 35 | if { [string equal -nocase [get_property MODE $busif] "master"] != 1 } { 36 | continue 37 | } 38 | 39 | set busif_name [get_property NAME $busif] 40 | foreach tparam $axi_standard_param_list { 41 | set busif_param_name "C_${busif_name}_${tparam}" 42 | 43 | set val_on_cell_intf_pin [get_property CONFIG.${tparam} $busif] 44 | set val_on_cell [get_property CONFIG.${busif_param_name} $cell_handle] 45 | 46 | if { [string equal -nocase $val_on_cell_intf_pin $val_on_cell] != 1 } { 47 | if { $val_on_cell != "" } { 48 | set_property CONFIG.${tparam} $val_on_cell $busif 49 | } 50 | } 51 | } 52 | } 53 | } 54 | 55 | 56 | proc propagate {cellpath otherInfo } { 57 | 58 | set cell_handle [get_bd_cells $cellpath] 59 | set all_busif [get_bd_intf_pins $cellpath/*] 60 | set axi_standard_param_list [list ID_WIDTH AWUSER_WIDTH ARUSER_WIDTH WUSER_WIDTH RUSER_WIDTH BUSER_WIDTH] 61 | 62 | foreach busif $all_busif { 63 | if { [string equal -nocase [get_property CONFIG.PROTOCOL $busif] "AXI4"] != 1 } { 64 | continue 65 | } 66 | if { [string equal -nocase [get_property MODE $busif] "slave"] != 1 } { 67 | continue 68 | } 69 | 70 | set busif_name [get_property NAME $busif] 71 | foreach tparam $axi_standard_param_list { 72 | set busif_param_name "C_${busif_name}_${tparam}" 73 | 74 | set val_on_cell_intf_pin [get_property CONFIG.${tparam} $busif] 75 | set val_on_cell [get_property CONFIG.${busif_param_name} $cell_handle] 76 | 77 | if { [string equal -nocase $val_on_cell_intf_pin $val_on_cell] != 1 } { 78 | #override property of bd_interface_net to bd_cell -- only for slaves. May check for supported values.. 
79 | if { $val_on_cell_intf_pin != "" } { 80 | set_property CONFIG.${busif_param_name} $val_on_cell_intf_pin $cell_handle 81 | } 82 | } 83 | } 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /vitis/src/lscript.ld: -------------------------------------------------------------------------------- 1 | /*******************************************************************/ 2 | /* */ 3 | /* This file is automatically generated by linker script generator.*/ 4 | /* */ 5 | /* Version: 2018.3 */ 6 | /* */ 7 | /* Copyright (c) 2010-2019 Xilinx, Inc. All rights reserved. */ 8 | /* */ 9 | /* Description : ARM v8 Linker Script */ 10 | /* */ 11 | /*******************************************************************/ 12 | 13 | _STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; 14 | _HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x2000; 15 | 16 | _EL0_STACK_SIZE = DEFINED(_EL0_STACK_SIZE) ? _EL0_STACK_SIZE : 1024; 17 | _EL1_STACK_SIZE = DEFINED(_EL1_STACK_SIZE) ? _EL1_STACK_SIZE : 2048; 18 | _EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? 
/*******************************************************************/
/*                                                                 */
/* This file is automatically generated by linker script generator.*/
/*                                                                 */
/* Version: 2018.3                                                 */
/*                                                                 */
/* Copyright (c) 2010-2019 Xilinx, Inc.  All rights reserved.      */
/*                                                                 */
/* Description : ARM v8 Linker Script                              */
/*                                                                 */
/*******************************************************************/

/* Stack and heap sizes: each keeps an externally supplied value when the
   symbol is already defined, otherwise falls back to the default below. */
_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000;
_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x2000;

_EL0_STACK_SIZE = DEFINED(_EL0_STACK_SIZE) ? _EL0_STACK_SIZE : 1024;
_EL1_STACK_SIZE = DEFINED(_EL1_STACK_SIZE) ? _EL1_STACK_SIZE : 2048;
_EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? _EL2_STACK_SIZE : 1024;

/* Define Memories in the system */
/* Four regions are declared; every output section in this script is
   placed in psu_ddr_0_MEM_0, the other three are not referenced here. */

MEMORY
{
   psu_ddr_0_MEM_0 : ORIGIN = 0x0, LENGTH = 0x7FF00000
   psu_ddr_1_MEM_0 : ORIGIN = 0x800000000, LENGTH = 0x80000000
   psu_ocm_ram_0_MEM_0 : ORIGIN = 0xFFFC0000, LENGTH = 0x40000
   psu_qspi_linear_0_MEM_0 : ORIGIN = 0xC0000000, LENGTH = 0x20000000
}

/* Specify the default entry point to the program */

ENTRY(_vector_table)

/* Define the sections, and where they are mapped in memory */

SECTIONS
{
/* Code. .vectors is listed first and KEEP'd so it is never discarded.
   NOTE(review): the pattern below is spelled `.gcc_execpt_table`, while the
   dedicated output section further down is `.gcc_except_table` - confirm
   whether the spelling here is intentional. */
.text : {
   KEEP (*(.vectors))
   *(.boot)
   *(.text)
   *(.text.*)
   *(.gnu.linkonce.t.*)
   *(.plt)
   *(.gnu_warning)
   *(.gcc_execpt_table)
   *(.glue_7)
   *(.glue_7t)
   *(.ARM.extab)
   *(.gnu.linkonce.armextab.*)
} > psu_ddr_0_MEM_0

.init (ALIGN(64)) : {
   KEEP (*(.init))
} > psu_ddr_0_MEM_0

.fini (ALIGN(64)) : {
   KEEP (*(.fini))
} > psu_ddr_0_MEM_0

.interp : {
   KEEP (*(.interp))
} > psu_ddr_0_MEM_0

.note-ABI-tag : {
   KEEP (*(.note-ABI-tag))
} > psu_ddr_0_MEM_0

/* Read-only data; start/end symbols bracket each group. */
.rodata : {
   . = ALIGN(64);
   __rodata_start = .;
   *(.rodata)
   *(.rodata.*)
   *(.gnu.linkonce.r.*)
   __rodata_end = .;
} > psu_ddr_0_MEM_0

.rodata1 : {
   . = ALIGN(64);
   __rodata1_start = .;
   *(.rodata1)
   *(.rodata1.*)
   __rodata1_end = .;
} > psu_ddr_0_MEM_0

.sdata2 : {
   . = ALIGN(64);
   __sdata2_start = .;
   *(.sdata2)
   *(.sdata2.*)
   *(.gnu.linkonce.s2.*)
   __sdata2_end = .;
} > psu_ddr_0_MEM_0

.sbss2 : {
   . = ALIGN(64);
   __sbss2_start = .;
   *(.sbss2)
   *(.sbss2.*)
   *(.gnu.linkonce.sb2.*)
   __sbss2_end = .;
} > psu_ddr_0_MEM_0

/* Initialised data.
   NOTE(review): *(.got) is listed here and again in the separate .got
   output section below - confirm which placement is intended. */
.data : {
   . = ALIGN(64);
   __data_start = .;
   *(.data)
   *(.data.*)
   *(.gnu.linkonce.d.*)
   *(.jcr)
   *(.got)
   *(.got.plt)
   __data_end = .;
} > psu_ddr_0_MEM_0

.data1 : {
   . = ALIGN(64);
   __data1_start = .;
   *(.data1)
   *(.data1.*)
   __data1_end = .;
} > psu_ddr_0_MEM_0

.got : {
   *(.got)
} > psu_ddr_0_MEM_0

.got1 : {
   *(.got1)
} > psu_ddr_0_MEM_0

.got2 : {
   *(.got2)
} > psu_ddr_0_MEM_0

.note.gnu.build-id : {
   KEEP (*(.note.gnu.build-id))
} > psu_ddr_0_MEM_0

/* Constructor / destructor tables, bracketed by list/end symbols. */
.ctors : {
   . = ALIGN(64);
   __CTOR_LIST__ = .;
   ___CTORS_LIST___ = .;
   KEEP (*crtbegin.o(.ctors))
   KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors))
   KEEP (*(SORT(.ctors.*)))
   KEEP (*(.ctors))
   __CTOR_END__ = .;
   ___CTORS_END___ = .;
} > psu_ddr_0_MEM_0

.dtors : {
   . = ALIGN(64);
   __DTOR_LIST__ = .;
   ___DTORS_LIST___ = .;
   KEEP (*crtbegin.o(.dtors))
   KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors))
   KEEP (*(SORT(.dtors.*)))
   KEEP (*(.dtors))
   __DTOR_END__ = .;
   ___DTORS_END___ = .;
} > psu_ddr_0_MEM_0

.fixup : {
   __fixup_start = .;
   *(.fixup)
   __fixup_end = .;
} > psu_ddr_0_MEM_0

.eh_frame : {
   *(.eh_frame)
} > psu_ddr_0_MEM_0

.eh_framehdr : {
   __eh_framehdr_start = .;
   *(.eh_framehdr)
   __eh_framehdr_end = .;
} > psu_ddr_0_MEM_0

.gcc_except_table : {
   *(.gcc_except_table)
} > psu_ddr_0_MEM_0

/* Three .mmu_tbl* sections, each aligned to 4096 bytes. */
.mmu_tbl0 (ALIGN(4096)) : {
   __mmu_tbl0_start = .;
   *(.mmu_tbl0)
   __mmu_tbl0_end = .;
} > psu_ddr_0_MEM_0

.mmu_tbl1 (ALIGN(4096)) : {
   __mmu_tbl1_start = .;
   *(.mmu_tbl1)
   __mmu_tbl1_end = .;
} > psu_ddr_0_MEM_0

.mmu_tbl2 (ALIGN(4096)) : {
   __mmu_tbl2_start = .;
   *(.mmu_tbl2)
   __mmu_tbl2_end = .;
} > psu_ddr_0_MEM_0

/* NOTE(review): the second pattern is spelled `armexidix` - confirm. */
.ARM.exidx : {
   __exidx_start = .;
   *(.ARM.exidx*)
   *(.gnu.linkonce.armexidix.*.*)
   __exidx_end = .;
} > psu_ddr_0_MEM_0

.preinit_array : {
   . = ALIGN(64);
   __preinit_array_start = .;
   KEEP (*(SORT(.preinit_array.*)))
   KEEP (*(.preinit_array))
   __preinit_array_end = .;
} > psu_ddr_0_MEM_0

.init_array : {
   . = ALIGN(64);
   __init_array_start = .;
   KEEP (*(SORT(.init_array.*)))
   KEEP (*(.init_array))
   __init_array_end = .;
} > psu_ddr_0_MEM_0

.fini_array : {
   . = ALIGN(64);
   __fini_array_start = .;
   KEEP (*(SORT(.fini_array.*)))
   KEEP (*(.fini_array))
   __fini_array_end = .;
} > psu_ddr_0_MEM_0

.ARM.attributes : {
   __ARM.attributes_start = .;
   *(.ARM.attributes)
   __ARM.attributes_end = .;
} > psu_ddr_0_MEM_0

.sdata : {
   . = ALIGN(64);
   __sdata_start = .;
   *(.sdata)
   *(.sdata.*)
   *(.gnu.linkonce.s.*)
   __sdata_end = .;
} > psu_ddr_0_MEM_0

/* NOLOAD: the section occupies address space but is not loaded. */
.sbss (NOLOAD) : {
   . = ALIGN(64);
   __sbss_start = .;
   *(.sbss)
   *(.sbss.*)
   *(.gnu.linkonce.sb.*)
   . = ALIGN(64);
   __sbss_end = .;
} > psu_ddr_0_MEM_0

.tdata : {
   . = ALIGN(64);
   __tdata_start = .;
   *(.tdata)
   *(.tdata.*)
   *(.gnu.linkonce.td.*)
   __tdata_end = .;
} > psu_ddr_0_MEM_0

.tbss : {
   . = ALIGN(64);
   __tbss_start = .;
   *(.tbss)
   *(.tbss.*)
   *(.gnu.linkonce.tb.*)
   __tbss_end = .;
} > psu_ddr_0_MEM_0

/* Both ends of .bss are 64-byte aligned. */
.bss (NOLOAD) : {
   . = ALIGN(64);
   __bss_start__ = .;
   *(.bss)
   *(.bss.*)
   *(.gnu.linkonce.b.*)
   *(COMMON)
   . = ALIGN(64);
   __bss_end__ = .;
} > psu_ddr_0_MEM_0

/* Midpoints of the [.sdata, .sbss) and [.sdata2, .sbss2) address ranges. */
_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 );

_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 );

/* Generate Stack and Heap definitions */

/* Heap: _HEAP_SIZE bytes starting at a 64-byte aligned address. */
.heap (NOLOAD) : {
   . = ALIGN(64);
   _heap = .;
   HeapBase = .;
   _heap_start = .;
   . += _HEAP_SIZE;
   _heap_end = .;
   HeapLimit = .;
} > psu_ddr_0_MEM_0

/* Four stack regions laid out back to back at increasing addresses:
   EL3 (_STACK_SIZE), EL2, EL1, EL0. For each region `_elN_stack_end` is
   assigned before the size is added (lowest address) and `__elN_stack`
   after it (highest address). */
.stack (NOLOAD) : {
   . = ALIGN(64);
   _el3_stack_end = .;
   . += _STACK_SIZE;
   __el3_stack = .;
   _el2_stack_end = .;
   . += _EL2_STACK_SIZE;
   . = ALIGN(64);
   __el2_stack = .;
   _el1_stack_end = .;
   . += _EL1_STACK_SIZE;
   . = ALIGN(64);
   __el1_stack = .;
   _el0_stack_end = .;
   . += _EL0_STACK_SIZE;
   . = ALIGN(64);
   __el0_stack = .;
} > psu_ddr_0_MEM_0

_end = .;
}
// Binarised fully-connected stage: 256 input bits -> 16 output bits.
//
// For each of the 16 outputs, a 256-bit weight row read from
// encoder_hidden_state_2_rom is XNOR-ed with data_in, the matching bits are
// counted, and the output bit is 1 when more than half of the 256 bits match.
//
// Ports
//   clk, rst_n      clock and active-low asynchronous reset
//   data_in         256-bit binary input vector
//   data_in_valid   qualifies data_in; also used as the ROM enable
//   block_sel       ROM address (zero-extended to 3 bits)
//   data_out        16 registered sign bits, updated while data_in_valid is high
//   data_out_valid  data_in_valid delayed by one clock
//   done            set (and held until reset) once time_step reaches 29
module encoder_hidden_state_2(
    input               clk,
    input               rst_n,

    input  [256-1:0]    data_in,
    input               data_in_valid,
    input  wire [2-1:0] block_sel,

    output reg [16-1:0] data_out,
    output reg          data_out_valid,
    output reg          done
    );

    localparam [4:0] LAST_STEP = 5'd29;   // counter saturates at this step
    localparam [8:0] HALF_BITS = 9'd128;  // half of the 256 compared bits

    // Number of set bits in a 256-bit vector (0..256 fits in 9 bits).
    function [8:0] popcount256;
        input [255:0] bits;
        integer n;
        begin
            popcount256 = 9'd0;
            for (n = 0; n < 256; n = n + 1)
                popcount256 = popcount256 + bits[n];
        end
    endfunction

    reg [4:0] time_step_pre;
    reg [4:0] time_step;        // one-clock delayed copy of time_step_pre

    // Count accepted inputs, saturating at LAST_STEP.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step_pre <= 5'd0;
        else if (data_in_valid && time_step_pre != LAST_STEP)
            time_step_pre <= time_step_pre + 1'b1;
    end

    // time_step_pre never exceeds LAST_STEP, so the original
    // "time_step_pre <= 29" guard was always true and is dropped.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            time_step <= 5'd0;
        else
            time_step <= time_step_pre;
    end

    // Weight ROM: one 16 x 256-bit word per block.
    // NOTE(review): the ROM is clocked and enabled by data_in_valid, while
    // data_out is registered in the same cycle - confirm the ROM read latency
    // lines up with data_in.
    wire [16*256-1:0] inter_w_data;
    wire [2:0]        sel = block_sel;
    encoder_hidden_state_2_rom encoder_hidden_state_2_rom (
        .clka   (clk),            // input wire clka
        .ena    (data_in_valid),  // input wire ena
        .addra  (sel),            // input wire [2 : 0] addra
        .douta  (inter_w_data)    // output wire [4095 : 0] douta
    );

    // Per-output XNOR + popcount + sign.
    wire [15:0] sign_bits;
    genvar i;
    generate
        for (i = 0; i < 16; i = i + 1) begin : gen_neuron
            wire [255:0] inter_xor_result   = ~(inter_w_data[i*256 +: 256] ^ data_in);
            wire [8:0]   inter_popcount_out = popcount256(inter_xor_result);

            // 2*popcount - 256 > 0  <=>  popcount > 128.
            // The original wrote the left-hand form directly; with an unsigned
            // popcount the subtraction wraps, so it was true for every value
            // except exactly 128. Comparing against the threshold avoids that.
            assign sign_bits[i] = (inter_popcount_out > HALF_BITS);
        end
    endgenerate

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out <= 16'd0;
        else if (data_in_valid)
            data_out <= sign_bits;
    end

    // Sticky completion flag.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            done <= 1'b0;
        else if (time_step == LAST_STEP)
            done <= 1'b1;
    end

    // Output valid follows input valid by one clock.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out_valid <= 1'b0;
        else
            data_out_valid <= data_in_valid;
    end

endmodule
// Example testbench for the PS_PL AXI4-Lite slave.
// A master VIP agent writes four 32-bit words to consecutive addresses
// starting at 0, reads them back, and compares each read value with what
// was written. The verdict is printed as PTGEN_TEST: PASSED!/FAILED!.
module PS_PL_v1_0_tb();


xil_axi_uint                            error_cnt = 0;
xil_axi_uint                            comparison_cnt = 0;
axi_transaction                         wr_transaction;
axi_transaction                         rd_transaction;
axi_monitor_transaction                 mst_monitor_transaction;
axi_monitor_transaction                 master_moniter_transaction_queue[$];
xil_axi_uint                            master_moniter_transaction_queue_size =0;
axi_monitor_transaction                 mst_scb_transaction;
axi_monitor_transaction                 passthrough_monitor_transaction;
axi_monitor_transaction                 passthrough_master_moniter_transaction_queue[$];
xil_axi_uint                            passthrough_master_moniter_transaction_queue_size =0;
axi_monitor_transaction                 passthrough_mst_scb_transaction;
axi_monitor_transaction                 passthrough_slave_moniter_transaction_queue[$];
xil_axi_uint                            passthrough_slave_moniter_transaction_queue_size =0;
axi_monitor_transaction                 passthrough_slv_scb_transaction;
axi_monitor_transaction                 slv_monitor_transaction;
axi_monitor_transaction                 slave_moniter_transaction_queue[$];
xil_axi_uint                            slave_moniter_transaction_queue_size =0;
axi_monitor_transaction                 slv_scb_transaction;
xil_axi_uint                           mst_agent_verbosity = 0;
xil_axi_uint                           slv_agent_verbosity = 0;
xil_axi_uint                           passthrough_agent_verbosity = 0;
bit                                     clock;
bit                                     reset;
// Overall verdict: set to 1 at the start of S_AXI_TEST, cleared by COMPARE_DATA on error.
integer result_slave;
// Values written by S_AXI_TEST, kept for the read-back comparison.
bit [31:0] S00_AXI_test_data[3:0];
 localparam LC_AXI_BURST_LENGTH = 8;
 localparam LC_AXI_DATA_WIDTH = 32;
// Compare `actual` against `expected`. Clears result_slave and calls $stop
// when either vector is all 'x' or when the two differ; otherwise reports a pass.
task automatic COMPARE_DATA;
  input [(LC_AXI_BURST_LENGTH * LC_AXI_DATA_WIDTH)-1:0]expected;
  input [(LC_AXI_BURST_LENGTH * LC_AXI_DATA_WIDTH)-1:0]actual;
  begin
    if (expected === 'hx || actual === 'hx) begin
      $display("TESTBENCH ERROR! COMPARE_DATA cannot be performed with an expected or actual vector that is all 'x'!");
    result_slave = 0;    $stop;
  end
    if (actual != expected) begin
      $display("TESTBENCH ERROR! Data expected is not equal to actual.", " expected = 0x%h",expected, " actual = 0x%h",actual);
    result_slave = 0;
    $stop;
    end
    else
    begin
     $display("TESTBENCH Passed! Data expected is equal to actual.",
              " expected = 0x%h",expected, " actual = 0x%h",actual);
    end
  end
endtask
integer                                 i;
integer                                 j;
xil_axi_uint                            trans_cnt_before_switch = 48;
xil_axi_uint                            passthrough_cmd_switch_cnt = 0;
event                                   passthrough_mastermode_start_event;
event                                   passthrough_mastermode_end_event;
event                                   passthrough_slavemode_end_event;
xil_axi_uint                            mtestID;
xil_axi_ulong                           mtestADDR;
xil_axi_len_t                           mtestBurstLength;
xil_axi_size_t                          mtestDataSize;
xil_axi_burst_t                         mtestBurstType;
xil_axi_lock_t                          mtestLOCK;
xil_axi_cache_t                         mtestCacheType = 0;
xil_axi_prot_t                          mtestProtectionType = 3'b000;
xil_axi_region_t                        mtestRegion = 4'b000;
xil_axi_qos_t                           mtestQOS = 4'b000;
xil_axi_data_beat                       dbeat;
xil_axi_data_beat [255:0]               mtestWUSER;
xil_axi_data_beat                       mtestAWUSER = 'h0;
xil_axi_data_beat                       mtestARUSER = 0;
xil_axi_data_beat [255:0]               mtestRUSER;
xil_axi_uint                            mtestBUSER = 0;
xil_axi_resp_t                          mtestBresp;
xil_axi_resp_t[255:0]                   mtestRresp;
bit [63:0]                              mtestWDataL;
bit [63:0]                              mtestRDataL;
axi_transaction                         pss_wr_transaction;
axi_transaction                         pss_rd_transaction;
axi_transaction                         reactive_transaction;
axi_transaction                         rd_payload_transaction;
axi_transaction                         wr_rand;
axi_transaction                         rd_rand;
axi_transaction                         wr_reactive;
axi_transaction                         rd_reactive;
axi_transaction                         wr_reactive2;
axi_transaction                         rd_reactive2;
axi_ready_gen                           bready_gen;
axi_ready_gen                           rready_gen;
axi_ready_gen                           awready_gen;
axi_ready_gen                           wready_gen;
axi_ready_gen                           arready_gen;
axi_ready_gen                           bready_gen2;
axi_ready_gen                           rready_gen2;
axi_ready_gen                           awready_gen2;
axi_ready_gen                           wready_gen2;
axi_ready_gen                           arready_gen2;
xil_axi_payload_byte                    data_mem[xil_axi_ulong];
PS_PL_v1_0_bfm_1_master_0_0_mst_t          mst_agent_0;

  // Device under test: block-design wrapper driven by `clock` and `reset`.
  `BD_WRAPPER DUT(
      .ARESETN(reset),
      .ACLK(clock)
    );

// Create, configure and start the master VIP agent.
initial begin
   mst_agent_0 = new("master vip agent",DUT.`BD_INST_NAME.master_0.inst.IF);//ms
   mst_agent_0.vif_proxy.set_dummy_drive_type(XIL_AXI_VIF_DRIVE_NONE);
   mst_agent_0.set_agent_tag("Master VIP");
   mst_agent_0.set_verbosity(mst_agent_verbosity);
   mst_agent_0.start_master();
     $timeformat (-12, 1, " ps", 1);
  end
  // Hold reset low for the first 200 ns, then drive it high.
  initial begin
    reset <= 1'b0;
    #200ns;
    reset <= 1'b1;
    repeat (5) @(negedge clock);
  end
  // Free-running clock, toggles every 5 time units.
  always #5 clock <= ~clock;
  // Run the test, then end the simulation.
  initial begin
      S_AXI_TEST ( );

      #1ns;
      $finish;
  end
// Write four incrementing words (starting at 32'h1) to addresses 0, 4, 8, 12,
// read the same addresses back, and check each value with COMPARE_DATA.
// NOTE(review): this task starts 1 time unit after time zero, while `reset`
// is still driven low until 200 ns - confirm the VIP defers traffic until
// reset is released.
task automatic S_AXI_TEST;
begin
#1;
   $display("Sequential write transfers example similar to  AXI BFM WRITE_BURST method starts");
   mtestID = 0;
   mtestADDR = 64'h00000000;
   mtestBurstLength = 0;
   mtestDataSize = xil_axi_size_t'(xil_clog2(32/8));
   mtestBurstType = XIL_AXI_BURST_TYPE_INCR;
   mtestLOCK = XIL_AXI_ALOCK_NOLOCK;
   mtestCacheType = 0;
   mtestProtectionType = 0;
   mtestRegion = 0;
   mtestQOS = 0;
   result_slave = 1;
   mtestWDataL[31:0] = 32'h00000001;
   for(int i = 0; i < 4;i++) begin
   S00_AXI_test_data[i] <= mtestWDataL[31:0];
   mst_agent_0.AXI4LITE_WRITE_BURST(
   mtestADDR,
   mtestProtectionType,
   mtestWDataL,
   mtestBresp
   );
   mtestWDataL[31:0] = mtestWDataL[31:0] + 1;
   mtestADDR = mtestADDR + 64'h4;
   end
   $display("Sequential write transfers example similar to  AXI BFM WRITE_BURST method completes");
   $display("Sequential read transfers example similar to  AXI BFM READ_BURST method starts");
   mtestID = 0;
   mtestADDR = 64'h00000000;
   mtestBurstLength = 0;
   mtestDataSize = xil_axi_size_t'(xil_clog2(32/8));
   mtestBurstType = XIL_AXI_BURST_TYPE_INCR;
   mtestLOCK = XIL_AXI_ALOCK_NOLOCK;
   mtestCacheType = 0;
   mtestProtectionType = 0;
   mtestRegion = 0;
   mtestQOS = 0;
   for(int i = 0; i < 4;i++) begin
   mst_agent_0.AXI4LITE_READ_BURST(
   mtestADDR,
   mtestProtectionType,
   mtestRDataL,
   mtestRresp
   );
   mtestADDR = mtestADDR + 64'h4;
   COMPARE_DATA(S00_AXI_test_data[i],mtestRDataL);
   end
   $display("Sequential read transfers example similar to  AXI BFM READ_BURST method completes");
   $display("Sequential read transfers example similar to  AXI VIP READ_BURST method completes");
   $display("---------------------------------------------------------");
   $display("EXAMPLE TEST S00_AXI: PTGEN_TEST_FINISHED!");
   if ( result_slave ) begin
   $display("PTGEN_TEST: PASSED!");
   end    else begin
   $display("PTGEN_TEST: FAILED!");
   end
   $display("---------------------------------------------------------");
end
endtask

endmodule
// Attention block of the binarised transformer encoder.
//
// The datapath is a chain of five sub-modules, each started by the `done`
// flag of the previous one:
//   binary_QKV -> binary_score -> binary_query
//              -> binary_intermediate_1 -> binary_intermediate_2
// Between stages the per-time-step results are stored in 30-entry arrays
// that are indexed by saturating step counters (0..29).
//
// Ports
//   clk, rst_n      clock and active-low asynchronous reset
//   data_in         16-bit input word
//   data_in_valid   qualifies data_in
//   block_sel       forwarded to binary_QKV and both intermediate stages
//   data_out        data output of binary_intermediate_2
//   data_out_valid  `done` flag of binary_intermediate_2
module attention(
    input               clk,
    input               rst_n,

    input  [16-1:0]     data_in,
    input               data_in_valid,
    input  [2:0]        block_sel,

    output [16-1:0]     data_out,
    output              data_out_valid
    );

    integer i;

    // =========================================================================================================
    // =====================================Get Query Key Value=================================================
    // =========================================================================================================

    /*
     * time_step counter; it is also the time-step index used to store q/k/v.
     */
    reg [4:0] qkv_time_step_pre;
    reg [4:0] qkv_time_step;        // extra register used as a one-clock delay

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            qkv_time_step_pre <= 0;
        end
        else if (qkv_time_step_pre == 'd29)
            qkv_time_step_pre <= 'd29;
        else if (data_in_valid) begin
            qkv_time_step_pre <= qkv_time_step_pre + 1'b1;
        end
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            qkv_time_step <= 0;
        end
        else if (qkv_time_step_pre <= 'd29) begin
            qkv_time_step <= qkv_time_step_pre;
        end
    end

    /*
     * Q/K/V storage: 30 x 16 bits each.
     */
    reg  [16-1:0] query[30-1:0];
    reg  [16-1:0] key[30-1:0];
    reg  [16-1:0] value[30-1:0];
    wire [16-1:0] query_out;
    wire [16-1:0] key_out;
    wire [16-1:0] value_out;
    wire          qkv_data_valid;
    wire          qkv_done;

    // Capture the Q/K/V outputs at the current step until binary_QKV reports done.
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                query[i] <= 0;
                key[i]   <= 0;
                value[i] <= 0;
            end
        end
        else if (~qkv_done) begin
            query[qkv_time_step] <= query_out;
            key[qkv_time_step]   <= key_out;
            value[qkv_time_step] <= value_out;
        end
    end

    /*
     * binary_QKV instance: produces the Q, K and V results.
     */
    binary_QKV binary_QKV(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (data_in),
        .data_in_valid  (data_in_valid),
        .block_sel      (block_sel),

        .query_out      (query_out),
        .key_out        (key_out),
        .value_out      (value_out),
        .data_out_valid (qkv_data_valid),
        .done           (qkv_done)
    );

    // =========================================================================================================
    // =====================================Get Attention Scores================================================
    // =========================================================================================================
    reg [4:0] score_time_step;
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            score_time_step <= 0;
        else if (score_time_step == 'd29)
            score_time_step <= 'd29;
        else if (qkv_done == 1'b1)
            score_time_step <= score_time_step + 1'b1;
    end

    // All 30 keys packed into one vector, key[k] at bits [k*16 +: 16].
    wire [30*16-1:0] key_flatten;
    genvar k;
    generate
        for (k = 0; k < 30; k = k + 1) begin : gen_key_flatten
            assign key_flatten[k*16 +: 16] = key[k];
        end
    endgenerate

    reg  [30-1:0] score_out_1[30-1:0];
    reg  [30-1:0] score_out_2[30-1:0];
    reg  [30-1:0] score_out_3[30-1:0];
    reg  [30-1:0] score_out_4[30-1:0];
    wire [30-1:0] score_1;
    wire [30-1:0] score_2;
    wire [30-1:0] score_3;
    wire [30-1:0] score_4;
    wire          score_out_valid;
    wire          score_done;

    // Reset clears every entry. The original cleared only the entry selected
    // by score_time_step, leaving the other 29 entries uninitialised.
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                score_out_1[i] <= 0;
                score_out_2[i] <= 0;
                score_out_3[i] <= 0;
                score_out_4[i] <= 0;
            end
        end
        else if (qkv_done) begin
            score_out_1[score_time_step] <= score_1;
            score_out_2[score_time_step] <= score_2;
            score_out_3[score_time_step] <= score_3;
            score_out_4[score_time_step] <= score_4;
        end
    end

    binary_score binary_score(
        .clk            (clk),
        .rst_n          (rst_n),

        .query_in       (query[score_time_step]),
        .key_in         (key_flatten),
        .data_in_valid  (qkv_done),

        .data_out_1     (score_1),
        .data_out_2     (score_2),
        .data_out_3     (score_3),
        .data_out_4     (score_4),
        .data_out_valid (score_out_valid),
        .done           (score_done)
    );

    // =========================================================================================================
    // =====================================Get Weighted Value==================================================
    // =========================================================================================================
    // Transposed packing of value[]: bit k of value[l] lands at index k*30 + l.
    wire [30*16-1:0] value_flatten;
    genvar l;
    generate
        for (k = 0; k < 16; k = k + 1) begin : gen_value_bit
            for (l = 0; l < 30; l = l + 1) begin : gen_value_step
                assign value_flatten[k*30+l] = value[l][k];
            end
        end
    endgenerate

    reg [4:0] value_time_step;
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n)
            value_time_step <= 0;
        else if (value_time_step == 'd29)
            value_time_step <= 'd29;
        else if (score_done == 1'b1)
            value_time_step <= value_time_step + 1'b1;
    end

    reg  [16-1:0] value_weighted[30-1:0];
    wire [16-1:0] value_weighted_out;
    wire          value_weighted_valid;
    wire          value_weighted_done;

    // Reset clears every entry (the original cleared a single entry only).
    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                value_weighted[i] <= 0;
            end
        end
        else if (score_done) begin
            value_weighted[value_time_step] <= value_weighted_out;
        end
    end

    binary_query binary_query(
        .clk            (clk),
        .rst_n          (rst_n),

        .value_in       (value_flatten),
        .score_in_1     (score_out_1[value_time_step]),
        .score_in_2     (score_out_2[value_time_step]),
        .score_in_3     (score_out_3[value_time_step]),
        .score_in_4     (score_out_4[value_time_step]),
        .data_in_valid  (score_done),

        .data_out       (value_weighted_out),
        .data_out_valid (value_weighted_valid),
        .done           (value_weighted_done)
    );

    // =========================================================================================================
    // =====================================Get Intermediate 1==================================================
    // =========================================================================================================
    reg [4:0] inter_1_time_step_pre;
    reg [4:0] inter_1_time_step;    // extra register used as a one-clock delay

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            inter_1_time_step_pre <= 0;
        end
        else if (inter_1_time_step_pre == 'd29)
            inter_1_time_step_pre <= 'd29;
        else if (value_weighted_done) begin
            inter_1_time_step_pre <= inter_1_time_step_pre + 1'b1;
        end
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            inter_1_time_step <= 0;
        end
        else if (inter_1_time_step_pre <= 'd29) begin
            inter_1_time_step <= inter_1_time_step_pre;
        end
    end

    reg  [64-1:0] attention_inter_1[30-1:0];
    wire [64-1:0] attention_inter_1_out;
    wire          attention_inter_1_valid;
    wire          attention_inter_1_done;

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                attention_inter_1[i] <= 0;
            end
        end
        else if (~attention_inter_1_done) begin
            attention_inter_1[inter_1_time_step] <= attention_inter_1_out;
        end
    end

    binary_intermediate_1 binary_intermediate_1(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (value_weighted[inter_1_time_step]),
        .data_in_valid  (value_weighted_done),
        .block_sel      (block_sel),

        .data_out       (attention_inter_1_out),
        .data_out_valid (attention_inter_1_valid),
        .done           (attention_inter_1_done)
    );

    // =========================================================================================================
    // =====================================Get Intermediate 2==================================================
    // =========================================================================================================
    reg [4:0] inter_2_time_step_pre;
    reg [4:0] inter_2_time_step;    // extra register used as a one-clock delay

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            inter_2_time_step_pre <= 0;
        end
        else if (inter_2_time_step_pre == 'd29)
            inter_2_time_step_pre <= 'd29;
        else if (attention_inter_1_done) begin
            inter_2_time_step_pre <= inter_2_time_step_pre + 1'b1;
        end
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            inter_2_time_step <= 0;
        end
        else if (inter_2_time_step_pre <= 'd29) begin
            inter_2_time_step <= inter_2_time_step_pre;
        end
    end

    reg  [16-1:0] attention_inter_2[30-1:0];
    wire [16-1:0] attention_inter_2_out;
    wire          attention_inter_2_valid;
    wire          attention_inter_2_done;

    always @(posedge clk, negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                attention_inter_2[i] <= 0;
            end
        end
        else if (~attention_inter_2_done) begin
            attention_inter_2[inter_2_time_step] <= attention_inter_2_out;
        end
    end

    // The last stage now drives attention_inter_2_out / attention_inter_2_done,
    // which were previously left undriven although the capture logic above
    // reads them. The module outputs are taken from the same nets, so what
    // appears on data_out / data_out_valid is unchanged.
    binary_intermediate_2 binary_intermediate_2(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (attention_inter_1[inter_2_time_step]),
        .data_in_valid  (attention_inter_1_done),
        .block_sel      (block_sel),

        .data_out       (attention_inter_2_out),
        .data_out_valid (attention_inter_2_valid),
        .done           (attention_inter_2_done)
    );

    assign data_out       = attention_inter_2_out;
    assign data_out_valid = attention_inter_2_done;

endmodule
// Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 |

// encoder
//
// Chains six sub-blocks, each started by the "done" flag of the previous one:
//   layer_norm_1 -> attention (+ shortcut) -> layer_norm_2
//     -> encoder_hidden_state_1 -> encoder_hidden_state_2 (+ shortcut) -> layer_norm_3
//
// Every stage writes its output words into a SEQ_LEN-entry register array.
// Each array is addressed by a small time-step counter; a one-cycle delayed
// copy of that counter (<name> vs. <name>_pre) is what actually indexes the
// array and feeds the next stage.
//
// Ports
//   clk, rst_n      : clock and asynchronous active-low reset
//   data_in         : 16-bit input word, consumed while data_in_valid is high
//   block_sel       : forwarded unchanged to every sub-block
//   data_out        : output of layer_norm_3
//   data_out_valid  : data_out_valid of layer_norm_3
//   done            : done of layer_norm_3
module encoder(
    input               clk,
    input               rst_n,

    input   [16-1:0]    data_in,
    input               data_in_valid,
    input   [2:0]       block_sel,

    output  [16-1:0]    data_out,
    output              data_out_valid,
    output              done
);

    localparam SEQ_LEN   = 30;            // entries buffered per stage
    localparam LAST_STEP = SEQ_LEN - 1;   // highest valid buffer index (29)
    localparam DATA_W    = 16;            // word width of most stages
    localparam HIDDEN_W  = 256;           // word width of hidden state 1

    // =========================================================================
    // Layer Norm 1
    // =========================================================================
    reg  [4:0]          ln_time_step_1_pre;
    reg  [4:0]          ln_time_step_1;

    reg  [DATA_W-1:0]   ln_1 [SEQ_LEN-1:0];
    wire [DATA_W-1:0]   ln_1_out;
    wire                ln_1_valid;
    wire                ln_1_done;

    // NOTE(review): unlike every other *_pre counter in this module, this one
    // does not saturate at LAST_STEP; it wraps after 31 if data_in_valid stays
    // high. Behaviour is kept as in the original - confirm this is intended.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_1_pre <= 5'd0;
        else if (data_in_valid)
            ln_time_step_1_pre <= ln_time_step_1_pre + 1'b1;
    end

    // Delayed index; holds its value while the pre-counter is out of range.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_1 <= 5'd0;
        else if (ln_time_step_1_pre <= LAST_STEP)
            ln_time_step_1 <= ln_time_step_1_pre;
    end

    // Capture layer_norm_1 output until it reports done.
    always @(posedge clk or negedge rst_n) begin : ln_1_store
        integer k;
        if (~rst_n) begin
            for (k = 0; k < SEQ_LEN; k = k + 1)
                ln_1[k] <= {DATA_W{1'b0}};
        end
        else if (~ln_1_done) begin
            ln_1[ln_time_step_1] <= ln_1_out;
        end
    end

    layer_norm_1 layer_norm_1(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (data_in),
        .data_in_valid  (data_in_valid),
        .block_sel      (block_sel),

        .data_out       (ln_1_out),
        .data_out_valid (ln_1_valid),
        .done           (ln_1_done)
    );

    // =========================================================================
    // Self Attention + Shortcut
    // =========================================================================
    reg  [4:0]          attention_time_step_pre;
    reg  [4:0]          attention_time_step;
    reg  [4:0]          attention_time_step_out;

    // Declared ahead of the always blocks that read them (the original used
    // attention_valid before its declaration).
    reg  [DATA_W-1:0]   attention [SEQ_LEN-1:0];
    wire [DATA_W-1:0]   attention_out;
    wire                attention_valid;
    reg                 attention_done;

    // Read index into ln_1: counts up once layer norm 1 is done, then holds.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            attention_time_step_pre <= 5'd0;
        else if (attention_time_step_pre == LAST_STEP)
            attention_time_step_pre <= LAST_STEP;
        else if (ln_1_done)
            attention_time_step_pre <= attention_time_step_pre + 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            attention_time_step <= 5'd0;
        else if (attention_time_step_pre <= LAST_STEP)
            attention_time_step <= attention_time_step_pre;
    end

    // Write index: advances on every valid attention output, then holds.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            attention_time_step_out <= 5'd0;
        else if (attention_time_step_out == LAST_STEP)
            attention_time_step_out <= LAST_STEP;
        else if (attention_valid)
            attention_time_step_out <= attention_time_step_out + 1'b1;
    end

    // Store attention output plus the matching layer-norm-1 word (shortcut).
    always @(posedge clk or negedge rst_n) begin : attention_store
        integer k;
        if (~rst_n) begin
            for (k = 0; k < SEQ_LEN; k = k + 1)
                attention[k] <= {DATA_W{1'b0}};
        end
        else if (attention_valid) begin
            attention[attention_time_step_out] <= attention_out + ln_1[attention_time_step_out]; // shortcut
        end
    end

    // The attention block exposes no done port; done is raised (and stays
    // high) once the write index has reached the last entry.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            attention_done <= 1'b0;
        else if (attention_time_step_out == LAST_STEP)
            attention_done <= 1'b1;
    end

    attention attention_U(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (ln_1[attention_time_step]),
        .data_in_valid  (ln_1_done),
        .block_sel      (block_sel),

        .data_out       (attention_out),
        .data_out_valid (attention_valid)
    );

    // =========================================================================
    // Layer Norm 2
    // =========================================================================
    reg  [4:0]          ln_time_step_2_pre;
    reg  [4:0]          ln_time_step_2;

    reg  [DATA_W-1:0]   ln_2 [SEQ_LEN-1:0];
    wire [DATA_W-1:0]   ln_2_out;
    wire                ln_2_valid;
    wire                ln_2_done;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_2_pre <= 5'd0;
        else if (ln_time_step_2_pre == LAST_STEP)
            ln_time_step_2_pre <= LAST_STEP;
        else if (attention_done)
            ln_time_step_2_pre <= ln_time_step_2_pre + 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_2 <= 5'd0;
        else if (ln_time_step_2_pre <= LAST_STEP)
            ln_time_step_2 <= ln_time_step_2_pre;
    end

    always @(posedge clk or negedge rst_n) begin : ln_2_store
        integer k;
        if (~rst_n) begin
            for (k = 0; k < SEQ_LEN; k = k + 1)
                ln_2[k] <= {DATA_W{1'b0}};
        end
        else if (~ln_2_done) begin
            ln_2[ln_time_step_2] <= ln_2_out;
        end
    end

    layer_norm_2 layer_norm_2(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (attention[ln_time_step_2]),
        .data_in_valid  (attention_done),
        .block_sel      (block_sel),

        .data_out       (ln_2_out),
        .data_out_valid (ln_2_valid),
        .done           (ln_2_done)
    );

    // =========================================================================
    // Hidden State 1
    // =========================================================================
    reg  [4:0]          hidden_state_1_time_step_pre;
    reg  [4:0]          hidden_state_1_time_step;

    reg  [HIDDEN_W-1:0] hd_1 [SEQ_LEN-1:0];
    wire [HIDDEN_W-1:0] hd_1_out;
    wire                hd_1_valid;
    wire                hd_1_done;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            hidden_state_1_time_step_pre <= 5'd0;
        else if (hidden_state_1_time_step_pre == LAST_STEP)
            hidden_state_1_time_step_pre <= LAST_STEP;
        else if (ln_2_done)
            hidden_state_1_time_step_pre <= hidden_state_1_time_step_pre + 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            hidden_state_1_time_step <= 5'd0;
        else if (hidden_state_1_time_step_pre <= LAST_STEP)
            hidden_state_1_time_step <= hidden_state_1_time_step_pre;
    end

    always @(posedge clk or negedge rst_n) begin : hd_1_store
        integer k;
        if (~rst_n) begin
            for (k = 0; k < SEQ_LEN; k = k + 1)
                hd_1[k] <= {HIDDEN_W{1'b0}};
        end
        else if (~hd_1_done) begin
            hd_1[hidden_state_1_time_step] <= hd_1_out;
        end
    end

    encoder_hidden_state_1 encoder_hidden_state_1(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (ln_2[hidden_state_1_time_step]),
        .data_in_valid  (ln_2_done),
        .block_sel      (block_sel),

        .data_out       (hd_1_out),
        .data_out_valid (hd_1_valid),
        .done           (hd_1_done)
    );

    // =========================================================================
    // Hidden State 2
    // =========================================================================
    reg  [4:0]          hidden_state_2_time_step_pre;
    reg  [4:0]          hidden_state_2_time_step;

    reg  [DATA_W-1:0]   hd_2 [SEQ_LEN-1:0];
    wire [DATA_W-1:0]   hd_2_out;
    wire                hd_2_valid;
    wire                hd_2_done;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            hidden_state_2_time_step_pre <= 5'd0;
        else if (hidden_state_2_time_step_pre == LAST_STEP)
            hidden_state_2_time_step_pre <= LAST_STEP;
        else if (hd_1_done)
            hidden_state_2_time_step_pre <= hidden_state_2_time_step_pre + 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            hidden_state_2_time_step <= 5'd0;
        else if (hidden_state_2_time_step_pre <= LAST_STEP)
            hidden_state_2_time_step <= hidden_state_2_time_step_pre;
    end

    // Store hidden-state-2 output plus the matching layer-norm-2 word (shortcut).
    always @(posedge clk or negedge rst_n) begin : hd_2_store
        integer k;
        if (~rst_n) begin
            for (k = 0; k < SEQ_LEN; k = k + 1)
                hd_2[k] <= {DATA_W{1'b0}};
        end
        else if (~hd_2_done) begin
            hd_2[hidden_state_2_time_step] <= hd_2_out + ln_2[hidden_state_2_time_step]; // shortcut
        end
    end

    encoder_hidden_state_2 encoder_hidden_state_2(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (hd_1[hidden_state_2_time_step]),
        .data_in_valid  (hd_1_done),
        .block_sel      (block_sel),

        .data_out       (hd_2_out),
        .data_out_valid (hd_2_valid),
        .done           (hd_2_done)
    );

    // =========================================================================
    // Layer Norm 3 (drives the module outputs directly; nothing is buffered)
    // =========================================================================
    reg  [4:0]          ln_time_step_3_pre;
    reg  [4:0]          ln_time_step_3;

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_3_pre <= 5'd0;
        else if (ln_time_step_3_pre == LAST_STEP)
            ln_time_step_3_pre <= LAST_STEP;
        else if (hd_2_done)
            ln_time_step_3_pre <= ln_time_step_3_pre + 1'b1;
    end

    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            ln_time_step_3 <= 5'd0;
        else if (ln_time_step_3_pre <= LAST_STEP)
            ln_time_step_3 <= ln_time_step_3_pre;
    end

    layer_norm_3 layer_norm_3(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (hd_2[ln_time_step_3]),
        .data_in_valid  (hd_2_done),
        .block_sel      (block_sel),

        .data_out       (data_out),
        .data_out_valid (data_out_valid),
        .done           (done)
    );

endmodule
-------------------------------------------------------------------------------- /verilog/sources_1/new/output_1.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | ////////////////////////////////////////////////////////////////////////////////// 3 | // Company: 4 | // 
Engineer: 5 | // 6 | // Create Date: 2023/11/13 16:03:54 7 | // Design Name: 8 | // Module Name: output_1 9 | // Project Name: 10 | // Target Devices: 11 | // Tool Versions: 12 | // Description: 13 | // 14 | // Dependencies: 15 | // 16 | // Revision: 17 | // Revision 0.01 - File Created 18 | // Additional Comments: 19 | // 20 | ////////////////////////////////////////////////////////////////////////////////// 21 | 22 | 23 |

// output_1
//
// Binary (XNOR + popcount) output layer. For each of the OUT_W output bits,
// the IN_W-bit input is XNOR-ed with one IN_W-bit weight slice read from
// output_1_rom, the matching bits are counted, and the output bit is set when
// more than half of the bits match (i.e. 2*popcount - IN_W > 0).
//
// Ports
//   clk, rst_n      : clock and asynchronous active-low reset
//   data_in         : IN_W-bit input vector
//   data_in_valid   : enables the ROM, the step counter and the output register
//   data_out        : OUT_W thresholded results, registered
//   data_out_valid  : data_in_valid delayed by one clock
//   time_step       : ROM address; one-cycle delayed copy of the internal
//                     step counter, which saturates at LAST_STEP
//   done            : set (and held) once time_step reaches LAST_STEP
module output_1(
    input                   clk,
    input                   rst_n,

    input       [480-1:0]   data_in,
    input                   data_in_valid,

    output reg  [8-1:0]     data_out,
    output reg              data_out_valid,
    output reg  [6-1:0]     time_step,
    output reg              done
);

    localparam IN_W      = 480;        // bits per input / weight vector
    localparam OUT_W     = 8;          // output bits computed in parallel
    localparam POP_W     = 9;          // enough bits to hold a count of IN_W
    localparam HALF_IN_W = IN_W / 2;   // threshold: 2*pop - IN_W > 0  <=>  pop > IN_W/2
    localparam LAST_STEP = 31;         // saturation value of the step counter

    // time_step is already declared as an "output reg" port above; the
    // original re-declared it here, which is an illegal duplicate declaration.
    reg [6-1:0] time_step_pre;

    // Step counter: advances while data_in_valid is high, holds at LAST_STEP.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            time_step_pre <= 0;
        end
        else if (time_step_pre == LAST_STEP) begin
            time_step_pre <= LAST_STEP;
        end
        else if (data_in_valid) begin
            time_step_pre <= time_step_pre + 1'b1;
        end
    end

    // One-cycle delayed copy of the counter, used as the ROM address.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            time_step <= 0;
        end
        else if (time_step_pre <= LAST_STEP) begin
            time_step <= time_step_pre;
        end
    end

    wire [OUT_W*IN_W-1:0] inter_w_data;
    output_1_rom output_1_rom (
        .clka   (clk),              // input wire clka
        .ena    (data_in_valid),    // input wire ena
        .addra  (time_step),        // input wire [4 : 0] addra
        .douta  (inter_w_data)      // output wire [3839 : 0] douta
    );

    // Number of set bits in an IN_W-bit vector. Replaces the hand-written
    // 480-term sum; the loop is fully unrolled by synthesis.
    function [POP_W-1:0] popcount;
        input [IN_W-1:0] bits;
        integer k;
        begin
            popcount = {POP_W{1'b0}};
            for (k = 0; k < IN_W; k = k + 1)
                popcount = popcount + bits[k];
        end
    endfunction

    genvar i;
    generate
        for (i = 0; i < OUT_W; i = i + 1) begin : gen_neuron
            // XNOR of the weight slice with the input: 1 where the bits agree.
            wire [IN_W-1:0]  inter_xor_result   = ~(inter_w_data[i*IN_W +: IN_W] ^ data_in);
            wire [POP_W-1:0] inter_popcount_out = popcount(inter_xor_result);

            always @(posedge clk or negedge rst_n) begin
                if (~rst_n) begin
                    data_out[i] <= 1'b0;
                end
                else if (data_in_valid) begin
                    // The original "(2*pop - 480) > 0" is evaluated unsigned
                    // (pop is an unsigned net), so it wrapped and was true for
                    // every pop except exactly 240. Compare directly instead.
                    data_out[i] <= (inter_popcount_out > HALF_IN_W);
                end
            end
        end
    endgenerate

    // done is sticky: set once the ROM address reaches LAST_STEP.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            done <= 0;
        else if (time_step == LAST_STEP)
            done <= 1;
    end

    // data_out is registered, so its valid flag is data_in_valid delayed by one clock.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n)
            data_out_valid <= 0;
        else if (data_in_valid)
            data_out_valid <= 1;
        else
            data_out_valid <= 0;
    end

endmodule
-------------------------------------------------------------------------------- /verilog/ip_repo/PS_PL_1.0/hdl/PS_PL_v1_0_S00_AXI.v: -------------------------------------------------------------------------------- 1 | 2 | `timescale 1 ns / 1 ps 3 | 4 | module PS_PL_v1_0_S00_AXI # 5 | ( 6 | // Users to add parameters here 7 | 8 | // User parameters ends 9 | // Do not modify the parameters beyond this line 10 | 11 | // Width of S_AXI data bus 12 | parameter integer C_S_AXI_DATA_WIDTH = 32, 13 | // Width of S_AXI address bus 14 | parameter integer C_S_AXI_ADDR_WIDTH = 4 15 | ) 16 | ( 17 | // Users to add ports here 18 | input [9-1:0] pre_result, 19 | input pre_result_valid, 20 | output [16-1:0] raw_data, 21 | output raw_data_valid, 22 | // User ports ends 23 | // Do not modify the ports beyond this line 24 | 25 | // Global Clock Signal 26 | input wire S_AXI_ACLK, 27 | // Global Reset Signal. This Signal is Active LOW 28 | input wire S_AXI_ARESETN, 29 | // Write address (issued by master, acceped by Slave) 30 | input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_AWADDR, 31 | // Write channel Protection type. This signal indicates the 32 | // privilege and security level of the transaction, and whether 33 | // the transaction is a data access or an instruction access. 34 | input wire [2 : 0] S_AXI_AWPROT, 35 | // Write address valid. This signal indicates that the master signaling 36 | // valid write address and control information. 37 | input wire S_AXI_AWVALID, 38 | // Write address ready. 
This signal indicates that the slave is ready 39 | // to accept an address and associated control signals. 40 | output wire S_AXI_AWREADY, 41 | // Write data (issued by master, acceped by Slave) 42 | input wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_WDATA, 43 | // Write strobes. This signal indicates which byte lanes hold 44 | // valid data. There is one write strobe bit for each eight 45 | // bits of the write data bus. 46 | input wire [(C_S_AXI_DATA_WIDTH/8)-1 : 0] S_AXI_WSTRB, 47 | // Write valid. This signal indicates that valid write 48 | // data and strobes are available. 49 | input wire S_AXI_WVALID, 50 | // Write ready. This signal indicates that the slave 51 | // can accept the write data. 52 | output wire S_AXI_WREADY, 53 | // Write response. This signal indicates the status 54 | // of the write transaction. 55 | output wire [1 : 0] S_AXI_BRESP, 56 | // Write response valid. This signal indicates that the channel 57 | // is signaling a valid write response. 58 | output wire S_AXI_BVALID, 59 | // Response ready. This signal indicates that the master 60 | // can accept a write response. 61 | input wire S_AXI_BREADY, 62 | // Read address (issued by master, acceped by Slave) 63 | input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR, 64 | // Protection type. This signal indicates the privilege 65 | // and security level of the transaction, and whether the 66 | // transaction is a data access or an instruction access. 67 | input wire [2 : 0] S_AXI_ARPROT, 68 | // Read address valid. This signal indicates that the channel 69 | // is signaling valid read address and control information. 70 | input wire S_AXI_ARVALID, 71 | // Read address ready. This signal indicates that the slave is 72 | // ready to accept an address and associated control signals. 73 | output wire S_AXI_ARREADY, 74 | // Read data (issued by slave) 75 | output wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_RDATA, 76 | // Read response. This signal indicates the status of the 77 | // read transfer. 
78 | output wire [1 : 0] S_AXI_RRESP, 79 | // Read valid. This signal indicates that the channel is 80 | // signaling the required read data. 81 | output wire S_AXI_RVALID, 82 | // Read ready. This signal indicates that the master can 83 | // accept the read data and response information. 84 | input wire S_AXI_RREADY 85 | ); 86 | 87 | // AXI4LITE signals 88 | reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_awaddr; 89 | reg axi_awready; 90 | reg axi_wready; 91 | reg [1 : 0] axi_bresp; 92 | reg axi_bvalid; 93 | reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_araddr; 94 | reg axi_arready; 95 | reg [C_S_AXI_DATA_WIDTH-1 : 0] axi_rdata; 96 | reg [1 : 0] axi_rresp; 97 | reg axi_rvalid; 98 | 99 | // Example-specific design signals 100 | // local parameter for addressing 32 bit / 64 bit C_S_AXI_DATA_WIDTH 101 | // ADDR_LSB is used for addressing 32/64 bit registers/memories 102 | // ADDR_LSB = 2 for 32 bits (n downto 2) 103 | // ADDR_LSB = 3 for 64 bits (n downto 3) 104 | localparam integer ADDR_LSB = (C_S_AXI_DATA_WIDTH/32) + 1; 105 | localparam integer OPT_MEM_ADDR_BITS = 1; 106 | //---------------------------------------------- 107 | //-- Signals for user logic register space example 108 | //------------------------------------------------ 109 | //-- Number of Slave Registers 4 110 | reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg0; 111 | reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg1; 112 | reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg2; 113 | reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg3; 114 | wire slv_reg_rden; 115 | wire slv_reg_wren; 116 | reg [C_S_AXI_DATA_WIDTH-1:0] reg_data_out; 117 | integer byte_index; 118 | reg aw_en; 119 | 120 | // I/O Connections assignments 121 | 122 | assign S_AXI_AWREADY = axi_awready; 123 | assign S_AXI_WREADY = axi_wready; 124 | assign S_AXI_BRESP = axi_bresp; 125 | assign S_AXI_BVALID = axi_bvalid; 126 | assign S_AXI_ARREADY = axi_arready; 127 | assign S_AXI_RDATA = axi_rdata; 128 | assign S_AXI_RRESP = axi_rresp; 129 | assign S_AXI_RVALID = axi_rvalid; 130 | // Implement axi_awready generation 
131 | // axi_awready is asserted for one S_AXI_ACLK clock cycle when both 132 | // S_AXI_AWVALID and S_AXI_WVALID are asserted. axi_awready is 133 | // de-asserted when reset is low. 134 | 135 | always @( posedge S_AXI_ACLK ) 136 | begin 137 | if ( S_AXI_ARESETN == 1'b0 ) 138 | begin 139 | axi_awready <= 1'b0; 140 | aw_en <= 1'b1; 141 | end 142 | else 143 | begin 144 | if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en) 145 | begin 146 | // slave is ready to accept write address when 147 | // there is a valid write address and write data 148 | // on the write address and data bus. This design 149 | // expects no outstanding transactions. 150 | axi_awready <= 1'b1; 151 | aw_en <= 1'b0; 152 | end 153 | else if (S_AXI_BREADY && axi_bvalid) 154 | begin 155 | aw_en <= 1'b1; 156 | axi_awready <= 1'b0; 157 | end 158 | else 159 | begin 160 | axi_awready <= 1'b0; 161 | end 162 | end 163 | end 164 | 165 | // Implement axi_awaddr latching 166 | // This process is used to latch the address when both 167 | // S_AXI_AWVALID and S_AXI_WVALID are valid. 168 | 169 | always @( posedge S_AXI_ACLK ) 170 | begin 171 | if ( S_AXI_ARESETN == 1'b0 ) 172 | begin 173 | axi_awaddr <= 0; 174 | end 175 | else 176 | begin 177 | if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en) 178 | begin 179 | // Write Address latching 180 | axi_awaddr <= S_AXI_AWADDR; 181 | end 182 | end 183 | end 184 | 185 | // Implement axi_wready generation 186 | // axi_wready is asserted for one S_AXI_ACLK clock cycle when both 187 | // S_AXI_AWVALID and S_AXI_WVALID are asserted. axi_wready is 188 | // de-asserted when reset is low. 

// axi_wready: one-cycle pulse raised when write address and write data
// are both valid and a new write is allowed (aw_en). Low in reset and
// in every other cycle.
always @(posedge S_AXI_ACLK) begin
    if (!S_AXI_ARESETN) begin
        axi_wready <= 1'b0;
    end
    else begin
        axi_wready <= 1'b0;
        if (!axi_wready && S_AXI_WVALID && S_AXI_AWVALID && aw_en) begin
            // Address and data are on the bus together: accept the data.
            // This design expects no outstanding transactions.
            axi_wready <= 1'b1;
        end
    end
end

// Memory mapped register select and write logic.
// Write data is stored into the addressed slave register in the cycle
// where axi_awready, S_AXI_AWVALID, axi_wready and S_AXI_WVALID are all
// asserted. The write strobes select which bytes of the register are
// updated. The writable registers are cleared by the active-low reset.
// The slave register write enable (slv_reg_wren) is asserted when a valid
// address and valid data are available and the slave has accepted both.
// Write enable: both write handshakes complete in the same cycle.
assign slv_reg_wren = axi_wready && S_AXI_WVALID && axi_awready && S_AXI_AWVALID;

// Byte-strobed write into the AXI-writable registers.
// Only slv_reg0 (index 0) and slv_reg1 (index 1) can be written from the
// bus. slv_reg2 and slv_reg3 are assigned by the user logic at the end of
// this module, so writes to indexes 2 and 3 are accepted on the bus but
// change nothing.
always @(posedge S_AXI_ACLK) begin
    if (!S_AXI_ARESETN) begin
        slv_reg0 <= 0;
        slv_reg1 <= 0;
    end
    else if (slv_reg_wren) begin
        case (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB])
            2'h0: begin
                // Slave register 0: update only the bytes whose strobe is set
                for (byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1) begin
                    if (S_AXI_WSTRB[byte_index] == 1)
                        slv_reg0[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8];
                end
            end
            2'h1: begin
                // Slave register 1: update only the bytes whose strobe is set
                for (byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1) begin
                    if (S_AXI_WSTRB[byte_index] == 1)
                        slv_reg1[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8];
                end
            end
            default: begin
                // Indexes 2 and 3: nothing to write, registers keep their values.
            end
        endcase
    end
end

// Implement
// write response logic generation
// axi_bvalid is raised, with an OKAY response, in the cycle where
// axi_awready, S_AXI_AWVALID, axi_wready and S_AXI_WVALID are all
// asserted and no response is already pending. This marks the acceptance
// of the write and reports its status. The response is dropped again
// once the master takes it (S_AXI_BREADY && axi_bvalid).
always @(posedge S_AXI_ACLK) begin
    if (!S_AXI_ARESETN) begin
        axi_bvalid <= 0;
        axi_bresp  <= 2'b0;
    end
    else if (axi_awready && S_AXI_AWVALID && ~axi_bvalid && axi_wready && S_AXI_WVALID) begin
        // A write has just been accepted: publish the response.
        axi_bvalid <= 1'b1;
        axi_bresp  <= 2'b0; // 'OKAY' response; no error responses are generated
    end
    else if (S_AXI_BREADY && axi_bvalid) begin
        // The master may hold BREADY high permanently, so the response is
        // only retired while bvalid is actually set.
        axi_bvalid <= 1'b0;
    end
end

// Implement axi_arready generation
// axi_arready is asserted for one S_AXI_ACLK clock cycle when
// S_AXI_ARVALID is asserted. axi_arready is
// de-asserted when reset (active low) is asserted.
// The read address is also latched when S_AXI_ARVALID is
// asserted. axi_araddr is reset to zero on reset assertion.

// Read address channel: axi_arready is a one-cycle pulse answering
// S_AXI_ARVALID, and the read address is captured in that same step.
always @(posedge S_AXI_ACLK) begin
    if (!S_AXI_ARESETN) begin
        axi_arready <= 1'b0;
        axi_araddr  <= {C_S_AXI_ADDR_WIDTH{1'b0}};
    end
    else begin
        axi_arready <= 1'b0;
        if (!axi_arready && S_AXI_ARVALID) begin
            // The slave accepts the valid read address and latches it.
            axi_arready <= 1'b1;
            axi_araddr  <= S_AXI_ARADDR;
        end
    end
end

// Read data channel valid/response generation.
// axi_rvalid is raised when the read address handshake completes
// (axi_arready && S_AXI_ARVALID) and no read data is already pending;
// axi_rresp reports OKAY. axi_rvalid stays high until the master accepts
// the data with S_AXI_RREADY. Both are cleared by the active-low reset.
always @(posedge S_AXI_ACLK) begin
    if (!S_AXI_ARESETN) begin
        axi_rvalid <= 0;
        axi_rresp  <= 0;
    end
    else if (axi_arready && S_AXI_ARVALID && ~axi_rvalid) begin
        // Read data becomes valid on the read data bus.
        axi_rvalid <= 1'b1;
        axi_rresp  <= 2'b0; // 'OKAY' response
    end
    else if (axi_rvalid && S_AXI_RREADY) begin
        // Read data has been accepted by the master.
        axi_rvalid <= 1'b0;
    end
end

// Implement memory mapped register select and read logic generation
// Slave register read enable is asserted when valid address is available
// and the slave is ready to accept the read address.
// Read enable: the read address handshake completes and no read data is
// still pending.
assign slv_reg_rden = axi_arready & S_AXI_ARVALID & ~axi_rvalid;

// Read multiplexer (purely combinational, hence blocking assignments).
// Only slv_reg2 and slv_reg3 can be read back. Register indexes 0 and 1
// fall through to the default and read as zero.
always @(*) begin
    case (axi_araddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB])
        2'h2    : reg_data_out = slv_reg2;
        2'h3    : reg_data_out = slv_reg3;
        default : reg_data_out = 0;
    endcase
end

// Read data register: captures the selected register in the cycle where
// the read address is accepted (slv_reg_rden). Cleared by reset.
always @(posedge S_AXI_ACLK) begin
    if (S_AXI_ARESETN == 1'b0) begin
        axi_rdata <= 0;
    end
    else if (slv_reg_rden) begin
        axi_rdata <= reg_data_out;
    end
end

// Add user logic here
// Outbound: the low 16 bits of slv_reg0 are the data word and bit 0 of
// slv_reg1 is its valid flag.
// NOTE(review): raw_data_valid follows slv_reg1[0] as a level, so it stays
// high for as long as software leaves that bit set - confirm the consumer
// expects a level rather than a one-cycle strobe.
assign raw_data       = slv_reg0[16-1:0];
assign raw_data_valid = slv_reg1[0];

// Inbound: when pre_result_valid is high, the result is stored in
// slv_reg2 (zero-extended by 23 bits) and slv_reg3 bit 0 is set.
// Both registers are only cleared by reset, so slv_reg3 stays at 1 after
// the first result.
// NOTE(review): the 23-bit pad presumably matches a 9-bit pre_result on a
// 32-bit data bus - the port declaration is outside this section, verify.
always @(posedge S_AXI_ACLK) begin
    if (~S_AXI_ARESETN) begin
        slv_reg2 <= 0;
        slv_reg3 <= 0;
    end
    else if (pre_result_valid) begin
        slv_reg2 <= {23'b0, pre_result};
        slv_reg3 <= {31'b0, pre_result_valid};
    end
end
// User logic ends

endmodule

--------------------------------------------------------------------------------
/verilog/sources_1/new/transformer.v:
--------------------------------------------------------------------------------
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 2023/11/13 15:12:34
// Design Name:
// Module Name: transformer
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
//
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// transformer: top level of the datapath.
// Live stages in this file: one encoder instance (Block 1) whose outputs
// are stored in a 30 x 16-bit buffer, a flatten stage, and the output_1 /
// output_2 stages. Blocks 2-6 are present only as commented-out code.
//
// Ports:
//   clk, rst_n      - clock and active-low asynchronous reset
//   data_in         - 16-bit input word, passed straight to the encoder
//   data_in_valid   - qualifies data_in
//   data_out        - 9-bit result driven by output_2
//   data_out_valid  - qualifies data_out
module transformer(
    input           clk,
    input           rst_n,

    input  [16-1:0] data_in,
    input           data_in_valid,

    output [9-1:0]  data_out,
    output          data_out_valid
    );

    // =========================================================================================================
    // ============================================Block 1======================================================
    // =========================================================================================================

    // All Block 1 nets are declared ahead of the processes that use them.
    reg  [4:0]    block_1_time_step_pre;
    reg  [4:0]    block_1_time_step;
    reg  [4:0]    block_1_time_step_out;

    reg  [16-1:0] block_1[30-1:0];   // encoder results, one 16-bit word per time step
    wire [16-1:0] block_1_out;       // encoder data output
    wire          block_1_valid;     // encoder data_out_valid
    wire          block_1_done;      // encoder done

    // Loop indexes shared by the reset/copy loops in this module.
    integer i, j;

    // Counts cycles in which data_in_valid is high (5-bit, wraps after 31).
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            block_1_time_step_pre <= 0;
        end
        else if (data_in_valid) begin
            block_1_time_step_pre <= block_1_time_step_pre + 1'b1;
        end
    end

    // One-cycle delayed copy of the input counter, updated only while the
    // counter is within 0..29.
    // NOTE(review): block_1_time_step is not read by any live code in this
    // module - confirm whether it is still needed.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            block_1_time_step <= 0;
        end
        else if (block_1_time_step_pre <= 'd29) begin
            block_1_time_step <= block_1_time_step_pre;
        end
    end

    // Write index into block_1: advances on every block_1_valid and
    // saturates at 29, the last buffer entry.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            block_1_time_step_out <= 0;
        end
        else if (block_1_valid && block_1_time_step_out != 'd29) begin
            block_1_time_step_out <= block_1_time_step_out + 'd1;
        end
    end

    // Result buffer: cleared in reset; until the encoder reports done, the
    // current encoder output is written to the entry selected by
    // block_1_time_step_out on every clock.
    always @(posedge clk or negedge rst_n) begin
        if (~rst_n) begin
            for (i = 0; i < 30; i = i + 1) begin
                block_1[i] <= 16'd0;
            end
        end
        else if (~block_1_done) begin
            block_1[block_1_time_step_out] <= block_1_out;
        end
    end

    // block_1_done comes from the encoder's done port (see instance below).
    encoder encoder_block_1(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (data_in),
        .data_in_valid  (data_in_valid),
        .block_sel      (0),

        .data_out       (block_1_out),
        .data_out_valid (block_1_valid),
        .done           (block_1_done)
    );

// // =========================================================================================================
// // ============================================Block 2======================================================
// // =========================================================================================================
//     reg [4:0] block_2_time_step_pre;
//     reg [4:0] block_2_time_step;
//     reg [4:0] block_2_time_step_out;

//     always @(posedge clk or negedge rst_n) begin
//         if (~rst_n) begin
//             block_2_time_step_pre <= 0;
//         end
//         else if (block_2_time_step_pre == 'd29)
//             block_2_time_step_pre <= 'd29;
//         else if (block_1_done) begin
//             block_2_time_step_pre <= block_2_time_step_pre + 1'b1;
//         end
//     end

//     always @(posedge clk or negedge rst_n) begin
//         if (~rst_n) begin
//             block_2_time_step <= 0;
//         end
//         else if (block_2_time_step_pre <= 'd29) begin
//             block_2_time_step <= block_2_time_step_pre;
//         end
//     end

//     always @(posedge clk or negedge rst_n) begin
//         if (~rst_n) begin
//             block_2_time_step_out <= 0;
//         end
//         else if (block_2_time_step_out == 'd29)
//             block_2_time_step_out <= 'd29;
//         else if (block_2_valid) begin
//             block_2_time_step_out <= block_2_time_step_out + 'd1;
//         end
//     end

//     reg [16-1:0] 
block_2[30-1:0]; 150 | // wire [16-1:0] block_2_out; 151 | // wire block_2_valid; 152 | // wire block_2_done; 153 | 154 | // always@(posedge clk,negedge rst_n) begin 155 | // if (~rst_n) begin 156 | // for (i = 0; i < 30; i = i + 1) begin 157 | // for (j = 0; j < 16; j = j + 1) begin 158 | // block_2[i][j] <= 0; 159 | // end 160 | // end 161 | // end 162 | // else if (~block_2_done) begin 163 | // block_2[block_2_time_step_out] <= block_2_out; 164 | // end 165 | // end 166 | 167 | //// always@(posedge clk,negedge rst_n) begin 168 | //// if (~rst_n) begin 169 | //// block_1_done <= 0; 170 | //// end 171 | //// else if (block_1_time_step_out == 'd29) begin 172 | //// block_1_done <= 1; 173 | //// end 174 | //// end 175 | 176 | // encoder encoder_block_2( 177 | // .clk (clk), 178 | // .rst_n (rst_n), 179 | 180 | // .data_in (block_1[block_2_time_step]), 181 | // .data_in_valid (block_1_done), 182 | // .block_sel (1), 183 | 184 | // .data_out (block_2_out), 185 | // .data_out_valid (block_2_valid), 186 | // .done (block_2_done) 187 | // ); 188 | 189 | // // ========================================================================================================= 190 | // // ============================================Block 3====================================================== 191 | // // ========================================================================================================= 192 | // reg [4:0] block_3_time_step_pre; 193 | // reg [4:0] block_3_time_step; 194 | // reg [4:0] block_3_time_step_out; 195 | 196 | // always @(posedge clk or negedge rst_n) begin 197 | // if (~rst_n) begin 198 | // block_3_time_step_pre <= 0; 199 | // end 200 | // else if (block_3_time_step_pre == 'd29) 201 | // block_3_time_step_pre <= 'd29; 202 | // else if (block_2_done) begin 203 | // block_3_time_step_pre <= block_3_time_step_pre + 1'b1; 204 | // end 205 | // end 206 | 207 | // always @(posedge clk or negedge rst_n) begin 208 | // if (~rst_n) begin 209 | // 
block_3_time_step <= 0; 210 | // end 211 | // else if (block_3_time_step_pre <= 'd29) begin 212 | // block_3_time_step <= block_3_time_step_pre; 213 | // end 214 | // end 215 | 216 | // always @(posedge clk or negedge rst_n) begin 217 | // if (~rst_n) begin 218 | // block_3_time_step_out <= 0; 219 | // end 220 | // else if (block_3_time_step_out == 'd29) 221 | // block_3_time_step_out <= 'd29; 222 | // else if (block_3_valid) begin 223 | // block_3_time_step_out <= block_3_time_step_out + 'd1; 224 | // end 225 | // end 226 | 227 | // reg [16-1:0] block_3[30-1:0]; 228 | // wire [16-1:0] block_3_out; 229 | // wire block_3_valid; 230 | // wire block_3_done; 231 | 232 | // always@(posedge clk,negedge rst_n) begin 233 | // if (~rst_n) begin 234 | // for (i = 0; i < 30; i = i + 1) begin 235 | // for (j = 0; j < 16; j = j + 1) begin 236 | // block_3[i][j] <= 0; 237 | // end 238 | // end 239 | // end 240 | // else if (~block_3_done) begin 241 | // block_3[block_3_time_step_out] <= block_3_out; 242 | // end 243 | // end 244 | 245 | // encoder encoder_block_3( 246 | // .clk (clk), 247 | // .rst_n (rst_n), 248 | 249 | // .data_in (block_2[block_3_time_step]), 250 | // .data_in_valid (block_2_done), 251 | // .block_sel (2), 252 | 253 | // .data_out (block_3_out), 254 | // .data_out_valid (block_3_valid), 255 | // .done (block_3_done) 256 | // ); 257 | 258 | // // ========================================================================================================= 259 | // // ============================================Block 4====================================================== 260 | // // ========================================================================================================= 261 | // reg [4:0] block_4_time_step_pre; 262 | // reg [4:0] block_4_time_step; 263 | // reg [4:0] block_4_time_step_out; 264 | 265 | // always @(posedge clk or negedge rst_n) begin 266 | // if (~rst_n) begin 267 | // block_4_time_step_pre <= 0; 268 | // end 269 | // else if 
(block_4_time_step_pre == 'd29) 270 | // block_4_time_step_pre <= 'd29; 271 | // else if (block_3_done) begin 272 | // block_4_time_step_pre <= block_4_time_step_pre + 1'b1; 273 | // end 274 | // end 275 | 276 | // always @(posedge clk or negedge rst_n) begin 277 | // if (~rst_n) begin 278 | // block_4_time_step <= 0; 279 | // end 280 | // else if (block_4_time_step_pre <= 'd29) begin 281 | // block_4_time_step <= block_4_time_step_pre; 282 | // end 283 | // end 284 | 285 | // always @(posedge clk or negedge rst_n) begin 286 | // if (~rst_n) begin 287 | // block_4_time_step_out <= 0; 288 | // end 289 | // else if (block_4_time_step_out == 'd29) 290 | // block_4_time_step_out <= 'd29; 291 | // else if (block_4_valid) begin 292 | // block_4_time_step_out <= block_4_time_step_out + 'd1; 293 | // end 294 | // end 295 | 296 | // reg [16-1:0] block_4[30-1:0]; 297 | // wire [16-1:0] block_4_out; 298 | // wire block_4_valid; 299 | // wire block_4_done; 300 | 301 | // always@(posedge clk,negedge rst_n) begin 302 | // if (~rst_n) begin 303 | // for (i = 0; i < 30; i = i + 1) begin 304 | // for (j = 0; j < 16; j = j + 1) begin 305 | // block_4[i][j] <= 0; 306 | // end 307 | // end 308 | // end 309 | // else if (~block_4_done) begin 310 | // block_4[block_4_time_step_out] <= block_4_out; 311 | // end 312 | // end 313 | 314 | // encoder encoder_block_4( 315 | // .clk (clk), 316 | // .rst_n (rst_n), 317 | 318 | // .data_in (block_3[block_4_time_step]), 319 | // .data_in_valid (block_3_done), 320 | // .block_sel (3), 321 | 322 | // .data_out (block_4_out), 323 | // .data_out_valid (block_4_valid), 324 | // .done (block_4_done) 325 | // ); 326 | 327 | // // ========================================================================================================= 328 | // // ============================================Block 5====================================================== 329 | // // 
========================================================================================================= 330 | // reg [4:0] block_5_time_step_pre; 331 | // reg [4:0] block_5_time_step; 332 | // reg [4:0] block_5_time_step_out; 333 | 334 | // always @(posedge clk or negedge rst_n) begin 335 | // if (~rst_n) begin 336 | // block_5_time_step_pre <= 0; 337 | // end 338 | // else if (block_5_time_step_pre == 'd29) 339 | // block_5_time_step_pre <= 'd29; 340 | // else if (block_4_done) begin 341 | // block_5_time_step_pre <= block_5_time_step_pre + 1'b1; 342 | // end 343 | // end 344 | 345 | // always @(posedge clk or negedge rst_n) begin 346 | // if (~rst_n) begin 347 | // block_5_time_step <= 0; 348 | // end 349 | // else if (block_5_time_step_pre <= 'd29) begin 350 | // block_5_time_step <= block_5_time_step_pre; 351 | // end 352 | // end 353 | 354 | // always @(posedge clk or negedge rst_n) begin 355 | // if (~rst_n) begin 356 | // block_5_time_step_out <= 0; 357 | // end 358 | // else if (block_5_time_step_out == 'd29) 359 | // block_5_time_step_out <= 'd29; 360 | // else if (block_5_valid) begin 361 | // block_5_time_step_out <= block_5_time_step_out + 'd1; 362 | // end 363 | // end 364 | 365 | // reg [16-1:0] block_5[30-1:0]; 366 | // wire [16-1:0] block_5_out; 367 | // wire block_5_valid; 368 | // wire block_5_done; 369 | 370 | // always@(posedge clk,negedge rst_n) begin 371 | // if (~rst_n) begin 372 | // for (i = 0; i < 30; i = i + 1) begin 373 | // for (j = 0; j < 16; j = j + 1) begin 374 | // block_5[i][j] <= 0; 375 | // end 376 | // end 377 | // end 378 | // else if (~block_5_done) begin 379 | // block_5[block_5_time_step_out] <= block_5_out; 380 | // end 381 | // end 382 | 383 | // encoder encoder_block_5( 384 | // .clk (clk), 385 | // .rst_n (rst_n), 386 | 387 | // .data_in (block_4[block_5_time_step]), 388 | // .data_in_valid (block_4_done), 389 | // .block_sel (4), 390 | 391 | // .data_out (block_5_out), 392 | // .data_out_valid (block_5_valid), 393 | 
// .done (block_5_done) 394 | // ); 395 | 396 | // // ========================================================================================================= 397 | // // ============================================Block 6====================================================== 398 | // // ========================================================================================================= 399 | // reg [4:0] block_6_time_step_pre; 400 | // reg [4:0] block_6_time_step; 401 | // reg [4:0] block_6_time_step_out; 402 | 403 | // always @(posedge clk or negedge rst_n) begin 404 | // if (~rst_n) begin 405 | // block_6_time_step_pre <= 0; 406 | // end 407 | // else if (block_6_time_step_pre == 'd29) 408 | // block_6_time_step_pre <= 'd29; 409 | // else if (block_5_done) begin 410 | // block_6_time_step_pre <= block_6_time_step_pre + 1'b1; 411 | // end 412 | // end 413 | 414 | // always @(posedge clk or negedge rst_n) begin 415 | // if (~rst_n) begin 416 | // block_6_time_step <= 0; 417 | // end 418 | // else if (block_6_time_step_pre <= 'd29) begin 419 | // block_6_time_step <= block_6_time_step_pre; 420 | // end 421 | // end 422 | 423 | // always @(posedge clk or negedge rst_n) begin 424 | // if (~rst_n) begin 425 | // block_6_time_step_out <= 0; 426 | // end 427 | // else if (block_6_time_step_out == 'd29) 428 | // block_6_time_step_out <= 'd29; 429 | // else if (block_6_valid) begin 430 | // block_6_time_step_out <= block_6_time_step_out + 'd1; 431 | // end 432 | // end 433 | 434 | // reg [16-1:0] block_6[30-1:0]; 435 | // wire [16-1:0] block_6_out; 436 | // wire block_6_valid; 437 | // wire block_6_done; 438 | 439 | // always@(posedge clk,negedge rst_n) begin 440 | // if (~rst_n) begin 441 | // for (i = 0; i < 30; i = i + 1) begin 442 | // for (j = 0; j < 16; j = j + 1) begin 443 | // block_6[i][j] <= 0; 444 | // end 445 | // end 446 | // end 447 | // else if (~block_6_done) begin 448 | // block_6[block_6_time_step_out] <= block_6_out; 449 | // end 450 | // end 451 
// NOTE(review): block_sel below is 4, the same value as encoder_block_5 -
// confirm whether 5 was intended before re-enabling this instance.
//     encoder encoder_block_6(
//         .clk            (clk),
//         .rst_n          (rst_n),

//         .data_in        (block_5[block_6_time_step]),
//         .data_in_valid  (block_5_done),
//         .block_sel      (4),

//         .data_out       (block_6_out),
//         .data_out_valid (block_6_valid),
//         .done           (block_6_done)
//     );

    // =========================================================================================================
    // ============================================Flatten======================================================
    // =========================================================================================================
    // Packs the 30 x 16-bit block_1 buffer into one 480-bit vector, with
    // word i occupying bits [i*16 +: 16]. While block_1_done is high the
    // vector is reloaded every clock and data_flatten_done is set; the
    // flag is only cleared by reset.
    reg [480-1:0] data_flatten;
    reg           data_flatten_done;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            data_flatten_done <= 0;
            data_flatten      <= {480{1'b0}};
        end
        else if (block_1_done) begin
            data_flatten_done <= 1;
            for (i = 0; i < 30; i = i + 1) begin
                data_flatten[i*16 +: 16] <= block_1[i];
            end
        end
    end


    // =========================================================================================================
    // ============================================Output 1=====================================================
    // =========================================================================================================
    // Collects the 8-bit results of output_1_U into a 256-bit vector; the
    // byte slot is selected by output_1_time_step.
    // NOTE(review): the store is gated by ~output_1_done only and
    // output_1_valid is not used here - confirm this is intended.

    reg  [256-1:0] output_1;
    wire [8-1:0]   output_1_out;
    wire [6-1:0]   output_1_time_step;
    wire           output_1_valid;
    wire           output_1_done;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            output_1 <= 'd0;
        end
        else if (!output_1_done) begin
            output_1[output_1_time_step*8 +: 8] <= output_1_out;
        end
    end

    output_1 output_1_U(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (data_flatten),
        .data_in_valid  (data_flatten_done),

        .data_out       (output_1_out),
        .data_out_valid (output_1_valid),
        .time_step      (output_1_time_step),
        .done           (output_1_done)
    );

    // =========================================================================================================
    // ============================================Output 2=====================================================
    // =========================================================================================================
    // Final stage: consumes the collected output_1 vector once output_1_done
    // is high and drives the module outputs directly.

    output_2 output_2_U(
        .clk            (clk),
        .rst_n          (rst_n),

        .data_in        (output_1),
        .data_in_valid  (output_1_done),

        .data_out       (data_out),
        .data_out_valid (data_out_valid)
    );

endmodule

--------------------------------------------------------------------------------