├── baseline └── README.md ├── BugFix.log ├── hw ├── source │ ├── select.v │ ├── fifo.v │ ├── fifo_parser_copy.v │ ├── fifo_parser_lit.v │ ├── queue_token.v │ ├── copyread_selector.v │ ├── control.v │ ├── distributor.v │ ├── copytoken_selector.v │ ├── decompressor_wrapper.v │ ├── lit_selector.v │ ├── axi_io.v │ ├── ram_module.v │ ├── io_control.v │ ├── ram_block.v │ ├── parser_sub.v │ ├── data_out.v │ ├── decompressor.v │ └── parser.v ├── Makefile ├── action_config.sh └── interface │ ├── action_axi_master.vhd │ └── action_wrapper.vhd ├── Makefile ├── sw ├── Makefile ├── snap_example.h └── snap_decompressor.c ├── README.md ├── LICENSE └── ip └── create_action_ip.tcl /baseline/README.md: -------------------------------------------------------------------------------- 1 | Multi-thread Software Snappy Benchmark (used as a baseline for this project) 2 | -------------------------------------------------------------------------------- /BugFix.log: -------------------------------------------------------------------------------- 1 | ## This file records the fixed bugs. 2 | 3 | #2019-01-31 bresp bug: the AFU needs to wait until the last write data has been written successfully (i.e. the 'bresp' signal has come back) before the 'done' signal is set and sent. ## Jian Fang 4 | 5 | #2019-01-31 app_ready bug: The app_ready signal needs to be set to 1 after reset. Leaving it unconnected or setting it to 0 leaves the wready signal (for write data) unset, and thus the write data will never be sent. This bug cannot be seen in the PSLSE (version up to 2018-08-31) simulation platform but shows up on the practical (real hardware) platform. 
## Jian Fang 6 | 7 | #2019-03-05 fifo in/out bug: FIFO data_in and FIFO wr-en signal should be input 8 | -------------------------------------------------------------------------------- /hw/source/select.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: select 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 13th, July 2018 7 | Function: generate a NUM_SEL-to-1 MUX with output bandwidth with NUM_WIDTH 8 | ****************************/ 9 | `timescale 1ns/1ps 10 | 11 | module select 12 | #( 13 | parameter NUM_SEL=16, 14 | NUM_LOG=4, 15 | NUM_WIDTH=64 16 | ) 17 | ( 18 | input[NUM_WIDTH*NUM_SEL-1:0] data_in, 19 | input[NUM_LOG-1:0] sel, 20 | output[NUM_WIDTH-1:0] data_out 21 | ); 22 | 23 | 24 | reg[NUM_WIDTH-1:0] data_array[NUM_SEL-1:0]; 25 | generate 26 | genvar i; 27 | for(i=0;i=3); 62 | assign fifo_full=(counter==8); 63 | assign dout=fifo_out; 64 | assign prog_full=fifo_half; 65 | assign full=fifo_full; 66 | 67 | endmodule 68 | -------------------------------------------------------------------------------- /hw/source/fifo_parser_copy.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | 3 | module fifo_parser_copy 4 | #( 5 | parameter WIDTH=33, 6 | parameter DEPTH=8 7 | ) 8 | ( 9 | input clk, 10 | input srst, 11 | 12 | // .almost_full(), 13 | output full, 14 | input[WIDTH-1:0] din, 15 | input wr_en, 16 | 17 | output empty, 18 | output[WIDTH-1:0] dout, 19 | input rd_en, 20 | 21 | output valid, 22 | output prog_full, 23 | output wr_rst_busy, 24 | output rd_rst_busy 25 | ); 26 | reg [WIDTH-1:0]fifo_out; 27 | reg [WIDTH-1:0]ram[7:0]; 28 | reg [3:0]read_ptr,write_ptr,counter; 29 | wire fifo_half,fifo_full; 30 | /* 8-entry synchronous FIFO with a registered read output. All state is updated with non-blocking assignments so that other clocked logic sampling dout/empty/full never races with this block. On a simultaneous read+write with read_ptr==write_ptr the incoming word is forwarded straight to the output, which is what the previous blocking statement order produced. */ 31 | always@(posedge clk) 32 | if(srst) 33 | begin 34 | read_ptr<=0; 35 | write_ptr<=0; 36 | counter<=0; 37 | end 38 | else 39 | case({rd_en,wr_en}) 40 | 2'b00: 41 
| counter<=counter; 42 | 2'b01: 43 | begin 44 | ram[write_ptr]<=din; 45 | counter<=counter+1; 46 | write_ptr<=(write_ptr==7)?0:write_ptr+1; 47 | end 48 | 2'b10: 49 | begin 50 | fifo_out<=ram[read_ptr]; 51 | counter<=counter-1; 52 | read_ptr<=(read_ptr==7)?0:read_ptr+1; 53 | end 54 | 2'b11: 55 | begin 56 | ram[write_ptr]<=din; 57 | fifo_out<=(read_ptr==write_ptr)?din:ram[read_ptr]; 58 | write_ptr<=(write_ptr==7)?0:write_ptr+1; 59 | read_ptr<=(read_ptr==7)?0:read_ptr+1; 60 | end 61 | endcase 62 | 63 | assign empty=(counter==0); 64 | assign fifo_half=(counter>=3); 65 | assign fifo_full=(counter==8); 66 | assign dout=fifo_out; 67 | assign prog_full=fifo_half; 68 | assign full=fifo_full; 69 | 70 | endmodule 71 | -------------------------------------------------------------------------------- /hw/source/fifo_parser_lit.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | 3 | module fifo_parser_lit 4 | #( 5 | parameter WIDTH=85, 6 | parameter DEPTH=8 7 | ) 8 | ( 9 | input clk, 10 | input srst, 11 | 12 | // .almost_full(), 13 | output full, 14 | input[WIDTH-1:0] din, 15 | input wr_en, 16 | 17 | output empty, 18 | output[WIDTH-1:0] dout, 19 | input rd_en, 20 | 21 | output valid, 22 | output prog_full, 23 | output wr_rst_busy, 24 | output rd_rst_busy 25 | ); 26 | reg [WIDTH-1:0]fifo_out; 27 | reg [WIDTH-1:0]ram[7:0]; 28 | reg [3:0]read_ptr,write_ptr,counter; 29 | wire fifo_half,fifo_full; 30 | /* Same 8-entry FIFO as fifo_parser_copy, with a wider default WIDTH. State is updated with non-blocking assignments only; a simultaneous read+write with read_ptr==write_ptr forwards din to the output, matching the previous blocking statement order. */ 31 | always@(posedge clk) 32 | if(srst) 33 | begin 34 | read_ptr<=0; 35 | write_ptr<=0; 36 | counter<=0; 37 | end 38 | else 39 | case({rd_en,wr_en}) 40 | 2'b00: 41 | counter<=counter; 42 | 2'b01: 43 | begin 44 | ram[write_ptr]<=din; 45 | counter<=counter+1; 46 | write_ptr<=(write_ptr==7)?0:write_ptr+1; 47 | end 48 | 2'b10: 49 | begin 50 | fifo_out<=ram[read_ptr]; 51 | counter<=counter-1; 52 | read_ptr<=(read_ptr==7)?0:read_ptr+1; 53 | end 54 | 2'b11: 55 | begin 56 | ram[write_ptr]<=din; 57 | fifo_out<=(read_ptr==write_ptr)?din:ram[read_ptr]; 58 | write_ptr<=(write_ptr==7)?0:write_ptr+1; 59 | 
read_ptr<=(read_ptr==7)?0:read_ptr+1; 60 | end 61 | endcase 62 | 63 | assign empty=(counter==0); 64 | assign fifo_half=(counter>=3); 65 | assign fifo_full=(counter==8); 66 | assign dout=fifo_out; 67 | assign prog_full=fifo_half; 68 | assign full=fifo_full; 69 | 70 | endmodule 71 | -------------------------------------------------------------------------------- /sw/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 International Business Machines 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # 18 | # Generate HDL version of the HLS sources 19 | # 20 | # The generated HDL depends on the chip which is used and 21 | # therefore must match what is being used to build the 22 | # toplevel SNAP bitstream. 23 | # 24 | # FIXME Pass part_number and other parameters from toplevel 25 | # build-system as required. 26 | # 27 | 28 | # This is solution specific. Check if we can replace this by generics too. 29 | 30 | # Finding $SNAP_ROOT 31 | ifndef SNAP_ROOT 32 | # check if we are in sw folder of an action (three directories below snap root) 33 | ifneq ("$(wildcard ../../../ActionTypes.md)","") 34 | SNAP_ROOT=$(abspath ../../../) 35 | else 36 | $(info You are not building your software from the default directory (/path/to/snap/actions//sw) or specified a wrong $$SNAP_ROOT.) 37 | $(error Please make sure that $$SNAP_ROOT is set up correctly.) 
38 | endif 39 | endif 40 | 41 | projs += snap_decompressor snap_example_ddr snap_example_nvme snap_example_set 42 | projs += snap_example_qnvme 43 | 44 | # If you have the host code outside of the default snap directory structure, 45 | # change to /path/to/snap/actions/software.mk 46 | include $(SNAP_ROOT)/actions/software.mk 47 | -------------------------------------------------------------------------------- /hw/source/queue_token.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: queue_token 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 10th Sept, 2018 7 | Function: The queue (you can also call it FIFO) to store the slice from the preparser (the first level parser) 8 | ****************************/ 9 | ////format of input : | 18Byte data | 16bit token position | 16bit address | 1bit flag to check whether starts with literal content | 10 | `timescale 1ns/1ps 11 | 12 | module queue_token( 13 | input clk, 14 | input rst_n, 15 | ///////input and output of page 16 | input[143:0] data_in, 17 | input[15:0] position_in, 18 | input[16:0] address_in, 19 | input[2:0] garbage_in, 20 | input lit_flag_in, 21 | input wrreq, 22 | 23 | output[143:0] data_out, 24 | output[15:0] position_out, 25 | output[16:0] address_out, 26 | output[2:0] garbage_out, 27 | output lit_flag_out, 28 | output valid_out, 29 | ////////control signal 30 | 31 | 32 | input rdreq, 33 | output isempty, 34 | 35 | output almost_full 36 | ); 37 | 38 | reg valid_reg; 39 | always@(posedge clk)begin 40 | if(~rst_n)begin 41 | valid_reg <=1'b0; 42 | end else if(isempty==1'b0 & valid_reg==1'b0)begin 43 | valid_reg <=1'b1; 44 | end else if(rdreq)begin 45 | valid_reg <= ~isempty; 46 | end 47 | end 48 | 49 | wire[180:0] q; 50 | page_fifo pf0( 51 | .clk(clk), 52 | .srst(~rst_n), 53 | .din({data_in,position_in,address_in,garbage_in,lit_flag_in}), 54 | .wr_en(wrreq), 55 | 
.rd_en(isempty?1'b0:rdreq), 56 | .dout(q), 57 | .full(), 58 | .valid(), 59 | .empty(isempty), 60 | .prog_full(almost_full), 61 | .wr_rst_busy(), 62 | .rd_rst_busy() 63 | ); 64 | assign data_out = q[180:37]; 65 | assign position_out = q[36:21]; 66 | assign address_out = q[20:4]; 67 | assign garbage_out = q[3:1]; 68 | assign lit_flag_out = q[0]; 69 | assign valid_out = valid_reg; 70 | 71 | endmodule 72 | -------------------------------------------------------------------------------- /sw/snap_example.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 International Business Machines 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef __SNAP_FW_EXA__ 18 | #define __SNAP_FW_EXA__ 19 | 20 | /* 21 | * This makes it obvious that we are influenced by HLS details ... 22 | * The ACTION control bits are defined in the following file. 
23 | */ 24 | #include <snap_hls_if.h> 25 | 26 | /* Header file for SNAP Framework example code */ 27 | #define ACTION_TYPE_EXAMPLE 0x10140000 /* Action Type */ 28 | 29 | #define ACTION_CONFIG 0x30 30 | #define ACTION_CONFIG_COUNT 1 /* Count Mode */ 31 | #define ACTION_CONFIG_COPY_HH 2 /* Memcopy Host to Host */ 32 | #define ACTION_CONFIG_COPY_HD 3 /* Memcopy Host to DDR */ 33 | #define ACTION_CONFIG_COPY_DH 4 /* Memcopy DDR to Host */ 34 | #define ACTION_CONFIG_COPY_DD 5 /* Memcopy DDR to DDR */ 35 | #define ACTION_CONFIG_COPY_HDH 6 /* Memcopy Host to DDR to Host */ 36 | #define ACTION_CONFIG_MEMSET_H 8 /* Memset Host Memory */ 37 | #define ACTION_CONFIG_MEMSET_F 9 /* Memset FPGA Memory */ 38 | #define ACTION_CONFIG_COPY_DN 0x0a /* Copy DDR to NVME drive 0 */ 39 | #define ACTION_CONFIG_COPY_ND 0x0b /* Copy NVME drive 0 to DDR */ 40 | #define NVME_DRIVE1 0x10 /* Select Drive 1 for 0a and 0b */ 41 | 42 | #define ACTION_SRC_LOW 0x34 /* LBA for 0A, 1A, 0B and 1B */ 43 | #define ACTION_SRC_HIGH 0x38 44 | #define ACTION_DEST_LOW 0x3c /* LBA for 0A, 1A, 0B and 1B */ 45 | #define ACTION_DEST_HIGH 0x40 46 | #define ACTION_CNT 0x44 /* Count Register or # of 512 Byte Blocks for NVME */ 47 | #define ACTION_RD_SIZE 0x44 /* number of bytes read from host memory (same offset as ACTION_CNT) */ 48 | #define ACTION_WR_SIZE 0x48 /* number of bytes written to host memory */ 49 | 50 | #endif /* __SNAP_FW_EXA__ */ 51 | -------------------------------------------------------------------------------- /hw/source/copyread_selector.v: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | File name: copyread_selector 3 | Author: Jianyu Chen 4 | School: Delft Univsersity of Technology 5 | Date: 12th July, 2018 6 | Description: This is a selector to select write command for 16 BRAMs in 16 ram_module. The source of the incoming 7 | commands can also from the copytoken_selector (the recycle unit on the paper). 
The command from 8 | copytoken_selector will get priority. 9 | Round-Robin is the stategy to do the selection if multiple ram_module have copy commands targeting this selector at the same time 10 | Each of this module is corresponding to one BRAM 11 | ********************************************/ 12 | 13 | `timescale 1ns/1ps 14 | 15 | module copyread_selector( 16 | input clk, 17 | input rst_n, 18 | 19 | input[1295:0] data_in, ///81*16 [80:72] address of ram [71:64] byte valid [63:0] data 20 | input[15:0] data_valid, ///whether input data is valid 21 | 22 | output[15:0] rd_out, 23 | output[63:0] data_out, 24 | output[8:0] address_out, 25 | output[7:0] validbyte_out, 26 | output valid_out 27 | ); 28 | 29 | reg[80:0] data_buff; 30 | reg valid_buff; 31 | wire[80:0] data_w; 32 | always@(posedge clk)begin 33 | data_buff <=data_w; 34 | valid_buff <=(data_valid!=16'b0); //if one of the parser gives request, it is valid 35 | end 36 | 37 | /////////////////////for arbiter 38 | reg[15:0] base; 39 | wire[15:0] grand_w; 40 | always@(posedge clk)begin 41 | if(~rst_n)begin 42 | base <= 16'b0000_0000_0000_0001; 43 | end 44 | else if(rd_out==0)begin 45 | base <= base; 46 | end 47 | else begin 48 | base <= {rd_out[14:0],rd_out[15]}; 49 | end 50 | end 51 | 52 | arbiter 53 | #( 54 | .WIDTH(16) 55 | )arbiter0 56 | ( 57 | .req(data_valid), 58 | .grant(grand_w), 59 | .base(base) 60 | ); 61 | ///////////////////////////////// 62 | 63 | function [15:0] onehot_int; 64 | input [15:0] in; 65 | integer i; 66 | begin 67 | onehot_int = 0; 68 | for (i = 15; i >= 0; i=i-1) begin 69 | if (in[i]) 70 | onehot_int = i; 71 | end 72 | end 73 | endfunction 74 | 75 | select 76 | #( 77 | .NUM_SEL (16), 78 | .NUM_LOG (4), 79 | .NUM_WIDTH (81) 80 | )select0 81 | ( 82 | .data_in(data_in), 83 | .sel(onehot_int(grand_w)), 84 | .data_out(data_w) 85 | ); 86 | assign rd_out =grand_w; 87 | assign data_out =data_buff[63:0]; 88 | assign validbyte_out=data_buff[71:64]; 89 | assign address_out =data_buff[80:72]; 90 | 
assign valid_out =valid_buff; 91 | 92 | endmodule 93 | -------------------------------------------------------------------------------- /hw/source/control.v: -------------------------------------------------------------------------------- 1 | module control# 2 | ( 3 | parameter NUM_PARSER=6, 4 | PARSER_ALLONE=16'hffff 5 | ) 6 | ( 7 | input clk, 8 | input rst_n, 9 | input start, 10 | 11 | input tf_empty, //token fifo 12 | input[NUM_PARSER-1:0] ps_finish, 13 | input page_input_finish, 14 | input[NUM_PARSER-1:0] ps_empty, //parser 15 | input[15:0] ram_empty,//ram 16 | 17 | // input block_out_finish 18 | input cl_finish, 19 | output page_finish //all decompression has finished, the data is in BRAMs, but the output is not yet finished 20 | 21 | ); 22 | 23 | reg all_empty; 24 | reg[15:0] all_empty_delay; 25 | reg page_input_finish_flag; 26 | always@(posedge clk)begin 27 | if((ps_empty==PARSER_ALLONE[NUM_PARSER-1:0]) & ram_empty==16'hffff & tf_empty)begin 28 | all_empty<=1'b1; 29 | end else begin 30 | all_empty<=1'b0; 31 | end 32 | 33 | if(start)begin 34 | page_input_finish_flag<=1'b0; 35 | end if(page_input_finish) begin 36 | page_input_finish_flag<=1'b1; 37 | end 38 | 39 | all_empty_delay[15:1] <=all_empty_delay[14:0]; 40 | all_empty_delay[0] <=all_empty; 41 | end 42 | 43 | reg[2:0] state; 44 | reg page_finish_r,block_finish_r; 45 | always@(posedge clk)begin 46 | if(~rst_n)begin 47 | state <= 3'd0; 48 | end else 49 | case(state) 50 | 3'd0:begin 51 | page_finish_r<=1'b0; 52 | block_finish_r<=1'b0; 53 | if(~tf_empty)begin 54 | state<=3'd1; 55 | end 56 | end 57 | 3'd1:begin //idle case 58 | page_finish_r<=1'b0; 59 | block_finish_r<=1'b0; 60 | 61 | /*if all the data in this file has been preparsed and token fifo is empty, which means all data is in parsers or later stages, go to next state*/ 62 | if(page_input_finish & tf_empty)begin 63 | state<=3'd3; 64 | end 65 | end 66 | /* 3'd2:begin//wait for the block finish 67 | if(all_empty_delay==6'b1111_11 & all_empty==1'b1)begin 
68 | state<=3'd4; 69 | block_finish_r<=1'b1; 70 | end 71 | end 72 | */ 73 | 3'd3:begin //wait for the page clean finished 74 | //when all data in a file (not 64KB block) is processed, set page_finish_r to high to inform other module 75 | if(all_empty_delay==16'hffff & all_empty==1'b1 & tf_empty)begin 76 | page_finish_r<=1'b1; 77 | end 78 | //after all the valid bits in the BRAMs are cleaned, go to next state 79 | if(cl_finish)begin 80 | state <= 3'd4; 81 | end 82 | end 83 | 3'd4:begin //reset the signals and go back to initial state 84 | block_finish_r <=1'b0; 85 | state <=3'd0; 86 | page_finish_r <=1'b0; 87 | end 88 | default:state<=3'd0; 89 | endcase 90 | end 91 | assign page_finish=page_finish_r; 92 | //assign block_finish=block_finish_r; 93 | 94 | endmodule 95 | -------------------------------------------------------------------------------- /hw/source/distributor.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: distributor 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 10th Sept, 2018 7 | Function: Get slice from queue_token module (a fifo to store the slices). 
And distribute the slice to 8 | one of the idle parsers (2nd level parser) using Round-Robin strategy 9 | ****************************/ 10 | `timescale 1ns/1ps 11 | 12 | module distributor 13 | #( parameter NUM_PARSER=6,BASE_INIT = 1 14 | ) 15 | ( 16 | input clk, 17 | input rst_n, 18 | 19 | ///////input and output of page 20 | input[143:0] data_in, 21 | input[15:0] position_in, 22 | input[16:0] address_in, 23 | input[2:0] garbage_in, 24 | input lit_flag_in, 25 | 26 | input stop, ///stop the distributor 27 | 28 | input valid_in, 29 | input[NUM_PARSER-1:0] ready, ///whether each parser is ready to receive new page 30 | 31 | output[143:0] data_out, 32 | output[15:0] position_out, 33 | output[16:0] address_out, 34 | output[2:0] garbage_out, 35 | output lit_flag_out, 36 | 37 | output rdreq, 38 | output[NUM_PARSER-1:0] valid_out 39 | ); 40 | /********for test and debug only***********/ 41 | reg[15:0] cnt_total,cnt_out; 42 | always@(posedge clk)begin 43 | if(~rst_n)begin 44 | cnt_total<=16'b0; 45 | end else begin 46 | cnt_total<=cnt_total+16'b1; 47 | end 48 | 49 | if(~rst_n)begin 50 | cnt_out<=16'b0; 51 | end else if(rdreq) begin 52 | cnt_out<=cnt_out+16'b1; 53 | end 54 | 55 | end 56 | /***********************/ 57 | 58 | 59 | reg stop_reg; 60 | always@(posedge clk)begin 61 | if(~rst_n)begin 62 | stop_reg <=1'b0; 63 | end else begin 64 | stop_reg <=stop; 65 | end 66 | end 67 | 68 | /////for arbiter 69 | wire[NUM_PARSER-1:0] grand_w; 70 | reg[NUM_PARSER-1:0] base; 71 | arbiter 72 | #( 73 | .WIDTH(NUM_PARSER) 74 | )arbiter0 75 | ( 76 | .req(ready), 77 | .grant(grand_w), 78 | .base(base) 79 | ); 80 | always@(posedge clk)begin 81 | if(~rst_n)begin 82 | base <= BASE_INIT; 83 | end 84 | else if(ready==0 | (valid_in==1'b0))begin //if no parser is ready to receive or no data in fifo 85 | base <= base; 86 | end 87 | else begin 88 | base <= grand_w; 89 | end 90 | end 91 | 92 | /* 93 | ////extend valid_in signal to NUM_PARSER 94 | reg[NUM_PARSER-1:0] valid_total; 95 | integer i; 96 | 
always@(*)begin 97 | for(i=0;i= 0; i=i-1) begin 87 | if (in[i]) 88 | onehot_int = i; 89 | end 90 | end 91 | endfunction 92 | 93 | select 94 | #( 95 | .NUM_SEL (NUM_PARSER), 96 | .NUM_LOG (NUM_LOG), 97 | .NUM_WIDTH (33) 98 | )select0 99 | ( 100 | .data_in(data_in), 101 | .sel(onehot_int(grand_w)), 102 | .data_out(data_w) 103 | ); 104 | 105 | assign rd_out =stop?0:grand_w; 106 | assign unsolved_rd_out=stop?1'b0:((copy_valid==0)?unsolved_valid_in:1'b0); 107 | assign address_out =data_buff[32:24]; 108 | assign bvalid_out =data_buff[23:16]; 109 | assign offset_out =data_buff[15:0]; 110 | assign valid_out =valid_buff; 111 | 112 | endmodule 113 | -------------------------------------------------------------------------------- /hw/source/decompressor_wrapper.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: decompressor_wrapper 3 | Author: Jian Fang 4 | Date: 14th May, 2019 5 | Function: Wrapper of the decompressor, able to reset register for multiple runs 6 | ****************************/ 7 | `timescale 1ns/1ps 8 | 9 | module decompressor_wrapper( 10 | input clk, 11 | input rst_n, 12 | 13 | output last, // Whether the data is the last one in a burst 14 | output done, // Whether the decompression is done 15 | input start, // Start the decompressor after the compression_length and decompression_length is set 16 | // The user should set it to 1 for starting the decompressor, and need to set it back to 0 after 1 cycle 17 | // TODO: add logic to only check the rising edge of the clk instead of checking '1'; 18 | // transform it to only have 1 cycle in "HIGH" 19 | 20 | input[511:0] in_data, //The compressed data 21 | input in_data_valid, //Whether or not the data on the in_data port is valid. 
22 | output in_data_ready, //Whether or not the decompressor is ready to receive data on its in_data port 23 | 24 | input[34:0] compression_length, //length of the data before decompression (compressed data) 25 | input[31:0] decompression_length, //length of the data after decompression (uncompressed data) 26 | input in_metadata_valid, //Whether or not the data on the compression_length and decompression_length ports is valid. 27 | output in_metadata_ready, //Whether or not the decompressor is ready to receive data on its compression_length and decompression_length ports. 28 | 29 | output[511:0] out_data, //The decompressed data 30 | output out_data_valid, //Whether or not the data on the out_data port is valid 31 | output[63:0] out_data_byte_valid, 32 | input out_data_ready //Whether or not the component following the decompressor is ready to receive data. 33 | ); 34 | 35 | wire dec_done; 36 | wire almost_full; 37 | 38 | reg start_reg; 39 | always@(posedge clk)begin 40 | if(~rst_n)begin 41 | start_reg <= 1'b0; 42 | end else begin 43 | start_reg <= start; 44 | end 45 | end 46 | 47 | reg [34:0]com_len_reg; 48 | reg [31:0]dec_len_reg; 49 | reg in_metadata_ready_reg; 50 | always@(posedge clk)begin 51 | if(~rst_n)begin 52 | com_len_reg <= 35'b0; 53 | dec_len_reg <= 32'b0; 54 | end else begin 55 | if(in_metadata_valid && in_metadata_ready_reg) begin 56 | com_len_reg <= compression_length; 57 | dec_len_reg <= decompression_length; 58 | end 59 | end 60 | end 61 | 62 | always@(posedge clk)begin 63 | if(~rst_n)begin 64 | in_metadata_ready_reg <= 1'b1; 65 | end else begin 66 | if(in_metadata_ready_reg) begin // ready to get metadata 67 | if(in_metadata_valid) begin 68 | in_metadata_ready_reg <= 1'b0; // lock the ready signal after the metadata is set 69 | end 70 | end 71 | else begin // not ready to get the metadata, means it is busy. 
Unlock after the decompression is "done" 72 | if(dec_done) 73 | in_metadata_ready_reg <= 1'b1; 74 | end 75 | end 76 | end 77 | 78 | 79 | decompressor dec0( 80 | .clk(clk), 81 | .rst_n(rst_n), 82 | .data(in_data), 83 | .valid_in(in_data_valid), 84 | .start(start_reg), 85 | .compression_length(com_len_reg), 86 | .decompression_length(dec_len_reg), 87 | .wr_ready(out_data_ready), 88 | 89 | .data_fifo_almostfull(almost_full), 90 | 91 | .done(dec_done), 92 | .last(last),///whether it is the last 64B of a burst 93 | .data_out(out_data), 94 | .byte_valid_out(out_data_byte_valid), 95 | .valid_out(out_data_valid) 96 | ); 97 | 98 | assign in_metadata_ready = in_metadata_ready_reg; 99 | assign done = dec_done; 100 | assign in_data_ready = ~almost_full; 101 | 102 | endmodule 103 | -------------------------------------------------------------------------------- /hw/source/lit_selector.v: -------------------------------------------------------------------------------- 1 | ///data from read result will always be processed first 2 | `timescale 1ns/1ps 3 | 4 | module lit_selector 5 | #( 6 | parameter NUM_PARSER=6, 7 | NUM_LOG=3, 8 | BASE_INIT=6'b1 9 | ) 10 | ( 11 | input clk, 12 | input rst_n, 13 | input[64*NUM_PARSER-1:0] data_lit, 14 | input[9*NUM_PARSER-1:0] lit_address, 15 | input[8*NUM_PARSER-1:0] byte_valid_in, 16 | input[NUM_PARSER-1:0] lit_valid, //whether the data of parser is for this selector 17 | 18 | input[63:0] data_copy, //data from read result 19 | input[8:0] address_copy, //address from read result 20 | input[7:0] byte_valid_copy, 21 | input copy_valid, 22 | 23 | output[NUM_PARSER-1:0] rd_out, ///select a parser to read 24 | output[63:0] data_out, 25 | output[8:0] address_out, 26 | output[7:0] byte_valid_out, 27 | output valid_out 28 | 29 | ); 30 | reg[63:0] data_buff; 31 | reg[8:0] address_buff; 32 | reg[7:0] byte_valid_buff; 33 | reg valid_buff; 34 | reg[NUM_PARSER-1:0] base; 35 | wire[63:0] data_w; 36 | wire[8:0] address_w; 37 | wire[7:0] byte_valid_w; 38 | 
wire[NUM_PARSER-1:0] grant_w; 39 | 40 | always@(posedge clk)begin 41 | address_buff<=copy_valid?address_copy:address_w; 42 | data_buff <=copy_valid?data_copy:data_w; 43 | byte_valid_buff<=copy_valid?byte_valid_copy:byte_valid_w; 44 | valid_buff <=(lit_valid!=0)|copy_valid; ///if there is one req, valid 45 | end 46 | 47 | always@(posedge clk)begin 48 | if((~rst_n) | (grant_w==0))begin 49 | base <= BASE_INIT; 50 | end 51 | else if(copy_valid)begin 52 | base <= grant_w; 53 | end 54 | else begin 55 | if(NUM_PARSER == 1)begin //if there is only once parser, no need to shift 56 | base <= grant_w; 57 | end else begin 58 | base <= {grant_w[NUM_PARSER-2:0],grant_w[NUM_PARSER-1]}; ///left shift 59 | end 60 | end 61 | end 62 | 63 | arbiter 64 | #( 65 | .WIDTH(NUM_PARSER) 66 | )arbiter0 67 | ( 68 | .req(lit_valid), 69 | .grant(grant_w), 70 | .base(base) 71 | ); 72 | 73 | function [NUM_LOG-1:0] onehot_int; 74 | input [NUM_PARSER-1:0] in; 75 | integer i; 76 | begin 77 | onehot_int = 0; 78 | for (i = NUM_PARSER-1; i >= 0; i=i-1) begin 79 | if (in[i]) 80 | onehot_int = i; 81 | end 82 | end 83 | endfunction 84 | 85 | select 86 | #( 87 | .NUM_SEL (NUM_PARSER), 88 | .NUM_LOG (NUM_LOG), 89 | .NUM_WIDTH (64) 90 | )select_data 91 | ( 92 | .data_in(data_lit), 93 | .sel(onehot_int(grant_w)), 94 | .data_out(data_w) 95 | ); 96 | 97 | select 98 | #( 99 | .NUM_SEL (NUM_PARSER), 100 | .NUM_LOG (NUM_LOG), 101 | .NUM_WIDTH (9) 102 | )select_address 103 | ( 104 | .data_in(lit_address), 105 | .sel(onehot_int(grant_w)), 106 | .data_out(address_w) 107 | ); 108 | 109 | select 110 | #( 111 | .NUM_SEL (NUM_PARSER), 112 | .NUM_LOG (NUM_LOG), 113 | .NUM_WIDTH (8) 114 | )select_bytevalid 115 | ( 116 | .data_in(byte_valid_in), 117 | .sel(onehot_int(grant_w)), 118 | .data_out(byte_valid_w) 119 | ); 120 | 121 | assign data_out =data_buff; 122 | assign address_out =address_buff; 123 | assign byte_valid_out=byte_valid_buff; 124 | assign valid_out =valid_buff; 125 | assign rd_out =copy_valid?0:grant_w; 126 | 
127 | endmodule 128 | 129 | 130 | module arbiter 131 | #( 132 | parameter WIDTH = 6 133 | ) 134 | ( 135 | req, grant, base 136 | ); 137 | 138 | input [WIDTH-1:0] req; 139 | output [WIDTH-1:0] grant; 140 | input [WIDTH-1:0] base; 141 | 142 | wire [2*WIDTH-1:0] double_req = {req,req}; 143 | wire [2*WIDTH-1:0] double_grant = double_req & ~(double_req-base); 144 | assign grant = double_grant[WIDTH-1:0] | double_grant[2*WIDTH-1:WIDTH]; 145 | 146 | endmodule 147 | -------------------------------------------------------------------------------- /hw/source/axi_io.v: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | File name: axi_io 3 | Author: Jianyu Chen 4 | School: Delft Univsersity of Technology 5 | Date: 10th Sept, 2018 6 | Description: Top level of the axi protocal interface, containing the decompressor and an io_control module to control 7 | the input and output data flow from axi interface. 8 | NOTICE: if you want to migrate the decompressor to other platform or other interface. Only the decompressor 9 | module is needed. 
10 | ********************************************/ 11 | `timescale 1ns/1ps 12 | 13 | module axi_io 14 | #( 15 | parameter C_M_AXI_ADDR_WIDTH=64, 16 | C_M_AXI_DATA_WIDTH=512 17 | )( 18 | input clk, 19 | input rst_n, 20 | //////ports from axi_slave module 21 | input start, 22 | output done, 23 | output idle, 24 | output ready, 25 | 26 | input[C_M_AXI_ADDR_WIDTH-1:0] src_addr, //address to read from host memory 27 | input[C_M_AXI_ADDR_WIDTH-1:0] des_addr, ///address to write result to host memory 28 | input[31:0] compression_length, 29 | input[31:0] decompression_length, 30 | /////////ports to read data from host memory 31 | output dma_rd_req, 32 | output[C_M_AXI_ADDR_WIDTH-1:0] dma_rd_addr, 33 | output[7:0] dma_rd_len, 34 | input dma_rd_req_ack, 35 | input[C_M_AXI_DATA_WIDTH-1:0] dma_rd_data, 36 | input dma_rd_data_valid, 37 | output dma_rd_data_taken, 38 | ///////// ports to write data to host memory 39 | output dma_wr_req, 40 | output[C_M_AXI_ADDR_WIDTH-1:0] dma_wr_addr, 41 | output[7:0] dma_wr_len, 42 | input dma_wr_req_ack, 43 | output[C_M_AXI_DATA_WIDTH-1:0] dma_wr_data, 44 | output dma_wr_wvalid, 45 | output[63:0] dma_wr_data_strobe, 46 | output dma_wr_data_last, 47 | input dma_wr_ready, 48 | output dma_wr_bready, 49 | input dma_wr_done 50 | 51 | 52 | ); 53 | wire dec_almostfull; 54 | 55 | /******************** 56 | reorder the input and output data 57 | data for dma is in this order: byte n,byte n-1,...,byte 1,byte 0, 58 | data for decompressor is in a reverse order: byte 0,byte 1,...byte n-1,byte n 59 | the byte-valid strobe is mirrored the same way; it carries one bit per data byte, so it is sized and indexed by C_M_AXI_DATA_WIDTH/8 rather than by the address width ********************/ 60 | wire[C_M_AXI_DATA_WIDTH-1:0] dec_data_in,dec_data_out; 61 | wire[(C_M_AXI_DATA_WIDTH/8)-1:0] dec_byte_valid; 62 | genvar i; 63 | generate 64 | for(i=0;i<(C_M_AXI_DATA_WIDTH/8);i=i+1)begin 65 | assign dec_data_in[i*8+7:i*8+0] = dma_rd_data[C_M_AXI_DATA_WIDTH-i*8-1:C_M_AXI_DATA_WIDTH-i*8-8]; 66 | assign dma_wr_data[C_M_AXI_DATA_WIDTH-i*8-1:C_M_AXI_DATA_WIDTH-i*8-8] = dec_data_out[i*8+7:i*8+0]; 67 | assign 
dma_wr_data_strobe[(C_M_AXI_DATA_WIDTH/8)-1-i]=dec_byte_valid[i]; 68 | end 69 | endgenerate 70 | /*******************/ 71 | wire done_decompressor; 72 | wire done_control; 73 | 74 | decompressor d0( 75 | .clk(clk), 76 | .rst_n(rst_n), 77 | .data(dec_data_in), 78 | .valid_in(dma_rd_data_valid), 79 | .start(start), 80 | .compression_length({3'b0,compression_length}), 81 | .decompression_length(decompression_length), 82 | .wr_ready(dma_wr_ready), 83 | 84 | .data_fifo_almostfull(dec_almostfull), 85 | 86 | .done(done_decompressor), 87 | .last(dma_wr_data_last), 88 | .data_out(dec_data_out), 89 | .byte_valid_out(dec_byte_valid), 90 | .valid_out(dma_wr_wvalid) 91 | ); 92 | io_control io_control0( 93 | .clk(clk), 94 | .rst_n(rst_n), 95 | 96 | .src_addr(src_addr), 97 | .rd_req(dma_rd_req), 98 | .rd_req_ack(dma_rd_req_ack), 99 | .rd_len(dma_rd_len), 100 | .done_i(done_decompressor), 101 | .start(start), 102 | .idle(idle), 103 | .ready(ready), 104 | .rd_address(dma_rd_addr), 105 | .done_out(done_control), 106 | 107 | .wr_valid(dma_wr_wvalid), 108 | .wr_ready(dma_wr_ready), 109 | .des_addr(des_addr), 110 | .wr_req(dma_wr_req), 111 | .wr_req_ack(dma_wr_req_ack), 112 | .wr_len(dma_wr_len), 113 | .wr_address(dma_wr_addr), 114 | .bready(dma_wr_bready), 115 | .bresp(dma_wr_done), 116 | 117 | .decompression_length(decompression_length), 118 | .compression_length({3'b0,compression_length}) 119 | 120 | ); 121 | assign dma_rd_data_taken = ~dec_almostfull; 122 | assign done = done_decompressor && done_control; 123 | 124 | endmodule 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FPGA-Snappy-Decompressor (Work-in) 2 | An FPGA-based hardware Snappy decompressor. This is a new kind of decompressor architecture that can process multiple literal token and copy token in parallel. 
3 | 4 | Current State 5 | --- 6 | Simulation: xsim+snap(POWER9+CAPI2.0+ADM9V3) under 250MHz\ 7 | Measurement: POWER9+CAPI2.0+ADM9V3 under 250MHz\ 8 | Future: 1) Move to OpenCAPI Platform; 2) Multiple engines 9 | 10 | For Users 11 | ---- 12 | For users who want to integrate the decompressor into their own system, please use the wrapper file (decompressor_wrapper) as the top-level file of the decompressor. \ 13 | The decompressor acts as a black box that consumes the input stream and outputs the decompressed stream. The only requirement is to set the compression_length and the decompression_length before the data is streamed in. \ 14 | All metadata and data communication uses a ready/valid handshake protocol. 15 | 16 | The decompressor uses the following interface: 17 | 18 | input clk, // the clock signal 19 | input rst_n, // the reset signal 20 | output last, // Whether the data is the last one in a burst 21 | output done, // Whether the decompression is done 22 | input start, // Start the decompressor after the compression_length and decompression_length are set 23 | // The user should set it to 1 to start the decompressor, and needs to set it back to 0 after 1 cycle 24 | 25 | input[511:0] in_data, //The compressed data 26 | input in_data_valid, //Whether or not the data on the in_data port is valid. 27 | output in_data_ready, //Whether or not the decompressor is ready to receive data on its in_data port 28 | 29 | input[34:0] compression_length, //length of the data before decompression (compressed data) 30 | input[31:0] decompression_length, //length of the data after decompression (uncompressed data) 31 | input in_metadata_valid, //Whether or not the data on the compression_length and decompression_length ports is valid. 32 | output in_metadata_ready, //Whether or not the decompressor is ready to receive data on its compression_length and decompression_length ports. 
33 | 34 | output[511:0] out_data, //The decompressed data 35 | output out_data_valid, //Whether or not the data on the out_data port is valid 36 | output[63:0] out_data_byte_valid, //Which bytes of the output are valid 37 | input out_data_ready //Whether or not the component following the decompressor is ready to receive data. 38 | 39 | The communication protocol follows a few steps. \ 40 | (1) Set metadata (compression_length and decompression_length)\ 41 | (2) Set "start" \ 42 | (3) Stream data in for decompression\ 43 | (4) After the "done" signal is returned, a new decompression can be started again from Step (1). 44 | 45 | 46 | Working platform 47 | ---- 48 | Currently, the decompressor is used on IBM CAPI 2.0 with the SNAP interface. See: https://github.com/open-power/snap \ 49 | The demo works on this platform: it fetches data from memory, decompresses it and sends the decompression result back 50 | 51 | Generating IPs 52 | ---- 53 | Currently, this project utilizes some IP cores which are generated by the tcl file (create_action_ip.tcl) 54 | 55 | Directory and file 56 | --- 57 | ip: IP files for the decompressor (tcl files)\ 58 | source: Verilog files for the decompressor\ 59 | interface: VHDL file to connect the decompressor to IBM CAPI platform and run a demo\ 60 | sw: software to test the decompressor on IBM CAPI platform\ 61 | Doc: documents for the decompressor\ 62 | (if you want to use the decompressor on other platform, only files in user_ip and source are needed) 63 | 64 | Recommended compression software 65 | ---- 66 | If you use the compression software from Google, the performance of this decompressor may be bad for some special data with extremely high data dependency. In this case, it is recommended to use a modified compression software: https://github.com/ChenJianyunp/snappy-c \ 67 | In this version, the compression algorithm is slightly changed, but the compression result is still in standard Snappy format. 
It causes almost no change in the compression ratio, while greatly reducing the data dependency and making the parallel decompression more efficient. 68 | 69 | Parameters of implementation on Vivado: 70 | ---- 71 | Currently, this decompressor passes the build test on the ADM-9V3 FPGA card (FPGA: XCVU3P-2 - FFVC1517) at a clock speed of 250MHz. Please choose the following place and route strategy:\ 72 | place strategy: Congestion_SpreadLogic_medium\ 73 | route strategy: AlternateCLBRouting\ 74 | With the default strategy, the timing constraints may fail due to congestion 75 | 76 | Publication 77 | ------ 78 | 1. A work-in-progress paper is accepted in CODES+ISSS 2018, see: https://ieeexplore.ieee.org/document/8525953 \ 79 | 2. A regular paper "Refine and Recycle: A Method to Increase Decompression Parallelism" is accepted in ASAP 2019, see: https://ieeexplore.ieee.org/document/8825015/ \ 80 | 3. A journal paper extended this work to a multi-engine instance: "An efficient high-throughput LZ77-based decompressor in reconfigurable logic", see: https://link.springer.com/article/10.1007/s11265-020-01547-w 81 | 82 | Contact 83 | ---- 84 | If you have any questions or recommendations for this project, please contact Jianyu Chen(1327079210@qq.com) or Jian Fang(fangjian_alpc@163.com) 85 | 86 | Update log 87 | ---- 88 | | Jianyu Chen | 18-11-2018: Fix a bug on the length of garbage_cnt\ 89 | | Jianyu Chen | 25-11-2018: Fix a bug of overflow on page_fifo\ 90 | | Jianyu Chen | 26-11-2018: Fix a bug that loses the last slice\ 91 | | Jian Fang | 22-01-2019: Fix a bug in handshake of AXI protocol (1.read/write length; 2.FSM for input that is less than 4KB)\ 92 | | Jian Fang | 01-02-2019: Fix a bug on app_ready signal\ 93 | | Jian Fang | 01-02-2019: Fix a bug on write responses(bresp signal, need to wait until the last bresp back for the write data)\ 94 | | Jianyu Chen | 04-02-2019: Fix the bug in the calculation of token length of 3Byte literal token\ 95 | | Jianyu Chen | 05-02-2019: Fix a bug when 
calculate the length of a literal token within a slice\ 96 | | Jianyu Chen | 05-02-2019: Fix the bug in checking the empty of decompressor when the file is very small\ 97 | | Jian Fang | 05-03-2019: Fix the bug of input/output of 'data_in' and 'wr_en' signals in the parser fifo (both lit and copy)\ 98 | | Jianyu Chen | 28-03-2019: Fix the bug on the wrong literature size on parser\ 99 | | Jianyu Chen | 14-04-2019: Fix the bug in passing the wrong NUM_PARSER to control module\ 100 | | Jianyu Chen | 23-04-2019: Fix a bug: the length of lit_length is too short\ 101 | | Jian Fang | 14-05-2019: Add decompressor wrapper for reusing this block in other designs 102 | -------------------------------------------------------------------------------- /hw/source/ram_module.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: ram_module 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 10th Sept, 2018 7 | Function: The module to process the BRAM-based command. In each clock cycle, it can process a write operation 8 | and a copy command, and will generate 1 or 2 or 0 write command and maybe also a unsolved copy command. 9 | NOTICE: this module contains not only BRAM! but also circuit to process the command and FIFOs to store 10 | the commands. 
****************************/
`timescale 1ns/1ps

/*
	ram_module wraps one ram_block together with three FIFOs that buffer its outputs:
	  data_even / data_odd : read results destined for the even / odd RAMs (read_result_fifo)
	  unsolved_fifo0       : copy tokens whose bytes could not all be read yet
	Each FIFO output is wrapped in a small prefetch stage (*_valid_reg / *_rd_w):
	a read is issued to the FIFO either to fetch the first entry while nothing is
	held on dout (*_valid_reg low), or when the consumer pops the held entry (*_rd).
	NOTE(review): this presumably relies on the FIFO IPs being standard (non first-word
	fall-through) FIFOs whose dout updates the cycle after rd_en -- confirm against
	create_action_ip.tcl.
*/
module ram_module#(
	parameter	BLOCKNUM=4'b0,			///define the number of this block, from 0 to 15
				HALFULL_THRESH=9'd24	///unsolved_half_full is high while the unsolved fifo data count exceeds this
)(
	input	clk,
	input	rst_n,
	input	rd_en,				///read the ram
	output	empty,				///high when the even, odd and unsolved fifos are all empty
	input	block_out_finish,	//when the block output is finished
	input	page_finish,		//in the end of a file, clean all data

	///signal for writing data
	input			valid_wr_in,
	input[63:0]		lit_in,
	input[8:0]		lit_address,
	input[7:0]		lit_valid,

	//signal for reading data
	input			valid_rd_in,
	input[8:0]		copy_address,
	input[7:0]		copy_valid_in,	//choose bytes to read
	input[15:0]		copy_offset_in,

	output	lit_almost_full,	///even or odd fifo almost full

	///signal for even fifo
	input			even_rd,
	output[80:0]	even_data_out,	////[80:72] address of ram  [71:64] byte valid   [63:0] data
	output[7:0]		even_ram_select,

	///signal for odd fifo
	input			odd_rd,
	output[80:0]	odd_data_out,	/////[80:72] address of ram  [71:64] byte valid   [63:0] data
	output[7:0]		odd_ram_select,

	////////signal for unsolved fifo
	input			unsolved_rd,
	output			unsolved_almost_full,
	output			unsolved_half_full,
	output[32:0]	unsolved_data_out,
	output			unsolved_valid_out,
	/////output the decompression result
	output[63:0]	data_out
);

wire		ram_unsolved_valid;
wire[32:0]	ram_unsolved_token;

wire[7:0]	odd_ram_select_w,even_ram_select_w;

wire		even_valid_out,odd_valid_out;
wire		ram_even_valid,ram_odd_valid;
wire[63:0]	ram_even_data,ram_odd_data;
wire[7:0]	ram_even_hit,ram_odd_hit;
wire[8:0]	ram_even_address,ram_odd_address;

wire[15:0]	ram_select_w;
wire		even_empty,odd_empty,unsolved_empty;

wire[63:0]	ram_data_out;
ram_block#(
	.BLOCKNUM(BLOCKNUM)
)ram_block0
(
	.clk(clk),
	.rst_n(rst_n),
	.rd_en(rd_en),							///read the ram
	.block_out_finish(block_out_finish),	//when the page is finished
	.page_finish(page_finish),				//in the end of a file, clean all data

	///signal for writing data
	.valid_wr_in(valid_wr_in),
	.lit_in(lit_in),
	.lit_address(lit_address),
	.lit_valid(lit_valid),

	//signal for reading data
	.valid_rd_in(valid_rd_in),
	.copy_address(copy_address),
	.copy_valid_in(copy_valid_in),			//choose bytes to read
	.copy_offset_in(copy_offset_in),

	/////
	.unsolved_valid_out(ram_unsolved_valid),
	.unsolved_token_out(ram_unsolved_token),

	.even_valid_out(ram_even_valid),
	.even_data_out(ram_even_data),
	.even_hit_out(ram_even_hit),
	.even_address_out(ram_even_address),

	.odd_valid_out(ram_odd_valid),
	.odd_data_out(ram_odd_data),
	.odd_hit_out(ram_odd_hit),
	.odd_address_out(ram_odd_address),

	.ram_select_out(ram_select_w),

	////////output the compression result
	.data_out(ram_data_out)
);
/**********wire for debug (simulation messages only)*/
wire debug_even_full,debug_odd_full,debug_unsolved_full;
always@(posedge clk)begin
	if(debug_even_full)begin
		$display("even fifo full %d",BLOCKNUM);
	end
	if(debug_odd_full)begin
		$display("odd fifo full %d",BLOCKNUM);
	end
	if(debug_unsolved_full)begin
		$display("unsolved fifo full %d",BLOCKNUM);
	end

end

/***********************/

wire even_almost_full,odd_almost_full;

//split the 16-bit ram select into the even-numbered and odd-numbered rams
wire[7:0] ram_select_even,ram_select_odd;
assign ram_select_even={ram_select_w[14],ram_select_w[12],ram_select_w[10],ram_select_w[8],ram_select_w[6],ram_select_w[4],ram_select_w[2],ram_select_w[0]};
assign ram_select_odd ={ram_select_w[15],ram_select_w[13],ram_select_w[11],ram_select_w[9],ram_select_w[7],ram_select_w[5],ram_select_w[3],ram_select_w[1]};

////read result fifo word, width: 89bit  [88:80]:address  [79:72]:byte valid  [71:8]:data  [7:0]:ram_select
/************** even fifo **************/
reg even_valid_reg;		//an entry is being held on the even fifo output
always@(posedge clk)begin
	if(~rst_n)begin
		even_valid_reg	<=1'b0;
	end else if(even_empty==1'b0 & even_valid_reg==1'b0)begin
		even_valid_reg	<=1'b1;
	end else if(even_rd)begin
		even_valid_reg	<= ~even_empty;
	end
end
//pop the fifo when it is not empty and either nothing is held yet or the held entry is consumed
wire even_rd_w;
assign even_rd_w = (~even_empty) & ((even_valid_reg==1'b0)|even_rd);

read_result_fifo data_even(
	.clk(clk),
	.srst(~rst_n),

//	.almost_full(),
	.full(debug_even_full),
	.din({ram_even_address,ram_even_hit,ram_even_data,ram_select_even}),
	.wr_en(ram_even_valid),

	.empty(even_empty),
	.dout({even_data_out,even_ram_select_w}),
	.rd_en(even_rd_w),

	.valid(),
	.prog_full(even_almost_full),
	.wr_rst_busy(),
	.rd_rst_busy()
);
assign even_valid_out=even_valid_reg;

/************** odd fifo **************/
reg odd_valid_reg;		//an entry is being held on the odd fifo output
always@(posedge clk)begin
	if(~rst_n)begin
		odd_valid_reg	<=1'b0;
	end else if(odd_empty==1'b0 & odd_valid_reg==1'b0)begin
		odd_valid_reg	<=1'b1;
	end else if(odd_rd)begin
		odd_valid_reg	<= ~odd_empty;
	end
end
wire odd_rd_w;
assign odd_rd_w = (~odd_empty) & ((odd_valid_reg==1'b0)|odd_rd);

read_result_fifo data_odd(
	.clk(clk),
	.srst(~rst_n),

//	.almost_full(),
	.full(debug_odd_full),
	.din({ram_odd_address,ram_odd_hit,ram_odd_data,ram_select_odd}),
	.wr_en(ram_odd_valid),

	.empty(odd_empty),
	.dout({odd_data_out,odd_ram_select_w}),
	.rd_en(odd_rd_w),

	.valid(),
	.prog_full(odd_almost_full),
	.wr_rst_busy(),
	.rd_rst_busy()
);
assign odd_valid_out=odd_valid_reg;

/************** unsolved fifo **************/
wire[8:0] unsolved_data_cnt;
reg unsolved_valid_reg;	//an entry is being held on the unsolved fifo output
always@(posedge clk)begin
	if(~rst_n)begin
		unsolved_valid_reg	<=1'b0;
	end else if(unsolved_empty==1'b0 & unsolved_valid_reg==1'b0)begin
		unsolved_valid_reg	<=1'b1;
	end else if(unsolved_rd)begin
		unsolved_valid_reg	<= ~unsolved_empty;
	end
end
wire unsolved_rd_w;
assign unsolved_rd_w = (~unsolved_empty) & ((unsolved_valid_reg==1'b0)|unsolved_rd);

unsolved_fifo unsolved_fifo0(
	.clk(clk),
	.srst(~rst_n),

//	.almost_full(),
	.full(debug_unsolved_full),
	.din(ram_unsolved_token),
	.wr_en(ram_unsolved_valid),

	.empty(unsolved_empty),
	.dout(unsolved_data_out),
	.rd_en(unsolved_rd_w),

	.data_count(unsolved_data_cnt),
	.prog_full(unsolved_almost_full),
	.valid(),
	.wr_rst_busy(),
	.rd_rst_busy()
);
assign unsolved_valid_out=unsolved_valid_reg;

assign unsolved_half_full=(unsolved_data_cnt>HALFULL_THRESH);
assign lit_almost_full=even_almost_full | odd_almost_full;
//only the fifos are checked here; an entry already held on a fifo output is not counted
assign empty	=even_empty & odd_empty & unsolved_empty;
//the ram select outputs are masked to zero while no valid entry is held
assign odd_ram_select=	odd_ram_select_w  & {8{odd_valid_out}};
assign even_ram_select=	even_ram_select_w & {8{even_valid_out}};
assign data_out=ram_data_out;
endmodule

-------------------------------------------------------------------------------- /hw/source/io_control.v: --------------------------------------------------------------------------------
/********************************************
File name: 		io_control
Author: 		Jianyu Chen
School: 		Delft University of Technology
Date:			10th Sept, 2018
Description:	The module to control the input and output dataflow.
				Each burst read will acquire 4K data, except the last burst read of a file (it can be less)
				Each burst write will write 4K data, also except the last one
				This module is to control the dataflow of the axi protocol interface.

				Two independent state machines issue the read and the write burst requests.
				done_out is raised once every write request has been acknowledged, as many
				write responses (bresp) as write requests have been counted, and the read
				state machine has finished. Both completion flags are cleared again by the
				next start, so every run waits for its own responses.
				NOTE(review): a length of 0 is not handled specially; the "length-1" burst
				length wraps to 63 in that case -- confirm the caller never starts with 0.
********************************************/
`timescale 1ns/1ps

module io_control(
	input	clk,
	input	rst_n,

	input[63:0]		src_addr,	//start address of the compressed data
	output			rd_req,		//read burst request, held until rd_req_ack
	input			rd_req_ack,
	output[7:0]		rd_len,		//number of 64B beats in the burst minus 1
	output[63:0]	rd_address,

	input			wr_valid,	//not used inside this module
	input			wr_ready,	//not used inside this module
	input[63:0]		des_addr,	//start address for the decompressed data
	output			wr_req,		//write burst request, held until wr_req_ack
	input			wr_req_ack,
	output[7:0]		wr_len,		//number of 64B beats in the burst minus 1
	output[63:0]	wr_address,
	output			bready,
	input			bresp,		//one pulse per completed write burst

	input			done_i,		//done from the decompressor
	input			start,
	output			idle,
	output			ready,
	output			done_out,

	input[31:0]		decompression_length,
	input[34:0]		compression_length
);

/****************read data*************/
reg[34:0]	compression_length_r;	///only [34:6] is used: remaining number of 64B beats still to be requested
reg[63:0]	rd_address_r;
reg[7:0]	rd_len_r;
reg			rd_req_r;
reg[2:0]	rd_state;
reg			read_done_r;			//all read requests of the current run have been acknowledged

always@(posedge clk)begin
	if(~rst_n)begin
		rd_req_r	<= 1'b0;
		rd_state	<= 3'd0;
		read_done_r	<= 1'b0;
	end else case(rd_state)
	3'd0:begin // idle, wait for start
		if(start)begin
			//Round the length to the upper 64*n, n is an integer. Because the bandwidth is 64Byte
			if(compression_length[5:0]!=6'b0)begin
				compression_length_r[34:6]	<= compression_length[34:6] + 29'd1;
			end else begin
				compression_length_r[34:6]	<= compression_length[34:6];
			end

			rd_address_r	<= src_addr;
			rd_req_r		<= 1'b0;
			rd_state		<= 3'd1;
			//clear the flag of the previous run, otherwise the write side of this run
			//would see "read done" before any read request has been issued
			read_done_r		<= 1'b0;
		end
	end
	3'd1:begin // the state to read the first 4KB chunk of the 64KB Snappy block
		//If the block is greater than 4KB (64*64), read a 4KB block. If not, read all the block
		if(compression_length_r[34:6]<=29'd64)begin
			//exactly 64 beats: [11:6] is 0 and the subtraction wraps to 63, which is the wanted length
			rd_len_r					<= {2'd0,compression_length_r[11:6]-6'd1};
			compression_length_r[34:6]	<= 29'd0;
			rd_state					<= 3'd3;
		end else begin
			rd_len_r					<= 8'b11_1111;
			compression_length_r[34:6]	<= compression_length_r[34:6]-29'd64;
			rd_state					<= 3'd2;
		end
		rd_req_r	<= 1'b1;
	end
	3'd2:begin//the state to read the rest of the block
		//once get an acknowledge, request the next chunk
		if(rd_req_ack)begin
			rd_address_r	<= rd_address_r+64'd4096;
			if(compression_length_r[34:6]<=29'd64)begin
				rd_state					<= 3'd3;
				rd_len_r					<= {2'd0,compression_length_r[11:6]-6'd1};
				compression_length_r[34:6]	<= 29'd0;
			end else begin
				rd_len_r					<= 8'b11_1111;
				compression_length_r[34:6]	<= compression_length_r[34:6]-29'd64;
			end
		end
	end
	3'd3:begin//wait for the acknowledge of the last request, then drop rd_req
		if(rd_req_ack)begin
			rd_req_r	<= 1'b0;
			rd_state	<= 3'd4;
		end
	end
	3'd4:begin
		read_done_r	<= 1'b1;
		rd_state	<= 3'd0;
	end

	default:rd_state	<= 3'd0;
	endcase
end

/****************write data*****************/
reg[31:0]	decompression_length_r;	///only [31:6] is used: remaining number of 64B beats still to be requested
reg[63:0]	wr_address_r;
reg[2:0]	wr_state;
reg[7:0]	wr_len_r;
reg			wr_req_r;
reg[63:0]	wr_req_count;	// number of acknowledged write requests
reg[63:0]	wr_done_count;	// a counter to count the write_done of the data write before the done signal is sent.
reg			done_out_r;
always@(posedge clk)begin
	if(~rst_n)begin
		wr_state		<= 3'd0;
		wr_req_r		<= 1'b0;
		wr_req_count	<= 64'b0;
		done_out_r		<= 1'b0;
	end else case(wr_state)
	3'd0:begin // initial state
		if(start)begin
			//similar to the read case
			if(decompression_length[5:0]!=6'b0)begin
				decompression_length_r[31:6]	<= decompression_length[31:6]+26'd1;
			end else begin
				decompression_length_r[31:6]	<= decompression_length[31:6];
			end

			wr_req_count	<= 64'b0;
			wr_state		<= 3'd1;
			wr_req_r		<= 1'b0;
			wr_address_r	<= des_addr;
			//clear the done flag of the previous run, otherwise done/idle of this run
			//would no longer wait for the write responses
			done_out_r		<= 1'b0;
		end
	end
	3'd1:begin // state for sending the first 4K block
		if(decompression_length_r[31:6]<=26'd64)begin
			wr_len_r						<= {2'b0,decompression_length_r[11:6]-6'd1};
			decompression_length_r[31:6]	<= 26'd0;
			wr_state						<= 3'd3;
		end else begin
			wr_len_r						<= 8'b11_1111;
			decompression_length_r[31:6]	<= decompression_length_r[31:6]-26'd64;
			wr_state						<= 3'd2;
		end
		wr_req_r	<= 1'b1;
	end
	3'd2:begin // state for sending the rest 4K blocks
		if(wr_req_ack)begin
			wr_req_count	<= wr_req_count+64'b1;
			wr_address_r	<= wr_address_r+64'd4096;
			if(decompression_length_r[31:6]<=26'd64)begin
				wr_len_r						<= {2'b0,decompression_length_r[11:6]-6'd1};
				decompression_length_r[31:6]	<= 26'd0;
				wr_state						<= 3'd3;
			end else begin
				wr_len_r						<= 8'b11_1111;
				decompression_length_r[31:6]	<= decompression_length_r[31:6]-26'd64;
			end
		end
	end
	3'd3:begin // state for waiting until the last wr_req_r is acknowledged
		if(wr_req_ack)begin
			wr_req_count	<= wr_req_count+64'b1;
			wr_req_r		<= 1'b0;
			wr_state		<= 3'd4;
		end
	end

	3'd4:begin
		if(wr_done_count==wr_req_count && read_done_r) begin //write request ack count equal to write data ack count and the read is done
			done_out_r	<= 1'b1;
			wr_state	<= 3'd0;
		end
	end

	default:wr_state	<= 3'd0;
	endcase
end

//count the write responses of the current run
always@(posedge clk)
begin
	if(~rst_n)
	begin
		wr_done_count	<=64'b0;
	end
	else if(start)
	begin
		wr_done_count	<=64'b0;
	end
	else if(bresp)
	begin
		wr_done_count	<=wr_done_count+64'b1;
	end
end

reg idle_r;
reg bready_r;
reg ready_r;
//idle is dropped by start and raised again when both the decompressor and this module are done;
//bready is high for the whole duration of a run
always@(posedge clk)begin
	if(~rst_n)begin
		idle_r		<= 1'b1;
		bready_r	<= 1'b0;
	end else if(start)begin
		idle_r		<= 1'b0;
		bready_r	<= 1'b1;
	end else if(done_i && done_out_r)begin
		idle_r		<= 1'b1;
		bready_r	<= 1'b0;
	end
end

//ready is high whenever the module is out of reset (see the app_ready entry in BugFix.log)
always@(posedge clk) begin
	if(~rst_n)begin
		ready_r	<= 1'b0;
	end else begin
		ready_r	<= 1'b1;
	end
end

assign rd_address	= rd_address_r;
assign rd_req		= rd_req_r;
assign rd_len		= rd_len_r;
assign idle			= idle_r;
assign ready		= ready_r;

assign wr_address	= wr_address_r;
assign wr_req		= wr_req_r;
assign wr_len		= wr_len_r;
assign bready		= bready_r;

assign done_out		= done_out_r;

endmodule

-------------------------------------------------------------------------------- /hw/source/ram_block.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: ram_block 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 13th July, 2018 7 | Function: instantiate a BRAM block to store the decompression result. 8 | Also contains other circuit to process the write command and 9 | copy command. 
10 | ****************************/ 11 | 12 | `timescale 1ns/1ps 13 | 14 | module ram_block 15 | #( parameter BLOCKNUM=4'b0 ///define the number of this block, from 0 to 15 16 | ) 17 | ( 18 | input clk, 19 | input rst_n, 20 | input rd_en, ///read the ram 21 | input block_out_finish, //when the page is finished 22 | input page_finish, //in the end of a file, clean all data 23 | ///signal for writing data 24 | input valid_wr_in, 25 | input[63:0] lit_in, 26 | input[8:0] lit_address, 27 | input[7:0] lit_valid, 28 | 29 | //signal for reading data 30 | input valid_rd_in, 31 | input[8:0] copy_address, 32 | input[7:0] copy_valid_in, //choose bytes to read 33 | input[15:0] copy_offset_in, 34 | 35 | 36 | ///// 37 | output unsolved_valid_out, 38 | output[32:0] unsolved_token_out, 39 | 40 | output even_valid_out, 41 | output[63:0] even_data_out, 42 | output[7:0] even_hit_out, 43 | output[8:0] even_address_out, 44 | 45 | output odd_valid_out, 46 | output[63:0] odd_data_out, 47 | output[7:0] odd_hit_out, 48 | output[8:0] odd_address_out, 49 | 50 | output[15:0] ram_select_out, 51 | 52 | ////////output the compression result 53 | output[63:0] data_out 54 | 55 | ); 56 | 57 | /*************************************** 58 | process write command 59 | ***************************************/ 60 | reg valid_wr_buff; 61 | reg[63:0] lit_data_buff; 62 | reg[8:0] lit_address_buff; 63 | reg[7:0] lit_valid_buff; 64 | reg[7:0] lit_byte_enable; 65 | 66 | reg[8:0] cl_address; 67 | reg cl_flag; 68 | reg[2:0] cl_state; 69 | reg valid_inverse; /// 70 | always@(*)begin 71 | 72 | lit_data_buff <=lit_in; 73 | lit_address_buff<=cl_flag?cl_address:lit_address; 74 | valid_wr_buff <=cl_flag?1'b1:valid_wr_in; 75 | lit_valid_buff <=cl_flag?8'b0:(lit_valid^{8{valid_inverse}}); 76 | lit_byte_enable <=cl_flag?8'hff:lit_valid; 77 | end 78 | 79 | 80 | always@(posedge clk)begin 81 | if(~rst_n)begin 82 | cl_state <= 3'd0; 83 | cl_flag <= 1'b0; 84 | end else 85 | case(cl_state) 86 | 3'd0:begin///idle state 87 | 
cl_address <=9'b0; 88 | if(page_finish)begin 89 | cl_state <=3'd1; 90 | cl_flag <=1'b1; 91 | end 92 | end 93 | 3'd1:begin ///increasing address to clean 94 | cl_address <=cl_address+9'd1; 95 | if(cl_address==9'd511)begin 96 | cl_state <=3'd2; 97 | cl_flag <=1'b0; 98 | end 99 | end 100 | 3'd2:begin 101 | cl_state <=3'd0; 102 | cl_flag <=1'b0; 103 | end 104 | default:cl_state<=3'd0; 105 | endcase 106 | 107 | if(~rst_n)begin 108 | valid_inverse<=1'b0; 109 | end else 110 | if(page_finish)begin 111 | valid_inverse<=1'b0; 112 | end else 113 | if(block_out_finish)begin 114 | valid_inverse<=~valid_inverse; 115 | end 116 | end 117 | 118 | /*************************************** 119 | process copy command 120 | ***************************************/ 121 | wire[7:0] copy_valid_w,copy_valid_w2; 122 | wire[63:0] copy_data_w; 123 | 124 | //1st stage 125 | reg valid_rd_buff; 126 | reg[8:0] copy_address_buff; 127 | reg[7:0] copy_valid_buff; 128 | reg[15:0] copy_offset_buff; 129 | always@(*)begin //if change to posedge, add reset 130 | valid_rd_buff <=valid_rd_in; 131 | copy_address_buff <=copy_address; 132 | copy_valid_buff <=copy_valid_in; 133 | copy_offset_buff <=copy_offset_in; 134 | end 135 | 136 | //2nd stage 137 | reg valid_rd_buff2; 138 | reg[8:0] copy_address_buff2; 139 | reg[7:0] copy_valid_buff2; 140 | reg[15:0] copy_offset_buff2; 141 | reg[15:0] des_address2; ///address of the destination 142 | always@(posedge clk)begin 143 | if(~rst_n)begin 144 | valid_rd_buff2 <= 1'b0; 145 | end else begin 146 | valid_rd_buff2 <= valid_rd_buff; 147 | 148 | end 149 | copy_valid_buff2 <= copy_valid_buff; 150 | copy_address_buff2 <=copy_address_buff; 151 | copy_offset_buff2 <=copy_offset_buff; 152 | 153 | des_address2 <={copy_address_buff,BLOCKNUM,3'b0}+copy_offset_buff; 154 | end 155 | 156 | //3rd stage(fetch read result in this stage) 157 | reg valid_rd_buff3; 158 | reg[8:0] copy_address_buff3; 159 | reg[7:0] copy_valid_buff3; 160 | reg[15:0] copy_offset_buff3; 161 | reg[15:0] 
des_address3; 162 | reg[63:0] data_3; 163 | reg[7:0] hit_3; ///whether the bytes are read 164 | always@(posedge clk)begin 165 | if(~rst_n)begin 166 | valid_rd_buff3 <= 1'b0; 167 | end else begin 168 | valid_rd_buff3 <= valid_rd_buff2; 169 | 170 | end 171 | copy_valid_buff3<= copy_valid_buff2; 172 | copy_address_buff3 <= copy_address_buff2; 173 | 174 | copy_offset_buff3 <= copy_offset_buff2; 175 | des_address3 <= des_address2; 176 | 177 | 178 | data_3 <= copy_data_w[63:0]; 179 | hit_3 <= copy_valid_w[7:0] & copy_valid_buff2; 180 | end 181 | assign copy_valid_w=copy_valid_w2^{8{valid_inverse}}; 182 | 183 | reg[10:0] debug1,debug2,debug3;////for debug only 184 | 185 | //4th stage 186 | reg valid_rd_buff4; 187 | reg[8:0] copy_address_buff4; 188 | reg[15:0] copy_offset_buff4; 189 | reg[15:0] des_address4; 190 | reg[32:0] unsolved_token4; //unsolved read 191 | reg valid_unsolved4; 192 | 193 | reg[127:0] data_4; 194 | reg[15:0] hit_4; 195 | wire[127:0] data_shift; 196 | assign data_shift ={data_3,64'b0} >>{copy_offset_buff3[2:0],3'b0}; 197 | always@(posedge clk)begin 198 | if(~rst_n)begin 199 | valid_rd_buff4 <= 1'b0; 200 | end else begin 201 | valid_rd_buff4 <= valid_rd_buff3; 202 | end 203 | 204 | des_address4 <=des_address3; 205 | unsolved_token4 <={copy_address_buff3,hit_3^copy_valid_buff3,copy_offset_buff3}; 206 | 207 | debug1<=copy_address_buff3; 208 | debug2<=hit_3^copy_valid_buff3; 209 | debug3<=copy_offset_buff3; 210 | 211 | if(~rst_n)begin 212 | valid_unsolved4 <= 1'b0; 213 | end else begin 214 | valid_unsolved4 <= ((hit_3^copy_valid_buff3)!=8'b0)&valid_rd_buff3; 215 | end 216 | 217 | data_4 <=data_shift[127:0]; 218 | hit_4 <={hit_3,8'b0} >>copy_offset_buff3[2:0]; 219 | end 220 | 221 | ///5th stage 222 | reg valid_odd_5,valid_even_5; 223 | reg[63:0] data_odd_5,data_even_5; 224 | reg[7:0] hit_odd_5,hit_even_5; 225 | reg[8:0] address_odd_5,address_even_5; 226 | reg[15:0] ram_select_5; ///select 2 rams to write, {ram15,ram14 ....ram0} 227 | wire[31:0] ram_select_w; 
228 | wire[15:0] des_address_plus; 229 | assign des_address_plus[15:3] = des_address4[15:3]+13'b1; 230 | assign ram_select_w={16'b11,16'b11}<>{address_in[2:0],3'b0}; 54 | assign wr_1_w=~(16'h7fff>>length); 55 | assign address_1_0_w=address_in[15:3]; 56 | assign address_1_1_w=address_in[15:3]+13'd1; 57 | assign address_1_2_w=address_in[15:3]+13'd2; 58 | assign address_1_3_w=address_in[15:3]+13'd3; 59 | always@(posedge clk)begin 60 | data_1 <=data_1_w; 61 | wr_1 <=wr_1_w; 62 | address_1 <=address_in; 63 | address_1_0 <=address_1_0_w[15:3]; 64 | address_1_1 <=address_1_1_w[15:3]; 65 | address_1_2 <=address_1_2_w[15:3]; 66 | address_1_3 <=address_1_3_w[15:3]; 67 | 68 | if(~rst_n)begin 69 | valid_1 <= 1'b0; 70 | end else begin 71 | valid_1 <=valid_in; 72 | end 73 | 74 | end 75 | 76 | reg[31:0] wr_2; 77 | reg[63:0] data_2_0,data_2_1,data_2_2,data_2_3; 78 | reg[8:0] address_2_0,address_2_1,address_2_2,address_2_3; 79 | reg[3:0] ram_select0,ram_select1,ram_select2,ram_select3; 80 | reg valid_2; 81 | wire[47:0] wr_2_w; 82 | assign wr_2_w={wr_1,32'b0}>>address_1[4:0]; 83 | always@(posedge clk)begin 84 | wr_2[31:16] <= (wr_2_w[47:32]|wr_2_w[15:0]); 85 | wr_2[15:0] <= wr_2_w[31:16]; 86 | 87 | case(address_1[4:3]) 88 | 2'd0:begin data_2_0<=data_1[183:120]; data_2_1<=data_1[119:56]; data_2_2<={data_1[55:0],8'b0}; data_2_3<=64'b0; 89 | address_2_0<=address_1_0[12:4]; address_2_1<=address_1_1[12:4]; address_2_2<=address_1_2[12:4]; address_2_3<=address_1_3[12:4]; 90 | ram_select0<=(4'b0001<> length_in); 189 | // address_2 <= address_1; 190 | address_rd_2<=address_in-offset_in; 191 | offset_2 <=offset_in; 192 | if(~rst_n)begin 193 | valid_2 <= 1'b0; 194 | end else begin 195 | valid_2 <= valid_in; 196 | end 197 | 198 | 199 | ///////for debug 200 | // if(valid_1)begin 201 | // if(length<=offset)begin 202 | // $display("address: %d %d",address_1,PARSER_NUM); 203 | // $display("length: %d",length); 204 | // $display("offset: %d",offset); 205 | // end 206 | // end 207 | 
/////////////////// 208 | 209 | end 210 | 211 | reg[127:0] rd_3; 212 | reg[143:0] address_3; //address for 16 rams, each is 9-bits 213 | reg[15:0] offset_3; 214 | reg[15:0] ram_select_3; 215 | wire[15:0] ram_select_3_w; 216 | wire[207:0] address_3_w; 217 | wire[143:0] address_3_w2; 218 | wire[63:0] base; 219 | reg valid_3; 220 | wire[255:0] rd_3_w; 221 | assign rd_3_w={rd_2,rd_2}>>address_rd_2[6:0]; 222 | 223 | genvar i; 224 | generate 225 | for(i=0;i<16;i=i+1)begin 226 | assign base[4*i+3:4*i] =i[3:0]-address_rd_2[6:3]; 227 | assign address_3_w[13*i+12:13*i]=address_rd_2[15:3]+{7'b0,base[4*i+3:4*i]}; 228 | assign address_3_w2[9*i+8:9*i] =address_3_w[13*i+12:13*i+4]; 229 | assign ram_select_3_w[i] =rd_3_w[127-8*i] | rd_3_w[126-8*i] | rd_3_w[125-8*i] | rd_3_w[124-8*i] | rd_3_w[123-8*i] | rd_3_w[122-8*i] | rd_3_w[121-8*i] | rd_3_w[120-8*i]; 230 | end 231 | endgenerate 232 | 233 | always@(posedge clk)begin 234 | rd_3 <=rd_3_w[127:0]; 235 | address_3 <=address_3_w2; 236 | offset_3 <=offset_2; 237 | ram_select_3<=ram_select_3_w; 238 | if(~rst_n)begin 239 | valid_3 <= 1'b0; 240 | end else begin 241 | valid_3 <= valid_2; 242 | end 243 | 244 | end 245 | 246 | //assign valid_out =valid_3; 247 | assign rd_out =rd_3; 248 | assign address_out =address_3; 249 | assign offset_out =offset_3; 250 | assign ram_select =ram_select_3&{16{valid_3}}; 251 | 252 | endmodule 253 | -------------------------------------------------------------------------------- /hw/source/data_out.v: --------------------------------------------------------------------------------
/***********************************
Author:   Jianyu Chen
School:   Delft University of Technology
Date:     7th, July 2018
Function: Store the decompression result and output the result while doing decompression.
          The content of the BRAM here will be the same as the BRAM in ram_block modules,
          and the read port of this module is used to output data.

Overview of what the code below does:
  * 16 "blockram" instances are written through port A with 8 data bytes plus
    one valid bit per byte (8 x 9 = 72 bits per word, 9-bit address).
  * Port B of all 16 RAMs is read in parallel (1024 data bits + 128 valid bits).
    The 512-bit output alternates between data_w[1023:512] ("lower", RAMs 0..7)
    and data_w[511:0] ("upper", RAMs 8..15), selected by rd_address[0].
  * A word is only presented on data_o once all 64 valid bits of the selected
    half are set.
  * A 5-state FSM sequences idle / read / end-of-block / final beat / valid-bit
    clean-up.
***********************************/

`timescale 1ns/1ps

module data_out(
	input			clk,
	input			rst_n,			///active-low reset, sampled synchronously

	input			start,			///latches max_address and leaves the idle state
	input			ready,			///whether dma is ready to receive data
	///signals for writing data
	input[31:0]		decompression_length,	///length in bytes; [31:6] is the count of 64-byte words
	input[15:0]		valid_wr_in,		///per-RAM write request
	input[1023:0]	lit_in,			///per-RAM 64-bit write data
	input[143:0]	lit_address,		///per-RAM 9-bit write address
	input[127:0]	lit_valid,		///per-RAM 8-bit byte enable / byte valid
	input			page_finish,		///if all the page has already been processed
	/////
	output			block_out_finish,	///all data in the block has been output
	output			page_out_finish,
	output			cl_finish,		///all valid bits are cleaned
	output			last,			///whether it is the last 64B of a burst
	output[511:0]	data_o,
	output[63:0]	byte_valid_o,
	output			valid_o
);
wire[1023:0]	data_w;
wire[127:0]		valid_w,valid_w2;	//valid_w2: raw bits from BRAM, valid_w: after XOR with valid_inverse
reg				valid_upper,valid_lower;

//Combinational next-address logic (driven from the always@(*) block below).
reg[25:0]		rd_address_w;
reg				valid_out_w;		//not read anywhere in this module

reg				block_out_finish_r;
reg[8:0]		wr_address;			//sweep address used while clearing the valid bits
reg[25:0]		rd_address;			//[25:10]: block count, [9:1]: the address of ram from 0 to 511, [0]: select upper or lower
reg[2:0]		state;
reg				rd_valid;
reg				wr_flag;			//1 while the clean-up sweep owns port A of the RAMs
reg				final_valid;
reg				cl_finish_flag;
reg				valid_inverse;
reg				page_out_finish_r;

reg[25:0]		max_address;		///index of the last 64-byte word: ceil(decompression_length/64)-1
always@(posedge clk)begin
	if(start)begin
		if(decompression_length[5:0]==6'b0)begin
			max_address	<= decompression_length[31:6]-26'b1;
		end else begin
			max_address	<= decompression_length[31:6];
		end
	end
end


/*In the beginning, the valid bits in all BRAM are 0, and the data is valid when the corresponding valid bit is set to 1.
After writing the first block, the valid bits are all 1 (if the first block is 64KB). So in the second block, data is valid
when the corresponding valid bit is set to 0. After decompressing the file, all valid bits are reset to 0. In order to keep the
logic simple there is a "valid_inverse" register which is inverted after every block. The valid bit is XORed with valid_inverse
before writing to BRAM and after reading from BRAM.

State encoding:
  0  idle      : wait for start, clear flags and rd_address
  1  read      : step rd_address up to max_address
  2  block end : wait for the upper half to be accepted, toggle valid_inverse
  3  final     : final_valid beat, wait for ready
  4  clean     : sweep wr_address 0..511 writing zero valid bits

NOTE(review): the reset branch below has no "else", so the case statement still runs while rst_n is low and any
assignment it makes to the same register wins (e.g. rd_valid in state 0). This is left exactly as in the original;
confirm whether a dominant reset was intended before changing it.*/
always@(posedge clk)begin
	if(~rst_n)begin
		state		<= 3'd0;
		rd_valid	<= 1'b1;
		wr_address	<= 9'd0;
	end
	case(state)
	3'd0:begin	///idle state
		if(start)begin
			state			<= 3'd1;
			valid_inverse	<= 1'b0;
		end else begin
			rd_valid		<= 1'b0;
		end

		wr_flag				<= 1'b0;
		final_valid			<= 1'b0;
		page_out_finish_r	<= 1'b0;
		cl_finish_flag		<= 1'b0;
		rd_address			<= 26'b0;
		block_out_finish_r	<= 1'b0;
	end
	3'd1:begin	///read
		block_out_finish_r	<= 1'b0;

		if(rd_address!=max_address)begin	//increment the address until the max_address
			rd_address	<= rd_address_w;
			rd_valid	<= 1'b1;
		end else begin
			//at the last word: stop presenting data once that word has been accepted
			if(valid_o & ready)begin
				rd_valid	<= 1'b0;
			end
		end

		if(rd_address==max_address)begin
			if(page_finish & ready)begin	//whether all the data in the page has been output
				state		<= 3'd3;
				final_valid	<= 1'b1;
			end
		end
		else if((rd_address[9:0]==10'd1022) & (rd_address!=(max_address-1)) & valid_lower & ready)begin
			//second-to-last 512-bit word of a block (and not of the whole page): handle the block boundary
			state	<= 3'd2;
		end
	end
	3'd2:begin	///end of block: output the last upper half, then flip the valid polarity
		if(valid_upper & ready)begin
			valid_inverse		<= ~valid_inverse;
			state				<= 3'd1;
			block_out_finish_r	<= 1'b1;
			rd_valid			<= 1'b0;
			rd_address			<= rd_address_w;
		end
	end

	3'd3:begin	///final beat
		if(ready)begin
			state		<= 3'd4;
			wr_flag		<= 1'b1;
			final_valid	<= 1'b0;
			rd_valid	<= 1'b0;
		end
		wr_address	<= 9'd0;
	end

	3'd4:begin	///clean all the valid bits in BRAM
		wr_address		<= wr_address+9'd1;
		valid_inverse	<= 1'b0;
		rd_valid		<= 1'b0;
		if(wr_address==9'd511)begin
			state				<= 3'd0;
			page_out_finish_r	<= 1'b1;
			cl_finish_flag		<= 1'b1;
		end else begin
			wr_flag				<= 1'b1;
		end
	end
	default:state	<= 3'd0;
	endcase
end

reg[9:0]	rd_address_lowerl;
/*The bandwidth of the 16 BRAMs is 1024 bits, however the bandwidth of axi is only 512 bits, so the lower 512 bits
and the higher 512 bits are read in turn.
Combinational logic: blocking assignments are used so that rd_address_lowerl sees the rd_address_w value computed
in the same evaluation.*/
always@(*)begin
	//if rd_address[0] is 0 the lower 512 bits are being output, otherwise the upper 512 bits
	if(((rd_address[0]==1'b0&valid_lower)|(rd_address[0]==1'b1&valid_upper))&ready&rd_valid)begin
		rd_address_w	= rd_address+26'b1;
		valid_out_w		= 1'b1;
	end else begin
		rd_address_w	= rd_address;
		valid_out_w		= 1'b0;
	end

	//only the low 10 bits are needed: this selects the row for RAMs 0..7 (one half-word ahead)
	rd_address_lowerl	= rd_address_w[9:0]+10'd1;
end


genvar ram_i;
generate
for(ram_i=0;ram_i<16;ram_i=ram_i+1)begin:generate_ram

	//RAMs 0..7 (data_w[1023:512]) read the row of (next address + 1), RAMs 8..15 the row of the next address
	reg[8:0]	rd_address_w2;
	always@(*)begin
		if(ram_i<8)begin
			rd_address_w2	= rd_address_lowerl[9:1];
		end else begin
			rd_address_w2	= rd_address_w[9:1];
		end
	end


	//one register stage on the write inputs of this RAM
	reg			valid_wr_buff;
	reg[63:0]	lit_buff;
	reg[8:0]	lit_address_buff;
	reg[7:0]	lit_valid_buff;		//valid bits as stored in the RAM (XORed with valid_inverse)
	reg[7:0]	lit_byte_valid;		//byte write enables
	wire[7:0]	dina_valid_w;
	wire[8:0]	addra_w;
	always@(posedge clk)begin
		if(~rst_n)begin
			valid_wr_buff	<= 1'b0;
		end else begin
			valid_wr_buff	<= valid_wr_in[ram_i];
		end

		lit_buff		<= lit_in[64*ram_i+63:64*ram_i];
		lit_address_buff<= lit_address[9*ram_i+8:9*ram_i];
		lit_valid_buff	<= lit_valid[8*ram_i+7:8*ram_i]^{8{valid_inverse}};
		lit_byte_valid	<= lit_valid[8*ram_i+7:8*ram_i];
	end
	//while wr_flag is set (clean-up), make all valid bits 0 and give wr_address to the BRAM
	assign dina_valid_w	= wr_flag?8'b0:lit_valid_buff;
	assign addra_w		= wr_flag?wr_address:lit_address_buff;
	blockram result_ram0(
		.addra(addra_w),
		.clka(clk),
		.dina({dina_valid_w[7],lit_buff[63:56],
		       dina_valid_w[6],lit_buff[55:48],
		       dina_valid_w[5],lit_buff[47:40],
		       dina_valid_w[4],lit_buff[39:32],
		       dina_valid_w[3],lit_buff[31:24],
		       dina_valid_w[2],lit_buff[23:16],
		       dina_valid_w[1],lit_buff[15:8],
		       dina_valid_w[0],lit_buff[7:0]}),
		.ena(wr_flag|valid_wr_buff),
		.wea(wr_flag?8'hff:lit_byte_valid),

		.addrb(rd_address_w2),
		.clkb(clk),
		.doutb({valid_w2[127-8*ram_i-0],data_w[1023-64*ram_i-0:1023-64*ram_i-7],
		        valid_w2[127-8*ram_i-1],data_w[1023-64*ram_i-8:1023-64*ram_i-15],
		        valid_w2[127-8*ram_i-2],data_w[1023-64*ram_i-16:1023-64*ram_i-23],
		        valid_w2[127-8*ram_i-3],data_w[1023-64*ram_i-24:1023-64*ram_i-31],
		        valid_w2[127-8*ram_i-4],data_w[1023-64*ram_i-32:1023-64*ram_i-39],
		        valid_w2[127-8*ram_i-5],data_w[1023-64*ram_i-40:1023-64*ram_i-47],
		        valid_w2[127-8*ram_i-6],data_w[1023-64*ram_i-48:1023-64*ram_i-55],
		        valid_w2[127-8*ram_i-7],data_w[1023-64*ram_i-56:1023-64*ram_i-63]}),
		.enb(1'b1)
	);
end

endgenerate

/****generate the last signal for each burst*****/
//last_r is registered from the NEXT read address, so it lines up with rd_address one cycle later
reg	last_r;
always@(posedge clk)begin
	if(~rst_n)begin
		last_r	<= 1'b0;
	end else if((rd_address_w[5:0]==6'b11_1111))begin
		last_r	<= 1'b1;
	end else begin
		last_r	<= 1'b0;
	end
end

//delay the finish pulses: block_out_finish by two cycles, page_out_finish by one
reg	block_out_finish_buff_r,block_out_finish_buff_r2;
reg	page_out_finish_buff_r;
always@(posedge clk)begin
	block_out_finish_buff_r		<= block_out_finish_r;
	block_out_finish_buff_r2	<= block_out_finish_buff_r;

	page_out_finish_buff_r		<= page_out_finish_r;
end

/**********************************/
//register the BRAM outputs; a half is valid only when all 64 of its byte-valid bits are set
reg[511:0]	data_upper,data_lower;
always@(posedge clk)begin
	valid_upper	<= (valid_w[63:0]==~64'b0);
	valid_lower	<= (valid_w[127:64]==~64'b0);

	data_upper	<= data_w[511:0];
	data_lower	<= data_w[1023:512];
end


assign valid_w			= valid_w2^{128{valid_inverse}};
assign block_out_finish	= block_out_finish_buff_r2;

assign page_out_finish	= page_out_finish_buff_r;
assign cl_finish		= cl_finish_flag;
assign data_o			= rd_address[0]?data_upper:data_lower;
assign byte_valid_o		= (~64'b0);
assign valid_o			= ((rd_address[0]?valid_upper:valid_lower)&rd_valid)| final_valid;
assign last				= valid_o & (final_valid |last_r);

endmodule
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /hw/interface/action_axi_master.vhd: --------------------------------------------------------------------------------
----------------------------------------------------------------------------
----------------------------------------------------------------------------
--
-- Copyright 2016 International Business Machines
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions AND
-- limitations under the License.
--
----------------------------------------------------------------------------
----------------------------------------------------------------------------
--
-- action_axi_master: thin bridge between a simple request/acknowledge "dma_*"
-- interface and an AXI master port.
--   * Read/write address channels: a request is latched and AxVALID is held
--     until AxREADY, then a one-cycle acknowledge is returned.
--   * Read data channel: passed straight through (no buffering).
--   * Write data channel: one register stage with back-pressure.
--   * Write response channel: BVALID is turned into a one-cycle "done" pulse.
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity action_axi_master is
  generic (
    -- Users to add parameters here

    -- User parameters ends
    -- Do not modify the parameters beyond this line



    -- Thread ID Width
    C_M_AXI_ID_WIDTH     : integer := 1;
    -- Width of Address Bus
    C_M_AXI_ADDR_WIDTH   : integer := 64;
    -- Width of Data Bus
    C_M_AXI_DATA_WIDTH   : integer := 512;
    -- Width of User Write Address Bus
    C_M_AXI_AWUSER_WIDTH : integer := 0;
    -- Width of User Read Address Bus
    C_M_AXI_ARUSER_WIDTH : integer := 0;
    -- Width of User Write Data Bus
    C_M_AXI_WUSER_WIDTH  : integer := 0;
    -- Width of User Read Data Bus
    C_M_AXI_RUSER_WIDTH  : integer := 0;
    -- Width of User Response Bus
    C_M_AXI_BUSER_WIDTH  : integer := 0
  );
  port (
    -- Users to add ports here

    -- Read side of the user ("dma") interface
    dma_rd_req_i        : in  std_logic;  -- request a read burst
    dma_rd_addr_i       : in  std_logic_vector(C_M_AXI_ADDR_WIDTH -1 downto 0);
    dma_rd_len_i        : in  std_logic_vector( 7 downto 0);  -- forwarded unchanged to ARLEN
    dma_rd_req_ack_o    : out std_logic;  -- one-cycle pulse after the AR handshake
    dma_rd_data_o       : out std_logic_vector(C_M_AXI_DATA_WIDTH - 1 downto 0);
    dma_rd_data_valid_o : out std_logic;
    dma_rd_data_last_o  : out std_logic;
    dma_rd_data_taken_i : in  std_logic;  -- drives RREADY directly
    dma_rd_context_id   : in  std_logic_vector(C_M_AXI_ARUSER_WIDTH - 1 downto 0);


    -- Write side of the user ("dma") interface
    dma_wr_req_i        : in  std_logic;  -- request a write burst
    dma_wr_addr_i       : in  std_logic_vector( C_M_AXI_ADDR_WIDTH - 1 downto 0);
    dma_wr_len_i        : in  std_logic_vector( 7 downto 0);  -- forwarded unchanged to AWLEN
    dma_wr_req_ack_o    : out std_logic;  -- one-cycle pulse after the AW handshake
    dma_wr_data_i       : in  std_logic_vector(C_M_AXI_DATA_WIDTH -1 downto 0);
    dma_wr_wvalid       : in  std_logic;
    dma_wr_data_strobe_i: in  std_logic_vector(C_M_AXI_DATA_WIDTH/8-1 downto 0);
    dma_wr_data_last_i  : in  std_logic;
    dma_wr_ready_o      : out std_logic;  -- low while the write register holds an unaccepted beat
    dma_wr_bready_i     : in  std_logic;  -- registered onto BREADY
    dma_wr_done_o       : out std_logic;  -- one-cycle pulse following BVALID
    dma_wr_context_id   : in  std_logic_vector(C_M_AXI_AWUSER_WIDTH - 1 downto 0);

    -- AXI master port
    M_AXI_ACLK    : in  std_logic;
    M_AXI_ARESETN : in  std_logic;
    M_AXI_AWID    : out std_logic_vector(C_M_AXI_ID_WIDTH-1 downto 0);
    M_AXI_AWADDR  : out std_logic_vector(C_M_AXI_ADDR_WIDTH-1 downto 0);
    M_AXI_AWLEN   : out std_logic_vector(7 downto 0);
    M_AXI_AWSIZE  : out std_logic_vector(2 downto 0);
    M_AXI_AWBURST : out std_logic_vector(1 downto 0);
    M_AXI_AWLOCK  : out std_logic_vector(1 downto 0);
    M_AXI_AWCACHE : out std_logic_vector(3 downto 0);
    M_AXI_AWPROT  : out std_logic_vector(2 downto 0);
    M_AXI_AWQOS   : out std_logic_vector(3 downto 0);
    M_AXI_AWUSER  : out std_logic_vector(C_M_AXI_AWUSER_WIDTH-1 downto 0);
    M_AXI_AWVALID : out std_logic;
    M_AXI_AWREADY : in  std_logic;
    M_AXI_WDATA   : out std_logic_vector(C_M_AXI_DATA_WIDTH-1 downto 0);
    M_AXI_WSTRB   : out std_logic_vector(C_M_AXI_DATA_WIDTH/8-1 downto 0);
    M_AXI_WLAST   : out std_logic;
    M_AXI_WUSER   : out std_logic_vector(C_M_AXI_WUSER_WIDTH-1 downto 0);
    M_AXI_WVALID  : out std_logic;
    M_AXI_WREADY  : in  std_logic;
    M_AXI_BID     : in  std_logic_vector(C_M_AXI_ID_WIDTH-1 downto 0);
    M_AXI_BRESP   : in  std_logic_vector(1 downto 0);
    M_AXI_BUSER   : in  std_logic_vector(C_M_AXI_BUSER_WIDTH-1 downto 0);
    M_AXI_BVALID  : in  std_logic;
    M_AXI_BREADY  : out std_logic;
    M_AXI_ARUSER  : out std_logic_vector(C_M_AXI_ARUSER_WIDTH-1 downto 0);
    M_AXI_ARID    : out std_logic_vector(C_M_AXI_ID_WIDTH-1 downto 0);
    M_AXI_ARADDR  : out std_logic_vector(C_M_AXI_ADDR_WIDTH-1 downto 0);
    M_AXI_ARLEN   : out std_logic_vector(7 downto 0);
    M_AXI_ARSIZE  : out std_logic_vector(2 downto 0);
    M_AXI_ARBURST : out std_logic_vector(1 downto 0);
    M_AXI_ARLOCK  : out std_logic_vector(1 downto 0);
    M_AXI_ARCACHE : out std_logic_vector(3 downto 0);
    M_AXI_ARPROT  : out std_logic_vector(2 downto 0);
    M_AXI_ARQOS   : out std_logic_vector(3 downto 0);
    M_AXI_ARVALID : out std_logic;
    M_AXI_ARREADY : in  std_logic;
    M_AXI_RID     : in  std_logic_vector(C_M_AXI_ID_WIDTH-1 downto 0);
    M_AXI_RDATA   : in  std_logic_vector(C_M_AXI_DATA_WIDTH-1 downto 0);
    M_AXI_RRESP   : in  std_logic_vector(1 downto 0);
    M_AXI_RLAST   : in  std_logic;
    M_AXI_RUSER   : in  std_logic_vector(C_M_AXI_RUSER_WIDTH-1 downto 0);
    M_AXI_RVALID  : in  std_logic;
    M_AXI_RREADY  : out std_logic
  );
end action_axi_master;

architecture action_axi_master of action_axi_master is


  -- function called clogb2 that returns an integer which has the
  -- value of the ceiling of the log base 2
  --
  -- It counts how many times "depth" can be halved before reaching 1 and adds
  -- one; arguments <= 2 return 1. It is called below with (bytes per beat - 1):
  -- for the default 512-bit data bus the argument is 63 and the result is 6.

  function clogb2 (bit_depth : integer) return integer is
    variable depth : integer := bit_depth;
    variable count : integer := 1;
  begin
    for clogb2 in 1 to bit_depth loop  -- Works for up to 32 bit integers
      if (bit_depth <= 2) then
        count := 1;
      else
        if(depth <= 1) then
          count := count;              -- already finished; remaining iterations do nothing
        else
          depth := depth / 2;
          count := count + 1;
        end if;
      end if;
    end loop;
    return(count);
  end;

  -- OR of all bits of arg; returns '0' for an empty vector.
  -- Not called anywhere in this architecture.
  function or_reduce (signal arg : std_logic_vector) return std_logic is
    variable result : std_logic;

  begin
    result := '0';
    for i in arg'low to arg'high loop
      result := result or arg(i);
    end loop;  -- i
    return result;
  end or_reduce;


  -- Registered copies of the AXI outputs (ports of mode "out" are not read back)
  signal axi_awaddr        : std_logic_vector(C_M_AXI_ADDR_WIDTH-1 downto 0);
  signal axi_awvalid       : std_logic;
  signal axi_wdata         : std_logic_vector(C_M_AXI_DATA_WIDTH-1 downto 0);
  signal axi_wlast         : std_logic;
  signal axi_wvalid        : std_logic;
  signal axi_wstrb         : std_logic_vector(C_M_AXI_DATA_WIDTH/8-1 downto 0);
  signal axi_bready        : std_logic;
  signal axi_araddr        : std_logic_vector(C_M_AXI_ADDR_WIDTH-1 downto 0);
  signal axi_arvalid       : std_logic;
  signal axi_rready        : std_logic;
  signal axi_awlen         : std_logic_vector(7 downto 0);
  signal axi_arlen         : std_logic_vector(7 downto 0);
  -- High for the one cycle in which the acknowledge pulse is visible; while
  -- high, the request input is ignored so the acknowledged request is not re-latched.
  signal wr_req_wait_cycle : std_logic;
  signal rd_req_wait_cycle : std_logic;
  -- Declared but not used in this architecture.
  signal rd_req_ack        : std_logic;
  signal wr_req_ack        : std_logic;


begin


  -- Static / pass-through AXI signal assignments.
  -- AxSIZE is derived from the data width (6 for 512 bits); AxBURST "01" is the
  -- INCR encoding in the AXI specification.
  M_AXI_AWID    <= (others => '0');
  M_AXI_AWADDR  <= axi_awaddr;
  M_AXI_AWLEN   <= axi_awlen;
  M_AXI_AWSIZE  <= std_logic_vector( to_unsigned(clogb2((C_M_AXI_DATA_WIDTH/8)-1), 3) );
  M_AXI_AWBURST <= "01";
  M_AXI_AWLOCK  <= (others => '0');
  M_AXI_AWCACHE <= "0010";
  M_AXI_AWPROT  <= "000";
  M_AXI_AWQOS   <= x"0";
  M_AXI_AWUSER  <= dma_wr_context_id;
  M_AXI_AWVALID <= axi_awvalid;
  M_AXI_WDATA   <= axi_wdata;
  M_AXI_WSTRB   <= axi_wstrb;
  M_AXI_WLAST   <= axi_wlast;
  M_AXI_WUSER   <= (others => '0');
  M_AXI_WVALID  <= axi_wvalid;
  M_AXI_BREADY  <= axi_bready;
  M_AXI_ARID    <= (others => '0');
  M_AXI_ARADDR  <= axi_araddr;
  M_AXI_ARLEN   <= axi_arlen;
  M_AXI_ARSIZE  <= std_logic_vector( to_unsigned( clogb2((C_M_AXI_DATA_WIDTH/8)-1),3 ));
  M_AXI_ARBURST <= "01";
  M_AXI_ARLOCK  <= (others => '0');
  M_AXI_ARCACHE <= "0010";
  M_AXI_ARPROT  <= "000";
  M_AXI_ARQOS   <= x"0";
  M_AXI_ARUSER  <= dma_rd_context_id;
  M_AXI_ARVALID <= axi_arvalid;
  M_AXI_RREADY  <= axi_rready;


  -- Write address channel and write response channel.
  -- Statement order matters: later assignments in the same clock edge override
  -- earlier ones (e.g. the AW handshake clears axi_awvalid even if a new
  -- request was latched just above it in the same cycle).
  axi_w: process(M_AXI_ACLK)
  begin
    if (rising_edge (M_AXI_ACLK)) then
      -- defaults: both status outputs are single-cycle pulses
      dma_wr_req_ack_o <= '0';
      dma_wr_done_o    <= '0';
      if M_AXI_ARESETN = '0' then
        axi_awvalid       <= '0';
        axi_bready        <= '0';
        wr_req_wait_cycle <= '0';
      else
        wr_req_wait_cycle <= '0';
        -- latch a new request and raise AWVALID
        if dma_wr_req_i = '1' and wr_req_wait_cycle = '0' then
          axi_awaddr  <= dma_wr_addr_i;
          axi_awlen   <= dma_wr_len_i;
          axi_awvalid <= '1';
        end if;
        -- address accepted: acknowledge and skip the request input for one cycle
        if axi_awvalid = '1' and M_AXI_AWREADY = '1' then
          dma_wr_req_ack_o  <= '1';
          axi_awvalid       <= '0';
          wr_req_wait_cycle <= '1';
        end if;
        axi_bready <= dma_wr_bready_i;
        -- NOTE(review): "done" is raised on BVALID alone; axi_bready and
        -- M_AXI_BRESP are not examined here -- confirm this is intended.
        if M_AXI_BVALID = '1' then
          dma_wr_done_o <= '1';
        end if;
      end if;

    end if;
  end process;




  -- Read data channel: direct pass-through, no register stage.
  -- M_AXI_RRESP is not examined.
  axi_rready          <= dma_rd_data_taken_i;
  dma_rd_data_last_o  <= M_AXI_RLAST;
  dma_rd_data_valid_o <= M_AXI_RVALID;
  dma_rd_data_o       <= M_AXI_RDATA;


  -- Write data channel: one register stage.
  -- The register is (re)loaded whenever it is empty or its content is being
  -- accepted (WREADY high); otherwise it holds its value.
  -- NOTE(review): this process mixes a clocked section with combinational
  -- statements for dma_wr_ready_o (which is why WREADY and axi_wvalid are in
  -- the sensitivity list). dma_wr_ready_o is '0' only while a valid beat is
  -- waiting and WREADY is low.
  axi_write_buffer:
  process(M_AXI_ACLK,M_AXI_WREADY, axi_wvalid )
  begin
    if (rising_edge (M_AXI_ACLK)) then
      if M_AXI_ARESETN = '0' then
        axi_wvalid <= '0';
      else
        if M_AXI_WREADY = '1' or axi_wvalid = '0' then
          axi_wdata  <= dma_wr_data_i;
          axi_wvalid <= dma_wr_wvalid;
          axi_wstrb  <= dma_wr_data_strobe_i;
          axi_wlast  <= dma_wr_data_last_i;
        end if;
      end if;

    end if;
    -- combinational back-pressure towards the user logic
    dma_wr_ready_o <= '1';
    if M_AXI_WREADY = '0' and axi_wvalid = '1' then
      dma_wr_ready_o <= '0';
    end if;
  end process;



  -- Read address channel: same request / acknowledge scheme as the write
  -- address channel in axi_w.
  axi_r: process(M_AXI_ACLK)
  begin
    if (rising_edge (M_AXI_ACLK)) then
      dma_rd_req_ack_o <= '0';           -- default: single-cycle pulse
      if (M_AXI_ARESETN = '0' ) then
        axi_arvalid       <= '0';
        rd_req_wait_cycle <= '0';
      else
        rd_req_wait_cycle <= '0';
        -- latch a new request and raise ARVALID
        if dma_rd_req_i = '1' and rd_req_wait_cycle = '0' then
          axi_arvalid <= '1';
          axi_araddr  <= dma_rd_addr_i;
          axi_arlen   <= dma_rd_len_i;
        end if;
        -- address accepted: acknowledge and skip the request input for one cycle
        if axi_arvalid = '1' and M_AXI_ARREADY = '1' then
          axi_arvalid       <= '0';
          dma_rd_req_ack_o  <= '1';
          rd_req_wait_cycle <= '1';
        end if;
      end if;

    end if;
  end process;

end action_axi_master;
-------------------------------------------------------------------------------- /ip/create_action_ip.tcl:
--------------------------------------------------------------------------------

## Env Variables
#
# Creates a scratch Vivado IP project and generates the Xilinx IP cores
# (block RAMs and FIFOs) listed at the bottom of this script.
#
# Arguments (read from $argv):
#   0: action_root - root directory of this action
#   1: fpga_part   - FPGA part string handed to create_project -part

if {![info exists argv] || [llength $argv] < 2} {
  error "create_action_ip.tcl: expected arguments <action_root> <fpga_part>"
}

set action_root [lindex $argv 0]
set fpga_part   [lindex $argv 1]
#set fpga_part  xcvu9p-flgb2104-2l-e
#set action_root ../

set aip_dir   $action_root/ip
set log_dir   $action_root/../../hardware/logs
set log_file  $log_dir/create_action_ip.log
set src_dir   $aip_dir/action_ip_prj/action_ip_prj.srcs/sources_1/ip

# Every command below appends to $log_file, so its directory has to exist.
file mkdir $log_dir

# Create one IP core, apply its configuration and generate its outputs.
#   ip_name     - IP name passed to create_ip -name
#   ip_version  - version pattern passed to create_ip -version
#   module_name - module name; also the directory / .xci base name under $src_dir
#   config      - flat list of CONFIG.<property> <value> pairs for set_property -dict
# Uses the global variables aip_dir, src_dir and log_file.
proc generate_action_ip {ip_name ip_version module_name config} {
  global aip_dir src_dir log_file

  puts " generating IP $module_name"
  #add_files -norecurse $src_dir/$module_name/$module_name.xci >> $log_file
  create_ip -name $ip_name -vendor xilinx.com -library ip -version $ip_version -module_name $module_name >> $log_file
  set_property -dict $config [get_ips $module_name]

  set xci_file $src_dir/$module_name/$module_name.xci
  set_property generate_synth_checkpoint false [get_files $xci_file] >> $log_file
  generate_target {instantiation_template} [get_files $xci_file] >> $log_file
  generate_target all [get_files $xci_file] >> $log_file
  export_ip_user_files -of_objects [get_files $xci_file] -no_script -force >> $log_file
  export_simulation -of_objects [get_files $xci_file] -directory $aip_dir/ip_user_files/sim_scripts -force >> $log_file
}

## Create a new Vivado IP Project
puts "\[CREATE_ACTION_IPs...\] start [clock format [clock seconds] -format {%T %a %b %d %Y}]"
#puts " FPGACHIP = $fpga_part"
#puts " ACTION_ROOT = $action_root"
#puts " Creating IP in $src_dir"
create_project action_ip_prj $aip_dir/action_ip_prj -force -part $fpga_part -ip >> $log_file

# Project IP Settings
# NOTE: the CONFIG lists below keep the original key order and values exactly.

generate_action_ip blk_mem_gen 8.* blockram {
  CONFIG.Memory_Type {Simple_Dual_Port_RAM}
  CONFIG.Assume_Synchronous_Clk {true}
  CONFIG.Write_Width_A {72}
  CONFIG.Write_Depth_A {512}
  CONFIG.Read_Width_A {72}
  CONFIG.Operating_Mode_A {READ_FIRST}
  CONFIG.Write_Width_B {72}
  CONFIG.Read_Width_B {72}
  CONFIG.Operating_Mode_B {READ_FIRST}
  CONFIG.Enable_B {Use_ENB_Pin}
  CONFIG.Register_PortA_Output_of_Memory_Primitives {false}
  CONFIG.Register_PortB_Output_of_Memory_Primitives {false}
  CONFIG.Port_B_Clock {100}
  CONFIG.Port_B_Enable_Rate {100}
  CONFIG.Use_Byte_Write_Enable {true}
  CONFIG.Fill_Remaining_Memory_Locations {true}
}

generate_action_ip fifo_generator 13.* data_fifo {
  CONFIG.Fifo_Implementation {Common_Clock_Block_RAM}
  CONFIG.asymmetric_port_width {true}
  CONFIG.Input_Data_Width {512}
  CONFIG.Input_Depth {512}
  CONFIG.Output_Data_Width {128}
  CONFIG.Output_Depth {2048}
  CONFIG.Use_Embedded_Registers {false}
  CONFIG.Almost_Full_Flag {true}
  CONFIG.Valid_Flag {true}
  CONFIG.Use_Extra_Logic {true}
  CONFIG.Data_Count_Width {9}
  CONFIG.Write_Data_Count_Width {10}
  CONFIG.Read_Data_Count_Width {12}
  CONFIG.Programmable_Full_Type {Single_Programmable_Full_Threshold_Constant}
  CONFIG.Full_Threshold_Assert_Value {500}
  CONFIG.Full_Threshold_Negate_Value {499}
}

# Write_Width_A / Read_Width_A appear twice on purpose: the original list
# repeats them after Byte_Size, and that order is preserved here.
generate_action_ip blk_mem_gen 8.* debugram {
  CONFIG.Memory_Type {Simple_Dual_Port_RAM}
  CONFIG.Assume_Synchronous_Clk {true}
  CONFIG.Write_Width_A {64}
  CONFIG.Write_Depth_A {512}
  CONFIG.Read_Width_A {64}
  CONFIG.Operating_Mode_A {READ_FIRST}
  CONFIG.Write_Width_B {64}
  CONFIG.Read_Width_B {64}
  CONFIG.Operating_Mode_B {READ_FIRST}
  CONFIG.Enable_B {Use_ENB_Pin}
  CONFIG.Register_PortA_Output_of_Memory_Primitives {false}
  CONFIG.Register_PortB_Output_of_Memory_Primitives {false}
  CONFIG.Port_B_Clock {100}
  CONFIG.Port_B_Enable_Rate {100}
  CONFIG.Use_Byte_Write_Enable {true}
  CONFIG.Byte_Size {8}
  CONFIG.Write_Width_A {64}
  CONFIG.Read_Width_A {64}
  CONFIG.Fill_Remaining_Memory_Locations {true}
}

generate_action_ip fifo_generator 13.* page_fifo {
  CONFIG.Input_Data_Width {181}
  CONFIG.Input_Depth {512}
  CONFIG.Output_Data_Width {181}
  CONFIG.Output_Depth {512}
  CONFIG.Valid_Flag {true}
  CONFIG.Data_Count_Width {9}
  CONFIG.Write_Data_Count_Width {9}
  CONFIG.Read_Data_Count_Width {9}
  CONFIG.Programmable_Full_Type {Single_Programmable_Full_Threshold_Constant}
  CONFIG.Full_Threshold_Assert_Value {500}
  CONFIG.Full_Threshold_Negate_Value {499}
}

generate_action_ip blk_mem_gen 8.* result_ram {
  CONFIG.Memory_Type {Simple_Dual_Port_RAM}
  CONFIG.Use_Byte_Write_Enable {true}
  CONFIG.Write_Width_A {72}
  CONFIG.Write_Depth_A {512}
  CONFIG.Read_Width_A {72}
  CONFIG.Operating_Mode_A {READ_FIRST}
  CONFIG.Write_Width_B {72}
  CONFIG.Read_Width_B {72}
  CONFIG.Enable_B {Use_ENB_Pin}
  CONFIG.Register_PortA_Output_of_Memory_Primitives {false}
  CONFIG.Register_PortB_Output_of_Memory_Primitives {false}
  CONFIG.Fill_Remaining_Memory_Locations {true}
  CONFIG.Port_B_Clock {100}
  CONFIG.Port_B_Enable_Rate {100}
}

generate_action_ip fifo_generator 13.* unsolved_fifo {
  CONFIG.Fifo_Implementation {Common_Clock_Block_RAM}
  CONFIG.Input_Data_Width {33}
  CONFIG.Input_Depth {512}
  CONFIG.Output_Data_Width {33}
  CONFIG.Output_Depth {512}
  CONFIG.Use_Embedded_Registers {false}
  CONFIG.Almost_Full_Flag {true}
  CONFIG.Valid_Flag {true}
  CONFIG.Data_Count {true}
  CONFIG.Data_Count_Width {9}
  CONFIG.Write_Data_Count_Width {9}
  CONFIG.Read_Data_Count_Width {9}
  CONFIG.Programmable_Full_Type {Single_Programmable_Full_Threshold_Constant}
  CONFIG.Full_Threshold_Assert_Value {450}
  CONFIG.Full_Threshold_Negate_Value {449}
}

close_project
puts "\[CREATE_ACTION_IPs...\] done [clock format [clock seconds] -format {%T %a %b %d %Y}]"
-------------------------------------------------------------------------------- /sw/snap_decompressor.c: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | Name: snap_decompressor 3 | Author: Jianyu Chen 4 | School: Delft Univsersity of Technology 5 | Date: 12th July, 2018 6 | Function: This a program to test the hardware Snappy decompressor, 7 | it will read compressed data from a file, send the command 8 | to the FPGA (or FPGA simulation). After the decompression, 9 | it will output the decompression result to a file. 10 | ********************************************/ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "snap_example.h" 29 | 30 | /* defaults */ 31 | #define START_DELAY 200 32 | #define END_DELAY 2000 33 | #define STEP_DELAY 200 34 | #define DEFAULT_MEMCPY_BLOCK 4096 35 | #define DEFAULT_MEMCPY_ITER 1 36 | #define ACTION_WAIT_TIME 1 /* Default in sec */ 37 | 38 | #define MEGAB (1024*1024ull) 39 | #define GIGAB (1024 * MEGAB) 40 | 41 | 42 | #define VERBOSE0(fmt, ...) do { \ 43 | printf(fmt, ## __VA_ARGS__); \ 44 | } while (0) 45 | 46 | #define VERBOSE1(fmt, ...) do { \ 47 | if (verbose_level > 0) \ 48 | printf(fmt, ## __VA_ARGS__); \ 49 | } while (0) 50 | 51 | #define VERBOSE2(fmt, ...) do { \ 52 | if (verbose_level > 1) \ 53 | printf(fmt, ## __VA_ARGS__); \ 54 | } while (0) 55 | 56 | 57 | #define VERBOSE3(fmt, ...) do { \ 58 | if (verbose_level > 2) \ 59 | printf(fmt, ## __VA_ARGS__); \ 60 | } while (0) 61 | 62 | #define VERBOSE4(fmt, ...) 
do { \ 63 | if (verbose_level > 3) \ 64 | printf(fmt, ## __VA_ARGS__); \ 65 | } while (0) 66 | 67 | static const char *version = GIT_VERSION; 68 | static int verbose_level = 0; 69 | 70 | int get_decompression_length(uint8_t *); 71 | 72 | static uint64_t get_usec(void) 73 | { 74 | struct timeval t; 75 | 76 | gettimeofday(&t, NULL); 77 | return t.tv_sec * 1000000 + t.tv_usec; 78 | } 79 | 80 | 81 | static void free_mem(void *a) 82 | { 83 | VERBOSE2("Free Mem %p\n", a); 84 | if (a) 85 | free(a); 86 | } 87 | 88 | 89 | /* Action or Kernel Write and Read are 32 bit MMIO */ 90 | static void action_write(struct snap_card* h, uint32_t addr, uint32_t data) 91 | { 92 | int rc; 93 | 94 | rc = snap_mmio_write32(h, (uint64_t)addr, data); 95 | if (0 != rc) 96 | VERBOSE0("Write MMIO 32 Err\n"); 97 | return; 98 | } 99 | 100 | 101 | /* 102 | * an complete function alternative 103 | * same as action_action_completed but more MMIO info feedback 104 | */ 105 | static int snap_action_completed_withMMIO(struct snap_action *action, int *rc, int timeout) 106 | { 107 | // More MMIO read can be done in this function 108 | 109 | int _rc = 0; 110 | uint32_t action_data = 0; 111 | struct snap_card *card = (struct snap_card *)action; 112 | unsigned long t0; 113 | int dt, timeout_us; 114 | 115 | uint32_t rc2=0; 116 | int counter=0; 117 | 118 | /* Busy poll timout sec */ 119 | t0 = get_usec(); 120 | dt = 0; 121 | timeout_us = timeout * 1000 * 1000; 122 | while (dt < timeout_us) { 123 | _rc = snap_mmio_read32(card, ACTION_CONTROL, &action_data); 124 | 125 | if(rc2!=action_data) { 126 | counter ++; 127 | printf("State %d -- (Register Code): %d\n",counter,action_data); 128 | rc2=action_data; 129 | } 130 | 131 | /* TODO: 132 | * 1. add more MMIO read if needed 133 | * 2. 
#define 134 | */ 135 | 136 | if ((action_data & ACTION_CONTROL_IDLE) == ACTION_CONTROL_IDLE) 137 | break; 138 | dt = (int)(get_usec() - t0); 139 | } 140 | if (rc) 141 | *rc = _rc; 142 | 143 | // Test the rc in calling function for normal or timeout (rc=0) termination 144 | return (action_data & ACTION_CONTROL_IDLE) == ACTION_CONTROL_IDLE; 145 | } 146 | 147 | 148 | /* 149 | * Start Action and wait for Idle. 150 | */ 151 | static int action_wait_idle(struct snap_card* h, int timeout, uint64_t *elapsed) 152 | { 153 | int rc = 0; 154 | uint64_t t_start; /* time in usec */ 155 | uint64_t td = 0; /* Diff time in usec */ 156 | 157 | /* FIXME Use struct snap_action and not struct snap_card */ 158 | snap_action_start((void*)h); 159 | 160 | /* Wait for Action to go back to Idle */ 161 | t_start = get_usec(); 162 | // rc = snap_action_completed((void*)h, NULL, timeout); 163 | rc = snap_action_completed_withMMIO((void*)h, NULL, timeout); 164 | if (rc) rc = 0; /* Good */ 165 | else rc = ETIME; /* Timeout */ 166 | if (0 != rc) 167 | VERBOSE0("%s Timeout Error\n", __func__); 168 | td = get_usec() - t_start; 169 | *elapsed = td; 170 | return rc; 171 | } 172 | 173 | 174 | static void action_decompress(struct snap_card* h, 175 | void *dest, 176 | const void *src, 177 | size_t rd_size, 178 | size_t wr_size) 179 | { 180 | uint64_t addr; 181 | 182 | VERBOSE1(" decompress from %p to %p\n with input size %ld and output size %ld\n", src, dest, rd_size,wr_size); 183 | addr = (uint64_t)dest; 184 | action_write(h, ACTION_DEST_LOW, (uint32_t)(addr & 0xffffffff)); 185 | action_write(h, ACTION_DEST_HIGH, (uint32_t)(addr >> 32)); 186 | addr = (uint64_t)src; 187 | action_write(h, ACTION_SRC_LOW, (uint32_t)(addr & 0xffffffff)); 188 | action_write(h, ACTION_SRC_HIGH, (uint32_t)(addr >> 32)); 189 | action_write(h, ACTION_RD_SIZE, rd_size); 190 | action_write(h, ACTION_WR_SIZE, wr_size); 191 | 192 | return; 193 | } 194 | 195 | 196 | 197 | static int do_decompression(struct snap_card *h, 198 | 
snap_action_flag_t flags, 199 | int timeout, 200 | void *dest, 201 | void *src, 202 | unsigned long rd_size, 203 | unsigned long wr_size, 204 | int skip_Detach 205 | ) 206 | { 207 | int rc; 208 | struct snap_action *act = NULL; 209 | uint64_t td; 210 | 211 | /* attach the action */ 212 | act = snap_attach_action(h, ACTION_TYPE_EXAMPLE, flags, 5 * timeout); 213 | if (NULL == act) { 214 | VERBOSE0("Error: Can not attach Action: %x\n", ACTION_TYPE_EXAMPLE); 215 | VERBOSE0(" Try to run snap_main tool\n"); 216 | return 0x100; 217 | } 218 | 219 | /* send action control data */ 220 | action_decompress(h, dest, src, rd_size,wr_size); 221 | 222 | /* start the action and wait for it ends */ 223 | rc = action_wait_idle(h, timeout, &td); 224 | 225 | if (rc == 0 ) // No timeout 226 | printf("Decompression was done in %lf ms\n", (double)(td/1000.)); 227 | 228 | if(skip_Detach==0) { /* No '-S' option, so do not skip detach*/ 229 | if (0 != snap_detach_action(act)) { 230 | VERBOSE0("Error: Can not detach Action: %x\n", ACTION_TYPE_EXAMPLE); 231 | rc |= 0x100; 232 | } 233 | } 234 | else { 235 | printf("Warning: Action detach is skipped!\n"); 236 | } 237 | return rc; 238 | } 239 | 240 | /*calculate the length of the uncompressed data 241 | src: the source of the compressed data*/ 242 | int get_decompression_length(uint8_t * src){ 243 | int length=0; 244 | length|=(src[0] & 0x7f); 245 | if(src[0]&0x80){ 246 | length |= (src[1]&0x7f)<<7; 247 | }else{ 248 | return length; 249 | } 250 | if(src[1]&0x80){ 251 | length |= (src[2]&0x7f)<<14; 252 | }else{ 253 | return length; 254 | } 255 | if(src[2]&0x80){ 256 | length |= (src[3]&0x7f)<<21; 257 | }else{ 258 | return length; 259 | } 260 | if(src[3]&0x80){ 261 | length |= (src[4]&0x7f)<<28; 262 | }else{ 263 | return length; 264 | } 265 | return length; 266 | } 267 | 268 | 269 | static int decompression_test(struct snap_card* dnc, 270 | snap_action_flag_t attach_flags, 271 | int timeout,/* Timeout to wait in sec */ 272 | char* inputfile, 273 | 
char* outputfile, 274 | int skip_Detach 275 | ) 276 | { 277 | int rc; 278 | void *src = NULL; 279 | void *dest = NULL; 280 | 281 | /*prepare read data and write space*/ 282 | 283 | uint8_t *ibuff = NULL, *obuff = NULL; 284 | ssize_t size = 0; 285 | size_t set_size = 1*64*1024; 286 | 287 | printf("1: The input file is: %s\n",inputfile); 288 | size = __file_size(inputfile); 289 | printf("The size of the input is %d \n",(int)size); 290 | ibuff = snap_malloc(size); 291 | if (ibuff == NULL){ 292 | printf("ibuff null"); 293 | return 1; 294 | } 295 | 296 | printf("2: The output file is: %s\n",outputfile); 297 | 298 | rc = __file_read(inputfile, ibuff, size); 299 | set_size=get_decompression_length(ibuff); ///calculate the length of the output 300 | printf("The size of the output is %d \n",(int)set_size); 301 | 302 | /*At the end of decompression, there maybe some garbage with the size of less than 64 bytes. 303 | inorder to save the hardware resource, the garbage will also be transfered back, so in the 304 | software side, always allocate a more memory for writing back. */ 305 | obuff = snap_malloc(set_size+128); 306 | if (obuff == NULL){ 307 | printf("obuff null"); 308 | return 1; 309 | } 310 | 311 | /* initial the memory to 'A' for debug */ 312 | memset(obuff, (int)('A'), set_size+128); 313 | 314 | if (rc < 0){ 315 | printf("rc null"); 316 | return 1; 317 | } 318 | src = (void *)ibuff; 319 | dest = (void *)obuff; 320 | 321 | rc = do_decompression(dnc, attach_flags, timeout, dest, src, size, set_size, skip_Detach); 322 | if (0 == rc) { 323 | printf("decompression finished - compression factor on this file was %d %% \n", (int)(100. 
- (100.*size)/set_size)); 324 | } 325 | /******output the decompression result******/ 326 | FILE * pFile; 327 | pFile=fopen(outputfile,"wb"); 328 | fwrite((void*)obuff,sizeof(char),set_size,pFile); 329 | 330 | free_mem(ibuff); 331 | free_mem(obuff); 332 | 333 | return 0; 334 | } 335 | 336 | 337 | 338 | static void usage(const char *prog) 339 | { 340 | VERBOSE0("SNAP Based FPGA Snappy Decompressor.\n" 341 | " e.g. %s -v -t 10 -i -o \n", prog); 342 | VERBOSE0("Usage: %s\n" 343 | " -h, --help print usage information\n" 344 | " -v, --verbose verbose mode\n" 345 | " -C, --card use this card for operation\n" 346 | " -V, --version\n" 347 | " -t, --timeout Timeout after N sec (default 1 sec)\n" 348 | " -s, --start Start delay in msec (default %d)\n" 349 | " -e, --end End delay time in msec (default %d)\n" 350 | " -i, --input Specify the input file (in simulation, please use abs path)\n" 351 | " -o, --ouput Specify the output file (in simulation, please use abs path)\n" 352 | " -S. --skip Skip detach for debug only (do not use this in release version)\n" 353 | 354 | " -B, --size64 Number of 64 Bytes Blocks for Memcopy (default 0)\n" 355 | " -A, --align Memcpy alignemend (default 4 KB)\n" 356 | , prog, START_DELAY, END_DELAY); 357 | } 358 | 359 | static void printVersion() 360 | { 361 | const char date_version[128] = "Decompressor 2019-02-01-v001"; 362 | printf("**************************************************************\n"); // 58 * 363 | printf("** App Version: %-*s**\n", 40, date_version); // 18 chars, need 40 more 364 | printf("**************************************************************\n\n"); 365 | } 366 | 367 | 368 | int main(int argc, char *argv[]) 369 | { 370 | char device[128]; 371 | char inputfile[256]="testdata/test.snp"; 372 | char outputfile[256]="testdata/test.txt"; 373 | int skip_Detach = 0; 374 | struct snap_card *dn; /* lib snap handle */ 375 | int start_delay = START_DELAY; 376 | int end_delay = END_DELAY; 377 | int card_no = 0; 378 | int cmd; 379 | 
int num_64 = 0; /* Default is 0 64 Bytes Blocks */ 380 | int rc = 1; 381 | int memcpy_align = DEFAULT_MEMCPY_BLOCK; 382 | uint64_t cir; 383 | int timeout = ACTION_WAIT_TIME; 384 | snap_action_flag_t attach_flags = 0; 385 | unsigned long ioctl_data; 386 | unsigned long dma_align; 387 | unsigned long dma_min_size; 388 | char card_name[16]; /* Space for Card name */ 389 | 390 | /* print the Software Version */ 391 | printVersion(); 392 | 393 | /*********************** Argument Parsing *************************/ 394 | while (1) { 395 | int option_index = 0; 396 | static struct option long_options[] = { 397 | { "card", required_argument, NULL, 'C' }, 398 | { "verbose", no_argument, NULL, 'v' }, 399 | { "help", no_argument, NULL, 'h' }, 400 | { "version", no_argument, NULL, 'V' }, 401 | { "start", required_argument, NULL, 's' }, 402 | { "end", required_argument, NULL, 'e' }, 403 | { "input", required_argument, NULL, 'i' }, 404 | { "output", required_argument, NULL, 'o' }, 405 | { "size64", required_argument, NULL, 'B' }, 406 | { "align", required_argument, NULL, 'A' }, 407 | { "timeout", required_argument, NULL, 't' }, 408 | { "irq", no_argument, NULL, 'I' }, 409 | { "skip", no_argument, NULL, 'S' }, 410 | { 0, no_argument, NULL, 0 }, 411 | }; 412 | cmd = getopt_long(argc, argv, "C:s:e:i:o:B:A:t:IvVh", 413 | long_options, &option_index); 414 | if (cmd == -1) /* all params processed ? 
*/ 415 | break; 416 | 417 | switch (cmd) { 418 | case 'v': /* verbose */ 419 | verbose_level++; 420 | break; 421 | case 'V': /* version */ 422 | VERBOSE0("%s\n", version); 423 | exit(EXIT_SUCCESS);; 424 | case 'h': /* help */ 425 | usage(argv[0]); 426 | exit(EXIT_SUCCESS);; 427 | case 'C': /* card */ 428 | card_no = strtol(optarg, (char **)NULL, 0); 429 | break; 430 | case 's': /* start delay */ 431 | start_delay = strtol(optarg, (char **)NULL, 0); 432 | break; 433 | case 'e': /* end delay */ 434 | end_delay = strtol(optarg, (char **)NULL, 0); 435 | break; 436 | case 'i': /* input file */ 437 | strcpy(inputfile,optarg); 438 | break; 439 | case 'o': /* output file */ 440 | strcpy(outputfile,optarg); 441 | break; 442 | case 'S': /* skip detach */ 443 | skip_Detach++; 444 | break; 445 | case 'B': /* size64 */ 446 | num_64 = strtol(optarg, (char **)NULL, 0); 447 | break; 448 | case 'A': /* align */ 449 | memcpy_align = strtol(optarg, (char **)NULL, 0); 450 | if (memcpy_align > DEFAULT_MEMCPY_BLOCK) { 451 | VERBOSE0("ERROR: Align (-A %d) is to high. 
Max: %d Bytes\n", 452 | memcpy_align, DEFAULT_MEMCPY_BLOCK); 453 | exit(1); 454 | } 455 | break; 456 | case 't': /* timeout */ 457 | timeout = strtol(optarg, (char **)NULL, 0); /* in sec */ 458 | break; 459 | case 'I': /* irq */ 460 | attach_flags = SNAP_ACTION_DONE_IRQ | SNAP_ATTACH_IRQ; 461 | break; 462 | default: 463 | usage(argv[0]); 464 | exit(EXIT_FAILURE); 465 | } 466 | } 467 | 468 | if (end_delay > 16000) { 469 | usage(argv[0]); 470 | exit(1); 471 | } 472 | if (start_delay > end_delay) { 473 | usage(argv[0]); 474 | exit(1); 475 | } 476 | if (card_no > 4) { 477 | usage(argv[0]); 478 | exit(1); 479 | } 480 | /*********************** End Argument Parsing *************************/ 481 | 482 | 483 | sprintf(device, "/dev/cxl/afu%d.0s", card_no); 484 | VERBOSE2("Open Card: %d device: %s\n", card_no, device); 485 | dn = snap_card_alloc_dev(device, SNAP_VENDOR_ID_IBM, SNAP_DEVICE_ID_SNAP); 486 | if (NULL == dn) { 487 | VERBOSE0("ERROR: Can not Open (%s)\n", device); 488 | errno = ENODEV; 489 | perror("ERROR"); 490 | return -1; 491 | } 492 | 493 | /* Read Card Name */ 494 | snap_card_ioctl(dn, GET_CARD_NAME, (unsigned long)&card_name); 495 | VERBOSE1("SNAP on %s", card_name); 496 | 497 | snap_card_ioctl(dn, GET_SDRAM_SIZE, (unsigned long)&ioctl_data); 498 | VERBOSE1(" Card, %d MB of Card Ram avilable. 
", (int)ioctl_data); 499 | 500 | snap_card_ioctl(dn, GET_DMA_ALIGN, (unsigned long)&dma_align); 501 | VERBOSE1(" (Align: %d ", (int)dma_align); 502 | 503 | snap_card_ioctl(dn, GET_DMA_MIN_SIZE, (unsigned long)&dma_min_size); 504 | VERBOSE1(" Min DMA: %d Bytes)\n", (int)dma_min_size); 505 | 506 | /* Check Align and DMA Min Size */ 507 | if (memcpy_align & (int)(dma_align-1)) { 508 | VERBOSE0("ERROR: Option -A %d must be a multiple of %d Bytes for %s Cards.\n", 509 | memcpy_align, (int)dma_align, card_name); 510 | rc = 0x100; 511 | goto __exit1; 512 | } 513 | if (num_64*64 & (int)(dma_min_size-1)) { 514 | VERBOSE0("ERROR: Option -B %d must be a multiple of %d Bytes for %s Cards.\n", 515 | num_64, (int)dma_min_size, card_name); 516 | rc = 0x100; 517 | goto __exit1; 518 | } 519 | snap_mmio_read64(dn, SNAP_S_CIR, &cir); 520 | VERBOSE1("Start of Action Card Handle: %p Context: %d\n", dn, (int)(cir & 0x1ff)); 521 | 522 | /* start decompression */ 523 | rc=decompression_test(dn, attach_flags, timeout, inputfile, outputfile, skip_Detach); 524 | 525 | __exit1: 526 | // Unmap AFU MMIO registers, if previously mapped 527 | VERBOSE2("Free Card Handle: %p\n", dn); 528 | snap_card_free(dn); 529 | 530 | VERBOSE1("End of Test rc: %d\n", rc); 531 | return rc; 532 | } 533 | -------------------------------------------------------------------------------- /hw/interface/action_wrapper.vhd: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------------------------- 2 | ---------------------------------------------------------------------------- 3 | -- 4 | -- Copyright 2016,2017 International Business Machines 5 | -- 6 | -- Licensed under the Apache License, Version 2.0 (the "License"); 7 | -- you may not use this file except in compliance with the License. 
8 | -- You may obtain a copy of the License at 9 | -- 10 | -- http://www.apache.org/licenses/LICENSE-2.0 11 | -- 12 | -- Unless required by applicable law or agreed to in writing, software 13 | -- distributed under the License is distributed on an "AS IS" BASIS, 14 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | -- See the License for the specific language governing permissions AND 16 | -- limitations under the License. 17 | -- 18 | ---------------------------------------------------------------------------- 19 | ---------------------------------------------------------------------------- 20 | 21 | LIBRARY ieee; 22 | USE ieee.std_logic_1164.all; 23 | USE ieee.std_logic_misc.all; 24 | USE ieee.std_logic_unsigned.all; 25 | USE ieee.numeric_std.all; 26 | 27 | USE work.psl_accel_types.ALL; 28 | USE work.action_types.ALL; 29 | 30 | 31 | ENTITY action_wrapper IS 32 | PORT ( 33 | ap_clk : IN STD_LOGIC; 34 | ap_rst_n : IN STD_LOGIC; 35 | interrupt : OUT STD_LOGIC; 36 | interrupt_src : OUT STD_LOGIC_VECTOR(INT_BITS-2 DOWNTO 0); 37 | interrupt_ctx : OUT STD_LOGIC_VECTOR(CONTEXT_BITS-1 DOWNTO 0); 38 | interrupt_ack : IN STD_LOGIC; 39 | -- 40 | -- 41 | -- AXI Control Register Interface 42 | s_axi_ctrl_reg_araddr : IN STD_LOGIC_VECTOR ( C_S_AXI_CTRL_REG_ADDR_WIDTH-1 DOWNTO 0 ); 43 | s_axi_ctrl_reg_arready : OUT STD_LOGIC; 44 | s_axi_ctrl_reg_arvalid : IN STD_LOGIC; 45 | s_axi_ctrl_reg_awaddr : IN STD_LOGIC_VECTOR ( C_S_AXI_CTRL_REG_ADDR_WIDTH-1 DOWNTO 0 ); 46 | s_axi_ctrl_reg_awready : OUT STD_LOGIC; 47 | s_axi_ctrl_reg_awvalid : IN STD_LOGIC; 48 | s_axi_ctrl_reg_bready : IN STD_LOGIC; 49 | s_axi_ctrl_reg_bresp : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 50 | s_axi_ctrl_reg_bvalid : OUT STD_LOGIC; 51 | s_axi_ctrl_reg_rdata : OUT STD_LOGIC_VECTOR ( C_S_AXI_CTRL_REG_DATA_WIDTH-1 DOWNTO 0 ); 52 | s_axi_ctrl_reg_rready : IN STD_LOGIC; 53 | s_axi_ctrl_reg_rresp : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 54 | s_axi_ctrl_reg_rvalid : OUT STD_LOGIC; 55 | 
s_axi_ctrl_reg_wdata : IN STD_LOGIC_VECTOR ( C_S_AXI_CTRL_REG_DATA_WIDTH-1 DOWNTO 0 ); 56 | s_axi_ctrl_reg_wready : OUT STD_LOGIC; 57 | s_axi_ctrl_reg_wstrb : IN STD_LOGIC_VECTOR ( (C_S_AXI_CTRL_REG_DATA_WIDTH/8)-1 DOWNTO 0 ); 58 | s_axi_ctrl_reg_wvalid : IN STD_LOGIC; 59 | -- 60 | -- AXI Host Memory Interface 61 | m_axi_host_mem_araddr : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ADDR_WIDTH-1 DOWNTO 0 ); 62 | m_axi_host_mem_arburst : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 63 | m_axi_host_mem_arcache : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 64 | m_axi_host_mem_arid : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ID_WIDTH-1 DOWNTO 0 ); 65 | m_axi_host_mem_arlen : OUT STD_LOGIC_VECTOR ( 7 DOWNTO 0 ); 66 | m_axi_host_mem_arlock : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 67 | m_axi_host_mem_arprot : OUT STD_LOGIC_VECTOR ( 2 DOWNTO 0 ); 68 | m_axi_host_mem_arqos : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 69 | m_axi_host_mem_arready : IN STD_LOGIC; 70 | m_axi_host_mem_arregion : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 71 | m_axi_host_mem_arsize : OUT STD_LOGIC_VECTOR ( 2 DOWNTO 0 ); 72 | m_axi_host_mem_aruser : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ARUSER_WIDTH-1 DOWNTO 0 ); 73 | m_axi_host_mem_arvalid : OUT STD_LOGIC; 74 | m_axi_host_mem_awaddr : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ADDR_WIDTH-1 DOWNTO 0 ); 75 | m_axi_host_mem_awburst : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 76 | m_axi_host_mem_awcache : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 77 | m_axi_host_mem_awid : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ID_WIDTH-1 DOWNTO 0 ); 78 | m_axi_host_mem_awlen : OUT STD_LOGIC_VECTOR ( 7 DOWNTO 0 ); 79 | m_axi_host_mem_awlock : OUT STD_LOGIC_VECTOR ( 1 DOWNTO 0 ); 80 | m_axi_host_mem_awprot : OUT STD_LOGIC_VECTOR ( 2 DOWNTO 0 ); 81 | m_axi_host_mem_awqos : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 82 | m_axi_host_mem_awready : IN STD_LOGIC; 83 | m_axi_host_mem_awregion : OUT STD_LOGIC_VECTOR ( 3 DOWNTO 0 ); 84 | m_axi_host_mem_awsize : OUT STD_LOGIC_VECTOR ( 2 DOWNTO 0 ); 85 | m_axi_host_mem_awuser : 
OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_AWUSER_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_awvalid : OUT STD_LOGIC;
    m_axi_host_mem_bid : IN STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ID_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_bready : OUT STD_LOGIC;
    m_axi_host_mem_bresp : IN STD_LOGIC_VECTOR ( 1 DOWNTO 0 );
    m_axi_host_mem_buser : IN STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_BUSER_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_bvalid : IN STD_LOGIC;
    m_axi_host_mem_rdata : IN STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_DATA_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_rid : IN STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_ID_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_rlast : IN STD_LOGIC;
    m_axi_host_mem_rready : OUT STD_LOGIC;
    m_axi_host_mem_rresp : IN STD_LOGIC_VECTOR ( 1 DOWNTO 0 );
    m_axi_host_mem_ruser : IN STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_RUSER_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_rvalid : IN STD_LOGIC;
    m_axi_host_mem_wdata : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_DATA_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_wlast : OUT STD_LOGIC;
    m_axi_host_mem_wready : IN STD_LOGIC;
    m_axi_host_mem_wstrb : OUT STD_LOGIC_VECTOR ( (C_M_AXI_HOST_MEM_DATA_WIDTH/8)-1 DOWNTO 0 );
    m_axi_host_mem_wuser : OUT STD_LOGIC_VECTOR ( C_M_AXI_HOST_MEM_WUSER_WIDTH-1 DOWNTO 0 );
    m_axi_host_mem_wvalid : OUT STD_LOGIC
  );
END action_wrapper;

-- Purely structural architecture: it declares the `action_example`
-- component and instantiates it once (`action_0`), forwarding the wrapper's
-- generics and wiring the wrapper's ports to the component's ports by name.
-- It contains no logic of its own.
architecture STRUCTURE of action_wrapper is

  component action_example
    generic (
      -- Parameters for Axi Master Bus Interface AXI_CARD_MEM0 : to on-card SDRAM
      -- (only the width generics appear here; this component declaration
      --  has no AXI_CARD_MEM0 ports)
      C_AXI_CARD_MEM0_ID_WIDTH     : integer;
      C_AXI_CARD_MEM0_ADDR_WIDTH   : integer;
      C_AXI_CARD_MEM0_DATA_WIDTH   : integer;
      C_AXI_CARD_MEM0_AWUSER_WIDTH : integer;
      C_AXI_CARD_MEM0_ARUSER_WIDTH : integer;
      C_AXI_CARD_MEM0_WUSER_WIDTH  : integer;
      C_AXI_CARD_MEM0_RUSER_WIDTH  : integer;
      C_AXI_CARD_MEM0_BUSER_WIDTH  : integer;

      -- Parameters for Axi Slave Bus Interface AXI_CTRL_REG
      C_AXI_CTRL_REG_DATA_WIDTH    : integer;
      C_AXI_CTRL_REG_ADDR_WIDTH    : integer;

      -- Parameters for Axi Master Bus Interface AXI_HOST_MEM : to Host memory
      C_AXI_HOST_MEM_ID_WIDTH      : integer;
      C_AXI_HOST_MEM_ADDR_WIDTH    : integer;
      C_AXI_HOST_MEM_DATA_WIDTH    : integer;
      C_AXI_HOST_MEM_AWUSER_WIDTH  : integer;
      C_AXI_HOST_MEM_ARUSER_WIDTH  : integer;
      C_AXI_HOST_MEM_WUSER_WIDTH   : integer;
      C_AXI_HOST_MEM_RUSER_WIDTH   : integer;
      C_AXI_HOST_MEM_BUSER_WIDTH   : integer;

      -- Interrupt source / context vector sizing
      INT_BITS                     : integer;
      CONTEXT_BITS                 : integer
    );
    -- NOTE(review): the vector bounds below are written with the enclosing
    -- entity's generics (C_S_AXI_* / C_M_AXI_*), not with this component's
    -- own C_AXI_* generics. The generic map further down sets each C_AXI_*
    -- generic to its C_S_/C_M_ counterpart, so the widths agree.
    port (
      action_clk            : in  std_logic;
      action_rst_n          : in  std_logic;
      int_req               : out std_logic;
      int_src               : out std_logic_vector(INT_BITS-2 downto 0);
      int_ctx               : out std_logic_vector(CONTEXT_BITS-1 downto 0);
      int_req_ack           : in  std_logic;

      -- Ports of Axi Slave Bus Interface AXI_CTRL_REG
      axi_ctrl_reg_awaddr   : in  std_logic_vector(C_S_AXI_CTRL_REG_ADDR_WIDTH-1 downto 0);
      axi_ctrl_reg_awvalid  : in  std_logic;
      axi_ctrl_reg_awready  : out std_logic;
      axi_ctrl_reg_wdata    : in  std_logic_vector(C_S_AXI_CTRL_REG_DATA_WIDTH-1 downto 0);
      axi_ctrl_reg_wstrb    : in  std_logic_vector((C_S_AXI_CTRL_REG_DATA_WIDTH/8)-1 downto 0);
      axi_ctrl_reg_wvalid   : in  std_logic;
      axi_ctrl_reg_wready   : out std_logic;
      axi_ctrl_reg_bresp    : out std_logic_vector(1 downto 0);
      axi_ctrl_reg_bvalid   : out std_logic;
      axi_ctrl_reg_bready   : in  std_logic;
      axi_ctrl_reg_araddr   : in  std_logic_vector(C_S_AXI_CTRL_REG_ADDR_WIDTH-1 downto 0);
      axi_ctrl_reg_arvalid  : in  std_logic;
      axi_ctrl_reg_arready  : out std_logic;
      axi_ctrl_reg_rdata    : out std_logic_vector(C_S_AXI_CTRL_REG_DATA_WIDTH-1 downto 0);
      axi_ctrl_reg_rresp    : out std_logic_vector(1 downto 0);
      axi_ctrl_reg_rvalid   : out std_logic;
      axi_ctrl_reg_rready   : in  std_logic;

      -- Ports of Axi Master Bus Interface AXI_HOST_MEM (to HOST memory)
      axi_host_mem_awaddr   : out std_logic_vector(C_M_AXI_HOST_MEM_ADDR_WIDTH-1 downto 0);
      axi_host_mem_awlen    : out std_logic_vector(7 downto 0);
      axi_host_mem_awsize   : out std_logic_vector(2 downto 0);
      axi_host_mem_awburst  : out std_logic_vector(1 downto 0);
      axi_host_mem_awlock   : out std_logic_vector(1 downto 0);
      axi_host_mem_awcache  : out std_logic_vector(3 downto 0);
      axi_host_mem_awprot   : out std_logic_vector(2 downto 0);
      axi_host_mem_awregion : out std_logic_vector(3 downto 0);
      axi_host_mem_awqos    : out std_logic_vector(3 downto 0);
      axi_host_mem_awvalid  : out std_logic;
      axi_host_mem_awready  : in  std_logic;
      axi_host_mem_wdata    : out std_logic_vector(C_M_AXI_HOST_MEM_DATA_WIDTH-1 downto 0);
      axi_host_mem_wstrb    : out std_logic_vector(C_M_AXI_HOST_MEM_DATA_WIDTH/8-1 downto 0);
      axi_host_mem_wlast    : out std_logic;
      axi_host_mem_wvalid   : out std_logic;
      axi_host_mem_wready   : in  std_logic;
      axi_host_mem_bresp    : in  std_logic_vector(1 downto 0);
      axi_host_mem_bvalid   : in  std_logic;
      axi_host_mem_bready   : out std_logic;
      axi_host_mem_araddr   : out std_logic_vector(C_M_AXI_HOST_MEM_ADDR_WIDTH-1 downto 0);
      axi_host_mem_arlen    : out std_logic_vector(7 downto 0);
      axi_host_mem_arsize   : out std_logic_vector(2 downto 0);
      axi_host_mem_arburst  : out std_logic_vector(1 downto 0);
      axi_host_mem_arlock   : out std_logic_vector(1 downto 0);
      axi_host_mem_arcache  : out std_logic_vector(3 downto 0);
      axi_host_mem_arprot   : out std_logic_vector(2 downto 0);
      axi_host_mem_arregion : out std_logic_vector(3 downto 0);
      axi_host_mem_arqos    : out std_logic_vector(3 downto 0);
      axi_host_mem_arvalid  : out std_logic;
      axi_host_mem_arready  : in  std_logic;
      axi_host_mem_rdata    : in  std_logic_vector(C_M_AXI_HOST_MEM_DATA_WIDTH-1 downto 0);
      axi_host_mem_rresp    : in  std_logic_vector(1 downto 0);
      axi_host_mem_rlast    : in  std_logic;
      axi_host_mem_rvalid   : in  std_logic;
      axi_host_mem_rready   : out std_logic;
      axi_host_mem_arid     : out std_logic_vector(C_M_AXI_HOST_MEM_ID_WIDTH-1 downto 0);
      axi_host_mem_aruser   : out std_logic_vector(C_M_AXI_HOST_MEM_ARUSER_WIDTH-1 downto 0);
      axi_host_mem_awid     : out std_logic_vector(C_M_AXI_HOST_MEM_ID_WIDTH-1 downto 0);
      axi_host_mem_awuser   : out std_logic_vector(C_M_AXI_HOST_MEM_AWUSER_WIDTH-1 downto 0);
      axi_host_mem_bid      : in  std_logic_vector(C_M_AXI_HOST_MEM_ID_WIDTH-1 downto 0);
      axi_host_mem_buser    : in  std_logic_vector(C_M_AXI_HOST_MEM_BUSER_WIDTH-1 downto 0);
      axi_host_mem_rid      : in  std_logic_vector(C_M_AXI_HOST_MEM_ID_WIDTH-1 downto 0);
      axi_host_mem_ruser    : in  std_logic_vector(C_M_AXI_HOST_MEM_RUSER_WIDTH-1 downto 0);
      axi_host_mem_wuser    : out std_logic_vector(C_M_AXI_HOST_MEM_WUSER_WIDTH-1 downto 0)
    );
  end component;

begin

  -- Single instance of the action. Every association is by name, so the
  -- grouping below (by signal prefix) is for readability only.
  action_0 : action_example
    generic map (
      -- AXI_CARD_MEM0 widths
      C_AXI_CARD_MEM0_ID_WIDTH     => C_M_AXI_CARD_MEM0_ID_WIDTH,
      C_AXI_CARD_MEM0_ADDR_WIDTH   => C_M_AXI_CARD_MEM0_ADDR_WIDTH,
      C_AXI_CARD_MEM0_DATA_WIDTH   => C_M_AXI_CARD_MEM0_DATA_WIDTH,
      C_AXI_CARD_MEM0_AWUSER_WIDTH => C_M_AXI_CARD_MEM0_AWUSER_WIDTH,
      C_AXI_CARD_MEM0_ARUSER_WIDTH => C_M_AXI_CARD_MEM0_ARUSER_WIDTH,
      C_AXI_CARD_MEM0_WUSER_WIDTH  => C_M_AXI_CARD_MEM0_WUSER_WIDTH,
      C_AXI_CARD_MEM0_RUSER_WIDTH  => C_M_AXI_CARD_MEM0_RUSER_WIDTH,
      C_AXI_CARD_MEM0_BUSER_WIDTH  => C_M_AXI_CARD_MEM0_BUSER_WIDTH,

      -- AXI_CTRL_REG widths
      C_AXI_CTRL_REG_DATA_WIDTH    => C_S_AXI_CTRL_REG_DATA_WIDTH,
      C_AXI_CTRL_REG_ADDR_WIDTH    => C_S_AXI_CTRL_REG_ADDR_WIDTH,

      -- AXI_HOST_MEM widths
      C_AXI_HOST_MEM_ID_WIDTH      => C_M_AXI_HOST_MEM_ID_WIDTH,
      C_AXI_HOST_MEM_ADDR_WIDTH    => C_M_AXI_HOST_MEM_ADDR_WIDTH,
      C_AXI_HOST_MEM_DATA_WIDTH    => C_M_AXI_HOST_MEM_DATA_WIDTH,
      C_AXI_HOST_MEM_AWUSER_WIDTH  => C_M_AXI_HOST_MEM_AWUSER_WIDTH,
      C_AXI_HOST_MEM_ARUSER_WIDTH  => C_M_AXI_HOST_MEM_ARUSER_WIDTH,
      C_AXI_HOST_MEM_WUSER_WIDTH   => C_M_AXI_HOST_MEM_WUSER_WIDTH,
      C_AXI_HOST_MEM_RUSER_WIDTH   => C_M_AXI_HOST_MEM_RUSER_WIDTH,
      C_AXI_HOST_MEM_BUSER_WIDTH   => C_M_AXI_HOST_MEM_BUSER_WIDTH,

      INT_BITS                     => INT_BITS,
      CONTEXT_BITS                 => CONTEXT_BITS
    )
    port map (
      -- clock, reset and interrupt handshake
      action_clk            => ap_clk,
      action_rst_n          => ap_rst_n,
      int_req               => interrupt,
      int_src               => interrupt_src,
      int_ctx               => interrupt_ctx,
      int_req_ack           => interrupt_ack,

      -- AXI_CTRL_REG: aw* signals
      axi_ctrl_reg_awaddr   => s_axi_ctrl_reg_awaddr,
      axi_ctrl_reg_awvalid  => s_axi_ctrl_reg_awvalid,
      axi_ctrl_reg_awready  => s_axi_ctrl_reg_awready,
      -- AXI_CTRL_REG: w* signals
      axi_ctrl_reg_wdata    => s_axi_ctrl_reg_wdata,
      axi_ctrl_reg_wstrb    => s_axi_ctrl_reg_wstrb,
      axi_ctrl_reg_wvalid   => s_axi_ctrl_reg_wvalid,
      axi_ctrl_reg_wready   => s_axi_ctrl_reg_wready,
      -- AXI_CTRL_REG: b* signals
      axi_ctrl_reg_bresp    => s_axi_ctrl_reg_bresp,
      axi_ctrl_reg_bvalid   => s_axi_ctrl_reg_bvalid,
      axi_ctrl_reg_bready   => s_axi_ctrl_reg_bready,
      -- AXI_CTRL_REG: ar* signals
      axi_ctrl_reg_araddr   => s_axi_ctrl_reg_araddr,
      axi_ctrl_reg_arvalid  => s_axi_ctrl_reg_arvalid,
      axi_ctrl_reg_arready  => s_axi_ctrl_reg_arready,
      -- AXI_CTRL_REG: r* signals
      axi_ctrl_reg_rdata    => s_axi_ctrl_reg_rdata,
      axi_ctrl_reg_rresp    => s_axi_ctrl_reg_rresp,
      axi_ctrl_reg_rvalid   => s_axi_ctrl_reg_rvalid,
      axi_ctrl_reg_rready   => s_axi_ctrl_reg_rready,

      -- AXI_HOST_MEM: aw* signals
      axi_host_mem_awid     => m_axi_host_mem_awid,
      axi_host_mem_awaddr   => m_axi_host_mem_awaddr,
      axi_host_mem_awlen    => m_axi_host_mem_awlen,
      axi_host_mem_awsize   => m_axi_host_mem_awsize,
      axi_host_mem_awburst  => m_axi_host_mem_awburst,
      axi_host_mem_awlock   => m_axi_host_mem_awlock,
      axi_host_mem_awcache  => m_axi_host_mem_awcache,
      axi_host_mem_awprot   => m_axi_host_mem_awprot,
      axi_host_mem_awregion => m_axi_host_mem_awregion,
      axi_host_mem_awqos    => m_axi_host_mem_awqos,
      axi_host_mem_awuser   => m_axi_host_mem_awuser,
      axi_host_mem_awvalid  => m_axi_host_mem_awvalid,
      axi_host_mem_awready  => m_axi_host_mem_awready,
      -- AXI_HOST_MEM: w* signals
      axi_host_mem_wdata    => m_axi_host_mem_wdata,
      axi_host_mem_wstrb    => m_axi_host_mem_wstrb,
      axi_host_mem_wlast    => m_axi_host_mem_wlast,
      axi_host_mem_wuser    => m_axi_host_mem_wuser,
      axi_host_mem_wvalid   => m_axi_host_mem_wvalid,
      axi_host_mem_wready   => m_axi_host_mem_wready,
      -- AXI_HOST_MEM: b* signals
      axi_host_mem_bid      => m_axi_host_mem_bid,
      axi_host_mem_bresp    => m_axi_host_mem_bresp,
      axi_host_mem_buser    => m_axi_host_mem_buser,
      axi_host_mem_bvalid   => m_axi_host_mem_bvalid,
      axi_host_mem_bready   => m_axi_host_mem_bready,
      -- AXI_HOST_MEM: ar* signals
      axi_host_mem_arid     => m_axi_host_mem_arid,
      axi_host_mem_araddr   => m_axi_host_mem_araddr,
      axi_host_mem_arlen    => m_axi_host_mem_arlen,
      axi_host_mem_arsize   => m_axi_host_mem_arsize,
      axi_host_mem_arburst  => m_axi_host_mem_arburst,
      axi_host_mem_arlock   => m_axi_host_mem_arlock,
      axi_host_mem_arcache  => m_axi_host_mem_arcache,
      axi_host_mem_arprot   => m_axi_host_mem_arprot,
      axi_host_mem_arregion => m_axi_host_mem_arregion,
      axi_host_mem_arqos    => m_axi_host_mem_arqos,
      axi_host_mem_aruser   => m_axi_host_mem_aruser,
      axi_host_mem_arvalid  => m_axi_host_mem_arvalid,
      axi_host_mem_arready  => m_axi_host_mem_arready,
      -- AXI_HOST_MEM: r* signals
      axi_host_mem_rid      => m_axi_host_mem_rid,
      axi_host_mem_rdata    => m_axi_host_mem_rdata,
      axi_host_mem_rresp    => m_axi_host_mem_rresp,
      axi_host_mem_rlast    => m_axi_host_mem_rlast,
      axi_host_mem_ruser    => m_axi_host_mem_ruser,
      axi_host_mem_rvalid   => m_axi_host_mem_rvalid,
      axi_host_mem_rready   => m_axi_host_mem_rready
    );

end architecture STRUCTURE;
-------------------------------------------------------------------------------- /hw/source/decompressor.v: -------------------------------------------------------------------------------- 1 | /**************************** 2 | Module name: decompressor 3 | Author: Jianyu Chen 4 | Email: chenjy0046@gmail.com 5 | School: Delft University of Technology 6 | Date: 24th Nov, 2018 7 | Function: The top level of the decompressor (without the AXI protocol controller) 8 | ****************************/ 9 | `timescale 1ns/1ps 10 | 11 | module decompressor( 12 | input clk, 13 | input rst_n, 14 | input[511:0] data, 15 | input valid_in, 16 | input start, 17 | input[34:0] compression_length, //length of the data after compression (compressed data) 18 | input[31:0] decompression_length, //length of the data after decompression (uncompressed data) 19 | input wr_ready, 20 | 21 | output data_fifo_almostfull, 22 | 23 | output done, 24 | output last,///whether it is the last 64B of a burst 25 | output[511:0] data_out, 26 | output[63:0] byte_valid_out, 27 | output valid_out 28 | ); 29 | ///////parameters 30 | parameter NUM_PARSER=6, //number of Parser (2nd level parser) 31 | NUM_LOG=3, //log2(NUM_PARSER), at least 1 32 | PARSER_MASK=6'b111111; //if set to 0, the corresponding parser will be disabled, for test only 33 | wire[1023:0] data_w; 34 | 35 | wire ct_page_finish; 36 | wire dout_block_out_finish; 37 | //////// 38 | wire df_valid,df_empty; 39 | wire[127:0] df_dout; 40 | wire qt_almostfull; 41 | reg df_wr_en; 42 | always@(*)begin 43 | /*valid_in is the RVALID signal in AXI, and fifo_almostfull is the RREADY signal, 44 | the input is valid only if valid_in is high and fifo_almostfull is low*/ 45 | if(valid_in & (~data_fifo_almostfull))begin 46 | df_wr_en <= 1'b1; 47 | end else begin 48 | df_wr_en <= 1'b0; 49 | end 50 | end 51 | data_fifo df0( 52 | .clk(clk), 53 | .srst(~rst_n), 54 | .din(data), 55 | .wr_en(df_wr_en), 56 | .rd_en((~qt_almostfull) & ~df_empty), 57 | .dout(df_dout), 58 | 
.almost_full(data_fifo_almostfull), 59 | .empty(df_empty), 60 | .valid(df_valid), 61 | .wr_rst_busy(), 62 | .rd_rst_busy() 63 | ); 64 | 65 | ///////////preparser 66 | wire[143:0] pre_dout; //18 bytes 67 | wire[15:0] pre_tokenpos; 68 | wire[16:0] pre_address; 69 | wire[2:0] pre_garbage; 70 | wire pre_startlit,pre_validout; 71 | wire pre_page_input_finish; 72 | preparser preparser0( 73 | .clk(clk), 74 | .data(df_dout), 75 | .valid(df_valid), 76 | .start(start), 77 | .compression_length_in(compression_length), 78 | .rst_n(rst_n), 79 | 80 | .data_out(pre_dout), 81 | .token_pos(pre_tokenpos), 82 | .address(pre_address), 83 | .start_lit(pre_startlit), ///whether this 18 Byte starts with literal 84 | .garbage_cnt_out(pre_garbage), 85 | .valid_out(pre_validout), 86 | .page_input_finish(pre_page_input_finish) 87 | ); 88 | 89 | /////////fifo for pages 90 | wire[143:0] qt_dout; 91 | wire[15:0] qt_tokenpos; 92 | wire[16:0] qt_address; 93 | wire[2:0] qt_garbage; 94 | wire qt_startlit,qt_validout; 95 | wire qt_isempty; 96 | wire dis_rdreq; 97 | reg qt_rdreq; 98 | always@(*)begin 99 | if((qt_validout==1'b0)|dis_rdreq)begin 100 | qt_rdreq <=1'b1; 101 | end else begin 102 | qt_rdreq <=1'b0; 103 | end 104 | end 105 | queue_token qt0( 106 | .clk(clk), 107 | 108 | ///////input and output of page 109 | .data_in(pre_dout), 110 | .position_in(pre_tokenpos), 111 | .address_in(pre_address), 112 | .garbage_in(pre_garbage), 113 | .lit_flag_in(pre_startlit), 114 | .wrreq(pre_validout), 115 | 116 | .data_out(qt_dout), 117 | .position_out(qt_tokenpos), 118 | .address_out(qt_address), 119 | .garbage_out(qt_garbage), 120 | .lit_flag_out(qt_startlit), 121 | .valid_out(qt_validout), 122 | ////////control signal 123 | .rst_n(rst_n), 124 | 125 | .rdreq(qt_rdreq), 126 | .isempty(qt_isempty), 127 | 128 | .almost_full(qt_almostfull) 129 | ); 130 | 131 | 132 | //////////distributor 133 | wire[143:0] dis_dout; 134 | wire[15:0] dis_tokenpos; 135 | wire[16:0] dis_address; 136 | wire[2:0] dis_garbage; 137 | 
wire dis_startlit; 138 | wire[NUM_PARSER-1:0] dis_validout; 139 | wire dis_stop; 140 | wire[NUM_PARSER-1:0] ps_page_req; 141 | distributor 142 | #( .NUM_PARSER(NUM_PARSER) 143 | )distributor0 144 | ( 145 | .clk(clk), 146 | .rst_n(rst_n), 147 | 148 | ///////input and output of page 149 | .data_in(qt_dout), 150 | .position_in(qt_tokenpos), 151 | .address_in(qt_address), 152 | .garbage_in(qt_garbage), 153 | .lit_flag_in(qt_startlit), 154 | 155 | .stop(dis_stop), ///stop the distributor 156 | 157 | .valid_in(qt_validout), 158 | .ready(ps_page_req&PARSER_MASK), ///whether each parser is ready to receive new page 159 | 160 | .data_out(dis_dout), 161 | .position_out(dis_tokenpos), 162 | .address_out(dis_address), 163 | .garbage_out(dis_garbage), 164 | .lit_flag_out(dis_startlit), 165 | 166 | .rdreq(dis_rdreq), 167 | .valid_out(dis_validout) 168 | ); 169 | 170 | 171 | 172 | //////generate parsers 173 | 174 | wire[NUM_PARSER-1:0] ps_block_finish; 175 | wire[NUM_PARSER-1:0] ps_empty; 176 | wire[NUM_PARSER*4-1:0] ps_lit_rd; 177 | wire[NUM_PARSER*256-1:0] ps_lit_data; 178 | wire[NUM_PARSER*36-1:0] ps_lit_address; 179 | wire[NUM_PARSER*32-1:0] ps_lit_wr; 180 | wire[NUM_PARSER*16-1:0] ps_lit_ram_select; 181 | 182 | wire[NUM_PARSER*16-1:0] ps_copy_rd; 183 | wire[NUM_PARSER*144-1:0] ps_copy_address; 184 | wire[NUM_PARSER*16-1:0] ps_copy_ram; 185 | wire[NUM_PARSER*128-1:0] ps_copy_rd_out; 186 | wire[NUM_PARSER*256-1:0] ps_offset_out; 187 | genvar ps_i; ///i for parsers 188 | generate 189 | for(ps_i=0;ps_i2); 124 | assign copy_sep_3b=(offset_3b<=length_3b) & (offset_3b>2); 125 | 126 | //the signal below is for debug only, do not synthesize it 127 | reg debug_signal1; 128 | reg overflow_flag; 129 | 130 | always@(posedge clk)begin 131 | 132 | if(~rst_n)begin 133 | state <= 3'd0; 134 | copy_valid <= 1'b0; 135 | lit_valid <= 1'b0; 136 | debug_signal1 <= 1'b0; 137 | end else 138 | case(state) 139 | 3'd0:begin ///idle state 140 | state <= 3'b1; 141 | slice_req <= 1'b1; 142 | 
block_finish_r <= 1'b0; 143 | ///set the output to sub parsers to invalid 144 | lit_valid <= 1'b0; 145 | copy_valid <= 1'b0; 146 | end 147 | 148 | 3'd1:begin 149 | if(valid_in)begin 150 | data_buff <= data; 151 | tokenpos_buff <= tokenpos_in; 152 | start_lit_buff <= start_lit_in; 153 | address_buff <= address_in; 154 | 155 | length_left <= 5'd16 - {2'b0,garbage_in}; 156 | slice_req <= 1'b0; 157 | garbage_buff <= garbage_in; 158 | 159 | if(address_in[16]^overflow_record)begin 160 | state <= 3'd3; 161 | end else begin 162 | state <= 3'd2; 163 | end 164 | /////for debug 165 | //if(data[143:112]==32'h3eb4_120e)begin 166 | /*if(address_in>17'h3910)begin 167 | $display("data detected %d",PARSER_NUM); 168 | end*/ 169 | 170 | end 171 | 172 | ///set the output to sub parsers to invalid 173 | lit_valid <=1'b0; 174 | copy_valid <=1'b0; 175 | end 176 | 177 | 3'd2:begin 178 | if(!stop_flag)begin ///only work if not stop 179 | data_buff <= (data_buff<<{lza_z,3'b0}); 180 | tokenpos_buff <= (tokenpos_buff< offset offset(data_buff[143:138]+6'b1))begin //if lengthleft-1>length of token 328 | lit_length <=data_buff[141:138]; end 329 | else begin lit_length <=length_left-5'd2; end 330 | 331 | lit_data <={data_buff[135:16],8'b0}; 332 | lit_address <=address_buff[15:0]; 333 | 334 | copy_valid <=1'b0; 335 | 336 | 337 | if(lza_a==1'b0)begin 338 | length_left <=5'b0; 339 | end else begin 340 | length_left <=length_left-5'd2-data_buff[143:138]; 341 | end 342 | 343 | end 344 | endcase 345 | end 346 | end 347 | else begin ///if stop 348 | lit_valid <=1'b0; 349 | copy_valid <=1'b0; 350 | end 351 | end 352 | 353 | 3'd3:begin ///if the overflow of address happens, it will first go to this state 354 | 355 | if(length_left ==5'b0)begin //if the current slice is totally processed, go back to state 1 356 | state <=3'd1; 357 | slice_req <=1'b1; 358 | end else 359 | if(page_finish)begin 360 | //if this file is finished and the BRAMs are cleaned, go back to the initial state 361 | state <=3'd0; 362 | 
end else 363 | if(block_out_finish)begin 364 | //if this slice is not totally processed, go back to state2 to continue 365 | state <=3'd2; 366 | end 367 | block_finish_r <=1'b0; 368 | lit_valid <=1'b0; 369 | copy_valid <=1'b0; 370 | end 371 | 372 | /* 373 | In Snappy, it is allowed to have offset < length, for example, in "abc abc abc abc" every "abc" 374 | is regarded as a repetition of the last "abc" (except the first "abc", which is literal content), 375 | it is extremely slow to be solved directly, so I add this special state, in this state, each "abc" 376 | will be regarded as a repetition of the first "abc" 377 | */ 378 | 3'd4:begin ///solve the offset