├── LICENSE ├── README.md ├── app ├── .DS_Store └── sw │ ├── .DS_Store │ ├── Makefile │ └── app.cpp ├── quartus ├── par │ ├── README │ ├── ome2_ivt.qpf │ ├── ome2_ivt.qsf │ ├── qsf_afu_PAR_files.qsf │ ├── qsf_env_settings.qsf │ ├── qsf_qph_PAR_files.qsf │ ├── qsf_spl_PAR_files.qsf │ ├── sdc_qlp.sdc │ └── sdc_qph.sdc ├── quartus.sh └── setup.sh ├── rtl ├── afu_defines.vh ├── cci_std_afu.sv ├── fpga_arch.sv ├── framework_defines.vh ├── fthread │ ├── afu.v │ ├── fifo_reader.v │ ├── fifo_writer.v │ ├── fthread.v │ ├── fthread_controller.v │ ├── iolib │ │ ├── ReadConfigStruct.v │ │ ├── order_module_backpressure.v │ │ └── order_module_backpressure_wr.v │ ├── user_tx_rd_if.v │ └── user_tx_wr_if.v ├── fthread_shell │ ├── BatchRoundRobinArbitration.sv │ ├── RoundRobinArbitration.sv │ ├── cmd_interpreter.sv │ ├── cmd_processor.sv │ ├── csr_file.v │ ├── data_arbiter.sv │ ├── fpga_core.sv │ ├── fpga_server.sv │ ├── fpga_setup.v │ ├── io_requester.v │ ├── job_distributor.sv │ ├── job_manager.sv │ ├── job_queue.v │ ├── jobs_reader.sv │ ├── pipeline_agent.v │ ├── pt_module.v │ ├── regulator.sv │ └── server_io.v ├── mem │ ├── quick_fifo.v │ ├── spl_pt_mem.v │ └── spl_sdp_mem.v └── spl_defines.vh ├── standalone.sh └── sw ├── .DS_Store ├── core ├── centaur.h ├── fpga_defs.h ├── fpgaapi.cpp ├── fpgaapi.h ├── fpipe.h ├── fqueue.h ├── fthread.cpp ├── fthread.h ├── fthreadRec.cpp ├── fthreadRec.h ├── memory_manager.cpp ├── memory_manager.h ├── pipeline_job.h ├── utils.cpp ├── utils.h ├── workload_manager.cpp └── workload_manager.h └── platform ├── platform.cpp └── platform.h /README.md: -------------------------------------------------------------------------------- 1 | General Information 2 | =================================================== 3 | Centaur is a framework for hybrid databases on the Intel HARP machine v1. 4 | This repository only contains the core components of Centaur. The database part of Centaur is in the "doppiodb" git repository. 
5 | 6 | The core components of Centaur can also be used for developing non-database applications. 7 | The following sections describe how to use Centaur with and without a database. 8 | 9 | Pre-requisites 10 | ==================================== 11 | 12 | Centaur is developed for the first version of Intel's HARP machine. To use Centaur you need to provide the following: 13 | 14 | In ~/workspace/Centaur/quartus/ run the commands 15 | 16 | mkdir qpi 17 | cp ~/path-to-qpi-qxp-intel-files/ome_bot-SPL.qxp qpi/ome_bot-SPL.qxp 18 | cp ~/path-to-qpi-qxp-intel-files/ome_top.sv qpi/ome_top.sv 19 | 20 | Install the Intel AAL framework before installing Centaur. 21 | 22 | 23 | Using Centaur with DoppioDB 24 | ==================================== 25 | 26 | Clone the Centaur and DoppioDB repositories to your workspace directory: 27 | 28 | git clone https://github.com/fpgasystems/Centaur.git 29 | git clone https://github.com/fpgasystems/doppiodb.git 30 | 31 | **Installation** 32 | 33 | Set the CENTAUR_HOME environment variable to your Centaur checkout: 34 | 35 | export CENTAUR_HOME=~/workspace/Centaur 36 | 37 | In ~/workspace/doppiodb/fpga run **make**. This will compile the core and database components of Centaur. 38 | 39 | Then install MonetDB. 
In ~/workspace/doppiodb run the following commands: 40 | 41 | ./bootstrap 42 | ./configure --prefix=$HOME/MonetDB 43 | make 44 | make install 45 | 46 | Your installation can be found in $HOME/MonetDB 47 | 48 | **Run Quartus** 49 | 50 | To run Quartus and get a bitstream, run the following (setup.sh must be sourced so that the variables it exports stay set for the quartus command): 51 | 52 | cd ~/workspace/Centaur/quartus/ 53 | export DOPPIO_HOME=~/workspace/doppiodb 54 | source setup.sh 55 | quartus par/ome2_ivt.qpf 56 | 57 | 58 | Using Centaur without a database 59 | ============================================== 60 | 61 | Clone the Centaur repository to your workspace directory: 62 | 63 | git clone https://github.com/fpgasystems/Centaur.git 64 | 65 | **Installation** 66 | 67 | Set the CENTAUR_HOME environment variable to your Centaur checkout: 68 | 69 | export CENTAUR_HOME=~/workspace/Centaur 70 | 71 | Set up Centaur in standalone mode: 72 | 73 | cd Centaur 74 | bash standalone.sh 75 | 76 | In ~/workspace/Centaur/app 77 | add your application to the app.cpp file 78 | 79 | run **make** to build your application. 80 | Your application executable "app" is in ~/workspace/Centaur/app. 
81 | 82 | **Run Quartus** 83 | 84 | To run Quartus and get a bitstream, run the following (setup.sh must be sourced so that the variables it exports stay set for the quartus command): 85 | 86 | cd ~/workspace/Centaur/quartus/ 87 | source setup.sh 88 | quartus par/ome2_ivt.qpf 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /app/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgasystems/Centaur/f3013a53a4ae2fa2822361ea968380d9fe958369/app/.DS_Store -------------------------------------------------------------------------------- /app/sw/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fpgasystems/Centaur/f3013a53a4ae2fa2822361ea968380d9fe958369/app/sw/.DS_Store -------------------------------------------------------------------------------- /app/sw/Makefile: -------------------------------------------------------------------------------- 1 | ## Copyright (c) 2005-2015, Intel Corporation 2 | ## 3 | ## Redistribution and use in source and binary forms, with or without 4 | ## modification, are permitted provided that the following conditions are met: 5 | ## 6 | ## * Redistributions of source code must retain the above copyright notice, 7 | ## this list of conditions and the following disclaimer. 8 | ## * Redistributions in binary form must reproduce the above copyright notice, 9 | ## this list of conditions and the following disclaimer in the documentation 10 | ## and/or other materials provided with the distribution. 11 | ## * Neither the name of Intel Corporation nor the names of its contributors 12 | ## may be used to endorse or promote products derived from this software 13 | ## without specific prior written permission. 
14 | ## 15 | ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | ## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | ## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 | ## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | ## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | ## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | ## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | ## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | ## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | ## POSSIBILITY OF SUCH DAMAGE. 26 | ##**************************************************************************** 27 | ## Intel(R) Accelerator Abstraction Layer Library Software Developer 28 | ## Kit (SDK) 29 | ## Content: 30 | ## samples/Hello_SPL_LB/Makefile 31 | ## Author: 32 | ## Tim Whisonant, Intel Corporation 33 | ## Joseph Grecco, Intel Corporation 34 | ## History: 35 | ## 06/15/2015 JG Initial version based off of canonical sample makefile 36 | ##****************************************************************************** 37 | CPPFLAGS ?= -std=c++11 38 | CXX ?= g++ 39 | LDFLAGS ?= 40 | 41 | ifneq (,$(ndebug)) 42 | else 43 | CPPFLAGS += -DENABLE_DEBUG=1 44 | endif 45 | ifneq (,$(nassert)) 46 | else 47 | CPPFLAGS += -DENABLE_ASSERT=1 48 | endif 49 | 50 | #echo $(prefix) 51 | 52 | ifeq (,$(DESTDIR)) 53 | ifneq (,$(prefix)) 54 | CPPFLAGS += -I$(prefix)/include 55 | LDFLAGS += -L$(prefix)/lib -Wl,-rpath-link -Wl,$(prefix)/lib -Wl,-rpath -Wl,$(prefix)/lib \ 56 | -L$(prefix)/lib64 -Wl,-rpath-link -Wl,$(prefix)/lib64 -Wl,-rpath -Wl,$(prefix)/lib64 57 | endif 58 | else 59 | ifeq (,$(prefix)) 60 | prefix = /usr/local 61 | endif 62 | CPPFLAGS += 
-I$(DESTDIR)$(prefix)/include 63 | LDFLAGS += -L$(DESTDIR)$(prefix)/lib -Wl,-rpath-link -Wl,$(prefix)/lib -Wl,-rpath -Wl,$(DESTDIR)$(prefix)/lib \ 64 | -L$(DESTDIR)$(prefix)/lib64 -Wl,-rpath-link -Wl,$(prefix)/lib64 -Wl,-rpath -Wl,$(DESTDIR)$(prefix)/lib64 65 | endif 66 | 67 | CORE_DIR="../../sw/core" 68 | PLATFORM_DIR="../../sw/platform" 69 | CORE_SRC = $(CORE_DIR)/workload_manager.cpp $(CORE_DIR)/utils.cpp $(CORE_DIR)/memory_manager.cpp $(CORE_DIR)/fthread.cpp $(CORE_DIR)/fthreadRec.cpp $(CORE_DIR)/fpgaapi.cpp $(PLATFORM_DIR)/platform.cpp 70 | SOURCES = $(CORE_SRC) app.cpp 71 | 72 | all: app 73 | 74 | app: $(SOURCES) Makefile 75 | $(CXX) $(CPPFLAGS) -D__AAL_USER__=1 -g -O2 -o app $(SOURCES) $(LDFLAGS) -lOSAL -lAAS -lxlrt 76 | 77 | clean: 78 | $(RM) app 79 | 80 | .PHONY:all clean 81 | -------------------------------------------------------------------------------- /app/sw/app.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "../../sw/core/centaur.h" 4 | 5 | // Application template: obtains the FPGA handle, then runs the user's code. 6 | int main(int argc, char *argv[]) 7 | { 8 | 9 | //* get FPGA handle 10 | FPGA * myfpga = new FPGA(); 11 | getFPGA(myfpga); 12 | 13 | //****** HERE USER APPLICATION CODE GOES *****// 14 | // Exit status 0 reports success; a non-zero status is treated as failure by shells and make. 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /quartus/par/README: -------------------------------------------------------------------------------- 1 | **Start Quartus project:** 2 | quartus ome2_ivt.qpf 3 | 4 | **Add new files** 5 | Files related to Centaur, add them to the file **qsf_spl_PAR_files.qsf** 6 | 7 | Operator files go into **qsf_afu_PAR_files.qsf** 8 | 9 | **DO NOT COMMIT YOUR VERSION OF ome2_ivt.qsf** 10 | 11 | -------------------------------------------------------------------------------- /quartus/par/ome2_ivt.qpf: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------- # 2 | # 3 | # 
Copyright (C) 1991-2013 Altera Corporation 4 | # Your use of Altera Corporation's design tools, logic functions 5 | # and other software and tools, and its AMPP partner logic 6 | # functions, and any output files from any of the foregoing 7 | # (including device programming or simulation files), and any 8 | # associated documentation or information are expressly subject 9 | # to the terms and conditions of the Altera Program License 10 | # Subscription Agreement, Altera MegaCore Function License 11 | # Agreement, or other applicable license agreement, including, 12 | # without limitation, that your use is for the sole purpose of 13 | # programming logic devices manufactured by Altera and sold by 14 | # Altera or its authorized distributors. Please refer to the 15 | # applicable agreement for further details. 16 | # 17 | # -------------------------------------------------------------------------- # 18 | # 19 | # Quartus II 64-Bit 20 | # Version 13.1.0 Build 162 10/23/2013 SJ Full Version 21 | # Date created = 15:27:26 November 13, 2014 22 | # 23 | # -------------------------------------------------------------------------- # 24 | 25 | QUARTUS_VERSION = "13.1" 26 | DATE = "15:27:26 November 13, 2014" 27 | 28 | # Revisions 29 | 30 | PROJECT_REVISION = "ome2_ivt" 31 | -------------------------------------------------------------------------------- /quartus/par/ome2_ivt.qsf: -------------------------------------------------------------------------------- 1 | 2 | #---------------------------------------------------------------------------# 3 | # Synthesis Options # 4 | #---------------------------------------------------------------------------# 5 | set_global_assignment -name FAMILY "Stratix V" 6 | set_global_assignment -name DEVICE 5SGXEA7N1F45C1 7 | set_global_assignment -name ORIGINAL_QUARTUS_VERSION "12.0 SP1" 8 | set_global_assignment -name PROJECT_CREATION_TIME_DATE "20:50:33 AUGUST 04, 2012" 9 | set_global_assignment -name LAST_QUARTUS_VERSION 13.1 10 | 
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 11 | set_global_assignment -name MAX_CORE_JUNCTION_TEMP 85 12 | set_global_assignment -name ERROR_CHECK_FREQUENCY_DIVISOR 256 13 | set_global_assignment -name PARTITION_NETLIST_TYPE SOURCE -section_id Top 14 | set_global_assignment -name PARTITION_COLOR 16764057 -section_id Top 15 | set_global_assignment -name PARTITION_NETLIST_TYPE POST_FIT -section_id "ome_bot:bot_ome" 16 | set_global_assignment -name PARTITION_FITTER_PRESERVATION_LEVEL PLACEMENT_AND_ROUTING -section_id "ome_bot:bot_ome" 17 | 18 | set_global_assignment -name SEED 0 19 | set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON 20 | set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_DUPLICATION ON 21 | set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON 22 | set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED 23 | set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON 24 | set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS OFF 25 | set_global_assignment -name USE_HIGH_SPEED_ADDER ON 26 | set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON 27 | set_global_assignment -name SMART_RECOMPILE OFF 28 | set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON 29 | set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC_FOR_AREA ON 30 | set_global_assignment -name STRATIX_DEVICE_IO_STANDARD "2.5 V" 31 | set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" 32 | set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON 33 | set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM 34 | set_global_assignment -name FITTER_EFFORT "STANDARD FIT" 35 | set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON 36 | set_global_assignment -name AUTO_PACKED_REGISTERS_STRATIXII NORMAL 37 | set_global_assignment -name MUX_RESTRUCTURE ON 38 | set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON 39 | set_global_assignment -name TOP_LEVEL_ENTITY 
ome_top 40 | 41 | set_global_assignment -name PARTITION_FITTER_PRESERVATION_LEVEL PLACEMENT_AND_ROUTING -section_id Top 42 | set_global_assignment -name PARTITION_COLOR 39423 -section_id "ome_bot:bot_ome" 43 | set_global_assignment -name POWER_PRESET_COOLING_SOLUTION "23 MM HEAT SINK WITH 200 LFPM AIRFLOW" 44 | set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" 45 | 46 | set_instance_assignment -name MATCH_PLL_COMPENSATION_CLOCK ON -to "ome_bot:bot_ome|qph_top:top_qph|qph_reset_s45:s45_reset_qph|qph_reset_pll_fab_s45:s45_fab_pll_reset_qph|qph_reset_pll_fab_s45_0002:qph_reset_pll_fab_s45_inst|altera_pll:altera_pll_i|outclk_wire[1]" 47 | set_instance_assignment -name PLL_COMPENSATION_MODE NORMAL -to "*qph_reset_pll_fab_s45_0002*|altera_pll:altera_pll_i*|*" 48 | 49 | 50 | source qsf_env_settings.qsf 51 | source qsf_qph_PAR_files.qsf 52 | source qsf_spl_PAR_files.qsf 53 | source qsf_afu_PAR_files.qsf 54 | 55 | set_global_assignment -name SDC_FILE sdc_qph.sdc 56 | set_global_assignment -name SDC_FILE sdc_qlp.sdc 57 | 58 | set_global_assignment -name PARTITION_NETLIST_TYPE POST_SYNTH -section_id "cci_std_afu:cci_std_afu" 59 | set_global_assignment -name PARTITION_FITTER_PRESERVATION_LEVEL PLACEMENT_AND_ROUTING -section_id "cci_std_afu:cci_std_afu" 60 | set_global_assignment -name PARTITION_COLOR 39423 -section_id "cci_std_afu:cci_std_afu" 61 | 62 | 63 | 64 | 65 | set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top 66 | set_instance_assignment -name PARTITION_HIERARCHY ccist_48421 -to "cci_std_afu:cci_std_afu" -section_id "cci_std_afu:cci_std_afu" 67 | set_instance_assignment -name PARTITION_HIERARCHY botom_6b8b1 -to "ome_bot:bot_ome" -section_id "ome_bot:bot_ome" -------------------------------------------------------------------------------- /quartus/par/qsf_afu_PAR_files.qsf: -------------------------------------------------------------------------------- 1 | set_global_assignment -name SEARCH_PATH $DOPPIO_OPS 
2 | 3 | 4 | ########################### Regex AFU ############################ 5 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/nlb_gfifo.v 6 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/nlb_axififo.v 7 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/fifo_async_1.v 8 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/fifo_async_16.v 9 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/fifo_async_256.v 10 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/async_axififo.v 11 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/regex/rtl/axis_register.vhd 12 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/fifo_to_axis.v 13 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/pg_smartshifter.v 14 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/nlb_gram_sdp.v 15 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_decoder.v 16 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_halfrange.v 17 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_onestate.v 18 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_engines.v 19 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_engines_async.v 20 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_top_ff.v 21 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/rem_top_ff_bsh.v 22 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/mdb_stringreader.v 23 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/mdb_batparser_string.v 24 | 25 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/regex/rtl/regex_mdb.v 26 | 27 | ########################### Test Count AFU ############################ 28 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/testcount/rtl/onesCounter.v 29 | set_global_assignment -name SYSTEMVERILOG_FILE 
$DOPPIO_OPS/testcount/rtl/testcount.sv 30 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/testcount/rtl/testcount_um.v 31 | 32 | ########################### Copy AFU ############################ 33 | 34 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/copy/rtl/copy.v 35 | 36 | ########################### Percentage AFU ############################ 37 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/percentage/rtl/reduction_tree.v 38 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/percentage/rtl/percentage.v 39 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/percentage/rtl/percentage_um.v 40 | 41 | ########################### MULADD AFU ############################ 42 | 43 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/addmul/rtl/addmul.sv 44 | 45 | ########################### Selection AFU ############################ 46 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/selection/rtl/selection.vhd 47 | 48 | ########################### Min Max Sum AFU ############################ 49 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/minmaxsum/rtl/minmaxsum.vhd 50 | 51 | ########################### SGD AFU ############################ 52 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/simple_dual_port_ram.vhd 53 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/sgd.vhd 54 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/reorder.vhd 55 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/my_fp_mult.vhd 56 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/my_fp_converter27.vhd 57 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/my_fp_converter.vhd 58 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/my_fp_adder.vhd 59 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/my_fifo.vhd 60 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/hybrid_dot_product.vhd 61 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/gather.vhd 62 | 
set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/fp_vector_mult.vhd 63 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/fp_subtract_mult.vhd 64 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/fp_scalar_vector_mult.vhd 65 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/fixed_vector_subtract.vhd 66 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/sgd/rtl/fixed_adder_tree.vhd 67 | 68 | ########################### SKYLINE AFU ############################ 69 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/singleportbram.v 70 | 71 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/skyline/rtl/core.vhd 72 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/skyline/rtl/corepipe.vhd 73 | set_global_assignment -name VHDL_FILE $DOPPIO_OPS/skyline/rtl/conf.vhd 74 | 75 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/OutputTupleFilter.v 76 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/SkylineOutputFIFO.v 77 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/SkylineInputFIFO.v 78 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/skyline.v 79 | 80 | set_global_assignment -name VERILOG_FILE $DOPPIO_OPS/skyline/rtl/singleportbram_w.v 81 | 82 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/core_wide.sv 83 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/corepipe_wide.sv 84 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/SkylineOutputFIFO_wide.sv 85 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/SkylineInputFIFO_wide.sv 86 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/SkylineInputFIFO_wide_full.sv 87 | set_global_assignment -name SYSTEMVERILOG_FILE $DOPPIO_OPS/skyline/rtl/skyline_wide.sv 88 | -------------------------------------------------------------------------------- /quartus/par/qsf_env_settings.qsf: 
-------------------------------------------------------------------------------- 1 | set MY_WORKDIR $::env(WORKDIR) 2 | set REL_RTL_SRC "/qpi" 3 | set QPI_RTL_SRC $MY_WORKDIR$REL_RTL_SRC 4 | set CENTAUR_SRC $::env(CENTAUR_HOME)/rtl 5 | set DOPPIO_OPS $::env(DOPPIO_HOME)/fpga/operators 6 | puts " Variable defined QPI_RTL_SRC: $QPI_RTL_SRC" 7 | puts " Variable defined CENTAUR_RTL_SRC: $CENTAUR_SRC" 8 | puts " Variable defined DOPPIO_OPS_SRC: $DOPPIO_OPS" 9 | -------------------------------------------------------------------------------- /quartus/par/qsf_spl_PAR_files.qsf: -------------------------------------------------------------------------------- 1 | set_global_assignment -name VERILOG_MACRO "MAX_TRANSFER_SIZE_1=1" 2 | set_global_assignment -name SEARCH_PATH $CENTAUR_SRC 3 | 4 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/mem/spl_sdp_mem.v 5 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/mem/spl_pt_mem.v 6 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/mem/spl_fifo.v 7 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/mem/quick_fifo.v 8 | 9 | ########################### IO Library Modules ############################ 10 | 11 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/iolib/crb_consumer.v 12 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/iolib/crb_producer.v 13 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/iolib/order_module_backpressure.v 14 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/iolib/order_module_backpressure_wr.v 15 | 16 | ##################### FPGA Core module and sub modules ##################### 17 | 18 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/fpga_setup.v 19 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/csr_file.v 20 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/job_queue.v 21 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/pipeline_agent.v 22 
| set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/jobs_reader.sv 23 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/job_distributor.sv 24 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/job_manager.sv 25 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/cmd_interpreter.sv 26 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/cmd_processor.sv 27 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/fpga_core.sv 28 | 29 | ########################### Data Arbitration modules ######################## 30 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/RoundRobinArbitration.sv 31 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/BatchRoundRobinArbitration.sv 32 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/regulator.sv 33 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/data_arbiter.sv 34 | 35 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/pt_module.v 36 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/io_requester.v 37 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread_shell/server_io.v 38 | 39 | ########################### fthread Modules ######################## 40 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/fifo_reader.v 41 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/fifo_writer.v 42 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/user_tx_rd_if.v 43 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/user_tx_wr_if.v 44 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/iolib/ReadConfigStruct.v 45 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/fthread_controller.v 46 | 47 | set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/fthread.v 48 | 
set_global_assignment -name VERILOG_FILE $CENTAUR_SRC/fthread/afu.v 49 | 50 | 51 | ########################### Top Modules ######################## 52 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fthread_shell/fpga_server.sv 53 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/fpga_arch.sv 54 | set_global_assignment -name SYSTEMVERILOG_FILE $CENTAUR_SRC/cci_std_afu.sv 55 | -------------------------------------------------------------------------------- /quartus/par/sdc_qlp.sdc: -------------------------------------------------------------------------------- 1 | 2 | #--------------------------------------------------------------------------------------------------- 3 | # set 5ns paths through raddr (32UI) to embedded address registers in 2x clocked ram (16UI clocking) 4 | #--------------------------------------------------------------------------------------------------- 5 | set_max_delay -through *tag*raddr* 5.0 6 | set_max_delay -through *quad_ram*raddr* 5.0 7 | set_max_delay -through *re*_q*raddr* 5.0 8 | set_max_delay -through *mem_req_fifo*raddr* 5.0 9 | set_max_delay -through *4Byteram*raddr* 5.0 10 | 11 | set_max_delay -to [get_registers {*qlp_top*tag*wxe*}] 5.0 12 | set_max_delay -to [get_registers {*qlp_top*quad_ram*wxe*}] 5.0 13 | set_max_delay -to [get_registers {*qlp_top*re*_q*wxe*}] 5.0 14 | set_max_delay -to [get_registers {*mem_top*quad_*wxe*}] 5.0 15 | set_max_delay -to [get_registers {*qlp_top*4Byteram*wxe*}] 5.0 16 | 17 | set_max_delay -to [get_registers {*qlp_top*quad_ram*wxaddr*}] 5.0 18 | set_max_delay -to [get_registers {*qlp_top*re*_q*wxaddr*}] 5.0 19 | set_max_delay -to [get_registers {*qlp_top*tag*wxaddr*}] 5.0 20 | set_max_delay -to [get_registers {*mem_top*quad_*wxaddr*}] 5.0 21 | set_max_delay -to [get_registers {*qlp_top*4Byteram*wxaddr*}] 5.0 22 | set_max_delay -from *clk_align* 2.5 23 | set_max_delay -from *4Byteram*wxe* 2.5 24 | 25 | set_max_delay -to [get_registers {*reset_sync*reset_reg*}] 5.0 26 | 
set_multicycle_path -end -hold -to [get_registers {*reset_sync*reset_reg*}] 1 27 | 28 | #----------------------------------------------------------------------------------------- 29 | # over constrain due to large clk skews for clk domain crossing 30 | #----------------------------------------------------------------------------------------- 31 | # 32 | if {$::quartus(nameofexecutable) == "quartus_sta"} { 33 | # set_max_delay -to *top_nlb* 5.0 34 | set_max_delay -from *qph_xcvr_* -to *qph_mach* 5.0 35 | set_max_delay -from *qph_mach* -to *qph_xcvr_* 5.0 36 | set_max_delay -from *qph_mach* -to *qph_mach* 5.0 37 | } 38 | -------------------------------------------------------------------------------- /quartus/quartus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | if [ -z "$QUARTUS_HOME" ]; then 5 | echo "Install Quartus software (13.1) and set QUARTUS_HOME (e.g. in your bashrc)." 6 | exit 1 7 | fi 8 | 9 | # Quartus system direcory 10 | export QUARTUS_ROOTDIR=$QUARTUS_HOME/quartus 11 | export QUARTUS_ROOTDIR_OVERRIDE=$QUARTUS_HOME/quartus 12 | 13 | # Turn on Quartus 64-bit processing. 14 | export QUARTUS_64BIT=1 15 | 16 | # Add Quartus bin to PATH variable 17 | export PATH=$PATH:$QUARTUS_ROOTDIR/bin 18 | 19 | # Setup Quartus license server 20 | if [ -z "$LM_LICENSE_FILE" ]; then 21 | export LM_LICENSE_FILE="" 22 | fi 23 | 24 | # *** EDIT: Specify your license server or file *** 25 | export LM_LICENSE_FILE=${WORKDIR}/license.dat:$LM_LICENSE_FILE 26 | #export LM_LICENSE_FILE='1800@my_license_server.com':$LM_LICENSE_FILE 27 | -------------------------------------------------------------------------------- /quartus/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export WORKDIR=$PWD 4 | source $WORKDIR/quartus.sh 5 | alias cdw='cd $WORKDIR' 6 | 7 | if [ -z "$CENTAUR_HOME" ]; then 8 | export CENTAUR_HOME=$WORKDIR/.. 
9 | echo "CENTAUR_HOME varialbe was set" 10 | fi 11 | 12 | if [ -z "$DOPPIO_HOME" ]; then 13 | echo "Set DOPPIO_HOME env variable to the directory of harp-applications" 14 | fi 15 | 16 | 17 | #Tell Git to stop tracking changes on ome2_ivt.qsf 18 | git update-index --assume-unchanged par/ome2_ivt.qsf 19 | -------------------------------------------------------------------------------- /rtl/afu_defines.vh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 |
`include "framework_defines.vh"

`ifndef AFU_DEFINES_VH
`define AFU_DEFINES_VH

//----------------------------------------------------------------------------
// AFU type codes: one 16-bit identifier per AFU kind.
//----------------------------------------------------------------------------
`define REGEX_AFU                   16'h1
`define MURMUR_AFU                  16'h2
`define TEST_AND_COUNT_AFU          16'h3
`define MAX_MIN_SUM_AFU             16'h4
`define COPY32_AFU                  16'h5
`define COPY64_AFU                  16'h6
`define COPY128_AFU                 16'h7
`define COPY256_AFU                 16'h8
`define COPY512_AFU                 16'h9
`define PERCENTAGE_AFU              16'ha
`define MAC_AFU                     16'hb
`define SELECTION                   16'hc
`define SKYLINE256_AFU              16'hd
`define SKYLINE128_AFU              16'he
`define SKYLINE64_AFU               16'hf
`define SGD_AFU                     16'h10

// All-ones code; presumably marks a slot with no AFU -- confirm against users.
`define UNDEF_AFU                   16'hffff

//----------------------------------------------------------------------------
// Per-FThread settings. For every slot n:
//   FTHREAD_n_PLACED_AFU        AFU type code assigned to the slot
//   FTHREAD_n_AFU_CONFIG_LINES  configuration line count for that AFU
//                               (presumably 512-bit cache lines -- confirm)
//   FTHREAD_n_USER_AFU_RD_TAG   read  tag width, taken from `AFU_TAG
//   FTHREAD_n_USER_AFU_WR_TAG   write tag width, taken from `AFU_TAG
// `AFU_TAG comes from framework_defines.vh (included above).
//----------------------------------------------------------------------------

// FThread 1
`define FTHREAD_1_PLACED_AFU        `SGD_AFU
`define FTHREAD_1_AFU_CONFIG_LINES  3
`define FTHREAD_1_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_1_USER_AFU_WR_TAG   `AFU_TAG

// FThread 2
`define FTHREAD_2_PLACED_AFU        `SKYLINE128_AFU
`define FTHREAD_2_AFU_CONFIG_LINES  3
`define FTHREAD_2_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_2_USER_AFU_WR_TAG   `AFU_TAG

// FThread 3
`define FTHREAD_3_PLACED_AFU        `REGEX_AFU
`define FTHREAD_3_AFU_CONFIG_LINES  2
`define FTHREAD_3_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_3_USER_AFU_WR_TAG   `AFU_TAG

// FThread 4
`define FTHREAD_4_PLACED_AFU        `PERCENTAGE_AFU
`define FTHREAD_4_AFU_CONFIG_LINES  1
`define FTHREAD_4_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_4_USER_AFU_WR_TAG   `AFU_TAG

// FThread 5
`define FTHREAD_5_PLACED_AFU        `COPY32_AFU
`define FTHREAD_5_AFU_CONFIG_LINES  1
`define FTHREAD_5_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_5_USER_AFU_WR_TAG   `AFU_TAG

// FThread 6
`define FTHREAD_6_PLACED_AFU        `COPY32_AFU
`define FTHREAD_6_AFU_CONFIG_LINES  1
`define FTHREAD_6_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_6_USER_AFU_WR_TAG   `AFU_TAG

// FThread 7
`define FTHREAD_7_PLACED_AFU        `COPY32_AFU
`define FTHREAD_7_AFU_CONFIG_LINES  1
`define FTHREAD_7_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_7_USER_AFU_WR_TAG   `AFU_TAG

// FThread 8
`define FTHREAD_8_PLACED_AFU        `COPY32_AFU
`define FTHREAD_8_AFU_CONFIG_LINES  1
`define FTHREAD_8_USER_AFU_RD_TAG   `AFU_TAG
`define FTHREAD_8_USER_AFU_WR_TAG   `AFU_TAG

//----------------------------------------------------------------------------
// Job typing: 4 job types, encoded in 2 bits.
//----------------------------------------------------------------------------
`define NUM_JOB_TYPES               4
`define JOB_TYPE_BITS               2

`endif
-------------------------------------------------------------------------------- /rtl/cci_std_afu.sv: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013-2015, Intel Corporation 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // * Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright notice, 9 | // this list of conditions and the following disclaimer in the documentation 10 | // and/or other materials provided with the distribution. 11 | // * Neither the name of Intel Corporation nor the names of its contributors 12 | // may be used to endorse or promote products derived from this software 13 | // without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | // ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | // POSSIBILITY OF SUCH DAMAGE. 26 | 27 |
// cci_std_afu
//
// Thin top-level wrapper: it contains no logic of its own and only maps the
// CCI-named ports onto a single `fpga_arch` instance.
// ffs_vl_LP32ui_lp2sy_SoftReset_n is declared but not connected to anything
// in this module; only the system reset is forwarded as rst_n.
module cci_std_afu(
    // ---- Clocks and resets -------------------------------------------------
    input  logic         vl_clk_LPdomain_32ui,               // CCI interface clock (32ui link/protocol domain)
    input  logic         vl_clk_LPdomain_16ui,               // 2x CCI interface clock, synchronous (16ui domain)
    input  logic         ffs_vl_LP32ui_lp2sy_SystemReset_n,  // system reset, active low
    input  logic         ffs_vl_LP32ui_lp2sy_SoftReset_n,    // CCI-S soft reset, active low (unused here)

    // ---- RX channel 0: read / write / write-CSR responses -----------------
    input  logic [17:0]  ffs_vl18_LP32ui_lp2sy_C0RxHdr,      // response header
    input  logic [511:0] ffs_vl512_LP32ui_lp2sy_C0RxData,    // response data
    input  logic         ffs_vl_LP32ui_lp2sy_C0RxWrValid,    // write response valid
    input  logic         ffs_vl_LP32ui_lp2sy_C0RxRdValid,    // read response valid
    input  logic         ffs_vl_LP32ui_lp2sy_C0RxCgValid,    // config (CSR) valid
    input  logic         ffs_vl_LP32ui_lp2sy_C0RxUgValid,    // Umsg valid
    input  logic         ffs_vl_LP32ui_lp2sy_C0RxIrValid,    // interrupt valid

    // ---- RX channel 1: write responses only --------------------------------
    input  logic [17:0]  ffs_vl18_LP32ui_lp2sy_C1RxHdr,      // response header
    input  logic         ffs_vl_LP32ui_lp2sy_C1RxWrValid,    // write response valid
    input  logic         ffs_vl_LP32ui_lp2sy_C1RxIrValid,    // interrupt valid

    // ---- TX channel 0: read requests only ----------------------------------
    output logic [60:0]  ffs_vl61_LP32ui_sy2lp_C0TxHdr,      // request header
    output logic         ffs_vl_LP32ui_sy2lp_C0TxRdValid,    // read request valid

    // ---- TX channel 1: write requests only ---------------------------------
    output logic [60:0]  ffs_vl61_LP32ui_sy2lp_C1TxHdr,      // request header
    output logic [511:0] ffs_vl512_LP32ui_sy2lp_C1TxData,    // write data
    output logic         ffs_vl_LP32ui_sy2lp_C1TxWrValid,    // write request valid
    output logic         ffs_vl_LP32ui_sy2lp_C1TxIrValid,    // interrupt request valid

    // ---- TX flow control ---------------------------------------------------
    input  logic         ffs_vl_LP32ui_lp2sy_C0TxAlmFull,    // channel 0 almost full
    input  logic         ffs_vl_LP32ui_lp2sy_C1TxAlmFull,    // channel 1 almost full

    input  logic         ffs_vl_LP32ui_lp2sy_InitDnForSys    // system layer initialised, OK to run
);

    // The user design lives entirely inside fpga_arch.
    fpga_arch fpga_arch(
        // clocks, reset, link status
        .clk                  (vl_clk_LPdomain_32ui),
        .Clk_400              (vl_clk_LPdomain_16ui),
        .rst_n                (ffs_vl_LP32ui_lp2sy_SystemReset_n),
        .linkup               (ffs_vl_LP32ui_lp2sy_InitDnForSys),

        // TX channel 0 (read requests)
        .spl_tx_rd_hdr        (ffs_vl61_LP32ui_sy2lp_C0TxHdr),
        .spl_tx_rd_valid      (ffs_vl_LP32ui_sy2lp_C0TxRdValid),
        .cci_tx_rd_almostfull (ffs_vl_LP32ui_lp2sy_C0TxAlmFull),

        // TX channel 1 (write / interrupt requests)
        .spl_tx_wr_hdr        (ffs_vl61_LP32ui_sy2lp_C1TxHdr),
        .spl_tx_data          (ffs_vl512_LP32ui_sy2lp_C1TxData),
        .spl_tx_wr_valid      (ffs_vl_LP32ui_sy2lp_C1TxWrValid),
        .spl_tx_intr_valid    (ffs_vl_LP32ui_sy2lp_C1TxIrValid),
        .cci_tx_wr_almostfull (ffs_vl_LP32ui_lp2sy_C1TxAlmFull),

        // RX channel 0
        .cci_rx_hdr0          (ffs_vl18_LP32ui_lp2sy_C0RxHdr),
        .cci_rx_data          (ffs_vl512_LP32ui_lp2sy_C0RxData),
        .cci_rx_rd_valid      (ffs_vl_LP32ui_lp2sy_C0RxRdValid),
        .cci_rx_wr_valid0     (ffs_vl_LP32ui_lp2sy_C0RxWrValid),
        .cci_rx_cfg_valid     (ffs_vl_LP32ui_lp2sy_C0RxCgValid),
        .cci_rx_umsg_valid    (ffs_vl_LP32ui_lp2sy_C0RxUgValid),
        .cci_rx_intr_valid0   (ffs_vl_LP32ui_lp2sy_C0RxIrValid),

        // RX channel 1
        .cci_rx_hdr1          (ffs_vl18_LP32ui_lp2sy_C1RxHdr),
        .cci_rx_wr_valid1     (ffs_vl_LP32ui_lp2sy_C1RxWrValid),
        .cci_rx_intr_valid1   (ffs_vl_LP32ui_lp2sy_C1RxIrValid)
    );

endmodule
-------------------------------------------------------------------------------- /rtl/framework_defines.vh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 |
`ifndef FRAMEWORK_DEFINES_VH
`define FRAMEWORK_DEFINES_VH

////////////////////////////////////////////////////////////////////////////////////////
/// Configuration Parameters
`define NUMBER_OF_FTHREADS               4
`define FTHREADS_BITS                    2

`define PT_ADDRESS_BITS                  11   // 4 GB workspace
`define PTE_WIDTH                        17   // 2 MB pages

`define CMD_QUEUE_STRUCT_SIZE            16'd3
`define CMD_QUEUE_BUFFER_SIZE            16'd32
`define JOB_QUEUE_BUFFER_SIZE            16'd128

`define INT_MAX                          32'hFFFFFFFF

`define CRB_STRUCT_PRODUCER_LINE_OFFSET  32'h1
`define CRB_STRUCT_CONSUMER_LINE_OFFSET  32'h2

`define AFU_ID                           64'h111_00181
`define SPL_ID                           32'h11100101

// DSM line offsets
`define AFU_ID_DSM_OFFSET                0
`define PT_STATUS_DSM_OFFSET             1
`define CTX_STATUS_DSM_OFFSET            2
`define ALLOC_OPERATORS_DSM_OFFSET       3

`define CMD_LINE_WIDTH                   248

`define NUM_USER_STATE_COUNTERS          10

////////////////////////////////////////////////////////////////////////////////////////
/// Command / instruction opcodes (16 bit)
`define FPGA_TERMINATE_CMD               16'h0001
`define START_JOB_MANAGER_CMD            16'h0002

`define SET_CMD_POLL_RATE_INSTR          16'h0010
`define STOP_CMD_POLL_TIMEOUT_INSTR      16'h0020

`define GET_CHANNEL_STATUS_INSTR         16'h0100
`define GET_OPERATOR_STATUS_INSTR        16'h0200
`define GET_PAGETABLE_STATUS_INSTR       16'h0300
`define GET_APPLICATIONS_STATUS_INSTR    16'h0400

`define TERMINATE_OPERATOR_INSTR         16'h1000

`define CTRL_CMD_VALID_FLAG_LOC          25

`define CMQ_VALID_MAGIC_NUMBER           32'h13579bdf
`define CMQ_PROD_VALID_MAGIC_NUMBER      32'h02468ace

////////////////////////////////////////////////////////////////////////////////////////
/// Pipeline interface codes
`define WR_IF_DIRECT_PIPELINE_CODE       2'h1
`define WR_IF_MEM_PIPELINE_CODE          2'h3

`define RD_IF_DIRECT_PIPELINE_CODE       2'h1
`define RD_IF_MEM_PIPELINE_CODE          2'h2

// NOTE(review): macro names below/above containing "PIPEILINE" and "ALLCATED"
// are misspelled, but they are part of the public macro interface and are
// left untouched so that existing users keep compiling.
`define PIPEILINE_RD_ADDR_CODE           32'h00000000

////////////////////////////////////////////////////////////////////////////////////////
/// ERROR EVENTS CODES
`define MEMORY_ACCESS_OF_NON_ALLCATED_REGION 8'h0
`define FIFO_OVERFLOW                        8'h1
`define WRITE_RESPONSES_OVERFLOW             8'h2

////////////////////////////////////////////////////////////////////////////////////////
/// Different Modules TAG width (if not specified RD, WR, then it applies for both)
`define QPI_TAG                          14

`define JOB_QUEUE_TAG                    2
`define JOB_QUEUE_TAG_USED_WR            1'b0
`define JOB_QUEUE_TAG_USED_RD            1'b0

// Expression macros are fully parenthesised so that they expand to a single
// operand: without the parentheses a use such as `JOB_READER_TAG * 2 would
// evaluate as 2 + (2 * 2). The expanded values themselves are unchanged.
`define JOB_READER_TAG                   (`JOB_QUEUE_TAG + 2) // `FTHREADS_BITS

`define FPGA_SETUP_TAG                   4

`define PAGETABLE_TAG                    4

// 3 extra bits on top of the wider of the job-reader and setup tags.
`define FPGA_CORE_USR_TAG                (3 + ((`JOB_READER_TAG > `FPGA_SETUP_TAG) ? `JOB_READER_TAG : `FPGA_SETUP_TAG))
`define FPGA_CORE_TAG                    8

`define IF_TAG                           9
`define AFU_TAG                          8
`define FTHREAD_TAG                      10

`endif
-------------------------------------------------------------------------------- /rtl/fthread/iolib/ReadConfigStruct.v: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 |
// ReadConfigStruct
//
// Fetches a configuration structure of `config_struct_length` 512-bit lines
// starting at line address `base_addr`, and presents the lines concatenated
// on `afu_config_struct` (line i occupies bits [512*i +: 512]).
//
// Operation, as implemented below:
//   * While `get_config_struct` is high and fewer than `config_struct_length`
//     requests have been generated, one read request per accepted cycle is
//     issued with address base_addr + k and tag k (k = 0, 1, 2, ...).
//   * Each response is stored in the line register selected by the low bits
//     of its tag, so responses may arrive in any order.
//   * `afu_config_struct_valid` rises once all requests were generated and
//     the number of responses equals the (non-zero) number of accepted
//     requests. It therefore never rises for config_struct_length == 0.
//
// Parameters:
//   MAX_NUM_CONFIG_CL  number of 512-bit line registers (1..512; the upper
//                      bound comes from the 9-bit tag).
//
// NOTE(review): nothing here checks config_struct_length <= MAX_NUM_CONFIG_CL;
// responses whose tag index is >= MAX_NUM_CONFIG_CL are counted but not stored.
module ReadConfigStruct #(parameter MAX_NUM_CONFIG_CL = 2)
(
    input  wire                              clk,
    input  wire                              rst_n,                  // synchronous, active low
    //-------------------------------------------------//
    input  wire                              get_config_struct,      // enables request generation
    input  wire [57:0]                       base_addr,              // address of line 0
    input  wire [31:0]                       config_struct_length,   // number of lines to fetch
    // User Module TX RD
    output reg  [57:0]                       cs_tx_rd_addr,
    output reg  [8:0]                        cs_tx_rd_tag,
    output reg                               cs_tx_rd_valid,
    input  wire                              cs_tx_rd_free,          // request channel can accept
    // User Module RX RD
    input  wire [8:0]                        cs_rx_rd_tag,
    input  wire [511:0]                      cs_rx_rd_data,
    input  wire                              cs_rx_rd_valid,
    //
    output wire [(MAX_NUM_CONFIG_CL<<9)-1:0] afu_config_struct,
    output wire                              afu_config_struct_valid
);

    // Number of tag bits used to select the destination line register.
    // Kept at 2 for MAX_NUM_CONFIG_CL <= 4 (the previously hard-coded width,
    // so existing configurations decode exactly as before) and widened for
    // larger structures, which the fixed [1:0] slice could not address.
    localparam TAG_IDX_BITS = (MAX_NUM_CONFIG_CL > 4) ? $clog2(MAX_NUM_CONFIG_CL) : 2;

    wire        rd_done;          // all requests have been generated
    wire        all_reads_done;   // every accepted request has been answered

    reg  [31:0] numReadsSent;     // requests accepted by the TX channel
    reg  [31:0] numReadsDone;     // responses received
    reg  [31:0] rd_cnt;           // requests generated so far (also next line index)

    // Explicit [0:N-1] ranges: the bare [N] size form is SystemVerilog-only.
    reg  [511:0] config_lines       [0:MAX_NUM_CONFIG_CL-1];
    reg          config_lines_valid [0:MAX_NUM_CONFIG_CL-1];

    genvar i;

    //////////////////////////////// Capturing Read Responses ////////////////////////////////
    generate for (i = 0; i < MAX_NUM_CONFIG_CL; i = i + 1) begin: configLines

        always @(posedge clk) begin
            if (~rst_n) begin
                // line data is intentionally not reset, only its valid flag
                config_lines_valid[i] <= 1'b0;
            end
            else if (cs_rx_rd_valid && (cs_rx_rd_tag[TAG_IDX_BITS-1:0] == i)) begin
                config_lines[i]       <= cs_rx_rd_data;
                config_lines_valid[i] <= 1'b1;
            end
        end

        assign afu_config_struct[512*(i+1) - 1 : 512*i] = config_lines[i];
    end
    endgenerate

    /////////////////////////////// Generating Read Requests //////////////////////////////
    assign rd_done                 = (rd_cnt == config_struct_length);
    assign all_reads_done          = (numReadsSent == numReadsDone) & (numReadsSent != 0);
    assign afu_config_struct_valid = rd_done & all_reads_done;

    always @(posedge clk) begin
        if (~rst_n) begin
            cs_tx_rd_valid <= 1'b0;
            rd_cnt         <= 0;
            cs_tx_rd_addr  <= 0;
            cs_tx_rd_tag   <= 0;
        end
        else if (cs_tx_rd_free | ~cs_tx_rd_valid) begin
            // The request register may be (re)loaded: it is either empty or
            // its current content is being accepted this cycle.
            if (~rd_done & get_config_struct) begin
                rd_cnt         <= rd_cnt + 1'b1;
                cs_tx_rd_valid <= 1'b1;
                cs_tx_rd_addr  <= ({1'b0, base_addr} + {1'b0, rd_cnt});
                cs_tx_rd_tag   <= rd_cnt[8:0];
            end
            else begin
                cs_tx_rd_valid <= 1'b0;
            end
        end
    end

    ///////////////////////////////// Request / Response Accounting /////////////////////////
    always @(posedge clk) begin
        if (~rst_n) begin
            numReadsSent <= 0;
            numReadsDone <= 0;
        end
        else begin
            if (cs_tx_rd_valid & cs_tx_rd_free) numReadsSent <= numReadsSent + 1'b1;
            if (cs_rx_rd_valid)                 numReadsDone <= numReadsDone + 1'b1;
        end
    end

endmodule
-------------------------------------------------------------------------------- /rtl/fthread/iolib/order_module_backpressure.v: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 |
`default_nettype none

// order_module_backpressure
//
// Returns read responses to the user in the order the requests were issued.
//
//   * Every forwarded request gets a fresh internal tag (`ord_tag`, a
//     wrapping counter); the pair {user tag, internal tag} is pushed into
//     `pend_tag_fifo`.
//   * A response is written into `reorder_buf` at the address given by its
//     tag and the matching bit of `rob_valid` is set.
//   * When the response for the tag at the head of the FIFO is present, it
//     is read back from the buffer and moved through a two-entry staging
//     area (`pending_*[0]`, `pending_*[1]`) into the registered user-side
//     output, which only advances when the user is ready or the output is
//     empty.
//
// Parameters:
//   TAG_WIDTH       internal tag width; the buffer has 2**TAG_WIDTH entries
//   OUT_TAG_WIDTH   width of the tag driven on the downstream request port
//   USER_TAG_WIDTH  width of the user-supplied tag carried alongside
//   DATA_WIDTH      response data width
//   ADDR_WIDTH      request address width
module order_module_backpressure
    #(
        parameter TAG_WIDTH      = 6,
        parameter OUT_TAG_WIDTH  = 6,
        parameter USER_TAG_WIDTH = 8,
        parameter DATA_WIDTH     = 512,
        parameter ADDR_WIDTH     = 58)
(
    input  wire                      clk,
    input  wire                      rst_n,
    //-------------------------------------------------//
    // user-side read requests
    input  wire [ADDR_WIDTH-1:0]     usr_tx_rd_addr,
    input  wire [USER_TAG_WIDTH-1:0] usr_tx_rd_tag,
    input  wire                      usr_tx_rd_valid,
    output wire                      usr_tx_rd_free,
    // downstream read requests (internal tag)
    output wire [ADDR_WIDTH-1:0]     ord_tx_rd_addr,
    output wire [OUT_TAG_WIDTH-1:0]  ord_tx_rd_tag,
    output wire                      ord_tx_rd_valid,
    input  wire                      ord_tx_rd_free,
    // downstream read responses (any order)
    input  wire [TAG_WIDTH-1:0]      ord_rx_rd_tag,
    input  wire [DATA_WIDTH-1:0]     ord_rx_rd_data,
    input  wire                      ord_rx_rd_valid,
    // user-side read responses (issue order)
    output reg  [USER_TAG_WIDTH-1:0] usr_rx_rd_tag,
    output reg  [DATA_WIDTH-1:0]     usr_rx_rd_data,
    output reg                       usr_rx_rd_valid,
    input  wire                      usr_rx_rd_ready
);

    // ---- reorder buffer bookkeeping ---------------------------------------
    reg  [2**TAG_WIDTH-1:0]             rob_valid;   // one "response present" bit per slot
    reg                                 rob_re;      // buffer read strobe (also pops the tag FIFO)
    reg                                 rob_re_d1;   // rob_re delayed by one cycle: rob_rdata is sampled now
    reg  [USER_TAG_WIDTH-1:0]           rob_rtag;    // user tag travelling with rob_rdata
    reg  [USER_TAG_WIDTH+TAG_WIDTH-1:0] rob_raddr;   // {user tag, slot} of the entry being read
    wire [DATA_WIDTH-1:0]               rob_rdata;

    // ---- pending-tag FIFO -------------------------------------------------
    wire                                pend_tag_fifo_full;
    wire                                pend_tag_fifo_valid;
    wire                                absorb_pend_tag;
    wire [USER_TAG_WIDTH+TAG_WIDTH-1:0] curr_pend_tag;

    // ---- two-entry staging area in front of the output register -----------
    reg  [1:0]                          pending_valid;
    reg  [DATA_WIDTH-1:0]               pending_data [1:0];
    reg  [USER_TAG_WIDTH-1:0]           pending_tag  [1:0];

    reg  [TAG_WIDTH-1:0]                ord_tag;     // next internal tag to hand out

    // ---- derived conditions ------------------------------------------------
    // a request is counted as issued this cycle
    wire rd_issue    = usr_tx_rd_valid & ord_tx_rd_free & ~pend_tag_fifo_full;
    // slot addressed by the oldest outstanding request
    wire [TAG_WIDTH-1:0] head_slot = curr_pend_tag[TAG_WIDTH-1:0];
    // the staging area can take the word a new buffer read would deliver
    wire stage_room  = ~pending_valid[0] | (~pending_valid[1] & ~rob_re_d1);
    // the output register may be reloaded
    wire out_advance = usr_rx_rd_ready | ~usr_rx_rd_valid;

    // ---- request path: pass through, stalled while the tag FIFO is full ---
    assign ord_tx_rd_addr  = usr_tx_rd_addr;
    assign ord_tx_rd_tag   = {{{OUT_TAG_WIDTH - TAG_WIDTH}{1'b0}}, ord_tag};
    assign ord_tx_rd_valid = usr_tx_rd_valid & ~pend_tag_fifo_full;
    assign usr_tx_rd_free  = ord_tx_rd_free  & ~pend_tag_fifo_full;

    // Response storage, addressed by internal tag
    // (TAG_WIDTH address bits -> 2**TAG_WIDTH entries).
    spl_sdp_mem #(.DATA_WIDTH (DATA_WIDTH),
                  .ADDR_WIDTH (TAG_WIDTH)
    ) reorder_buf (
        .clk    (clk),
        .we     (ord_rx_rd_valid),
        .waddr  (ord_rx_rd_tag[TAG_WIDTH-1:0]),
        .din    (ord_rx_rd_data),

        .re     (rob_re),
        .raddr  (rob_raddr[TAG_WIDTH-1:0]),
        .dout   (rob_rdata)
    );

    // Outstanding requests in issue order: {user tag, internal tag}.
    quick_fifo #(.FIFO_WIDTH(USER_TAG_WIDTH + TAG_WIDTH),
                 .FIFO_DEPTH_BITS(TAG_WIDTH),
                 .FIFO_ALMOSTFULL_THRESHOLD(32)
    ) pend_tag_fifo(
        .clk        (clk),
        .reset_n    (rst_n),
        .din        ({usr_tx_rd_tag, ord_tag}),
        .we         (usr_tx_rd_valid & ord_tx_rd_free),

        .re         (absorb_pend_tag),
        .dout       (curr_pend_tag),
        .empty      (),
        .valid      (pend_tag_fifo_valid),
        .full       (pend_tag_fifo_full),
        .count      (),
        .almostfull ()
    );

    // The head tag is consumed in the cycle its buffer read is performed.
    assign absorb_pend_tag = rob_re;

    always @(posedge clk) begin
        if (~rst_n) begin
            rob_valid       <= 0;
            rob_re          <= 0;
            rob_re_d1       <= 0;
            rob_rtag        <= 0;
            rob_raddr       <= 0;
            usr_rx_rd_valid <= 1'b0;
            // usr_rx_rd_data is deliberately left without reset
            usr_rx_rd_tag   <= 0;
            pending_valid   <= 0;

            ord_tag         <= 0;
        end
        else begin
            // hand out the next internal tag
            if (rd_issue) ord_tag <= ord_tag + 1'b1;

            // mark the slot of an arriving response as filled
            if (ord_rx_rd_valid) begin
                rob_valid[ord_rx_rd_tag[TAG_WIDTH-1:0]] <= 1'b1;
            end

            // buffer read pipeline: strobe defaults to idle, tag follows address
            rob_re    <= 1'b0;
            rob_re_d1 <= rob_re;
            rob_rtag  <= rob_raddr[USER_TAG_WIDTH+TAG_WIDTH-1 : TAG_WIDTH];

            // Oldest request answered and room downstream: start reading it.
            // This clear is placed after the set above on purpose, so it wins
            // if both address the same slot in one cycle.
            if (rob_valid[head_slot] && pend_tag_fifo_valid && stage_room) begin
                rob_re               <= 1'b1;
                rob_raddr            <= curr_pend_tag;
                rob_valid[head_slot] <= 1'b0;
            end

            // ---- staging entry 0 ----
            // Takes the buffer output when it is empty, or when it is being
            // drained and entry 1 has nothing to hand over; otherwise takes
            // entry 1 when the output advances; otherwise holds.
            if (~pending_valid[0] | (out_advance & ~pending_valid[1])) begin
                pending_valid[0] <= rob_re_d1;
                pending_data[0]  <= rob_rdata;
                pending_tag[0]   <= rob_rtag;
            end
            else if (out_advance) begin
                pending_valid[0] <= 1'b1;
                pending_data[0]  <= pending_data[1];
                pending_tag[0]   <= pending_tag[1];
            end

            // ---- staging entry 1 ----
            // Reloaded from the buffer output either while it is being
            // drained (user ready) or when entry 0 is occupied and entry 1 is
            // free (user not ready); holds in every other case.
            if ((usr_rx_rd_ready & pending_valid[1]) |
                (~usr_rx_rd_ready & pending_valid[0] & ~pending_valid[1])) begin
                pending_valid[1] <= rob_re_d1;
                pending_data[1]  <= rob_rdata;
                pending_tag[1]   <= rob_rtag;
            end

            // ---- output register ----
            // Check for consumption is folded into out_advance: the register
            // reloads (possibly with valid = 0) whenever it is empty or read.
            if (out_advance) begin
                usr_rx_rd_valid <= pending_valid[0];
                usr_rx_rd_data  <= pending_data[0];
                usr_rx_rd_tag   <= pending_tag[0];
            end
        end
    end

endmodule

`default_nettype wire
-------------------------------------------------------------------------------- /rtl/fthread/iolib/order_module_backpressure_wr.v: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 |
`default_nettype none

// order_module_backpressure_wr
//
// Returns write responses to the user in the order the writes were issued.
//
//   * Every forwarded write gets a fresh internal tag (`ord_tag`, a wrapping
//     counter); {user tag, internal tag} is pushed into `pend_tag_fifo`.
//   * An arriving response sets the `rob_valid` bit selected by its tag.
//   * When the bit for the tag at the head of the FIFO is set, the head is
//     popped and its user tag is sent through a short register pipeline
//     (rob_rtag -> usr_rx_wr_tag_reg -> usr_rx_wr_tag) to the user.
//
// Parameters:
//   TAG_WIDTH       internal tag width; 2**TAG_WIDTH responses are tracked
//   OUT_TAG_WIDTH   width of the tag driven on the downstream request port
//   USER_TAG_WIDTH  width of the user-supplied tag
//   DATA_WIDTH      write data width
//   ADDR_WIDTH      write address width
module order_module_backpressure_wr
    #(
        parameter TAG_WIDTH      = 6,
        parameter OUT_TAG_WIDTH  = 6,
        parameter USER_TAG_WIDTH = 8,
        parameter DATA_WIDTH     = 512,
        parameter ADDR_WIDTH     = 58)
(
    input  wire                      clk,
    input  wire                      rst_n,
    //-------------------------------------------------//
    // user-side write requests
    input  wire [ADDR_WIDTH-1:0]     usr_tx_wr_addr,
    input  wire [USER_TAG_WIDTH-1:0] usr_tx_wr_tag,
    input  wire                      usr_tx_wr_valid,
    input  wire [DATA_WIDTH-1:0]     usr_tx_data,
    output wire                      usr_tx_wr_ready,
    // downstream write requests (internal tag)
    output wire [ADDR_WIDTH-1:0]     ord_tx_wr_addr,
    output wire [OUT_TAG_WIDTH-1:0]  ord_tx_wr_tag,
    output wire                      ord_tx_wr_valid,
    output wire [DATA_WIDTH-1:0]     ord_tx_data,
    input  wire                      ord_tx_wr_ready,
    // downstream write responses (any order)
    input  wire [TAG_WIDTH-1:0]      ord_rx_wr_tag,
    input  wire                      ord_rx_wr_valid,
    // user-side write responses (issue order)
    output reg  [USER_TAG_WIDTH-1:0] usr_rx_wr_tag,
    output reg                       usr_rx_wr_valid,
    input  wire                      usr_rx_wr_ready
);

    reg  [2**TAG_WIDTH-1:0]             rob_valid;   // one "response seen" bit per internal tag
    reg                                 rob_re;      // one-cycle pop strobe for the tag FIFO
    reg                                 rob_re_d1;   // rob_re, sampled when the pipeline advances
    reg  [USER_TAG_WIDTH-1:0]           rob_rtag;    // user tag of the popped entry

    wire                                pend_tag_fifo_full;
    wire                                pend_tag_fifo_valid;
    wire                                absorb_pend_tag;

    wire [USER_TAG_WIDTH+TAG_WIDTH-1:0] curr_pend_tag;

    reg  [TAG_WIDTH-1:0]                ord_tag;     // next internal tag to hand out

    // intermediate stage in front of the user-side output registers
    reg  [USER_TAG_WIDTH-1:0]           usr_rx_wr_tag_reg;
    reg                                 usr_rx_wr_valid_reg;

    // the response pipeline moves when a stage is empty or the user is ready
    wire pipe_advance = ~usr_rx_wr_valid_reg | ~usr_rx_wr_valid | usr_rx_wr_ready;

    // ---- request path: pass through, stalled while the tag FIFO is full ---
    assign ord_tx_wr_valid = usr_tx_wr_valid & ~pend_tag_fifo_full;
    assign ord_tx_wr_tag   = {{{OUT_TAG_WIDTH - TAG_WIDTH}{1'b0}}, ord_tag};
    assign ord_tx_wr_addr  = usr_tx_wr_addr;
    assign ord_tx_data     = usr_tx_data;

    assign usr_tx_wr_ready = ord_tx_wr_ready & ~pend_tag_fifo_full;

    // Outstanding writes in issue order: {user tag, internal tag}.
    quick_fifo #(.FIFO_WIDTH(USER_TAG_WIDTH + TAG_WIDTH),
                 .FIFO_DEPTH_BITS(TAG_WIDTH),
                 .FIFO_ALMOSTFULL_THRESHOLD(32)
    ) pend_tag_fifo(
        .clk        (clk),
        .reset_n    (rst_n),
        .din        ({usr_tx_wr_tag, ord_tag}),
        .we         (usr_tx_wr_valid & ord_tx_wr_ready),

        .re         (absorb_pend_tag),
        .dout       (curr_pend_tag),
        .empty      (),
        .valid      (pend_tag_fifo_valid),
        .full       (pend_tag_fifo_full),
        .count      (),
        .almostfull ()
    );

    assign absorb_pend_tag = rob_re;

    always @(posedge clk) begin
        if (~rst_n) begin
            rob_valid           <= 0;
            usr_rx_wr_valid     <= 1'b0;
            usr_rx_wr_tag       <= 0;

            ord_tag             <= 0;
            rob_re              <= 0;
            rob_re_d1           <= 0;
            rob_rtag            <= 0;

            // These two stages feed usr_rx_wr_valid / usr_rx_wr_tag on the
            // first advancing cycle after reset; without a reset value the
            // user-side valid could take an undefined value at that point.
            usr_rx_wr_valid_reg <= 1'b0;
            usr_rx_wr_tag_reg   <= 0;
        end
        else begin
            // hand out the next internal tag
            if (usr_tx_wr_valid & ord_tx_wr_ready & ~pend_tag_fifo_full) ord_tag <= ord_tag + 1'b1;

            // record an arriving response
            if (ord_rx_wr_valid) begin
                rob_valid[ord_rx_wr_tag[TAG_WIDTH-1:0]] <= 1'b1;
            end

            rob_re <= 1'b0;

            if (pipe_advance) begin
                // Oldest write acknowledged: pop it and capture its user tag.
                // The clear below is placed after the set above on purpose,
                // so it wins if both address the same bit in one cycle.
                if (rob_valid[curr_pend_tag[TAG_WIDTH-1:0]] && pend_tag_fifo_valid) begin
                    rob_rtag                                <= curr_pend_tag[USER_TAG_WIDTH + TAG_WIDTH - 1: TAG_WIDTH];
                    rob_valid[curr_pend_tag[TAG_WIDTH-1:0]] <= 1'b0;
                    rob_re                                  <= 1'b1;
                end

                // rob_re_d1 simply follows rob_re. Earlier direct assignments
                // of 1/0 to rob_re_d1 in the branch above were always
                // overridden by this statement and have been removed; the
                // resulting behaviour is unchanged.
                //
                // NOTE(review): rob_re is cleared every cycle, but rob_re_d1
                // only samples it while pipe_advance is true. If the pipeline
                // stalls in the very cycle rob_re is high, that response
                // looks like it is dropped. Confirm that usr_rx_wr_ready is
                // held high by all users, or rework the stall handling.
                rob_re_d1           <= rob_re;

                usr_rx_wr_valid_reg <= rob_re_d1;
                usr_rx_wr_tag_reg   <= rob_rtag;

                usr_rx_wr_valid     <= usr_rx_wr_valid_reg;
                usr_rx_wr_tag       <= usr_rx_wr_tag_reg;
            end
        end
    end

endmodule

`default_nettype wire
-------------------------------------------------------------------------------- /rtl/fthread_shell/BatchRoundRobinArbitration.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 |
// BatchRoundRobinArbitrationLogic
//
// Arbitrates NUMBER_OF_USERS request streams onto one output channel.
//
// A priority register file (`pr_reg`) holds the user ids in priority order,
// slot 0 being the highest priority. On a regular arbitration (`sh_enable`)
// the highest-priority requesting user is granted and rotated to the last
// slot, and becomes the "batching user". While that user keeps requesting
// and its grant count is below BATCH_SIZE, it keeps the grant without
// re-arbitration.
//
// Parameters:
//   NUMBER_OF_USERS      number of request inputs
//   USERS_BITS           width of a user id
//   USER_LINE_IN_WIDTH   width of each user line and of the output line
//   USER_LINE_OUT_WIDTH  not referenced in this module
//   BATCH_SIZE           number of grants a user may keep in a row
module BatchRoundRobinArbitrationLogic #(
        parameter NUMBER_OF_USERS     = 4,
        parameter USERS_BITS          = 2,
        parameter USER_LINE_IN_WIDTH  = 512,
        parameter USER_LINE_OUT_WIDTH = 512,
        parameter BATCH_SIZE          = 16
    )(
    input                                   clk,
    input                                   rst_n,

    // Users TX Channel
    input  wire [USER_LINE_IN_WIDTH - 1 : 0] usr_tx_lines[NUMBER_OF_USERS-1:0],
    input  wire [NUMBER_OF_USERS-1 : 0]      usr_tx_valid,
    output wire [NUMBER_OF_USERS-1 : 0]      usr_tx_ready,
    // TX Channel
    output wire [USER_LINE_IN_WIDTH-1:0]     rr_tx_line,
    output wire [USERS_BITS-1:0]             rr_tx_tag,
    output wire                              rr_tx_valid,
    input  wire                              rr_tx_ready
);

    // ---- arbitration result -------------------------------------------------
    reg  [USERS_BITS-1:0]        select;          // id of the highest-priority requesting user
    reg  [USERS_BITS-1:0]        sh_pos;          // priority slot that user currently occupies
    wire [USERS_BITS-1:0]        selected_user;   // user actually granted this cycle

    // ---- batching state -------------------------------------------------------
    reg  [USERS_BITS-1:0]        batching_user;
    reg                          batching_user_valid;
    reg  [31:0]                  curr_batching_user_count;
    wire                         select_curr_batching_user;

    // ---- priority register file ----------------------------------------------
    reg  [USERS_BITS-1:0]        pr_reg[NUMBER_OF_USERS-1:0];
    wire [USERS_BITS-1:0]        pr_sh_in;        // id rotated into the last slot
    wire [NUMBER_OF_USERS-1 : 0] pr_sh_en;        // per-slot shift enable
    wire [NUMBER_OF_USERS-1 : 0] prio;            // request bits in priority order
    wire                         sh_enable;       // a regular arbitration happens this cycle

    wire                         any_request = |usr_tx_valid;

    genvar  i;
    integer j;

    ////////////////////////////////// Output multiplexer //////////////////////////////////
    assign rr_tx_valid = any_request;
    assign rr_tx_tag   = selected_user;
    assign rr_tx_line  = usr_tx_lines[selected_user];

    // Per-user wiring: ready goes only to the granted user, and the request
    // vector is re-ordered by current priority.
    generate for (i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: perUser
        assign usr_tx_ready[i] = rr_tx_ready & (selected_user == i);
        assign prio[i]         = usr_tx_valid[ pr_reg[i] ];
    end
    endgenerate

    ////////////////////////// Priority register (rotating shift) //////////////////////////
    // Slots at or behind the granted position shift towards the front; the
    // last slot only reloads if the granted user was not already there.
    generate for (i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: PrShEn
        assign pr_sh_en[i] = (i < NUMBER_OF_USERS-1) ? (sh_pos <= i)
                                                     : (sh_pos != NUMBER_OF_USERS-1);
    end
    endgenerate

    assign pr_sh_in = pr_reg[sh_pos];

    generate for (i = 0; i < NUMBER_OF_USERS-1; i = i + 1) begin: PrReg
        always @(posedge clk) begin
            if (~rst_n)
                pr_reg[i] <= i;
            else if (sh_enable & pr_sh_en[i])
                pr_reg[i] <= pr_reg[i+1];
        end
    end
    endgenerate

    // last slot: receives the user that was just granted
    always @(posedge clk) begin
        if (~rst_n)
            pr_reg[NUMBER_OF_USERS-1] <= NUMBER_OF_USERS-1;
        else if (sh_enable & pr_sh_en[NUMBER_OF_USERS-1])
            pr_reg[NUMBER_OF_USERS-1] <= pr_sh_in;
    end

    /////////////////////////////////// Batch tracking ///////////////////////////////////
    // The current batching user keeps the grant while it requests and has
    // not used up its batch.
    assign select_curr_batching_user = usr_tx_valid[batching_user] & batching_user_valid
                                     & (curr_batching_user_count < BATCH_SIZE);

    // Otherwise arbitrate among all requesters when the channel is ready.
    assign sh_enable     = any_request & rr_tx_ready & ~select_curr_batching_user;

    assign selected_user = (sh_enable) ? select : batching_user;

    always @(posedge clk) begin
        if (~rst_n) begin
            batching_user_valid      <= 0;
            batching_user            <= 0;
            curr_batching_user_count <= 0;
        end
        else if (sh_enable) begin
            // new arbitration winner starts a batch; this grant counts as #1
            batching_user_valid      <= 1'b1;
            batching_user            <= select;
            curr_batching_user_count <= 1;
        end
        else if (select_curr_batching_user & rr_tx_ready) begin
            if (curr_batching_user_count == BATCH_SIZE-1) begin
                // this grant completes the batch
                batching_user_valid      <= 1'b0;
                curr_batching_user_count <= 0;
            end
            else begin
                curr_batching_user_count <= curr_batching_user_count + 1'b1;
            end
        end
    end

    /////////////////////////////////// Priority encoder ///////////////////////////////////
    // Scans from the lowest to the highest priority slot, so the last match
    // kept is the highest-priority requester (slot 0 wins). Defaults to
    // user 0 / slot 0 when nobody requests.
    always @(*) begin
        select = 0;
        sh_pos = 0;
        for (j = NUMBER_OF_USERS-1; j >= 0; j = j-1) begin
            if (prio[j]) begin
                select = pr_reg[j];
                sh_pos = j;
            end
        end
    end

endmodule
-------------------------------------------------------------------------------- /rtl/fthread_shell/RoundRobinArbitration.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 |
// RoundRobinArbitrationLogic
//
// Arbitrates NUMBER_OF_USERS request streams onto one output channel.
//
// A priority register file (`pr_reg`) holds the user ids in priority order,
// slot 0 being the highest priority. Each cycle the highest-priority
// requesting user is selected; when the output channel is ready and at least
// one user requests, the selected user is rotated to the last slot.
//
// Parameters:
//   NUMBER_OF_USERS      number of request inputs
//   USERS_BITS           width of a user id
//   USER_LINE_IN_WIDTH   width of each user line and of the output line
//   USER_LINE_OUT_WIDTH  not referenced in this module
module RoundRobinArbitrationLogic #(
        parameter NUMBER_OF_USERS     = 4,
        parameter USERS_BITS          = 2,
        parameter USER_LINE_IN_WIDTH  = 512,
        parameter USER_LINE_OUT_WIDTH = 512
    )(
    input                                   clk,
    input                                   rst_n,

    // Users TX Channel
    input  wire [USER_LINE_IN_WIDTH - 1 : 0] usr_tx_lines[NUMBER_OF_USERS-1:0],
    input  wire [NUMBER_OF_USERS-1 : 0]      usr_tx_valid,
    output wire [NUMBER_OF_USERS-1 : 0]      usr_tx_ready,
    // TX Channel
    output wire [USER_LINE_IN_WIDTH-1:0]     rr_tx_line,
    output wire [USERS_BITS-1:0]             rr_tx_tag,
    output wire                              rr_tx_valid,
    input  wire                              rr_tx_ready
);

    // ---- arbitration result -------------------------------------------------
    reg  [USERS_BITS-1:0]        select;      // id of the highest-priority requesting user
    reg  [USERS_BITS-1:0]        sh_pos;      // priority slot that user currently occupies
    wire [USERS_BITS-1:0]        sel_pr_sh_in; // declared but neither driven nor read

    // ---- priority register file ----------------------------------------------
    reg  [USERS_BITS-1:0]        pr_reg[NUMBER_OF_USERS-1:0];
    wire [USERS_BITS-1:0]        pr_sh_in;    // id rotated into the last slot
    wire [NUMBER_OF_USERS-1 : 0] pr_sh_en;    // per-slot shift enable
    wire [NUMBER_OF_USERS-1 : 0] prio;        // request bits in priority order
    wire                         sh_enable;   // a grant happens this cycle

    wire                         any_request = |usr_tx_valid;

    genvar  i;
    integer j;

    ////////////////////////////////// Output multiplexer //////////////////////////////////
    assign rr_tx_valid = any_request;
    assign rr_tx_tag   = select;
    assign rr_tx_line  = usr_tx_lines[select];

    // Per-user wiring: ready goes only to the selected user, and the request
    // vector is re-ordered by current priority.
    generate for (i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: perUser
        assign usr_tx_ready[i] = rr_tx_ready & (select == i);
        assign prio[i]         = usr_tx_valid[ pr_reg[i] ];
    end
    endgenerate

    ////////////////////////// Priority register (rotating shift) //////////////////////////
    assign sh_enable = any_request & rr_tx_ready;

    // Slots at or behind the granted position shift towards the front; the
    // last slot only reloads if the granted user was not already there.
    generate for (i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: PrShEn
        assign pr_sh_en[i] = (i < NUMBER_OF_USERS-1) ? (sh_pos <= i)
                                                     : (sh_pos != NUMBER_OF_USERS-1);
    end
    endgenerate

    assign pr_sh_in = pr_reg[sh_pos];

    generate for (i = 0; i < NUMBER_OF_USERS-1; i = i + 1) begin: PrReg
        always @(posedge clk) begin
            if (~rst_n)
                pr_reg[i] <= i;
            else if (sh_enable & pr_sh_en[i])
                pr_reg[i] <= pr_reg[i+1];
        end
    end
    endgenerate

    // last slot: receives the user that was just granted
    always @(posedge clk) begin
        if (~rst_n)
            pr_reg[NUMBER_OF_USERS-1] <= NUMBER_OF_USERS-1;
        else if (sh_enable & pr_sh_en[NUMBER_OF_USERS-1])
            pr_reg[NUMBER_OF_USERS-1] <= pr_sh_in;
    end

    /////////////////////////////////// Priority encoder ///////////////////////////////////
    // Scans from the lowest to the highest priority slot, so the last match
    // kept is the highest-priority requester (slot 0 wins). Defaults to
    // user 0 / slot 0 when nobody requests.
    always @(*) begin
        select = 0;
        sh_pos = 0;
        for (j = NUMBER_OF_USERS-1; j >= 0; j = j-1) begin
            if (prio[j]) begin
                select = pr_reg[j];
                sh_pos = j;
            end
        end
    end

endmodule
-------------------------------------------------------------------------------- /rtl/fthread_shell/cmd_interpreter.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */

`include "../afu_defines.vh"

// Decodes 512-bit entries delivered by the command queue.
//
// The opcode is taken from cmd_queue_out[15:0]:
//   * `FPGA_TERMINATE_CMD    - raises dsm_reset. Because dsm_reset is part of
//                              the clear condition it drops again on the next
//                              clock edge.
//   * `START_JOB_MANAGER_CMD - raises job_reader_enable and job_config_valid,
//                              and captures job_queue_size (bits [127:96]) and
//                              queue_poll_rate (bits [31:16]). For every job
//                              type j it also captures a base address and a
//                              configuration word from a 96-bit-strided field.
// Any other opcode leaves the registers unchanged for that cycle.
//
// In a cycle without a valid command, dsm_reset, job_reader_enable,
// job_config_valid, job_queue_size and queue_poll_rate are driven back to
// zero. job_queue_base_addr and job_config keep their values until reset or
// dsm_reset. cmd_queue_ready is tied high.
module cmd_interpreter (
    input  wire                   clk,
    input  wire                   rst_n,
    //////////////////////////////// Commands /////////////////////////////////////
    //---- Terminate Command
    output reg                    dsm_reset,
    input  wire [`PTE_WIDTH-1:0]  first_page_address,

    //---- Start Command
    output reg  [31:0]            job_queue_base_addr[`NUM_JOB_TYPES-1:0],
    output reg                    job_reader_enable,
    output reg  [31:0]            job_queue_size,
    output reg  [15:0]            queue_poll_rate,
    output reg  [15:0]            job_config[`NUM_JOB_TYPES-1:0],
    output reg                    job_config_valid,

    //////////////// From Command Queue /////////////////////////
    input  wire [511:0]           cmd_queue_out,
    input  wire                   cmd_queue_valid,
    output wire                   cmd_queue_ready
);

    // ------------------------------------------------------------------
    // Command decode
    // ------------------------------------------------------------------
    wire [15:0] cmd_opcode = cmd_queue_out[15:0];
    wire        clear_all  = ~rst_n | dsm_reset;
    wire        start_cmd  = cmd_queue_valid & (cmd_opcode == `START_JOB_MANAGER_CMD);

    // Commands are always accepted.
    assign cmd_queue_ready = 1'b1;

    // ------------------------------------------------------------------
    // Scalar command outputs
    // ------------------------------------------------------------------
    always @(posedge clk) begin
        if (clear_all) begin
            dsm_reset         <= 1'b0;
            job_reader_enable <= 1'b0;
            job_config_valid  <= 1'b0;
            job_queue_size    <= 0;
            queue_poll_rate   <= 0;
        end
        else if (cmd_queue_valid) begin
            if (cmd_opcode == `FPGA_TERMINATE_CMD) begin
                dsm_reset <= 1'b1;
            end
            else if (cmd_opcode == `START_JOB_MANAGER_CMD) begin
                job_reader_enable <= 1'b1;
                job_config_valid  <= 1'b1;
                job_queue_size    <= cmd_queue_out[127:96];
                queue_poll_rate   <= cmd_queue_out[31:16];
            end
        end
        else begin
            dsm_reset         <= 1'b0;
            job_reader_enable <= 1'b0;
            job_config_valid  <= 1'b0;
            job_queue_size    <= 0;
            queue_poll_rate   <= 0;
        end
    end

    // ------------------------------------------------------------------
    // Per-job-type outputs
    // ------------------------------------------------------------------
    // Job type j uses a 96-bit stride starting at bit 32 of the command:
    //   bits [j*96+32 +: 32-`PTE_WIDTH] - low part of the queue base address,
    //                                     prefixed with first_page_address;
    //   bits [j*96+64 +: 32]            - configuration word (the 16-bit
    //                                     register keeps its low half).
    genvar j;
    generate for (j = 0; j < `NUM_JOB_TYPES; j = j + 1) begin: job_data

        always @(posedge clk) begin
            if (clear_all) begin
                job_queue_base_addr[j] <= 0;
                job_config[j]          <= 0;
            end
            else if (start_cmd) begin
                job_queue_base_addr[j] <= {first_page_address, cmd_queue_out[j*96 + 32 +: (32-`PTE_WIDTH)]};
                job_config[j]          <= cmd_queue_out[j*96 + 64 +: 32];
            end
        end

    end
    endgenerate

endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/cmd_processor.sv:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */

`include "../afu_defines.vh"

// Command processor: a job_queue instance that fetches command entries from
// memory, followed by a cmd_interpreter that decodes them into the
// terminate/start control outputs.
//
// The queue is started when both first_page_addr_valid and ctx_valid are
// high, and it is held in reset while dsm_reset is asserted. The cp_tx_* and
// cp_rx_* ports are the queue's memory read/write request and response
// channels, passed straight through.
module cmd_processor
(
    input  wire                         clk,
    input  wire                         rst_n,
    //-------------------------------------------------//
    input  wire                         first_page_addr_valid,
    input  wire [`PTE_WIDTH-1:0]        first_page_addr,
    input  wire                         ctx_valid,
    // TX RD
    output wire [31:0]                  cp_tx_rd_addr,
    output wire [`JOB_QUEUE_TAG-1:0]    cp_tx_rd_tag,
    output wire                         cp_tx_rd_valid,
    input  wire                         cp_tx_rd_ready,
    // TX WR
    output wire [31:0]                  cp_tx_wr_addr,
    output wire [`JOB_QUEUE_TAG-1:0]    cp_tx_wr_tag,
    output wire                         cp_tx_wr_valid,
    output wire [511:0]                 cp_tx_data,
    input  wire                         cp_tx_wr_ready,
    // RX RD
    input  wire [`JOB_QUEUE_TAG-1:0]    cp_rx_rd_tag,
    input  wire [511:0]                 cp_rx_data,
    input  wire                         cp_rx_rd_valid,
    // RX WR
    input  wire                         cp_rx_wr_valid,
    input  wire [`JOB_QUEUE_TAG-1:0]    cp_rx_wr_tag,
    //---- Terminate Command
    output wire                         dsm_reset,

    //---- Start Command
    output wire [31:0]                  job_queue_base_addr[`NUM_JOB_TYPES-1:0],
    output wire                         job_reader_enable,
    output wire [15:0]                  queue_poll_rate,
    output wire [31:0]                  job_queue_size,
    output wire [15:0]                  job_config[`NUM_JOB_TYPES-1:0],
    output wire                         job_config_valid

);

    wire [31:0]                         cmd_queue_size; // in CLs
    wire [511:0]                        cmd_queue_out;
    wire                                cmd_queue_valid;
    wire                                cmd_queue_ready;

    // ------------------------------------------------------------------
    // Command queue
    // ------------------------------------------------------------------
    // The size word is the concatenation of the buffer size and the struct
    // size macros. NOTE(review): job_queue reads queue_size[31:16] as the
    // buffer size and adds queue_size[3:0] to the base address to locate the
    // buffer; confirm the macro widths line up with that split.
    assign cmd_queue_size = {`CMD_QUEUE_BUFFER_SIZE, `CMD_QUEUE_STRUCT_SIZE};
    job_queue cmd_queue(
    .clk                         (clk),
    // A terminate command (dsm_reset) also resets the queue itself.
    .rst_n                       (rst_n & ~dsm_reset),
    //-------------------------------------------------//
    .start_queue                 (first_page_addr_valid & ctx_valid),
    // The queue base is the first page address with the low address bits zeroed.
    .queue_base_addr             ({first_page_addr, {32-`PTE_WIDTH{1'b0}}}),
    .queue_size                  (cmd_queue_size),
    // Fixed poll rate; queue_reset is tied off.
    .queue_poll_rate             (16'h07FF),
    .queue_reset                 (1'b0),
    // TX RD
    .jq_tx_rd_addr               (cp_tx_rd_addr),
    .jq_tx_rd_tag                (cp_tx_rd_tag),
    .jq_tx_rd_valid              (cp_tx_rd_valid),
    .jq_tx_rd_ready              (cp_tx_rd_ready),
    // TX WR
    .jq_tx_wr_addr               (cp_tx_wr_addr),
    .jq_tx_wr_tag                (cp_tx_wr_tag),
    .jq_tx_wr_valid              (cp_tx_wr_valid),
    .jq_tx_data                  (cp_tx_data),
    .jq_tx_wr_ready              (cp_tx_wr_ready),
    // RX RD
    .jq_rx_rd_tag                (cp_rx_rd_tag),
    .jq_rx_data                  (cp_rx_data),
    .jq_rx_rd_valid              (cp_rx_rd_valid),
    // RX WR
    .jq_rx_wr_valid              (cp_rx_wr_valid),
    .jq_rx_wr_tag                (cp_rx_wr_tag),
    // Fetched command entries, consumed by the interpreter below.
    .job_queue_out               (cmd_queue_out),
    .job_queue_valid             (cmd_queue_valid),
    .job_queue_ready             (cmd_queue_ready)
    );

    // ------------------------------------------------------------------
    // Command interpreter
    // ------------------------------------------------------------------
    // Receives the plain rst_n (not gated by dsm_reset); it produces dsm_reset.
    cmd_interpreter cmd_interpreter(
    .clk                         (clk),
    .rst_n                       (rst_n),
    //////////////////////////////// Commands /////////////////////////////////////
    //---- Terminate Command
    .dsm_reset                   (dsm_reset),
    .first_page_address          (first_page_addr),

    //---- Start Command
    .job_queue_base_addr         (job_queue_base_addr),
    .job_reader_enable           (job_reader_enable),
    .job_queue_size              (job_queue_size),
    .queue_poll_rate             (queue_poll_rate),
    .job_config                  (job_config),
    .job_config_valid            (job_config_valid),

    //////////////// From Command Queue /////////////////////////
    .cmd_queue_out               (cmd_queue_out),
    .cmd_queue_valid             (cmd_queue_valid),
    .cmd_queue_ready             (cmd_queue_ready)
    );


endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/csr_file.v:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013-2015, Intel Corporation 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // * Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright notice, 9 | // this list of conditions and the following disclaimer in the documentation 10 | // and/or other materials provided with the distribution. 11 | // * Neither the name of Intel Corporation nor the names of its contributors 12 | // may be used to endorse or promote products derived from this software 13 | // without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | // POSSIBILITY OF SUCH DAMAGE.
// CSR file: captures configuration-register writes arriving on the io_rx_csr_*
// port and turns them into "base address + valid" pairs for the SPL DSM, the
// AFU DSM and the context (page table) base, plus a reset pulse.
//
// Address decode: io_rx_csr_addr[13:6] selects the register group (8'h10 for
// the SPL group, 8'h8a for the AFU group) and io_rx_csr_addr[5:0] selects the
// register inside the group.
//
// Each 32-bit base is assembled from two writes: the "high" register supplies
// data[5:0] as bits [31:26], and the "low" register supplies data[31:6] as
// bits [25:0] and raises the matching valid flag. A valid flag is dropped when
// the corresponding *_done input is high.
module csr_file(
    input  wire                             clk,
    input  wire                             reset_n,
    output wire                             spl_reset,

    // csr_file --> dsm_module, spl_id, afu_id
    output reg                              csr_spl_dsm_base_valid,
    output reg  [31:0]                      csr_spl_dsm_base,
    input  wire                             csr_spl_dsm_base_done,

    output reg                              csr_afu_dsm_base_valid,
    output reg  [31:0]                      csr_afu_dsm_base,
    input  wire                             csr_afu_dsm_base_done,

    // csr_file --> ctx_tracker, FPGA virtual memory space
    output reg                              csr_ctx_base_valid,
    output reg  [31:0]                      csr_ctx_base,
    input  wire                             csr_ctx_base_done,

    // server_io --> csr_file
    input  wire                             io_rx_csr_valid,
    input  wire [13:0]                      io_rx_csr_addr,
    input  wire [31:0]                      io_rx_csr_data
);


    // Register offsets inside the SPL group (io_rx_csr_addr[13:6] == 8'h10).
    // SPL_CSR_SCRATCH is declared but not decoded below.
    localparam [5:0]
        SPL_CSR_DSR_BASEL          = 6'b00_0000,   // 1000 //10'h244,   // 910
        SPL_CSR_DSR_BASEH          = 6'b00_0001,   // 1004 //10'h245,   // 914
        SPL_CSR_CTX_BASELL         = 6'b00_0010,   // 1008 //10'h246,   // 918
        SPL_CSR_CTX_BASELH         = 6'b00_0011,   // 100c //10'h247;   // 91c
        SPL_CSR_CTRL               = 6'b00_0100,   // 1010 //10'h248,   // 920
        SPL_CSR_SCRATCH            = 6'b11_1111;   //10'h27f,   // 9fc

    // Register offsets inside the AFU group (io_rx_csr_addr[13:6] == 8'h8a).
    // Only AFU_CSR_DSR_BASEL and AFU_CSR_DSR_BASEH are decoded below.
    localparam [5:0]
        AFU_CSR_DSR_BASEL          = 6'b00_0000,   //10'h280,   // a00
        AFU_CSR_DSR_BASEH          = 6'b00_0001,   //10'h281,   // a04
        AFU_CSR_CTX_BASEL          = 6'b00_0010,   //10'h282,   // a08
        AFU_CSR_CTX_BASEH          = 6'b00_0011,   //10'h283,   // a0c
        AFU_CSR_SCRATCH            = 6'b11_1111,   //10'h2bf;   // afc
        AFU_CSR_CMD_OPCODE         = 6'b00_1111;



    reg  [5:0]                      spl_dsr_base_hi;   // staged bits [31:26] of the SPL DSM base
    reg  [5:0]                      afu_dsr_base_hi;   // staged bits [31:26] of the AFU DSM base
    reg                             csr_reset = 0;     // drives spl_reset
    reg                             csr_enable = 0;    // written below but never read in this module

    assign spl_reset = csr_reset;
    //--------------------------------------------------------------------
    // RX - spl_csr<--spl_io
    //--------------------------------------------------------------------
    always @(posedge clk) begin
        if (~reset_n) begin
            csr_spl_dsm_base_valid <= 1'b0;
            csr_afu_dsm_base_valid <= 1'b0;
            csr_ctx_base_valid     <= 1'b0;

            spl_dsr_base_hi        <= 0;
            afu_dsr_base_hi        <= 0;

            csr_reset              <= 0;
            csr_enable             <= 0;
        end

        else begin
            // Defaults: csr_reset/csr_enable are high only for the cycle after
            // a CTRL write that sets them.
            csr_reset              <= 1'b0;
            csr_enable             <= 0;

            // A done input drops the matching valid flag (and the staged high
            // bits for the two DSM bases). These come before the write decode,
            // so a CSR write in the same cycle overrides them.
            if ( csr_ctx_base_done )     csr_ctx_base_valid     <= 1'b0;
            if ( csr_spl_dsm_base_done ) csr_spl_dsm_base_valid <= 1'b0;
            if ( csr_afu_dsm_base_done ) csr_afu_dsm_base_valid <= 1'b0;

            if ( csr_spl_dsm_base_done ) spl_dsr_base_hi        <= 0;
            if ( csr_afu_dsm_base_done ) afu_dsr_base_hi        <= 0;

            if (io_rx_csr_valid) begin
                // ---- SPL register group
                if (io_rx_csr_addr[13:6] == 8'h10) begin
                    case (io_rx_csr_addr[5:0])
                        SPL_CSR_DSR_BASEH : begin
                            spl_dsr_base_hi <= io_rx_csr_data[5:0];
                        end

                        // Uses the currently registered high bits, so the
                        // high register has to be written first.
                        SPL_CSR_DSR_BASEL : begin
                            csr_spl_dsm_base_valid <= 1'b1;
                            csr_spl_dsm_base       <= {spl_dsr_base_hi, io_rx_csr_data[31:6]};
                        end

                        SPL_CSR_CTX_BASELH : begin
                            csr_ctx_base[31:26] <= io_rx_csr_data[5:0];
                        end

                        SPL_CSR_CTX_BASELL : begin
                            csr_ctx_base[25:0]  <= io_rx_csr_data[31:6];
                            csr_ctx_base_valid  <= 1'b1;
                        end

                        SPL_CSR_CTRL : begin
                            csr_reset  <= io_rx_csr_data[0];
                            csr_enable <= io_rx_csr_data[1];
                        end
                    endcase
                end

                // ---- AFU register group
                else if (io_rx_csr_addr[13:6] == 8'h8a) begin
                    case (io_rx_csr_addr[5:0])
                        AFU_CSR_DSR_BASEH : begin
                            afu_dsr_base_hi <= io_rx_csr_data[5:0];
                        end

                        AFU_CSR_DSR_BASEL : begin
                            csr_afu_dsm_base_valid <= 1'b1;
                            csr_afu_dsm_base       <= {afu_dsr_base_hi, io_rx_csr_data[31:6]};
                        end
                    endcase
                end
            end
        end
    end // rx csr

endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/data_arbiter.sv:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this
file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */

`include "../framework_defines.vh"

// Data arbiter between `NUMBER_OF_FTHREADS FThread channels and the single
// read/write request path towards the io_requester.
//
// Two `regulator` instances (defined elsewhere) do the arbitration, one for
// writes and one for reads. On the request side the index of the granted
// FThread is inserted into the outgoing header directly above its low
// `FTHREAD_TAG bits. On the response side the same index is taken back out of
// the returned tag and used as the regulator's rx_tag. Responses from
// server_io are registered once before they enter the regulators.
module DataArbiter (
    input  wire                             clk,
    input  wire                             rst_n,

    ////////////////// io_requester <--> arbiter
    // TX_RD request,
    input  wire                             cor_tx_rd_ready,
    output wire                             cor_tx_rd_valid,
    output wire [70:0]                      cor_tx_rd_hdr,
    // TX_WR request,
    input  wire                             cor_tx_wr_ready,
    output wire                             cor_tx_wr_valid,
    output wire [74:0]                      cor_tx_wr_hdr,
    output wire [511:0]                     cor_tx_data,

    ////////////////// server_io <--> arbiter
    // RX_RD response,
    input  wire                             io_rx_rd_valid,
    input  wire [511:0]                     io_rx_data,
    input  wire [12:0]                      io_rx_rd_tag,

    // RX_WR response,
    input  wire                             io_rx_wr_valid,
    input  wire [12:0]                      io_rx_wr_tag,

    //////////////////////// Toward Channels ////////////////////////////
    //-------------- read interface
    input  wire                             ft_tx_rd_valid[`NUMBER_OF_FTHREADS-1:0],
    input  wire [67:0]                      ft_tx_rd_hdr[`NUMBER_OF_FTHREADS-1:0],
    output wire                             ft_tx_rd_ready[`NUMBER_OF_FTHREADS-1:0],

    output wire                             ft_rx_rd_valid[`NUMBER_OF_FTHREADS-1:0],
    output wire [511:0]                     ft_rx_data[`NUMBER_OF_FTHREADS-1:0],
    output wire [`FTHREAD_TAG-1:0]          ft_rx_rd_tag[`NUMBER_OF_FTHREADS-1:0],
    //-------------- write interface
    input  wire [71:0]                      ft_tx_wr_hdr[`NUMBER_OF_FTHREADS-1:0],
    input  wire [511:0]                     ft_tx_data[`NUMBER_OF_FTHREADS-1:0],
    input  wire                             ft_tx_wr_valid[`NUMBER_OF_FTHREADS-1:0],
    output wire                             ft_tx_wr_ready[`NUMBER_OF_FTHREADS-1:0],

    output wire                             ft_rx_wr_valid[`NUMBER_OF_FTHREADS-1:0],
    output wire [`FTHREAD_TAG-1:0]          ft_rx_wr_tag[`NUMBER_OF_FTHREADS-1:0]
);

    // Write path: per-FThread {header, data} bundles and the arbitrated result.
    wire [(512+72)-1 : 0]           usr_wr_tx_lines[`NUMBER_OF_FTHREADS-1:0];
    wire [512+72-1 : 0]             wr_tx_line;
    wire [`FTHREADS_BITS-1:0]       wr_tx_tag;    // index of the FThread granted on the write path
    wire                            wr_tx_valid;

    // Read path: per-FThread {tag, data} response bundles and the arbitrated request.
    wire [(512+`FTHREAD_TAG)-1 :0]  usr_rd_rx_lines[`NUMBER_OF_FTHREADS-1:0];
    wire [67:0]                     rd_tx_line;
    wire [`FTHREADS_BITS-1:0]       rd_tx_tag;    // index of the FThread granted on the read path
    wire                            rd_tx_valid;


    // One-cycle registered copies of the server_io responses.
    reg                             io_rx_rd_valid_d1;
    reg  [511:0]                    io_rx_data_d1;
    reg  [12:0]                     io_rx_rd_tag_d1;

    reg                             io_rx_wr_valid_d1;
    reg  [12:0]                     io_rx_wr_tag_d1;
    //// Register RX
    // The data register is deliberately left out of the reset branch.
    always @(posedge clk) begin
        if(~rst_n) begin
            io_rx_rd_valid_d1 <= 0;
            //io_rx_data_d1     <= 0;
            io_rx_rd_tag_d1   <= 0;

            io_rx_wr_valid_d1 <= 0;
            io_rx_wr_tag_d1   <= 0;
        end
        else begin
            io_rx_rd_valid_d1 <= io_rx_rd_valid;
            io_rx_data_d1     <= io_rx_data;
            io_rx_rd_tag_d1   <= io_rx_rd_tag;

            io_rx_wr_valid_d1 <= io_rx_wr_valid;
            io_rx_wr_tag_d1   <= io_rx_wr_tag;
        end
    end
    ////


    // ------------------------------------------------------------------
    // Write channel
    // ------------------------------------------------------------------
    genvar i;
    // Bundle header (upper 72 bits) and data (lower 512 bits) per FThread.
    generate for( i = 0; i < `NUMBER_OF_FTHREADS; i = i + 1) begin: usrWrLines
        assign usr_wr_tx_lines[i] = {ft_tx_wr_hdr[i], ft_tx_data[i]};
    end
    endgenerate


    assign cor_tx_wr_valid = wr_tx_valid;
    // 72-bit FThread header -> 75-bit outgoing header: a 3-bit field (the
    // granted FThread index, zero-padded) is spliced in above the low
    // `FTHREAD_TAG bits.
    assign cor_tx_wr_hdr   = {wr_tx_line[512+71: 512+`FTHREAD_TAG], {{3-`FTHREADS_BITS}{1'b0}}, wr_tx_tag, wr_tx_line[512+`FTHREAD_TAG-1:512+0]};
    assign cor_tx_data     = wr_tx_line[511:0];

    // NOTE(review): PRIORITY_BATCHED_ROUND_ROBIN / ROUND_ROBIN_BATCH_SIZE are
    // regulator parameters whose meaning is not visible from this module.
    regulator #(.NUMBER_OF_USERS(`NUMBER_OF_FTHREADS),
                .USERS_BITS(`FTHREADS_BITS),
                .USER_LINE_IN_WIDTH(512+72),
                .USER_LINE_OUT_WIDTH(`FTHREAD_TAG),
                .PRIORITY_BATCHED_ROUND_ROBIN(1),
                .ROUND_ROBIN_BATCH_SIZE(16))
    WR_Channel_Regulator(

        .clk                         (clk),
        .rst_n                       (rst_n),

        // Users TX Channel
        .usr_tx_lines                (usr_wr_tx_lines),
        .usr_tx_valid                (ft_tx_wr_valid),
        .usr_tx_ready                (ft_tx_wr_ready),

        // Users RX Channel
        .usr_rx_lines                (ft_rx_wr_tag),
        .usr_rx_valid                (ft_rx_wr_valid),

        // TX Channel
        .tx_line                     (wr_tx_line),
        .tx_tag                      (wr_tx_tag),
        .tx_valid                    (wr_tx_valid),
        .tx_ready                    (cor_tx_wr_ready),

        // RX Channel
        // Low `FTHREAD_TAG bits go back to the FThread; the bits above them
        // carry the FThread index spliced in on the request side.
        .rx_line                     (io_rx_wr_tag_d1[`FTHREAD_TAG-1:0]),
        .rx_tag                      (io_rx_wr_tag_d1[`FTHREAD_TAG+`FTHREADS_BITS - 1:`FTHREAD_TAG]),
        .rx_valid                    (io_rx_wr_valid_d1)
    );

    // ------------------------------------------------------------------
    // Read channel
    // ------------------------------------------------------------------
    genvar j;
    // Unpack each FThread's response bundle: data in the low 512 bits, the
    // FThread-local tag above it.
    generate for( j = 0; j < `NUMBER_OF_FTHREADS; j = j + 1) begin: usrRdLines
        assign ft_rx_data[j]    = usr_rd_rx_lines[j][511 : 0];
        assign ft_rx_rd_tag[j]  = usr_rd_rx_lines[j][512 + `FTHREAD_TAG-1 : 512];
    end
    endgenerate

    assign cor_tx_rd_valid = rd_tx_valid;
    // 68-bit FThread header -> 71-bit outgoing header, same splice as on the
    // write path.
    assign cor_tx_rd_hdr   = {rd_tx_line[67:`FTHREAD_TAG], {{3-`FTHREADS_BITS}{1'b0}}, rd_tx_tag, rd_tx_line[`FTHREAD_TAG-1:0]};

    regulator #(.NUMBER_OF_USERS(`NUMBER_OF_FTHREADS),
                .USERS_BITS(`FTHREADS_BITS),
                .USER_LINE_IN_WIDTH(68),
                .USER_LINE_OUT_WIDTH(512+`FTHREAD_TAG),
                .PRIORITY_BATCHED_ROUND_ROBIN(1),
                .ROUND_ROBIN_BATCH_SIZE(16))
    RD_Channel_Regulator(

        .clk                         (clk),
        .rst_n                       (rst_n),

        // Users TX Channel
        .usr_tx_lines                (ft_tx_rd_hdr),
        .usr_tx_valid                (ft_tx_rd_valid),
        .usr_tx_ready                (ft_tx_rd_ready),

        // Users RX Channel
        .usr_rx_lines                (usr_rd_rx_lines),
        .usr_rx_valid                (ft_rx_rd_valid),

        // TX Channel
        .tx_line                     (rd_tx_line),
        .tx_tag                      (rd_tx_tag),
        .tx_valid                    (rd_tx_valid),
        .tx_ready                    (cor_tx_rd_ready),

        // RX Channel
        // Response bundle is {FThread-local tag, data}; the FThread index is
        // taken from the tag bits above `FTHREAD_TAG.
        .rx_line                     ({io_rx_rd_tag_d1[`FTHREAD_TAG-1:0], io_rx_data_d1}),
        .rx_tag                      (io_rx_rd_tag_d1[`FTHREAD_TAG+`FTHREADS_BITS - 1:`FTHREAD_TAG]),
        .rx_valid                    (io_rx_rd_valid_d1)
    );

endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/fpga_setup.v:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */

`include "../framework_defines.vh"
`include "../afu_defines.vh"

// FPGA setup: holds the CSR file and reports setup status back to software by
// issuing one-off memory writes on the setup_tx_wr_* channel.
//
// Up to five status writes are issued, one per accepted cycle, in fixed
// priority order (highest first):
//   1. SPL ID                      -> csr_spl_dsm_base                           (tag 1)
//   2. AFU ID                      -> csr_afu_dsm_base                           (tag 2)
//   3. placed-AFU table            -> csr_afu_dsm_base + `ALLOC_OPERATORS_DSM_OFFSET (tag 3)
//   4. context status pt_status[1] -> csr_spl_dsm_base + `CTX_STATUS_DSM_OFFSET  (tag 4)
//   5. page-table update cycles    -> csr_afu_dsm_base + `PT_STATUS_DSM_OFFSET   (tag 5)
// Each write is issued at most once between resets, tracked by its *_updated
// flag. State only advances while setup_tx_wr_ready is high; otherwise the
// current request is held.
//
// The module also forwards the context base from the CSR file to the page
// table logic (pt_update / pt_base_addr) and counts the cycles for which the
// update is pending with pt_status still zero.
module fpga_setup (
    input  wire                             clk,
    input  wire                             rst_n,

    // Set to pt_status[0] when the page-table status write (tag 5) is issued.
    output reg                              ctx_status_valid,

    // server_io <--> cmd server: RX_RD
    input  wire                             io_rx_csr_valid,
    input  wire [13:0]                      io_rx_csr_addr,
    input  wire [31:0]                      io_rx_csr_data,

    // TX WR
    input  wire                             setup_tx_wr_ready,
    output reg                              setup_tx_wr_valid,
    output reg  [`FPGA_SETUP_TAG-1:0]       setup_tx_wr_tag,
    output reg  [31:0]                      setup_tx_wr_addr,
    output reg  [511:0]                     setup_tx_data,

    // setup pagetable
    input  wire [1:0]                       pt_status,
    output wire                             pt_update,
    output wire [31:0]                      pt_base_addr,

    // Reset pulse produced by the CSR file; also resets this module's state.
    output wire                             spl_reset_t
);

    reg  [31:0]                     pt_update_cycles = 0;   // cycles with pt_update high and pt_status == 0

    reg                             afu_dsm_updated = 0;    // AFU ID write issued
    wire                            csr_afu_dsm_base_valid;
    wire [31:0]                     csr_afu_dsm_base;

    reg                             spl_dsm_updated = 0;    // SPL ID write issued
    wire                            csr_spl_dsm_base_valid;
    wire [31:0]                     csr_spl_dsm_base;

    reg                             vir_ctx_updated = 0;    // context status write issued
    wire [31:0]                     csr_vir_ctx_base;
    wire                            csr_vir_ctx_valid;

    reg                             pt_status_updated = 0;  // page-table status write issued
    reg                             afu_config_updated = 0; // placed-AFU table write issued

    // Pending-write requests, in priority order (see the selector below).
    wire                            spl_dsm_update;
    wire                            afu_dsm_update;
    wire                            vir_ctx_update;
    wire                            pt_status_update;
    wire                            afu_config_update;

    // ------------------------------------------------------------------
    // CSR file
    // ------------------------------------------------------------------
    // The *_updated flags are fed back as the CSR file's "done" inputs.
    csr_file csr_file(
        .clk                        (clk),
        .reset_n                    (rst_n),
        .spl_reset                  (spl_reset_t),
        // server_io --> csr_file
        .io_rx_csr_valid            (io_rx_csr_valid),
        .io_rx_csr_addr             (io_rx_csr_addr),
        .io_rx_csr_data             (io_rx_csr_data),

        // csr_file --> dsm_module, spl_id, afu_id
        .csr_spl_dsm_base           (csr_spl_dsm_base),
        .csr_spl_dsm_base_valid     (csr_spl_dsm_base_valid),
        .csr_spl_dsm_base_done      (spl_dsm_updated),

        .csr_afu_dsm_base           (csr_afu_dsm_base),
        .csr_afu_dsm_base_valid     (csr_afu_dsm_base_valid),
        .csr_afu_dsm_base_done      (afu_dsm_updated),

        // csr_file --> ctx_tracker, FPGA virtual memory space
        .csr_ctx_base_valid         (csr_vir_ctx_valid),
        .csr_ctx_base               (csr_vir_ctx_base),
        .csr_ctx_base_done          (vir_ctx_updated)
    );

    // ------------------------------------------------------------------
    // Setup FSM
    // ------------------------------------------------------------------
    assign spl_dsm_update    = csr_spl_dsm_base_valid & ~spl_dsm_updated;
    assign afu_dsm_update    = csr_afu_dsm_base_valid & ~afu_dsm_updated;
    // Context status is reported once any pt_status bit is set.
    assign vir_ctx_update    = csr_vir_ctx_valid & (|pt_status) & ~vir_ctx_updated;
    assign pt_status_update  = csr_vir_ctx_valid & pt_status[0] & ~pt_status_updated;

    // The placed-AFU table follows the AFU ID write.
    assign afu_config_update = afu_dsm_updated & ~afu_config_updated;

    always @(posedge clk) begin
        if (~rst_n | spl_reset_t) begin
            setup_tx_wr_addr   <= 0;
            setup_tx_wr_valid  <= 1'b0;
            setup_tx_wr_tag    <= 'h0;
            setup_tx_data      <= 0;

            spl_dsm_updated    <= 1'b0;
            afu_dsm_updated    <= 1'b0;
            vir_ctx_updated    <= 1'b0;
            pt_status_updated  <= 1'b0;
            afu_config_updated <= 1'b0;

            ctx_status_valid   <= 0;
        end
        else if( setup_tx_wr_ready ) begin
            // Priority selector. casez is used instead of casex so that only
            // the '?' positions in the patterns are wildcards; an unknown (X)
            // selector bit matches no item rather than silently matching one.
            // The six patterns cover all 32 selector values.
            casez ({spl_dsm_update, afu_dsm_update, afu_config_update, vir_ctx_update, pt_status_update})
                5'b1????: begin // SPL ID
                    setup_tx_wr_addr  <= csr_spl_dsm_base;
                    setup_tx_wr_valid <= 1'b1;
                    setup_tx_wr_tag   <= 'h1;
                    setup_tx_data     <= {480'b0, `SPL_ID};
                    spl_dsm_updated   <= 1'b1;
                end
                5'b01???: begin // AFU ID
                    setup_tx_wr_addr  <= csr_afu_dsm_base;
                    setup_tx_wr_valid <= 1'b1;
                    setup_tx_wr_tag   <= 'h2;
                    setup_tx_data     <= {448'b0, `AFU_ID};
                    afu_dsm_updated   <= 1'b1;
                end
                5'b001??: begin // placed-AFU table, FThread 1 in the lowest field
                    setup_tx_wr_addr  <= (csr_afu_dsm_base + `ALLOC_OPERATORS_DSM_OFFSET);
                    setup_tx_wr_valid <= 1'b1;
                    setup_tx_wr_tag   <= 'h3;
                    setup_tx_data     <= {256'b0,
                                          {16'b0,`FTHREAD_8_PLACED_AFU},
                                          {16'b0,`FTHREAD_7_PLACED_AFU},
                                          {16'b0,`FTHREAD_6_PLACED_AFU},
                                          {16'b0,`FTHREAD_5_PLACED_AFU},
                                          {16'b0,`FTHREAD_4_PLACED_AFU},
                                          {16'b0,`FTHREAD_3_PLACED_AFU},
                                          {16'b0,`FTHREAD_2_PLACED_AFU},
                                          {16'b0,`FTHREAD_1_PLACED_AFU} };
                    afu_config_updated <= 1'b1;
                end
                5'b0001?: begin // context status: pt_status[1] in bit 0
                    setup_tx_wr_addr  <= (csr_spl_dsm_base + `CTX_STATUS_DSM_OFFSET);
                    setup_tx_wr_valid <= 1'b1;
                    setup_tx_wr_tag   <= 'h4;
                    setup_tx_data     <= {384'b0, 127'b0, pt_status[1]};
                    vir_ctx_updated   <= 1'b1;

                end
                5'b00001: begin // page-table status: pending-cycle counter
                    setup_tx_wr_addr  <= (csr_afu_dsm_base + `PT_STATUS_DSM_OFFSET);
                    setup_tx_wr_valid <= 1'b1;
                    setup_tx_wr_tag   <= 'h5;
                    setup_tx_data     <= {480'b0, pt_update_cycles};
                    pt_status_updated <= 1'b1;

                    ctx_status_valid  <= pt_status[0];
                end
                5'b00000: begin // nothing pending: drop the request
                    setup_tx_wr_addr  <= 0;
                    setup_tx_wr_valid <= 1'b0;
                    setup_tx_wr_tag   <= 'h0;
                    setup_tx_data     <= 0;
                end
            endcase
        end
    end

    // ------------------------------------------------------------------
    // Page table update
    // ------------------------------------------------------------------
    assign pt_update    = csr_vir_ctx_valid;
    assign pt_base_addr = csr_vir_ctx_base;

    // Counts the cycles in which the update is requested but pt_status is
    // still all-zero.
    always @(posedge clk) begin
        if (~rst_n | spl_reset_t) begin
            pt_update_cycles <= 0;
        end
        else begin
            if(pt_update & ~(|pt_status) ) begin
                pt_update_cycles <= pt_update_cycles + 1'b1;
            end
        end
    end

endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/job_manager.sv:
--------------------------------------------------------------------------------
1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */

`include "../framework_defines.vh"
`include "../afu_defines.vh"

// Job manager: structural wrapper that connects a jobs_reader to a
// job_distributor.
//
// jobs_reader owns the jrd_* memory request/response channels and produces one
// 512-bit job stream per job type (job_queue_out / job_queue_valid /
// job_queue_ready). job_distributor consumes those streams together with the
// FThread and job configuration inputs and drives the per-FThread job
// outputs. Both sub-modules are defined elsewhere; this module contains no
// logic of its own.
module job_manager(

    input  wire                              clk,
    input  wire                              rst_n,

    // Job queue parameters, forwarded to jobs_reader.
    input  wire [31:0]                       job_queue_base_addr[`NUM_JOB_TYPES-1:0],
    input  wire                              job_reader_enable,
    input  wire [31:0]                       job_queue_size,
    // Workspace virtual base address, forwarded to job_distributor.
    input  wire [57:0]                       ws_virt_base_addr,
    input  wire [15:0]                       queue_poll_rate,
    input  wire                              ws_virt_base_addr_valid,
    //--------------------------------- TX, RX Interfaces ---------------------------------------//
    // TX RD
    output wire [31:0]                       jrd_tx_rd_addr,
    output wire [`JOB_READER_TAG-1:0]        jrd_tx_rd_tag,
    output wire                              jrd_tx_rd_valid,
    input  wire                              jrd_tx_rd_ready,
    // TX WR
    output wire [31:0]                       jrd_tx_wr_addr,
    output wire [`JOB_READER_TAG-1:0]        jrd_tx_wr_tag,
    output wire                              jrd_tx_wr_valid,
    output wire [511:0]                      jrd_tx_data,
    input  wire                              jrd_tx_wr_ready,
    // RX RD
    input  wire [`JOB_READER_TAG-1:0]        jrd_rx_rd_tag,
    input  wire [511:0]                      jrd_rx_data,
    input  wire                              jrd_rx_rd_valid,
    // RX WR
    input  wire                              jrd_rx_wr_valid,
    input  wire [`JOB_READER_TAG-1:0]        jrd_rx_wr_tag,
    //-------------------------------- Configuration Matrix -------------------------------------//
    input  wire [15:0]                       fthread_config[`NUMBER_OF_FTHREADS-1:0],
    input  wire                              fthread_config_valid,

    input  wire [15:0]                       job_config[`NUM_JOB_TYPES-1:0],
    input  wire                              job_config_valid,

    //----------------------------------- Jobs to FThreads ------------------------------------//
    output wire [`CMD_LINE_WIDTH-1:0]        fthread_job[`NUMBER_OF_FTHREADS-1:0],
    output wire                              fthread_job_valid[`NUMBER_OF_FTHREADS-1:0],
    input  wire                              fthread_done[`NUMBER_OF_FTHREADS-1:0]
);


    // Per-job-type streams from jobs_reader to job_distributor.
    wire [511:0]                     job_queue_out[`NUM_JOB_TYPES-1:0];
    wire                             job_queue_valid[`NUM_JOB_TYPES-1:0];
    wire                             job_queue_ready[`NUM_JOB_TYPES-1:0];


    // ------------------------------------------------------------------
    // Jobs reader: fetches job entries from the job queues in memory.
    // ------------------------------------------------------------------
    jobs_reader jobs_reader(

        .clk                             (clk),
        .rst_n                           (rst_n),

        .job_queue_base_addr             (job_queue_base_addr),
        .job_reader_enable               (job_reader_enable),
        .job_queue_size                  (job_queue_size),
        .queue_poll_rate                 (queue_poll_rate),
        //---------------------- TX, RX Interfaces ----------------------//
        // TX RD
        .jrd_tx_rd_addr                  (jrd_tx_rd_addr),
        .jrd_tx_rd_tag                   (jrd_tx_rd_tag),
        .jrd_tx_rd_valid                 (jrd_tx_rd_valid),
        .jrd_tx_rd_ready                 (jrd_tx_rd_ready),
        // TX WR
        .jrd_tx_wr_addr                  (jrd_tx_wr_addr),
        .jrd_tx_wr_tag                   (jrd_tx_wr_tag),
        .jrd_tx_wr_valid                 (jrd_tx_wr_valid),
        .jrd_tx_data                     (jrd_tx_data),
        .jrd_tx_wr_ready                 (jrd_tx_wr_ready),
        // RX RD
        .jrd_rx_rd_tag                   (jrd_rx_rd_tag),
        .jrd_rx_data                     (jrd_rx_data),
        .jrd_rx_rd_valid                 (jrd_rx_rd_valid),
        // RX WR
        .jrd_rx_wr_valid                 (jrd_rx_wr_valid),
        .jrd_rx_wr_tag                   (jrd_rx_wr_tag),
        //--------------------------------------------------------------//
        .job_queue_out                   (job_queue_out),
        .job_queue_valid                 (job_queue_valid),
        .job_queue_ready                 (job_queue_ready)
    );


    // ------------------------------------------------------------------
    // Job distributor: hands the fetched jobs to the FThreads.
    // ------------------------------------------------------------------
    job_distributor job_distributor
    (
        .clk                             (clk),
        .rst_n                           (rst_n),
        .ws_virt_base_addr               (ws_virt_base_addr),
        .ws_virt_base_addr_valid         (ws_virt_base_addr_valid),
        //-------- Standing Job Requests ------//
        .job_queue_out                   (job_queue_out),
        .job_queue_valid                 (job_queue_valid),
        .job_queue_ready                 (job_queue_ready),
        //--------- Configuration Matrix ------//
        .fthread_config                  (fthread_config),
        .fthread_config_valid            (fthread_config_valid),

        .job_config                      (job_config),
        .job_config_valid                (job_config_valid),

        //--------- Jobs to FThreads --------//
        .fthread_job                     (fthread_job),
        .fthread_job_valid               (fthread_job_valid),
        .fthread_done                    (fthread_done)
    );


endmodule
--------------------------------------------------------------------------------
/rtl/fthread_shell/job_queue.v:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | `include "../framework_defines.vh" 18 | 19 | /* job_queue: consumer side of one job queue. The FSM below polls a producer status line, reads job entries one request at a time and presents each on job_queue_out/job_queue_valid; a separate process writes the number of pulled jobs back to the consumer status line. NOTE(review): parameter POLL_CYCLES is not referenced in this module; the poll interval is taken from queue_poll_rate. */ 20 | module job_queue #(parameter POLL_CYCLES = 32) 21 | ( 22 | input wire clk, 23 | input wire rst_n, 24 | //-------------------------------------------------// 25 | input wire start_queue, 26 | input wire [31:0] queue_base_addr, 27 | input wire [31:0] queue_size, // in CLs 28 | input wire [15:0] queue_poll_rate, 29 | input wire queue_reset, 30 | // TX RD 31 | output reg [31:0] jq_tx_rd_addr, 32 | output reg [`JOB_QUEUE_TAG-1:0] jq_tx_rd_tag, 33 | output reg jq_tx_rd_valid, 34 | input wire jq_tx_rd_ready, 35 | // TX WR 36 | output reg [31:0] jq_tx_wr_addr, 37 | output reg [`JOB_QUEUE_TAG-1:0] jq_tx_wr_tag, 38 | output reg jq_tx_wr_valid, 39 | output reg [511:0] jq_tx_data, 40 | input wire jq_tx_wr_ready, 41 | // RX RD 42 | input wire [`JOB_QUEUE_TAG-1:0] jq_rx_rd_tag, 43 | input wire [511:0] jq_rx_data, 44 | input wire jq_rx_rd_valid, 45 | // RX WR 46 | input wire jq_rx_wr_valid, 47 | input wire [`JOB_QUEUE_TAG-1:0] jq_rx_wr_tag, 48 | ///////////////////////// User Logic Interface //////////////////// 49 | output reg [511:0] job_queue_out, 50 | output reg job_queue_valid, 51 | input wire job_queue_ready 52 | ); 53 | 54 | ///////////////////////////////// Wires Declarations 
//////////////////////////// 55 | 56 | wire update_status; 57 | wire [15:0] rd_cnt_inc; 58 | /////////////////////////////////////// Reg Declarations ///////////////////////// 59 | reg [31:0] numPulledJobs; 60 | reg [31:0] numAvailableJobs; 61 | reg [15:0] queue_buffer_size; 62 | reg [15:0] rd_cnt; 63 | 64 | reg [15:0] prog_poll_cycles; 65 | reg [15:0] poll_count; 66 | 67 | reg [31:0] queue_struct_base; 68 | reg [31:0] queue_buffer_base; 69 | 70 | reg [2:0] jq_fsm_state; 71 | 72 | reg last_req_d1; 73 | /* NOTE(review): rx_rd_tag and rx_wr_tag have fixed widths (6 and 8 bits) while the ports they register are `JOB_QUEUE_TAG bits wide - confirm the widths agree. */ 74 | reg [5:0] rx_rd_tag; 75 | reg [511:0] rx_rd_data; 76 | reg rx_rd_valid; 77 | reg rx_wr_valid; 78 | reg [7:0] rx_wr_tag; 79 | 80 | reg write_response_pending; 81 | reg [31:0] lastUpdatedJobs; 82 | 83 | reg jq_producer_valid; 84 | 85 | /////////////////////////////////// Local Parameters ///////////////////////////////////////// 86 | localparam [2:0] 87 | CMQ_IDLE_STATE = 3'b000, 88 | 89 | CMQ_READ_CMD_STATE = 3'b001, 90 | CMQ_RECEIVE_STATE = 3'b010, 91 | CMQ_PROCESS_STATE = 3'b011, 92 | 93 | CMQ_CHECK_STATE = 3'b100, 94 | CMQ_POLL_STATE = 3'b101, 95 | CMQ_POLL_RESP_STATE = 3'b110; 96 | 97 | /////////// buffer response 98 | /* Registers the RX response inputs for one cycle; the logic below only uses the registered copies. rx_rd_data has no reset value (its reset assignment is commented out). */ always@(posedge clk) begin 99 | if(~rst_n | queue_reset) begin 100 | rx_rd_tag <= 0; 101 | //rx_rd_data <= 0; 102 | rx_rd_valid <= 0; 103 | // RX WR 104 | rx_wr_valid <= 0; 105 | rx_wr_tag <= 0; 106 | end 107 | else begin 108 | rx_rd_tag <= jq_rx_rd_tag; 109 | rx_rd_data <= jq_rx_data; 110 | rx_rd_valid <= jq_rx_rd_valid; 111 | // RX WR 112 | rx_wr_valid <= jq_rx_wr_valid; 113 | rx_wr_tag <= jq_rx_wr_tag; 114 | end 115 | end 116 | 117 | /////////////////////////////// CRB Status Logic ///////////////////////////////// 118 | /* Main FSM: IDLE -> POLL -> POLL_RESP -> CHECK, then READ_CMD -> RECEIVE -> PROCESS repeated per available job, then back to POLL. */ always @(posedge clk) begin 119 | if(~rst_n | queue_reset) begin 120 | numPulledJobs <= 0; 121 | numAvailableJobs <= 0; 122 | 123 | queue_struct_base <= 0; 124 | queue_buffer_base <= 0; 125 | queue_buffer_size <= 0; 126 | 127 | jq_fsm_state <= CMQ_IDLE_STATE; 128 | poll_count <= 0; 129 | prog_poll_cycles <= 0; 130 | 131 | 
jq_tx_rd_addr <= 0; 132 | jq_tx_rd_valid <= 0; 133 | jq_tx_rd_tag <= 0; 134 | last_req_d1 <= 0; 135 | 136 | jq_producer_valid <= 0; 137 | 138 | job_queue_valid <= 0; 139 | job_queue_out <= 0; 140 | 141 | rd_cnt <= 0; 142 | end 143 | else begin 144 | /* NOTE(review): the case has no default arm; encoding 3'b111 is not assigned to any state. */ case (jq_fsm_state) 145 | CMQ_IDLE_STATE: begin 146 | /* While idle, latch the configuration: the buffer base is queue_base_addr + queue_size[3:0] and the buffer size is queue_size[31:16]. NOTE(review): the port comment says queue_size is "in CLs" but it is unpacked as two fields here - confirm the encoding with the software side. */ jq_fsm_state <= (start_queue)? CMQ_POLL_STATE : CMQ_IDLE_STATE; 147 | queue_struct_base <= queue_base_addr; 148 | queue_buffer_base <= queue_base_addr + queue_size[3:0]; 149 | queue_buffer_size <= queue_size[31:16]; 150 | rd_cnt <= 0; 151 | prog_poll_cycles <= queue_poll_rate; 152 | end 153 | /////////////////////// Read Commands from the Queue States //////////////////////////// 154 | CMQ_READ_CMD_STATE: begin 155 | /* Issue a read (tag 'h2) for entry rd_cnt of the buffer; rd_cnt wraps to 0 when rd_cnt + 1 equals queue_buffer_size. last_req_d1 remembers that this request consumes the last available job. */ jq_fsm_state <= CMQ_RECEIVE_STATE; 156 | rd_cnt <= (rd_cnt_inc == queue_buffer_size)? 0 : rd_cnt_inc; 157 | jq_tx_rd_valid <= 1'b1; 158 | jq_tx_rd_addr <= {1'b0, queue_buffer_base} + {1'b0, rd_cnt}; 159 | jq_tx_rd_tag <= 'h2; 160 | numAvailableJobs <= numAvailableJobs - 1; 161 | last_req_d1 <= (numAvailableJobs == 1); 162 | end 163 | 164 | CMQ_RECEIVE_STATE: begin 165 | /* Drop the request once it is accepted, then wait for the registered response and forward it as a job. */ if( jq_tx_rd_ready ) jq_tx_rd_valid <= 1'b0; 166 | 167 | jq_fsm_state <= (rx_rd_valid)? CMQ_PROCESS_STATE : CMQ_RECEIVE_STATE; 168 | numPulledJobs <= (rx_rd_valid)? numPulledJobs + 1'b1 : numPulledJobs; 169 | 170 | job_queue_out <= rx_rd_data; 171 | job_queue_valid <= (rx_rd_valid)? 1'b1 : 1'b0; 172 | end 173 | CMQ_PROCESS_STATE: begin 174 | /* Hold job_queue_valid until job_queue_ready; then poll again after the last job, otherwise read the next entry. */ jq_fsm_state <= (~job_queue_ready)? CMQ_PROCESS_STATE : 175 | (last_req_d1)? CMQ_POLL_STATE : CMQ_READ_CMD_STATE; 176 | job_queue_valid <= (~job_queue_ready)? 1'b1 : 1'b0; 177 | end 178 | /////////////////////////// Poll On CMD Queue Producer and Check Validity ////////////////////// 179 | CMQ_CHECK_STATE: begin 180 | jq_fsm_state <= ((numAvailableJobs != 0) & jq_producer_valid)? 
CMQ_READ_CMD_STATE : CMQ_POLL_STATE; 181 | end 182 | CMQ_POLL_STATE: begin 183 | /* Count cycles; when poll_count equals prog_poll_cycles, request the producer status line (tag 'h1). */ 184 | if( poll_count == prog_poll_cycles) begin 185 | jq_tx_rd_addr <= queue_struct_base + `CRB_STRUCT_PRODUCER_LINE_OFFSET; 186 | jq_tx_rd_valid <= 1'b1; 187 | jq_tx_rd_tag <= 'h1; 188 | 189 | jq_fsm_state <= CMQ_POLL_RESP_STATE; 190 | end 191 | poll_count <= poll_count + 1'b1; 192 | end 193 | CMQ_POLL_RESP_STATE: begin 194 | /* On the response: available jobs = (rx_rd_data[63:32] >> 6) - numPulledJobs, and the producer is considered valid when rx_rd_data[95:64] equals `CMQ_PROD_VALID_MAGIC_NUMBER. */ if( jq_tx_rd_ready ) jq_tx_rd_valid <= 1'b0; 195 | 196 | poll_count <= 0; 197 | 198 | jq_fsm_state <= (rx_rd_valid)? CMQ_CHECK_STATE : CMQ_POLL_RESP_STATE; 199 | numAvailableJobs <= (rx_rd_valid)? ((rx_rd_data[63:32] >> 6) - numPulledJobs) : numAvailableJobs; 200 | jq_producer_valid <= (rx_rd_valid)? (rx_rd_data[95:64] == `CMQ_PROD_VALID_MAGIC_NUMBER) : 0; 201 | end 202 | endcase 203 | end 204 | end 205 | 206 | assign rd_cnt_inc = rd_cnt + 1'b1; 207 | 208 | ////////////////////////////////////////////////////////////////////////////////////// 209 | ////////////////////////////// CRB Consumer Status Update //////////////////////////// 210 | ////////////////////////////// TX WR Requests Generation //////////////////////////// 211 | /* Consumer status write-back: when update_status is set and the write channel is free, write {448'b0, numPulledJobs << 6, numPulledJobs} to the consumer status line. write_response_pending is set when a write is accepted and cleared by the registered write response. */ 212 | always @(posedge clk) begin 213 | if(~rst_n | queue_reset) begin 214 | //jq_tx_data <= 0; 215 | jq_tx_wr_addr <= 0; 216 | jq_tx_wr_valid <= 0; 217 | jq_tx_wr_tag <= 0; 218 | 219 | lastUpdatedJobs <= 0; 220 | 221 | write_response_pending <= 0; 222 | end 223 | else begin 224 | write_response_pending <= (write_response_pending)? 
~rx_wr_valid : (jq_tx_wr_ready & jq_tx_wr_valid); 225 | 226 | if(jq_tx_wr_ready | ~jq_tx_wr_valid) begin 227 | if( update_status ) begin 228 | lastUpdatedJobs <= numPulledJobs; 229 | jq_tx_data <= {448'b0, numPulledJobs << 6, numPulledJobs}; 230 | jq_tx_wr_addr <= queue_struct_base + `CRB_STRUCT_CONSUMER_LINE_OFFSET; 231 | jq_tx_wr_tag <= 0; 232 | jq_tx_wr_valid <= 1'b1; 233 | end 234 | else begin 235 | jq_tx_wr_valid <= 0; 236 | end 237 | end 238 | end 239 | end 240 | /* Request a status write whenever numPulledJobs differs from lastUpdatedJobs and no write response is outstanding. */ 241 | assign update_status = ((numPulledJobs - lastUpdatedJobs) > 0) & ~write_response_pending; 242 | 243 | 244 | endmodule 245 | -------------------------------------------------------------------------------- /rtl/fthread_shell/jobs_reader.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | `include "../framework_defines.vh" 18 | `include "../afu_defines.vh" 19 | /* jobs_reader: instantiates one job_queue per job type and shares a single TX read channel and a single TX write channel among them using the rotating selectors rd_rr_state and wr_rr_state. Responses are routed back to a queue by the two most significant tag bits. */ 20 | module jobs_reader( 21 | 22 | input wire clk, 23 | input wire rst_n, 24 | 25 | input wire [31:0] job_queue_base_addr[`NUM_JOB_TYPES-1:0], 26 | input wire job_reader_enable, 27 | input wire [31:0] job_queue_size, 28 | input wire [15:0] queue_poll_rate, 29 | //---------------------- TX, RX Interfaces ----------------------// 30 | // TX RD 31 | output reg [31:0] jrd_tx_rd_addr, 32 | output reg [`JOB_READER_TAG-1:0] jrd_tx_rd_tag, 33 | output reg jrd_tx_rd_valid, 34 | input wire jrd_tx_rd_ready, 35 | // TX WR 36 | output reg [31:0] jrd_tx_wr_addr, 37 | output reg [`JOB_READER_TAG-1:0] jrd_tx_wr_tag, 38 | output reg jrd_tx_wr_valid, 39 | output reg [511:0] jrd_tx_data, 40 | input wire jrd_tx_wr_ready, 41 | // RX RD 42 | input wire [`JOB_READER_TAG-1:0] jrd_rx_rd_tag, 43 | input wire [511:0] jrd_rx_data, 44 | input wire jrd_rx_rd_valid, 45 | // RX WR 46 | input wire jrd_rx_wr_valid, 47 | input wire [`JOB_READER_TAG-1:0] jrd_rx_wr_tag, 48 | //--------------------------------------------------------------// 49 | output wire [511:0] job_queue_out[`NUM_JOB_TYPES-1:0], 50 | output wire job_queue_valid[`NUM_JOB_TYPES-1:0], 51 | input wire job_queue_ready[`NUM_JOB_TYPES-1:0] 52 | ); 53 | 54 | wire [31:0] jq_tx_rd_addr[`NUM_JOB_TYPES-1:0]; 55 | wire [`JOB_QUEUE_TAG-1:0] jq_tx_rd_tag[`NUM_JOB_TYPES-1:0]; 56 | wire jq_tx_rd_valid[`NUM_JOB_TYPES-1:0]; 57 | wire jq_tx_rd_ready[`NUM_JOB_TYPES-1:0]; 58 | 59 | wire [31:0] jq_tx_wr_addr[`NUM_JOB_TYPES-1:0]; 60 | wire [`JOB_QUEUE_TAG-1:0] jq_tx_wr_tag[`NUM_JOB_TYPES-1:0]; 61 | wire jq_tx_wr_valid[`NUM_JOB_TYPES-1:0]; 62 | wire jq_tx_wr_ready[`NUM_JOB_TYPES-1:0]; 63 | wire [511:0] jq_tx_data[`NUM_JOB_TYPES-1:0]; 64 | 65 | wire jq_rx_rd_valid[`NUM_JOB_TYPES-1:0]; 66 | wire [511:0] jq_rx_data[`NUM_JOB_TYPES-1:0]; 67 | wire [`JOB_QUEUE_TAG-1:0] jq_rx_rd_tag[`NUM_JOB_TYPES-1:0]; 68 | 69 | wire jq_rx_wr_valid[`NUM_JOB_TYPES-1:0]; 70 | wire 
[`JOB_QUEUE_TAG-1:0] jq_rx_wr_tag[`NUM_JOB_TYPES-1:0]; 71 | /* NOTE(review): the selectors are 2 bits wide and RX routing compares 2 tag bits, so this presumably supports at most 4 job types - confirm against `NUM_JOB_TYPES. */ 72 | reg [1:0] rd_rr_state; 73 | reg [1:0] wr_rr_state; 74 | /////////////////////////////////////////////////////////////////////////////////////////////////// 75 | ///////////////////////////////////////////// /////////////////////////// 76 | ////////////////////////////////////////// IO Channels //////////////////////// 77 | ///////////////////////////////////////////// /////////////////////////// 78 | /////////////////////////////////////////////////////////////////////////////////////////////////// 79 | 80 | genvar i; 81 | 82 | generate for(i = 0; i < `NUM_JOB_TYPES; i = i + 1) begin: jq_tx_ready 83 | /* A queue sees ready only while it is the currently selected queue. */ 84 | assign jq_tx_rd_ready[i] = jrd_tx_rd_ready & (rd_rr_state == i); 85 | assign jq_tx_wr_ready[i] = jrd_tx_wr_ready & (wr_rr_state == i); 86 | end 87 | endgenerate 88 | //------------------- TX RD -----------------------// 89 | /* Advances only in cycles where jrd_tx_rd_ready is high: registers the selected queue's request (tag = {selector, queue tag or zeros}) and moves the selector to the next queue. */ always @(posedge clk) begin 90 | if (~rst_n) begin 91 | jrd_tx_rd_addr <= 0; 92 | jrd_tx_rd_tag <= 0; 93 | jrd_tx_rd_valid <= 0; 94 | 95 | rd_rr_state <= 0; 96 | end 97 | else if( jrd_tx_rd_ready ) begin 98 | jrd_tx_rd_addr <= jq_tx_rd_addr[rd_rr_state]; 99 | 100 | if( `JOB_QUEUE_TAG_USED_RD ) 101 | jrd_tx_rd_tag <= {rd_rr_state, jq_tx_rd_tag[rd_rr_state]}; 102 | else 103 | jrd_tx_rd_tag <= {rd_rr_state, {`JOB_QUEUE_TAG{1'b0}}}; 104 | 105 | jrd_tx_rd_valid <= jq_tx_rd_valid[rd_rr_state]; 106 | 107 | rd_rr_state <= (rd_rr_state == `NUM_JOB_TYPES-1)? 
0 : rd_rr_state + 1'b1; 108 | end 109 | end 110 | 111 | //------------------- TX WR -----------------------// 112 | /* Same scheme as the read channel, additionally forwarding the selected queue's write data. */ always @(posedge clk) begin 113 | if (~rst_n) begin 114 | jrd_tx_wr_addr <= 0; 115 | jrd_tx_wr_tag <= 0; 116 | jrd_tx_wr_valid <= 0; 117 | //jrd_tx_data <= 0; 118 | 119 | wr_rr_state <= 0; 120 | end 121 | else if( jrd_tx_wr_ready ) begin 122 | jrd_tx_wr_addr <= jq_tx_wr_addr[wr_rr_state]; 123 | 124 | if( `JOB_QUEUE_TAG_USED_WR ) 125 | jrd_tx_wr_tag <= {wr_rr_state, jq_tx_wr_tag[wr_rr_state]}; 126 | else 127 | jrd_tx_wr_tag <= {wr_rr_state, {`JOB_QUEUE_TAG{1'b0}}}; 128 | 129 | jrd_tx_wr_valid <= jq_tx_wr_valid[wr_rr_state]; 130 | jrd_tx_data <= jq_tx_data[wr_rr_state]; 131 | 132 | wr_rr_state <= (wr_rr_state == `NUM_JOB_TYPES-1)? 0 : wr_rr_state + 1'b1; 133 | end 134 | end 135 | 136 | 137 | //-------------------- RX RD, WR ----------------------// 138 | generate for(i = 0; i < `NUM_JOB_TYPES; i = i + 1) begin: jq_rx 139 | /* Every queue receives the same data and low tag bits; the valid strobes are gated by comparing tag bits [`JOB_READER_TAG-1:`JOB_READER_TAG-2] with the queue index. */ 140 | // rx rd 141 | assign jq_rx_rd_tag[i] = jrd_rx_rd_tag[`JOB_QUEUE_TAG-1:0]; 142 | assign jq_rx_data[i] = jrd_rx_data; 143 | assign jq_rx_rd_valid[i] = jrd_rx_rd_valid & (jrd_rx_rd_tag[`JOB_READER_TAG-1:`JOB_READER_TAG-2] == i); 144 | 145 | // rx wr 146 | assign jq_rx_wr_tag[i] = jrd_rx_wr_tag[`JOB_QUEUE_TAG-1:0]; 147 | assign jq_rx_wr_valid[i] = jrd_rx_wr_valid & (jrd_rx_wr_tag[`JOB_READER_TAG-1:`JOB_READER_TAG-2] == i); 148 | end 149 | endgenerate 150 | /////////////////////////////////////////////////////////////////////////////////////////////////// 151 | ///////////////////////////////////////////// /////////////////////////// 152 | ////////////////////////////////////////// Job Queues //////////////////////// 153 | ///////////////////////////////////////////// /////////////////////////// 154 | /////////////////////////////////////////////////////////////////////////////////////////////////// 155 | 156 | generate for(i = 0; i < `NUM_JOB_TYPES; i = i + 1) begin: job_q 157 | /* All queues share job_queue_size, queue_poll_rate and the enable; queue_reset is tied to 1'b0, so only rst_n resets them. */ 158 | job_queue job_queue_x ( 159 | .clk (clk), 160 | 
.rst_n (rst_n), 161 | //-------------------------------------------------// 162 | .queue_poll_rate (queue_poll_rate), 163 | .start_queue (job_reader_enable), 164 | .queue_base_addr (job_queue_base_addr[i]), 165 | .queue_size (job_queue_size), 166 | .queue_reset (1'b0), 167 | // TX RD 168 | .jq_tx_rd_addr (jq_tx_rd_addr[i]), 169 | .jq_tx_rd_tag (jq_tx_rd_tag[i]), 170 | .jq_tx_rd_valid (jq_tx_rd_valid[i]), 171 | .jq_tx_rd_ready (jq_tx_rd_ready[i]), 172 | // TX WR 173 | .jq_tx_wr_addr (jq_tx_wr_addr[i]), 174 | .jq_tx_wr_tag (jq_tx_wr_tag[i]), 175 | .jq_tx_wr_valid (jq_tx_wr_valid[i]), 176 | .jq_tx_data (jq_tx_data[i]), 177 | .jq_tx_wr_ready (jq_tx_wr_ready[i]), 178 | // RX RD 179 | .jq_rx_rd_tag (jq_rx_rd_tag[i]), 180 | .jq_rx_data (jq_rx_data[i]), 181 | .jq_rx_rd_valid (jq_rx_rd_valid[i]), 182 | // RX WR 183 | .jq_rx_wr_valid (jq_rx_wr_valid[i]), 184 | .jq_rx_wr_tag (jq_rx_wr_tag[i]), 185 | // 186 | .job_queue_out (job_queue_out[i]), 187 | .job_queue_valid (job_queue_valid[i]), 188 | .job_queue_ready (job_queue_ready[i]) 189 | ); 190 | 191 | 192 | end 193 | endgenerate 194 | 195 | 196 | 197 | 198 | 199 | endmodule 200 | -------------------------------------------------------------------------------- /rtl/fthread_shell/pipeline_agent.v: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | `include "framework_defines.vh" 18 | /* pipeline_agent: picks a source and a destination fthread for a pair of pipelined jobs. All vectors are indexed [0:`NUMBER_OF_FTHREADS-1]; each selection is one-hot and picks the lowest index that is set in the candidate vector. Decisions are registered, and pipeline_schedule_valid is find_pipeline_schedule delayed by one cycle. */ 19 | module pipeline_agent 20 | ( 21 | input wire clk, 22 | input wire rst_n, 23 | 24 | // Pipelining request 25 | input wire find_pipeline_schedule, 26 | input wire direct_pipeline_schedule, 27 | input wire [0:`NUMBER_OF_FTHREADS-1] fthreads_state, 28 | input wire [0:`NUMBER_OF_FTHREADS-1] src_job_fthread_mapping, 29 | input wire [0:`NUMBER_OF_FTHREADS-1] dst_job_fthread_mapping, 30 | 31 | // Pipeline Schedule decision 32 | output reg [0:`NUMBER_OF_FTHREADS-1] src_fthread_select, 33 | output reg [0:`NUMBER_OF_FTHREADS-1] dst_fthread_select, 34 | output reg dst_fthread_reserve, 35 | output reg pipeline_schedule_valid 36 | 37 | ); 38 | 39 | wire [0:`NUMBER_OF_FTHREADS-1] dst_job_mapping_shifted; 40 | wire [0:`NUMBER_OF_FTHREADS-1] pipeline_src_job_mapping; 41 | wire [0:`NUMBER_OF_FTHREADS-1] valid_pipeline_mapping_src; 42 | wire [0:`NUMBER_OF_FTHREADS-1] src_job_mapping_shifted; 43 | wire [0:`NUMBER_OF_FTHREADS-1] pipeline_dst_job_mapping; 44 | wire [0:`NUMBER_OF_FTHREADS-1] valid_pipeline_mapping_dst; 45 | wire [0:`NUMBER_OF_FTHREADS-1] valid_pipeline_mapping_both; 46 | wire [0:`NUMBER_OF_FTHREADS-1] src_fthread_select_b; 47 | wire [0:`NUMBER_OF_FTHREADS-1] dst_fthread_select_b; 48 | wire [0:`NUMBER_OF_FTHREADS-1] src_fthread_select_s; 49 | wire [0:`NUMBER_OF_FTHREADS-1] dst_fthread_select_s; 50 | 51 | wire [0:`NUMBER_OF_FTHREADS-1] src_fthread_select_a; 52 | wire [0:`NUMBER_OF_FTHREADS-1] dst_fthread_select_a; 53 | wire [0:`NUMBER_OF_FTHREADS-1] dst_fthread_select_r; 54 | wire [0:`NUMBER_OF_FTHREADS-1] valid_mapping_src; 55 | wire [0:`NUMBER_OF_FTHREADS-1] valid_mapping_dst; 56 | 57 | genvar k; 58 | /////////////////////////////////////////////////////////////////////////////////////////////////// 59 | ////////////////////////////////// /////////////////////////////// 60 | /////////////////////////////// Memory Pipeline Schedule Decision ///////////////////////////// 61 | ////////////////////////////////// 
/////////////////////////////// 62 | /////////////////////////////////////////////////////////////////////////////////////////////////// 63 | generate if(`NUMBER_OF_FTHREADS > 1) begin 64 | // any valid mapping 65 | /* Candidates are fthreads the job maps to whose fthreads_state bit is 0 (presumably 0 means idle - confirm). */ assign valid_mapping_src = src_job_fthread_mapping & ~fthreads_state; 66 | assign valid_mapping_dst = dst_job_fthread_mapping & ~fthreads_state; 67 | 68 | // src schedule decision 69 | //generate 70 | for ( k = `NUMBER_OF_FTHREADS-1; k >1 ; k = k-1) begin: ft_selected_src_a 71 | /* Bit k is selected only if no lower-indexed candidate exists; bits 1 and 0 are written out separately below. */ 72 | assign src_fthread_select_a[k] = valid_mapping_src[k] & ~(|(valid_mapping_src[0:k-1])); 73 | 74 | end 75 | //endgenerate 76 | 77 | assign src_fthread_select_a[1] = valid_mapping_src[1] & ~valid_mapping_src[0]; 78 | assign src_fthread_select_a[0] = valid_mapping_src[0]; 79 | 80 | // dst schedule decision 81 | //generate 82 | for ( k = `NUMBER_OF_FTHREADS-1; k >1 ; k = k-1) begin: ft_selected_dst_a 83 | 84 | assign dst_fthread_select_a[k] = valid_mapping_dst[k] & ~(|(valid_mapping_dst[0:k-1])); 85 | 86 | end 87 | //endgenerate 88 | 89 | assign dst_fthread_select_a[1] = valid_mapping_dst[1] & ~valid_mapping_dst[0]; 90 | assign dst_fthread_select_a[0] = valid_mapping_dst[0]; 91 | 92 | // dst schedule reserve 93 | //generate 94 | for ( k = `NUMBER_OF_FTHREADS-1; k >1 ; k = k-1) begin: ft_selected_dst_r 95 | /* Reservation ignores fthreads_state: it picks the lowest-indexed fthread the destination job maps to. */ 96 | assign dst_fthread_select_r[k] = dst_job_fthread_mapping[k] & ~(|(dst_job_fthread_mapping[0:k-1])); 97 | 98 | end 99 | //endgenerate 100 | 101 | assign dst_fthread_select_r[1] = dst_job_fthread_mapping[1] & ~dst_job_fthread_mapping[0]; 102 | assign dst_fthread_select_r[0] = dst_job_fthread_mapping[0]; 103 | /////////////////////////////////////////////////////////////////////////////////////////////////// 104 | ////////////////////////////////// /////////////////////////////// 105 | /////////////////////////////// Direct Pipeline Schedule Decision ///////////////////////////// 106 | ////////////////////////////////// /////////////////////////////// 107 | 
/////////////////////////////////////////////////////////////////////////////////////////////////// 108 | 109 | // Check if at least the src can be mapped 110 | /* Shifted so that bit k holds dst mapping bit k+1: a direct pipeline places the destination on the fthread right after the source. */ assign dst_job_mapping_shifted = {dst_job_fthread_mapping[1:`NUMBER_OF_FTHREADS-1], 1'b0}; 111 | assign pipeline_src_job_mapping = dst_job_mapping_shifted & src_job_fthread_mapping; 112 | assign valid_pipeline_mapping_src = pipeline_src_job_mapping & ~fthreads_state; 113 | 114 | // check if at least dst can be mapped 115 | /* Bit k holds src mapping bit k-1. */ assign src_job_mapping_shifted = {1'b0, src_job_fthread_mapping[0:`NUMBER_OF_FTHREADS-2]}; 116 | assign pipeline_dst_job_mapping = src_job_mapping_shifted & dst_job_fthread_mapping; 117 | assign valid_pipeline_mapping_dst = pipeline_dst_job_mapping & ~fthreads_state; 118 | 119 | // Check if both src and dst can be mapped 120 | assign valid_pipeline_mapping_both = {valid_pipeline_mapping_dst[1:`NUMBER_OF_FTHREADS-1], 1'b0} & 121 | valid_pipeline_mapping_src; 122 | //---------------------------------------------------------------------------------------------------// 123 | // src schedule decision based on valid mapping for both src & dst 124 | //generate 125 | for ( k = `NUMBER_OF_FTHREADS-1; k >1 ; k = k-1) begin: ft_selected_src_b 126 | 127 | assign src_fthread_select_b[k] = valid_pipeline_mapping_both[k] & ~(|(valid_pipeline_mapping_both[0:k-1])); 128 | 129 | end 130 | //endgenerate 131 | 132 | assign src_fthread_select_b[1] = valid_pipeline_mapping_both[1] & ~valid_pipeline_mapping_both[0]; 133 | assign src_fthread_select_b[0] = valid_pipeline_mapping_both[0]; 134 | //---------------------------------------------------------------------------------------------------// 135 | // dst schedule decision based on valid mapping for both src & dst 136 | assign dst_fthread_select_b = {1'b0, src_fthread_select_b[0:`NUMBER_OF_FTHREADS-2]}; 137 | //---------------------------------------------------------------------------------------------------// 138 | // src schedule decision based on valid 
mapping at least for src 139 | //generate 140 | for ( k = `NUMBER_OF_FTHREADS-1; k >1 ; k = k-1) begin: ft_selected_src_s 141 | 142 | assign src_fthread_select_s[k] = valid_pipeline_mapping_src[k] & ~(|(valid_pipeline_mapping_src[0:k-1])); 143 | 144 | end 145 | //endgenerate 146 | 147 | assign src_fthread_select_s[1] = valid_pipeline_mapping_src[1] & ~valid_pipeline_mapping_src[0]; 148 | assign src_fthread_select_s[0] = valid_pipeline_mapping_src[0]; 149 | //---------------------------------------------------------------------------------------------------// 150 | // dst reserved 151 | assign dst_fthread_select_s = {1'b0, src_fthread_select_s[0:`NUMBER_OF_FTHREADS-2]}; 152 | //---------------------------------------------------------------------------------------------------// 153 | /* Direct mode: prefer a pair where both fthreads are free; otherwise fall back to the src-only choice and set dst_fthread_reserve. Non-direct mode: src comes from the free candidates; if a src was found, dst is a free candidate or, failing that, a reserved one. */ 154 | always @(posedge clk) begin 155 | if (~rst_n) begin 156 | src_fthread_select <= 0; 157 | dst_fthread_select <= 0; 158 | dst_fthread_reserve <= 0; 159 | pipeline_schedule_valid <= 0; 160 | end 161 | else begin 162 | if(direct_pipeline_schedule) begin 163 | if (|src_fthread_select_b) begin 164 | src_fthread_select <= src_fthread_select_b; 165 | dst_fthread_select <= dst_fthread_select_b; 166 | dst_fthread_reserve <= 1'b0; 167 | end 168 | else begin 169 | src_fthread_select <= src_fthread_select_s; 170 | dst_fthread_select <= dst_fthread_select_s; 171 | dst_fthread_reserve <= 1'b1; 172 | end 173 | end 174 | else begin 175 | src_fthread_select <= src_fthread_select_a; 176 | dst_fthread_select <= 0; 177 | dst_fthread_reserve <= 1'b0; 178 | 179 | if(|src_fthread_select_a) begin 180 | if(|dst_fthread_select_a) begin 181 | dst_fthread_select <= dst_fthread_select_a; 182 | end 183 | else begin 184 | dst_fthread_select <= dst_fthread_select_r; 185 | dst_fthread_reserve <= 1'b1; 186 | end 187 | end 188 | end 189 | 190 | pipeline_schedule_valid <= find_pipeline_schedule; 191 | end 192 | end 193 | 194 | end 195 | else begin 196 | /* `NUMBER_OF_FTHREADS <= 1: no pipelining is possible, so the outputs are held at 0. */ always @(posedge clk) begin 197 | src_fthread_select <= 0; 198 | 
dst_fthread_select <= 0; 199 | dst_fthread_reserve <= 0; 200 | pipeline_schedule_valid <= 0; 201 | end 202 | end 203 | endgenerate 204 | 205 | endmodule -------------------------------------------------------------------------------- /rtl/fthread_shell/regulator.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /* regulator: funnels NUMBER_OF_USERS request streams into one TX channel (per-user input FIFOs, an arbiter, and an output FIFO) and fans RX responses back out by tag. NOTE(review): of the PRIORITY_x and ENABLE_x parameters only PRIORITY_BATCHED_ROUND_ROBIN is referenced in this module. */ 18 | module regulator #( 19 | parameter NUMBER_OF_USERS = 4, 20 | parameter USERS_BITS = 2, 21 | parameter USER_LINE_IN_WIDTH = 512, 22 | parameter USER_LINE_OUT_WIDTH = 512, 23 | parameter PRIORITY_ROUND_ROBIN = 0, 24 | parameter PRIORITY_BATCHED_ROUND_ROBIN = 0, 25 | parameter ROUND_ROBIN_BATCH_SIZE = 16, 26 | parameter PRIORITY_EQUAL = 0, 27 | parameter PRIORITY_ASCENDING = 0, 28 | parameter PRIORITY_DESCENDING = 0, 29 | parameter PRIORITY_CUSTOM = 0, 30 | parameter ENABLE_OUT_FIFO = 1, 31 | parameter ENABLE_IN_BUFFER = 1 32 | )( 33 | 34 | input wire clk, 35 | input wire rst_n, 36 | 37 | // Users TX Channel 38 | input wire [USER_LINE_IN_WIDTH - 1 : 0] usr_tx_lines[NUMBER_OF_USERS-1:0], 39 | input wire usr_tx_valid[NUMBER_OF_USERS-1:0], 40 | output wire usr_tx_ready[NUMBER_OF_USERS-1:0], 41 | 42 | // Users RX Channel 43 | output reg [USER_LINE_OUT_WIDTH - 1 : 0] usr_rx_lines[NUMBER_OF_USERS-1:0], 44 | output reg usr_rx_valid[NUMBER_OF_USERS-1:0], 45 | 46 | // TX Channel 47 | 
output wire [USER_LINE_IN_WIDTH-1:0] tx_line, 48 | output wire [USERS_BITS-1:0] tx_tag, 49 | output wire tx_valid, 50 | input wire tx_ready, 51 | 52 | // RX Channel 53 | input wire [USER_LINE_OUT_WIDTH-1:0] rx_line, 54 | input wire [USERS_BITS-1:0] rx_tag, 55 | input wire rx_valid 56 | ); 57 | 58 | wire [NUMBER_OF_USERS-1 : 0] usr_tx_full; 59 | wire [NUMBER_OF_USERS-1 : 0] sel_usr_line; 60 | wire [NUMBER_OF_USERS-1 : 0] usr_valid; 61 | wire [USER_LINE_IN_WIDTH-1 : 0] usr_in_lines[NUMBER_OF_USERS-1:0]; 62 | 63 | wire [USER_LINE_IN_WIDTH-1 : 0] rr_tx_line; 64 | wire [USERS_BITS-1:0] rr_tx_tag; 65 | wire rr_tx_valid; 66 | 67 | wire tx_queue_full; 68 | 69 | 70 | reg [USER_LINE_OUT_WIDTH-1:0] rx_line_reg; 71 | reg [USERS_BITS-1:0] rx_tag_reg; 72 | reg rx_valid_reg; 73 | 74 | wire [USER_LINE_OUT_WIDTH - 1 : 0] usr_rx_lines_tmp[NUMBER_OF_USERS-1:0]; 75 | wire [NUMBER_OF_USERS-1 : 0] usr_rx_valid_tmp; 76 | 77 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 78 | 79 | genvar i; 80 | ///////////////////////////////////////////////// user input FIFOs ////////////////////////////////// 81 | generate for( i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: in_fifo 82 | /* One input FIFO per user (FIFO_DEPTH_BITS = 4); the arbiter pops it through sel_usr_line. */ quick_fifo #(.FIFO_WIDTH(USER_LINE_IN_WIDTH), 83 | .FIFO_DEPTH_BITS(4), 84 | .FIFO_ALMOSTFULL_THRESHOLD(8) 85 | ) usr_tx_fifo_X( 86 | .clk (clk), 87 | .reset_n (rst_n), 88 | .din (usr_tx_lines[i]), 89 | .we (usr_tx_valid[i]), 90 | .re (sel_usr_line[i]), 91 | .dout (usr_in_lines[i]), 92 | .empty (), 93 | .valid (usr_valid[i]), 94 | .full (usr_tx_full[i]), 95 | .count (), 96 | .almostfull () 97 | ); 98 | end 99 | endgenerate 100 | 101 | generate for( i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: usrTXFull 102 | assign usr_tx_ready[i] = ~usr_tx_full[i]; 103 | end 104 | endgenerate 105 | ///////////////////////////////////////////////////////////////////////////////////////////////////// 106 | generate 107 | /* The arbiter variant is chosen at elaboration time; both variants are wired to the same signals. */ if( PRIORITY_BATCHED_ROUND_ROBIN == 1 ) 
begin 108 | BatchRoundRobinArbitrationLogic #( 109 | .NUMBER_OF_USERS(NUMBER_OF_USERS), 110 | .USERS_BITS(USERS_BITS), 111 | .USER_LINE_IN_WIDTH(USER_LINE_IN_WIDTH), 112 | .USER_LINE_OUT_WIDTH(USER_LINE_OUT_WIDTH), 113 | .BATCH_SIZE(ROUND_ROBIN_BATCH_SIZE) 114 | ) BatchRoundRobinArbitrationLogic( 115 | .clk (clk), 116 | .rst_n (rst_n), 117 | 118 | // Users TX Channel 119 | .usr_tx_lines (usr_in_lines), 120 | .usr_tx_valid (usr_valid), 121 | .usr_tx_ready (sel_usr_line), 122 | // TX Channel 123 | .rr_tx_line (rr_tx_line), 124 | .rr_tx_tag (rr_tx_tag), 125 | .rr_tx_valid (rr_tx_valid), 126 | .rr_tx_ready (~tx_queue_full) 127 | ); 128 | end 129 | else begin 130 | 131 | RoundRobinArbitrationLogic #( 132 | .NUMBER_OF_USERS(NUMBER_OF_USERS), 133 | .USERS_BITS(USERS_BITS), 134 | .USER_LINE_IN_WIDTH(USER_LINE_IN_WIDTH), 135 | .USER_LINE_OUT_WIDTH(USER_LINE_OUT_WIDTH) 136 | ) RoundRobinArbitrationLogic( 137 | .clk (clk), 138 | .rst_n (rst_n), 139 | 140 | // Users TX Channel 141 | .usr_tx_lines (usr_in_lines), 142 | .usr_tx_valid (usr_valid), 143 | .usr_tx_ready (sel_usr_line), 144 | // TX Channel 145 | .rr_tx_line (rr_tx_line), 146 | .rr_tx_tag (rr_tx_tag), 147 | .rr_tx_valid (rr_tx_valid), 148 | .rr_tx_ready (~tx_queue_full) 149 | ); 150 | end 151 | 152 | endgenerate 153 | 154 | 155 | ////////////////////////////////////////////////////// Out TX FIFO //////////////////////////////// 156 | /* The winning line is queued together with its user tag; the arbiter sees rr_tx_ready low while this FIFO is full. */ 157 | quick_fifo #(.FIFO_WIDTH(USER_LINE_IN_WIDTH + USERS_BITS), 158 | .FIFO_DEPTH_BITS(4), 159 | .FIFO_ALMOSTFULL_THRESHOLD(8) 160 | ) tx_req_queue( 161 | .clk (clk), 162 | .reset_n (rst_n), 163 | .din ({rr_tx_tag, rr_tx_line}), 164 | .we ( rr_tx_valid ), 165 | .re ( tx_ready ), 166 | .dout ({tx_tag, tx_line}), 167 | .empty (), 168 | .valid (tx_valid), 169 | .full (tx_queue_full), 170 | .count (), 171 | .almostfull () 172 | ); 173 | 174 | ////////////////////////////////////////////////////// RX to Users Register /////////////////////////// 175 | /* First RX register stage. */ 176 | always@(posedge clk) begin 177 | 
if(~rst_n) begin 178 | rx_line_reg <= 0; 179 | rx_tag_reg <= 0; 180 | rx_valid_reg <= 0; 181 | end 182 | else begin 183 | rx_line_reg <= rx_line; 184 | rx_tag_reg <= rx_tag; 185 | rx_valid_reg <= rx_valid; 186 | end 187 | end 188 | 189 | generate for( i = 0; i < NUMBER_OF_USERS; i = i + 1) begin: rxUsr 190 | /* Second RX register stage per user: valid is asserted only for the user whose index equals the registered tag; the data is broadcast to all users. */ assign usr_rx_valid_tmp[i] = rx_valid_reg & (rx_tag_reg == i); 191 | assign usr_rx_lines_tmp[i] = rx_line_reg; 192 | 193 | always@(posedge clk) begin 194 | if(~ rst_n) begin 195 | usr_rx_valid[i] <= 1'b0; 196 | //usr_rx_lines[i] <= 0; 197 | end 198 | else begin 199 | usr_rx_valid[i] <= usr_rx_valid_tmp[i]; 200 | usr_rx_lines[i] <= usr_rx_lines_tmp[i]; 201 | end 202 | end 203 | end 204 | endgenerate 205 | 206 | endmodule 207 | 208 | 209 | -------------------------------------------------------------------------------- /rtl/mem/quick_fifo.v: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013-2015, Intel Corporation 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // * Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright notice, 9 | // this list of conditions and the following disclaimer in the documentation 10 | // and/or other materials provided with the distribution. 11 | // * Neither the name of Intel Corporation nor the names of its contributors 12 | // may be used to endorse or promote products derived from this software 13 | // without specific prior written permission. 
14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | // POSSIBILITY OF SUCH DAMAGE. 26 | 27 | /* quick_fifo: synchronous FIFO whose storage is an spl_sdp_mem instance. Two sets of flags are kept: mem_empty/mem_count follow the memory-side read and write enables (remem/wemem), while count/empty/full/almostfull are updated from the we/re port signals. */ 28 | module quick_fifo #( 29 | parameter FIFO_WIDTH = 32, 30 | parameter FIFO_DEPTH_BITS = 8, 31 | parameter FIFO_ALMOSTFULL_THRESHOLD = 2**FIFO_DEPTH_BITS - 4 32 | ) ( 33 | input wire clk, 34 | input wire reset_n, 35 | 36 | input wire we, // input write enable 37 | input wire [FIFO_WIDTH - 1:0] din, // input write data with configurable width 38 | 39 | input wire re, // input read enable 40 | output reg valid, // dout valid 41 | output reg [FIFO_WIDTH - 1:0] dout, // output read data with configurable width 42 | 43 | output reg [FIFO_DEPTH_BITS - 1:0] count, // output FIFOcount 44 | output reg empty, // output FIFO empty 45 | output reg full, // output FIFO full 46 | output reg almostfull // output configurable programmable full/ almost full 47 | ); 48 | 49 | reg [FIFO_DEPTH_BITS - 1:0] rp = 0; 50 | reg [FIFO_DEPTH_BITS - 1:0] wp = 0; 51 | 52 | reg [FIFO_DEPTH_BITS - 1:0] mem_count = 0; // output FIFOcount 53 | reg mem_empty = 1'b1; 54 | 55 | reg valid_t1 = 0, valid_t2 = 0; 56 | reg valid0 = 0; 57 | 58 | wire remem; 59 | wire wemem; 60 | wire remem_valid; 61 | 62 | wire [FIFO_WIDTH-1:0] dout_mem; 63 | /* remem: advance the memory read when the consumer reads, or whenever valid_t1 and valid_t2 are not both set. */ 64 | assign remem 
= (re & valid_t1 & valid_t2) | ~(valid_t1 & valid_t2); 65 | assign wemem = we & ~full; 66 | 67 | assign remem_valid = remem & ~mem_empty; 68 | 69 | 70 | spl_sdp_mem #(.DATA_WIDTH(FIFO_WIDTH), 71 | .ADDR_WIDTH(FIFO_DEPTH_BITS)) fifo_mem( 72 | .clk (clk), 73 | .we (wemem), 74 | .re (remem), 75 | .raddr (rp), 76 | .waddr (wp), 77 | .din (din), 78 | .dout (dout_mem) 79 | ); 80 | 81 | // data 82 | /* dout has no reset (its reset assignment is commented out below). */ always @(posedge clk) begin 83 | dout <= (valid_t2)? ((re)? dout_mem : dout) : dout_mem; 84 | end 85 | 86 | // valids, flags 87 | always @(posedge clk) begin 88 | if (~reset_n) begin 89 | empty <= 1'b1; 90 | full <= 1'b0; 91 | almostfull <= 1'b0; 92 | count <= 0; //32'b0; 93 | rp <= 0; 94 | wp <= 0; 95 | valid_t2 <= 1'b0; 96 | valid_t1 <= 1'b0; 97 | mem_empty <= 1'b1; 98 | mem_count <= 'b0; 99 | 100 | //dout <= 0; 101 | valid <= 0; 102 | valid0 <= 0; 103 | end 104 | 105 | else begin 106 | /* valid/valid0 are updated with the same expressions as valid_t2/valid_t1. */ 107 | valid <= (valid)? ((re)? valid0 : 1'b1) : valid0; 108 | valid0 <= (remem)? ~mem_empty : valid0; 109 | 110 | valid_t2 <= (valid_t2)? ((re)? valid_t1 : 1'b1) : valid_t1; 111 | 112 | valid_t1 <= (remem)? ~mem_empty : valid_t1; 113 | rp <= (remem & ~mem_empty)? (rp + 1'b1) : rp; 114 | wp <= (wemem)? 
(wp + 1'b1) : wp; 115 | 116 | // mem_empty 117 | if (we) mem_empty <= 1'b0; 118 | else if(remem & (mem_count == 1'b1)) mem_empty <= 1'b1; 119 | 120 | // mem_count 121 | if( wemem & ~remem_valid) mem_count <= mem_count + 1'b1; 122 | else if (~wemem & remem_valid) mem_count <= mem_count - 1'b1; 123 | 124 | 125 | // empty 126 | if (we) empty <= 1'b0; 127 | else if((re & valid_t2 & ~valid_t1) & (count == 1'b1)) empty <= 1'b1; 128 | 129 | // count 130 | /* NOTE(review): (~(re & valid_t2) | ~re) is logically the same as ~(re & valid_t2). */ if( wemem & (~(re & valid_t2) | ~re) ) count <= count + 1'b1; 131 | else if (~wemem & (re & valid_t2)) count <= count - 1'b1; 132 | 133 | // 134 | /* full and almostfull are set on a write without a read at the threshold counts; full is cleared on a read when no write is taken, and almostfull is cleared then if count equals the threshold. */ if (we & ~re) begin 135 | 136 | if (count == (2**FIFO_DEPTH_BITS-1)) 137 | full <= 1'b1; 138 | 139 | if (count == (FIFO_ALMOSTFULL_THRESHOLD-1)) 140 | almostfull <= 1'b1; 141 | end 142 | // 143 | if ((~we | full) & re) begin // 144 | full <= 1'b0; 145 | 146 | if (count == FIFO_ALMOSTFULL_THRESHOLD) 147 | almostfull <= 1'b0; 148 | end 149 | end 150 | end 151 | 152 | endmodule 153 | 154 | -------------------------------------------------------------------------------- /rtl/mem/spl_pt_mem.v: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013-2015, Intel Corporation 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // * Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright notice, 9 | // this list of conditions and the following disclaimer in the documentation 10 | // and/or other materials provided with the distribution. 11 | // * Neither the name of Intel Corporation nor the names of its contributors 12 | // may be used to endorse or promote products derived from this software 13 | // without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.


// spl_pt_mem
//
// Two-port memory of 2**ADDR_WIDTH words, DATA_WIDTH bits each, on one clock.
//   port 0: write (we0) and registered read (re0), both using addr0
//   port 1: registered read only (re1, addr1)
// dout0/dout1 only change on a clock edge where the matching read enable is
// set; otherwise they hold their previous value.
//
// The write uses a non-blocking assignment, so a read of the same address in
// the same cycle samples the old contents in simulation.
// NOTE(review): the "no_rw_check" ramstyle is presumably there to tell Quartus
// that read-during-write results are don't-care - confirm against the Quartus
// synthesis attribute documentation before relying on either behaviour.
module spl_pt_mem #(
        parameter DATA_WIDTH = 32,
        parameter ADDR_WIDTH = 8
    ) (
        input wire clk,

        // port 0, read/write
        input wire we0,
        input wire re0,
        input wire [ADDR_WIDTH-1:0] addr0,
        input wire [DATA_WIDTH-1:0] din0,
        output reg [DATA_WIDTH-1:0] dout0,

        // port 1, read only
        input wire re1,
        input wire [ADDR_WIDTH-1:0] addr1,
        output reg [DATA_WIDTH-1:0] dout1
    );


    // Vendor-specific synthesis attributes steering RAM inference; the array
    // declaration itself is the same in both branches.
`ifdef VENDOR_XILINX
    (* ram_extract = "yes", ram_style = "block" *)
    reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`else
    (* ramstyle = "AUTO, no_rw_check" *)
    reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`endif

    always @(posedge clk) begin
        if (we0)
            mem[addr0] <= din0;

        if (re0)
            dout0 <= mem[addr0];

        if (re1)
            dout1 <= mem[addr1];
    end

endmodule



--------------------------------------------------------------------------------
/rtl/mem/spl_sdp_mem.v:
--------------------------------------------------------------------------------
// Copyright (c) 2013-2015,
Intel Corporation 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // * Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright notice, 9 | // this list of conditions and the following disclaimer in the documentation 10 | // and/or other materials provided with the distribution. 11 | // * Neither the name of Intel Corporation nor the names of its contributors 12 | // may be used to endorse or promote products derived from this software 13 | // without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 | // POSSIBILITY OF SUCH DAMAGE. 


// spl_sdp_mem
//
// Simple dual-port memory of 2**ADDR_WIDTH words, DATA_WIDTH bits each, on
// one clock: one write port (we, waddr, din) and one registered read port
// (re, raddr, dout). dout only changes on a clock edge where `re` is set.
//
// The write uses a non-blocking assignment, so reading the address being
// written in the same cycle samples the old contents in simulation.
// NOTE(review): the "no_rw_check" ramstyle is presumably there to tell Quartus
// that read-during-write results are don't-care - confirm against the Quartus
// synthesis attribute documentation.
module spl_sdp_mem #(
        parameter DATA_WIDTH = 32,
        parameter ADDR_WIDTH = 8
    ) (
        input wire clk,
        input wire we,
        input wire re,
        input wire [ADDR_WIDTH-1:0] raddr,
        input wire [ADDR_WIDTH-1:0] waddr,
        input wire [DATA_WIDTH-1:0] din,
        output reg [DATA_WIDTH-1:0] dout
    );


    // Vendor-specific synthesis attributes steering RAM inference; the array
    // declaration itself is the same in both branches.
`ifdef VENDOR_XILINX
    (* ram_extract = "yes", ram_style = "block" *)
    reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`else
    (* ramstyle = "no_rw_check" *) reg [DATA_WIDTH-1:0] mem[0:2**ADDR_WIDTH-1];
`endif


    always @(posedge clk) begin
        if (we)
            mem[waddr] <= din;

        if (re)
            dout <= mem[raddr];
    end

endmodule


--------------------------------------------------------------------------------
/rtl/spl_defines.vh:
--------------------------------------------------------------------------------
// Copyright (c) 2013-2015, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.


// Shared width and opcode constants for the SPL / CCI interfaces.
//
// Macros whose replacement text is an expression are wrapped in parentheses
// so that they expand safely inside a larger expression
// (e.g. 2 * `SPL_TRQ_WIDTH).
`ifndef SPL_DEFINES_VH
`define SPL_DEFINES_VH

// Transfer size is currently fixed to 1; the selection chain below is kept
// commented out as in the original.
// `ifdef MAX_TRANSFER_SIZE_4
// `define MAX_TRANSFER_SIZE 3'h4
// `elsif MAX_TRANSFER_SIZE_3
// `define MAX_TRANSFER_SIZE 3'h3
// `elsif MAX_TRANSFER_SIZE_2
// `ifdef MAX_TRANSFER_SIZE_2
// `define MAX_TRANSFER_SIZE 3'h2
//`elsif MAX_TRANSFER_SIZE_1
`define MAX_TRANSFER_SIZE 3'h1
//`else
// *** must define MAX_TRANSFER_SIZE_1 or MAX_TRANSFER_SIZE_2 ***
// `endif

`define AFU_IF_TX_HDR_WIDTH 99
`define VIR_ADDR_WIDTH 42

`define SPL_WTAG_WIDTH 6
`define MAX_NUM_WTAGS (2**`SPL_WTAG_WIDTH)

`define SPL_TAG_WIDTH 6
`define MAX_NUM_TAGS (2**`SPL_TAG_WIDTH)

`define SPL_TWQ_WIDTH 566 // data(512) + len(6) + cmd(2) + addr(32) +tag(14) = 566
`define SPL_TRQ_WIDTH (38+`SPL_TAG_WIDTH) // addr(32) + len(6) + TAG_WIDTH

// CCI request types
// `define CCI_REQ_WR_DSR 4'b0000
`define CCI_REQ_WR_THRU 4'b0001
`define CCI_REQ_WR_LINE 4'b0010
`define CCI_REQ_WR `CCI_REQ_WR_LINE
`define CCI_REQ_RD 4'b0100
`define CCI_REQ_WR_FENCE 4'b0101
// `define CCI_REQ_TASKDONE 4'b0111

// CCI response types
`define CCI_RSP_WR_CSR 4'b0000
`define CCI_RSP_RD_CSR 4'b1100
`define CCI_RSP_WR 4'b0001
`define CCI_RSP_RD 4'b0100

`define CCI_DATA_WIDTH 512
`define CCI_RX_HDR_WIDTH 18
`define CCI_TX_HDR_WIDTH 61

`define COR_REQ_WR_DSR 2'b00
`define COR_REQ_WR_THRU 2'b01
`define COR_REQ_WR_LINE 2'b10
`define COR_REQ_WR_FENCE 2'b11

`define PCIE_FMTTYPE_MEM_READ32 7'b000_0000
`define PCIE_FMTTYPE_MEM_READ64 7'b010_0000
`define PCIE_FMTTYPE_MEM_WRITE32 7'b100_0000
`define PCIE_FMTTYPE_MEM_WRITE64 7'b110_0000
`define PCIE_FMTTYPE_CFG_WRITE 7'b100_0100
`define PCIE_FMTTYPE_CPL 7'b000_1010
`define PCIE_FMTTYPE_CPLD 7'b100_1010

`define AVL_TXQ_HDR_WIDTH 49 // cfg(1) + addr(32) + rd(1) + wr(1) + tag(14) = 49
`define AVL_TXQ_CPL_WIDTH 96 // data(64) + requester ID(16) + tag(8) + length(1) + low_addr(7) = 96

`endif

--------------------------------------------------------------------------------
/standalone.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Resets the Quartus project files for a standalone (non-doppiodb) build:
# rewrites the AFU file list, the Tcl environment settings file and
# quartus/setup.sh under $CENTAUR_HOME.

export WORKDIR=$PWD

# Default CENTAUR_HOME to the directory the script is run from.
if [ -z "$CENTAUR_HOME" ]; then
export CENTAUR_HOME=$WORKDIR
echo "CENTAUR_HOME varialbe was set"
fi

AFU_QSF_FILE="$CENTAUR_HOME/quartus/par/qsf_afu_PAR_files.qsf"
ENV_SETTINGS_QSF_FILE="$CENTAUR_HOME/quartus/par/qsf_env_settings.qsf"
QUARTUS_SETUP_FILE="$CENTAUR_HOME/quartus/setup.sh"

# Redirect targets are quoted so that a CENTAUR_HOME containing spaces does
# not turn into an "ambiguous redirect".

# clean afu qsf file from doppiodb operators
echo "set_global_assignment -name SEARCH_PATH \$APP_OPS" > "$AFU_QSF_FILE"

# clean environments settings file
# Every `$` below is escaped: the text is Tcl that Quartus evaluates later,
# so nothing may be expanded by this shell.
echo "set MY_WORKDIR \$::env(WORKDIR)
set REL_RTL_SRC \"/qpi\"
set QPI_RTL_SRC \$MY_WORKDIR\$REL_RTL_SRC
set CENTAUR_SRC \$::env(CENTAUR_HOME)/rtl
set APP_OPS \$::env(CENTAUR_HOME)/app/rtl
puts \" Variable defined QPI_RTL_SRC: \$QPI_RTL_SRC\"
puts \" Variable defined CENTAUR_RTL_SRC: \$CENTAUR_SRC\"
puts \" Variable defined APP_OPS_SRC: \$APP_OPS\"
" > "$ENV_SETTINGS_QSF_FILE"


# Regenerate quartus/setup.sh; its variables are escaped so they are resolved
# when setup.sh itself is sourced, not now.
echo "#!/bin/bash

export WORKDIR=\$PWD
source \$WORKDIR/quartus.sh
alias cdw='cd \$WORKDIR'

if [ -z \"\$CENTAUR_HOME\" ]; then
export CENTAUR_HOME=\$WORKDIR/..
echo \"CENTAUR_HOME varialbe was set\"
fi

#Tell Git to stop tracking changes on ome2_ivt.qsf
git update-index --assume-unchanged par/ome2_ivt.qsf" > "$QUARTUS_SETUP_FILE"


--------------------------------------------------------------------------------
/sw/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/Centaur/f3013a53a4ae2fa2822361ea968380d9fe958369/sw/.DS_Store
--------------------------------------------------------------------------------
/sw/core/centaur.h:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017 Systems Group, ETHZ

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | */ 16 | 17 | //#include "fpgaapi.h" 18 | #include "fpipe.h" 19 | #include "pipeline_job.h" 20 | -------------------------------------------------------------------------------- /sw/core/fpga_defs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef __FPGA_DEF_H__ 18 | #define __FPGA_DEF_H__ 19 | 20 | 21 | #ifndef CL 22 | # define CL(x) ((x) * 64) 23 | #endif // CL 24 | #ifndef MB 25 | # define MB(x) ((x) * 1024 * 1024) 26 | #endif // MB 27 | #ifndef GB 28 | # define GB(x) ((x) * MB(1) * 1024) 29 | #endif // GB 30 | 31 | #ifndef RND_TO_CL 32 | # define RND_TO_CL(x) ( ((x+63)/64) * 64) 33 | #endif // RND_TO_CL 34 | 35 | #ifdef MSG 36 | # undef MSG 37 | #endif // MSG 38 | #ifndef MSG 39 | #define MSG(x) (std::cout<< x < 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "fpga_defs.h" 33 | #include "utils.h" 34 | 35 | #include "../platform/platform.h" 36 | 37 | #include "fqueue.h" 38 | 39 | #include "fthreadRec.h" 40 | #include "fthread.h" 41 | #include "memory_manager.h" 42 | #include "workload_manager.h" 43 | 44 | /* 45 | FPGA data structure is the object representation of the FPGA programming 46 | environment. It provide member functions that: 47 | - Establish link with an FPGA device. 
     - Create and enqueue jobs on the FPGA.
     - Access CPU-FPGA shared memory space.
*/
class Fthread;
class MemoryManager;
class WorkloadManager;

// NOTE(review): in this copy of the file the text between angle brackets
// appears to have been stripped (template parameter lists, template
// arguments, cast target types). The affected declarations are left exactly
// as found and flagged individually below; they need to be restored from the
// upstream repository before this header can compile.
class FPGA
{

public:

    FPGA();

    ~FPGA();

    // Lifecycle entry points; their behaviour is defined in fpgaapi.cpp,
    // which is not visible here.
    bool init();
    bool run();
    bool release();
    void terminate();
//**********************************************************************************//
//******************** Interface to Platform dependent modules *******************//
//**********************************************************************************//
public:
    bool allocHWService();
    bool allocate_workspace(uint64_t ws_size);
    bool initiateLink();

protected:
    HWService* srvHndle; // platform.h

//**********************************************************************************//
//******************************* FPGA setup methods *******************************//
//**********************************************************************************//
public:
    bool obtainConfiguredOperators();
    void setUpCommandQueue();
    bool allocMemManagers();

    int get_config_opcode(int idx);
    // FPipe uses the result to choose between a direct pipe (true) and a
    // memory-backed pipe (false).
    bool adjacentJobs(uint32_t J1, uint32_t J2);
protected:
    // List of currently configured operators
    unsigned int configuredOperators[NUM_FTHREADS];
    unsigned char* dsm_base;
//**********************************************************************************//
//**************************** Interface to Workload Manager ***********************//
//**********************************************************************************//
public:
    unsigned char* alloc_job_queue(unsigned int &q_ptr_phys);
    bool enqueueSingleFThread(FthreadRec* ftRec);
    bool enqueuePipelineJob(FthreadRec* src, FthreadRec* dst, uint32_t code);

protected:
    WorkloadManager* wl_manager;
    unsigned int job_queues_count;

//**********************************************************************************//
//*********************** Interface to Memory Manager ******************************//
//**********************************************************************************//
public:
    // Allocation interface; the names mirror the C library but these are
    // member functions (queue_malloc below calls them through `this`).
    void* realloc(void* dstruct, size_t size);
    void* malloc(size_t size);
    void* malloc(size_t size, size_t* maxsize);
    // NOTE(review): template parameter list and FQueue's template argument
    // are missing here (see class comment).
    template FQueue * queue_malloc(size_t size, uint32_t syncSize);
    void free(void * ptr);
    unsigned char* get_ws_base_virt();
    unsigned char* get_ws_base_phys();
    uint64_t get_ws_size();
    void computeAddressCodes();
    // FPipe calls this with 'M' (memory pipe) and 'D' (direct pipe).
    uint32_t get_addr_code(char ty);

protected:
    MemoryManager* m_manager[NUM_MMANAGER];
    unsigned char* ws_base_virt;
    unsigned char* ws_base_phys;
    uint64_t ws_size;

    uint32_t mem_pipe_read_code;
    uint32_t direct_pipe_read_code;
    uint32_t mem_norm_addr_code;
//**********************************************************************************//
//***************************** Job Creation/Deletion ******************************//
//**********************************************************************************//
public:
    FthreadRec* allocateFThreadRecord();
    FthreadRec* create_fthread(unsigned int opcode,
                               unsigned char* afu_config,
                               int cfg_size);

    FthreadRec* create_fthread(unsigned int opcode,
                               unsigned char* afu_config,
                               int cfg_size, void* ret);
    void free_fthread(int id);

    //template Pipeline * get_pipeline_resource(uint32_t pipe_src, uint32_t pipe_dst);

protected:
    // Job records are stored by value inside the FPGA object.
    FthreadRec f_threads[MAX_NUM_ALLOWED_JOBS];
    // NOTE(review): std::atomic's template argument is missing and cannot be
    // inferred from this header.
    std::atomic f_threads_flag[MAX_NUM_ALLOWED_JOBS];

//**********************************************************************************//
//********************************* FPGA command queue ****************************//
//**********************************************************************************//
public:
    void enqueue_command(OneCL * cmd);
protected:
    // NOTE(review): FQueue's template argument is missing; presumably OneCL
    // given enqueue_command() above - confirm.
    FQueue* cmd_queue;
    std::mutex cmd_queue_mutex;

};
//**********************************************************************************//
// Allocates an FQueue control structure plus its element buffer through
// FPGA::malloc and resets the producer/consumer bookkeeping.
//
//   size      buffer size in bytes (stored in m_capacity_bytes)
//   syncSize  stored in both update_bytes_rate and synch_size
//
// Returns the queue control structure. Neither allocation is checked for
// failure before use.
//
// NOTE(review): m_capacity is set to size / 64 regardless of the element
// type, while FQueue::push/pop wrap their index at m_capacity and count
// bytes in units of sizeof(T). That is only consistent for 64-byte elements
// (64 presumably being the cache-line size, cf. CL(x) in fpga_defs.h) -
// confirm before using other element sizes.
// NOTE(review): template parameter list, FQueue's template argument and both
// reinterpret_cast target types are missing below (see class comment).
template
FQueue * FPGA::queue_malloc(size_t size, uint32_t syncSize){

    FQueue * crb = reinterpret_cast*>(this->malloc( sizeof(FQueue) ));

    crb->m_buffer = reinterpret_cast(this->malloc(size));

    crb->m_capacity = size / 64;
    crb->m_capacity_bytes = size;
    crb->update_bytes_rate = syncSize;
    crb->synch_size = syncSize;
    crb->m_crb_code = 0;

    crb->m_producer_idx = 0;
    crb->m_producer_bytes = 0;
    crb->m_producer_done = false;
    crb->m_producer_code = 0;

    crb->m_consumer_idx = 0;
    crb->m_consumer_bytes = 0;

    return crb;
}
//**********************************************************************************//
bool getFPGA(FPGA * fpga);
void fpgaServer(FPGA *fpga);

#endif // __FPGA_API_H__

--------------------------------------------------------------------------------
/sw/core/fpipe.h:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017 Systems Group, ETHZ

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef __FPIPE_H__ 18 | #define __FPIPE_H__ 19 | 20 | class Fthread; 21 | class FPGA; 22 | 23 | #include "fpgaapi.h" 24 | 25 | template 26 | class FPipe 27 | { 28 | public: 29 | FPipe(FPGA* fpga_t, uint32_t pipe_src, uint32_t pipe_dst, uint32_t queue_size = PIPELINE_QUEUE_SIZE, uint16_t page_size = DEFAULT_PAGE_SIZE); 30 | ~FPipe(){} 31 | 32 | uint32_t getReadAddrCode() { return m_readAddrCode; } 33 | bool isMemPipe() { return m_memPipe; } 34 | unsigned char* getFIFOPtr() { return reinterpret_cast(m_pipelineQueue); } 35 | 36 | unsigned char* ptr () { return reinterpret_cast (uint64_t(m_readAddrCode << 28) << 32); } 37 | 38 | void pop(T &data); 39 | void push(T data); 40 | 41 | 42 | protected: 43 | uint32_t m_readAddrCode; 44 | bool m_memPipe; 45 | FPGA* m_fpga; 46 | FQueue* m_pipelineQueue; 47 | }; 48 | 49 | template 50 | FPipe::FPipe(FPGA* fpga_t, uint32_t pipe_src, uint32_t pipe_dst, uint32_t queue_size, uint16_t page_size) 51 | { 52 | m_pipelineQueue = nullptr; 53 | m_fpga = fpga_t; 54 | m_memPipe = false; 55 | 56 | uint32_t psize = page_size; 57 | uint32_t qsize = queue_size; 58 | 59 | if (page_size%(sizeof(T)) > 0) 60 | { 61 | psize = (page_size/(sizeof(T)) + 1)*sizeof(T); 62 | } 63 | 64 | if (queue_size%page_size > 0) 65 | { 66 | qsize = page_size*(queue_size/page_size) + page_size; 67 | } 68 | 69 | if(pipe_src == 0) 70 | { 71 | m_memPipe = true; 72 | } 73 | else if(pipe_dst == 0) 74 | { 75 | m_memPipe = true; 76 | m_readAddrCode = 0; 77 | } 78 | else 79 | { 80 | m_memPipe = true; 81 | if (m_fpga->adjacentJobs(pipe_src, pipe_dst)) 82 | { 83 | m_memPipe = false; 84 | 85 | } 86 | m_readAddrCode = (m_memPipe)? 
fpga_t->get_addr_code('M') : fpga_t->get_addr_code('D'); 87 | } 88 | 89 | if(m_memPipe) 90 | { 91 | m_pipelineQueue = m_fpga->queue_malloc(qsize, psize); 92 | } 93 | } 94 | 95 | template 96 | FPipe::~FPipe() 97 | { 98 | if (m_pipelineQueue != nullptr) 99 | { 100 | m_fpga->free(m_pipelineQueue); 101 | } 102 | } 103 | 104 | template 105 | void FPipe::pop(T &data) 106 | { 107 | m_pipelineQueue->pop(data); 108 | } 109 | 110 | template 111 | void FPipe::push(T data) 112 | { 113 | m_pipelineQueue->push(data); 114 | } 115 | 116 | #endif // __FPIPE_H__ 117 | -------------------------------------------------------------------------------- /sw/core/fqueue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FQUEUE_H 18 | #define FQUEUE_H 19 | 20 | //#include // AFU structure definitions (brings in spl2defs.h) 21 | #include 22 | #include 23 | 24 | #define FQUEUE_VALID_CODE 0x13579bdf 25 | #define FQUEUE_PRODUCER_VALID_CODE 0x02468ace 26 | 27 | 28 | template 29 | class FQueue{ 30 | public: //Everything is public to avoid reordering in memory 31 | // fields 32 | union { 33 | uint64_t qword0[8]; // make it a whole cacheline 34 | struct { 35 | T *m_buffer; 36 | //btUnsigned32bitInt m_size; 37 | volatile uint32_t m_capacity; 38 | volatile uint32_t m_capacity_bytes; 39 | volatile uint32_t update_bytes_rate; 40 | volatile uint32_t m_crb_code; 41 | volatile uint32_t synch_size; 42 | }; 43 | }; 44 | 45 | // producer info 46 | union { 47 | uint64_t qword1[8]; // make it a whole cacheline 48 | struct { 49 | volatile uint32_t m_producer_idx; 50 | volatile uint32_t m_producer_bytes; 51 | volatile uint32_t m_producer_code; 52 | volatile bool m_producer_done; 53 | }; 54 | }; 55 | 56 | // consumer info 57 | union { 58 | uint64_t qword2[8]; // make it a whole cacheline 59 | struct { 60 | volatile uint32_t m_consumer_idx; 61 | volatile uint32_t m_consumer_bytes; 62 | }; 63 | }; 64 | // Constructor 65 | //FQueue(ServiceHW *srvHndle, unsigned int capacity); //TODO 66 | 67 | bool push(T value); 68 | bool pop(T& value); 69 | bool empty(); 70 | bool full(); 71 | std::size_t size() const; 72 | std::size_t capacity() const; 73 | 74 | void done(); 75 | bool isDone(); 76 | void reset(); 77 | 78 | }; 79 | 80 | /*template 81 | FQueue::FQueue(ServiceHW *srvHndle, unsigned int capacity) 82 | { 83 | unsigned int num_cl = (capacity+63 / 64); //TODO check rounding 84 | m_buffer = (btVirtAddr)(srvHndle->malloc(CL(num_cl))); //TODO this should be done by the allocator!!! 
85 | //m_size = 0; 86 | m_capacity = capacity; 87 | m_capacity_bytes = capacity * sizeof(T); 88 | 89 | m_producer_idx = 0; 90 | m_producer_bytes = 0; 91 | m_producer_done = false; 92 | m_consumer_idx = 0; 93 | m_consumer_bytes = 0; 94 | }*/ 95 | 96 | // TODO deconstructor 97 | 98 | template 99 | bool FQueue::push( T value) 100 | { 101 | // Check if full 102 | while ( full() ) 103 | { 104 | SleepNano(100); 105 | // return false; 106 | } 107 | //insert at end 108 | atomic_thread_fence(std::memory_order_acquire); 109 | 110 | m_buffer[m_producer_idx] = value; 111 | m_producer_idx = (m_producer_idx + 1) % m_capacity; 112 | m_producer_bytes += sizeof(T); 113 | 114 | atomic_thread_fence(std::memory_order_release); 115 | return true; 116 | } 117 | 118 | template 119 | bool FQueue::pop(T & value) 120 | { 121 | // Check if empty 122 | while ( empty() & ~isDone() ) 123 | { 124 | SleepNano(100); 125 | // return false; 126 | } 127 | atomic_thread_fence(std::memory_order_acquire); 128 | value = m_buffer[m_consumer_idx]; 129 | m_consumer_idx = (m_consumer_idx + 1) % m_capacity; 130 | m_consumer_bytes += sizeof(T); 131 | 132 | atomic_thread_fence(std::memory_order_release); 133 | 134 | return true; 135 | } 136 | 137 | template 138 | bool FQueue::empty() 139 | { 140 | 141 | if( (m_producer_bytes - m_consumer_bytes) < sizeof(T) ) 142 | { 143 | return true; 144 | 145 | } 146 | return (m_producer_bytes == m_consumer_bytes); 147 | // return (m_producer_idx == m_consumer_idx); 148 | } 149 | 150 | template 151 | bool FQueue::full() 152 | { 153 | if( (m_capacity_bytes - (m_producer_bytes - m_consumer_bytes) ) < sizeof(T) ) return true; 154 | return ((m_producer_bytes - m_consumer_bytes) == m_capacity_bytes); 155 | } 156 | 157 | template 158 | size_t FQueue::size() const 159 | { 160 | return (m_capacity_bytes - (m_producer_bytes - m_consumer_bytes)); 161 | } 162 | 163 | template 164 | size_t FQueue::capacity() const 165 | { 166 | return m_capacity; 167 | } 168 | 169 | template 170 | void 
FQueue::done() 171 | { 172 | m_producer_done = true; 173 | m_producer_code = 0xffffffff; 174 | } 175 | 176 | template 177 | bool FQueue::isDone() 178 | { 179 | return m_producer_done; 180 | } 181 | 182 | template 183 | void FQueue::reset() 184 | { 185 | m_producer_idx = 0; 186 | m_producer_bytes = 0; 187 | m_producer_done = false; 188 | m_consumer_idx = 0; 189 | m_consumer_bytes = 0; 190 | } 191 | 192 | #endif 193 | -------------------------------------------------------------------------------- /sw/core/fthread.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "fpgaapi.h" 18 | #include 19 | 20 | #include 21 | 22 | //**********************************************************************************// 23 | //************************** ************************// 24 | //********************* Fthread Implementation *******************// 25 | //************************** ************************// 26 | //**********************************************************************************// 27 | Fthread::Fthread(FthreadRec * t_rec) 28 | { 29 | FRecord = t_rec; 30 | 31 | t_rec->parent()->enqueueSingleFThread(FRecord); 32 | } 33 | //**********************************************************************************// 34 | Fthread::Fthread(FthreadRec * t_rec, bool enqueue) 35 | { 36 | 37 | FRecord = t_rec; 38 | 39 | if( enqueue ) t_rec->parent()->enqueueSingleFThread(FRecord); 40 | 41 | } 42 | //**********************************************************************************// 43 | Fthread::Fthread(FPGA* fpga_t, unsigned int OpCode, void* afu_config, unsigned int afu_config_size) 44 | { 45 | 46 | FRecord = fpga_t->allocateFThreadRecord(); 47 | 48 | if(FRecord != NULL) 49 | { 50 | FRecord->setFThreadRec(OpCode, reinterpret_cast(afu_config), RND_TO_CL(afu_config_size) >> 6, NULL); 51 | 52 | fpga_t->enqueueSingleFThread(FRecord); 53 | } 54 | } 55 | //**********************************************************************************// 56 | Fthread::~Fthread() 57 | { 58 | FRecord->reset(); 59 | 60 | delete FRecord; 61 | } 62 | //**********************************************************************************// 63 | void Fthread::join() 64 | { 65 | unsigned char state = 0; 66 | do 67 | { 68 | SleepNano( 200 ); 69 | 70 | state = FRecord->get_status()->state; 71 | 72 | }while(state != OPERATOR_DONE_STATE); 73 | 74 | // Free FThread resources 75 | FthreadRec * tmp_rec = FRecord; 76 | 77 | FRecord = new FthreadRec( FRecord ); 78 | 79 | tmp_rec->parent()->free(tmp_rec->get_cfg()); 80 | 81 | tmp_rec->reset(); 82 | 
83 | FRecord->parent()->free_fthread(FRecord->get_id()); 84 | } 85 | //**********************************************************************************// 86 | double Fthread::timing() 87 | { 88 | uint64_t cycles = FRecord->get_status()->exec_cycles; 89 | return (cycles*5.0/1000000.0); 90 | } 91 | //**********************************************************************************// 92 | void Fthread::printStatusLine() 93 | { 94 | FTStatus* status = FRecord->get_status(); 95 | std::cout << "--------------- STATUS LINE ---------" << std::endl; 96 | std::cout << "State: " << status->state << std::endl; 97 | std::cout << "Reads: " << status->reads << std::endl; 98 | std::cout << "Writes: " << status->writes << std::endl; 99 | std::cout << "Execution Cycles: " << status->exec_cycles << std::endl; 100 | std::cout << "Configuration Cycles: " << status->ConfigCycles << std::endl; 101 | std::cout << "Terminating Cycles Cycles: " << status->TerminatingCycles << std::endl; 102 | std::cout << "Read Valid High Cycles: " << status->ReadCycles << std::endl; 103 | std::cout << "Read Ready High Cycles: " << status->ReadyCycles << std::endl; 104 | for (int i = 0; i < 8; i++) 105 | { 106 | std::cout << "AFU status[" << i << "]: " << status->afu_counters[i] << std::endl; 107 | } 108 | } 109 | 110 | unsigned int Fthread::readCounter(unsigned int counter_code) 111 | { 112 | return 0;// status_line->dw[6 + counter_code]; 113 | } 114 | -------------------------------------------------------------------------------- /sw/core/fthread.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __FTHREAD_H__
#define __FTHREAD_H__


class FPGA;
class Pipeline;

//***************************************************************************//
// Handle for one job ("FThread") described by an FthreadRec.
// This header does not declare FthreadRec itself; it relies on the including
// file having declared it already.
class Fthread
{
public:
    // Wrap an existing record; the one-argument form always enqueues it, the
    // two-argument form only when `enqueue` is true.
    Fthread(FthreadRec * t_rec);
    Fthread(FthreadRec * t_rec, bool enqueue);
    // Allocate a record from `fpga_t`, configure it and enqueue it.
    Fthread(FPGA* fpga_t, unsigned int OpCode, void* afu_config, unsigned int afu_config_size);

    //Fthread(FthreadRec * src_rec, Pipeline * pipe1, FthreadRec * dst_rec);
    //Fthread(Pipeline * pipe1, FthreadRec * dst_rec);
    //Fthread(FthreadRec * src_rec, Pipeline * pipe1);

    ~Fthread();

    void         join();             // wait until the status line reports completion
    double       timing();           // execution time derived from the cycle counter
    void         printStatusLine();  // dump the status line to std::cout
    unsigned int readCounter(unsigned int counter_code);

    // Non-owning access to the record currently held.
    FthreadRec * getFThreadRec(){ return FRecord; }

protected:

    FthreadRec * FRecord;   // record backing this handle

};

#endif // __FTHREAD_H__
--------------------------------------------------------------------------------
/sw/core/fthreadRec.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017 Systems Group, ETHZ

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "fpgaapi.h" 18 | #include 19 | 20 | #include 21 | 22 | //**********************************************************************************// 23 | //************************** ************************// 24 | //********************* FthreadRec Implementation *******************// 25 | //************************** ************************// 26 | //**********************************************************************************// 27 | FthreadRec::FthreadRec() 28 | { 29 | parent_process_ID = -1; 30 | config_params = NULL; 31 | status_line = NULL; 32 | uID = -1; 33 | RdFIFOPtr = 0; 34 | WrFIFOPtr = 0; 35 | name = std::string("t_name"); 36 | } 37 | //**********************************************************************************// 38 | FthreadRec::~FthreadRec() 39 | { 40 | 41 | ::memset(status_line, 0, sizeof(struct FTStatus)); 42 | 43 | status_line = NULL; 44 | config_params = NULL; 45 | config_struct_size = 0; 46 | } 47 | //**********************************************************************************// 48 | FthreadRec::FthreadRec(FthreadRec * t_rec) 49 | { 50 | parent_process_ID = t_rec->parent_process_ID; 51 | config_params = t_rec->config_params; 52 | config_struct_size = t_rec->config_struct_size; 53 | uID = t_rec->uID; 54 | parent_fpga = t_rec->parent_fpga; 55 | name = std::string("t_name"); 56 | opcode = t_rec->opcode; 57 | retPtr = t_rec->retPtr; 58 | 59 | RdFIFOPtr = t_rec->RdFIFOPtr; 60 | WrFIFOPtr = t_rec->WrFIFOPtr; 61 | 62 | status_line = new FTStatus; 63 | 64 | memcpy(status_line, 
t_rec->status_line, sizeof(struct FTStatus)); 65 | } 66 | //**********************************************************************************// 67 | void FthreadRec::init(int id, int pid, FPGA* parent) 68 | { 69 | parent_process_ID = pid; 70 | uID = id; 71 | parent_fpga = parent; 72 | } 73 | //**********************************************************************************// 74 | bool FthreadRec::setFThreadRec(unsigned int code, unsigned char* cfg_s, 75 | unsigned int cfg_size, void* ret) 76 | { 77 | 78 | opcode = code; 79 | 80 | retPtr = ret; 81 | 82 | config_params = cfg_s; 83 | config_struct_size = cfg_size; 84 | 85 | if(status_line == NULL) 86 | { 87 | void* s_ptr = parent_fpga->malloc( sizeof(FTStatus) ); 88 | status_line = reinterpret_cast( s_ptr ); 89 | } 90 | ::memset(status_line, 0, sizeof(FTStatus)); 91 | } 92 | //**********************************************************************************// 93 | void FthreadRec::reset() 94 | { 95 | ::memset(status_line, 0, sizeof(FTStatus)); 96 | 97 | config_params = NULL; 98 | config_struct_size = 0; 99 | opcode = 0; 100 | } 101 | -------------------------------------------------------------------------------- /sw/core/fthreadRec.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/

#ifndef __FTHREAD_REC_H__
#define __FTHREAD_REC_H__


class FPGA;

// Status line of one FThread. The union pads the named counters to
// 16 x 32 bits so the structure occupies a whole cache line.
struct FTStatus
{
    // CL #1: State, performance, AFU debug messages
    union {
        uint32_t qword0[16]; // make it a whole cacheline
        struct {
            uint32_t state;              // compared against OPERATOR_DONE_STATE by Fthread::join()
            uint32_t reads;
            uint32_t writes;
            uint32_t exec_cycles;        // used by Fthread::timing()
            uint32_t ConfigCycles;
            uint32_t TerminatingCycles;
            uint32_t ReadCycles;
            uint32_t ReadyCycles;
            uint32_t afu_counters[8];
        };
    };
};

// Bookkeeping record for one FThread: opcode, configuration buffer, status
// line, FIFO pointers and the FPGA object it belongs to.
class FthreadRec{
public:
    FthreadRec();
    // Field-by-field copy that takes a private snapshot of the status line.
    FthreadRec( FthreadRec * t_rec );
    ~FthreadRec();

    // Store the job description and prepare a zeroed status line.
    bool setFThreadRec(unsigned int code, unsigned char* cfg_s,
                       unsigned int cfg_size, void* ret);
    // Bind the record to its id, process id and parent FPGA.
    void init(int id, int pid, FPGA* parent);
    // Zero the status line and forget configuration and opcode.
    void reset();


    // Plain accessors; none of them transfers ownership.
    unsigned char* get_cfg() { return config_params; }
    int get_cfg_size() { return config_struct_size; }
    int get_id() { return uID; }
    FTStatus* get_status() { return status_line; }
    int get_opcode() { return opcode; }
    FPGA* parent() { return parent_fpga; }

    void* get_retPtr() { return retPtr; }

    unsigned char* get_WrFIFOPtr(){ return WrFIFOPtr; }
    unsigned char* get_RdFIFOPtr(){ return RdFIFOPtr; }

    void setRdFIFOPtr(unsigned char * fptr){ RdFIFOPtr = fptr; }
    void setWrFIFOPtr(unsigned char * fptr){ WrFIFOPtr = fptr; }

protected:
    std::string name;
    int uID;
    int opcode;
    int parent_process_ID;

    int config_struct_size;
    unsigned char* config_params;
    FTStatus* status_line;

    void* retPtr;

    // FIFO pointers; the workload manager encodes them as offsets from the
    // workspace base when it builds a job command.
    unsigned char* RdFIFOPtr;
    unsigned char* WrFIFOPtr;

    FPGA* parent_fpga;

};

#endif
--------------------------------------------------------------------------------
/sw/core/memory_manager.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017
Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "fpgaapi.h" 18 | 19 | 20 | MemoryManager::MemoryManager(unsigned char* virBase, uint64_t wsSize) 21 | :baseAddr(virBase) 22 | { 23 | dataBaseAddr = virBase + MB(2); 24 | sizeChunks1k = 1024; // 1 MB 25 | sizeChunks1 = 870; // 870 MB 26 | sizeChunks5 = 75; // 375 MB 27 | sizeChunks10 = 75; // 750 MB 28 | //sizeChunks25 = 25; 29 | //sizeChunks50 = 20; 30 | sizeChunks100 = 20; // 2000 MB 31 | 32 | chunks1k = new MemChunk[sizeChunks1k]; 33 | chunks1 = new MemChunk[sizeChunks1]; 34 | chunks5 = new MemChunk[sizeChunks5]; 35 | chunks10 = new MemChunk[sizeChunks10]; 36 | //chunks25 = new MemChunk[sizeChunks25]; 37 | //chunks50 = new MemChunk[sizeChunks50]; 38 | chunks100 = new MemChunk[sizeChunks100]; 39 | 40 | //set shared memory to zero 41 | ::memset(virBase, 0, wsSize); 42 | 43 | //map addresses to chunks 44 | unsigned char* base = dataBaseAddr; 45 | 46 | for (int i = 0; i < sizeChunks1k; i++) 47 | { 48 | chunks1k[i].addr = base; 49 | chunks1k[i].free = true; 50 | base += 1024; 51 | } 52 | for (int i = 0; i < sizeChunks1; i++) 53 | { 54 | chunks1[i].addr = base; 55 | chunks1[i].free = true; 56 | base += MB(1); 57 | } 58 | for (int i = 0; i < sizeChunks5; i++) 59 | { 60 | chunks5[i].addr = base; 61 | chunks5[i].free = true; 62 | base += MB(5); 63 | } 64 | for (int i = 0; i < sizeChunks10; i++) 65 | { 66 | chunks10[i].addr = base; 67 | 
chunks10[i].free = true; 68 | base += MB(10); 69 | } 70 | for (int i = 0; i < sizeChunks100; i++) 71 | { 72 | chunks100[i].addr = base; 73 | chunks100[i].free = true; 74 | base += MB(100); 75 | } 76 | } 77 | 78 | MemoryManager::~MemoryManager() 79 | { 80 | delete[] chunks1k; 81 | delete[] chunks1; 82 | delete[] chunks5; 83 | delete[] chunks10; 84 | delete[] chunks100; 85 | } 86 | 87 | unsigned char* MemoryManager::get_virt_base() //TODO rename 88 | { 89 | return baseAddr; 90 | } 91 | 92 | void* MemoryManager::malloc(size_t size, size_t* maxsize) 93 | { 94 | //printChunks(); 95 | 96 | MemChunk* list; 97 | size_t listSize = 0; 98 | // printf("Requested size: %i, in MB: %i\n", size, size/MB(1)); fflush(stdout); 99 | if (size <= 1024) 100 | { 101 | list = chunks1k; 102 | listSize = sizeChunks1k; 103 | *maxsize = 1024; 104 | } 105 | else if (size <= MB(1)) 106 | { 107 | list = chunks1; 108 | listSize = sizeChunks1; 109 | *maxsize = MB(1); 110 | } 111 | else if (size <= MB(5)) 112 | { 113 | list = chunks5; 114 | listSize = sizeChunks5; 115 | *maxsize = MB(5); 116 | } 117 | else if (size <= MB(10)) 118 | { 119 | list = chunks10; 120 | listSize = sizeChunks10; 121 | *maxsize = MB(10); 122 | } 123 | else if (size <= MB(100)) 124 | { 125 | list = chunks100; 126 | listSize = sizeChunks100; 127 | *maxsize = MB(100); 128 | } 129 | else 130 | { 131 | MSG("malloc: requested size too large, size: "< free; 24 | }; 25 | 26 | class MemoryManager{ 27 | public: 28 | //TODO btPhysAddr newer used 29 | MemoryManager(unsigned char* virBase, uint64_t wsSize ); 30 | ~MemoryManager(); 31 | unsigned char* get_virt_base(); //TODO rename to getBase() 32 | 33 | //void* malloc(size_t size); 34 | void* malloc(size_t size, size_t* maxsize); 35 | void free(void* ptr); 36 | 37 | private: 38 | inline uint64_t roundUpToCLs(uint64_t size); 39 | inline uint64_t roundUpToChunks(uint64_t size); 40 | void printChunks(); 41 | 42 | private: 43 | unsigned char* baseAddr; 44 | unsigned char* dataBaseAddr; 45 | 
46 | MemChunk* chunks1k; 47 | MemChunk* chunks1; 48 | MemChunk* chunks5; 49 | MemChunk* chunks10; 50 | //MemChunk* chunks25; 51 | //MemChunk* chunks50; 52 | MemChunk* chunks100; 53 | 54 | size_t sizeChunks1k; 55 | size_t sizeChunks1; 56 | size_t sizeChunks5; 57 | size_t sizeChunks10; 58 | //size_t sizeChunks25; 59 | //size_t sizeChunks50; 60 | size_t sizeChunks100; 61 | 62 | }; 63 | 64 | #endif //__MEMORY_MANAGER_H__ 65 | -------------------------------------------------------------------------------- /sw/core/pipeline_job.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/

#ifndef __FPIPELINEJOB_H__
#define __FPIPELINEJOB_H__

class Fthread;
class FPGA;

#include "fpgaapi.h"
#include "fpipe.h"

// NOTE(review): every template parameter/argument list in this header (after
// `template`, `FPipe` and `PipelineJob`) was lost when the listing was
// extracted - the text between '<' and '>' is missing. The code below is
// reproduced as found; restore the lists from the repository before compiling.

// A job that connects up to two FThreads through an FPipe. Three shapes are
// supported: source -> pipe -> destination, pipe -> destination, and
// source -> pipe.
template
class PipelineJob
{
public:
    PipelineJob(FthreadRec* src, FPipe* pipe, FthreadRec* dst);
    PipelineJob(FPipe* pipe, FthreadRec* dst);
    PipelineJob(FthreadRec* src, FPipe* pipe);
    ~PipelineJob();

    void join();
    double timing();
    void printStatusLine();

    Fthread* getSrc() { return m_srcThread; }
    Fthread* getDst() { return m_dstThread; }

protected:
    FPGA* m_fpga;
    Fthread* m_srcThread;   // owned; nullptr when the job has no source FThread
    Fthread* m_dstThread;   // owned; nullptr when the job has no destination FThread
};

// Source -> pipe -> destination job.
// NOTE(review): `srcOp` is dereferenced on the first line, before the
// `if (srcOp)` check, and m_srcThread/m_dstThread are dereferenced
// unconditionally further down - so both operators must be non-null despite
// the checks.
template
PipelineJob::PipelineJob(FthreadRec* srcOp, FPipe* pipe, FthreadRec* dstOp)
{
    m_fpga = srcOp->parent();
    m_srcThread = nullptr;
    m_dstThread = nullptr;

    // Wrap the records without enqueuing them individually; the pair is
    // enqueued as one pipeline job below.
    if (srcOp)
    {
        m_srcThread = new Fthread(srcOp, false);
    }
    if (dstOp)
    {
        m_dstThread = new Fthread(dstOp, false);
    }

    // Build the job code; the pipe's read-address code lands at bit 20 for a
    // memory pipe and at bit 24 otherwise.
    uint32_t code = 0x00000011;
    if( pipe->isMemPipe() )
    {
        code = code | 0x00080020 | (pipe->getReadAddrCode() << 20);
    }
    else
    {
        code = code | 0x00040000 | (pipe->getReadAddrCode() << 24);
    }

    // Only a memory pipe has a FIFO pointer to hand to both ends.
    if( pipe->isMemPipe() )
    {
        m_srcThread->getFThreadRec()->setWrFIFOPtr( pipe->getFIFOPtr() );
        m_dstThread->getFThreadRec()->setRdFIFOPtr( pipe->getFIFOPtr() );
    }

    m_srcThread->getFThreadRec()->parent()->enqueuePipelineJob(m_srcThread->getFThreadRec(), m_dstThread->getFThreadRec(), code);
}

// Deletes whichever Fthread wrappers were created.
template
PipelineJob::~PipelineJob()
{
    if (m_srcThread != nullptr)
        delete m_srcThread;
    if (m_dstThread != nullptr)
        delete m_dstThread;
}

// Pipe -> destination job; `dstOp` must be non-null.
template
PipelineJob::PipelineJob(FPipe* pipe, FthreadRec* dstOp)
{
    //assert(dstOp != nullptr);
    m_fpga = dstOp->parent();
    m_srcThread = nullptr;
    m_dstThread = new Fthread(dstOp, false);

    uint32_t code = 0x00000080 | (pipe->getReadAddrCode() << 8);

    m_dstThread->getFThreadRec()->setRdFIFOPtr( pipe->getFIFOPtr() );

    m_dstThread->getFThreadRec()->parent()->enqueuePipelineJob(nullptr, m_dstThread->getFThreadRec(), code);
}

// Source -> pipe job; `srcOp` must be non-null.
template
PipelineJob::PipelineJob(FthreadRec* srcOp, FPipe* pipe)
{
    //assert(srcOp != nullptr);
    m_fpga = srcOp->parent();
    m_srcThread = new Fthread(srcOp, false);
    m_dstThread = nullptr;

    uint32_t code = 0x00000030;

    m_srcThread->getFThreadRec()->setWrFIFOPtr( pipe->getFIFOPtr() );

    m_srcThread->getFThreadRec()->parent()->enqueuePipelineJob(m_srcThread->getFThreadRec(), nullptr, code);
}

// Joins the source first, then the destination (whichever exist).
template
void PipelineJob::join()
{
    // Release the operator resources
    if (m_srcThread)
        m_srcThread->join();
    if (m_dstThread)
        m_dstThread->join();
}

// Reports the destination's timing when there is one, else the source's.
template
double PipelineJob::timing()
{
    if (m_dstThread)
        return m_dstThread->timing();
    else
        return m_srcThread->timing();
}

// Prints the status line of each FThread that is part of the job.
template
void PipelineJob::printStatusLine()
{
    if (m_srcThread)
    {
        std::cout << "Source FThread STATUS LINE ---------" << std::endl;
        m_srcThread->printStatusLine();
    }

    if (m_dstThread)
    {
        std::cout << "Destination FThread STATUS LINE ---------" << std::endl;
        m_dstThread->printStatusLine();
    }
}

#endif // __FPIPELINEJOB_H__
--------------------------------------------------------------------------------
/sw/core/utils.cpp:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017 Systems Group, ETHZ

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "fpgaapi.h" 18 | //**********************************************************************************// 19 | void _DumpCL( void * pCL) 20 | { 21 | uint32_t *pu32 = reinterpret_cast(pCL); 22 | 23 | std::cout << std::dec; 24 | std::cout << "Status: " << pu32[0] << std::endl; 25 | std::cout << "Reads: " << pu32[1] << std::endl; 26 | std::cout << "Writes: " << pu32[2] << std::endl; 27 | std::cout << "Execution cycles: " << pu32[3] << std::endl; 28 | 29 | std::cout << std::hex << std::setfill('0') << std::uppercase; 30 | for( int i = 4; i < ( CL(1) / sizeof(uint32_t)); ++i ) 31 | { 32 | std::cout << "0x" << std::setw(8) << pu32[i] << " " << std::endl; 33 | } 34 | std::cout <<"" << std::nouppercase << std::endl; 35 | } // _DumpCL 36 | //**********************************************************************************// 37 | double get_time() 38 | { 39 | struct timeval t; 40 | struct timezone tzp; 41 | gettimeofday(&t, &tzp); 42 | return t.tv_sec + t.tv_usec*1e-6; 43 | } 44 | //**********************************************************************************// 45 | bool errlog(unsigned int err_code) 46 | { 47 | switch( err_code ) 48 | { 49 | case ERR_HWSRV_ALLOC_FAILED: 50 | std::cout << "\n ERROR: Allocating Hardware Service Failed \n"; 51 | break; 52 | case ERR_WS_ALLOC_FAILED: 53 | std::cout << "\n ERROR: Allocating Shared Memory Space Failed \n"; 54 | break; 55 | case ERR_TRANS_INIT_FAILED: 56 | std::cout << "\n ERROR: Establishing link to FPGA Failed \n"; 57 | break; 58 | case ERR_CONFIG_OPS_UNKNWON: 59 
| std::cout << "\n ERROR: Cannot Retrieve Configured AFUs Codes from the FPGA \n"; 60 | break; 61 | case ERR_JOB_TYPE_NOT_SUPPORTED: 62 | std::cout << "\n ERROR: Requesting Non Supported Job Type \n"; 63 | break; 64 | default: 65 | std::cout << "\n Unknown Error Type \n"; 66 | } 67 | 68 | return false; 69 | } -------------------------------------------------------------------------------- /sw/core/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | #ifndef __UTILS_H__ 19 | #define __UTILS_H__ 20 | 21 | struct OneCL { // Make a cache-line sized structure 22 | uint32_t dw[16]; // for array arithmetic 23 | }; 24 | 25 | struct page4kB{ 26 | char pg[4096]; 27 | }; 28 | 29 | struct page1kB{ 30 | char pg[1024]; 31 | }; 32 | 33 | #endif // __UTILS_H__ 34 | void _DumpCL( void * pCL) ; 35 | double get_time(); 36 | bool errlog(unsigned int err_code); 37 | 38 | -------------------------------------------------------------------------------- /sw/core/workload_manager.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "fpgaapi.h" 18 | #include 19 | #include 20 | 21 | WorkloadManager::WorkloadManager(FPGA * p_fpga) 22 | { 23 | 24 | parent_fpga = p_fpga; 25 | // Set up command queues 26 | for(int i = 0; i < NUM_FTHREADS; i++) 27 | { 28 | printf(" allocate job queue in workload manager\n"); 29 | unsigned int q_ptr_phys; 30 | job_queue[i] = reinterpret_cast*>( p_fpga->alloc_job_queue(q_ptr_phys) ); 31 | //printf("job queue#%i at: %p\n", i, job_queue[i]); 32 | 33 | ::memset(job_queue[i], 0, sizeof(FQueue)); 34 | 35 | job_queue[i]->m_buffer = reinterpret_cast( (unsigned char*)(job_queue[i]) + 36 | sizeof(FQueue)); 37 | job_queue[i]->m_capacity = JOB_QUEUE_SIZE; 38 | job_queue[i]->m_capacity_bytes = JOB_QUEUE_SIZE * sizeof(struct OneCL); 39 | job_queue[i]->update_bytes_rate = 2048; 40 | job_queue[i]->m_crb_code = FQUEUE_VALID_CODE; 41 | job_queue[i]->synch_size = sizeof(struct OneCL); 42 | 43 | job_queue[i]->m_producer_idx = 0; 44 | job_queue[i]->m_producer_bytes = 0; 45 | job_queue[i]->m_producer_done = false; 46 | job_queue[i]->m_producer_code = FQUEUE_PRODUCER_VALID_CODE; 47 | 48 | job_queue[i]->m_consumer_idx = 0; 49 | job_queue[i]->m_consumer_bytes = 0; 50 | 51 | 52 | // 53 | queue_code[i] = 0; 54 | // 55 | jqueue_base_phys[i] = q_ptr_phys >> 6; 56 | } 57 | // 58 | printf("job queues allocated!\n"); 59 | 60 | // setup queues codes 61 | for(int i = 0; i < NUM_FTHREADS; i++) 62 | { 63 | unsigned int code = p_fpga->get_config_opcode(i); 64 | for(int j = 0; j < NUM_FTHREADS; j++) 65 | { 66 | if(queue_code[j] == 0) 67 | { 
68 | queue_code[j] = code; 69 | break; 70 | } 71 | else if( queue_code[j] == code) break; 72 | } 73 | } 74 | 75 | // Set start_fpga_wlm_cmd 76 | printf(" set up the start FPGA command\n"); 77 | uint32_t q_size = JOB_QUEUE_SIZE << 16; 78 | 79 | q_size |= (0x0000FFFF & (sizeof(FQueue) >> 6)); 80 | // 81 | ::memset(&start_fpga_wlm_cmd, 0, sizeof(OneCL)); 82 | 83 | printf("set start commands fields\n"); 84 | start_fpga_wlm_cmd.dw[0] = (0x00010000) | (uint32_t)(START_JOB_MANAGER_CMD & 0x0000FFFF); 85 | for(int i = 0; i < NUM_FTHREADS; i++) 86 | { 87 | // 96-bits: queue size, code, addr 88 | start_fpga_wlm_cmd.dw[1+(3*i + 0)] = jqueue_base_phys[i]; 89 | start_fpga_wlm_cmd.dw[1+(3*i + 1)] = (uint32_t)(queue_code[i]); 90 | start_fpga_wlm_cmd.dw[1+(3*i + 2)] = (uint32_t)(q_size); 91 | } 92 | printf(" workload manager allocated\n"); 93 | 94 | } 95 | //**********************************************************************************// 96 | WorkloadManager::~WorkloadManager() 97 | { 98 | 99 | } 100 | //**********************************************************************************// 101 | int WorkloadManager::get_job_queue_index( int opcode ) 102 | { 103 | for(int i = 0; i < NUM_FTHREADS; i++) 104 | { 105 | if( opcode == queue_code[i] ) return i; 106 | } 107 | return -1; 108 | } 109 | //**********************************************************************************// 110 | /* 111 | enqueue_job: when 1 fthread is passed: a single fthread job request is enqueued 112 | when multiple fthreads passed it enqueue a pipeline job request. 113 | Currently pipelining of two fthreads is only supported. 
114 | */ 115 | bool WorkloadManager::enqueue_job(FthreadRec * t_thread[], int num_threads, uint32_t code) 116 | { 117 | // It fails if more than 2 fthreads are pipelined 118 | if( num_threads > 2 ) return false; 119 | // 120 | struct OneCL cmd_CL; 121 | ::memset(&cmd_CL, 0, sizeof(OneCL)); 122 | 123 | unsigned char * wsptr = t_thread[0]->parent()->get_ws_base_virt(); 124 | 125 | // get the job queue for src fthread 126 | int job_type = get_job_queue_index(t_thread[0]->get_opcode()); 127 | 128 | if( job_type == -1 ) return errlog(ERR_JOB_TYPE_NOT_SUPPORTED); 129 | 130 | // destination job if two fthreads passed 131 | int dst_job_type = get_job_queue_index(t_thread[num_threads-1]->get_opcode()); 132 | 133 | unsigned int codew = ((dst_job_type << 1) & 0x0000000E) | code; 134 | 135 | cmd_CL.dw[0] = (uint32_t)(codew); 136 | 137 | for(int i = 0; i < num_threads; i++) 138 | { 139 | cmd_CL.dw[7*i + 1] = (uint32_t)(uint64_t(t_thread[i]->get_status())); 140 | cmd_CL.dw[7*i + 2] = (uint32_t)(uint64_t(t_thread[i]->get_status()) >> 32); 141 | cmd_CL.dw[7*i + 3] = (uint32_t)(uint64_t(t_thread[i]->get_cfg())); 142 | cmd_CL.dw[7*i + 4] = (uint32_t)(uint64_t(t_thread[i]->get_cfg()) >> 32); 143 | cmd_CL.dw[7*i + 5] = (uint32_t)(t_thread[i]->get_cfg_size()); 144 | cmd_CL.dw[7*i + 6] = (uint32_t)(uint64_t(t_thread[i]->get_WrFIFOPtr() - wsptr) >> 6); 145 | cmd_CL.dw[7*i + 7] = (uint32_t)(uint64_t(t_thread[i]->get_RdFIFOPtr() - wsptr) >> 6); 146 | } 147 | 148 | queue_mutex[job_type].lock(); 149 | 150 | while( !job_queue[job_type]->push(cmd_CL) ); 151 | 152 | queue_mutex[job_type].unlock(); 153 | 154 | return true; 155 | } 156 | -------------------------------------------------------------------------------- /sw/core/workload_manager.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Systems Group, ETHZ 3 | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in 
compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


#ifndef __WORKLOAD_MANAGER_H__
#define __WORKLOAD_MANAGER_H__


class FPGA;

/*
    In the current version where no partial reconfiguration is supported and the
    AFUs are fixed on the FPGA the workload manager operation is simple:

    On establishing the connection with the FPGA, and receiving the opcodes for
    configured AFUs a predefined mapping of the queues is established and hence
    every supported job type is assigned a queue.

*/
class WorkloadManager{
public:
    // Allocates the job queues through `p_fpga` and builds the start command.
    WorkloadManager(FPGA * p_fpga);
    ~WorkloadManager();


    // Enqueue a single-FThread job (num_threads == 1) or a two-FThread
    // pipeline job (num_threads == 2); returns false on failure.
    bool enqueue_job(FthreadRec * t_thread[], int num_threads, uint32_t code);
    // Index of the queue assigned to `opcode`, or -1 if there is none.
    int get_job_queue_index( int opcode );

    // The command built by the constructor.
    OneCL* start_cmd(){ return &start_fpga_wlm_cmd; }

protected:

    // NOTE(review): the template argument of FQueue was lost when this listing
    // was extracted (text between '<' and '>' is missing) - restore it from
    // the repository.
    FQueue* job_queue[NUM_FTHREADS];            // one command queue per slot
    unsigned int queue_code[NUM_FTHREADS];      // opcode served by each queue, 0 = unassigned
    std::mutex queue_mutex[NUM_FTHREADS];       // serialises producers per queue

    uint32_t jqueue_base_phys[NUM_FTHREADS];    // queue base as returned by the FPGA, shifted right by 6

    FPGA* parent_fpga;

    OneCL start_fpga_wlm_cmd;

};

#endif // __WORKLOAD_MANAGER_H__
--------------------------------------------------------------------------------
/sw/platform/platform.h:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2017 Systems Group, ETHZ

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// NOTE(review): the header names of all system #include lines in this file and
// the target types of the dynamic_cast expressions below were lost when the
// listing was extracted (text between '<' and '>' is missing). They are kept
// exactly as found; restore them from the repository before compiling.
#include
#include
#include // Logger


#include // Service Interface
#include // Service Client Interface
#include // AFU structure definitions (brings in spl2defs.h)

#ifndef __PLATFORM_H__
#define __PLATFORM_H__

#include
#include // SPL2AFU_DSM

BEGIN_NAMESPACE(AAL)

// Convenience macros for printing messages and errors.


// Print/don't print the event ID's entered in the event handlers.
/*#if 1
# define EVENT_CASE(x) case x : MSG(#x);
#else
# define EVENT_CASE(x) case x :
#endif
*/
END_NAMESPACE(AAL)

//
using namespace AAL;

/// @brief Define our Runtime client class so that we can receive the runtime started/stopped notifications.
///
/// We implement a Service client within, to handle AAL Service allocation/free.
/// We also implement a Semaphore for synchronization with the AAL runtime.
class RuntimeClient : public CAASBase,
                      public IRuntimeClient
{
public:
    RuntimeClient();
    ~RuntimeClient();

    void end();

    IRuntime* getRuntime();

    btBool isOK();

    // Runtime notification callbacks.
    void runtimeStarted(IRuntime *pRuntime,
                        const NamedValueSet &rConfigParms);

    void runtimeStopped(IRuntime *pRuntime);

    void runtimeStartFailed(const IEvent &rEvent);

    void runtimeAllocateServiceFailed( IEvent const &rEvent);

    void runtimeAllocateServiceSucceeded(IBase *pClient,
                                         TransactionID const &rTranID);

    void runtimeEvent(const IEvent &rEvent);
    //

protected:
    IRuntime *m_pRuntime; ///< Pointer to AAL runtime instance.
    Runtime m_Runtime; ///< AAL Runtime
    btBool m_isOK; ///< Status
    CSemaphore m_Sem; ///< For synchronizing with the AAL runtime.
};

/// Client of the hardware service: receives the service, transaction and
/// workspace callbacks declared below and keeps the resulting handles and
/// addresses in its public members.
class HWService: public CAASBase, public IServiceClient, public ISPLClient
{

public:

    /// Clears all handles and addresses, registers this object's client
    /// interfaces, creates the semaphore and a new RuntimeClient.
    HWService()
    {
        m_pAALService = NULL;
        m_SPLService = NULL;
        m_Result = 0;
        m_AFUDSMVirt = NULL;
        m_AFUCTXVirt = NULL;
        m_AFUCTXPhys = 0;
        // NOTE(review): the next line ends with ',' instead of ';'. The comma
        // operator makes it behave the same, but it looks like a typo.
        m_AFUCTXSize = 0,
        m_AFUDSMSize = 0;

        SetSubClassInterface(iidServiceClient, dynamic_cast(this));
        SetInterface(iidSPLClient, dynamic_cast(this));
        SetInterface(iidCCIClient, dynamic_cast(this));
        m_Sem.Create(0, 1);

        m_runtimeClient = new RuntimeClient;

        srvAllocated = false;
    }
    ~HWService();

    //--------------- Mandatory Interface methods ------------------------//

    // Transaction and workspace callbacks.
    virtual void OnTransactionStarted(TransactionID const &TranID,
                                      btVirtAddr AFUDSM,
                                      btWSSize AFUDSMSize);
    virtual void OnContextWorkspaceSet(TransactionID const &TranID);

    virtual void OnTransactionFailed(const IEvent &Event);

    virtual void OnTransactionComplete(TransactionID const &TranID);

    virtual void OnTransactionStopped(TransactionID const &TranID);

    virtual void OnWorkspaceAllocated(TransactionID const &TranID,
                                      btVirtAddr WkspcVirt,
                                      btPhysAddr WkspcPhys,
                                      btWSSize WkspcSize);

    virtual void OnWorkspaceAllocateFailed(const IEvent &Event);

    virtual void OnWorkspaceFreed(TransactionID const &TranID);

    virtual void OnWorkspaceFreeFailed(const IEvent &Event);
    //

    // Service allocation callbacks.
    void serviceAllocated(IBase *pServiceBase,
                          TransactionID const &rTranID);

    void serviceAllocateFailed(const IEvent &rEvent);

    void serviceFreed(TransactionID const &rTranID);

    void serviceEvent(const IEvent &rEvent);
    //
    bool allocHWService();
    bool allocate_workspace(uint64_t size);

    bool init();

    // Service member objects //
public:
    // Basic objects
    IBase * m_pAALService; // The generic AAL Service interface for the AFU.
    RuntimeClient * m_runtimeClient;
    ISPLAFU * m_SPLService;
    CSemaphore m_Sem; // For synchronizing with the AAL runtime.
    btInt m_Result; // Returned result value; 0 if success


    btVirtAddr m_AFUCTXVirt; ///
    btPhysAddr m_AFUCTXPhys;
    btWSSize m_AFUCTXSize;

    btVirtAddr m_AFUDSMVirt; ///< Points to DSM
    btWSSize m_AFUDSMSize; ///< Length in bytes of DSM

    bool srvAllocated;
};


#endif // __PLATFORM_H__
--------------------------------------------------------------------------------