├── .gitignore ├── Insider ├── cosim │ ├── kernels │ │ ├── app_input_data_merger.cpp │ │ ├── app_input_data_mux.cpp │ │ ├── app_output_data_demux.cpp │ │ ├── command_handler.cpp │ │ ├── dram_data_caching.cpp │ │ ├── dram_read_delay_unit.cpp │ │ ├── dram_read_req_multiplexer.cpp │ │ ├── dram_read_req_time_marker.cpp │ │ ├── dram_read_resp_multiplexer.cpp │ │ ├── dram_read_throttle_unit.cpp │ │ ├── dram_write_delay_unit.cpp │ │ ├── dram_write_mux.cpp │ │ ├── dram_write_req_time_marker.cpp │ │ ├── dram_write_throttle_unit.cpp │ │ ├── pcie_data_splitter_app.cpp │ │ ├── pcie_read_req_multiplexer.cpp │ │ ├── pcie_read_resp_multiplexer.cpp │ │ ├── pcie_read_resp_passer.cpp │ │ ├── pcie_read_throttle_unit.cpp │ │ ├── pcie_write_multiplexer.cpp │ │ ├── pcie_write_throttle_unit.cpp │ │ ├── peek_handler.cpp │ │ ├── pipe0_data_handler.cpp │ │ ├── pipe0_dram_dispatcher.cpp │ │ ├── pipe1_data_handler.cpp │ │ ├── pipe1_dram_dispatcher.cpp │ │ ├── pipe2_data_handler.cpp │ │ ├── pipeline_data_passer.cpp │ │ ├── poke_handler.cpp │ │ ├── read_mode_dram_helper_app.cpp │ │ ├── read_mode_pcie_helper_app.cpp │ │ ├── reset_propaganda.cpp │ │ ├── write_mode_app_output_data_caching.cpp │ │ ├── write_mode_dram_helper_app.cpp │ │ ├── write_mode_pcie_helper_app.cpp │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ ├── template │ │ ├── itc_template_header.txt │ │ ├── itc_template_itc.txt │ │ └── reset_template.txt │ └── verif │ │ ├── Makefile │ │ ├── include │ │ ├── .gitignore │ │ ├── insider_cosim.h │ │ ├── sh_dpi_tasks.h │ │ └── uthash.h │ │ └── src │ │ ├── .gitignore │ │ ├── test_main.c │ │ └── test_null.c ├── inc │ ├── insider_common.h │ ├── insider_itc.h │ ├── insider_kernel.h │ ├── insider_macros.h │ ├── insider_runtime.h │ ├── insider_runtime.hpp │ └── insider_types.h ├── lib │ ├── compile.sh │ └── insider_runtime.c ├── llvm │ ├── insider-app │ │ ├── CMakeLists.txt │ │ └── insider_app.cpp │ ├── insider-cosim-intc │ │ ├── CMakeLists.txt │ │ └── insider_cosim_intc.cpp │ └── 
insider-interconnect │ │ ├── CMakeLists.txt │ │ └── insider_interconnect.cpp ├── shell │ ├── insider_cosim │ ├── insider_device_compiler │ ├── insider_host_g++ │ └── insider_host_gcc ├── src │ └── insider_reset_syn.cpp └── synthesis │ ├── kernels │ ├── app_input_data_merger.cpp │ ├── app_input_data_mux.cpp │ ├── app_output_data_demux.cpp │ ├── dram_data_caching.cpp │ ├── dram_read_delay_unit.cpp │ ├── dram_read_req_multiplexer.cpp │ ├── dram_read_req_time_marker.cpp │ ├── dram_read_resp_multiplexer.cpp │ ├── dram_read_throttle_unit.cpp │ ├── dram_write_delay_unit.cpp │ ├── dram_write_mux.cpp │ ├── dram_write_req_time_marker.cpp │ ├── dram_write_throttle_unit.cpp │ ├── pcie_data_splitter_app.cpp │ ├── pcie_read_req_multiplexer.cpp │ ├── pcie_read_resp_multiplexer.cpp │ ├── pcie_read_resp_passer.cpp │ ├── pcie_read_throttle_unit.cpp │ ├── pcie_write_multiplexer.cpp │ ├── pcie_write_throttle_unit.cpp │ ├── peek_handler.cpp │ ├── pipe0_data_handler.cpp │ ├── pipe0_dram_dispatcher.cpp │ ├── pipe1_data_handler.cpp │ ├── pipe1_dram_dispatcher.cpp │ ├── pipe2_data_handler.cpp │ ├── pipeline_data_passer.cpp │ ├── poke_handler.cpp │ ├── read_mode_dram_helper_app.cpp │ ├── read_mode_pcie_helper_app.cpp │ ├── reset_propaganda.cpp │ ├── write_mode_app_output_data_caching.cpp │ ├── write_mode_dram_helper_app.cpp │ ├── write_mode_pcie_helper_app.cpp │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ └── template │ ├── itc_template_header.txt │ ├── itc_template_itc.txt │ └── reset_template.txt ├── LICENSE ├── README.md ├── STAccel ├── inc │ ├── hls_csim │ │ ├── Makefile.sysc.rules │ │ ├── ap_axi_sdata.h │ │ ├── ap_cint.h │ │ ├── ap_fixed.h │ │ ├── ap_int.h │ │ ├── ap_shift_reg.h │ │ ├── ap_stream.h │ │ ├── ap_sysc │ │ │ ├── AXI4_if.h │ │ │ ├── ap_mem_if.h │ │ │ ├── ap_sc_ext.h │ │ │ └── hls_bus_if.h │ │ ├── ap_utils.h │ │ ├── autoesl_tech.h │ │ ├── autopilot_tech.h │ │ ├── clc.h │ │ ├── clc.h.inc │ │ ├── dds │ │ │ ├── dds_compiler_v6_0_bitacc_cmodel.h │ │ │ └── 
xip_common_bitacc_cmodel.h │ │ ├── etc │ │ │ ├── ap_fixed_sim.h │ │ │ ├── ap_headers.h │ │ │ ├── ap_int_sim.h │ │ │ ├── ap_private.h │ │ │ ├── autopilot_apint.h │ │ │ ├── autopilot_dt.def │ │ │ ├── autopilot_dt.h │ │ │ ├── autopilot_dt_ext.def │ │ │ ├── autopilot_enum.h │ │ │ ├── autopilot_ssdm_bits.h │ │ │ ├── autopilot_ssdm_op.h │ │ │ ├── c_ap_int_sim.h │ │ │ └── hlslib_headers.h │ │ ├── fft │ │ │ └── xfft_v9_0_bitacc_cmodel.h │ │ ├── fir │ │ │ ├── fir_compiler_v7_2_bitacc_cmodel.h │ │ │ ├── gmp.h │ │ │ ├── xip_common_bitacc_cmodel.h │ │ │ └── xip_mpz_bitacc_cmodel.h │ │ ├── floating_point_v6_1_bitacc_cmodel.h │ │ ├── floating_point_v6_2_bitacc_cmodel.h │ │ ├── floating_point_v7_0_bitacc_cmodel.h │ │ ├── gmp.h │ │ ├── hls │ │ │ ├── dsp │ │ │ │ ├── hls_atan2_cordic.h │ │ │ │ ├── hls_awgn.h │ │ │ │ ├── hls_cmpy.h │ │ │ │ ├── hls_convolution_encoder.h │ │ │ │ ├── hls_nco.h │ │ │ │ ├── hls_qam_demod.h │ │ │ │ ├── hls_qam_mod.h │ │ │ │ ├── hls_sqrt_cordic.h │ │ │ │ ├── hls_viterbi_decoder.h │ │ │ │ └── utils │ │ │ │ │ ├── hls_cordic.h │ │ │ │ │ ├── hls_cordic_functions.h │ │ │ │ │ └── hls_dsp_common_utils.h │ │ │ ├── hls_acosh.h │ │ │ ├── hls_asin_acos.h │ │ │ ├── hls_asinh.h │ │ │ ├── hls_atanh.h │ │ │ ├── hls_axi_io.h │ │ │ ├── hls_basic_math.h │ │ │ ├── hls_big_mult.h │ │ │ ├── hls_cordic.h │ │ │ ├── hls_cordic_tables.h │ │ │ ├── hls_erf.h │ │ │ ├── hls_exp_.h │ │ │ ├── hls_exp_tables_.h │ │ │ ├── hls_fmod_rem_quo.h │ │ │ ├── hls_gamma.h │ │ │ ├── hls_hotbm.h │ │ │ ├── hls_hotbm_tables.h │ │ │ ├── hls_log_.h │ │ │ ├── hls_log_tables.h │ │ │ ├── hls_nextafter.h │ │ │ ├── hls_normalize.h │ │ │ ├── hls_pow.h │ │ │ ├── hls_pow_tables.h │ │ │ ├── hls_range_redux.h │ │ │ ├── hls_round.h │ │ │ ├── hls_rsr.h │ │ │ ├── hls_rsr_tables.h │ │ │ ├── hls_sqrt.h │ │ │ ├── hls_tanh.h │ │ │ ├── hls_video_arithm.h │ │ │ ├── hls_video_core.h │ │ │ ├── hls_video_fast.h │ │ │ ├── hls_video_haar.h │ │ │ ├── hls_video_harris.h │ │ │ ├── hls_video_histogram.h │ │ │ ├── hls_video_hough.h │ 
│ │ ├── hls_video_imgbase.h │ │ │ ├── hls_video_imgproc.h │ │ │ ├── hls_video_io.h │ │ │ ├── hls_video_mem.h │ │ │ ├── hls_video_stereobm.h │ │ │ ├── hls_video_types.h │ │ │ ├── hls_video_undistort.h │ │ │ ├── linear_algebra │ │ │ │ ├── hls_back_substitute.h │ │ │ │ ├── hls_cholesky.h │ │ │ │ ├── hls_cholesky_inverse.h │ │ │ │ ├── hls_matrix_multiply.h │ │ │ │ ├── hls_qr_inverse.h │ │ │ │ ├── hls_qrf.h │ │ │ │ ├── hls_svd.h │ │ │ │ └── utils │ │ │ │ │ ├── x_hls_complex.h │ │ │ │ │ ├── x_hls_matrix_tb_utils.h │ │ │ │ │ └── x_hls_matrix_utils.h │ │ │ └── utils │ │ │ │ ├── x_hls_defines.h │ │ │ │ ├── x_hls_float_utils.h │ │ │ │ ├── x_hls_traits.h │ │ │ │ └── x_hls_utils.h │ │ ├── hls_dds.h │ │ ├── hls_dsp.h │ │ ├── hls_fft.h │ │ ├── hls_fir.h │ │ ├── hls_fpo.h │ │ ├── hls_half.h │ │ ├── hls_linear_algebra.h │ │ ├── hls_math.h │ │ ├── hls_opencv.h │ │ ├── hls_signal_handler.h │ │ ├── hls_stream.h │ │ ├── hls_video.h │ │ ├── mpfr.h │ │ ├── opencv │ │ │ ├── cv.h │ │ │ ├── cv.hpp │ │ │ ├── cvaux.h │ │ │ ├── cvaux.hpp │ │ │ ├── cvwimage.h │ │ │ ├── cxcore.h │ │ │ ├── cxcore.hpp │ │ │ ├── cxeigen.hpp │ │ │ ├── cxmisc.h │ │ │ ├── highgui.h │ │ │ └── ml.h │ │ ├── opencv2 │ │ │ ├── calib3d │ │ │ │ └── calib3d.hpp │ │ │ ├── contrib │ │ │ │ ├── contrib.hpp │ │ │ │ ├── detection_based_tracker.hpp │ │ │ │ ├── hybridtracker.hpp │ │ │ │ ├── openfabmap.hpp │ │ │ │ └── retina.hpp │ │ │ ├── core │ │ │ │ ├── core.hpp │ │ │ │ ├── core_c.h │ │ │ │ ├── cuda_devptrs.hpp │ │ │ │ ├── devmem2d.hpp │ │ │ │ ├── eigen.hpp │ │ │ │ ├── gpumat.hpp │ │ │ │ ├── internal.hpp │ │ │ │ ├── mat.hpp │ │ │ │ ├── opengl_interop.hpp │ │ │ │ ├── opengl_interop_deprecated.hpp │ │ │ │ ├── operations.hpp │ │ │ │ ├── types_c.h │ │ │ │ ├── version.hpp │ │ │ │ └── wimage.hpp │ │ │ ├── features2d │ │ │ │ └── features2d.hpp │ │ │ ├── flann │ │ │ │ ├── all_indices.h │ │ │ │ ├── allocator.h │ │ │ │ ├── any.h │ │ │ │ ├── autotuned_index.h │ │ │ │ ├── composite_index.h │ │ │ │ ├── config.h │ │ │ │ ├── defines.h │ │ │ │ ├── 
dist.h │ │ │ │ ├── dummy.h │ │ │ │ ├── dynamic_bitset.h │ │ │ │ ├── flann.hpp │ │ │ │ ├── flann_base.hpp │ │ │ │ ├── general.h │ │ │ │ ├── ground_truth.h │ │ │ │ ├── hdf5.h │ │ │ │ ├── heap.h │ │ │ │ ├── hierarchical_clustering_index.h │ │ │ │ ├── index_testing.h │ │ │ │ ├── kdtree_index.h │ │ │ │ ├── kdtree_single_index.h │ │ │ │ ├── kmeans_index.h │ │ │ │ ├── linear_index.h │ │ │ │ ├── logger.h │ │ │ │ ├── lsh_index.h │ │ │ │ ├── lsh_table.h │ │ │ │ ├── matrix.h │ │ │ │ ├── miniflann.hpp │ │ │ │ ├── nn_index.h │ │ │ │ ├── object_factory.h │ │ │ │ ├── params.h │ │ │ │ ├── random.h │ │ │ │ ├── result_set.h │ │ │ │ ├── sampling.h │ │ │ │ ├── saving.h │ │ │ │ ├── simplex_downhill.h │ │ │ │ └── timer.h │ │ │ ├── gpu │ │ │ │ ├── device │ │ │ │ │ ├── block.hpp │ │ │ │ │ ├── border_interpolate.hpp │ │ │ │ │ ├── color.hpp │ │ │ │ │ ├── common.hpp │ │ │ │ │ ├── datamov_utils.hpp │ │ │ │ │ ├── detail │ │ │ │ │ │ ├── color_detail.hpp │ │ │ │ │ │ ├── reduce.hpp │ │ │ │ │ │ ├── reduce_key_val.hpp │ │ │ │ │ │ ├── transform_detail.hpp │ │ │ │ │ │ ├── type_traits_detail.hpp │ │ │ │ │ │ └── vec_distance_detail.hpp │ │ │ │ │ ├── dynamic_smem.hpp │ │ │ │ │ ├── emulation.hpp │ │ │ │ │ ├── filters.hpp │ │ │ │ │ ├── funcattrib.hpp │ │ │ │ │ ├── functional.hpp │ │ │ │ │ ├── limits.hpp │ │ │ │ │ ├── reduce.hpp │ │ │ │ │ ├── saturate_cast.hpp │ │ │ │ │ ├── scan.hpp │ │ │ │ │ ├── simd_functions.hpp │ │ │ │ │ ├── static_check.hpp │ │ │ │ │ ├── transform.hpp │ │ │ │ │ ├── type_traits.hpp │ │ │ │ │ ├── utility.hpp │ │ │ │ │ ├── vec_distance.hpp │ │ │ │ │ ├── vec_math.hpp │ │ │ │ │ ├── vec_traits.hpp │ │ │ │ │ ├── warp.hpp │ │ │ │ │ ├── warp_reduce.hpp │ │ │ │ │ └── warp_shuffle.hpp │ │ │ │ ├── devmem2d.hpp │ │ │ │ ├── gpu.hpp │ │ │ │ ├── gpumat.hpp │ │ │ │ └── stream_accessor.hpp │ │ │ ├── highgui │ │ │ │ ├── cap_ios.h │ │ │ │ ├── highgui.hpp │ │ │ │ └── highgui_c.h │ │ │ ├── imgproc │ │ │ │ ├── imgproc.hpp │ │ │ │ ├── imgproc_c.h │ │ │ │ └── types_c.h │ │ │ ├── legacy │ │ │ │ ├── 
blobtrack.hpp │ │ │ │ ├── compat.hpp │ │ │ │ ├── legacy.hpp │ │ │ │ └── streams.hpp │ │ │ ├── ml │ │ │ │ └── ml.hpp │ │ │ ├── objdetect │ │ │ │ └── objdetect.hpp │ │ │ ├── opencv.hpp │ │ │ ├── opencv_modules.hpp │ │ │ ├── photo │ │ │ │ ├── photo.hpp │ │ │ │ └── photo_c.h │ │ │ ├── stitching │ │ │ │ ├── detail │ │ │ │ │ ├── autocalib.hpp │ │ │ │ │ ├── blenders.hpp │ │ │ │ │ ├── camera.hpp │ │ │ │ │ ├── exposure_compensate.hpp │ │ │ │ │ ├── matchers.hpp │ │ │ │ │ ├── motion_estimators.hpp │ │ │ │ │ ├── seam_finders.hpp │ │ │ │ │ ├── util.hpp │ │ │ │ │ ├── util_inl.hpp │ │ │ │ │ ├── warpers.hpp │ │ │ │ │ └── warpers_inl.hpp │ │ │ │ ├── stitcher.hpp │ │ │ │ └── warpers.hpp │ │ │ ├── superres │ │ │ │ ├── optical_flow.hpp │ │ │ │ └── superres.hpp │ │ │ ├── ts │ │ │ │ ├── gpu_perf.hpp │ │ │ │ ├── gpu_test.hpp │ │ │ │ ├── ts.hpp │ │ │ │ ├── ts_gtest.h │ │ │ │ └── ts_perf.hpp │ │ │ ├── video │ │ │ │ ├── background_segm.hpp │ │ │ │ ├── tracking.hpp │ │ │ │ └── video.hpp │ │ │ └── videostab │ │ │ │ ├── deblurring.hpp │ │ │ │ ├── fast_marching.hpp │ │ │ │ ├── fast_marching_inl.hpp │ │ │ │ ├── frame_source.hpp │ │ │ │ ├── global_motion.hpp │ │ │ │ ├── inpainting.hpp │ │ │ │ ├── log.hpp │ │ │ │ ├── motion_stabilizing.hpp │ │ │ │ ├── optical_flow.hpp │ │ │ │ ├── stabilizer.hpp │ │ │ │ └── videostab.hpp │ │ └── script.tcl │ ├── staccel_fifo.h │ ├── staccel_itc.h │ ├── staccel_kernel.h │ └── staccel_type.h ├── llvm │ ├── csim-interconnect │ │ ├── CMakeLists.txt │ │ └── staccel_csim_interconnect.cpp │ ├── s2s-interconnect │ │ ├── CMakeLists.txt │ │ └── staccel_s2s_interconnect.cpp │ └── s2s-kernel │ │ ├── CMakeLists.txt │ │ └── staccel_s2s_kernel.cpp ├── shell │ ├── staccel_csim │ └── staccel_syn ├── src │ └── iopinChecker.cpp └── template │ ├── build │ ├── constraints │ │ ├── cl_pnr_user.xdc │ │ └── cl_synth_user.xdc │ └── scripts │ │ ├── aws_build_dcp_from_cl.sh │ │ ├── create_dcp_from_cl.tcl │ │ └── synth_cl_main.tcl │ ├── design │ ├── cl_common_defines.vh │ ├── cl_id_defines.vh 
│ ├── cl_main.sv │ ├── cl_main_defines.vh │ └── genip │ ├── software │ ├── runtime │ │ ├── Makefile │ │ ├── const.h │ │ ├── fpga_util.cpp │ │ ├── fpga_util.h │ │ └── test_main.cpp │ └── verif_rtl │ │ ├── Makefile │ │ ├── include │ │ ├── .gitignore │ │ ├── sh_dpi_tasks.h │ │ └── uthash.h │ │ └── src │ │ ├── .gitignore │ │ ├── test_main.c │ │ └── test_null.c │ └── verif │ ├── scripts │ ├── Makefile │ ├── Makefile.vivado │ ├── open_waves.tcl │ ├── top.vivado.f │ └── waves.tcl │ └── tests │ ├── test_main.sv │ └── test_null.sv ├── apps ├── device │ ├── grep │ │ ├── inc │ │ │ ├── constant.h │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ ├── app_grep_input_preprocessor.cpp │ │ │ ├── app_grep_matcher.cpp │ │ │ ├── app_grep_reducer.cpp │ │ │ └── app_grep_writer.cpp │ ├── integration │ │ ├── cosim │ │ │ ├── inc │ │ │ │ ├── constant.h │ │ │ │ └── structure.h │ │ │ ├── interconnects.cpp │ │ │ ├── kernels │ │ │ │ ├── app_input_data_merger.cpp │ │ │ │ ├── app_input_data_mux.cpp │ │ │ │ ├── app_intg_mat_rdc.cpp │ │ │ │ ├── app_intg_matcher.cpp │ │ │ │ ├── app_intg_rdc_16to8.cpp │ │ │ │ ├── app_intg_rdc_32to16.cpp │ │ │ │ ├── app_intg_rdc_4to2.cpp │ │ │ │ ├── app_intg_rdc_8to4.cpp │ │ │ │ ├── app_intg_verifier.cpp │ │ │ │ ├── app_intg_writer.cpp │ │ │ │ ├── app_output_data_demux.cpp │ │ │ │ ├── command_handler.cpp │ │ │ │ ├── dram_data_caching.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ ├── dram_write_mux.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_read_req_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ ├── 
pcie_write_throttle_unit.cpp │ │ │ │ ├── peek_handler.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ ├── poke_handler.cpp │ │ │ │ ├── read_mode_dram_helper_app.cpp │ │ │ │ ├── read_mode_pcie_helper_app.cpp │ │ │ │ ├── reset_propaganda.cpp │ │ │ │ ├── write_mode_app_output_data_caching.cpp │ │ │ │ ├── write_mode_dram_helper_app.cpp │ │ │ │ ├── write_mode_pcie_helper_app.cpp │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ │ │ ├── log_all │ │ │ └── project │ │ │ │ ├── design │ │ │ │ ├── app_input_data_merger.v │ │ │ │ ├── app_input_data_mux.v │ │ │ │ ├── app_intg_mat_rdc.v │ │ │ │ ├── app_intg_matcher.v │ │ │ │ ├── app_intg_rdc_16to8.v │ │ │ │ ├── app_intg_rdc_32to16.v │ │ │ │ ├── app_intg_rdc_4to2.v │ │ │ │ ├── app_intg_rdc_8to4.v │ │ │ │ ├── app_intg_verifier.v │ │ │ │ ├── app_intg_writer.v │ │ │ │ ├── app_output_data_demux.v │ │ │ │ ├── cl_common_defines.vh │ │ │ │ ├── cl_id_defines.vh │ │ │ │ ├── cl_main.sv │ │ │ │ ├── cl_main_defines.vh │ │ │ │ ├── command_handler.v │ │ │ │ ├── dram_data_caching.v │ │ │ │ ├── dram_read_delay_unit.v │ │ │ │ ├── dram_read_req_multiplexer.v │ │ │ │ ├── dram_read_req_time_marker.v │ │ │ │ ├── dram_read_resp_multiplexer.v │ │ │ │ ├── dram_read_throttle_unit.v │ │ │ │ ├── dram_write_delay_unit.v │ │ │ │ ├── dram_write_mux.v │ │ │ │ ├── dram_write_req_time_marker.v │ │ │ │ ├── dram_write_throttle_unit.v │ │ │ │ ├── fifo_w16_d128_A.v │ │ │ │ ├── fifo_w16_d1_A.v │ │ │ │ ├── fifo_w16_d4_A.v │ │ │ │ ├── fifo_w1_d128_A.v │ │ │ │ ├── fifo_w1_d1_A.v │ │ │ │ ├── fifo_w1_d4096_A.v │ │ │ │ ├── fifo_w1_d4_A.v │ │ │ │ ├── fifo_w1_d512_A.v │ │ │ │ ├── fifo_w1_d64_A.v │ │ │ │ ├── fifo_w32_d16_A.v │ │ │ │ ├── fifo_w32_d4_A.v │ │ │ │ ├── fifo_w32_d64_A.v │ │ │ │ ├── fifo_w512_d128_A.v │ │ │ │ ├── fifo_w512_d1_A.v │ │ │ │ ├── fifo_w512_d4096_A.v │ │ │ │ ├── 
fifo_w512_d4_A.v │ │ │ │ ├── fifo_w512_d512_A.v │ │ │ │ ├── fifo_w512_d64_A.v │ │ │ │ ├── fifo_w64_d128_A.v │ │ │ │ ├── fifo_w64_d4_A.v │ │ │ │ ├── fifo_w64_d64_A.v │ │ │ │ ├── fifo_w8_d128_A.v │ │ │ │ ├── fifo_w8_d4_A.v │ │ │ │ ├── fifo_w8_d64_A.v │ │ │ │ ├── genip │ │ │ │ ├── interconnects.v │ │ │ │ ├── pcie_data_splitter_app.v │ │ │ │ ├── pcie_read_req_multiplexer.v │ │ │ │ ├── pcie_read_resp_multiplexer.v │ │ │ │ ├── pcie_read_resp_passer.v │ │ │ │ ├── pcie_read_throttle_unit.v │ │ │ │ ├── pcie_write_multiplexer.v │ │ │ │ ├── pcie_write_throttle_unit.v │ │ │ │ ├── peek_handler.v │ │ │ │ ├── pipe0_data_handler.v │ │ │ │ ├── pipe0_data_handler_kbuf_addr_arr.v │ │ │ │ ├── pipe0_dram_dispatcher.v │ │ │ │ ├── pipe1_data_handler.v │ │ │ │ ├── pipe1_dram_dispatcher.v │ │ │ │ ├── pipe2_data_handler.v │ │ │ │ ├── pipeline_data_passer.v │ │ │ │ ├── poke_handler.v │ │ │ │ ├── read_mode_dram_helper_app.v │ │ │ │ ├── read_mode_dram_helper_app_mux_164_64_1_1.v │ │ │ │ ├── read_mode_pcie_helper_app.v │ │ │ │ ├── read_mode_pcie_helper_app_app_buf_addrs.v │ │ │ │ ├── reset_propaganda.v │ │ │ │ ├── start_for_app_input_data_mux_U0.v │ │ │ │ ├── start_for_app_intg_mat_rdc_U0.v │ │ │ │ ├── start_for_app_intg_matcher_U0.v │ │ │ │ ├── start_for_app_intg_rdc_16to8_U0.v │ │ │ │ ├── start_for_app_intg_rdc_32to16_U0.v │ │ │ │ ├── start_for_app_intg_rdc_4to2_U0.v │ │ │ │ ├── start_for_app_intg_rdc_8to4_U0.v │ │ │ │ ├── start_for_app_intg_verifier_U0.v │ │ │ │ ├── start_for_app_intg_writer_U0.v │ │ │ │ ├── start_for_app_output_data_demux_U0.v │ │ │ │ ├── start_for_dram_data_caching_U0.v │ │ │ │ ├── start_for_dram_read_delay_unit_U0.v │ │ │ │ ├── start_for_dram_read_req_time_marker_U0.v │ │ │ │ ├── start_for_dram_read_throttle_unit_U0.v │ │ │ │ ├── start_for_dram_write_delay_unit_U0.v │ │ │ │ ├── start_for_dram_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pcie_read_req_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_resp_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_throttle_unit_U0.v 
│ │ │ │ ├── start_for_pcie_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pipe0_data_handler_U0.v │ │ │ │ ├── start_for_pipe0_dram_dispatcher_U0.v │ │ │ │ ├── start_for_pipe1_dram_dispatcher_U0.v │ │ │ │ ├── start_for_pipeline_data_passer_U0.v │ │ │ │ ├── start_for_read_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_read_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_reset_propaganda_U0.v │ │ │ │ ├── start_for_write_mode_app_output_data_caching_U0.v │ │ │ │ ├── start_for_write_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pre_merged_app_input_data_forwarder_U0.v │ │ │ │ ├── write_mode_app_output_data_caching.v │ │ │ │ ├── write_mode_dram_helper_app.v │ │ │ │ ├── write_mode_dram_helper_app_mux_164_32_1_1.v │ │ │ │ ├── write_mode_pcie_helper_app.v │ │ │ │ ├── write_mode_pcie_helper_app_mux_83_40_1_1.v │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.v │ │ │ │ ├── software │ │ │ │ ├── runtime │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── const.h │ │ │ │ │ ├── fpga_util.cpp │ │ │ │ │ ├── fpga_util.h │ │ │ │ │ └── test_main.cpp │ │ │ │ └── verif_rtl │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── include │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── insider_cosim.h │ │ │ │ │ ├── sh_dpi_tasks.h │ │ │ │ │ └── uthash.h │ │ │ │ │ └── src │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── test_main.c │ │ │ │ │ └── test_null.c │ │ │ │ └── verif │ │ │ │ ├── scripts │ │ │ │ ├── Makefile │ │ │ │ ├── Makefile.vivado │ │ │ │ ├── open_waves.tcl │ │ │ │ ├── top.vivado.f │ │ │ │ └── waves.tcl │ │ │ │ └── tests │ │ │ │ ├── test_main.sv │ │ │ │ └── test_null.sv │ │ ├── csim │ │ │ ├── csim │ │ │ │ ├── bin │ │ │ │ │ └── .gitignore │ │ │ │ ├── csim_compile.sh │ │ │ │ ├── inc │ │ │ │ │ ├── constant.h │ │ │ │ │ └── structure.h │ │ │ │ └── src │ │ │ │ │ ├── app_intg_mat_rdc.cpp │ │ │ │ │ ├── app_intg_matcher.cpp │ │ │ │ │ ├── app_intg_rdc_16to8.cpp │ │ │ │ │ ├── app_intg_rdc_32to16.cpp │ │ │ │ │ ├── app_intg_rdc_4to2.cpp │ │ │ │ │ ├── 
app_intg_rdc_8to4.cpp │ │ │ │ │ ├── app_intg_verifier.cpp │ │ │ │ │ ├── app_intg_writer.cpp │ │ │ │ │ ├── command_handler.cpp │ │ │ │ │ ├── dram_helper_app.cpp │ │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ │ ├── interconnects.cpp │ │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ │ ├── pcie_helper_app.cpp │ │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ │ └── reset_propaganda.cpp │ │ │ ├── inc │ │ │ │ ├── constant.h │ │ │ │ └── structure.h │ │ │ ├── interconnects.cpp │ │ │ └── kernels │ │ │ │ ├── app_intg_mat_rdc.cpp │ │ │ │ ├── app_intg_matcher.cpp │ │ │ │ ├── app_intg_rdc_16to8.cpp │ │ │ │ ├── app_intg_rdc_32to16.cpp │ │ │ │ ├── app_intg_rdc_4to2.cpp │ │ │ │ ├── app_intg_rdc_8to4.cpp │ │ │ │ ├── app_intg_verifier.cpp │ │ │ │ ├── app_intg_writer.cpp │ │ │ │ ├── command_handler.cpp │ │ │ │ ├── dram_helper_app.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_helper_app.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ 
│ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ └── reset_propaganda.cpp │ │ ├── inc │ │ │ ├── constant.h │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ ├── app_intg_mat_rdc.cpp │ │ │ ├── app_intg_matcher.cpp │ │ │ ├── app_intg_rdc_16to8.cpp │ │ │ ├── app_intg_rdc_32to16.cpp │ │ │ ├── app_intg_rdc_4to2.cpp │ │ │ ├── app_intg_rdc_8to4.cpp │ │ │ ├── app_intg_verifier.cpp │ │ │ └── app_intg_writer.cpp │ ├── knn │ │ ├── inc │ │ │ ├── constant.h │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ └── app_knn.cpp │ ├── pt │ │ ├── cosim │ │ │ ├── interconnects.cpp │ │ │ ├── kernels │ │ │ │ ├── app_input_data_merger.cpp │ │ │ │ ├── app_input_data_mux.cpp │ │ │ │ ├── app_output_data_demux.cpp │ │ │ │ ├── app_pt.cpp │ │ │ │ ├── command_handler.cpp │ │ │ │ ├── dram_data_caching.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ ├── dram_write_mux.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_read_req_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ ├── peek_handler.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ ├── poke_handler.cpp │ │ │ │ ├── read_mode_dram_helper_app.cpp │ │ 
│ │ ├── read_mode_pcie_helper_app.cpp │ │ │ │ ├── reset_propaganda.cpp │ │ │ │ ├── write_mode_app_output_data_caching.cpp │ │ │ │ ├── write_mode_dram_helper_app.cpp │ │ │ │ ├── write_mode_pcie_helper_app.cpp │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ │ │ ├── log_all │ │ │ └── project │ │ │ │ ├── design │ │ │ │ ├── app_input_data_merger.v │ │ │ │ ├── app_input_data_mux.v │ │ │ │ ├── app_output_data_demux.v │ │ │ │ ├── app_pt.v │ │ │ │ ├── cl_common_defines.vh │ │ │ │ ├── cl_id_defines.vh │ │ │ │ ├── cl_main.sv │ │ │ │ ├── cl_main_defines.vh │ │ │ │ ├── command_handler.v │ │ │ │ ├── dram_data_caching.v │ │ │ │ ├── dram_read_delay_unit.v │ │ │ │ ├── dram_read_req_multiplexer.v │ │ │ │ ├── dram_read_req_time_marker.v │ │ │ │ ├── dram_read_resp_multiplexer.v │ │ │ │ ├── dram_read_throttle_unit.v │ │ │ │ ├── dram_write_delay_unit.v │ │ │ │ ├── dram_write_mux.v │ │ │ │ ├── dram_write_req_time_marker.v │ │ │ │ ├── dram_write_throttle_unit.v │ │ │ │ ├── fifo_w16_d128_A.v │ │ │ │ ├── fifo_w16_d1_A.v │ │ │ │ ├── fifo_w16_d4_A.v │ │ │ │ ├── fifo_w1_d128_A.v │ │ │ │ ├── fifo_w1_d1_A.v │ │ │ │ ├── fifo_w1_d4096_A.v │ │ │ │ ├── fifo_w1_d4_A.v │ │ │ │ ├── fifo_w1_d512_A.v │ │ │ │ ├── fifo_w1_d64_A.v │ │ │ │ ├── fifo_w32_d16_A.v │ │ │ │ ├── fifo_w32_d4_A.v │ │ │ │ ├── fifo_w32_d64_A.v │ │ │ │ ├── fifo_w512_d128_A.v │ │ │ │ ├── fifo_w512_d1_A.v │ │ │ │ ├── fifo_w512_d4096_A.v │ │ │ │ ├── fifo_w512_d4_A.v │ │ │ │ ├── fifo_w512_d512_A.v │ │ │ │ ├── fifo_w512_d64_A.v │ │ │ │ ├── fifo_w64_d128_A.v │ │ │ │ ├── fifo_w64_d4_A.v │ │ │ │ ├── fifo_w64_d64_A.v │ │ │ │ ├── fifo_w8_d128_A.v │ │ │ │ ├── fifo_w8_d4_A.v │ │ │ │ ├── fifo_w8_d64_A.v │ │ │ │ ├── genip │ │ │ │ ├── interconnects.v │ │ │ │ ├── pcie_data_splitter_app.v │ │ │ │ ├── pcie_read_req_multiplexer.v │ │ │ │ ├── pcie_read_resp_multiplexer.v │ │ │ │ ├── pcie_read_resp_passer.v │ │ │ │ ├── pcie_read_throttle_unit.v │ │ │ │ ├── pcie_write_multiplexer.v │ │ │ │ ├── pcie_write_throttle_unit.v │ │ │ │ ├── 
peek_handler.v │ │ │ │ ├── pipe0_data_handler.v │ │ │ │ ├── pipe0_data_handler_kbuf_addr_arr.v │ │ │ │ ├── pipe0_dram_dispatcher.v │ │ │ │ ├── pipe1_data_handler.v │ │ │ │ ├── pipe1_dram_dispatcher.v │ │ │ │ ├── pipe2_data_handler.v │ │ │ │ ├── pipeline_data_passer.v │ │ │ │ ├── poke_handler.v │ │ │ │ ├── read_mode_dram_helper_app.v │ │ │ │ ├── read_mode_dram_helper_app_mux_164_64_1_1.v │ │ │ │ ├── read_mode_pcie_helper_app.v │ │ │ │ ├── read_mode_pcie_helper_app_app_buf_addrs.v │ │ │ │ ├── reset_propaganda.v │ │ │ │ ├── start_for_app_input_data_mux_U0.v │ │ │ │ ├── start_for_app_output_data_demux_U0.v │ │ │ │ ├── start_for_app_pt_U0.v │ │ │ │ ├── start_for_dram_data_caching_U0.v │ │ │ │ ├── start_for_dram_read_delay_unit_U0.v │ │ │ │ ├── start_for_dram_read_req_time_marker_U0.v │ │ │ │ ├── start_for_dram_read_throttle_unit_U0.v │ │ │ │ ├── start_for_dram_write_delay_unit_U0.v │ │ │ │ ├── start_for_dram_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pcie_read_req_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_resp_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_throttle_unit_U0.v │ │ │ │ ├── start_for_pcie_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pipe0_data_handler_U0.v │ │ │ │ ├── start_for_pipe0_dram_dispatcher_U0.v │ │ │ │ ├── start_for_pipe1_dram_dispatcher_U0.v │ │ │ │ ├── start_for_pipeline_data_passer_U0.v │ │ │ │ ├── start_for_read_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_read_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_reset_propaganda_U0.v │ │ │ │ ├── start_for_write_mode_app_output_data_caching_U0.v │ │ │ │ ├── start_for_write_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pre_merged_app_input_data_forwarder_U0.v │ │ │ │ ├── write_mode_app_output_data_caching.v │ │ │ │ ├── write_mode_dram_helper_app.v │ │ │ │ ├── write_mode_dram_helper_app_mux_164_32_1_1.v │ │ │ │ ├── write_mode_pcie_helper_app.v │ │ │ │ ├── write_mode_pcie_helper_app_mux_83_40_1_1.v │ │ │ │ └── 
write_mode_pre_merged_app_input_data_forwarder.v │ │ │ │ ├── software │ │ │ │ ├── runtime │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── const.h │ │ │ │ │ ├── fpga_util.cpp │ │ │ │ │ ├── fpga_util.h │ │ │ │ │ └── test_main.cpp │ │ │ │ └── verif_rtl │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── include │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── insider_cosim.h │ │ │ │ │ ├── sh_dpi_tasks.h │ │ │ │ │ └── uthash.h │ │ │ │ │ └── src │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── test_main.c │ │ │ │ │ └── test_null.c │ │ │ │ └── verif │ │ │ │ ├── scripts │ │ │ │ ├── Makefile │ │ │ │ ├── Makefile.vivado │ │ │ │ ├── open_waves.tcl │ │ │ │ ├── top.vivado.f │ │ │ │ └── waves.tcl │ │ │ │ └── tests │ │ │ │ ├── test_main.sv │ │ │ │ └── test_null.sv │ │ ├── csim │ │ │ ├── bin │ │ │ │ └── .gitignore │ │ │ ├── csim_compile.sh │ │ │ └── src │ │ │ │ ├── app_input_data_merger.cpp │ │ │ │ ├── app_input_data_mux.cpp │ │ │ │ ├── app_output_data_demux.cpp │ │ │ │ ├── app_pt.cpp │ │ │ │ ├── dram_data_caching.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ ├── dram_write_mux.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── interconnects.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_read_req_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ ├── peek_handler.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ ├── poke_handler.cpp │ │ │ │ ├── read_mode_dram_helper_app.cpp │ │ │ │ ├── 
read_mode_pcie_helper_app.cpp │ │ │ │ ├── reset_propaganda.cpp │ │ │ │ ├── write_mode_app_output_data_caching.cpp │ │ │ │ ├── write_mode_dram_helper_app.cpp │ │ │ │ ├── write_mode_pcie_helper_app.cpp │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ └── app_pt.cpp │ ├── relief │ │ ├── inc │ │ │ ├── constant.h │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ ├── app_rlf_diff.cpp │ │ │ ├── app_rlf_dist.cpp │ │ │ ├── app_rlf_dspt.cpp │ │ │ ├── app_rlf_flt16.cpp │ │ │ ├── app_rlf_flt2.cpp │ │ │ ├── app_rlf_flt4.cpp │ │ │ ├── app_rlf_flt8.cpp │ │ │ ├── app_rlf_max_vec.cpp │ │ │ ├── app_rlf_rdc_16to8.cpp │ │ │ ├── app_rlf_rdc_2to1.cpp │ │ │ ├── app_rlf_rdc_4to2.cpp │ │ │ ├── app_rlf_rdc_8to4.cpp │ │ │ ├── app_rlf_stg.cpp │ │ │ ├── app_rlf_upd.cpp │ │ │ └── app_rlf_wr.cpp │ ├── rle │ │ ├── cosim │ │ │ ├── inc │ │ │ │ └── structure.h │ │ │ ├── interconnects.cpp │ │ │ ├── kernels │ │ │ │ ├── app_input_data_merger.cpp │ │ │ │ ├── app_input_data_mux.cpp │ │ │ │ ├── app_output_data_demux.cpp │ │ │ │ ├── app_rle_combine.cpp │ │ │ │ ├── app_rle_expand.cpp │ │ │ │ ├── app_rle_prefix_sum.cpp │ │ │ │ ├── command_handler.cpp │ │ │ │ ├── dram_data_caching.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ │ │ │ ├── dram_write_mux.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_read_req_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ ├── peek_handler.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── 
pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ ├── poke_handler.cpp │ │ │ │ ├── read_mode_dram_helper_app.cpp │ │ │ │ ├── read_mode_pcie_helper_app.cpp │ │ │ │ ├── reset_propaganda.cpp │ │ │ │ ├── write_mode_app_output_data_caching.cpp │ │ │ │ ├── write_mode_dram_helper_app.cpp │ │ │ │ ├── write_mode_pcie_helper_app.cpp │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ │ │ ├── log_all │ │ │ └── project │ │ │ │ ├── design │ │ │ │ ├── app_input_data_merger.v │ │ │ │ ├── app_input_data_mux.v │ │ │ │ ├── app_output_data_demux.v │ │ │ │ ├── app_rle_combine.v │ │ │ │ ├── app_rle_combine_mux_646_8_1_1.v │ │ │ │ ├── app_rle_expand.v │ │ │ │ ├── app_rle_expand_mux_325_8_1_1.v │ │ │ │ ├── app_rle_prefix_sum.v │ │ │ │ ├── cl_common_defines.vh │ │ │ │ ├── cl_id_defines.vh │ │ │ │ ├── cl_main.sv │ │ │ │ ├── cl_main_defines.vh │ │ │ │ ├── command_handler.v │ │ │ │ ├── dram_data_caching.v │ │ │ │ ├── dram_read_delay_unit.v │ │ │ │ ├── dram_read_req_multiplexer.v │ │ │ │ ├── dram_read_req_time_marker.v │ │ │ │ ├── dram_read_resp_multiplexer.v │ │ │ │ ├── dram_read_throttle_unit.v │ │ │ │ ├── dram_write_delay_unit.v │ │ │ │ ├── dram_write_mux.v │ │ │ │ ├── dram_write_req_time_marker.v │ │ │ │ ├── dram_write_throttle_unit.v │ │ │ │ ├── fifo_w16_d128_A.v │ │ │ │ ├── fifo_w16_d1_A.v │ │ │ │ ├── fifo_w16_d4_A.v │ │ │ │ ├── fifo_w16_d8_A.v │ │ │ │ ├── fifo_w1_d128_A.v │ │ │ │ ├── fifo_w1_d1_A.v │ │ │ │ ├── fifo_w1_d4096_A.v │ │ │ │ ├── fifo_w1_d4_A.v │ │ │ │ ├── fifo_w1_d512_A.v │ │ │ │ ├── fifo_w1_d64_A.v │ │ │ │ ├── fifo_w1_d8_A.v │ │ │ │ ├── fifo_w32_d16_A.v │ │ │ │ ├── fifo_w32_d4_A.v │ │ │ │ ├── fifo_w32_d64_A.v │ │ │ │ ├── fifo_w512_d128_A.v │ │ │ │ ├── fifo_w512_d1_A.v │ │ │ │ ├── fifo_w512_d4096_A.v │ │ │ │ ├── fifo_w512_d4_A.v │ │ │ │ ├── fifo_w512_d512_A.v │ │ │ │ ├── fifo_w512_d64_A.v │ │ │ │ ├── fifo_w512_d8_A.v │ │ │ │ ├── fifo_w64_d128_A.v │ │ │ │ ├── 
fifo_w64_d4_A.v │ │ │ │ ├── fifo_w64_d64_A.v │ │ │ │ ├── fifo_w8_d128_A.v │ │ │ │ ├── fifo_w8_d4_A.v │ │ │ │ ├── fifo_w8_d64_A.v │ │ │ │ ├── fifo_w8_d8_A.v │ │ │ │ ├── genip │ │ │ │ ├── interconnects.v │ │ │ │ ├── pcie_data_splitter_app.v │ │ │ │ ├── pcie_read_req_multiplexer.v │ │ │ │ ├── pcie_read_resp_multiplexer.v │ │ │ │ ├── pcie_read_resp_passer.v │ │ │ │ ├── pcie_read_throttle_unit.v │ │ │ │ ├── pcie_write_multiplexer.v │ │ │ │ ├── pcie_write_throttle_unit.v │ │ │ │ ├── peek_handler.v │ │ │ │ ├── pipe0_data_handler.v │ │ │ │ ├── pipe0_data_handler_kbuf_addr_arr.v │ │ │ │ ├── pipe0_dram_dispatcher.v │ │ │ │ ├── pipe1_data_handler.v │ │ │ │ ├── pipe1_dram_dispatcher.v │ │ │ │ ├── pipe2_data_handler.v │ │ │ │ ├── pipeline_data_passer.v │ │ │ │ ├── poke_handler.v │ │ │ │ ├── read_mode_dram_helper_app.v │ │ │ │ ├── read_mode_dram_helper_app_mux_164_64_1_1.v │ │ │ │ ├── read_mode_pcie_helper_app.v │ │ │ │ ├── read_mode_pcie_helper_app_app_buf_addrs.v │ │ │ │ ├── reg_ap_int_base_513_false_false_s.v │ │ │ │ ├── reg_unsigned_short_s.v │ │ │ │ ├── reset_propaganda.v │ │ │ │ ├── start_for_app_input_data_mux_U0.v │ │ │ │ ├── start_for_app_output_data_demux_U0.v │ │ │ │ ├── start_for_app_rle_combine_U0.v │ │ │ │ ├── start_for_app_rle_expand_U0.v │ │ │ │ ├── start_for_app_rle_prefix_sum_U0.v │ │ │ │ ├── start_for_dram_data_caching_U0.v │ │ │ │ ├── start_for_dram_read_delay_unit_U0.v │ │ │ │ ├── start_for_dram_read_req_time_marker_U0.v │ │ │ │ ├── start_for_dram_read_throttle_unit_U0.v │ │ │ │ ├── start_for_dram_write_delay_unit_U0.v │ │ │ │ ├── start_for_dram_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pcie_read_req_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_resp_multiplexer_U0.v │ │ │ │ ├── start_for_pcie_read_throttle_unit_U0.v │ │ │ │ ├── start_for_pcie_write_throttle_unit_U0.v │ │ │ │ ├── start_for_pipe0_data_handler_U0.v │ │ │ │ ├── start_for_pipe0_dram_dispatcher_U0.v │ │ │ │ ├── start_for_pipe1_dram_dispatcher_U0.v │ │ │ │ ├── 
start_for_pipeline_data_passer_U0.v │ │ │ │ ├── start_for_read_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_read_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_reset_propaganda_U0.v │ │ │ │ ├── start_for_write_mode_app_output_data_caching_U0.v │ │ │ │ ├── start_for_write_mode_dram_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pcie_helper_app_U0.v │ │ │ │ ├── start_for_write_mode_pre_merged_app_input_data_forwarder_U0.v │ │ │ │ ├── write_mode_app_output_data_caching.v │ │ │ │ ├── write_mode_dram_helper_app.v │ │ │ │ ├── write_mode_dram_helper_app_mux_164_32_1_1.v │ │ │ │ ├── write_mode_pcie_helper_app.v │ │ │ │ ├── write_mode_pcie_helper_app_mux_83_40_1_1.v │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.v │ │ │ │ ├── software │ │ │ │ ├── runtime │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── const.h │ │ │ │ │ ├── fpga_util.cpp │ │ │ │ │ ├── fpga_util.h │ │ │ │ │ └── test_main.cpp │ │ │ │ └── verif_rtl │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── include │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── insider_cosim.h │ │ │ │ │ ├── sh_dpi_tasks.h │ │ │ │ │ └── uthash.h │ │ │ │ │ └── src │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── test_main.c │ │ │ │ │ └── test_null.c │ │ │ │ └── verif │ │ │ │ ├── scripts │ │ │ │ ├── Makefile │ │ │ │ ├── Makefile.vivado │ │ │ │ ├── cosim.log │ │ │ │ ├── open_waves.tcl │ │ │ │ ├── top.vivado.f │ │ │ │ └── waves.tcl │ │ │ │ └── tests │ │ │ │ ├── test_main.sv │ │ │ │ └── test_null.sv │ │ ├── csim │ │ │ ├── csim_compile.sh │ │ │ ├── inc │ │ │ │ └── structure.h │ │ │ └── src │ │ │ │ ├── app_input_data_merger.cpp │ │ │ │ ├── app_input_data_mux.cpp │ │ │ │ ├── app_output_data_demux.cpp │ │ │ │ ├── app_rle_combine.cpp │ │ │ │ ├── app_rle_expand.cpp │ │ │ │ ├── app_rle_prefix_sum.cpp │ │ │ │ ├── dram_data_caching.cpp │ │ │ │ ├── dram_read_delay_unit.cpp │ │ │ │ ├── dram_read_req_multiplexer.cpp │ │ │ │ ├── dram_read_req_time_marker.cpp │ │ │ │ ├── dram_read_resp_multiplexer.cpp │ │ │ │ ├── dram_read_throttle_unit.cpp │ │ │ │ ├── dram_write_delay_unit.cpp │ 
│ │ │ ├── dram_write_mux.cpp │ │ │ │ ├── dram_write_req_time_marker.cpp │ │ │ │ ├── dram_write_throttle_unit.cpp │ │ │ │ ├── interconnects.cpp │ │ │ │ ├── pcie_data_splitter_app.cpp │ │ │ │ ├── pcie_read_req_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_multiplexer.cpp │ │ │ │ ├── pcie_read_resp_passer.cpp │ │ │ │ ├── pcie_read_throttle_unit.cpp │ │ │ │ ├── pcie_write_multiplexer.cpp │ │ │ │ ├── pcie_write_throttle_unit.cpp │ │ │ │ ├── peek_handler.cpp │ │ │ │ ├── pipe0_data_handler.cpp │ │ │ │ ├── pipe0_dram_dispatcher.cpp │ │ │ │ ├── pipe1_data_handler.cpp │ │ │ │ ├── pipe1_dram_dispatcher.cpp │ │ │ │ ├── pipe2_data_handler.cpp │ │ │ │ ├── pipeline_data_passer.cpp │ │ │ │ ├── poke_handler.cpp │ │ │ │ ├── read_mode_dram_helper_app.cpp │ │ │ │ ├── read_mode_pcie_helper_app.cpp │ │ │ │ ├── reset_propaganda.cpp │ │ │ │ ├── write_mode_app_output_data_caching.cpp │ │ │ │ ├── write_mode_dram_helper_app.cpp │ │ │ │ ├── write_mode_pcie_helper_app.cpp │ │ │ │ └── write_mode_pre_merged_app_input_data_forwarder.cpp │ │ ├── inc │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ ├── app_rle_combine.cpp │ │ │ ├── app_rle_expand.cpp │ │ │ └── app_rle_prefix_sum.cpp │ ├── sql │ │ ├── inc │ │ │ ├── constant.h │ │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ │ ├── app_sql_filter0.cpp │ │ │ ├── app_sql_filter1.cpp │ │ │ ├── app_sql_input_parser.cpp │ │ │ └── app_sql_writer.cpp │ └── statistics │ │ ├── inc │ │ └── structure.h │ │ ├── interconnects.cpp │ │ └── kernels │ │ ├── app_stat_calculator.cpp │ │ ├── app_stat_rdc_16to8.cpp │ │ ├── app_stat_rdc_4to2.cpp │ │ ├── app_stat_rdc_8to4.cpp │ │ └── app_stat_writer.cpp └── host │ ├── grep │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ └── src │ │ ├── offload │ │ └── grep.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── grep.cpp │ ├── integration │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ └── src │ │ ├── offload │ │ 
└── integration.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── integration.cpp │ ├── knn │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ └── src │ │ ├── offload │ │ └── knn.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── knn.cpp │ ├── pt │ ├── data_gen │ │ └── run.sh │ └── src │ │ └── offload │ │ ├── read_pt │ │ ├── read_pt.cpp │ │ └── run.sh │ │ └── write_pt │ │ ├── run.sh │ │ └── write_pt.cpp │ ├── relief │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ └── src │ │ ├── offload │ │ └── relief.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── relief.cpp │ ├── rle │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ ├── run.sh │ ├── run_sg.sh │ └── src │ │ ├── offload │ │ ├── rle.cpp │ │ └── rle_sg.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── rle.cpp │ ├── sql │ ├── data_gen │ │ ├── compile.sh │ │ └── data_generator.cpp │ ├── inc │ │ └── const.h │ └── src │ │ ├── offload │ │ ├── compile.sh │ │ └── sql.cpp │ │ └── pure_cpu │ │ ├── compile.sh │ │ └── sql.cpp │ └── statistics │ ├── data_gen │ ├── compile.sh │ └── data_generator.cpp │ ├── inc │ └── const.h │ └── src │ ├── offload │ └── statistics.cpp │ └── pure_cpu │ ├── compile.sh │ └── statistical.cpp ├── driver ├── dma │ ├── Makefile │ └── fpga_dma.c ├── insider_runtime │ ├── Makefile │ └── insider_runtime.c └── nvme │ ├── Makefile │ ├── const.h │ └── disk_block.c ├── fio ├── bw_read.fio ├── bw_write.fio ├── lat_read.fio └── lat_write.fio ├── install.sh ├── lic └── XilinxAWS.lic ├── load_image.sh └── patch ├── aws ├── cosim │ └── sh_bfm.sv ├── runtime │ ├── fpga_pci.c │ └── fpga_pci.h └── synthesis │ └── strategy_TIMING.tcl └── boost └── queue.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | staccel/ 2 | sim/ 3 | build/ 4 | !STAccel/template/build 5 | *.bak 6 | *.old 7 | *~ 8 | *.swp 
-------------------------------------------------------------------------------- /Insider/cosim/kernels/app_input_data_merger.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INPUT_DATA_MERGER_CPP_ 2 | #define APP_INPUT_DATA_MERGER_CPP_ 3 | 4 | #include 5 | 6 | // The LBA is block-aligned which implies that it is also 64B-aligned. 7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 
37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /Insider/cosim/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned char available_device_dram_resp_buf_flits = 2 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Insider/cosim/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &release_device_dram_resp_buf_flits, 10 | 
ST_Queue &after_throttle_unified_dram_read_resp, 11 | ST_Queue &dram_read_context_read) { 12 | bool valid_state = 0; 13 | bool data_state; 14 | bool valid_read_resp = 0; 15 | Dram_Read_Resp data_read_resp; 16 | 17 | while (1) { 18 | #pragma HLS pipeline 19 | if (!valid_state) { 20 | valid_state = dram_read_context_read.read_nb(data_state); 21 | } 22 | if (!valid_read_resp) { 23 | valid_read_resp = 24 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 25 | } 26 | 27 | if (valid_state && valid_read_resp) { 28 | valid_read_resp = false; 29 | if (data_state == HOST_READ_REQ) { 30 | host_dram_read_resp.write(data_read_resp); 31 | } else { 32 | device_dram_read_resp.write(data_read_resp); 33 | release_device_dram_resp_buf_flits.write(0); 34 | } 35 | if (data_read_resp.last) { 36 | valid_state = false; 37 | } 38 | } 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /Insider/cosim/kernels/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /Insider/cosim/kernels/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda(ST_Queue &reset_app, ST_Queue &reset_sigs, 7 | ST_Queue &reset_read_mode_dram_helper_app, 8 | ST_Queue &reset_write_mode_dram_helper_app, 9 | ST_Queue 
&reset_read_mode_pcie_helper_app, 10 | ST_Queue &reset_write_mode_pcie_helper_app, 11 | ST_Queue &reset_pcie_data_splitter_app, 12 | ST_Queue &reset_app_output_data_demux, 13 | ST_Queue &reset_app_input_data_mux, 14 | ST_Queue &reset_write_mode_app_output_data_caching, 15 | ST_Queue &reset_app_input_data_merger) { 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset_sigs.read_nb(dummy)) { 20 | reset_read_mode_dram_helper_app.write(0); 21 | reset_write_mode_dram_helper_app.write(0); 22 | reset_read_mode_pcie_helper_app.write(0); 23 | reset_write_mode_pcie_helper_app.write(0); 24 | reset_pcie_data_splitter_app.write(0); 25 | reset_app.write(0); 26 | reset_app_output_data_demux.write(0); 27 | reset_app_input_data_mux.write(0); 28 | reset_write_mode_app_output_data_caching.write(0); 29 | reset_app_input_data_merger.write(0); 30 | } 31 | } 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /Insider/cosim/template/itc_template_header.txt: -------------------------------------------------------------------------------- 1 | #include "insider_itc.h" 2 | 3 | #include "poke_handler.cpp" 4 | #include "peek_handler.cpp" 5 | #include "pipe0_dram_dispatcher.cpp" 6 | #include "pipe1_dram_dispatcher.cpp" 7 | #include "read_mode_dram_helper_app.cpp" 8 | #include "dram_read_req_multiplexer.cpp" 9 | #include "dram_read_resp_multiplexer.cpp" 10 | #include "dram_read_throttle_unit.cpp" 11 | #include "dram_read_delay_unit.cpp" 12 | #include "dram_read_req_time_marker.cpp" 13 | #include "dram_write_req_time_marker.cpp" 14 | #include "dram_write_delay_unit.cpp" 15 | #include "dram_write_throttle_unit.cpp" 16 | #include "pcie_read_resp_passer.cpp" 17 | #include "pcie_read_throttle_unit.cpp" 18 | #include "pcie_data_splitter_app.cpp" 19 | #include "read_mode_pcie_helper_app.cpp" 20 | #include "pcie_write_multiplexer.cpp" 21 | #include "pcie_write_throttle_unit.cpp" 22 | #include "pipe0_data_handler.cpp" 23 | #include 
"pipe1_data_handler.cpp" 24 | #include "pipe2_data_handler.cpp" 25 | #include "pipeline_data_passer.cpp" 26 | #include "reset_propaganda.cpp" 27 | #include "pcie_read_req_multiplexer.cpp" 28 | #include "pcie_read_resp_multiplexer.cpp" 29 | #include "app_input_data_merger.cpp" 30 | #include "dram_write_mux.cpp" 31 | #include "app_input_data_mux.cpp" 32 | #include "app_output_data_demux.cpp" 33 | #include "write_mode_pcie_helper_app.cpp" 34 | #include "write_mode_pre_merged_app_input_data_forwarder.cpp" 35 | #include "write_mode_app_output_data_caching.cpp" 36 | #include "write_mode_dram_helper_app.cpp" 37 | #include "dram_data_caching.cpp" 38 | -------------------------------------------------------------------------------- /Insider/cosim/template/reset_template.txt: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda( 7 | ST_Queue &reset_sigs, ST_Queue &reset_read_mode_dram_helper_app, 8 | ST_Queue &reset_write_mode_dram_helper_app, 9 | ST_Queue &reset_read_mode_pcie_helper_app, 10 | ST_Queue &reset_write_mode_pcie_helper_app, 11 | ST_Queue &reset_pcie_data_splitter_app, 12 | ST_Queue &reset_app_output_data_demux, 13 | ST_Queue &reset_app_input_data_mux, 14 | ST_Queue &reset_write_mode_app_output_data_caching, 15 | ST_Queue &reset_app_input_data_merger, 16 | ST_Queue &reset_write_mode_pre_merged_app_input_data_forwarder) { 17 | while (1) { 18 | #pragma HLS pipeline 19 | bool dummy; 20 | if (reset_sigs.read_nb(dummy)) { 21 | reset_read_mode_dram_helper_app.write(0); 22 | reset_write_mode_dram_helper_app.write(0); 23 | reset_read_mode_pcie_helper_app.write(0); 24 | reset_write_mode_pcie_helper_app.write(0); 25 | reset_pcie_data_splitter_app.write(0); 26 | reset_app_output_data_demux.write(0); 27 | reset_app_input_data_mux.write(0); 28 | reset_write_mode_app_output_data_caching.write(0); 29 | 
reset_app_input_data_merger.write(0); 30 | reset_write_mode_pre_merged_app_input_data_forwarder.write(0); 31 | -------------------------------------------------------------------------------- /Insider/cosim/verif/Makefile: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 18 | 19 | C_SRCS := test_main.c pcie_utils.c cl_utils.c main.c 20 | C_OBJS := $(C_SRCS:.c=.o) 21 | 22 | CC = gcc 23 | CFLAGS = -I ./include 24 | CFLAGS += -I $(HDK_DIR)/common/software/include 25 | all: test_main 26 | 27 | test_main: $(C_SRCS) 28 | $(CC) $(CFLAGS) -o $@ $^ 29 | 30 | clean: 31 | rm test_main 32 | -------------------------------------------------------------------------------- /Insider/cosim/verif/include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/Insider/cosim/verif/include/.gitignore -------------------------------------------------------------------------------- /Insider/cosim/verif/src/.gitignore: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/Insider/cosim/verif/src/.gitignore -------------------------------------------------------------------------------- /Insider/cosim/verif/src/test_main.c: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // Vivado does not support svGetScopeFromName 22 | //#ifdef INCLUDE_DPI_CALLS 23 | #ifndef VIVADO_SIM 24 | #include "svdpi.h" 25 | #endif 26 | //#endif 27 | 28 | #include "sh_dpi_tasks.h" 29 | #include "insider_cosim.h" 30 | 31 | void test_main(uint32_t *exit_code) { 32 | 33 | // Vivado does not support svGetScopeFromName 34 | //#ifdef INCLUDE_DPI_CALLS 35 | #ifndef VIVADO_SIM 36 | svScope scope; 37 | #endif 38 | //#endif 39 | 40 | uint32_t rdata; 41 | 42 | // Vivado does not support svGetScopeFromName 43 | //#ifdef INCLUDE_DPI_CALLS 44 | #ifndef VIVADO_SIM 45 | scope = svGetScopeFromName("tb"); 46 | svSetScope(scope); 47 | #endif 48 | 49 | simulator(); 50 | 51 | *exit_code = 0; 52 | } 53 | 54 | void user_simulation_function() { 55 | // PUT YOUR CODE HERE 56 | } 57 | -------------------------------------------------------------------------------- /Insider/cosim/verif/src/test_null.c: 
-------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | // Vivado does not support svGetScopeFromName 21 | #ifdef INCLUDE_DPI_CALLS 22 | #ifndef VIVADO_SIM 23 | #include "svdpi.h" 24 | #endif 25 | #endif 26 | 27 | #include "sh_dpi_tasks.h" 28 | 29 | void test_main(uint32_t *exit_code) { 30 | 31 | // NULL Test 32 | 33 | *exit_code = 0; 34 | } 35 | -------------------------------------------------------------------------------- /Insider/inc/insider_common.h: -------------------------------------------------------------------------------- 1 | #ifndef INSIDER_COMMON_H_ 2 | #define INSIDER_COMMON_H_ 3 | 4 | #include 5 | #include 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /Insider/inc/insider_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef INSIDER_KERNEL_H_ 2 | #define INSIDER_KERNEL_H_ 3 | 4 | #define COSIM_DRAMA_ADDR_OFFSET (4LL << 32) 5 | #define COSIM_DRAMB_ADDR_OFFSET (8LL << 32) 6 | #define COSIM_DRAMC_ADDR_OFFSET (12LL << 32) 7 | #define COSIM_DRAMD_ADDR_OFFSET (16LL << 32) 8 | 9 | #include 10 | #include 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- 
/Insider/inc/insider_runtime.h: -------------------------------------------------------------------------------- 1 | #ifndef INSIDER_RUNTIME_H 2 | #define INSIDER_RUNTIME_H 3 | 4 | #include 5 | #include 6 | 7 | void send_input_params(unsigned int data); 8 | void send_input_params_array(unsigned int *data_arr, size_t arr_len); 9 | int vopen(const char *pathname, int flags); 10 | ssize_t vread(int fd, void *buf, size_t count); 11 | int vwrite(int fd, void *buf, size_t count); 12 | int vsync(int fd); 13 | int vclose(int fd); 14 | int vclose_with_rsize(int fd, size_t *rfile_written_bytes); 15 | const char *reg_virt_file(const char *real_path); 16 | const char *reg_virt_file_sg(size_t sg_list_len, const char **file_names, 17 | size_t *offs, size_t *lens); 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /Insider/inc/insider_runtime.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INSIDER_RUNTIME_HPP 2 | #define INSIDER_RUNTIME_HPP 3 | 4 | #include 5 | #include 6 | 7 | extern "C" { 8 | void send_input_params(unsigned int data); 9 | void send_input_params_array(unsigned int *data_arr, size_t arr_len); 10 | int vopen(const char *pathname, int flags); 11 | ssize_t vread(int fd, void *buf, size_t count); 12 | int vwrite(int fd, void *buf, size_t count); 13 | int vsync(int fd); 14 | int vclose(int fd); 15 | int vclose_with_rsize(int fd, size_t *rfile_written_bytes); 16 | const char *reg_virt_file(const char *real_path); 17 | const char *reg_virt_file_sg(size_t sg_list_len, const char **file_names, 18 | size_t *offs, size_t *lens); 19 | } 20 | // C++-only overload of reg_virt_file() that forwards to reg_virt_file_sg(). Declared inline because it is defined in this header, which may be included from several translation units. 21 | inline const char *reg_virt_file(size_t sg_list_len, const char **file_names, 22 | size_t *offs, size_t *lens) { 23 | return reg_virt_file_sg(sg_list_len, file_names, offs, lens); 24 | } 25 | // C++-only overload of vclose() that forwards to vclose_with_rsize(). Declared inline for the same reason as the overload above. 26 | inline int vclose(int fd, size_t *rfile_written_bytes) { 27 | return vclose_with_rsize(fd, rfile_written_bytes); 28 | } 29 | 30 | #endif 31 |
-------------------------------------------------------------------------------- /Insider/inc/insider_types.h: -------------------------------------------------------------------------------- 1 | #ifndef INSIDER_TYPES_H_ 2 | #define INSIDER_TYPES_H_ 3 | 4 | #include 5 | 6 | struct Request { 7 | unsigned int sector_off; 8 | unsigned int sector_num; 9 | unsigned int tag; 10 | bool rw; // 0 read, 1 write 11 | }; 12 | 13 | struct Data { 14 | bool last; 15 | ap_uint<512> data; 16 | }; 17 | 18 | struct Dram_Read_Req_With_Time { 19 | Dram_Read_Req req; 20 | unsigned long long time; 21 | }; 22 | 23 | struct Dram_Write_Req_Apply_With_Time { 24 | Dram_Write_Req_Apply req_apply; 25 | unsigned long long time; 26 | }; 27 | 28 | struct Dram_Dispatcher_Write_Req { 29 | unsigned char bank_id; 30 | unsigned char end_bank_id; 31 | unsigned int before_boundry_num; 32 | unsigned int cmd_num; 33 | }; 34 | 35 | struct Dram_Dispatcher_Read_Req { 36 | unsigned char bank_id; 37 | unsigned char end_bank_id; 38 | unsigned int cmd_num; 39 | }; 40 | 41 | struct APP_Data { 42 | ap_uint<512> data; 43 | unsigned short len; 44 | bool eop; 45 | }; 46 | 47 | struct APP_Data_Meta { 48 | unsigned int num; 49 | bool eop; 50 | }; 51 | 52 | struct Write_Mode_PCIe_Read_Req_Context { 53 | unsigned short len; 54 | bool last; 55 | unsigned long long metadata_addr; 56 | }; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /Insider/lib/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcc -DPAR_MEMCPY_WORKERS=3 \ 4 | -fopenmp insider_runtime.c -Wall -O3 -shared -fPIC -shared \ 5 | -I/usr/include/insider \ 6 | -o libinsider_runtime.so 7 | -------------------------------------------------------------------------------- /Insider/llvm/insider-app/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | 
${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(insider_app 8 | insider_app.cpp 9 | ) 10 | 11 | target_link_libraries(insider_app 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS insider_app 23 | RUNTIME DESTINATION bin) 24 | -------------------------------------------------------------------------------- /Insider/llvm/insider-cosim-intc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | ${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(insider_cosim_intc 8 | insider_cosim_intc.cpp 9 | ) 10 | 11 | target_link_libraries(insider_cosim_intc 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS insider_cosim_intc 23 | RUNTIME DESTINATION bin) 24 | -------------------------------------------------------------------------------- /Insider/llvm/insider-interconnect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | ${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(insider_interconnect 8 | insider_interconnect.cpp 9 | ) 10 | 11 | target_link_libraries(insider_interconnect 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS insider_interconnect 23 | RUNTIME DESTINATION bin) 24 | -------------------------------------------------------------------------------- /Insider/shell/insider_host_g++: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | g++ -fopenmp 
-lpthread -linsider_runtime -std=c++11 -I/usr/include/insider "$@" 4 | -------------------------------------------------------------------------------- /Insider/shell/insider_host_gcc: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcc -fopenmp -lpthread -linsider_runtime -I/usr/include/insider "$@" 4 | -------------------------------------------------------------------------------- /Insider/src/insider_reset_syn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | std::vector kernelNameVec; 8 | 9 | // Maps a kernel source file path to its kernel name: any leading directory 10 | // part is dropped and the file name is cut at its first '.'. 11 | std::string getKernelName(std::string sourceFileName) { 12 | const size_t slashPos = sourceFileName.find_last_of('/'); 13 | const size_t baseStart = (slashPos == std::string::npos) ? 0 : slashPos + 1; 14 | const size_t dotPos = sourceFileName.find('.', baseStart); 15 | if (dotPos == std::string::npos) { 16 | return sourceFileName.substr(baseStart); 17 | } 18 | return sourceFileName.substr(baseStart, dotPos - baseStart); 19 | } 20 | 21 | // Writes the reset kernel source to stdout: echoes the reset template, adds one 22 | // reset queue parameter per kernel named on the command line after every 23 | // template line containing "void", then emits one write per kernel followed by 24 | // the closing braces and #endif. Returns 1 if the template cannot be opened. 25 | int main(int argc, char **argv) { 26 | for (int i = 1; i < argc; i++) { 27 | kernelNameVec.push_back(getKernelName(std::string(argv[i]))); 28 | } 29 | 30 | const char *templatePath = 31 | "/usr/insider/synthesis/template/reset_template.txt"; 32 | std::ifstream ifs_template(templatePath); 33 | if (!ifs_template) { 34 | // Without the template the output would be an unusable fragment. 35 | cerr << "insider_reset_syn: cannot open " << templatePath << endl; 36 | return 1; 37 | } 38 | std::string s; 39 | while (std::getline(ifs_template, s)) { 40 | cout << s << endl; 41 | if (s.find("void") != string::npos) { 42 | for (const auto &kernelName : kernelNameVec) { 43 | cout << "ST_Queue &reset_" << kernelName << ",\n"; 44 | } 45 | } 46 | } 47 | for (const auto &kernelName : kernelNameVec) { 48 | cout << "reset_" << kernelName << ".write(0);\n"; 49 | } 50 | cout << "}\n}\n}\n#endif"; 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/app_input_data_merger.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INPUT_DATA_MERGER_CPP_ 2 | #define APP_INPUT_DATA_MERGER_CPP_ 3 | 4 | #include 5 | 6 | // The LBA is block-aligned which implies that it is also 64B-aligned.
7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned short available_device_dram_resp_buf_flits = 4 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | 
dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &after_throttle_unified_dram_read_resp, 10 | ST_Queue &dram_read_context_read) { 11 | bool valid_state = 0; 12 | bool data_state; 13 | bool valid_read_resp = 0; 14 | Dram_Read_Resp data_read_resp; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | if (!valid_state) { 19 | valid_state = dram_read_context_read.read_nb(data_state); 20 | } 21 | if (!valid_read_resp) { 22 | valid_read_resp = 23 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 24 | } 25 | 26 | if (valid_state && valid_read_resp) { 27 | valid_read_resp = false; 28 | if (data_state == HOST_READ_REQ) { 29 | host_dram_read_resp.write(data_read_resp); 30 | } else { 31 | device_dram_read_resp.write(data_read_resp); 32 | } 33 | if (data_read_resp.last) { 34 | valid_state = false; 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/pcie_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_REQ_MULTIPLEXER_CPP_ 2 | #define PCIE_READ_REQ_MULTIPLEXER_CPP_ 3 | 4 | 
#include 5 | 6 | void pcie_read_req_multiplexer(ST_Queue &pcie_read_req, 7 | ST_Queue &device_pcie_read_req, 8 | ST_Queue &host_pcie_read_req, 9 | ST_Queue &pcie_read_mux_context) { 10 | 11 | while (1) { 12 | #pragma HLS pipeline 13 | bool has_read_req = false; 14 | PCIe_Read_Req read_req; 15 | bool context; 16 | if (device_pcie_read_req.read_nb(read_req)) { 17 | has_read_req = true; 18 | context = 0; 19 | } else if(host_pcie_read_req.read_nb(read_req)) { 20 | has_read_req = true; 21 | context = 1; 22 | } 23 | 24 | if (has_read_req) { 25 | pcie_read_mux_context.write(context); 26 | pcie_read_req.write(read_req); 27 | } 28 | } 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/pcie_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_MULTIPLEXER_CPP_ 2 | #define PCIE_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_multiplexer(ST_Queue &pcie_read_resp, 7 | ST_Queue &device_pcie_read_resp, 8 | ST_Queue &host_pcie_read_resp, 9 | ST_Queue &pcie_read_mux_context) { 10 | bool has_context = false; 11 | bool context; 12 | while (1) { 13 | #pragma HLS pipeline 14 | if (has_context || (has_context = pcie_read_mux_context.read_nb(context))) { 15 | PCIe_Read_Resp read_resp; 16 | if (pcie_read_resp.read_nb(read_resp)) { 17 | if (context == 0) { 18 | device_pcie_read_resp.write(read_resp); 19 | } else { 20 | host_pcie_read_resp.write(read_resp); 21 | } 22 | if (read_resp.last) { 23 | has_context = false; 24 | } 25 | } 26 | } 27 | } 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void 
pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /Insider/synthesis/kernels/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda(ST_Queue &reset_app, ST_Queue &reset_sigs, 7 | ST_Queue &reset_read_mode_dram_helper_app, 8 | ST_Queue &reset_write_mode_dram_helper_app, 9 | ST_Queue &reset_read_mode_pcie_helper_app, 10 | ST_Queue &reset_write_mode_pcie_helper_app, 11 | ST_Queue &reset_pcie_data_splitter_app, 12 | ST_Queue &reset_app_output_data_demux, 13 | ST_Queue &reset_app_input_data_mux, 14 | ST_Queue &reset_write_mode_app_output_data_caching, 15 | ST_Queue &reset_app_input_data_merger) { 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset_sigs.read_nb(dummy)) { 20 | reset_read_mode_dram_helper_app.write(0); 21 | reset_write_mode_dram_helper_app.write(0); 22 | reset_read_mode_pcie_helper_app.write(0); 23 | reset_write_mode_pcie_helper_app.write(0); 24 | reset_pcie_data_splitter_app.write(0); 25 | reset_app.write(0); 26 | reset_app_output_data_demux.write(0); 27 | reset_app_input_data_mux.write(0); 28 | reset_write_mode_app_output_data_caching.write(0); 29 | reset_app_input_data_merger.write(0); 30 | } 31 | } 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /Insider/synthesis/template/itc_template_header.txt: -------------------------------------------------------------------------------- 1 | #include "insider_itc.h" 2 | 3 | #include "poke_handler.cpp" 4 | #include "peek_handler.cpp" 
5 | #include "pipe0_dram_dispatcher.cpp" 6 | #include "pipe1_dram_dispatcher.cpp" 7 | #include "read_mode_dram_helper_app.cpp" 8 | #include "dram_read_req_multiplexer.cpp" 9 | #include "dram_read_resp_multiplexer.cpp" 10 | #include "dram_read_throttle_unit.cpp" 11 | #include "dram_read_delay_unit.cpp" 12 | #include "dram_read_req_time_marker.cpp" 13 | #include "dram_write_req_time_marker.cpp" 14 | #include "dram_write_delay_unit.cpp" 15 | #include "dram_write_throttle_unit.cpp" 16 | #include "pcie_read_resp_passer.cpp" 17 | #include "pcie_read_throttle_unit.cpp" 18 | #include "pcie_data_splitter_app.cpp" 19 | #include "read_mode_pcie_helper_app.cpp" 20 | #include "pcie_write_multiplexer.cpp" 21 | #include "pcie_write_throttle_unit.cpp" 22 | #include "pipe0_data_handler.cpp" 23 | #include "pipe1_data_handler.cpp" 24 | #include "pipe2_data_handler.cpp" 25 | #include "pipeline_data_passer.cpp" 26 | #include "reset_propaganda.cpp" 27 | #include "pcie_read_req_multiplexer.cpp" 28 | #include "pcie_read_resp_multiplexer.cpp" 29 | #include "app_input_data_merger.cpp" 30 | #include "dram_write_mux.cpp" 31 | #include "app_input_data_mux.cpp" 32 | #include "app_output_data_demux.cpp" 33 | #include "write_mode_pcie_helper_app.cpp" 34 | #include "write_mode_pre_merged_app_input_data_forwarder.cpp" 35 | #include "write_mode_app_output_data_caching.cpp" 36 | #include "write_mode_dram_helper_app.cpp" 37 | #include "dram_data_caching.cpp" 38 | -------------------------------------------------------------------------------- /Insider/synthesis/template/reset_template.txt: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda( 7 | ST_Queue &reset_sigs, ST_Queue &reset_read_mode_dram_helper_app, 8 | ST_Queue &reset_write_mode_dram_helper_app, 9 | ST_Queue &reset_read_mode_pcie_helper_app, 10 | ST_Queue &reset_write_mode_pcie_helper_app, 11 | 
ST_Queue &reset_pcie_data_splitter_app, 12 | ST_Queue &reset_app_output_data_demux, 13 | ST_Queue &reset_app_input_data_mux, 14 | ST_Queue &reset_write_mode_app_output_data_caching, 15 | ST_Queue &reset_app_input_data_merger, 16 | ST_Queue &reset_write_mode_pre_merged_app_input_data_forwarder) { 17 | while (1) { 18 | #pragma HLS pipeline 19 | bool dummy; 20 | if (reset_sigs.read_nb(dummy)) { 21 | reset_read_mode_dram_helper_app.write(0); 22 | reset_write_mode_dram_helper_app.write(0); 23 | reset_read_mode_pcie_helper_app.write(0); 24 | reset_write_mode_pcie_helper_app.write(0); 25 | reset_pcie_data_splitter_app.write(0); 26 | reset_app_output_data_demux.write(0); 27 | reset_app_input_data_mux.write(0); 28 | reset_write_mode_app_output_data_caching.write(0); 29 | reset_app_input_data_merger.write(0); 30 | reset_write_mode_pre_merged_app_input_data_forwarder.write(0); 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Zhenyuan Ruan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/ap_sysc/AXI4_if.h: -------------------------------------------------------------------------------- 1 | #include "hls_bus_if.h" 2 | 3 | template 4 | class AXI4M_bus_port : public hls_bus_port<_VHLS_DT> { 5 | typedef hls_bus_port<_VHLS_DT> Base; 6 | 7 | public: 8 | AXI4M_bus_port() {} 9 | 10 | explicit AXI4M_bus_port(const char *name_) : Base(name_) {} 11 | }; 12 | 13 | // 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 14 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/etc/hlslib_headers.h: -------------------------------------------------------------------------------- 1 | // Fixed CR#735958 per Steve's requirement 2 | const STL_STRING hlslib_sysheader_files[] = {""}; 3 | 4 | // 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 5 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/opencv/cxmisc.h: -------------------------------------------------------------------------------- 1 | #ifndef __OPENCV_OLD_CXMISC_H__ 2 | #define __OPENCV_OLD_CXMISC_H__ 3 | 4 | #include "opencv2/core/internal.hpp" 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/opencv2/flann/dummy.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef OPENCV_FLANN_DUMMY_H_ 3 | #define OPENCV_FLANN_DUMMY_H_ 4 | 5 | namespace cvflann { 6 | 7 | #if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS 8 | __declspec(dllexport) 9 | #endif 10 | void 
dummyfunc(); 11 | 12 | } // namespace cvflann 13 | 14 | #endif /* OPENCV_FLANN_DUMMY_H_ */ 15 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/opencv2/opencv_modules.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ** File generated automatically, do not modify ** 3 | * 4 | * This file defines the list of modules available in current build configuration 5 | * 6 | * 7 | */ 8 | 9 | #define HAVE_OPENCV_CALIB3D 10 | #define HAVE_OPENCV_CONTRIB 11 | #define HAVE_OPENCV_CORE 12 | #define HAVE_OPENCV_FEATURES2D 13 | #define HAVE_OPENCV_FLANN 14 | #define HAVE_OPENCV_GPU 15 | #define HAVE_OPENCV_HIGHGUI 16 | #define HAVE_OPENCV_IMGPROC 17 | #define HAVE_OPENCV_LEGACY 18 | #define HAVE_OPENCV_ML 19 | #define HAVE_OPENCV_OBJDETECT 20 | #define HAVE_OPENCV_PHOTO 21 | #define HAVE_OPENCV_STITCHING 22 | #define HAVE_OPENCV_SUPERRES 23 | #define HAVE_OPENCV_TS 24 | #define HAVE_OPENCV_VIDEO 25 | #define HAVE_OPENCV_VIDEOSTAB 26 | 27 | 28 | -------------------------------------------------------------------------------- /STAccel/inc/hls_csim/script.tcl: -------------------------------------------------------------------------------- 1 | open_project interconnect 2 | set_top interconnect 3 | add_files interconnect.cpp 4 | open_solution "solution1" 5 | set_part {xcvu9p-flgb2104-2-i} 6 | create_clock -period 4 -name default 7 | set_clock_uncertainty 0.7 default 8 | csynth_design 9 | -------------------------------------------------------------------------------- /STAccel/inc/staccel_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef STACCEL_KERNEL_H_ 2 | #define STACCEL_KERNEL_H_ 3 | 4 | #include "staccel_type.h" 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /STAccel/llvm/csim-interconnect/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | ${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(staccel_csim_interconnect 8 | staccel_csim_interconnect.cpp 9 | ) 10 | 11 | target_link_libraries(staccel_csim_interconnect 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS staccel_csim_interconnect 23 | RUNTIME DESTINATION bin) 24 | -------------------------------------------------------------------------------- /STAccel/llvm/s2s-interconnect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | ${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(staccel_s2s_interconnect 8 | staccel_s2s_interconnect.cpp 9 | ) 10 | 11 | target_link_libraries(staccel_s2s_interconnect 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS staccel_s2s_interconnect 23 | RUNTIME DESTINATION bin) 24 | -------------------------------------------------------------------------------- /STAccel/llvm/s2s-kernel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( LLVM_LINK_COMPONENTS 2 | ${LLVM_TARGETS_TO_BUILD} 3 | Option 4 | Support 5 | ) 6 | 7 | add_clang_executable(staccel_s2s_kernel 8 | staccel_s2s_kernel.cpp 9 | ) 10 | 11 | target_link_libraries(staccel_s2s_kernel 12 | PRIVATE 13 | clangAST 14 | clangBasic 15 | clangDriver 16 | clangFrontend 17 | clangRewriteFrontend 18 | clangStaticAnalyzerFrontend 19 | clangTooling 20 | ) 21 | 22 | install(TARGETS staccel_s2s_kernel 23 | RUNTIME DESTINATION bin) 24 | 
--------------------------------------------------------------------------------
/STAccel/shell/staccel_csim:
--------------------------------------------------------------------------------
#!/bin/bash
#
# staccel_csim: prepares a C-simulation tree in ./csim for the STAccel project
# in the current directory.
#
# Expects ./kernels/, ./interconnects.cpp and /usr/include/staccel to exist.
# Any previous ./csim is moved to ./csim.old (replacing an older csim.old).
# Produces:
#   csim/src/interconnects.cpp  - output of staccel_csim_interconnect,
#                                 formatted by clang-format
#   csim/src/*                  - copies of kernels/*
#   csim/inc/*                  - copies of inc/* (if any)
#   csim/csim_compile.sh        - executable script that builds bin/csim

if [ ! -d "kernels" ]; then
  echo "Error: Cannot find kernel directory!"
  exit 1
fi

if [ ! -f "interconnects.cpp" ]; then
  echo "Error: Cannot find interconnects.cpp!"
  exit 1
fi

if [ ! -d "/usr/include/staccel" ]; then
  echo "Error: Cannot find staccel headers!"
  exit 1
fi

# Keep exactly one backup generation; a missing ./csim is not an error.
rm -rf csim.old
mv csim csim.old 1>/dev/null 2>&1

mkdir -p csim/src csim/bin csim/inc
staccel_csim_interconnect interconnects.cpp -- -I/usr/include/staccel -I/usr/include/insider -I/usr/include/hls_csim -I inc -I kernels \
  | clang-format --style=llvm 1>csim/src/interconnects.cpp
cp kernels/* csim/src/
# inc/ is optional, so failures here are deliberately ignored.
cp -r inc/* csim/inc 1>/dev/null 2>&1

# Quoted here-document: nothing is expanded here, and "#!/bin/bash" ends up on
# the very first line of the generated script so it is a valid shebang.
cat > csim/csim_compile.sh <<'EOF'
#!/bin/bash

CXXFLAGS="-g"

g++ $CXXFLAGS src/interconnects.cpp -DCSIM -I/usr/include/staccel -I/usr/include/insider -I/usr/include/hls_csim -I src -I inc -std=c++11 -pthread -o bin/csim
EOF
chmod a+x csim/csim_compile.sh
--------------------------------------------------------------------------------
/STAccel/template/build/constraints/cl_pnr_user.xdc:
--------------------------------------------------------------------------------
# This contains the CL specific constraints for Top level PNR

# False path between vled on CL clock and Shell asynchronous clock
set_false_path -from [get_cells WRAPPER_INST/CL/vled_q_reg*]

# False paths between main clock and tck
set_clock_groups -name TIG_SRAI_1 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks -of_objects [get_pins
WRAPPER_INST/SH/kernel_clks_i/clkwiz_sys_clk/inst/CLK_CORE_DRP_I/clk_inst/mmcme3_adv_inst/CLKOUT0]] 8 | set_clock_groups -name TIG_SRAI_2 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks drck] 9 | set_clock_groups -name TIG_SRAI_3 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks -of_objects [get_pins static_sh/pcie_inst/inst/gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] 10 | -------------------------------------------------------------------------------- /STAccel/template/build/constraints/cl_synth_user.xdc: -------------------------------------------------------------------------------- 1 | # This contains the CL specific constraints for synthesis at the CL level 2 | 3 | 4 | -------------------------------------------------------------------------------- /STAccel/template/design/cl_common_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | `ifndef CL_COMMON_DEFAULTS 17 | `define CL_COMMON_DEFAULTS 18 | 19 | // Value to return for PCIS access to unimplemented register address 20 | `define UNIMPLEMENTED_REG_VALUE 32'hdeaddead 21 | 22 | // CL Register Addresses 23 | `define VLED_REG_ADDR 32'h0000_0504 24 | 25 | `endif 26 | -------------------------------------------------------------------------------- /STAccel/template/design/cl_id_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // CL_SH_ID0 17 | // - PCIe Vendor/Device ID Values 18 | // 31:16: PCIe Device ID 19 | // 15: 0: PCIe Vendor ID 20 | // - A Vendor ID value of 0x8086 is not valid. 21 | // - If using a Vendor ID value of 0x1D0F (Amazon) then valid 22 | // values for Device ID's are in the range of 0xF000 - 0xF0FF. 23 | // - A Vendor/Device ID of 0 (zero) is not valid. 
24 | `define CL_SH_ID0 32'hF000_1D0F 25 | 26 | // CL_SH_ID1 27 | // - PCIe Subsystem/Subsystem Vendor ID Values 28 | // 31:16: PCIe Subsystem ID 29 | // 15: 0: PCIe Subsystem Vendor ID 30 | // - A PCIe Subsystem/Subsystem Vendor ID of 0 (zero) is not valid 31 | `define CL_SH_ID1 32'h1D51_FEDD 32 | 33 | 34 | -------------------------------------------------------------------------------- /STAccel/template/design/cl_main_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | `ifndef CL_MAIN 17 | `define CL_MAIN 18 | 19 | //Put module name of the CL design here. This is used to instantiate in top.sv 20 | `define CL_NAME cl_main 21 | 22 | //Highly recommended. For lib FIFO block, uses less async reset (take advantage of 23 | // FPGA flop init capability). This will help with routing resources. 24 | `define FPGA_LESS_RST 25 | 26 | // Uncomment to disable Virtual JTAG 27 | //`define DISABLE_VJTAG_DEBUG 28 | 29 | `endif 30 | -------------------------------------------------------------------------------- /STAccel/template/design/genip: -------------------------------------------------------------------------------- 1 | create_project -in_memory -force 2 | 3 | foreach file [glob -dir . 
*.tcl] {
  source $file
}

close_project
--------------------------------------------------------------------------------
/STAccel/template/software/runtime/Makefile:
--------------------------------------------------------------------------------
# Amazon FPGA Hardware Development Kit
#
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#
#    http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

#VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include

INCLUDES = -I$(SDK_DIR)/userspace/include

# NOTE: CPP/CPPFLAGS are used here as "C++ compiler" and "C++ flags".
CPP = g++
CPPFLAGS = -DCONFIG_LOGLEVEL=4 -g -Wall $(INCLUDES) -std=c++11 -lpthread -fopenmp -O3

LDLIBS = -lfpga_mgmt -lrt -lpthread

SRC = test_main.cpp fpga_util.cpp
# The sources are .cpp files, so the substitution must match ".cpp"; with
# ".c" nothing matched and OBJ was just the list of sources.
OBJ = $(SRC:.cpp=.o)
BIN = test_main

.PHONY: all clean check_env

all: $(BIN) check_env

$(BIN): $(OBJ)
	$(CPP) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS)

# Compile each source with the same compiler and flags used for linking.
%.o: %.cpp
	$(CPP) $(CPPFLAGS) -c -o $@ $<

clean:
	rm -f *.o $(BIN)

check_env:
ifndef SDK_DIR
    $(error SDK_DIR is undefined. Try "source sdk_setup.sh" to set the software environment)
endif
--------------------------------------------------------------------------------
/STAccel/template/software/runtime/const.h:
--------------------------------------------------------------------------------
#ifndef CONST_H_
#define CONST_H_

#define PAGE_SIZE (1024 * 1024 * 4)
#define INUM_LIMIT (16)
#define ONUM_LIMIT (16)
#define PULL_INPUT_REG (0x0)
#define PUSH_INPUT_REG (0x1)
#define PULL_OUTPUT_REG (0x2)
#define PUSH_OUTPUT_REG (0x3)
#define PUSH_BUF_INIT_DATA_REG (0x4)
#define PULL_BUF_INIT_READY_REG (0x5)
#define SLOT_ID (0)
#define BUF_SIZE (1024 * 1024 * 4)

#endif
--------------------------------------------------------------------------------
/STAccel/template/software/runtime/fpga_util.cpp:
--------------------------------------------------------------------------------
#include "const.h"
#include "fpga_mgmt.h"
#include "fpga_pci.h"
#include "utils/lcd.h"
// Needed for pthread_mutex_t / PTHREAD_MUTEX_INITIALIZER below.
#include <pthread.h>

// Same vendor/device pair as CL_SH_ID0 (32'hF000_1D0F) in cl_id_defines.vh.
uint16_t _pci_vendor_id = 0x1D0F;
uint16_t _pci_device_id = 0xF000;
pci_bar_handle_t _pci_bar_handle;
struct fpga_pci_bar *_bar;
// Per-channel buffer tables, sized by INUM_LIMIT / ONUM_LIMIT from const.h.
void *_IBufs[INUM_LIMIT];
void *_OBufs[ONUM_LIMIT];
uint64_t _IBufs_phy[INUM_LIMIT];
uint64_t _OBufs_phy[ONUM_LIMIT];
int _configfds[INUM_LIMIT + ONUM_LIMIT];
uint8_t _inputL = 0;
uint8_t _inputR = 0;
bool _input_empty = false;
uint8_t _outputL = 0;
uint8_t _outputR = 0;
bool _output_empty = false;
unsigned char _comm_Inum = 0;
unsigned char _comm_Onum = 0;
pthread_mutex_t _receive_control_msg_mutex = PTHREAD_MUTEX_INITIALIZER;
--------------------------------------------------------------------------------
/STAccel/template/software/verif_rtl/Makefile:
--------------------------------------------------------------------------------
# Amazon FPGA Hardware Development Kit
#
# Copyright 2016
Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 18 | 19 | C_SRCS := test_main.c pcie_utils.c cl_utils.c main.c 20 | C_OBJS := $(C_SRCS:.c=.o) 21 | 22 | CC = gcc 23 | CFLAGS = -I ./include 24 | CFLAGS += -I $(HDK_DIR)/common/software/include 25 | all: test_main 26 | 27 | test_main: $(C_SRCS) 28 | $(CC) $(CFLAGS) -o $@ $^ 29 | 30 | clean: 31 | rm test_main 32 | -------------------------------------------------------------------------------- /STAccel/template/software/verif_rtl/include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/STAccel/template/software/verif_rtl/include/.gitignore -------------------------------------------------------------------------------- /STAccel/template/software/verif_rtl/src/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/STAccel/template/software/verif_rtl/src/.gitignore -------------------------------------------------------------------------------- /STAccel/template/software/verif_rtl/src/test_main.c: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | 
// Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // Vivado does not support svGetScopeFromName 22 | //#ifdef INCLUDE_DPI_CALLS 23 | #ifndef VIVADO_SIM 24 | #include "svdpi.h" 25 | #endif 26 | //#endif 27 | 28 | #include "sh_dpi_tasks.h" 29 | 30 | void test_main(uint32_t *exit_code) { 31 | 32 | // Vivado does not support svGetScopeFromName 33 | //#ifdef INCLUDE_DPI_CALLS 34 | #ifndef VIVADO_SIM 35 | svScope scope; 36 | #endif 37 | //#endif 38 | 39 | uint32_t rdata; 40 | 41 | // Vivado does not support svGetScopeFromName 42 | //#ifdef INCLUDE_DPI_CALLS 43 | #ifndef VIVADO_SIM 44 | scope = svGetScopeFromName("tb"); 45 | svSetScope(scope); 46 | #endif 47 | 48 | simulator(); 49 | 50 | *exit_code = 0; 51 | } 52 | 53 | void user_simulation_function() { 54 | // PUT YOUR CODE HERE 55 | } 56 | -------------------------------------------------------------------------------- /STAccel/template/software/verif_rtl/src/test_null.c: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. 
A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | // Vivado does not support svGetScopeFromName 21 | #ifdef INCLUDE_DPI_CALLS 22 | #ifndef VIVADO_SIM 23 | #include "svdpi.h" 24 | #endif 25 | #endif 26 | 27 | #include "sh_dpi_tasks.h" 28 | 29 | void test_main(uint32_t *exit_code) { 30 | 31 | // NULL Test 32 | 33 | *exit_code = 0; 34 | } 35 | -------------------------------------------------------------------------------- /STAccel/template/verif/scripts/open_waves.tcl: -------------------------------------------------------------------------------- 1 | current_fileset 2 | open_wave_database tb.wdb 3 | -------------------------------------------------------------------------------- /STAccel/template/verif/scripts/waves.tcl: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | set curr_wave [current_wave_config] 17 | if { [string length $curr_wave] == 0 } { 18 | if { [llength [get_objects]] > 0} { 19 | add_wave -recursive / 20 | set_property needs_save false [current_wave_config] 21 | } else { 22 | send_msg_id Add_Wave-1 WARNING "No top level signals found. Simulator will start without a wave window. If you want to open a wave window go to 'File->New Waveform Configuration' or type 'create_wave_config' in the TCL console." 23 | } 24 | } 25 | 26 | run 200 us 27 | quit 28 | -------------------------------------------------------------------------------- /STAccel/template/verif/tests/test_main.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | 17 | module test_main(); 18 | 19 | import tb_type_defines_pkg::*; 20 | `include "cl_common_defines.vh" // CL Defines with register addresses 21 | 22 | // AXI ID 23 | parameter [5:0] AXI_ID = 6'h0; 24 | 25 | logic [31:0] rdata; 26 | logic [15:0] vdip_value; 27 | logic [15:0] vled_value; 28 | 29 | 30 | initial begin 31 | 32 | tb.power_up(); 33 | 34 | tb.set_virtual_dip_switch(.dip(0)); 35 | 36 | vdip_value = tb.get_virtual_dip_switch(); 37 | 38 | // PUT SIM LOGIC HERE 39 | 40 | tb.kernel_reset(); 41 | 42 | tb.power_down(); 43 | 44 | $finish; 45 | end 46 | 47 | endmodule 48 | -------------------------------------------------------------------------------- /STAccel/template/verif/tests/test_null.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | module test_null(); 17 | 18 | initial begin 19 | int exit_code; 20 | 21 | tb.power_up(); 22 | 23 | 24 | tb.test_main(exit_code); 25 | 26 | #50ns; 27 | 28 | tb.power_down(); 29 | 30 | $finish; 31 | end 32 | 33 | endmodule // test_null 34 | -------------------------------------------------------------------------------- /apps/device/grep/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #include 5 | 6 | #define MAX_GREP_PARAM_SIZE (32) 7 | #define POKE_WIDTH_BYTES (4) 8 | #define COMPARE_CHUNK_SIZE (4) 9 | #define MATCHING_VEC_SIZE (DATA_BUS_WIDTH + MAX_GREP_PARAM_SIZE - 1) 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /apps/device/grep/inc/structure.h: -------------------------------------------------------------------------------- 1 | #ifndef STRUCTURE_H_ 2 | #define STRUCTURE_H_ 3 | 4 | #include "constant.h" 5 | 6 | struct Grep_Input_Data { 7 | char data[MATCHING_VEC_SIZE]; 8 | int col_indices[MATCHING_VEC_SIZE]; 9 | int row_index; 10 | bool eop; 11 | }; 12 | 13 | struct Matched_Pos { 14 | int row_index; 15 | int col_index; 16 | bool eop; 17 | bool valid; 18 | }; 19 | 20 | struct Matching_Vec { 21 | bool matched[MATCHING_VEC_SIZE + COMPARE_CHUNK_SIZE - 1]; 22 | int row_index; 23 | int col_indices[MATCHING_VEC_SIZE + COMPARE_CHUNK_SIZE - 1]; 24 | bool eop; 25 | bool valid; 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /apps/device/grep/interconnects.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "constant.h" 4 | #include "structure.h" 5 | 6 | #include "app_grep_input_preprocessor.cpp" 7 | #include "app_grep_matcher.cpp" 8 | #include "app_grep_reducer.cpp" 9 | #include "app_grep_writer.cpp" 10 | // Connects the grep kernels in order: input preprocessor -> matcher -> reducer -> writer. 11 | void interconnects() { 12 | 13 | ST_Queue 
app_grep_input_data_chan(16); 14 | ST_Queue app_grep_matched_pos_chan(64); 15 | ST_Queue app_grep_matching_vecs(16); 16 | ST_Queue app_grep_scanned_matching_vecs(16); 17 | // NOTE(review): app_grep_scanned_matching_vecs is declared above but not passed to any kernel call below. 18 | app_grep_input_preprocessor(app_input_data, app_grep_input_data_chan); 19 | app_grep_matcher(app_input_params, app_grep_input_data_chan, 20 | app_grep_matching_vecs); 21 | app_grep_reducer(app_grep_matching_vecs, app_grep_matched_pos_chan); 22 | app_grep_writer(app_output_data, app_grep_matched_pos_chan); 23 | } 24 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #define APP_QUERY_LENGTH (32) // should be less than APP_COL_NUM 5 | #define APP_COL_NUM (64) 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/inc/structure.h: -------------------------------------------------------------------------------- 1 | #ifndef STRUCTURE_H_ 2 | #define STRUCTURE_H_ 3 | struct APP_Match { 4 | APP_Match() {} 5 | unsigned char record[64]; 6 | unsigned char match[32]; 7 | bool eop; 8 | }; 9 | 10 | struct APP_Ver_Param { 11 | APP_Ver_Param() {} 12 | unsigned char query[32]; 13 | unsigned int thres; 14 | }; 15 | 16 | struct APP_Ver_Record { 17 | APP_Ver_Record() {} 18 | unsigned char record[64]; 19 | unsigned int valid; 20 | bool eop; 21 | }; 22 | struct APP_Reduce_Record32 { 23 | APP_Reduce_Record32() {} 24 | unsigned char overlap[32]; 25 | unsigned char record[64]; 26 | bool valid; 27 | bool eop; 28 | }; 29 | 30 | struct APP_Reduce_Record16 { 31 | APP_Reduce_Record16() {} 32 | unsigned char overlap[16]; 33 | unsigned char record[64]; 34 | bool valid; 35 | bool eop; 36 | }; 37 | 38 | struct APP_Reduce_Record8 { 39 | APP_Reduce_Record8() {} 40 | unsigned char overlap[8]; 41 | unsigned char record[64]; 42 | bool valid; 
43 | bool eop; 44 | }; 45 | 46 | struct APP_Reduce_Record4 { 47 | APP_Reduce_Record4() {} 48 | unsigned char overlap[4]; 49 | unsigned char record[64]; 50 | bool valid; 51 | bool eop; 52 | }; 53 | 54 | struct APP_Reduce_Record2 { 55 | APP_Reduce_Record2() {} 56 | unsigned char overlap[2]; 57 | unsigned char record[64]; 58 | bool valid; 59 | bool eop; 60 | }; 61 | #endif 62 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/kernels/app_input_data_merger.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INPUT_DATA_MERGER_CPP_ 2 | #define APP_INPUT_DATA_MERGER_CPP_ 3 | 4 | #include 5 | // Holds back one input flit so that a following eop flit can be folded into it before it is forwarded. 6 | // The LBA is block-aligned which implies that it is also 64B-aligned. 7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 
37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); // NOTE(review): has_delayed_app stays true after this write; only the reset branch clears it - confirm a reset always precedes the next packet 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | // Merges host and device DRAM read requests (host is checked first); a device request is taken only while at least DRAM_READ_BATCH_NUM response-buffer credits remain. 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned char available_device_dram_resp_buf_flits = 2 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | // Routes each unified read response to the host or device queue using the origin recorded per request; every device flit also writes one entry to release_device_dram_resp_buf_flits. 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue 
&release_device_dram_resp_buf_flits, 10 | ST_Queue &after_throttle_unified_dram_read_resp, 11 | ST_Queue &dram_read_context_read) { 12 | bool valid_state = 0; 13 | bool data_state; 14 | bool valid_read_resp = 0; 15 | Dram_Read_Resp data_read_resp; 16 | 17 | while (1) { 18 | #pragma HLS pipeline 19 | if (!valid_state) { 20 | valid_state = dram_read_context_read.read_nb(data_state); 21 | } 22 | if (!valid_read_resp) { 23 | valid_read_resp = 24 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 25 | } 26 | 27 | if (valid_state && valid_read_resp) { 28 | valid_read_resp = false; 29 | if (data_state == HOST_READ_REQ) { 30 | host_dram_read_resp.write(data_read_resp); 31 | } else { 32 | device_dram_read_resp.write(data_read_resp); 33 | release_device_dram_resp_buf_flits.write(0); 34 | } 35 | if (data_read_resp.last) { 36 | valid_state = false; 37 | } 38 | } 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/kernels/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/design/cl_common_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). 
You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | `ifndef CL_COMMON_DEFAULTS 17 | `define CL_COMMON_DEFAULTS 18 | 19 | // Value to return for PCIS access to unimplemented register address 20 | `define UNIMPLEMENTED_REG_VALUE 32'hdeaddead 21 | 22 | // CL Register Addresses 23 | `define VLED_REG_ADDR 32'h0000_0504 24 | 25 | `endif 26 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/design/cl_id_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // CL_SH_ID0 17 | // - PCIe Vendor/Device ID Values 18 | // 31:16: PCIe Device ID 19 | // 15: 0: PCIe Vendor ID 20 | // - A Vendor ID value of 0x8086 is not valid. 21 | // - If using a Vendor ID value of 0x1D0F (Amazon) then valid 22 | // values for Device ID's are in the range of 0xF000 - 0xF0FF. 
23 | // - A Vendor/Device ID of 0 (zero) is not valid. 24 | `define CL_SH_ID0 32'hF000_1D0F 25 | 26 | // CL_SH_ID1 27 | // - PCIe Subsystem/Subsystem Vendor ID Values 28 | // 31:16: PCIe Subsystem ID 29 | // 15: 0: PCIe Subsystem Vendor ID 30 | // - A PCIe Subsystem/Subsystem Vendor ID of 0 (zero) is not valid 31 | `define CL_SH_ID1 32'h1D51_FEDD 32 | 33 | 34 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/design/cl_main_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | `ifndef CL_MAIN 17 | `define CL_MAIN 18 | 19 | //Put module name of the CL design here. This is used to instantiate in top.sv 20 | `define CL_NAME cl_main 21 | 22 | //Highly recommended. For lib FIFO block, uses less async reset (take advantage of 23 | // FPGA flop init capability). This will help with routing resources. 
24 | `define FPGA_LESS_RST 25 | 26 | // Uncomment to disable Virtual JTAG 27 | //`define DISABLE_VJTAG_DEBUG 28 | 29 | `endif 30 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/design/genip: -------------------------------------------------------------------------------- 1 | create_project -in_memory -force 2 | 3 | foreach file [glob -dir . *.tcl] { 4 | source $file 5 | } 6 | 7 | close_project 8 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/runtime/Makefile: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | #VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 17 | 18 | INCLUDES = -I$(SDK_DIR)/userspace/include 19 | 20 | CPP = g++ 21 | CPPFLAGS = -DCONFIG_LOGLEVEL=4 -g -Wall $(INCLUDES) -std=c++11 -lpthread -fopenmp -O3 22 | 23 | LDLIBS = -lfpga_mgmt -lrt -lpthread 24 | # The sources are C++ files, so the object list must substitute the .cpp suffix (a .c pattern matches nothing and leaves OBJ equal to SRC). 25 | SRC = test_main.cpp fpga_util.cpp 26 | OBJ = $(SRC:.cpp=.o) 27 | BIN = test_main 28 | .PHONY: all clean check_env 29 | all: $(BIN) check_env 30 | 31 | $(BIN): $(OBJ) 32 | $(CPP) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) 33 | 34 | clean: 35 | rm -f *.o $(BIN) 36 | 37 | check_env: 38 | ifndef SDK_DIR 39 | $(error SDK_DIR is undefined. 
Try "source sdk_setup.sh" to set the software environment) 40 | endif 41 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/runtime/const.h: -------------------------------------------------------------------------------- 1 | #ifndef CONST_H_ 2 | #define CONST_H_ 3 | 4 | #define PAGE_SIZE (1024 * 1024 * 4) 5 | #define INUM_LIMIT (16) 6 | #define ONUM_LIMIT (16) 7 | #define PULL_INPUT_REG (0x0) 8 | #define PUSH_INPUT_REG (0x1) 9 | #define PULL_OUTPUT_REG (0x2) 10 | #define PUSH_OUTPUT_REG (0x3) 11 | #define PUSH_BUF_INIT_DATA_REG (0x4) 12 | #define PULL_BUF_INIT_READY_REG (0x5) 13 | #define SLOT_ID (0) 14 | #define BUF_SIZE (1024 * 1024 * 4) 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/runtime/fpga_util.cpp: -------------------------------------------------------------------------------- 1 | #include "const.h" 2 | #include "fpga_mgmt.h" 3 | #include "fpga_pci.h" 4 | #include "utils/lcd.h" 5 | #include 6 | 7 | uint16_t _pci_vendor_id = 0x1D0F; 8 | uint16_t _pci_device_id = 0xF000; 9 | pci_bar_handle_t _pci_bar_handle; 10 | struct fpga_pci_bar *_bar; 11 | void *_IBufs[INUM_LIMIT]; 12 | void *_OBufs[ONUM_LIMIT]; 13 | uint64_t _IBufs_phy[INUM_LIMIT]; 14 | uint64_t _OBufs_phy[ONUM_LIMIT]; 15 | int _configfds[INUM_LIMIT + ONUM_LIMIT]; 16 | uint8_t _inputL = 0; 17 | uint8_t _inputR = 0; 18 | bool _input_empty = false; 19 | uint8_t _outputL = 0; 20 | uint8_t _outputR = 0; 21 | bool _output_empty = false; 22 | unsigned char _comm_Inum = 0; 23 | unsigned char _comm_Onum = 0; 24 | pthread_mutex_t _receive_control_msg_mutex = PTHREAD_MUTEX_INITIALIZER; 25 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/verif_rtl/Makefile: -------------------------------------------------------------------------------- 1 | # Amazon 
FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 18 | 19 | C_SRCS := test_main.c pcie_utils.c cl_utils.c main.c 20 | C_OBJS := $(C_SRCS:.c=.o) 21 | 22 | CC = gcc 23 | CFLAGS = -I ./include 24 | CFLAGS += -I $(HDK_DIR)/common/software/include 25 | all: test_main 26 | 27 | test_main: $(C_SRCS) 28 | $(CC) $(CFLAGS) -o $@ $^ 29 | .PHONY: all clean 30 | clean: 31 | rm -f test_main 32 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/verif_rtl/include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/integration/cosim/project/software/verif_rtl/include/.gitignore -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/software/verif_rtl/src/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/integration/cosim/project/software/verif_rtl/src/.gitignore -------------------------------------------------------------------------------- 
/apps/device/integration/cosim/project/software/verif_rtl/src/test_null.c: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | // Vivado does not support svGetScopeFromName 21 | #ifdef INCLUDE_DPI_CALLS 22 | #ifndef VIVADO_SIM 23 | #include "svdpi.h" 24 | #endif 25 | #endif 26 | 27 | #include "sh_dpi_tasks.h" 28 | // Null test entry point: performs no checks and stores 0 in *exit_code (presumably the success code - confirm against the caller). exit_code is dereferenced without a NULL check. 29 | void test_main(uint32_t *exit_code) { 30 | 31 | // NULL Test 32 | 33 | *exit_code = 0; 34 | } 35 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/verif/scripts/open_waves.tcl: -------------------------------------------------------------------------------- 1 | current_fileset 2 | open_wave_database tb.wdb 3 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/verif/scripts/waves.tcl: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. 
A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set curr_wave [current_wave_config] 17 | if { [string length $curr_wave] == 0 } { 18 | if { [llength [get_objects]] > 0} { 19 | add_wave -recursive / 20 | set_property needs_save false [current_wave_config] 21 | } else { 22 | send_msg_id Add_Wave-1 WARNING "No top level signals found. Simulator will start without a wave window. If you want to open a wave window go to 'File->New Waveform Configuration' or type 'create_wave_config' in the TCL console." 23 | } 24 | } 25 | 26 | run 200 us 27 | quit 28 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/verif/tests/test_main.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | // Simulation test skeleton: calls tb.power_up(), zeroes the virtual DIP switch, reads it back, calls tb.kernel_reset() and tb.power_down(), then ends the simulation. 17 | module test_main(); 18 | 19 | import tb_type_defines_pkg::*; 20 | `include "cl_common_defines.vh" // CL Defines with register addresses 21 | 22 | // AXI ID 23 | parameter [5:0] AXI_ID = 6'h0; 24 | 25 | logic [31:0] rdata; // NOTE(review): not referenced anywhere else in this module 26 | logic [15:0] vdip_value; // written once below, never read in this module 27 | logic [15:0] vled_value; // NOTE(review): not referenced anywhere else in this module 28 | 29 | 30 | initial begin 31 | 32 | tb.power_up(); 33 | 34 | tb.set_virtual_dip_switch(.dip(0)); 35 | 36 | vdip_value = tb.get_virtual_dip_switch(); 37 | 38 | // PUT SIM LOGIC HERE 39 | 40 | tb.kernel_reset(); 41 | 42 | tb.power_down(); 43 | 44 | $finish; 45 | end 46 | 47 | endmodule 48 | -------------------------------------------------------------------------------- /apps/device/integration/cosim/project/verif/tests/test_null.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | module test_null(); 17 | 18 | initial begin 19 | int exit_code; 20 | 21 | tb.power_up(); 22 | 23 | 24 | tb.test_main(exit_code); 25 | 26 | #50ns; 27 | 28 | tb.power_down(); 29 | 30 | $finish; 31 | end 32 | 33 | endmodule // test_null 34 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/bin/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/csim_compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | CXXFLAGS="-g" 5 | 6 | g++ $CXXFLAGS src/interconnects.cpp -DCSIM -I/usr/include/staccel -I/usr/include/insider -I/usr/include/hls_csim -I src -I inc -std=c++11 -pthread -o bin/csim 7 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #define APP_QUERY_LENGTH (32) // should be less than APP_COL_NUM 5 | #define APP_COL_NUM (64) 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/inc/structure.h: -------------------------------------------------------------------------------- 1 | #ifndef STRUCTURE_H_ 2 | #define STRUCTURE_H_ 3 | struct APP_Match { 4 | APP_Match() {} 5 | unsigned char record[64]; 6 | unsigned char match[32]; 7 | bool eop; 8 | }; 9 | 10 | struct APP_Ver_Param { 11 | APP_Ver_Param() {} 12 | unsigned char query[32]; 13 | unsigned int thres; 14 | }; 15 | 16 | struct APP_Ver_Record { 17 | APP_Ver_Record() {} 18 | unsigned char record[64]; 19 | unsigned int valid; 20 | bool eop; 21 | }; 22 | struct APP_Reduce_Record32 { 23 | APP_Reduce_Record32() {} 
24 | unsigned char overlap[32]; 25 | unsigned char record[64]; 26 | bool valid; 27 | bool eop; 28 | }; 29 | 30 | struct APP_Reduce_Record16 { 31 | APP_Reduce_Record16() {} 32 | unsigned char overlap[16]; 33 | unsigned char record[64]; 34 | bool valid; 35 | bool eop; 36 | }; 37 | 38 | struct APP_Reduce_Record8 { 39 | APP_Reduce_Record8() {} 40 | unsigned char overlap[8]; 41 | unsigned char record[64]; 42 | bool valid; 43 | bool eop; 44 | }; 45 | 46 | struct APP_Reduce_Record4 { 47 | APP_Reduce_Record4() {} 48 | unsigned char overlap[4]; 49 | unsigned char record[64]; 50 | bool valid; 51 | bool eop; 52 | }; 53 | 54 | struct APP_Reduce_Record2 { 55 | APP_Reduce_Record2() {} 56 | unsigned char overlap[2]; 57 | unsigned char record[64]; 58 | bool valid; 59 | bool eop; 60 | }; 61 | #endif 62 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/dram_read_delay_unit.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_DELAY_UNIT_CPP_ 2 | #define DRAM_READ_DELAY_UNIT_CPP_ 3 | 4 | #include 5 | // Holds each timestamped read request until delay_cycle_cnt loop iterations have passed since its timestamp, then forwards the bare request. 6 | void dram_read_delay_unit( 7 | ST_Queue &device_read_delay_cycle_cnts, 8 | ST_Queue 9 | &before_delay_unified_dram_read_req_with_time, 10 | ST_Queue &after_delay_unified_dram_read_req) { 11 | unsigned long long time = 0; 12 | unsigned long long delay_cycle_cnt = 0; 13 | Dram_Read_Req_With_Time read_req_with_time; 14 | bool valid_read_req_with_time = false; 15 | // `time` is incremented once per loop iteration and is the clock the delay comparison below uses. 16 | 17 | 18 | while (1) { 19 | #pragma HLS pipeline 20 | unsigned int new_delay_cycle_cnt; 21 | if (device_read_delay_cycle_cnts.read_nb(new_delay_cycle_cnt)) { 22 | delay_cycle_cnt = new_delay_cycle_cnt; 23 | } 24 | 25 | if (valid_read_req_with_time || 26 | (valid_read_req_with_time = 27 | before_delay_unified_dram_read_req_with_time.read_nb( 28 | read_req_with_time))) { 29 | if 
(read_req_with_time.time + delay_cycle_cnt <= time) { 30 | if (after_delay_unified_dram_read_req.write_nb( 31 | read_req_with_time.req)) { 32 | valid_read_req_with_time = false; 33 | } 34 | } 35 | } 36 | time++; 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | // Merges host and device read requests into one stream (host is checked first) and records each request's origin in dram_read_context_write. 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &before_delay_unified_dram_read_req, 10 | ST_Queue &dram_read_context_write) { 11 | while (1) { 12 | #pragma HLS pipeline 13 | Dram_Read_Req req; 14 | if (host_dram_read_req.read_nb(req)) { 15 | dram_read_context_write.write(HOST_READ_REQ); 16 | before_delay_unified_dram_read_req.write(req); 17 | } else if (device_dram_read_req.read_nb(req)) { 18 | dram_read_context_write.write(DEVICE_READ_REQ); 19 | before_delay_unified_dram_read_req.write(req); 20 | } 21 | } 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/dram_read_req_time_marker.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_REQ_TIME_MARKER_CPP_ 2 | #define DRAM_READ_REQ_TIME_MARKER_CPP_ 3 | 4 | #include 5 | // Stamps every incoming read request with the current loop-iteration counter and forwards it; a request is retried until the non-blocking write succeeds. 6 | void dram_read_req_time_marker( 7 | ST_Queue &before_delay_unified_dram_read_req, 8 | ST_Queue 9 | &before_delay_unified_dram_read_req_with_time) { 10 | unsigned long long time = 0; 11 | bool valid_read_req = false; 12 | Dram_Read_Req read_req; 13 | 14 | while (1) { 15 | #pragma HLS pipeline 16 | if (valid_read_req || 17 | (valid_read_req = 18 | before_delay_unified_dram_read_req.read_nb(read_req))) { 19 | Dram_Read_Req_With_Time read_req_with_time; 20 | 
read_req_with_time.req = read_req; 21 | read_req_with_time.time = time; 22 | // valid_read_req is already true here (guaranteed by the enclosing condition); it is cleared only once the write succeeds. 23 | if (before_delay_unified_dram_read_req_with_time.write_nb( 24 | read_req_with_time)) { 25 | valid_read_req = false; 26 | } 27 | } 28 | time++; 29 | } 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | // Routes each response flit to the host or device queue according to the recorded request origin; the origin is consumed when a flit with `last` set passes. 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &after_throttle_unified_dram_read_resp, 10 | ST_Queue &dram_read_context_read) { 11 | bool valid_state = 0; 12 | bool data_state; 13 | bool valid_read_resp = 0; 14 | Dram_Read_Resp data_read_resp; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | if (!valid_state) { 19 | valid_state = dram_read_context_read.read_nb(data_state); 20 | } 21 | if (!valid_read_resp) { 22 | valid_read_resp = 23 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 24 | } 25 | 26 | if (valid_state && valid_read_resp) { 27 | valid_read_resp = false; 28 | if (data_state == HOST_READ_REQ) { 29 | host_dram_read_resp.write(data_read_resp); 30 | } else { 31 | device_dram_read_resp.write(data_read_resp); 32 | } 33 | if (data_read_resp.last) { 34 | valid_state = false; 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | // Forwards every PCIe read response unchanged from the input queue to the pre-throttle queue. 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 
| while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/integration/csim/csim/src/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | // Fans each token read from reset_sigs out to every reset queue passed in. 6 | void reset_propaganda(ST_Queue &reset_app_intg_matcher, 7 | ST_Queue &reset_app_intg_mat_rdc, 8 | ST_Queue &reset_app_intg_rdc_16to8, 9 | ST_Queue &reset_app_intg_rdc_32to16, 10 | ST_Queue &reset_app_intg_rdc_4to2, 11 | ST_Queue &reset_app_intg_rdc_8to4, 12 | ST_Queue &reset_app_intg_verifier, 13 | ST_Queue &reset_app_intg_writer, 14 | ST_Queue &reset_sigs, 15 | ST_Queue &reset_dram_helper_app, 16 | ST_Queue &reset_pcie_helper_app, 17 | ST_Queue &reset_pcie_data_splitter_app) { 18 | while (1) { 19 | #pragma HLS pipeline 20 | bool dummy; 21 | if (reset_sigs.read_nb(dummy)) { 22 | reset_dram_helper_app.write(0); 23 | reset_pcie_helper_app.write(0); 24 | reset_pcie_data_splitter_app.write(0); 25 | 26 | reset_app_intg_matcher.write(0); 27 | reset_app_intg_mat_rdc.write(0); 28 | reset_app_intg_rdc_16to8.write(0); 29 | reset_app_intg_rdc_32to16.write(0); 30 | reset_app_intg_rdc_4to2.write(0); 31 | reset_app_intg_rdc_8to4.write(0); 32 | reset_app_intg_verifier.write(0); 33 | reset_app_intg_writer.write(0); 34 | } 35 | } 36 | } 37 | #endif -------------------------------------------------------------------------------- /apps/device/integration/csim/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #define APP_QUERY_LENGTH (32) // should be less than APP_COL_NUM 5 | #define APP_COL_NUM (64) 6 | 7 | #endif 8 | 
-------------------------------------------------------------------------------- /apps/device/integration/csim/inc/structure.h: -------------------------------------------------------------------------------- 1 | #ifndef STRUCTURE_H_ 2 | #define STRUCTURE_H_ 3 | struct APP_Match { 4 | APP_Match() {} 5 | unsigned char record[64]; 6 | unsigned char match[32]; 7 | bool eop; 8 | }; 9 | 10 | struct APP_Ver_Param { 11 | APP_Ver_Param() {} 12 | unsigned char query[32]; 13 | unsigned int thres; 14 | }; 15 | 16 | struct APP_Ver_Record { 17 | APP_Ver_Record() {} 18 | unsigned char record[64]; 19 | unsigned int valid; 20 | bool eop; 21 | }; 22 | struct APP_Reduce_Record32 { 23 | APP_Reduce_Record32() {} 24 | unsigned char overlap[32]; 25 | unsigned char record[64]; 26 | bool valid; 27 | bool eop; 28 | }; 29 | 30 | struct APP_Reduce_Record16 { 31 | APP_Reduce_Record16() {} 32 | unsigned char overlap[16]; 33 | unsigned char record[64]; 34 | bool valid; 35 | bool eop; 36 | }; 37 | 38 | struct APP_Reduce_Record8 { 39 | APP_Reduce_Record8() {} 40 | unsigned char overlap[8]; 41 | unsigned char record[64]; 42 | bool valid; 43 | bool eop; 44 | }; 45 | 46 | struct APP_Reduce_Record4 { 47 | APP_Reduce_Record4() {} 48 | unsigned char overlap[4]; 49 | unsigned char record[64]; 50 | bool valid; 51 | bool eop; 52 | }; 53 | 54 | struct APP_Reduce_Record2 { 55 | APP_Reduce_Record2() {} 56 | unsigned char overlap[2]; 57 | unsigned char record[64]; 58 | bool valid; 59 | bool eop; 60 | }; 61 | #endif 62 | -------------------------------------------------------------------------------- /apps/device/integration/csim/kernels/dram_read_delay_unit.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_DELAY_UNIT_CPP_ 2 | #define DRAM_READ_DELAY_UNIT_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_delay_unit( 7 | ST_Queue &device_read_delay_cycle_cnts, 8 | ST_Queue 9 | &before_delay_unified_dram_read_req_with_time, 10 | ST_Queue 
&after_delay_unified_dram_read_req) { 11 | unsigned long long time = 0; 12 | unsigned long long delay_cycle_cnt = 0; 13 | Dram_Read_Req_With_Time read_req_with_time; 14 | bool valid_read_req_with_time = false; 15 | Dram_Write_Req_Apply_With_Time write_req_apply_with_time; 16 | bool valid_write_req_apply_with_time = false; 17 | 18 | while (1) { 19 | #pragma HLS pipeline 20 | unsigned int new_delay_cycle_cnt; 21 | if (device_read_delay_cycle_cnts.read_nb(new_delay_cycle_cnt)) { 22 | delay_cycle_cnt = new_delay_cycle_cnt; 23 | } 24 | 25 | if (valid_read_req_with_time || 26 | (valid_read_req_with_time = 27 | before_delay_unified_dram_read_req_with_time.read_nb( 28 | read_req_with_time))) { 29 | if (read_req_with_time.time + delay_cycle_cnt <= time) { 30 | if (after_delay_unified_dram_read_req.write_nb( 31 | read_req_with_time.req)) { 32 | valid_read_req_with_time = false; 33 | } 34 | } 35 | } 36 | time++; 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/integration/csim/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &before_delay_unified_dram_read_req, 10 | ST_Queue &dram_read_context_write) { 11 | while (1) { 12 | #pragma HLS pipeline 13 | Dram_Read_Req req; 14 | if (host_dram_read_req.read_nb(req)) { 15 | dram_read_context_write.write(HOST_READ_REQ); 16 | before_delay_unified_dram_read_req.write(req); 17 | } else if (device_dram_read_req.read_nb(req)) { 18 | dram_read_context_write.write(DEVICE_READ_REQ); 19 | before_delay_unified_dram_read_req.write(req); 20 | } 21 | } 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- 
/apps/device/integration/csim/kernels/dram_read_req_time_marker.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_REQ_TIME_MARKER_CPP_ 2 | #define DRAM_READ_REQ_TIME_MARKER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_time_marker( 7 | ST_Queue &before_delay_unified_dram_read_req, 8 | ST_Queue 9 | &before_delay_unified_dram_read_req_with_time) { 10 | unsigned long long time = 0; 11 | bool valid_read_req = false; 12 | Dram_Read_Req read_req; 13 | 14 | while (1) { 15 | #pragma HLS pipeline 16 | if (valid_read_req || 17 | (valid_read_req = 18 | before_delay_unified_dram_read_req.read_nb(read_req))) { 19 | Dram_Read_Req_With_Time read_req_with_time; 20 | read_req_with_time.req = read_req; 21 | read_req_with_time.time = time; 22 | valid_read_req = true; 23 | if (before_delay_unified_dram_read_req_with_time.write_nb( 24 | read_req_with_time)) { 25 | valid_read_req = false; 26 | } 27 | } 28 | time++; 29 | } 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /apps/device/integration/csim/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &after_throttle_unified_dram_read_resp, 10 | ST_Queue &dram_read_context_read) { 11 | bool valid_state = 0; 12 | bool data_state; 13 | bool valid_read_resp = 0; 14 | Dram_Read_Resp data_read_resp; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | if (!valid_state) { 19 | valid_state = dram_read_context_read.read_nb(data_state); 20 | } 21 | if (!valid_read_resp) { 22 | valid_read_resp = 23 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 24 | } 25 | 26 | if (valid_state && valid_read_resp) { 27 | 
valid_read_resp = false; 28 | if (data_state == HOST_READ_REQ) { 29 | host_dram_read_resp.write(data_read_resp); 30 | } else { 31 | device_dram_read_resp.write(data_read_resp); 32 | } 33 | if (data_read_resp.last) { 34 | valid_state = false; 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/integration/csim/kernels/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/integration/csim/kernels/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda(ST_Queue &reset_app_intg_matcher, 7 | ST_Queue &reset_app_intg_mat_rdc, 8 | ST_Queue &reset_app_intg_rdc_16to8, 9 | ST_Queue &reset_app_intg_rdc_32to16, 10 | ST_Queue &reset_app_intg_rdc_4to2, 11 | ST_Queue &reset_app_intg_rdc_8to4, 12 | ST_Queue &reset_app_intg_verifier, 13 | ST_Queue &reset_app_intg_writer, 14 | ST_Queue &reset_sigs, 15 | ST_Queue &reset_dram_helper_app, 16 | ST_Queue &reset_pcie_helper_app, 17 | ST_Queue &reset_pcie_data_splitter_app) { 18 | while (1) { 19 | #pragma HLS pipeline 20 | bool dummy; 21 | if (reset_sigs.read_nb(dummy)) { 22 | reset_dram_helper_app.write(0); 23 | reset_pcie_helper_app.write(0); 24 | reset_pcie_data_splitter_app.write(0); 25 | 26 | 
#ifndef CONSTANT_H_
#define CONSTANT_H_

// Application-wide size constants for the "integration" kernels.
// NOTE(review): structure.h in the same directory sizes its arrays with the
// literals 32 and 64 rather than with these macros - presumably they are
// meant to match; keep them in sync if either value changes.

// Query length. Per the original note this should be less than APP_COL_NUM.
#define APP_QUERY_LENGTH (32) // should be less than APP_COL_NUM
// Number of columns in a record.
#define APP_COL_NUM (64)

#endif
51 | bool eop; 52 | }; 53 | 54 | struct APP_Reduce_Record2 { 55 | APP_Reduce_Record2() {} 56 | unsigned char overlap[2]; 57 | unsigned char record[64]; 58 | bool valid; 59 | bool eop; 60 | }; 61 | #endif 62 | -------------------------------------------------------------------------------- /apps/device/integration/kernels/app_intg_mat_rdc.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INTG_MAT_RDC_H_ 2 | #define APP_INTG_MAT_RDC_H_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_intg_mat_rdc( 10 | ST_Queue &app_intg_mat_rdc_input, 11 | ST_Queue &app_intg_verifier_input_record) { 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Match match_in; 15 | APP_Ver_Record record_out; 16 | if (app_intg_mat_rdc_input.read_nb(match_in)) { 17 | bool reduced_match = 0; 18 | for (int i = 0; i < 32; i++) { 19 | #pragma HLS unroll 20 | reduced_match = reduced_match | match_in.match[i]; 21 | } 22 | if (reduced_match) { 23 | for (int i = 0; i < 64; i++) { 24 | record_out.record[i] = match_in.record[i]; 25 | } 26 | record_out.eop = match_in.eop; 27 | record_out.valid = true; 28 | app_intg_verifier_input_record.write(record_out); 29 | 30 | } else if (match_in.eop) { 31 | record_out.eop = true; 32 | record_out.valid = false; 33 | app_intg_verifier_input_record.write(record_out); 34 | } 35 | } 36 | } 37 | } 38 | #endif 39 | -------------------------------------------------------------------------------- /apps/device/integration/kernels/app_intg_rdc_16to8.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INTG_RDC16_CPP_ 2 | #define APP_INTG_RDC16_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_intg_rdc_16to8(ST_Queue &app_intg_rdc_input_16to8, 10 | ST_Queue &app_intg_rdc_input_8to4) { 11 | while (1) { 12 | #pragma HLS pipeline 13 | APP_Reduce_Record16 record_in; 14 | #pragma HLS 
array_partition variable = record_in.record complete 15 | APP_Reduce_Record8 record_out; 16 | #pragma HLS array_partition variable = record_out.record complete 17 | if (app_intg_rdc_input_16to8.read_nb(record_in)) { 18 | if (record_in.valid) { 19 | for (int i = 0; i < 16; i += 2) { 20 | #pragma HLS unroll 21 | record_out.overlap[i / 2] = 22 | record_in.overlap[i] + record_in.overlap[i + 1]; 23 | } 24 | for (int i = 0; i < 64; i++) { 25 | #pragma HLS unroll 26 | record_out.record[i] = record_in.record[i]; 27 | } 28 | } 29 | record_out.eop = record_in.eop; 30 | record_out.valid = record_in.valid; 31 | app_intg_rdc_input_8to4.write(record_out); 32 | } 33 | } 34 | } 35 | #endif 36 | -------------------------------------------------------------------------------- /apps/device/integration/kernels/app_intg_rdc_32to16.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INTG_RDC32_CPP_ 2 | #define APP_INTG_RDC32_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_intg_rdc_32to16( 10 | ST_Queue &app_intg_rdc_input_32to16, 11 | ST_Queue &app_intg_rdc_input_16to8) { 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Reduce_Record32 record_in; 15 | #pragma HLS array_partition variable = record_in.record complete 16 | APP_Reduce_Record16 record_out; 17 | #pragma HLS array_partition variable = record_out.record complete 18 | if (app_intg_rdc_input_32to16.read_nb(record_in)) { 19 | if (record_in.valid) { 20 | for (int i = 0; i < 32; i += 2) { 21 | #pragma HLS unroll 22 | record_out.overlap[i / 2] = 23 | record_in.overlap[i] + record_in.overlap[i + 1]; 24 | } 25 | for (int i = 0; i < 64; i++) { 26 | #pragma HLS unroll 27 | record_out.record[i] = record_in.record[i]; 28 | } 29 | } 30 | record_out.eop = record_in.eop; 31 | record_out.valid = record_in.valid; 32 | app_intg_rdc_input_16to8.write(record_out); 33 | } 34 | } 35 | } 36 | #endif 37 | 
-------------------------------------------------------------------------------- /apps/device/integration/kernels/app_intg_rdc_4to2.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INTG_RDC4_CPP_ 2 | #define APP_INTG_RDC4_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_intg_rdc_4to2( 10 | ST_Queue &app_intg_rdc_input_4to2, 11 | ST_Queue &app_intg_writer_input_record) { 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Reduce_Record4 record_in; 15 | #pragma HLS array_partition variable = record_in.record complete 16 | APP_Reduce_Record2 record_out; 17 | #pragma HLS array_partition variable = record_out.record complete 18 | if (app_intg_rdc_input_4to2.read_nb(record_in)) { 19 | if (record_in.valid) { 20 | for (int i = 0; i < 8; i += 2) { 21 | #pragma HLS unroll 22 | record_out.overlap[i / 2] = 23 | record_in.overlap[i] + record_in.overlap[i + 1]; 24 | } 25 | for (int i = 0; i < 64; i++) { 26 | #pragma HLS unroll 27 | record_out.record[i] = record_in.record[i]; 28 | } 29 | } 30 | record_out.eop = record_in.eop; 31 | record_out.valid = record_in.valid; 32 | app_intg_writer_input_record.write(record_out); 33 | } 34 | } 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/integration/kernels/app_intg_rdc_8to4.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INTG_RDC8_CPP_ 2 | #define APP_INTG_RDC8_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_intg_rdc_8to4(ST_Queue &app_intg_rdc_input_8to4, 10 | ST_Queue &app_intg_rdc_input_4to2) { 11 | while (1) { 12 | #pragma HLS pipeline 13 | APP_Reduce_Record8 record_in; 14 | #pragma HLS array_partition variable = record_in.record complete 15 | APP_Reduce_Record4 record_out; 16 | #pragma HLS array_partition variable = record_out.record complete 17 | if 
// describe the interconnection
// The KNN application consists of a single kernel, app_knn, connected
// directly to the input-data, output-data and input-parameter queues.
// NOTE(review): app_input_data, app_output_data and app_input_params are not
// declared anywhere in this file - presumably they are supplied by the
// surrounding framework/toolchain; confirm before renaming.
void interconnects() {
  app_knn(app_input_data, app_output_data, app_input_params);
}
#include 5 | 6 | // The LBA is block-aligned which implies that it is also 64B-aligned. 7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/kernels/app_pt.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_PT_CPP_ 2 | #define APP_PT_CPP_ 3 | #include 4 | 5 | void app_pt(ST_Queue &reset_app_pt, ST_Queue &app_input_data, 6 | ST_Queue &app_output_data, 7 | ST_Queue &app_input_params) { 8 | 9 | bool reset = false; 10 | unsigned reset_cnt = 0; 11 | while (1) { 12 | bool dummy; 13 | if (reset || (reset = reset_app_pt.read_nb(dummy))) { 14 | unsigned int dummy0; 15 | app_input_params.read_nb(dummy0); 16 | struct APP_Data dummy1; 17 | app_input_data.read_nb(dummy1); 18 | reset_cnt++; 19 | if (reset_cnt == RESET_CNT) { 20 | reset_cnt = 0; 21 | reset = false; 22 | } 23 | } else { 24 | 
#pragma HLS pipeline 25 | unsigned int dummy; 26 | app_input_params.read_nb(dummy); 27 | 28 | APP_Data data; 29 | if (app_input_data.read_nb(data)) { 30 | app_output_data.write(data); 31 | } 32 | } 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned char available_device_dram_resp_buf_flits = 2 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue 
&release_device_dram_resp_buf_flits, 10 | ST_Queue &after_throttle_unified_dram_read_resp, 11 | ST_Queue &dram_read_context_read) { 12 | bool valid_state = 0; 13 | bool data_state; 14 | bool valid_read_resp = 0; 15 | Dram_Read_Resp data_read_resp; 16 | 17 | while (1) { 18 | #pragma HLS pipeline 19 | if (!valid_state) { 20 | valid_state = dram_read_context_read.read_nb(data_state); 21 | } 22 | if (!valid_read_resp) { 23 | valid_read_resp = 24 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 25 | } 26 | 27 | if (valid_state && valid_read_resp) { 28 | valid_read_resp = false; 29 | if (data_state == HOST_READ_REQ) { 30 | host_dram_read_resp.write(data_read_resp); 31 | } else { 32 | device_dram_read_resp.write(data_read_resp); 33 | release_device_dram_resp_buf_flits.write(0); 34 | } 35 | if (data_read_resp.last) { 36 | valid_state = false; 37 | } 38 | } 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/kernels/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/kernels/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda( 7 | ST_Queue &reset_app_pt, ST_Queue &reset_sigs, 8 | ST_Queue &reset_read_mode_dram_helper_app, 9 | ST_Queue 
&reset_write_mode_dram_helper_app, 10 | ST_Queue &reset_read_mode_pcie_helper_app, 11 | ST_Queue &reset_write_mode_pcie_helper_app, 12 | ST_Queue &reset_pcie_data_splitter_app, 13 | ST_Queue &reset_app_output_data_demux, 14 | ST_Queue &reset_app_input_data_mux, 15 | ST_Queue &reset_write_mode_app_output_data_caching, 16 | ST_Queue &reset_app_input_data_merger, 17 | ST_Queue &reset_write_mode_pre_merged_app_input_data_forwarder) { 18 | while (1) { 19 | #pragma HLS pipeline 20 | bool dummy; 21 | if (reset_sigs.read_nb(dummy)) { 22 | reset_read_mode_dram_helper_app.write(0); 23 | reset_write_mode_dram_helper_app.write(0); 24 | reset_read_mode_pcie_helper_app.write(0); 25 | reset_write_mode_pcie_helper_app.write(0); 26 | reset_pcie_data_splitter_app.write(0); 27 | reset_app_output_data_demux.write(0); 28 | reset_app_input_data_mux.write(0); 29 | reset_write_mode_app_output_data_caching.write(0); 30 | reset_app_input_data_merger.write(0); 31 | reset_write_mode_pre_merged_app_input_data_forwarder.write(0); 32 | reset_app_pt.write(0); 33 | } 34 | } 35 | } 36 | #endif -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/design/cl_common_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | `ifndef CL_COMMON_DEFAULTS 17 | `define CL_COMMON_DEFAULTS 18 | 19 | // Value to return for PCIS access to unimplemented register address 20 | `define UNIMPLEMENTED_REG_VALUE 32'hdeaddead 21 | 22 | // CL Register Addresses 23 | `define VLED_REG_ADDR 32'h0000_0504 24 | 25 | `endif 26 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/design/cl_id_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // CL_SH_ID0 17 | // - PCIe Vendor/Device ID Values 18 | // 31:16: PCIe Device ID 19 | // 15: 0: PCIe Vendor ID 20 | // - A Vendor ID value of 0x8086 is not valid. 21 | // - If using a Vendor ID value of 0x1D0F (Amazon) then valid 22 | // values for Device ID's are in the range of 0xF000 - 0xF0FF. 23 | // - A Vendor/Device ID of 0 (zero) is not valid. 
`ifndef CL_MAIN
`define CL_MAIN

// Put the module name of the CL design here. This is used to instantiate it in top.sv
`define CL_NAME cl_main

// Highly recommended. For the lib FIFO block, uses fewer async resets (takes
// advantage of the FPGA flop init capability). This will help with routing resources.
`define FPGA_LESS_RST

// Uncomment to disable Virtual JTAG
//`define DISABLE_VJTAG_DEBUG

`endif
# Amazon FPGA Hardware Development Kit
#
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License"). You may not use
# this file except in compliance with the License. A copy of the License is
# located at
#
#    http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
# implied. See the License for the specific language governing permissions and
# limitations under the License.

#VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include

INCLUDES = -I$(SDK_DIR)/userspace/include

CPP = g++
CPPFLAGS = -DCONFIG_LOGLEVEL=4 -g -Wall $(INCLUDES) -std=c++11 -lpthread -fopenmp -O3

LDLIBS = -lfpga_mgmt -lrt -lpthread

SRC = test_main.cpp fpga_util.cpp
# The sources are .cpp files; the previous ".c=.o" substitution never matched,
# so OBJ was just the source list and no object files were ever produced.
OBJ = $(SRC:.cpp=.o)
BIN = test_main

# None of these targets name a file on disk.
.PHONY: all clean check_env

all: $(BIN) check_env

$(BIN): $(OBJ)
	$(CPP) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS)

# Compile with the same compiler and flags that the link step uses.
%.o: %.cpp
	$(CPP) $(CPPFLAGS) -c -o $@ $<

clean:
	rm -f *.o $(BIN)

# NOTE(review): the indentation of the $(error ...) line is not recoverable
# from this copy; it is written with spaces here so the check runs when the
# Makefile is parsed - confirm against upstream.
check_env:
ifndef SDK_DIR
    $(error SDK_DIR is undefined. Try "source sdk_setup.sh" to set the software environment)
endif
Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 18 | 19 | C_SRCS := test_main.c pcie_utils.c cl_utils.c main.c 20 | C_OBJS := $(C_SRCS:.c=.o) 21 | 22 | CC = gcc 23 | CFLAGS = -I ./include 24 | CFLAGS += -I $(HDK_DIR)/common/software/include 25 | all: test_main 26 | 27 | test_main: $(C_SRCS) 28 | $(CC) $(CFLAGS) -o $@ $^ 29 | 30 | clean: 31 | rm test_main 32 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/software/verif_rtl/include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/pt/cosim/project/software/verif_rtl/include/.gitignore -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/software/verif_rtl/src/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/pt/cosim/project/software/verif_rtl/src/.gitignore -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/software/verif_rtl/src/test_null.c: 
-------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | // Vivado does not support svGetScopeFromName 21 | #ifdef INCLUDE_DPI_CALLS 22 | #ifndef VIVADO_SIM 23 | #include "svdpi.h" 24 | #endif 25 | #endif 26 | 27 | #include "sh_dpi_tasks.h" 28 | 29 | void test_main(uint32_t *exit_code) { 30 | 31 | // NULL Test 32 | 33 | *exit_code = 0; 34 | } 35 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/verif/scripts/open_waves.tcl: -------------------------------------------------------------------------------- 1 | current_fileset 2 | open_wave_database tb.wdb 3 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/verif/scripts/waves.tcl: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. 
This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set curr_wave [current_wave_config] 17 | if { [string length $curr_wave] == 0 } { 18 | if { [llength [get_objects]] > 0} { 19 | add_wave -recursive / 20 | set_property needs_save false [current_wave_config] 21 | } else { 22 | send_msg_id Add_Wave-1 WARNING "No top level signals found. Simulator will start without a wave window. If you want to open a wave window go to 'File->New Waveform Configuration' or type 'create_wave_config' in the TCL console." 23 | } 24 | } 25 | 26 | run 200 us 27 | quit 28 | -------------------------------------------------------------------------------- /apps/device/pt/cosim/project/verif/tests/test_main.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 

// Null test: powers the testbench up, calls tb.test_main() to obtain an exit
// code, waits 50 ns, powers down and ends the simulation. The exit code is
// received but not inspected in this module.
module test_null();

  initial begin
    int exit_code;

    tb.power_up();

    tb.test_main(exit_code);

    #50ns;

    tb.power_down();

    $finish;
  end

endmodule // test_null

--------------------------------------------------------------------------------
/apps/device/pt/csim/bin/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/pt/csim/bin/.gitignore
--------------------------------------------------------------------------------
/apps/device/pt/csim/csim_compile.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the C-simulation binary bin/csim from src/interconnects.cpp.
# The interpreter line must be the very first line of the file to take
# effect; it previously followed a blank line.

# CXXFLAGS is expanded unquoted below, so several space-separated flags are
# passed to g++ as separate arguments.
CXXFLAGS="-g"

g++ $CXXFLAGS src/interconnects.cpp -DCSIM -I/usr/include/staccel -I/usr/include/insider -I/usr/include/hls_csim -I src -I inc -std=c++11 -pthread -o bin/csim

--------------------------------------------------------------------------------
/apps/device/pt/csim/src/app_input_data_merger.cpp:
--------------------------------------------------------------------------------
#ifndef APP_INPUT_DATA_MERGER_CPP_
#define APP_INPUT_DATA_MERGER_CPP_

#include 

// The LBA is block-aligned which implies that it is also 64B-aligned.
7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/app_output_data_demux.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_OUTPUT_DATA_DEMUX_CPP_ 2 | #define APP_OUTPUT_DATA_DEMUX_CPP_ 3 | 4 | #include 5 | 6 | void app_output_data_demux(ST_Queue &app_is_write_mode, 7 | ST_Queue &app_output_data, 8 | ST_Queue &read_mode_app_output_data, 9 | ST_Queue &write_mode_app_output_data, 10 | ST_Queue &reset_app_output_data_demux) { 11 | bool has_is_write_mode = false; 12 | bool is_write_mode; 13 | 14 | bool reset = false; 15 | unsigned int reset_cnt = 0; 16 | 17 | while (1) { 18 | #pragma HLS pipeline 19 | bool dummy; 20 | if (reset || (reset = reset_app_output_data_demux.read_nb(dummy))) { 21 | has_is_write_mode = false; 22 | bool dummy_0; 23 | app_is_write_mode.read_nb(dummy_0); 24 
| APP_Data dummy_1; 25 | app_output_data.read_nb(dummy_1); 26 | 27 | reset_cnt++; 28 | if (reset_cnt == RESET_CNT) { 29 | reset_cnt = 0; 30 | reset = false; 31 | } 32 | } else { 33 | if (has_is_write_mode || 34 | (has_is_write_mode = app_is_write_mode.read_nb(is_write_mode))) { 35 | APP_Data app_data; 36 | if (app_output_data.read_nb(app_data)) { 37 | if (is_write_mode) { 38 | write_mode_app_output_data.write(app_data); 39 | } else { 40 | read_mode_app_output_data.write(app_data); 41 | } 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/app_pt.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_PT_CPP_ 2 | #define APP_PT_CPP_ 3 | #include 4 | 5 | void app_pt(ST_Queue &reset_app_pt, ST_Queue &app_input_data, 6 | ST_Queue &app_output_data, 7 | ST_Queue &app_input_params) { 8 | 9 | bool reset = false; 10 | unsigned reset_cnt = 0; 11 | while (1) { 12 | bool dummy; 13 | if (reset || (reset = reset_app_pt.read_nb(dummy))) { 14 | unsigned int dummy0; 15 | app_input_params.read_nb(dummy0); 16 | struct APP_Data dummy1; 17 | app_input_data.read_nb(dummy1); 18 | reset_cnt++; 19 | if (reset_cnt == RESET_CNT) { 20 | reset_cnt = 0; 21 | reset = false; 22 | } 23 | } else { 24 | #pragma HLS pipeline 25 | unsigned int dummy; 26 | app_input_params.read_nb(dummy); 27 | 28 | APP_Data data; 29 | if (app_input_data.read_nb(data)) { 30 | app_output_data.write(data); 31 | } 32 | } 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | 
ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned short available_device_dram_resp_buf_flits = 4 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &after_throttle_unified_dram_read_resp, 10 | ST_Queue &dram_read_context_read) { 11 | bool valid_state = 0; 12 | bool data_state; 13 | bool valid_read_resp = 0; 14 | Dram_Read_Resp data_read_resp; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | if (!valid_state) { 19 | valid_state = dram_read_context_read.read_nb(data_state); 20 | } 21 | if (!valid_read_resp) { 22 | valid_read_resp = 23 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 24 | } 25 | 26 | if (valid_state && valid_read_resp) { 27 | valid_read_resp = false; 28 | if (data_state == HOST_READ_REQ) { 29 | host_dram_read_resp.write(data_read_resp); 30 | } else { 31 | 
device_dram_read_resp.write(data_read_resp); 32 | } 33 | if (data_read_resp.last) { 34 | valid_state = false; 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/pcie_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_REQ_MULTIPLEXER_CPP_ 2 | #define PCIE_READ_REQ_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_req_multiplexer(ST_Queue &pcie_read_req, 7 | ST_Queue &device_pcie_read_req, 8 | ST_Queue &host_pcie_read_req, 9 | ST_Queue &pcie_read_mux_context) { 10 | 11 | while (1) { 12 | #pragma HLS pipeline 13 | bool has_read_req = false; 14 | PCIe_Read_Req read_req; 15 | bool context; 16 | if (device_pcie_read_req.read_nb(read_req)) { 17 | has_read_req = true; 18 | context = 0; 19 | } else if(host_pcie_read_req.read_nb(read_req)) { 20 | has_read_req = true; 21 | context = 1; 22 | } 23 | 24 | if (has_read_req) { 25 | pcie_read_mux_context.write(context); 26 | pcie_read_req.write(read_req); 27 | } 28 | } 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/pcie_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_MULTIPLEXER_CPP_ 2 | #define PCIE_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_multiplexer(ST_Queue &pcie_read_resp, 7 | ST_Queue &device_pcie_read_resp, 8 | ST_Queue &host_pcie_read_resp, 9 | ST_Queue &pcie_read_mux_context) { 10 | bool has_context = false; 11 | bool context; 12 | while (1) { 13 | #pragma HLS pipeline 14 | if (has_context || (has_context = pcie_read_mux_context.read_nb(context))) { 15 | PCIe_Read_Resp read_resp; 16 | if (pcie_read_resp.read_nb(read_resp)) { 17 | if (context == 0) { 18 | device_pcie_read_resp.write(read_resp); 19 | } else { 20 | 
host_pcie_read_resp.write(read_resp); 21 | } 22 | if (read_resp.last) { 23 | has_context = false; 24 | } 25 | } 26 | } 27 | } 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/pt/csim/src/reset_propaganda.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RESET_PROPAGANDA_CPP_ 2 | #define RESET_PROPAGANDA_CPP_ 3 | 4 | #include 5 | 6 | void reset_propaganda( 7 | ST_Queue &reset_app_pt, ST_Queue &reset_sigs, 8 | ST_Queue &reset_read_mode_dram_helper_app, 9 | ST_Queue &reset_write_mode_dram_helper_app, 10 | ST_Queue &reset_read_mode_pcie_helper_app, 11 | ST_Queue &reset_write_mode_pcie_helper_app, 12 | ST_Queue &reset_pcie_data_splitter_app, 13 | ST_Queue &reset_app_output_data_demux, 14 | ST_Queue &reset_app_input_data_mux, 15 | ST_Queue &reset_write_mode_app_output_data_caching, 16 | ST_Queue &reset_app_input_data_merger, 17 | ST_Queue &reset_write_mode_pre_merged_app_input_data_forwarder) { 18 | while (1) { 19 | #pragma HLS pipeline 20 | bool dummy; 21 | if (reset_sigs.read_nb(dummy)) { 22 | reset_read_mode_dram_helper_app.write(0); 23 | reset_write_mode_dram_helper_app.write(0); 24 | reset_read_mode_pcie_helper_app.write(0); 25 | reset_write_mode_pcie_helper_app.write(0); 26 | reset_pcie_data_splitter_app.write(0); 27 | 
reset_app_output_data_demux.write(0); 28 | reset_app_input_data_mux.write(0); 29 | reset_write_mode_app_output_data_caching.write(0); 30 | reset_app_input_data_merger.write(0); 31 | reset_write_mode_pre_merged_app_input_data_forwarder.write(0); 32 | reset_app_pt.write(0); 33 | } 34 | } 35 | } 36 | #endif -------------------------------------------------------------------------------- /apps/device/pt/interconnects.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "app_pt.cpp" 4 | 5 | void interconnects() { 6 | app_pt(app_input_data, app_output_data, app_input_params); 7 | } 8 | -------------------------------------------------------------------------------- /apps/device/pt/kernels/app_pt.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_PT_CPP_ 2 | #define APP_PT_CPP_ 3 | #include 4 | 5 | void app_pt(ST_Queue &app_input_data, 6 | ST_Queue &app_output_data, 7 | ST_Queue &app_input_params) { 8 | while (1) { 9 | #pragma HLS pipeline 10 | unsigned int dummy; 11 | app_input_params.read_nb(dummy); 12 | 13 | APP_Data data; 14 | if (app_input_data.read_nb(data)) { 15 | app_output_data.write(data); 16 | } 17 | } 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /apps/device/relief/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #define APP_FEATURE_DIM_LOG (8) 5 | #define APP_FEATURE_DIM (256) // should be > 32 6 | #define APP_QUERY_NUM (1) 7 | #define APP_HIT_DATA_NUM (4) 8 | #define APP_MISS_DATA_NUM (4) 9 | #define APP_THRES (0) 10 | #define APP_FEATURE_WIDTH (4) // counted in B do not change 11 | #define APP_FEATURE_PER_CYCLE (16) // do not change 12 | #define APP_FEATURE_PER_CYCLE_LOG2 (4) // do not change 13 | #endif 14 | 
-------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_flt2.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_FLT2_CPP_ 2 | #define APP_RLF_FLT2_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_flt2(ST_Queue &app_rlf_flt2_input_data, 10 | ST_Queue &app_rlf_wr_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Flt_Data2 filter2_in; 15 | #pragma HLS array_partition variable = filter2_in.data complete 16 | #pragma HLS array_partition variable = filter2_in.validNum complete 17 | APP_Write_Data write_out; 18 | #pragma HlS array_partition variable = write_out.data complete 19 | if (app_rlf_flt2_input_data.read_nb(filter2_in)) { 20 | for (int m = 0; m < APP_FEATURE_PER_CYCLE; m++) { 21 | #pragma HLS unroll 22 | if (filter2_in.validNum[0] > m) { 23 | write_out.data[m] = filter2_in.data[m]; 24 | } else if (filter2_in.validNum[0] + filter2_in.validNum[1] > m) { 25 | write_out.data[m] = filter2_in.data[m + 8 - filter2_in.validNum[0]]; 26 | } 27 | } 28 | write_out.validNum = filter2_in.validNum[0] + filter2_in.validNum[1]; 29 | write_out.eop = filter2_in.eop; 30 | app_rlf_wr_input_data.write(write_out); 31 | } 32 | } 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_flt4.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_FLT4_CPP_ 2 | #define APP_RLF_FLT4_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_flt4(ST_Queue &app_rlf_flt4_input_data, 10 | ST_Queue &app_rlf_flt2_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Flt_Data4 filter4_in; 15 | #pragma HLS array_partition variable = filter4_in.data complete 16 | #pragma HLS array_partition variable = filter4_in.validNum 
complete 17 | APP_Flt_Data2 filter2_out; 18 | #pragma HlS array_partition variable = filter2_out.data complete 19 | #pragma HLS array_partition variable = filter2_out.validNum complete 20 | if (app_rlf_flt4_input_data.read_nb(filter4_in)) { 21 | for (int i = 0; i < (APP_FEATURE_PER_CYCLE >> 2); i += 2) { 22 | #pragma HLS unroll 23 | for (int m = 0; m < 8; m++) { 24 | #pragma HLS unroll 25 | if (filter4_in.validNum[i] > m) { 26 | filter2_out.data[4 * i + m] = filter4_in.data[4 * i + m]; 27 | } else if (filter4_in.validNum[i] + filter4_in.validNum[i + 1] > m) { 28 | filter2_out.data[4 * i + m] = 29 | filter4_in.data[4 * i + m - filter4_in.validNum[i] + 4]; 30 | } 31 | } 32 | filter2_out.validNum[i >> 1] = 33 | filter4_in.validNum[i] + filter4_in.validNum[i + 1]; 34 | } 35 | filter2_out.eop = filter4_in.eop; 36 | app_rlf_flt2_input_data.write(filter2_out); 37 | } 38 | } 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_flt8.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_FLT8_CPP_ 2 | #define APP_RLF_FLT8_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_flt8(ST_Queue &app_rlf_flt8_input_data, 10 | ST_Queue &app_rlf_flt4_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Flt_Data8 filter8_in; 15 | #pragma HLS array_partition variable = filter8_in.data complete 16 | #pragma HLS array_partition variable = filter8_in.validNum complete 17 | APP_Flt_Data4 filter4_out; 18 | #pragma HlS array_partition variable = filter4_out.data complete 19 | #pragma HLS array_partition variable = filter4_out.validNum complete 20 | if (app_rlf_flt8_input_data.read_nb(filter8_in)) { 21 | for (int i = 0; i < (APP_FEATURE_PER_CYCLE >> 1); i += 2) { 22 | #pragma HLS unroll 23 | for (int m = 0; m < 4; m++) { 24 | #pragma HLS unroll 25 | if (filter8_in.validNum[i] > m) { 26 | 
filter4_out.data[2 * i + m] = filter8_in.data[2 * i + m]; 27 | } else if (filter8_in.validNum[i] + filter8_in.validNum[i + 1] > m) { 28 | filter4_out.data[2 * i + m] = 29 | filter8_in.data[2 * i + m - filter8_in.validNum[i] + 2]; 30 | } 31 | } 32 | filter4_out.validNum[i >> 1] = 33 | filter8_in.validNum[i] + filter8_in.validNum[i + 1]; 34 | } 35 | filter4_out.eop = filter8_in.eop; 36 | app_rlf_flt4_input_data.write(filter4_out); 37 | } 38 | } 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_rdc_16to8.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_RDC_16TO8_CPP_ 2 | #define APP_RLF_RDC_16TO8_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_rdc_16to8( 10 | ST_Queue &app_rlf_rdc_16to8_input_data, 11 | ST_Queue &app_rlf_rdc_8to4_input_data) { 12 | 13 | while (1) { 14 | #pragma HLS pipeline 15 | APP_Reduce_Data16 reduce16_in; 16 | #pragma HLS array_partition variable = reduce16_in.reduce complete dim = 0 17 | #pragma HLS array_partition variable = reduce16_in.data complete 18 | APP_Reduce_Data8 reduce8_out; 19 | #pragma HLS array_partition variable = reduce8_out.reduce complete dim = 0 20 | #pragma HLS array_partition variable = reduce8_out.data complete 21 | if (app_rlf_rdc_16to8_input_data.read_nb(reduce16_in)) { 22 | for (int i = 0; i < APP_QUERY_NUM; i++) { 23 | #pragma HLS unroll 24 | for (int j = 0; j < APP_FEATURE_PER_CYCLE; j += 2) { 25 | #pragma HLS unroll 26 | reduce8_out.reduce[j >> 1] = 27 | reduce16_in.reduce[j] + reduce16_in.reduce[j + 1]; 28 | } 29 | } 30 | for (int i = 0; i < APP_FEATURE_PER_CYCLE; i++) { 31 | #pragma HLS unroll 32 | reduce8_out.data[i] = reduce16_in.data[i]; 33 | } 34 | reduce8_out.posi = reduce16_in.posi; 35 | reduce8_out.idx = reduce16_in.idx; 36 | reduce8_out.eot = reduce16_in.eot; 37 | 
app_rlf_rdc_8to4_input_data.write(reduce8_out); 38 | } 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_rdc_2to1.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_RDC_2TO1_CPP_ 2 | #define APP_RLF_RDC_2TO1_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_rdc_2to1(ST_Queue &app_rlf_rdc_2to1_input_data, 10 | ST_Queue &app_rlf_max_min_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Reduce_Data2 reduce2_in; 15 | #pragma HLS array_partition variable = reduce2_in.reduce complete dim = 0 16 | #pragma HLS array_partition variable = reduce2_in.data complete 17 | APP_Reduce_Data reduce_out; 18 | #pragma HLS array_partition variable = reduce_out.data complete 19 | if (app_rlf_rdc_2to1_input_data.read_nb(reduce2_in)) { 20 | for (int i = 0; i < APP_QUERY_NUM; i++) { 21 | #pragma HLS unroll 22 | reduce_out.reduce = reduce2_in.reduce[0] + reduce2_in.reduce[1]; 23 | } 24 | for (int i = 0; i < APP_FEATURE_PER_CYCLE; i++) { 25 | #pragma HLS unroll 26 | reduce_out.data[i] = reduce2_in.data[i]; 27 | } 28 | reduce_out.posi = reduce2_in.posi; 29 | reduce_out.idx = reduce2_in.idx; 30 | reduce_out.eot = reduce2_in.eot; 31 | app_rlf_max_min_input_data.write(reduce_out); 32 | } 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_rdc_4to2.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_RDC_4TO2_CPP_ 2 | #define APP_RLF_RDC_4TO2_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_rdc_4to2(ST_Queue &app_rlf_rdc_4to2_input_data, 10 | ST_Queue &app_rlf_rdc_2to1_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Reduce_Data4 reduce4_in; 15 | 
#pragma HLS array_partition variable = reduce4_in.reduce complete dim = 0 16 | #pragma HLS array_partition variable = reduce4_in.data complete 17 | APP_Reduce_Data2 reduce2_out; 18 | #pragma HLS array_partition variable = reduce2_out.reduce complete dim = 0 19 | #pragma HLS array_partition variable = reduce2_out.data complete 20 | if (app_rlf_rdc_4to2_input_data.read_nb(reduce4_in)) { 21 | for (int i = 0; i < APP_QUERY_NUM; i++) { 22 | #pragma HLS unroll 23 | for (int j = 0; j < 4; j += 2) { 24 | #pragma HLS unroll 25 | reduce2_out.reduce[j >> 1] = 26 | reduce4_in.reduce[j] + reduce4_in.reduce[j + 1]; 27 | } 28 | } 29 | for (int i = 0; i < APP_FEATURE_PER_CYCLE; i++) { 30 | #pragma HLS unroll 31 | reduce2_out.data[i] = reduce4_in.data[i]; 32 | } 33 | reduce2_out.posi = reduce4_in.posi; 34 | reduce2_out.idx = reduce4_in.idx; 35 | reduce2_out.eot = reduce4_in.eot; 36 | app_rlf_rdc_2to1_input_data.write(reduce2_out); 37 | } 38 | } 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /apps/device/relief/kernels/app_rlf_rdc_8to4.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_RLF_RDC_8TO4_CPP_ 2 | #define APP_RLF_RDC_8TO4_CPP_ 3 | 4 | #include 5 | 6 | #include "constant.h" 7 | #include "structure.h" 8 | 9 | void app_rlf_rdc_8to4(ST_Queue &app_rlf_rdc_8to4_input_data, 10 | ST_Queue &app_rlf_rdc_4to2_input_data) { 11 | 12 | while (1) { 13 | #pragma HLS pipeline 14 | APP_Reduce_Data8 reduce8_in; 15 | #pragma HLS array_partition variable = reduce8_in.reduce complete dim = 0 16 | #pragma HLS array_partition variable = reduce8_in.data complete 17 | APP_Reduce_Data4 reduce4_out; 18 | #pragma HLS array_partition variable = reduce4_out.reduce complete dim = 0 19 | #pragma HLS array_partition variable = reduce4_out.data complete 20 | if (app_rlf_rdc_8to4_input_data.read_nb(reduce8_in)) { 21 | for (int i = 0; i < APP_QUERY_NUM; i++) { 22 | #pragma HLS unroll 23 | for (int 
j = 0; j < 8; j += 2) { 24 | #pragma HLS unroll 25 | reduce4_out.reduce[j >> 1] = 26 | (reduce8_in.reduce[j] + reduce8_in.reduce[j + 1]); 27 | } 28 | } 29 | for (int i = 0; i < APP_FEATURE_PER_CYCLE; i++) { 30 | reduce4_out.data[i] = reduce8_in.data[i]; 31 | } 32 | reduce4_out.posi = reduce8_in.posi; 33 | reduce4_out.idx = reduce8_in.idx; 34 | reduce4_out.eot = reduce8_in.eot; 35 | app_rlf_rdc_4to2_input_data.write(reduce4_out); 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/inc/structure.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define NUM_OF_PAIRS (32) 6 | 7 | struct APP_Expand_Data { 8 | unsigned short prefix_sum[NUM_OF_PAIRS + 1]; 9 | char alpha_array[NUM_OF_PAIRS]; 10 | bool eop; 11 | unsigned char len; // mark valid number of pairs 12 | }; 13 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/kernels/app_input_data_merger.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INPUT_DATA_MERGER_CPP_ 2 | #define APP_INPUT_DATA_MERGER_CPP_ 3 | 4 | #include 5 | 6 | // The LBA is block-aligned which implies that it is also 64B-aligned. 
7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/kernels/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned char available_device_dram_resp_buf_flits = 2 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | if (host_dram_read_req.read_nb(req)) { 23 | 
dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/kernels/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &release_device_dram_resp_buf_flits, 10 | ST_Queue &after_throttle_unified_dram_read_resp, 11 | ST_Queue &dram_read_context_read) { 12 | bool valid_state = 0; 13 | bool data_state; 14 | bool valid_read_resp = 0; 15 | Dram_Read_Resp data_read_resp; 16 | 17 | while (1) { 18 | #pragma HLS pipeline 19 | if (!valid_state) { 20 | valid_state = dram_read_context_read.read_nb(data_state); 21 | } 22 | if (!valid_read_resp) { 23 | valid_read_resp = 24 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 25 | } 26 | 27 | if (valid_state && valid_read_resp) { 28 | valid_read_resp = false; 29 | if (data_state == HOST_READ_REQ) { 30 | host_dram_read_resp.write(data_read_resp); 31 | } else { 32 | device_dram_read_resp.write(data_read_resp); 33 | release_device_dram_resp_buf_flits.write(0); 34 | } 35 | if (data_read_resp.last) { 36 | valid_state = false; 37 | } 38 | } 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/kernels/pcie_read_resp_passer.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/cl_common_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | `ifndef CL_COMMON_DEFAULTS 17 | `define CL_COMMON_DEFAULTS 18 | 19 | // Value to return for PCIS access to unimplemented register address 20 | `define UNIMPLEMENTED_REG_VALUE 32'hdeaddead 21 | 22 | // CL Register Addresses 23 | `define VLED_REG_ADDR 32'h0000_0504 24 | 25 | `endif 26 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/cl_id_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // CL_SH_ID0 17 | // - PCIe Vendor/Device ID Values 18 | // 31:16: PCIe Device ID 19 | // 15: 0: PCIe Vendor ID 20 | // - A Vendor ID value of 0x8086 is not valid. 21 | // - If using a Vendor ID value of 0x1D0F (Amazon) then valid 22 | // values for Device ID's are in the range of 0xF000 - 0xF0FF. 23 | // - A Vendor/Device ID of 0 (zero) is not valid. 24 | `define CL_SH_ID0 32'hF000_1D0F 25 | 26 | // CL_SH_ID1 27 | // - PCIe Subsystem/Subsystem Vendor ID Values 28 | // 31:16: PCIe Subsystem ID 29 | // 15: 0: PCIe Subsystem Vendor ID 30 | // - A PCIe Subsystem/Subsystem Vendor ID of 0 (zero) is not valid 31 | `define CL_SH_ID1 32'h1D51_FEDD 32 | 33 | 34 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/cl_main_defines.vh: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. 
This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | `ifndef CL_MAIN 17 | `define CL_MAIN 18 | 19 | //Put module name of the CL design here. This is used to instantiate in top.sv 20 | `define CL_NAME cl_main 21 | 22 | //Highly recommended. For lib FIFO block, uses less async reset (take advantage of 23 | // FPGA flop init capability). This will help with routing resources. 24 | `define FPGA_LESS_RST 25 | 26 | // Uncomment to disable Virtual JTAG 27 | //`define DISABLE_VJTAG_DEBUG 28 | 29 | `endif 30 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/genip: -------------------------------------------------------------------------------- 1 | create_project -in_memory -force 2 | 3 | foreach file [glob -dir . *.tcl] { 4 | source $file 5 | } 6 | 7 | close_project 8 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/reg_ap_int_base_513_false_false_s.v: -------------------------------------------------------------------------------- 1 | // ============================================================== 2 | // RTL generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC 3 | // Version: 2017.4.op 4 | // Copyright (C) 1986-2018 Xilinx, Inc. All Rights Reserved. 
5 | // 6 | // =========================================================== 7 | 8 | `timescale 1 ns / 1 ps 9 | 10 | module reg_ap_int_base_513_false_false_s ( 11 | ap_clk, 12 | ap_rst, 13 | in_V, 14 | ap_return, 15 | ap_ce 16 | ); 17 | 18 | 19 | input ap_clk; 20 | input ap_rst; 21 | input [16:0] in_V; 22 | output [16:0] ap_return; 23 | input ap_ce; 24 | 25 | reg[16:0] ap_return; 26 | 27 | wire ap_block_state1_pp0_stage0_iter0; 28 | wire ap_block_state2_pp0_stage0_iter1; 29 | wire ap_block_pp0_stage0_11001; 30 | 31 | always @ (posedge ap_clk) begin 32 | if (((1'b0 == ap_block_pp0_stage0_11001) & (1'b1 == ap_ce))) begin 33 | ap_return <= in_V; 34 | end 35 | end 36 | 37 | assign ap_block_pp0_stage0_11001 = ~(1'b1 == 1'b1); 38 | 39 | assign ap_block_state1_pp0_stage0_iter0 = ~(1'b1 == 1'b1); 40 | 41 | assign ap_block_state2_pp0_stage0_iter1 = ~(1'b1 == 1'b1); 42 | 43 | endmodule //reg_ap_int_base_513_false_false_s 44 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/design/reg_unsigned_short_s.v: -------------------------------------------------------------------------------- 1 | // ============================================================== 2 | // RTL generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC 3 | // Version: 2017.4.op 4 | // Copyright (C) 1986-2018 Xilinx, Inc. All Rights Reserved. 
5 | // 6 | // =========================================================== 7 | 8 | `timescale 1 ns / 1 ps 9 | 10 | module reg_unsigned_short_s ( 11 | ap_clk, 12 | ap_rst, 13 | in_r, 14 | ap_return, 15 | ap_ce 16 | ); 17 | 18 | 19 | input ap_clk; 20 | input ap_rst; 21 | input [15:0] in_r; 22 | output [15:0] ap_return; 23 | input ap_ce; 24 | 25 | reg[15:0] ap_return; 26 | 27 | wire ap_block_state1_pp0_stage0_iter0; 28 | wire ap_block_state2_pp0_stage0_iter1; 29 | wire ap_block_pp0_stage0_11001; 30 | 31 | always @ (posedge ap_clk) begin 32 | if (((1'b0 == ap_block_pp0_stage0_11001) & (1'b1 == ap_ce))) begin 33 | ap_return <= in_r; 34 | end 35 | end 36 | 37 | assign ap_block_pp0_stage0_11001 = ~(1'b1 == 1'b1); 38 | 39 | assign ap_block_state1_pp0_stage0_iter0 = ~(1'b1 == 1'b1); 40 | 41 | assign ap_block_state2_pp0_stage0_iter1 = ~(1'b1 == 1'b1); 42 | 43 | endmodule //reg_unsigned_short_s 44 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/runtime/Makefile: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | #VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 17 | 18 | INCLUDES = -I$(SDK_DIR)/userspace/include 19 | 20 | CPP = g++ 21 | CPPFLAGS = -DCONFIG_LOGLEVEL=4 -g -Wall $(INCLUDES) -std=c++11 -lpthread -fopenmp -O3 22 | 23 | LDLIBS = -lfpga_mgmt -lrt -lpthread 24 | 25 | SRC = test_main.cpp fpga_util.cpp 26 | OBJ = $(SRC:.cpp=.o) 27 | BIN = test_main 28 | 29 | all: $(BIN) check_env 30 | 31 | $(BIN): $(OBJ) 32 | $(CPP) $(CPPFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) 33 | 34 | clean: 35 | rm -f *.o $(BIN) 36 | 37 | check_env: 38 | ifndef SDK_DIR 39 | $(error SDK_DIR is undefined. Try "source sdk_setup.sh" to set the software environment) 40 | endif 41 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/runtime/const.h: -------------------------------------------------------------------------------- 1 | #ifndef CONST_H_ 2 | #define CONST_H_ 3 | 4 | #define PAGE_SIZE (1024 * 1024 * 4) 5 | #define INUM_LIMIT (16) 6 | #define ONUM_LIMIT (16) 7 | #define PULL_INPUT_REG (0x0) 8 | #define PUSH_INPUT_REG (0x1) 9 | #define PULL_OUTPUT_REG (0x2) 10 | #define PUSH_OUTPUT_REG (0x3) 11 | #define PUSH_BUF_INIT_DATA_REG (0x4) 12 | #define PULL_BUF_INIT_READY_REG (0x5) 13 | #define SLOT_ID (0) 14 | #define BUF_SIZE (1024 * 1024 * 4) 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/runtime/fpga_util.cpp: -------------------------------------------------------------------------------- 1 | #include "const.h" 2 | #include "fpga_mgmt.h" 3 | #include "fpga_pci.h" 4 | #include "utils/lcd.h" 5 | #include 6 | 7 | uint16_t _pci_vendor_id = 0x1D0F; 8 | uint16_t _pci_device_id = 0xF000; 9 | pci_bar_handle_t _pci_bar_handle; 10 | struct fpga_pci_bar *_bar; 11 | void *_IBufs[INUM_LIMIT]; 12 | void *_OBufs[ONUM_LIMIT]; 13 | uint64_t _IBufs_phy[INUM_LIMIT]; 14 | uint64_t _OBufs_phy[ONUM_LIMIT]; 15 | 
int _configfds[INUM_LIMIT + ONUM_LIMIT]; 16 | uint8_t _inputL = 0; 17 | uint8_t _inputR = 0; 18 | bool _input_empty = false; 19 | uint8_t _outputL = 0; 20 | uint8_t _outputR = 0; 21 | bool _output_empty = false; 22 | unsigned char _comm_Inum = 0; 23 | unsigned char _comm_Onum = 0; 24 | pthread_mutex_t _receive_control_msg_mutex = PTHREAD_MUTEX_INITIALIZER; 25 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/verif_rtl/Makefile: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | VPATH = src:include:$(HDK_DIR)/common/software/src:$(HDK_DIR)/common/software/include 18 | 19 | C_SRCS := test_main.c pcie_utils.c cl_utils.c main.c 20 | C_OBJS := $(C_SRCS:.c=.o) 21 | 22 | CC = gcc 23 | CFLAGS = -I ./include 24 | CFLAGS += -I $(HDK_DIR)/common/software/include 25 | all: test_main 26 | 27 | test_main: $(C_SRCS) 28 | $(CC) $(CFLAGS) -o $@ $^ 29 | 30 | clean: 31 | rm -f test_main 32 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/verif_rtl/include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/rle/cosim/project/software/verif_rtl/include/.gitignore -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/verif_rtl/src/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zainryan/INSIDER-System/748b1c4df4fc2c508451e15f6883b08dd94696ad/apps/device/rle/cosim/project/software/verif_rtl/src/.gitignore -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/software/verif_rtl/src/test_null.c: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. 
See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | // Vivado does not support svGetScopeFromName 21 | #ifdef INCLUDE_DPI_CALLS 22 | #ifndef VIVADO_SIM 23 | #include "svdpi.h" 24 | #endif 25 | #endif 26 | 27 | #include "sh_dpi_tasks.h" 28 | 29 | void test_main(uint32_t *exit_code) { 30 | 31 | // NULL Test 32 | 33 | *exit_code = 0; 34 | } 35 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/verif/scripts/open_waves.tcl: -------------------------------------------------------------------------------- 1 | current_fileset 2 | open_wave_database tb.wdb 3 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/verif/scripts/waves.tcl: -------------------------------------------------------------------------------- 1 | # Amazon FPGA Hardware Development Kit 2 | # 3 | # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Amazon Software License (the "License"). You may not use 6 | # this file except in compliance with the License. A copy of the License is 7 | # located at 8 | # 9 | # http://aws.amazon.com/asl/ 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed on 12 | # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | # implied. See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set curr_wave [current_wave_config] 17 | if { [string length $curr_wave] == 0 } { 18 | if { [llength [get_objects]] > 0} { 19 | add_wave -recursive / 20 | set_property needs_save false [current_wave_config] 21 | } else { 22 | send_msg_id Add_Wave-1 WARNING "No top level signals found. Simulator will start without a wave window. 
If you want to open a wave window go to 'File->New Waveform Configuration' or type 'create_wave_config' in the TCL console." 23 | } 24 | } 25 | 26 | run 200 us 27 | quit 28 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/verif/tests/test_main.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | 17 | module test_main(); 18 | 19 | import tb_type_defines_pkg::*; 20 | `include "cl_common_defines.vh" // CL Defines with register addresses 21 | 22 | // AXI ID 23 | parameter [5:0] AXI_ID = 6'h0; 24 | 25 | logic [31:0] rdata; 26 | logic [15:0] vdip_value; 27 | logic [15:0] vled_value; 28 | 29 | 30 | initial begin 31 | 32 | tb.power_up(); 33 | 34 | tb.set_virtual_dip_switch(.dip(0)); 35 | 36 | vdip_value = tb.get_virtual_dip_switch(); 37 | 38 | // PUT SIM LOGIC HERE 39 | 40 | tb.kernel_reset(); 41 | 42 | tb.power_down(); 43 | 44 | $finish; 45 | end 46 | 47 | endmodule 48 | -------------------------------------------------------------------------------- /apps/device/rle/cosim/project/verif/tests/test_null.sv: -------------------------------------------------------------------------------- 1 | // Amazon FPGA Hardware Development Kit 2 | // 3 | // Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | // 5 | // Licensed under the Amazon Software License (the "License"). You may not use 6 | // this file except in compliance with the License. A copy of the License is 7 | // located at 8 | // 9 | // http://aws.amazon.com/asl/ 10 | // 11 | // or in the "license" file accompanying this file. This file is distributed on 12 | // an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or 13 | // implied. See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | module test_null(); 17 | 18 | initial begin 19 | int exit_code; 20 | 21 | tb.power_up(); 22 | 23 | 24 | tb.test_main(exit_code); 25 | 26 | #50ns; 27 | 28 | tb.power_down(); 29 | 30 | $finish; 31 | end 32 | 33 | endmodule // test_null 34 | -------------------------------------------------------------------------------- /apps/device/rle/csim/csim_compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | CXXFLAGS="-g" 5 | 6 | g++ $CXXFLAGS src/interconnects.cpp -DCSIM -I/usr/include/staccel -I/usr/include/insider -I/usr/include/hls_csim -I src -I inc -std=c++11 -pthread -o bin/csim 7 | -------------------------------------------------------------------------------- /apps/device/rle/csim/inc/structure.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define NUM_OF_PAIRS (32) 6 | 7 | struct APP_Expand_Data { 8 | unsigned short prefix_sum[NUM_OF_PAIRS + 1]; 9 | char alpha_array[NUM_OF_PAIRS]; 10 | bool eop; 11 | unsigned char len; // mark valid number of pairs 12 | }; 13 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/app_input_data_merger.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_INPUT_DATA_MERGER_CPP_ 2 | #define APP_INPUT_DATA_MERGER_CPP_ 3 | 4 | #include 5 | 6 | // The LBA is 
block-aligned which implies that it is also 64B-aligned. 7 | void app_input_data_merger(ST_Queue &pre_merged_app_input_data, 8 | ST_Queue &app_input_data, 9 | ST_Queue &reset_app_input_data_merger) { 10 | APP_Data delayed_app; 11 | bool has_delayed_app = false; 12 | 13 | bool reset = false; 14 | unsigned int reset_cnt = 0; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | bool dummy; 19 | if (reset || (reset = reset_app_input_data_merger.read_nb(dummy))) { 20 | has_delayed_app = false; 21 | APP_Data dummy_0; 22 | pre_merged_app_input_data.read_nb(dummy_0); 23 | reset_cnt++; 24 | if (reset_cnt == RESET_CNT) { 25 | reset_cnt = 0; 26 | reset = false; 27 | } 28 | } else { 29 | APP_Data app_data; 30 | if (pre_merged_app_input_data.read_nb(app_data)) { 31 | if (!has_delayed_app) { 32 | has_delayed_app = true; 33 | delayed_app = app_data; 34 | } else { 35 | if (app_data.eop) { 36 | // app_data must be an empty flit which only has eop set. 37 | delayed_app.eop = true; 38 | app_input_data.write(delayed_app); 39 | } else { 40 | app_input_data.write(delayed_app); 41 | delayed_app = app_data; 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/dram_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_MULTIPLEXER_CPP_ 2 | #define DRAM_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_req_multiplexer( 7 | ST_Queue &host_dram_read_req, 8 | ST_Queue &device_dram_read_req, 9 | ST_Queue &unified_dram_read_req, 10 | ST_Queue &release_device_dram_resp_buf_flits, 11 | ST_Queue &dram_read_context_write) { 12 | 13 | unsigned short available_device_dram_resp_buf_flits = 4 * DRAM_READ_BATCH_NUM; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | bool dummy; 18 | available_device_dram_resp_buf_flits += 19 | release_device_dram_resp_buf_flits.read_nb(dummy); 20 | 21 | Dram_Read_Req req; 22 | 
if (host_dram_read_req.read_nb(req)) { 23 | dram_read_context_write.write(HOST_READ_REQ); 24 | unified_dram_read_req.write(req); 25 | } else { 26 | if (available_device_dram_resp_buf_flits >= DRAM_READ_BATCH_NUM) { 27 | if (device_dram_read_req.read_nb(req)) { 28 | dram_read_context_write.write(DEVICE_READ_REQ); 29 | unified_dram_read_req.write(req); 30 | available_device_dram_resp_buf_flits -= req.num; 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/dram_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef DRAM_READ_RESP_MULTIPLEXER_CPP_ 2 | #define DRAM_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void dram_read_resp_multiplexer( 7 | ST_Queue &host_dram_read_resp, 8 | ST_Queue &device_dram_read_resp, 9 | ST_Queue &after_throttle_unified_dram_read_resp, 10 | ST_Queue &dram_read_context_read) { 11 | bool valid_state = 0; 12 | bool data_state; 13 | bool valid_read_resp = 0; 14 | Dram_Read_Resp data_read_resp; 15 | 16 | while (1) { 17 | #pragma HLS pipeline 18 | if (!valid_state) { 19 | valid_state = dram_read_context_read.read_nb(data_state); 20 | } 21 | if (!valid_read_resp) { 22 | valid_read_resp = 23 | after_throttle_unified_dram_read_resp.read_nb(data_read_resp); 24 | } 25 | 26 | if (valid_state && valid_read_resp) { 27 | valid_read_resp = false; 28 | if (data_state == HOST_READ_REQ) { 29 | host_dram_read_resp.write(data_read_resp); 30 | } else { 31 | device_dram_read_resp.write(data_read_resp); 32 | } 33 | if (data_read_resp.last) { 34 | valid_state = false; 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/pcie_read_req_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_REQ_MULTIPLEXER_CPP_ 2 | 
#define PCIE_READ_REQ_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_req_multiplexer(ST_Queue &pcie_read_req, 7 | ST_Queue &device_pcie_read_req, 8 | ST_Queue &host_pcie_read_req, 9 | ST_Queue &pcie_read_mux_context) { 10 | 11 | while (1) { 12 | #pragma HLS pipeline 13 | bool has_read_req = false; 14 | PCIe_Read_Req read_req; 15 | bool context; 16 | if (device_pcie_read_req.read_nb(read_req)) { 17 | has_read_req = true; 18 | context = 0; 19 | } else if(host_pcie_read_req.read_nb(read_req)) { 20 | has_read_req = true; 21 | context = 1; 22 | } 23 | 24 | if (has_read_req) { 25 | pcie_read_mux_context.write(context); 26 | pcie_read_req.write(read_req); 27 | } 28 | } 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/pcie_read_resp_multiplexer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_MULTIPLEXER_CPP_ 2 | #define PCIE_READ_RESP_MULTIPLEXER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_multiplexer(ST_Queue &pcie_read_resp, 7 | ST_Queue &device_pcie_read_resp, 8 | ST_Queue &host_pcie_read_resp, 9 | ST_Queue &pcie_read_mux_context) { 10 | bool has_context = false; 11 | bool context; 12 | while (1) { 13 | #pragma HLS pipeline 14 | if (has_context || (has_context = pcie_read_mux_context.read_nb(context))) { 15 | PCIe_Read_Resp read_resp; 16 | if (pcie_read_resp.read_nb(read_resp)) { 17 | if (context == 0) { 18 | device_pcie_read_resp.write(read_resp); 19 | } else { 20 | host_pcie_read_resp.write(read_resp); 21 | } 22 | if (read_resp.last) { 23 | has_context = false; 24 | } 25 | } 26 | } 27 | } 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /apps/device/rle/csim/src/pcie_read_resp_passer.cpp: -------------------------------------------------------------------------------- 1 | #ifndef PCIE_READ_RESP_PASSER_CPP_ 2 | #define 
PCIE_READ_RESP_PASSER_CPP_ 3 | 4 | #include 5 | 6 | void pcie_read_resp_passer( 7 | ST_Queue &pcie_read_resp, 8 | ST_Queue &before_throttle_pcie_read_resp) { 9 | while (1) { 10 | #pragma HLS pipeline 11 | PCIe_Read_Resp read_resp; 12 | if (pcie_read_resp.read_nb(read_resp)) { 13 | before_throttle_pcie_read_resp.write(read_resp); 14 | } 15 | } 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /apps/device/rle/inc/structure.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define NUM_OF_PAIRS (32) 6 | 7 | struct APP_Expand_Data { 8 | unsigned short prefix_sum[NUM_OF_PAIRS + 1]; 9 | char alpha_array[NUM_OF_PAIRS]; 10 | bool eop; 11 | unsigned char len; // mark valid number of pairs 12 | }; 13 | -------------------------------------------------------------------------------- /apps/device/rle/interconnects.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "structure.h" 4 | 5 | #include "app_rle_prefix_sum.cpp" 6 | #include "app_rle_combine.cpp" 7 | #include "app_rle_expand.cpp" 8 | 9 | void interconnects() { 10 | ST_Queue app_rle_expand_input_data(8); 11 | ST_Queue app_rle_combine_input_data(8); 12 | 13 | app_rle_prefix_sum(app_input_params, app_input_data, app_rle_expand_input_data); 14 | app_rle_expand(app_rle_expand_input_data, app_rle_combine_input_data); 15 | app_rle_combine(app_rle_combine_input_data, app_output_data); 16 | } 17 | -------------------------------------------------------------------------------- /apps/device/rle/kernels/app_rle_prefix_sum.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "structure.h" 4 | 5 | #ifdef CSIM 6 | #define reg(x) (x) 7 | #else 8 | #include 9 | #endif 10 | 11 | void app_rle_prefix_sum(ST_Queue &app_input_param, 12 | ST_Queue &app_input_data, 13 | ST_Queue 
&app_rle_expand_input_data) { 14 | bool init = false; 15 | APP_Data data_in; 16 | APP_Expand_Data data_out; 17 | #pragma HLS array_partition variable = data_out.prefix_sum complete dim = 0 18 | #pragma HLS array_partition variable = data_out.alpha_array complete dim = 0 19 | while (1) { 20 | #pragma HLS pipeline 21 | unsigned int dummy; 22 | app_input_param.read_nb(dummy); 23 | 24 | if (!init) { 25 | data_out.prefix_sum[0] = 0; 26 | init = true; 27 | } else if (app_input_data.read_nb(data_in)) { 28 | for (int i = 0; i < NUM_OF_PAIRS; i++) { 29 | if (i < (data_in.len >> 1)) { 30 | data_out.prefix_sum[i + 1] = 31 | reg(data_out.prefix_sum[i] + data_in.data(16 * i + 7, 16 * i)); 32 | data_out.alpha_array[i] = data_in.data(16 * i + 15, 16 * i + 8); 33 | } else { 34 | data_out.prefix_sum[i + 1] = reg(data_out.prefix_sum[i]); 35 | data_out.alpha_array[i] = 0; 36 | } 37 | } 38 | data_out.eop = data_in.eop; 39 | data_out.len = (data_in.len >> 1); 40 | app_rle_expand_input_data.write(data_out); 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /apps/device/sql/inc/constant.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTANT_H_ 2 | #define CONSTANT_H_ 3 | 4 | #define FEATURE_DIM (4096) 5 | #define MAX_FEATURE_WEIGHT (10) 6 | #define MAX_FEATURE_WEIGHT_LOG2 (4) 7 | #define RESULT_SIZE (64) 8 | #define WEIGHT_SIZE (1) 9 | #define POKE_WIDTH (32) 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /apps/device/sql/inc/structure.h: -------------------------------------------------------------------------------- 1 | #ifndef STRUCTURE_H_ 2 | #define STRUCTURE_H_ 3 | 4 | struct SQL_Record { 5 | char round_name[12]; 6 | char player_name[12]; 7 | char score[2]; 8 | char month[2]; 9 | char day[2]; 10 | char year[2]; 11 | bool eop; 12 | bool valid; 13 | } __attribute__((packed)); 14 | 15 | struct Filter_Params { 16 | unsigned 
// Describes how the SQL kernels are wired together.
// app_input_data, app_input_params and app_output_data are not declared in
// this function; they come from outside it.
void interconnects() {
  // Record streams into and out of the two filter kernels.
  // Each queue is constructed with the argument 4 -- presumably its depth;
  // TODO confirm against the ST_Queue definition.
  ST_Queue app_sql_filter0_input_records(4);
  ST_Queue app_sql_filter1_input_records(4);
  ST_Queue app_sql_filter0_output_records(4);
  ST_Queue app_sql_filter1_output_records(4);
  // Per-filter parameter streams produced by the input parser.
  ST_Queue app_sql_filter0_params(4);
  ST_Queue app_sql_filter1_params(4);

  // The parser feeds both filters with records and parameters.
  app_sql_input_parser(app_input_data, app_input_params,
                       app_sql_filter0_input_records,
                       app_sql_filter1_input_records, app_sql_filter0_params,
                       app_sql_filter1_params);
  // The two filters each consume their own parameter and record queues.
  app_sql_filter0(app_sql_filter0_params, app_sql_filter0_input_records,
                  app_sql_filter0_output_records);
  app_sql_filter1(app_sql_filter1_params, app_sql_filter1_input_records,
                  app_sql_filter1_output_records);
  // The writer consumes both filter outputs and produces app_output_data.
  app_sql_writer(app_sql_filter0_output_records, app_sql_filter1_output_records,
                 app_output_data);
}
year_lower_thresh; 15 | while (1) { 16 | #pragma HLS pipeline 17 | if (!valid_filter_params) { 18 | Filter_Params filter_params; 19 | if (app_sql_filter0_params.read_nb(filter_params)) { 20 | year_upper_thresh = filter_params.year_upper_thresh; 21 | year_lower_thresh = filter_params.year_lower_thresh; 22 | valid_filter_params = true; 23 | } 24 | } else { 25 | SQL_Record record; 26 | if (app_sql_filter0_input_records.read_nb(record)) { 27 | unsigned char year = 28 | (record.year[1] - '0') + (record.year[0] - '0') * 10; 29 | record.valid = (year >= year_lower_thresh && year <= year_upper_thresh); 30 | app_sql_filter0_output_records.write(record); 31 | } 32 | } 33 | } 34 | } 35 | #endif 36 | -------------------------------------------------------------------------------- /apps/device/sql/kernels/app_sql_filter1.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_SQL_FILTER1_CPP_ 2 | #define APP_SQL_FILTER1_CPP_ 3 | #include 4 | 5 | #include "constant.h" 6 | #include "structure.h" 7 | void app_sql_filter1(ST_Queue &app_sql_filter1_params, 8 | ST_Queue &app_sql_filter1_input_records, 9 | ST_Queue &app_sql_filter1_output_records) { 10 | 11 | bool valid_filter_params = false; 12 | unsigned char year_upper_thresh; 13 | unsigned char year_lower_thresh; 14 | 15 | while (1) { 16 | #pragma HLS pipeline 17 | if (!valid_filter_params) { 18 | Filter_Params filter_params; 19 | if (app_sql_filter1_params.read_nb(filter_params)) { 20 | year_upper_thresh = filter_params.year_upper_thresh; 21 | year_lower_thresh = filter_params.year_lower_thresh; 22 | valid_filter_params = true; 23 | } 24 | } else { 25 | SQL_Record record; 26 | if (app_sql_filter1_input_records.read_nb(record)) { 27 | unsigned char year = 28 | (record.year[1] - '0') + (record.year[0] - '0') * 10; 29 | record.valid = (year >= year_lower_thresh && year <= year_upper_thresh); 30 | app_sql_filter1_output_records.write(record); 31 | } 32 | } 33 | } 34 | } 35 | #endif 36 | 
// Describes how the statistics kernels are chained: the calculator output is
// reduced 16 -> 8 -> 4 -> 2 lanes and then handed to the writer.
// app_input_params, app_input_data and app_output_data are not declared in
// this function; they come from outside it.
void interconnects() {
  // One queue between each pair of neighbouring stages.
  // Each queue is constructed with the argument 8 -- presumably its depth;
  // TODO confirm against the ST_Queue definition.
  ST_Queue app_stat_rdc_16to8_input(8);
  ST_Queue app_stat_rdc_8to4_input(8);
  ST_Queue app_stat_rdc_4to2_input(8);
  ST_Queue app_stat_writer_input(8);
  // Stage order below follows the data flow from input to output.
  app_stat_calculator(app_input_params, app_input_data,
                      app_stat_rdc_16to8_input);
  app_stat_rdc_16to8(app_stat_rdc_16to8_input, app_stat_rdc_8to4_input);
  app_stat_rdc_8to4(app_stat_rdc_8to4_input, app_stat_rdc_4to2_input);
  app_stat_rdc_4to2(app_stat_rdc_4to2_input, app_stat_writer_input);
  app_stat_writer(app_stat_writer_input, app_output_data);
}
/apps/device/statistics/kernels/app_stat_rdc_16to8.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_STAT_RDC16_CPP_ 2 | #define APP_STAT_RDC16_CPP_ 3 | 4 | #include 5 | 6 | #include "structure.h" 7 | 8 | void app_stat_rdc_16to8(ST_Queue &app_stat_rdc_16to8_input, 9 | ST_Queue &app_stat_rdc_8to4_input) { 10 | while (1) { 11 | #pragma HLS pipeline 12 | APP_Scatter_Data16 data_in; 13 | APP_Scatter_Data8 data_out; 14 | if (app_stat_rdc_16to8_input.read_nb(data_in)) { 15 | for (int i = 0; i < 16; i += 2) { 16 | #pragma HLS unroll 17 | data_out.max[i >> 1] = (data_in.max[i] > data_in.max[i + 1]) 18 | ? data_in.max[i] 19 | : data_in.max[i + 1]; 20 | } 21 | for (int i = 0; i < 16; i += 2) { 22 | #pragma HLS unroll 23 | data_out.min[i >> 1] = (data_in.min[i] < data_in.min[i + 1]) 24 | ? data_in.min[i] 25 | : data_in.min[i + 1]; 26 | } 27 | for (int i = 0; i < 16; i += 2) { 28 | #pragma HLS unroll 29 | data_out.sum[i >> 1] = data_in.sum[i] + data_in.sum[i + 1]; 30 | } 31 | data_out.eop = data_in.eop; 32 | app_stat_rdc_8to4_input.write(data_out); 33 | } 34 | } 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/statistics/kernels/app_stat_rdc_4to2.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_STAT_RDC4_CPP_ 2 | #define APP_STAT_RDC4_CPP_ 3 | 4 | #include 5 | 6 | #include "structure.h" 7 | 8 | void app_stat_rdc_4to2(ST_Queue &app_stat_rdc_4to2_input, 9 | ST_Queue &app_stat_writer_input) { 10 | while (1) { 11 | #pragma HLS pipeline 12 | APP_Scatter_Data4 data_in; 13 | APP_Scatter_Data2 data_out; 14 | if (app_stat_rdc_4to2_input.read_nb(data_in)) { 15 | for (int i = 0; i < 4; i += 2) { 16 | #pragma HLS unroll 17 | data_out.max[i >> 1] = (data_in.max[i] > data_in.max[i + 1]) 18 | ? 
data_in.max[i] 19 | : data_in.max[i + 1]; 20 | } 21 | for (int i = 0; i < 4; i += 2) { 22 | #pragma HLS unroll 23 | data_out.min[i >> 1] = (data_in.min[i] < data_in.min[i + 1]) 24 | ? data_in.min[i] 25 | : data_in.min[i + 1]; 26 | } 27 | for (int i = 0; i < 4; i += 2) { 28 | #pragma HLS unroll 29 | data_out.sum[i >> 1] = data_in.sum[i] + data_in.sum[i + 1]; 30 | } 31 | data_out.eop = data_in.eop; 32 | app_stat_writer_input.write(data_out); 33 | } 34 | } 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/device/statistics/kernels/app_stat_rdc_8to4.cpp: -------------------------------------------------------------------------------- 1 | #ifndef APP_STAT_RDC8_CPP_ 2 | #define APP_STAT_RDC8_CPP_ 3 | 4 | #include 5 | 6 | #include "structure.h" 7 | 8 | void app_stat_rdc_8to4(ST_Queue &app_stat_rdc_8to4_input, 9 | ST_Queue &app_stat_rdc_4to2_input) { 10 | while (1) { 11 | #pragma HLS pipeline 12 | APP_Scatter_Data8 data_in; 13 | APP_Scatter_Data4 data_out; 14 | if (app_stat_rdc_8to4_input.read_nb(data_in)) { 15 | for (int i = 0; i < 8; i += 2) { 16 | #pragma HLS unroll 17 | data_out.max[i >> 1] = (data_in.max[i] > data_in.max[i + 1]) 18 | ? data_in.max[i] 19 | : data_in.max[i + 1]; 20 | } 21 | for (int i = 0; i < 8; i += 2) { 22 | #pragma HLS unroll 23 | data_out.min[i >> 1] = (data_in.min[i] < data_in.min[i + 1]) 24 | ? 
data_in.min[i] 25 | : data_in.min[i + 1]; 26 | } 27 | for (int i = 0; i < 8; i += 2) { 28 | #pragma HLS unroll 29 | data_out.sum[i >> 1] = data_in.sum[i] + data_in.sum[i + 1]; 30 | } 31 | data_out.eop = data_in.eop; 32 | app_stat_rdc_4to2_input.write(data_out); 33 | } 34 | } 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /apps/host/grep/data_gen/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ data_generator.cpp -std=c++11 -O3 -fopenmp -o data_gen 3 | -------------------------------------------------------------------------------- /apps/host/grep/data_gen/data_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "../inc/const.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | char random_char() { 12 | int min_ascii = 65; 13 | int max_ascii = 68; 14 | return min_ascii + rand() % (max_ascii - min_ascii + 1); 15 | } 16 | 17 | int main() { 18 | ios::sync_with_stdio(false); 19 | 20 | freopen("/mnt/centos/grep_input.txt", "w", stdout); 21 | 22 | for (int i = 0; i < FILE_ROW_NUM; i++) { 23 | string line = ""; 24 | for (int j = 0; j < FILE_COL_NUM - 1; j++) { 25 | line += random_char(); 26 | } 27 | cout << line << endl; 28 | } 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /apps/host/grep/inc/const.h: -------------------------------------------------------------------------------- 1 | #ifndef CONST_H_ 2 | #define CONST_H_ 3 | 4 | #define FILE_ROW_NUM (983040) 5 | #define FILE_COL_NUM (65536) 6 | #define READ_BUF_SIZE (1024 * 1024 * 2) 7 | #define PARAM_STR_LEN (2) 8 | #define MAX_GREP_PARAM_SIZE (32) 9 | #define POKE_WIDTH_BYTES (32) 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- 
/apps/host/grep/src/pure_cpu/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ grep.cpp -O3 -o grep -std=c++11 -mcmodel=medium 3 | -------------------------------------------------------------------------------- /apps/host/integration/data_gen/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ data_generator.cpp -I ../inc -std=c++11 -O3 -fopenmp -o data_gen 3 | -------------------------------------------------------------------------------- /apps/host/integration/data_gen/data_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "../inc/const.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | char random_char() { 12 | int min_ascii = 65; 13 | int max_ascii = 90; 14 | return min_ascii + rand() % (max_ascii - min_ascii + 1); 15 | } 16 | 17 | int main() { 18 | ios::sync_with_stdio(false); 19 | 20 | freopen("/mnt/centos/data_integration.txt", "w", stdout); 21 | 22 | for (int i = 0; i < APP_RECORD_NUM; i++) { 23 | string row_data = ""; 24 | for (int j = 0; j < APP_RECORD_LENGTH; j++) { 25 | row_data += random_char(); 26 | } 27 | cout << row_data; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /apps/host/integration/inc/const.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONST_H_ 2 | #define _CONST_H_ 3 | 4 | #define APP_RECORD_NUM (1006632960LL) 5 | #define APP_RECORD_LENGTH (64) // counted in B 6 | #define APP_QUERY_LENGTH (32) // in Byte 7 | #define APP_THREAS (0.5) 8 | #define APP_QUERY_THRES (1) 9 | #define APP_RECORD_THRES (3) 10 | #define READ_BUF_SIZE (2 * 1024 * 1024) 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- 
// Left-pads `orig` with '0' characters until it is `len` characters long
// and returns the result.
//
// Preconditions (asserted): `len` is non-negative and `orig` is not already
// longer than `len`. When assertions are compiled out, a string that is
// already long enough -- or a negative `len` -- yields `orig` unchanged.
std::string supply_leadings(std::string orig, int len) {
  // Comparing size() with a negative int would convert it to a huge unsigned
  // value, so the sign is checked before any size comparison.
  assert(len >= 0);
  const std::string::size_type target =
      len > 0 ? static_cast<std::string::size_type>(len) : 0;
  assert(orig.size() <= target);

  // Insert all missing zeros at once rather than prepending one per loop
  // iteration, which copies the whole string each time.
  if (orig.size() < target) {
    orig.insert(0, target - orig.size(), '0');
  }
  return orig;
}
#!/bin/bash
# Generates the pass-through input file, builds the offloaded reader and
# runs it.

# Stop at the first failing step so that a failed data generation or a failed
# build never falls through to running a stale ./read_pt binary.
set -e

../../../data_gen/run.sh
insider_host_g++ -O3 read_pt.cpp -o read_pt
./read_pt
$FILE_SIZE /mnt/centos/output.txt 6 | shred -n 1 /mnt/centos/output.txt 7 | insider_host_g++ -O3 write_pt.cpp -o write_pt 8 | ./write_pt 9 | diff /mnt/centos/input.txt /mnt/centos/output.txt 10 | if [ $? -eq 0 ] 11 | then 12 | echo "PASSED!" 13 | else 14 | echo "FAILED!" 15 | fi 16 | -------------------------------------------------------------------------------- /apps/host/pt/src/offload/write_pt/write_pt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | const int BUF_SIZE = 2 * 1024 * 1024; 10 | unsigned char buf[BUF_SIZE]; 11 | 12 | int main() { 13 | const char *input_path = "/mnt/centos/input.txt"; 14 | const char *real_path = "/mnt/centos/output.txt"; 15 | const char *virt_path = reg_virt_file(real_path); 16 | int v_fd = vopen(virt_path, O_WRONLY); 17 | int i_fd = open(input_path, O_RDONLY); 18 | 19 | if (v_fd && i_fd) { 20 | while (1) { 21 | size_t input_read_bytes = read(i_fd, buf, BUF_SIZE); 22 | if (input_read_bytes == 0) { 23 | break; 24 | } else if (input_read_bytes < 0) { 25 | perror("Read failed"); 26 | exit(-1); 27 | } else { 28 | size_t virt_write_bytes = 0; 29 | while (virt_write_bytes != input_read_bytes) { 30 | size_t delta_bytes = vwrite(v_fd, buf + virt_write_bytes, 31 | input_read_bytes - virt_write_bytes); 32 | if (delta_bytes <= 0) { 33 | std::cout << "Error: delta_bytes <= 0" << std::endl; 34 | goto finish; 35 | } 36 | virt_write_bytes += delta_bytes; 37 | } 38 | } 39 | } 40 | } 41 | finish: 42 | if (i_fd) { 43 | close(i_fd); 44 | } 45 | if (v_fd) { 46 | vclose(v_fd); 47 | } 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /apps/host/relief/data_gen/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ data_generator.cpp -mcmodel=medium -std=c++11 -O3 -fopenmp -o data_gen 3 | 
#ifndef CONST_H_
#define CONST_H_

#define APP_FEATURE_DIM (256) // count in 4B
#define APP_HIT_DATA_NUM (15728640LL)
#define APP_MISS_DATA_NUM (15728640LL)
#define APP_QUERY_NUM (1)
// 2 MiB, the same read-buffer size the other host apps use. The generated
// data set ((hit + miss) rows of APP_FEATURE_DIM 4-byte values) is a whole
// multiple of this size.
#define READ_BUF_SIZE (2 * 1024 * 1024)

#define APP_THRES (-10)
#endif
#!/bin/bash
# End-to-end RLE check: generate the input, run the host-only version, run
# the offloaded version into a pre-allocated output file, then diff the two
# outputs.

# Stop at the first failing step. Every later step depends on the working
# directory and on the artefacts produced by the earlier ones, so continuing
# after a failed cd or build would run the wrong binary in the wrong place.
# A non-empty diff also makes the script exit with a non-zero status.
set -e

echo "Generating the input data..."
cd data_gen
./compile.sh
./data_gen

echo "Running the host-only version..."
cd ../src/pure_cpu
./compile.sh
./rle
mv /mnt/centos/output.txt /mnt/centos/output_cpu.txt

echo "Setting the output file..."
FILE_SIZE=24193176000
fallocate -l "$FILE_SIZE" /mnt/centos/output.txt
shred -n 1 -s "$FILE_SIZE" /mnt/centos/output.txt

echo "Running the offloading version..."
cd ../offload
insider_host_g++ -O3 rle.cpp -o rle
./rle

echo "Comparing the result..."
diff /mnt/centos/output.txt /mnt/centos/output_cpu.txt

cd ../../
#ifndef CONST_H
#define CONST_H

// Size of the generated table.
#define ROW_NUM (2013265920)
#define ROUND_NUM (1024)
// Width in characters of each column of a row.
#define ROUND_NAME_LEN (12)
#define PLAYER_NAME_LEN (12)
#define SCORE_LEN (2)
#define MONTH_LEN (2)
#define DAY_LEN (2)
#define YEAR_LEN (2)
// Total row width: the sum of all column widths above (32 with the current
// values).
#define ROW_LEN                                                                \
  (ROUND_NAME_LEN + PLAYER_NAME_LEN + SCORE_LEN + MONTH_LEN + DAY_LEN +        \
   YEAR_LEN)

// 2 MiB.
#define READ_BUF_SIZE (1024 * 1024 * 2)
// Bounds of the year filter.
#define YEAR_LOWER_THRESH (50)
#define YEAR_UPPER_THRESH (60)

#endif
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | insider_host_g++ -O3 -mcmodel=medium sql.cpp -o sql 3 | 4 | -------------------------------------------------------------------------------- /apps/host/sql/src/pure_cpu/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ sql.cpp -O3 -o sql -std=c++11 -mcmodel=medium 3 | -------------------------------------------------------------------------------- /apps/host/statistics/data_gen/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ data_generator.cpp -std=c++11 -O3 -fopenmp -o data_gen 3 | -------------------------------------------------------------------------------- /apps/host/statistics/data_gen/data_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "../inc/const.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | int main() { 11 | ios::sync_with_stdio(false); 12 | freopen("/mnt/centos/statistical_data.txt", "w", stdout); 13 | 14 | for (int i = 0; i < ROW_NUM; i++) { 15 | for (int j = 0; j < COL_NUM / sizeof(unsigned int); j++) { 16 | unsigned int number = rand(); 17 | char *number_in_bytes = (char *)&number; 18 | for (int k = 0; k < sizeof(unsigned int); k++) { 19 | cout << hex << number_in_bytes[k]; 20 | } 21 | } 22 | } 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /apps/host/statistics/inc/const.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONST_H 2 | #define _CONST_H 3 | 4 | #define COL_NUM (786432) // counted in B 5 | #define ROW_NUM (65536) 6 | #define NUMBER_LENGTH (4) // 64 % NUMBER_LENGTH must equals 0 7 | #define READ_BUF_SIZE (2 * 1024 * 1024) 8 | 9 | #endif 10 | 
-------------------------------------------------------------------------------- /apps/host/statistics/src/pure_cpu/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ statistical.cpp -O3 -o statistical -std=c++11 -mcmodel=medium 3 | -------------------------------------------------------------------------------- /driver/dma/Makefile: -------------------------------------------------------------------------------- 1 | # If called directly from the command line, invoke the kernel build system. 2 | 3 | ifeq ($(KERNELRELEASE),) 4 | 5 | KERNEL_SOURCE := /lib/modules/$(shell uname -r)/build 6 | PWD := $(shell pwd) 7 | # M= tells kbuild where this external module lives; the legacy SUBDIRS= spelling is no longer honoured by recent kernels. 8 | module: 9 | $(MAKE) -C $(KERNEL_SOURCE) M=$(PWD) modules 10 | 11 | clean: 12 | $(MAKE) -C $(KERNEL_SOURCE) M=$(PWD) clean 13 | 14 | # Otherwise KERNELRELEASE is defined; we've been invoked from the 15 | # kernel build system and can use its language. 16 | else 17 | 18 | obj-m := fpga_dma.o 19 | 20 | endif 21 | -------------------------------------------------------------------------------- /driver/insider_runtime/Makefile: -------------------------------------------------------------------------------- 1 | obj-m += insider_runtime.o 2 | KDIR = /lib/modules/$(shell uname -r)/build 3 | all: 4 | $(MAKE) -C $(KDIR) M=$(shell pwd) modules 5 | clean: 6 | $(MAKE) -C $(KDIR) M=$(shell pwd) clean 7 | -------------------------------------------------------------------------------- /driver/nvme/Makefile: -------------------------------------------------------------------------------- 1 | # If called directly from the command line, invoke the kernel build system. 
2 | 3 | ifeq ($(KERNELRELEASE),) 4 | 5 | KERNEL_SOURCE := /lib/modules/$(shell uname -r)/build 6 | PWD := $(shell pwd) 7 | # M= tells kbuild where this external module lives; the legacy SUBDIRS= spelling is no longer honoured by recent kernels. 8 | module: 9 | $(MAKE) -C $(KERNEL_SOURCE) M=$(PWD) modules 10 | 11 | clean: 12 | $(MAKE) -C $(KERNEL_SOURCE) M=$(PWD) clean 13 | 14 | # Otherwise KERNELRELEASE is defined; we've been invoked from the 15 | # kernel build system and can use its language. 16 | else 17 | 18 | obj-m := nvme_fpga.o 19 | nvme_fpga-y := disk_block.o 20 | 21 | endif 22 | -------------------------------------------------------------------------------- /driver/nvme/const.h: -------------------------------------------------------------------------------- 1 | #ifndef CONST_H 2 | #define CONST_H 3 | 4 | #define FIRST_MINOR (0) 5 | #define MINOR_CNT (16) 6 | 7 | #define HEAD_CNT (128) 8 | #define SECTOR_CNT (128) 9 | #define CYLINDER_CNT (8192) 10 | #define PHYSICAL_SECTOR_SIZE (4096) 11 | #define LOGICAL_SECTOR_SIZE (512) 12 | #define LOGICAL_SECTOR_SIZE_LOG2 (9) /* log2(LOGICAL_SECTOR_SIZE): 2^9 = 512 */ 13 | 14 | #define DEVICE_SIZE (HEAD_CNT * SECTOR_CNT * CYLINDER_CNT) /* = 2^27 with the geometry above; unit not stated here - TODO confirm */ 15 | #define DEVICE_NAME ("nvme_fpga") 16 | #define DEVICE_PCIE_WIDTH (64) 17 | #define DEVICE_REQ_QUEUE_DEPTH (64) // must be a power of 2 18 | 19 | #define MASK_MIN (7) 20 | #define BITMAP_CHUNK_NUM (4) 21 | #define BITMAP_CHUNK_SIZE (DEVICE_REQ_QUEUE_DEPTH / BITMAP_CHUNK_NUM) /* = 16 with the values above */ 22 | 23 | #define PHY_MEM_BASE_ADDR (0x82000000) 24 | #define PHY_MEM_SIZE (32 * (1 << 20)) // 32MB 25 | 26 | #define READ_CMD_TAG_OFFSET (64) 27 | #define WRITE_CMD_TAG_OFFSET (128) 28 | #define KBUF_ADDR_TAG (4) 29 | #define HOST_DELAY_CYCLE_TAG (5) 30 | #define HOST_THROTTLE_PARAM_TAG (6) 31 | #define DEVICE_DELAY_CYCLE_TAG (7) 32 | #define DEVICE_THROTTLE_PARAM_TAG (8) 33 | 34 | #define HOST_READ_THROTTLE_PARAM (0) 35 | #define HOST_WRITE_THROTTLE_PARAM (0) 36 | #define DEVICE_DELAY_CYCLE_CNT (250) 37 | #define DEVICE_READ_THROTTLE_PARAM (0) 38 | #define DEVICE_WRITE_THROTTLE_PARAM (0) 39 | 40 | #define WRITE_TYPE (0) 41 | #define READ_TYPE (1) 42 | 43 | 
#endif 44 | -------------------------------------------------------------------------------- /fio/bw_read.fio: -------------------------------------------------------------------------------- 1 | [global] 2 | bs=4m 3 | ioengine=sync 4 | size=60g 5 | direct=1 6 | runtime=60 7 | filename=/dev/nvme_fpga 8 | numjobs=8 9 | # Bandwidth run: sequential reads using the [global] settings above. 10 | [seq-read] 11 | rw=read 12 | stonewall -------------------------------------------------------------------------------- /fio/bw_write.fio: -------------------------------------------------------------------------------- 1 | [global] 2 | bs=4m 3 | ioengine=sync 4 | size=60g 5 | direct=1 6 | runtime=60 7 | filename=/dev/nvme_fpga 8 | numjobs=8 9 | # Bandwidth run: sequential writes using the [global] settings above. 10 | [seq-write] 11 | rw=write 12 | stonewall -------------------------------------------------------------------------------- /fio/lat_read.fio: -------------------------------------------------------------------------------- 1 | [global] 2 | bs=512 3 | ioengine=sync 4 | size=1g 5 | direct=1 6 | runtime=60 7 | filename=/dev/nvme_fpga 8 | numjobs=1 9 | # Latency run: sequential reads using the [global] settings above. 10 | [seq-read] 11 | rw=read 12 | stonewall -------------------------------------------------------------------------------- /fio/lat_write.fio: -------------------------------------------------------------------------------- 1 | [global] 2 | bs=512 3 | ioengine=sync 4 | size=1g 5 | direct=1 6 | runtime=60 7 | filename=/dev/nvme_fpga 8 | numjobs=1 9 | # Latency run: sequential writes using the [global] settings above (job name mirrors rw=). 10 | [seq-write] 11 | rw=write 12 | stonewall --------------------------------------------------------------------------------