├── .gitignore
├── GPU
    ├── final_network_cublasLt_1_node_no_FIFO_scatter
    │   ├── test.cpp
    │   ├── a.out
    │   ├── cuda_server
    │   ├── network_client_sende
    │   ├── single_connection_network_client_sender
    │   ├── multiple_connections_network_client_sender
    │   ├── single_connection_network_server_receiver
    │   ├── multiple_connections_network_server_receiver
    │   ├── run_client_sender.sh
    │   ├── run_cuda_server.sh
    │   ├── pthread_test.c
    │   ├── constant.h
    │   ├── README.md
    │   ├── out
    │   └── single_connection_network_client_sender.c
    ├── measure_network_cuda_cp_latency_3_nodes
    │   ├── a.out
    │   ├── cuda_server
    │   ├── timeline.prof
    │   ├── network_client_sende
    │   ├── single_connection_network_client_sender
    │   ├── single_connection_network_server_receiver
    │   ├── multiple_connections_network_client_sender
    │   ├── multiple_connections_network_server_receiver
    │   ├── run_cuda_server.sh
    │   ├── CPU0_multiple_connections_network_client_sender
    │   ├── FPGA0_multiple_connections_network_client_sender
    │   ├── FPGA1_multiple_connections_network_client_sender
    │   ├── run_client_sender.sh
    │   ├── pthread_test.c
    │   ├── README.md
    │   ├── out
    │   ├── constant.h
    │   └── single_connection_network_client_sender.c
    ├── measure_network_cuda_cp_latency_single_node
    │   ├── a.out
    │   ├── cuda_server
    │   ├── timeline.prof
    │   ├── network_client_sende
    │   ├── simple_2_thread_latency
    │   ├── simple_2_thread_latency.sh
    │   ├── single_connection_network_client_sender
    │   ├── multiple_connections_network_client_sender
    │   ├── single_connection_network_server_receiver
    │   ├── multiple_connections_network_server_receiver
    │   ├── run_cuda_server.sh
    │   ├── run_client_sender.sh
    │   ├── pthread_test.c
    │   ├── constant.h
    │   ├── README.md
    │   └── single_connection_network_client_sender.c
    └── final_network_cublasLt_3_nodes_no_FIFO_scatter
    │   ├── a.out
    │   ├── cuda_server
    │   ├── network_client_sende
    │   ├── single_connection_network_client_sender
    │   ├── single_connection_network_server_receiver
    │   ├── multiple_connections_network_client_sender
    │   ├── multiple_connections_network_server_receiver
    │   ├── CPU0_multiple_connections_network_client_sender
    │   ├── FPGA0_multiple_connections_network_client_sender
    │   ├── FPGA1_multiple_connections_network_client_sender
    │   ├── run_single_sender_CPU0.sh
    │   ├── run_single_sender_FPGA0.sh
    │   ├── run_single_sender_FPGA1.sh
    │   ├── run_cuda_server.sh
    │   ├── run_client_sender.sh
    │   ├── out
    │   ├── pthread_test.c
    │   ├── constant.h
    │   ├── README.md
    │   └── single_connection_network_client_sender.c
├── FPGA
    ├── kernel
    │   ├── user_krnl
    │   │   ├── scatter_krnl
    │   │   │   ├── src
    │   │   │   │   └── hls
    │   │   │   │   │   ├── scatter_config.hpp.in
    │   │   │   │   │   ├── make.tcl.in
    │   │   │   │   │   ├── mem_utils.hpp
    │   │   │   │   │   ├── CMakeLists.txt
    │   │   │   │   │   ├── scatter.hpp
    │   │   │   │   │   └── packet.hpp
    │   │   │   └── config_sp_scatter_krnl.txt
    │   │   ├── iperf_krnl
    │   │   │   ├── src
    │   │   │   │   └── hls
    │   │   │   │   │   ├── iperf_client_config.hpp.in
    │   │   │   │   │   ├── make.tcl.in
    │   │   │   │   │   ├── mem_utils.hpp
    │   │   │   │   │   ├── CMakeLists.txt
    │   │   │   │   │   └── packet.hpp
    │   │   │   └── config_sp_iperf_krnl.txt
    │   │   ├── hls_test_krnl
    │   │   │   ├── config_sp_hls_test_krnl.txt
    │   │   │   └── src
    │   │   │   │   └── hls
    │   │   │   │       ├── mem_utils.hpp
    │   │   │   │       └── in_casting_bench.hpp
    │   │   ├── embedding_krnl
    │   │   │   ├── src
    │   │   │   │   └── hls
    │   │   │   │   │   └── mem_utils.hpp
    │   │   │   └── config_sp_embedding_krnl.txt
    │   │   ├── embedding_377_krnl
    │   │   │   ├── src
    │   │   │   │   └── hls
    │   │   │   │   │   └── mem_utils.hpp
    │   │   │   └── config_sp_embedding_377_krnl.txt
    │   │   ├── embedding_47_krnl
    │   │   │   ├── src
    │   │   │   │   └── hls
    │   │   │   │   │   └── mem_utils.hpp
    │   │   │   └── config_sp_embedding_47_krnl.txt
    │   │   └── embedding_98_krnl
    │   │   │   ├── src
    │   │   │       └── hls
    │   │   │       │   └── mem_utils.hpp
    │   │   │   └── config_sp_embedding_98_krnl.txt
    │   ├── common
    │   │   └── types
    │   │   │   └── network_types.svh.in
    │   ├── network_krnl
    │   │   └── src
    │   │   │   └── hdl
    │   │   │       ├── axis_meta_reg.sv
    │   │   │       ├── axis_data_reg_array.sv
    │   │   │       ├── axis_udp_meta_reg.sv
    │   │   │       └── axis_data_reg.sv
    │   └── cmac_krnl
    │   │   ├── cmac_krnl.xml
    │   │   └── src
    │   │       └── hdl
    │   │           ├── axis_data_reg_array.sv
    │   │           ├── axis_data_reg.sv
    │   │           └── network_clk_cross.sv
    ├── common
    │   ├── includes
    │   │   ├── xcl2
    │   │   │   ├── xcl2.mk
    │   │   │   └── xcl2.cpp
    │   │   ├── bitmap
    │   │   │   ├── bitmap.mk
    │   │   │   ├── bitmap.h
    │   │   │   └── bitmap.cpp
    │   │   ├── logger
    │   │   │   ├── logger.mk
    │   │   │   └── logger.h
    │   │   ├── lodepng
    │   │   │   └── lodepng.mk
    │   │   ├── simplebmp
    │   │   │   ├── simplebmp.mk
    │   │   │   └── simplebmp.h
    │   │   ├── cmdparser
    │   │   │   └── cmdparser.mk
    │   │   ├── oclHelper
    │   │   │   ├── oclHelper.mk
    │   │   │   └── oclHelper.h
    │   │   └── opencl
    │   │   │   └── opencl.mk
    │   └── utility
    │   │   ├── readme_gen
    │   │       ├── gs_summary.py
    │   │       ├── gs_summary_subdir.py
    │   │       ├── update_all_readme.sh
    │   │       └── gs_summary_util.py
    │   │   ├── parse_platform_list.py
    │   │   ├── check_target_device.py
    │   │   ├── makefile_gen
    │   │       ├── update_makegen_all.sh
    │   │       ├── update_descgen_all.sh
    │   │       └── descgen.py
    │   │   ├── device_list.py
    │   │   ├── build_what.sh
    │   │   ├── md2rst
    │   │       └── update_md2rst_all.sh
    │   │   ├── check_license.sh
    │   │   ├── Consolidation.py
    │   │   ├── check_readme.sh
    │   │   ├── check_makefile.sh
    │   │   └── check_descr.py
    ├── README.md
    ├── cmake
    │   ├── FindVivado.cmake
    │   └── FindVivadoHLS.cmake
    ├── config_rtl.mk
    ├── config_hls.mk
    ├── scripts
    │   ├── network_ultrascale.tcl
    │   └── gen_xo.tcl
    ├── CMakeLists.txt
    ├── utils.mk
    └── host
    │   ├── embedding_377_krnl
    │       └── host.hpp
    │   ├── embedding_47_krnl
    │       └── host.hpp
    │   ├── embedding_98_krnl
    │       └── host.hpp
    │   └── embedding_krnl
    │       └── host.hpp
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/test.cpp:
--------------------------------------------------------------------------------
1 | #include <mutex>
2 | 
3 | int main(){}
4 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/scatter_config.hpp.in:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | const unsigned DATA_WIDTH = ${DATA_WIDTH} * 8;


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/src/hls/iperf_client_config.hpp.in:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | const unsigned DATA_WIDTH = ${DATA_WIDTH} * 8;
4 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/a.out


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/a.out


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/cuda_server:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/cuda_server


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/a.out


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/a.out


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/timeline.prof:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/timeline.prof


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/cuda_server:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/cuda_server


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/cuda_server:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/cuda_server


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/cuda_server:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/cuda_server


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/timeline.prof:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/timeline.prof


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/network_client_sende:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/network_client_sende


--------------------------------------------------------------------------------
/FPGA/common/includes/xcl2/xcl2.mk:
--------------------------------------------------------------------------------
1 | xcl2_SRCS:=${COMMON_REPO}/common/includes/xcl2/xcl2.cpp
2 | xcl2_HDRS:=${COMMON_REPO}/common/includes/xcl2/xcl2.hpp
3 | 
4 | xcl2_CXXFLAGS:=-I${COMMON_REPO}/common/includes/xcl2
5 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/network_client_sende:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/network_client_sende


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/network_client_sende:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/network_client_sende


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/network_client_sende:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/network_client_sende


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/simple_2_thread_latency:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/simple_2_thread_latency


--------------------------------------------------------------------------------
/FPGA/common/includes/bitmap/bitmap.mk:
--------------------------------------------------------------------------------
1 | bitmap_SRCS:=${COMMON_REPO}/common/includes/bitmap/bitmap.cpp
2 | bitmap_HDRS:=${COMMON_REPO}/common/includes/bitmap/bitmap.h
3 | bitmap_CXXFLAGS:=-I${COMMON_REPO}/common/includes/bitmap
4 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/logger/logger.mk:
--------------------------------------------------------------------------------
1 | logger_SRCS:=${COMMON_REPO}/common/includes/logger/logger.cpp
2 | logger_HDRS:=${COMMON_REPO}/common/includes/logger/logger.h
3 | logger_CXXFLAGS:=-I${COMMON_REPO}/common/includes/logger
4 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/lodepng/lodepng.mk:
--------------------------------------------------------------------------------
1 | lodepng_SRCS:=${COMMON_REPO}/common/includes/lodepng/lodepng.cpp
2 | lodepng_HDRS:=${COMMON_REPO}/common/includes/lodepng/lodepng.h
3 | lodepng_CXXFLAGS:=-I${COMMON_REPO}/common/includes/lodepng
4 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/simple_2_thread_latency.sh:
--------------------------------------------------------------------------------
1 | # Run this script before client
2 | rm simple_2_thread_latency  # drop the stale binary so a failed build cannot run an old one
3 | gcc simple_2_thread_latency.c -lpthread -o simple_2_thread_latency  # -l must follow the sources that use it: the linker resolves symbols left to right
4 | ./simple_2_thread_latency
5 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/single_connection_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/single_connection_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/single_connection_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/single_connection_network_server_receiver


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/single_connection_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/single_connection_network_client_sender


--------------------------------------------------------------------------------
/FPGA/common/includes/simplebmp/simplebmp.mk:
--------------------------------------------------------------------------------
1 | simplebmp_SRCS:=${COMMON_REPO}/common/includes/simplebmp/simplebmp.cpp
2 | simplebmp_HDRS:=${COMMON_REPO}/common/includes/simplebmp/simplebmp.h
3 | simplebmp_CXXFLAGS:=-I${COMMON_REPO}/common/includes/simplebmp
4 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/single_connection_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/single_connection_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/single_connection_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/single_connection_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/multiple_connections_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/multiple_connections_network_server_receiver


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/run_cuda_server.sh:
--------------------------------------------------------------------------------
1 | # Run this script before client
2 | rm cuda_server
3 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
4 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/single_connection_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/single_connection_network_server_receiver


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/single_connection_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/single_connection_network_server_receiver


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/single_connection_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/single_connection_network_server_receiver


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/CPU0_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/CPU0_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/FPGA0_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/FPGA0_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/FPGA1_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_3_nodes/FPGA1_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/multiple_connections_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/measure_network_cuda_cp_latency_single_node/multiple_connections_network_server_receiver


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/run_cuda_server.sh:
--------------------------------------------------------------------------------
1 | # Run this script before client
2 | rm cuda_server
3 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
4 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server


--------------------------------------------------------------------------------
/FPGA/common/includes/cmdparser/cmdparser.mk:
--------------------------------------------------------------------------------
1 | cmdparser_SRCS:=${COMMON_REPO}/common/includes/cmdparser/cmdlineparser.cpp
2 | cmdparser_HDRS:=${COMMON_REPO}/common/includes/cmdparser/cmdlineparser.h
3 | cmdparser_CXXFLAGS:=-I${COMMON_REPO}/common/includes/cmdparser
4 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/multiple_connections_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/multiple_connections_network_server_receiver


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/multiple_connections_network_server_receiver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/multiple_connections_network_server_receiver


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/CPU0_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/CPU0_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/FPGA0_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/FPGA0_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/FPGA1_multiple_connections_network_client_sender:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fpgasystems/GPU-FPGA-Recommendation-System/HEAD/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/FPGA1_multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/run_client_sender.sh:
--------------------------------------------------------------------------------
1 | # run this script after server
2 | rm multiple_connections_network_client_sender
3 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
4 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/run_client_sender.sh:
--------------------------------------------------------------------------------
1 | # run this script after server
2 | rm multiple_connections_network_client_sender
3 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
4 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/FPGA/common/includes/oclHelper/oclHelper.mk:
--------------------------------------------------------------------------------
1 | oclHelper_SRCS:=${COMMON_REPO}/common/includes/oclHelper/oclHelper.cpp ${COMMON_REPO}/common/includes/oclHelper/oclErrorCodes.cpp
2 | oclHelper_HDRS:=${COMMON_REPO}/common/includes/oclHelper/oclHelper.h
3 | oclHelper_CXXFLAGS:=-I${COMMON_REPO}/common/includes/oclHelper
4 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/readme_gen/gs_summary.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os, re
 4 | import fnmatch
 5 | import json
 6 | import sys
 7 | 
 8 | # To avoid .pyc files
 9 | sys.dont_write_bytecode = True  # set before the gs_summary_util import below so that import writes no bytecode
10 | 
11 | sys.path.append(".")  # lets the next import find gs_summary_util.py in the current working directory
12 | import gs_summary_util
13 | 
14 | gs_summary_util.genReadMe(".")  # NOTE(review): presumably generates the README for the current directory -- confirm in gs_summary_util
15 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/readme_gen/gs_summary_subdir.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os, re
 4 | import fnmatch
 5 | import json
 6 | import sys
 7 | 
 8 | # To avoid .pyc files
 9 | sys.dont_write_bytecode = True  # set before the gs_summary_util import below so that import writes no bytecode
10 | 
11 | sys.path.append(".")  # lets the next import find gs_summary_util.py in the current working directory
12 | import gs_summary_util
13 | 
14 | gs_summary_util.genReadMe2(".")  # NOTE(review): presumably the sub-directory variant of genReadMe -- confirm in gs_summary_util
15 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/run_single_sender_CPU0.sh:
--------------------------------------------------------------------------------
1 | # run this script after server
2 | rm  CPU0_multiple_connections_network_client_sender
3 | gcc CPU0_multiple_connections_network_client_sender.c -lpthread -o CPU0_multiple_connections_network_client_sender
4 | ./CPU0_multiple_connections_network_client_sender &


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/run_single_sender_FPGA0.sh:
--------------------------------------------------------------------------------
1 | # run this script after server
2 | rm FPGA0_multiple_connections_network_client_sender 
3 | gcc FPGA0_multiple_connections_network_client_sender.c -lpthread -o FPGA0_multiple_connections_network_client_sender
4 | ./FPGA0_multiple_connections_network_client_sender &


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/run_single_sender_FPGA1.sh:
--------------------------------------------------------------------------------
1 | # run this script after server
2 | rm FPGA1_multiple_connections_network_client_sender 
3 | gcc FPGA1_multiple_connections_network_client_sender.c -lpthread -o FPGA1_multiple_connections_network_client_sender
4 | ./FPGA1_multiple_connections_network_client_sender &
5 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/run_cuda_server.sh:
--------------------------------------------------------------------------------
1 | # Run this script before client
2 | rm cuda_server
3 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
4 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
5 | #nvprof -f --export-profile timeline.prof --concurrent-kernels off ./cuda_server
6 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/run_cuda_server.sh:
--------------------------------------------------------------------------------
1 | # Run this script before client
2 | rm cuda_server
3 | nvcc -std=c++11 -l cublasLt  -lpthread cuda_server.c -o cuda_server
4 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
5 | #nvprof -f --export-profile timeline.prof --concurrent-kernels off ./cuda_server
6 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/parse_platform_list.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import os
 3 | import sys
 4 | 
 5 | def main ():
 6 |     dev = sys.argv[1]
 7 |     if "PLATFORM_REPO_PATHS" in os.environ:
 8 |         plist = os.environ['PLATFORM_REPO_PATHS'].split(":")
 9 |         for shell in plist:
10 |             if os.path.isdir(shell + "/" + dev):
11 |                 return shell
12 | 
13 | print (main())
14 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/opencl/opencl.mk:
--------------------------------------------------------------------------------
 1 | # Definition of include file locations; non-x86 hosts take XRT from $(SYSROOT)/usr/ instead of $(XILINX_XRT)
 2 | xrt_path = $(XILINX_XRT)
 3 | ifneq ($(HOST_ARCH), x86)
 4 | 	xrt_path =  $(SYSROOT)/usr/
 5 | endif
 6 | # Non-x86 hosts use the xrt/ subdirectory of the include path
 7 | OPENCL_INCLUDE:= $(xrt_path)/include
 8 | ifneq ($(HOST_ARCH), x86)
 9 | 	OPENCL_INCLUDE:= $(xrt_path)/include/xrt
10 | endif
11 | # Compiler and linker flags built from the paths above
12 | VIVADO_INCLUDE:= $(XILINX_VIVADO)/include
13 | opencl_CXXFLAGS=-I$(OPENCL_INCLUDE) -I$(VIVADO_INCLUDE)
14 | OPENCL_LIB:= $(xrt_path)/lib
15 | opencl_LDFLAGS=-L$(OPENCL_LIB) -lOpenCL -lpthread 
16 | 


--------------------------------------------------------------------------------
/FPGA/README.md:
--------------------------------------------------------------------------------
 1 | ### Vitis with Network Stack
 2 | 
 3 | This project adds a network stack to the Vitis shell.
 4 | 
 5 | ## Setup
 6 | Clone the repository and initialize its submodules:
 7 | 
 8 | 	git clone <repository-url>
 9 | 	git submodule update --init --recursive
10 | 
11 | Setup the HLS IPs:
12 | 
13 |     mkdir build
14 |     cd build
15 |     cmake .. -DFDEV_NAME=u280 -DTCP_STACK_EN=1 -DTCP_STACK_RX_DDR_BYPASS_EN=1 
16 |     make installip
17 | 
18 | Create the Vitis kernel:
19 | 
20 |     make all DEVICE=/opt/xilinx/platforms/xilinx_u280_xdma_201920_3/xilinx_u280_xdma_201920_3.xpfm USER_KRNL=scatter_krnl NETH=4
21 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/check_target_device.py:
--------------------------------------------------------------------------------
 1 | import json, sys
 2 | # Usage: check_target_device.py <description file> <target> <device>
 3 | descriptionfile = sys.argv[1]
 4 | target = sys.argv[2]
 5 | device = sys.argv[3]
 6 | 
 7 | with open(descriptionfile) as json_file:
 8 |     data = json.load(json_file)
 9 | # Unsupported when the file has a 'targets' entry that does not contain the requested target
10 | targetNotSupported = 'targets' in data and target not in data['targets']
11 | if targetNotSupported:
12 |     print("%s target not supported for this example" % target)
13 | # Unsupported when any item of the optional 'nboard' entry is a substring of the device name
14 | deviceNotSupported = 'nboard' in data and any(nboard in device for nboard in data['nboard'])
15 | if deviceNotSupported:
16 |     print("%s device not supported for this example" % device)
17 | # Exit status is 1 when both checks pass and 0 when either failed (sys.exit receives a bool)
18 | sys.exit(not(targetNotSupported or deviceNotSupported))
19 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/run_client_sender.sh:
--------------------------------------------------------------------------------
1 | # Run this script after the server; it rebuilds the three senders and starts them in the background.
2 | rm -f FPGA0_multiple_connections_network_client_sender FPGA1_multiple_connections_network_client_sender CPU0_multiple_connections_network_client_sender
3 | gcc FPGA0_multiple_connections_network_client_sender.c -lpthread -o FPGA0_multiple_connections_network_client_sender || exit 1
4 | gcc FPGA1_multiple_connections_network_client_sender.c -lpthread -o FPGA1_multiple_connections_network_client_sender || exit 1
5 | gcc CPU0_multiple_connections_network_client_sender.c -lpthread -o CPU0_multiple_connections_network_client_sender || exit 1
6 | ./FPGA0_multiple_connections_network_client_sender &
7 | ./FPGA1_multiple_connections_network_client_sender &
8 | ./CPU0_multiple_connections_network_client_sender &


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/run_client_sender.sh:
--------------------------------------------------------------------------------
 1 | # Run this script after the server; it rebuilds the senders and starts CPU0, FPGA0, FPGA1 ten seconds apart.
 2 | rm -f FPGA0_multiple_connections_network_client_sender FPGA1_multiple_connections_network_client_sender CPU0_multiple_connections_network_client_sender
 3 | gcc FPGA0_multiple_connections_network_client_sender.c -lpthread -o FPGA0_multiple_connections_network_client_sender || exit 1
 4 | gcc FPGA1_multiple_connections_network_client_sender.c -lpthread -o FPGA1_multiple_connections_network_client_sender || exit 1
 5 | gcc CPU0_multiple_connections_network_client_sender.c -lpthread -o CPU0_multiple_connections_network_client_sender || exit 1
 6 | ./CPU0_multiple_connections_network_client_sender &
 7 | sleep 10
 8 | ./FPGA0_multiple_connections_network_client_sender &
 9 | sleep 10
10 | ./FPGA1_multiple_connections_network_client_sender &
11 | 


--------------------------------------------------------------------------------
/FPGA/kernel/common/types/network_types.svh.in:
--------------------------------------------------------------------------------
 1 | `ifndef NETWORK_TYPES_SVH
 2 | `define NETWORK_TYPES_SVH
 3 | // The DRAM_EN and TCP_STACK_EN placeholders (presumably filled in at configure time) form one macro name
 4 | `define DRAM_EN${DRAM_EN}${TCP_STACK_EN}
 5 | // USE_DDR is defined when that name is DRAM_EN1, DRAM_EN10 or DRAM_EN01
 6 | `ifdef DRAM_EN1
 7 | `define USE_DDR
 8 | `endif
 9 | 
10 | `ifdef DRAM_EN10
11 | `define USE_DDR
12 | `endif
13 | 
14 | `ifdef DRAM_EN01
15 | `define USE_DDR
16 | `endif
17 | // NOTE(review): the name DRAM_EN11 (both values 1) does not define USE_DDR here - confirm that is intended
18 | `define USE_${NETWORK_INTERFACE}G
19 | 
20 | parameter NETWORK_STACK_WIDTH = 512;
21 | parameter UDP_META_WIDTH = 176;
22 | 
23 | // TCP/IP
24 | parameter TCP_STACK_EN = ${TCP_STACK_EN};
25 | parameter TCP_RX_BYPASS_EN = ${TCP_STACK_RX_DDR_BYPASS_EN};
26 | 
27 | //UDP/IP
28 | parameter UDP_STACK_EN = ${UDP_STACK_EN};
29 | 
30 | //RoCEv2
31 | parameter ROCE_STACK_EN = 0;
32 | 
33 | //DRAM
34 | parameter NUM_DDR_CHANNELS = 2;
35 | parameter NUM_TCP_CHANNELS = 2;
36 | parameter NUM_NET_PORTS = 2;
37 | 
38 | `endif


--------------------------------------------------------------------------------
/FPGA/cmake/FindVivado.cmake:
--------------------------------------------------------------------------------
 1 | # Author:  Johannes de Fine Licht (johannes.definelicht@inf.ethz.ch)
 2 | # Created: October 2016
 3 | #
 4 | # To specify the path to the Vivado installation, provide:
 5 | #   -DVIVADO_ROOT_DIR=<installation directory>
 6 | # If successful, this script defines:
 7 | #   VIVADO_FOUND
 8 | #   VIVADO_BINARY
 9 | 
10 | cmake_minimum_required(VERSION 3.0)
11 | # VIVADO_PATH becomes the directory that contains the vivado executable
12 | find_path(VIVADO_PATH
13 |   NAMES vivado 
14 |   PATHS ${VIVADO_ROOT_DIR} ENV XILINX_VIVADO
15 |   PATH_SUFFIXES bin
16 | )
17 | 
18 | if(NOT EXISTS ${VIVADO_PATH})
19 | 
20 |   message(WARNING "Vivado not found.")
21 | 
22 | else()
23 |   # VIVADO_ROOT_DIR is overwritten with the parent of the directory found above
24 |   get_filename_component(VIVADO_ROOT_DIR ${VIVADO_PATH} DIRECTORY)
25 | 
26 |   set(VIVADO_FOUND TRUE)
27 |   set(VIVADO_BINARY ${VIVADO_ROOT_DIR}/bin/vivado)
28 | 
29 |   message(STATUS "Found Vivado at ${VIVADO_ROOT_DIR}.")
30 | 
31 | endif()
32 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/out:
--------------------------------------------------------------------------------
 1 | concurrentKernels: 1
 2 | = 1: Concurrent Kernel Execution
 3 | asyncEngineCount: 3
 4 | > 0: Overlap of Data Transfer and Kernel Execution
 5 | = 2: Concurrent Data Transfers
 6 | Device 0 has compute capability 7.5.
 7 | Before Thread
 8 | Successfully built connection with CPU0.
 9 | Successfully built connection with CPU0.
10 | Successfully built connection with CPU0.
11 | Successfully built connection with CPU0.
12 | Successfully built connection with FPGA0.
13 | Successfully built connection with FPGA0.
14 | Successfully built connection with FPGA0.
15 | Successfully built connection with FPGA0.
16 | Successfully built connection with FPGA1.
17 | batch_count: 0
18 | Successfully built connection with FPGA1.
19 | batch_count: 1
20 | Successfully built connection with FPGA1.
21 | batch_count: 2
22 | Successfully built connection with FPGA1.
23 | batch_count: 3
24 | 


--------------------------------------------------------------------------------
/FPGA/cmake/FindVivadoHLS.cmake:
--------------------------------------------------------------------------------
 1 | # Author:  Johannes de Fine Licht (johannes.definelicht@inf.ethz.ch)
 2 | # Created: October 2016
 3 | #
 4 | # To specify the path to the Vivado HLS installation, provide:
 5 | #   -DVIVADO_HLS_ROOT_DIR=<installation directory>
 6 | # If successful, this script defines:
 7 | #   VIVADO_HLS_FOUND
 8 | #   VIVADO_HLS_BINARY
 9 | #   VIVADO_HLS_INCLUDE_DIRS
10 | 
11 | cmake_minimum_required(VERSION 3.0)
12 | # VIVADO_HLS_PATH becomes the directory that contains the vivado_hls executable
13 | find_path(VIVADO_HLS_PATH
14 |   NAMES vivado_hls 
15 |   PATHS ${VIVADO_HLS_ROOT_DIR} ENV XILINX_VIVADO_HLS ENV XILINX_HLS
16 |   PATH_SUFFIXES bin
17 | )
18 | 
19 | if(NOT EXISTS ${VIVADO_HLS_PATH})
20 | 
21 |   message(WARNING "Vivado HLS not found.")
22 | 
23 | else()
24 |   # VIVADO_HLS_ROOT_DIR is overwritten with the parent of the directory found above
25 |   get_filename_component(VIVADO_HLS_ROOT_DIR ${VIVADO_HLS_PATH} DIRECTORY)
26 | 
27 |   set(VIVADO_HLS_FOUND TRUE)
28 |   set(VIVADO_HLS_INCLUDE_DIRS ${VIVADO_HLS_ROOT_DIR}/include/)
29 |   set(VIVADO_HLS_BINARY ${VIVADO_HLS_ROOT_DIR}/bin/vivado_hls)
30 | 
31 |   message(STATUS "Found Vivado HLS at ${VIVADO_HLS_ROOT_DIR}.")
32 | 
33 | endif()
34 | 


--------------------------------------------------------------------------------
/FPGA/kernel/network_krnl/src/hdl/axis_meta_reg.sv:
--------------------------------------------------------------------------------
 1 | `include "network_intf.svh"
 2 | `include "network_types.svh"
 3 | // Passes an axis_meta stream through a register-slice instance chosen by WIDTH (56 or 32)
 4 | module axis_meta_reg #(
 5 | 	parameter WIDTH = 32
 6 | ) (
 7 | 	input wire  			aclk,
 8 | 	input wire  			aresetn,
 9 | 	
10 | 	axis_meta.slave 		meta_in,
11 | 	axis_meta.master 		meta_out
12 | );
13 | // Both branches wire valid/ready/data identically; only the instantiated module differs
14 | if(WIDTH == 56) begin
15 | 	axis_register_slice_meta_56_0 inst_reg_slice (
16 | 		.aclk(aclk),
17 | 		.aresetn(aresetn),
18 | 		.s_axis_tvalid(meta_in.valid),
19 | 		.s_axis_tready(meta_in.ready),
20 | 		.s_axis_tdata(meta_in.data),
21 | 		.m_axis_tvalid(meta_out.valid),
22 | 		.m_axis_tready(meta_out.ready),
23 | 		.m_axis_tdata(meta_out.data)
24 | 	);
25 | end
26 | else if(WIDTH == 32) begin
27 | 	axis_register_slice_meta_32_0 inst_reg_slice (
28 | 		.aclk(aclk),
29 | 		.aresetn(aresetn),
30 | 		.s_axis_tvalid(meta_in.valid),
31 | 		.s_axis_tready(meta_in.ready),
32 | 		.s_axis_tdata(meta_in.data),
33 | 		.m_axis_tvalid(meta_out.valid),
34 | 		.m_axis_tready(meta_out.ready),
35 | 		.m_axis_tdata(meta_out.data)
36 | 	);
37 | end
38 | // NOTE(review): any other WIDTH instantiates nothing, leaving meta_out unconnected - confirm
39 | endmodule


--------------------------------------------------------------------------------
/FPGA/kernel/cmac_krnl/cmac_krnl.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <root versionMajor="1" versionMinor="6">
 3 |   <kernel name="cmac_krnl" language="ip_c" vlnv="ethz.ch:kernel:cmac_krnl:1.0" attributes="" preferredWorkGroupSizeMultiple="0" workGroupSize="1" interrupt="false" hwControlProtocol="ap_ctrl_none">
 4 |     <ports>
 5 |       <port name="s_axi_control" mode="slave" range="0x1000" dataWidth="32" portType="addressable" base="0x0"/>
 6 |       <port name="axis_net_rx" mode="write_only" dataWidth="512" portType="stream"/>
 7 |       <port name="axis_net_tx" mode="read_only" dataWidth="512" portType="stream"/>
 8 |     </ports>
 9 |     <args>
10 |       <arg name="axis_net_rx" addressQualifier="4" id="0" port="axis_net_rx" size="0x4" offset="0x010" hostOffset="0x0" hostSize="0x4" type="stream&lt;qdma_axis&lt;512,0,0,0>>&amp;" />
11 |       <arg name="axis_net_tx" addressQualifier="4" id="1" port="axis_net_tx" size="0x4" offset="0x018" hostOffset="0x0" hostSize="0x4" type="stream&lt;qdma_axis&lt;512,0,0,0>>&amp;" />
12 |     </args>
13 |   </kernel>
14 | </root>


--------------------------------------------------------------------------------
/FPGA/common/utility/makefile_gen/update_makegen_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs makegen.py in the directory of every git-tracked description.json,
 3 | # except files that contain "match_makefile": "false".
 4 | appDir=$(dirname "$(dirname "$(dirname "$(readlink -f "$0")")")")
 5 | 
 6 | echo "-----------------------"
 7 | echo "--  UPDATING MAKEFILES --"
 8 | echo "-----------------------"
 9 | 
10 | # update_file <path to description.json>
11 | # Runs the generator inside the directory that holds $1, unless ignore is set.
12 | update_file() {
13 | 	ignore=0
14 | 
15 | 	# NOTE(review): the loop variable is unused and the caller passes an absolute
16 | 	# path, so this pattern does not look like it can match; confirm the intended
17 | 	# IGNORE semantics.
18 | 	for i in $IGNORE; do
19 | 		if [[ $1 =~ ^description.json ]]; then
20 | 			ignore=1
21 | 		fi
22 | 	done
23 | 
24 | 	if [[ $VERBOSE == "true" ]]; then
25 | 		echo -n "Checking $1 ... "
26 | 	fi
27 | 	if [[ $ignore == 1 ]]; then
28 | 		if [[ $VERBOSE == "true" ]]; then
29 | 			echo "SKIP"
30 | 		fi
31 | 	else
32 | 		# pushd straight into the target directory; if that fails the generator
33 | 		# is skipped instead of running in the current directory.
34 | 		if pushd "$(dirname "$1")" > /dev/null; then
35 | 			"$appDir/utility/makefile_gen/makegen.py" description.json #> /dev/null 2>&1
36 | 			popd >/dev/null
37 | 		fi
38 | 	fi
39 | }
40 | 
41 | 
42 | VCS_FILES=$(git ls-files)
43 | 
44 | # The file list is still word-split, so tracked paths containing whitespace are not supported.
45 | for f in $VCS_FILES; do
46 | 	if [[ ($f == */description.json) ]]; then
47 | 		if grep -q '"match_makefile": "false"' "$f"; then
48 | 			echo $f
49 | 			echo "Makefile Manually Edited:: AutoMakefile Generator Failed"
50 | 		else
51 | 			echo $f
52 | 			update_file "$(readlink -f "$f")"
53 | 		fi
54 | 	fi
55 | done
56 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/makefile_gen/update_descgen_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs descgen.py in the directory of every git-tracked description.json,
 3 | # except files that contain "match_makefile": "false".
 4 | appDir=$(dirname "$(dirname "$(dirname "$(readlink -f "$0")")")")
 5 | 
 6 | echo "-----------------------"
 7 | echo "--  UPDATING MAKEFILES --"
 8 | echo "-----------------------"
 9 | 
10 | # update_file <path to description.json>
11 | # Runs the generator inside the directory that holds $1, unless ignore is set.
12 | update_file() {
13 | 	ignore=0
14 | 
15 | 	# NOTE(review): the loop variable is unused and the caller passes an absolute
16 | 	# path, so this pattern does not look like it can match; confirm the intended
17 | 	# IGNORE semantics.
18 | 	for i in $IGNORE; do
19 | 		if [[ $1 =~ ^description.json ]]; then
20 | 			ignore=1
21 | 		fi
22 | 	done
23 | 
24 | 	if [[ $VERBOSE == "true" ]]; then
25 | 		echo -n "Checking $1 ... "
26 | 	fi
27 | 	if [[ $ignore == 1 ]]; then
28 | 		if [[ $VERBOSE == "true" ]]; then
29 | 			echo "SKIP"
30 | 		fi
31 | 	else
32 | 		# pushd straight into the target directory; if that fails the generator
33 | 		# is skipped instead of running in the current directory.
34 | 		if pushd "$(dirname "$1")" > /dev/null; then
35 | 			"$appDir/utility/makefile_gen/descgen.py" description.json #> /dev/null 2>&1
36 | 			popd >/dev/null
37 | 		fi
38 | 	fi
39 | }
40 | 
41 | 
42 | VCS_FILES=$(git ls-files)
43 | 
44 | # The file list is still word-split, so tracked paths containing whitespace are not supported.
45 | for f in $VCS_FILES; do
46 | 	if [[ ($f == */description.json) ]]; then
47 | 		if grep -q '"match_makefile": "false"' "$f"; then
48 | 			echo $f
49 | 			echo "Makefile Manually Edited:: AutoMakefile Generator Failed"
50 | 		else
51 | 			echo $f
52 | 			update_file "$(readlink -f "$f")"
53 | 		fi
54 | 	fi
55 | done
56 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/make.tcl.in:
--------------------------------------------------------------------------------
 1 | # HLS project script template; argv element 2 selects synthesis, csim, ip or installip
 2 | open_project ${PROJECT_NAME}_prj
 3 | 
 4 | open_solution "solution1"
 5 | set_part ${FPGA_PART}
 6 | create_clock -period ${CLOCK_PERIOD} -name default
 7 | 
 8 | set_top ${PROJECT_NAME}
 9 | 
10 | add_files ${CMAKE_CURRENT_SOURCE_DIR}/scatter.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
11 | 
12 | # Testbench source, added with -tb
13 | add_files -tb ${CMAKE_CURRENT_SOURCE_DIR}/test_scatter.cpp
14 | 
15 | 
16 | #Check which command
17 | set command [lindex $argv 2]
18 | 
19 | if {$command == "synthesis"} {
20 |    csynth_design
21 | } elseif {$command == "csim"} {
22 |    csim_design
23 | } elseif {$command == "ip"} {
24 |    export_design -format ip_catalog -ipname "scatter" -display_name "scatter" -vendor "ethz.systems.fpga" -version "1.0"
25 | } elseif {$command == "installip"} {
26 |    file mkdir ${IPREPO_DIR}
27 |    file delete -force ${IPREPO_DIR}/${PROJECT_NAME}
28 |    file copy -force ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}_prj/solution1/impl/ip ${IPREPO_DIR}/${PROJECT_NAME}/
29 | } else {
30 |    puts "No valid command specified. Use vivado_hls -f make.tcl <synthesis|csim|ip|installip> ."
31 | }
32 | 
33 | 
34 | exit
35 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/readme_gen/update_all_readme.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # This script regenerates all of the README files in the Vitis example repository
 3 | # An example with an auto-generated README file requires a description.json file
 4 | # Only examples with a valid description.json file are updated by this script
 5 | 
 6 | BASEDIR=$(pwd)
 7 | 
 8 | dir_list=( $(git ls-files | grep 'description.json' | sed -r 's|/[^/]+$||' | sort | uniq ))
 9 | 
10 | echo ${dir_list[@]}
11 | echo $BASEDIR
12 | 
13 | for i in "${dir_list[@]}"
14 | do
15 |     # Skip entries that cannot be entered (e.g. a description.json at the repository
16 |     # root, for which the sed above leaves the file name itself); otherwise the
17 |     # commands below would delete and regenerate README.md in the current directory.
18 |     cd "$i" || continue
19 |     if grep -qr '"match_readme": "false"' .; then
20 |         echo "Ignoring README.md ::" $i
21 |     else
22 |         echo "Updating README for = $i"
23 |         rm -f README.md
24 |     fi
25 |     # NOTE(review): make docs and git add also run for ignored entries - confirm that is intended.
26 |     make docs
27 |     git add README.md
28 |     cd "$BASEDIR"
29 | done
30 | 
31 | summary_list=( $(git ls-files | grep 'summary.json' | sed -r 's|/[^/]+$||' | sort | uniq ))
32 | echo ${summary_list[@]}
33 | echo $BASEDIR
34 | for i in "${summary_list[@]}"
35 | do
36 |     # Same guard as above: never touch README.md outside the intended directory.
37 |     cd "$i" || continue
38 |     echo "Updating README for = $i"
39 |     rm -f README.md
40 |     make docs
41 |     git add README.md
42 |     cd "$BASEDIR"
43 | done
44 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/src/hls/make.tcl.in:
--------------------------------------------------------------------------------
 1 | # HLS project script template; argv element 2 selects synthesis, csim, ip or installip
 2 | open_project ${PROJECT_NAME}_prj
 3 | 
 4 | open_solution "solution1"
 5 | set_part ${FPGA_PART}
 6 | create_clock -period ${CLOCK_PERIOD} -name default
 7 | 
 8 | set_top ${PROJECT_NAME}
 9 | 
10 | add_files ${CMAKE_CURRENT_SOURCE_DIR}/iperf_client.cpp -cflags "-I${CMAKE_CURRENT_BINARY_DIR}"
11 | 
12 | # Testbench source, added with -tb
13 | add_files -tb ${CMAKE_CURRENT_SOURCE_DIR}/test_iperf_client.cpp
14 | 
15 | 
16 | #Check which command
17 | set command [lindex $argv 2]
18 | 
19 | if {$command == "synthesis"} {
20 |    csynth_design
21 | } elseif {$command == "csim"} {
22 |    csim_design
23 | } elseif {$command == "ip"} {
24 |    export_design -format ip_catalog -ipname "iperf_client" -display_name "iperf client" -vendor "ethz.systems.fpga" -version "1.0"
25 | } elseif {$command == "installip"} {
26 |    file mkdir ${IPREPO_DIR}
27 |    file delete -force ${IPREPO_DIR}/${PROJECT_NAME}
28 |    file copy -force ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}_prj/solution1/impl/ip ${IPREPO_DIR}/${PROJECT_NAME}/
29 | } else {
30 |    puts "No valid command specified. Use vivado_hls -f make.tcl <synthesis|csim|ip|installip> ."
31 | }
32 | 
33 | 
34 | exit
35 | 


--------------------------------------------------------------------------------
/FPGA/config_rtl.mk:
--------------------------------------------------------------------------------
 1 | # Vivado executable used by the packaging rules below
 2 | VIVADO := $(XILINX_VIVADO)/bin/vivado
 3 | # Network kernel .xo: depends on its XML, the packaging scripts and the .sv sources under src/hdl
 4 | $(TEMP_DIR)/${KRNL_1}.xo: kernel/network_krnl/network_krnl.xml scripts/package_network_krnl.tcl scripts/gen_xo.tcl kernel/network_krnl/src/hdl/*.sv
 5 | 	mkdir -p $(TEMP_DIR)
 6 | 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_1}.xo ${KRNL_1} $(TARGET) $(DEVICE) $(XSA) kernel/network_krnl/network_krnl.xml ./scripts/package_network_krnl.tcl
 7 | 
 8 | # User kernel .xo: the kernel directory, XML and packaging script are all selected by KRNL_2
 9 | $(TEMP_DIR)/${KRNL_2}.xo: kernel/user_krnl/${KRNL_2}/${KRNL_2}.xml scripts/package_${KRNL_2}.tcl scripts/gen_xo.tcl kernel/user_krnl/${KRNL_2}/src/hdl/*.sv
10 | 	mkdir -p $(TEMP_DIR)
11 | 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_2}.xo ${KRNL_2} $(TARGET) $(DEVICE) $(XSA) kernel/user_krnl/${KRNL_2}/${KRNL_2}.xml ./scripts/package_${KRNL_2}.tcl
12 | 
13 | # CMAC kernel .xo: output name comes from KRNL_3, sources from kernel/cmac_krnl
14 | $(TEMP_DIR)/${KRNL_3}.xo: kernel/cmac_krnl/cmac_krnl.xml scripts/package_cmac_krnl.tcl scripts/gen_xo.tcl kernel/cmac_krnl/src/hdl/*.sv
15 | 	mkdir -p $(TEMP_DIR)
16 | 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_3}.xo ${KRNL_3} $(TARGET) $(DEVICE) $(XSA) kernel/cmac_krnl/cmac_krnl.xml ./scripts/package_cmac_krnl.tcl
17 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/device_list.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | #
 4 | # utility that creates a file that lists down the supported and unsupported devices
 5 | # for each example
 6 | #
 7 | 
 8 | import glob
 9 | import json
10 | import re
11 | import sys
12 | import os
13 | 
14 | import os.path
15 | 
16 | string = "" 
17 | for dirpath, dirnames, filenames in os.walk("../../."):   
18 | 	for filename in [f for f in filenames if (f.endswith("description.json") and f not in "../../common/.")]:
19 | 	
20 | 		f = open(os.path.join(dirpath, filename), "r+")
21 | 		listing = []
22 | 		flag = 0 
23 | 		name_flag = 0
24 | 
25 | 		for txt in f:
26 | 
27 | 			x = re.search(".*device\".*", txt)
28 | 
29 | 			if (x):
30 | 				if(name_flag is 0):
31 |                                         name_flag = 1
32 |                                         string = string + "\n" + dirpath + "\n"
33 | 
34 | 				if(',' not in txt):	
35 | 					flag = 1
36 | 				
37 | 				string = string + txt
38 | 				continue	
39 | 
40 | 			if (flag):
41 | 				string = string + txt
42 | 				
43 | 				if(']' in txt):
44 | 					flag = 0
45 | 		f.close()
46 | 
47 | g = open ("Data.txt", "w")
48 | g.write(string)
49 | g.close()
50 | 	
51 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/pthread_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h> 
 2 | #include <stdlib.h> 
 3 | #include <unistd.h>  //Header file for sleep(). man 3 sleep for details. 
 4 | #include <pthread.h> 
 5 | 
 6 | /* Number of floats in the buffer that main() hands to the worker thread. */
 7 | #define NUM_VALUES 32
 8 | 
 9 | /* Arguments passed to the worker thread through pthread_create(). */
10 | struct Thread_info {
11 |     int port;       /* printed in the worker's greeting line */
12 |     float* buffer;  /* NUM_VALUES floats allocated and freed by main() */
13 | };
14 | 
15 | /* Thread entry point: sleeps one second, then prints the port and every buffer element. */
16 | void *myThreadFun(void* vargp) 
17 | { 
18 |     sleep(1); 
19 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
20 |     printf("Printing GeeksQuiz from Thread %d\n", t_info -> port); 
21 |     for (int i = 0; i < NUM_VALUES; i++) {
22 |         printf("%f\t", t_info -> buffer[i]);
23 |     }
24 |     return NULL; 
25 | } 
26 | 
27 | /* Fills a buffer with 0..NUM_VALUES-1, runs one worker thread on it and waits for it. */
28 | int main() 
29 | { 
30 |     pthread_t thread_id; 
31 |     printf("Before Thread\n"); 
32 | 
33 |     int port = 8080;
34 |     /* Size the allocation from the element type rather than a hard-coded byte count. */
35 |     float* buffer = malloc(NUM_VALUES * sizeof(float));
36 |     if (buffer == NULL) {
37 |         perror("malloc");
38 |         return EXIT_FAILURE;
39 |     }
40 |     for (int i = 0; i < NUM_VALUES; i++) {
41 |         buffer[i] = i;
42 |     }
43 | 
44 |     struct Thread_info t_info_0;
45 |     t_info_0.port = port;
46 |     t_info_0.buffer = buffer;
47 | 
48 |     /* pthread_create() reports failure through its return value, not errno. */
49 |     int rc = pthread_create(&thread_id, NULL, myThreadFun, (void*) &t_info_0);
50 |     if (rc != 0) {
51 |         fprintf(stderr, "pthread_create failed: error %d\n", rc);
52 |         free(buffer);
53 |         return EXIT_FAILURE;
54 |     }
55 |     /* Joining before returning keeps t_info_0 and buffer alive while the worker uses them. */
56 |     pthread_join(thread_id, NULL); 
57 |     printf("After Thread\n"); 
58 |     free(buffer);
59 |     exit(0); 
60 | }


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/pthread_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h> 
 2 | #include <stdlib.h> 
 3 | #include <unistd.h>  //Header file for sleep(). man 3 sleep for details. 
 4 | #include <pthread.h> 
 5 | 
 6 | /* Number of floats in the buffer that main() hands to the worker thread. */
 7 | #define NUM_VALUES 32
 8 | 
 9 | /* Arguments passed to the worker thread through pthread_create(). */
10 | struct Thread_info {
11 |     int port;       /* printed in the worker's greeting line */
12 |     float* buffer;  /* NUM_VALUES floats allocated and freed by main() */
13 | };
14 | 
15 | /* Thread entry point: sleeps one second, then prints the port and every buffer element. */
16 | void *myThreadFun(void* vargp) 
17 | { 
18 |     sleep(1); 
19 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
20 |     printf("Printing GeeksQuiz from Thread %d\n", t_info -> port); 
21 |     for (int i = 0; i < NUM_VALUES; i++) {
22 |         printf("%f\t", t_info -> buffer[i]);
23 |     }
24 |     return NULL; 
25 | } 
26 | 
27 | /* Fills a buffer with 0..NUM_VALUES-1, runs one worker thread on it and waits for it. */
28 | int main() 
29 | { 
30 |     pthread_t thread_id; 
31 |     printf("Before Thread\n"); 
32 | 
33 |     int port = 8080;
34 |     /* Size the allocation from the element type rather than a hard-coded byte count. */
35 |     float* buffer = malloc(NUM_VALUES * sizeof(float));
36 |     if (buffer == NULL) {
37 |         perror("malloc");
38 |         return EXIT_FAILURE;
39 |     }
40 |     for (int i = 0; i < NUM_VALUES; i++) {
41 |         buffer[i] = i;
42 |     }
43 | 
44 |     struct Thread_info t_info_0;
45 |     t_info_0.port = port;
46 |     t_info_0.buffer = buffer;
47 | 
48 |     /* pthread_create() reports failure through its return value, not errno. */
49 |     int rc = pthread_create(&thread_id, NULL, myThreadFun, (void*) &t_info_0);
50 |     if (rc != 0) {
51 |         fprintf(stderr, "pthread_create failed: error %d\n", rc);
52 |         free(buffer);
53 |         return EXIT_FAILURE;
54 |     }
55 |     /* Joining before returning keeps t_info_0 and buffer alive while the worker uses them. */
56 |     pthread_join(thread_id, NULL); 
57 |     printf("After Thread\n"); 
58 |     free(buffer);
59 |     exit(0); 
60 | }


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/pthread_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h> 
 2 | #include <stdlib.h> 
 3 | #include <unistd.h>  //Header file for sleep(). man 3 sleep for details. 
 4 | #include <pthread.h> 
 5 | 
 6 | /* Number of floats in the buffer that main() hands to the worker thread. */
 7 | #define NUM_VALUES 32
 8 | 
 9 | /* Arguments passed to the worker thread through pthread_create(). */
10 | struct Thread_info {
11 |     int port;       /* printed in the worker's greeting line */
12 |     float* buffer;  /* NUM_VALUES floats allocated and freed by main() */
13 | };
14 | 
15 | /* Thread entry point: sleeps one second, then prints the port and every buffer element. */
16 | void *myThreadFun(void* vargp) 
17 | { 
18 |     sleep(1); 
19 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
20 |     printf("Printing GeeksQuiz from Thread %d\n", t_info -> port); 
21 |     for (int i = 0; i < NUM_VALUES; i++) {
22 |         printf("%f\t", t_info -> buffer[i]);
23 |     }
24 |     return NULL; 
25 | } 
26 | 
27 | /* Fills a buffer with 0..NUM_VALUES-1, runs one worker thread on it and waits for it. */
28 | int main() 
29 | { 
30 |     pthread_t thread_id; 
31 |     printf("Before Thread\n"); 
32 | 
33 |     int port = 8080;
34 |     /* Size the allocation from the element type rather than a hard-coded byte count. */
35 |     float* buffer = malloc(NUM_VALUES * sizeof(float));
36 |     if (buffer == NULL) {
37 |         perror("malloc");
38 |         return EXIT_FAILURE;
39 |     }
40 |     for (int i = 0; i < NUM_VALUES; i++) {
41 |         buffer[i] = i;
42 |     }
43 | 
44 |     struct Thread_info t_info_0;
45 |     t_info_0.port = port;
46 |     t_info_0.buffer = buffer;
47 | 
48 |     /* pthread_create() reports failure through its return value, not errno. */
49 |     int rc = pthread_create(&thread_id, NULL, myThreadFun, (void*) &t_info_0);
50 |     if (rc != 0) {
51 |         fprintf(stderr, "pthread_create failed: error %d\n", rc);
52 |         free(buffer);
53 |         return EXIT_FAILURE;
54 |     }
55 |     /* Joining before returning keeps t_info_0 and buffer alive while the worker uses them. */
56 |     pthread_join(thread_id, NULL); 
57 |     printf("After Thread\n"); 
58 |     free(buffer);
59 |     exit(0); 
60 | }


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/pthread_test.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h> 
 2 | #include <stdlib.h> 
 3 | #include <unistd.h>  //Header file for sleep(). man 3 sleep for details. 
 4 | #include <pthread.h> 
 5 | 
 6 | /* Number of floats in the buffer that main() hands to the worker thread. */
 7 | #define NUM_VALUES 32
 8 | 
 9 | /* Arguments passed to the worker thread through pthread_create(). */
10 | struct Thread_info {
11 |     int port;       /* printed in the worker's greeting line */
12 |     float* buffer;  /* NUM_VALUES floats allocated and freed by main() */
13 | };
14 | 
15 | /* Thread entry point: sleeps one second, then prints the port and every buffer element. */
16 | void *myThreadFun(void* vargp) 
17 | { 
18 |     sleep(1); 
19 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
20 |     printf("Printing GeeksQuiz from Thread %d\n", t_info -> port); 
21 |     for (int i = 0; i < NUM_VALUES; i++) {
22 |         printf("%f\t", t_info -> buffer[i]);
23 |     }
24 |     return NULL; 
25 | } 
26 | 
27 | /* Fills a buffer with 0..NUM_VALUES-1, runs one worker thread on it and waits for it. */
28 | int main() 
29 | { 
30 |     pthread_t thread_id; 
31 |     printf("Before Thread\n"); 
32 | 
33 |     int port = 8080;
34 |     /* Size the allocation from the element type rather than a hard-coded byte count. */
35 |     float* buffer = malloc(NUM_VALUES * sizeof(float));
36 |     if (buffer == NULL) {
37 |         perror("malloc");
38 |         return EXIT_FAILURE;
39 |     }
40 |     for (int i = 0; i < NUM_VALUES; i++) {
41 |         buffer[i] = i;
42 |     }
43 | 
44 |     struct Thread_info t_info_0;
45 |     t_info_0.port = port;
46 |     t_info_0.buffer = buffer;
47 | 
48 |     /* pthread_create() reports failure through its return value, not errno. */
49 |     int rc = pthread_create(&thread_id, NULL, myThreadFun, (void*) &t_info_0);
50 |     if (rc != 0) {
51 |         fprintf(stderr, "pthread_create failed: error %d\n", rc);
52 |         free(buffer);
53 |         return EXIT_FAILURE;
54 |     }
55 |     /* Joining before returning keeps t_info_0 and buffer alive while the worker uses them. */
56 |     pthread_join(thread_id, NULL); 
57 |     printf("After Thread\n"); 
58 |     free(buffer);
59 |     exit(0); 
60 | }


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/config_sp_iperf_krnl.txt:
--------------------------------------------------------------------------------
 1 | [connectivity] 
 2 | sp=network_krnl_1.m00_axi:HBM[30]
 3 | sp=network_krnl_1.m01_axi:HBM[31]
 4 | sc=network_krnl_1.m_axis_udp_rx:iperf_krnl_1.s_axis_udp_rx
 5 | sc=network_krnl_1.m_axis_udp_rx_meta:iperf_krnl_1.s_axis_udp_rx_meta
 6 | sc=network_krnl_1.m_axis_tcp_port_status:iperf_krnl_1.s_axis_tcp_port_status
 7 | sc=network_krnl_1.m_axis_tcp_open_status:iperf_krnl_1.s_axis_tcp_open_status
 8 | sc=network_krnl_1.m_axis_tcp_notification:iperf_krnl_1.s_axis_tcp_notification
 9 | sc=network_krnl_1.m_axis_tcp_rx_meta:iperf_krnl_1.s_axis_tcp_rx_meta
10 | sc=network_krnl_1.m_axis_tcp_rx_data:iperf_krnl_1.s_axis_tcp_rx_data
11 | sc=network_krnl_1.m_axis_tcp_tx_status:iperf_krnl_1.s_axis_tcp_tx_status
12 | 
13 | sc=iperf_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
14 | sc=iperf_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
15 | sc=iperf_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
16 | sc=iperf_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
17 | sc=iperf_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
18 | sc=iperf_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
19 | sc=iperf_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
20 | sc=iperf_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
21 | 
22 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
23 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx
24 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/config_sp_scatter_krnl.txt:
--------------------------------------------------------------------------------
 1 | [connectivity] 
 2 | sp=network_krnl_1.m00_axi:HBM[30]
 3 | sp=network_krnl_1.m01_axi:HBM[31]
 4 | sc=network_krnl_1.m_axis_udp_rx:scatter_krnl_1.s_axis_udp_rx
 5 | sc=network_krnl_1.m_axis_udp_rx_meta:scatter_krnl_1.s_axis_udp_rx_meta
 6 | sc=network_krnl_1.m_axis_tcp_port_status:scatter_krnl_1.s_axis_tcp_port_status
 7 | sc=network_krnl_1.m_axis_tcp_open_status:scatter_krnl_1.s_axis_tcp_open_status
 8 | sc=network_krnl_1.m_axis_tcp_notification:scatter_krnl_1.s_axis_tcp_notification
 9 | sc=network_krnl_1.m_axis_tcp_rx_meta:scatter_krnl_1.s_axis_tcp_rx_meta
10 | sc=network_krnl_1.m_axis_tcp_rx_data:scatter_krnl_1.s_axis_tcp_rx_data
11 | sc=network_krnl_1.m_axis_tcp_tx_status:scatter_krnl_1.s_axis_tcp_tx_status
12 | 
13 | sc=scatter_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
14 | sc=scatter_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
15 | sc=scatter_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
16 | sc=scatter_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
17 | sc=scatter_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
18 | sc=scatter_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
19 | sc=scatter_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
20 | sc=scatter_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
21 | 
22 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
23 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx
24 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/hls_test_krnl/config_sp_hls_test_krnl.txt:
--------------------------------------------------------------------------------
 1 | [connectivity] 
 2 | sp=network_krnl_1.m00_axi:HBM[30]
 3 | sp=network_krnl_1.m01_axi:HBM[31]
 4 | sc=network_krnl_1.m_axis_udp_rx:hls_test_krnl_1.s_axis_udp_rx
 5 | sc=network_krnl_1.m_axis_udp_rx_meta:hls_test_krnl_1.s_axis_udp_rx_meta
 6 | sc=network_krnl_1.m_axis_tcp_port_status:hls_test_krnl_1.s_axis_tcp_port_status
 7 | sc=network_krnl_1.m_axis_tcp_open_status:hls_test_krnl_1.s_axis_tcp_open_status
 8 | sc=network_krnl_1.m_axis_tcp_notification:hls_test_krnl_1.s_axis_tcp_notification
 9 | sc=network_krnl_1.m_axis_tcp_rx_meta:hls_test_krnl_1.s_axis_tcp_rx_meta
10 | sc=network_krnl_1.m_axis_tcp_rx_data:hls_test_krnl_1.s_axis_tcp_rx_data
11 | sc=network_krnl_1.m_axis_tcp_tx_status:hls_test_krnl_1.s_axis_tcp_tx_status
12 | 
13 | sc=hls_test_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
14 | sc=hls_test_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
15 | sc=hls_test_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
16 | sc=hls_test_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
17 | sc=hls_test_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
18 | sc=hls_test_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
19 | sc=hls_test_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
20 | sc=hls_test_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
21 | 
22 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
23 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx


--------------------------------------------------------------------------------
/FPGA/common/utility/build_what.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | #
 3 | # Work out which example projects need rebuilding for the current change set.
 4 | #
 5 | # Diffs the checkout against remotes/origin/$BRANCH_NAME (remotes/origin/master
 6 | # when BRANCH_NAME is empty) and writes the selected project directories, one
 7 | # per line, to examples.dat in the current directory.
 8 | 
 9 | HEAD=
10 | 
11 | if [[ "$BRANCH_NAME" == "" ]]; then
12 | 	HEAD=remotes/origin/master
13 | else
14 | 	HEAD=remotes/origin/${BRANCH_NAME}
15 | fi
16 | 
17 | # Every directory holding a tracked description.json counts as a project.
18 | PROJS=$(git ls-files | grep description.json | sed -e 's/\.\///' -e 's/\/description.json//')
19 | CHANGES=$(git diff --name-only "$HEAD")
20 | 
21 | # Prints its argument count; used to count the whitespace-separated changes.
22 | howmany() { echo $#; }
23 | NUM_CHANGES=$(howmany $CHANGES)
24 | 
25 | echo NUM_CHANGES=$NUM_CHANGES
26 | 
27 | REBUILDS=
28 | for change in $CHANGES; do
29 | 	IN_PROJS=
30 | 	for proj in $PROJS; do
31 | 		# Match on a directory boundary so that project "foo" does not claim
32 | 		# files that live under a sibling such as "foobar/".
33 | 		if [[ "$change" == "$proj" || "$change" == "${proj}/"* ]]; then
34 | 			IN_PROJS="$proj $IN_PROJS"
35 | 		fi
36 | 	done
37 | 
38 | 	# NUM_CHANGES is decremented for every change that is either skipped or
39 | 	# attributed to a project, so it ends up counting the unclassified ones.
40 | 	if [[ "$change" == */README.md
41 | 		|| "$change" == "utility/build_what.sh"
42 | 		|| "$change" == "Jenkinsfile" ]]; then
43 | 		echo "SKIPPING $change"
44 | 		NUM_CHANGES=$((NUM_CHANGES-1))
45 | 	elif [[ "$IN_PROJS" != "" ]]; then
46 | 		echo "REBUILD $change"
47 | 		NUM_CHANGES=$((NUM_CHANGES-1))
48 | 		REBUILDS="$IN_PROJS $REBUILDS"
49 | 	else
50 | 		echo "UNKNOWN $change"
51 | 	fi
52 | done
53 | 
54 | # One entry per project, sorted, duplicates removed.
55 | UNIQ_REBUILDS=$(echo $REBUILDS | xargs -n 1 | sort -u | xargs)
56 | 
57 | echo UNIQ_REBUILDS = $UNIQ_REBUILDS
58 | echo NUM_CHANGES = $NUM_CHANGES
59 | 
60 | # if we know that we only changed something inside a single example then do a rebuild
61 | # of that example only else rebuild all examples.
62 | cat /dev/null > examples.dat
63 | if [[ "$NUM_CHANGES" == "0" && "$UNIQ_REBUILDS" != "" ]]; then
64 | 	for rebuild in $UNIQ_REBUILDS; do
65 | 		echo $rebuild >> examples.dat
66 | 	done
67 | else
68 | 	for proj in $PROJS; do
69 | 		echo $proj >> examples.dat
70 | 	done
71 | fi
72 | 


--------------------------------------------------------------------------------
/FPGA/kernel/cmac_krnl/src/hdl/axis_data_reg_array.sv:
--------------------------------------------------------------------------------
 1 | `include "network_types.svh"
 2 | `include "network_intf.svh"
 3 | // axis_data_reg_array: places N_STAGES chained axis_data_reg instances between s_axis and m_axis.
 4 | module axis_data_reg_array #(
 5 |     parameter integer                       N_STAGES = 2    // number of axis_data_reg instances in the chain
 6 | ) (
 7 |     input  wire                             aclk,           // passed to every axis_data_reg instance
 8 |     input  wire                             aresetn,        // passed to every axis_data_reg instance
 9 |     axi_stream.slave                        s_axis,         // input stream, feeds axis_int[0]
10 |     axi_stream.master                       m_axis          // output stream, driven from axis_int[N_STAGES]
11 | );
12 | 
13 | // ----------------------------------------------------------------------------------------------------------------------- 
14 | // -- Register slices ---------------------------------------------------------------------------------------------------- 
15 | // ----------------------------------------------------------------------------------------------------------------------- 
16 | axi_stream axis_int [N_STAGES+1] ();    // axis_int[i] is the input of stage i, axis_int[i+1] its output
17 | // Tie the external ports to both ends of the chain; ready travels in the opposite direction to valid/data/keep/last.
18 | always_comb begin
19 |     axis_int[0].valid           = s_axis.valid;
20 |     axis_int[0].data            = s_axis.data;
21 |     axis_int[0].keep            = s_axis.keep;
22 |     axis_int[0].last            = s_axis.last;
23 |     s_axis.ready                = axis_int[0].ready;
24 | 
25 |     m_axis.valid                = axis_int[N_STAGES].valid;
26 |     m_axis.data                 = axis_int[N_STAGES].data;
27 |     m_axis.keep                 = axis_int[N_STAGES].keep;
28 |     m_axis.last                 = axis_int[N_STAGES].last;
29 |     axis_int[N_STAGES].ready    = m_axis.ready;
30 | end
31 | // One axis_data_reg per stage (presumably a single register slice each, per the banner above -- defined elsewhere).
32 | for(genvar i = 0; i < N_STAGES; i++) begin
33 |     axis_data_reg inst_reg (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_int[i]), .m_axis(axis_int[i+1]));  
34 | end
35 | 
36 | endmodule


--------------------------------------------------------------------------------
/FPGA/kernel/network_krnl/src/hdl/axis_data_reg_array.sv:
--------------------------------------------------------------------------------
 1 | `include "network_types.svh"
 2 | `include "network_intf.svh"
 3 | // axis_data_reg_array: places N_STAGES chained axis_data_reg instances between s_axis and m_axis.
 4 | module axis_data_reg_array #(
 5 |     parameter integer                       N_STAGES = 2    // number of axis_data_reg instances in the chain
 6 | ) (
 7 |     input  wire                             aclk,           // passed to every axis_data_reg instance
 8 |     input  wire                             aresetn,        // passed to every axis_data_reg instance
 9 |     axi_stream.slave                        s_axis,         // input stream, feeds axis_int[0]
10 |     axi_stream.master                       m_axis          // output stream, driven from axis_int[N_STAGES]
11 | );
12 | 
13 | // ----------------------------------------------------------------------------------------------------------------------- 
14 | // -- Register slices ---------------------------------------------------------------------------------------------------- 
15 | // ----------------------------------------------------------------------------------------------------------------------- 
16 | axi_stream axis_int [N_STAGES+1] ();    // axis_int[i] is the input of stage i, axis_int[i+1] its output
17 | // Tie the external ports to both ends of the chain; ready travels in the opposite direction to valid/data/keep/last.
18 | always_comb begin
19 |     axis_int[0].valid           = s_axis.valid;
20 |     axis_int[0].data            = s_axis.data;
21 |     axis_int[0].keep            = s_axis.keep;
22 |     axis_int[0].last            = s_axis.last;
23 |     s_axis.ready                = axis_int[0].ready;
24 | 
25 |     m_axis.valid                = axis_int[N_STAGES].valid;
26 |     m_axis.data                 = axis_int[N_STAGES].data;
27 |     m_axis.keep                 = axis_int[N_STAGES].keep;
28 |     m_axis.last                 = axis_int[N_STAGES].last;
29 |     axis_int[N_STAGES].ready    = m_axis.ready;
30 | end
31 | // One axis_data_reg per stage (presumably a single register slice each, per the banner above -- defined elsewhere).
32 | for(genvar i = 0; i < N_STAGES; i++) begin
33 |     axis_data_reg inst_reg (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_int[i]), .m_axis(axis_int[i+1]));  
34 | end
35 | 
36 | endmodule


--------------------------------------------------------------------------------
/FPGA/common/utility/md2rst/update_md2rst_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | appDir=$(dirname $(dirname $(dirname $(readlink -f $0))))  # three directory levels above this script's resolved path
 3 | 
 4 | echo "-----------------------"
 5 | echo "--  UPDATING .md to .rst --"
 6 | echo "-----------------------"
 7 | # update_file <path>: build the .rst for the example whose description.json is at $1 and move it to $appDir/../../test/.
 8 | update_file() {
 9 | 	ignore=0
10 | 	# NOTE(review): the loop variable i is never used and IGNORE is not assigned in this script -- confirm intent.
11 | 	for i in $IGNORE; do
12 | 		if [[ $1 =~ ^description.json ]]; then 
13 | 			ignore=1
14 | 		fi
15 | 	done
16 | 
17 | 	if [[ $VERBOSE == "true" ]]; then  # VERBOSE is read from the environment
18 | 		echo -n "Checking $1 ... "
19 | 	fi
20 | 	if [[ $ignore == 1 ]]; then
21 | 		if [[ $VERBOSE == "true" ]]; then
22 | 			echo "SKIP"
23 | 		fi
24 | 	else
25 | 		pushd . > /dev/null  # remember the caller's directory; restored by the popd below
26 |                 # Migrate to the example directory 
27 | 		cd $(dirname $1)
28 | 		# Take out the name of the example
29 | 		b_name=$(basename $(dirname $1))
30 | 		# Run the detailed .md generator
31 | 		$appDir/utility/md2rst/md2rst.py description.json #> /dev/null 2>&1
32 | 		# Run the .md to .rst file generator using correct path to pandoc
33 | 		$appDir/pandoc-2.7.3/bin/pandoc -f markdown D_README.md -t rst -o $b_name.rst
34 | 		# move the generated .rst to desired folder
35 | 		mv $b_name.rst $appDir/../../test/
36 | 		# delete the detailed readme
37 | 		rm D_README.md
38 | 		# Locate the desired folder and run sphinx to generate html files from
39 | 		# rst files. Go live...
40 | 		popd >/dev/null
41 | 	fi
42 | }
43 | # Walk every tracked file and process only the */description.json entries.
44 | VCS_FILES=$(git ls-files)
45 | 
46 | for f in $VCS_FILES; do
47 | 	if [[ ($f == */description.json) ]]; then
48 | 		if grep -q '"match_readme": "false"' $f; then  # files carrying this marker are reported and skipped
49 | 			echo $f
50 | 			echo "Readme Manually Edited:: Autofile Generator Failed"			
51 | 		else
52 | 			echo $f
53 | 			update_file $(readlink -f $f)  # update_file receives the resolved absolute path
54 | 		fi
55 | 	fi
56 | done
57 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/check_license.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Verify that every tracked source file starts with the given license text.
 4 | #
 5 | # Usage: check_license.sh LICENSE.txt
 6 | # Reads ignore patterns (regular expressions) from .LICENSE_IGNORE.txt in the
 7 | # current directory and honours VERBOSE=true from the environment.
 8 | # Exit status: number of files with a wrong license, capped at 255.
 9 | 
10 | # Check if LICENSE.txt is provided as an argument
11 | 
12 | if [ $# -eq 0 ]; then
13 |     echo "ERROR: LICENSE.txt not found"
14 |     exit 1
15 | fi
16 | 
17 | # Check if all source files have the correct license
18 | 
19 | LICENSE=$1
20 | TYPES="c cpp h cl"
21 | 
22 | # Without a readable license file every comparison below would fail.
23 | if [[ ! -r "$LICENSE" ]]; then
24 |     echo "ERROR: cannot read license file $LICENSE"
25 |     exit 1
26 | fi
27 | 
28 | # A missing ignore list simply means that nothing is ignored.
29 | IGNORE=
30 | if [[ -f .LICENSE_IGNORE.txt ]]; then
31 |     IGNORE=$(cat .LICENSE_IGNORE.txt)
32 | fi
33 | 
34 | LICENSE_LEN=$(cat "$LICENSE" | wc -l)
35 | 
36 | echo "-------------------------------------"
37 | echo "--  CHECKING LICENSE of all $TYPES --"
38 | echo "-------------------------------------"
39 | echo "-- IGNORING --"
40 | echo "$IGNORE"
41 | echo "--------------"
42 | 
43 | FAIL=0
44 | 
45 | # check_file <path>: compare the first LICENSE_LEN lines of <path> with the
46 | # license file and bump FAIL on a mismatch, unless an IGNORE pattern matches.
47 | check_file() {
48 | 	ignore=0
49 | 
50 | 	for i in $IGNORE; do
51 | 		if [[ $1 =~ $i ]]; then
52 | 			ignore=1
53 | 		fi
54 | 	done
55 | 
56 | 	if [[ $VERBOSE == "true" ]]; then
57 | 		echo -n "Checking $1 ... "
58 | 	fi
59 | 	if [[ $ignore == 1 ]]; then
60 | 		if [[ $VERBOSE == "true" ]]; then
61 | 			echo "SKIP"
62 | 		fi
63 | 	else
64 | 		diff "$LICENSE" <(head -n$LICENSE_LEN "$1") 2>/dev/null 1>&2
65 | 		if [[ $? == 0 ]]; then
66 | 			if [[ $VERBOSE == "true" ]]; then
67 | 				echo "PASS"
68 | 			fi
69 | 		else
70 | 			if [[ $VERBOSE == "true" ]]; then
71 | 				echo "FAIL"
72 | 				diff "$LICENSE" <(head -n$LICENSE_LEN "$1")
73 | 			else
74 | 				echo "$1"
75 | 			fi
76 | 			(( FAIL += 1 ))
77 | 		fi
78 | 	fi
79 | }
80 | 
81 | 
82 | VCS_FILES=$(git ls-files)
83 | 
84 | for f in $VCS_FILES; do
85 | 	for t in $TYPES; do
86 | 		if [[ $f == *.$t ]]; then
87 | 			check_file "$f"
88 | 		fi
89 | 	done
90 | done
91 | 
92 | if [[ $FAIL != 0 ]]; then
93 | 	echo "ERROR: License check failed"
94 | 	echo "ERROR: please fix the license in these files (or add to ignored if external)"
95 | fi
96 | 
97 | # Exit statuses wrap modulo 256, so an uncapped count of 256 would look like success.
98 | exit $(( FAIL > 255 ? 255 : FAIL ))
99 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/constant.h:
--------------------------------------------------------------------------------
 1 | // Input: (INPUT_FEATURE_LEN, BATCH_SIZE)
 2 | // Layer1: W1 * INPUT + B1 
 3 | //  -> W1 (HIDDEN_SIZE1, INPUT_FEATURE_LEN)
 4 | //  -> B1 (HIDDEN_SIZE1)
 5 | //  -> Result1 (HIDDEN_SIZE1, BATCH_SIZE)
 6 | // Layer2: W2 * Result1 + B2
 7 | //  -> W2 (HIDDEN_SIZE2, HIDDEN_SIZE1)
 8 | //  -> B2 (HIDDEN_SIZE2)
 9 | //  -> Result2 (HIDDEN_SIZE2, BATCH_SIZE)
10 | // Layer3: W3 * Result2 + B3
11 | //  -> W3 (HIDDEN_SIZE3, HIDDEN_SIZE2)
12 | //  -> B3 (HIDDEN_SIZE3)
13 | //  -> Result3 (HIDDEN_SIZE3, BATCH_SIZE)
14 | // Output Layer: W_OUT * Result3 + B_OUT
15 | //  -> W_OUT (OUTPUT_FEATURE_LEN, HIDDEN_SIZE3)
16 | //  -> B_OUT (OUTPUT_FEATURE_LEN)
17 | //  -> Output (OUTPUT_FEATURE_LEN, BATCH_SIZE)
18 | 
19 | ///////////// OPTION: small model 352 -> 512 /////////////
20 | ///////////// OPTION: large model 880 -> 1024 /////////////
21 | #define INPUT_FEATURE_LEN 880
22 | 
23 | // BATCH_SIZE is defined further down in this file; the constant definitions may need revisiting later
24 | #define HIDDEN_SIZE1 1024
25 | #define HIDDEN_SIZE2 512 
26 | #define HIDDEN_SIZE3 256  
27 | #define OUTPUT_FEATURE_LEN 1 
28 | 
29 | /* constraint: SHM_DATA_SIZE === 1 GB (NOTE(review): SHM_DATA_SIZE is not defined in this file -- confirm it still applies) */
30 | /* FLOAT_SIZE * BATCH_SIZE * INPUT_DIM * BATCH_NUM_PER_LOOP = 1024 **3 */
31 | #define FLOAT_SIZE 4 
32 | #define BATCH_SIZE 128 // 1024
33 | #define TOTAL_BATCH_NUM (2 * 1024 * 1024 / BATCH_SIZE) // 16384 when BATCH_SIZE is 128
34 | /* NOTE(review): the sizes quoted in the next two comments do not match the values in this file -- confirm or update */
35 | /* matrix (batch * input_dim): 1024 * 1024, float: 4 byte, 1024 batches in queue */
36 | /* 4 GB in total, (1024 * 1024 * 4 * 1024) */
37 | #define BLOCK_ENTRY_NUM (BATCH_SIZE * INPUT_FEATURE_LEN) // floats per block: 128 * 880 = 112640
38 | #define BLOCK_SIZE (BLOCK_ENTRY_NUM * FLOAT_SIZE) // bytes per block: 112640 * 4 = 450560
39 | 
40 | #define PORT 8080 // 8080
41 | 
42 | #define THREAD_NUM 4  // the number of sender / receiver threads
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPU-FPGA-Recommendation-System
 2 | The source code for our KDD 2021 paper, [FleetRec: Large-Scale Recommendation Inference on Hybrid GPU-FPGA Clusters](https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/485153/1/FleetRec_camera_ready.pdf).
 3 | 
 4 | 
 5 | 
 6 | There are two folders for the FPGA and GPU implementations. To build the FPGA, refer to the README.md in the folder. The supported device is Alveo U280 and we used an [open-source TCP/IP stack for Vitis](https://github.com/fpgasystems/Vitis_with_100Gbps_TCP-IP). The GPU implementation requires CUDA version of at least 11.0 and should support a wide range of GPU models.
 7 | 
 8 | 
 9 | 
10 | There are three experiments on different recommendation models. The FPGA kernels can be found [here](./FPGA/kernel/user_krnl), and the GPU kernels can be found [here](./GPU). There are respective READMEs in those FPGA and GPU folders.
11 | 
12 | 
13 | 
14 | ## Reference
15 | 
16 | The paper that corresponds to this repository:
17 | 
18 | Jiang, W., He, Z., Zhang, S., Zeng, K., Feng, L., Zhang, J., ... & Alonso, G. (2021, August). FleetRec: Large-Scale Recommendation Inference on Hybrid GPU-FPGA Clusters. In *27th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2021)*.
19 | 
20 | The FPGA implementation is based on a previous paper:
21 | 
22 | Jiang, W., He, Z., Zhang, S., Preußer, T. B., Zeng, K., Feng, L., ... & Alonso, G. (2021). MicroRec: efficient recommendation inference by hardware and data structure solutions. *Proceedings of Machine Learning and Systems*, *3*.
23 | 
24 | The FPGA network stack we used:
25 | 
26 | Zhenhao He, Dario Korolija, and Gustavo Alonso. 2021. EasyNet: 100 Gbps Network for HLS. In 2021 31st International Conference on Field Programmable Logic and Applications (FPL).


--------------------------------------------------------------------------------
/FPGA/common/utility/Consolidation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | #
 4 | # utility that creates a folder that contains the common folder and
 5 | # all the files of the example 
 6 | #
 7 | 
 8 | from sys import argv
 9 | import re
10 | import sys
11 | import os
12 | import os.path
13 | 
14 | path = os.getcwd()
15 | 
16 | actual_folder = path
17 | folder_created = path + '_backup'
18 | route = argv[0].split('common')
19 | 
20 | if(not os.path.isdir(folder_created)):
21 | 	cmd = 'cp -rf ' + actual_folder + ' ' + folder_created
22 | 	os.system(cmd)
23 | 	os.chmod(folder_created, 0o777)
24 | 
25 | 	f = open(folder_created + '/Makefile', "r+")
26 | 
27 | 	string = ""
28 | 	listing = ['opencl']
29 | 
30 | 	for txt in f:
31 | 
32 | 		x = re.search("^COMMON_REPO =.*", txt)
33 | 
34 | 		if (x):
35 | 			txt = "COMMON_REPO = ./\n"
36 | 
37 | 		string = string + txt
38 | 
39 | 	f.close()
40 | 
41 | 	f = open(folder_created + '/description.json', "r+")
42 | 	
43 | 	flag = 0
44 | 	for txt in f:
45 | 		if "\"includepaths\"" in txt:
46 | 			flag = 1
47 | 			continue
48 | 
49 | 		if (flag == 1):
50 | 			if (']' in txt or '}' in txt):
51 | 				break
52 | 			else:
53 | 				listing.append(txt[txt.find("includes/") + 9 : txt.rfind('\"')])
54 | 		 
55 | 	f.close()
56 | 
57 | 	commonfolders = route[0] + "common/includes/"
58 | 
59 | 	for foldername in os.listdir(commonfolders):
60 | 		if foldername in listing:
61 | 			cmd1 = 'mkdir -p ' + folder_created + '/common/includes/' + foldername
62 | 			cmd2 = 'cp -rf ' + commonfolders + '/' + foldername + '/* ' + folder_created + '/common/includes/' + foldername
63 | 			os.system(cmd1)
64 | 			os.system(cmd2)
65 | 
66 | 	g = open(folder_created + '/Makefile', "w")
67 | 	g.write(string)
68 | 	g.close()
69 | 
70 | 	print ("The new folder's location is %s" % folder_created)
71 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/hls_test_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_377_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_47_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_98_krnl/src/hls/mem_utils.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | #ifndef MEM_UTILS_HPP
28 | #define MEM_UTILS_HPP
29 | 
30 | // memCmd: pairs an address with a length (presumably one memory command, per the name).
31 | struct memCmd
32 | {
33 | 	ap_uint<64> addr;	// 64-bit address field
34 | 	ap_uint<32> len;	// 32-bit length field; unit (bytes vs. words) is not visible here -- TODO confirm
35 | 	memCmd() {}	// gives addr and len no explicit initial value
36 | 	memCmd(ap_uint<64> addr, ap_uint<32> len)
37 | 		:addr(addr), len(len) {}	// copies each argument into the field of the same name
38 | };
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/check_readme.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Check if all examples have correct Readme 
 4 | 
 5 | echo "-----------------------"
 6 | echo "--  CHECKING READMEs --"
 7 | echo "-----------------------"
 8 | 
 9 | FAIL=0
10 | 
11 | check_file() {
12 | 	ignore=0
13 | 
14 | 	for i in $IGNORE; do
15 | 		if [[ $1 =~ ^description.json ]]; then 
16 | 			ignore=1
17 | 		fi
18 | 	done
19 | 
20 | 	if [[ $VERBOSE == "true" ]]; then
21 | 		echo -n "Checking $1 ... "
22 | 	fi
23 | 	if [[ $ignore == 1 ]]; then
24 | 		if [[ $VERBOSE == "true" ]]; then
25 | 			echo "SKIP"
26 | 		fi
27 | 	else
28 | 		pushd . > /dev/null 
29 | 		cd $(dirname $1)
30 | 		mv README.md README.md.check > /dev/null 2>&1
31 | 		make README.md 2>/dev/null 1>&2
32 | 		rc=$? 
33 |         if [[ $2 != "false" ]]; then
34 |             diff README.md README.md.check 2>/dev/null 1>&2
35 |             if [[ $rc == 0 && $? == 0 ]]; then
36 |                 if [[ $VERBOSE == "true" ]]; then
37 |                     echo "PASS"
38 |                 fi
39 |             else
40 |                 if [[ $VERBOSE == "true" ]]; then
41 |                     echo "FAIL"
42 |                     diff README.md README.md.check
43 |                 else
44 |                     echo "$1"
45 |                 fi
46 |                 (( FAIL += 1 ))
47 |             fi
48 |         fi 
49 | 
50 |         mv README.md.check README.md > /dev/null 2>&1
51 |         popd >/dev/null
52 |     fi
53 | }
54 | 
55 | 
56 | # Check every tracked description.json / summary.json that sits in a subdirectory.
57 | VCS_FILES=$(git ls-files)
58 | # Read one path per line on fd 3 so paths with spaces stay intact and stdin is untouched.
59 | while IFS= read -r f <&3; do
60 | 	CHECK_MATCH=true
61 | 	if [[ ($f == */description.json) || ($f == */summary.json) ]]; then
62 | 		# "match_readme": "false" still regenerates the README but skips the diff.
63 | 		if grep -q '"match_readme": "false"' "$f"; then
64 | 			CHECK_MATCH=false
65 | 			echo "Ignoring README.md ::" "$f"
66 | 		fi
67 | 		check_file "$f" "$CHECK_MATCH"
68 | 	fi
69 | done 3<<< "$VCS_FILES"
70 | if [[ $FAIL != 0 ]]; then
71 | 	echo "ERROR: Readme check failed"
72 | 	echo "ERROR: please fix the README.md in these files"
73 | fi
74 | exit $(( FAIL > 255 ? 255 : FAIL ))  # exit statuses wrap modulo 256; 256 failures must not exit 0
75 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/constant.h:
--------------------------------------------------------------------------------
 1 | // Input: (INPUT_FEATURE_LEN, BATCH_SIZE)
 2 | // Layer1: W1 * INPUT + B1 
 3 | //  -> W1 (HIDDEN_SIZE1, INPUT_FEATURE_LEN)
 4 | //  -> B1 (HIDDEN_SIZE1)
 5 | //  -> Result1 (HIDDEN_SIZE1, BATCH_SIZE)
 6 | // Layer2: W2 * Result1 + B2
 7 | //  -> W2 (HIDDEN_SIZE2, HIDDEN_SIZE1)
 8 | //  -> B2 (HIDDEN_SIZE2)
 9 | //  -> Result2 (HIDDEN_SIZE2, BATCH_SIZE)
10 | // Layer3: W3 * Result2 + B3
11 | //  -> W3 (HIDDEN_SIZE3, HIDDEN_SIZE2)
12 | //  -> B3 (HIDDEN_SIZE3)
13 | //  -> Result3 (HIDDEN_SIZE3, BATCH_SIZE)
14 | // Output Layer: W_OUT * Result3 + B_OUT
15 | //  -> W_OUT (OUTPUT_FEATURE_LEN, HIDDEN_SIZE3)
16 | //  -> B_OUT (OUTPUT_FEATURE_LEN)
17 | //  -> Output (OUTPUT_FEATURE_LEN, BATCH_SIZE)
18 | 
19 | ///////////// OPTION: small model 384 -> 512 /////////////
20 | ///////////// OPTION: large model 876 -> 1024 /////////////
21 | #define INPUT_FEATURE_LEN 880 // NOTE(review): matches neither option listed above -- confirm
22 | 
23 | // Layer widths. BATCH_SIZE is defined further down in this file; TODO: revisit the constant definitions.
24 | #define HIDDEN_SIZE1 1024
25 | #define HIDDEN_SIZE2 512 
26 | #define HIDDEN_SIZE3 256  
27 | #define OUTPUT_FEATURE_LEN 1 
28 | 
29 | /* intended constraint: SHM_DATA_SIZE == 1 GB, i.e. */
30 | /* FLOAT_SIZE * BATCH_SIZE * INPUT_DIM * BATCH_NUM_PER_LOOP = 1024 **3; NOTE(review): the values below give 4 * 256 * 880 * 1024 B = 880 MiB */
31 | #define FLOAT_SIZE 4 // bytes per float
32 | #define BATCH_SIZE  256 // 1024
33 | #define BATCH_NUM_PER_LOOP (1024 * 256 / BATCH_SIZE)  // 1024 when BATCH_SIZE is 256 -> should be renamed as FIFO_BATCH_NUM
34 | 
35 | // LOOP_NUM = number of loops to perform (nominally one GB of input each)
36 | #define LOOP_NUM 1
37 | 
38 | #define TOTAL_BATCH_NUM (BATCH_NUM_PER_LOOP * LOOP_NUM)
39 | 
40 | /* One block is one batch of inputs: BATCH_SIZE * INPUT_FEATURE_LEN floats of FLOAT_SIZE bytes. */
41 | /* NOTE(review): an earlier note here quoted 1024 * 1024 floats * 1024 batches = 4 GB, which does not match the values above. */
42 | #define BLOCK_ENTRY_NUM (BATCH_SIZE * INPUT_FEATURE_LEN)
43 | #define BLOCK_SIZE (BLOCK_ENTRY_NUM * FLOAT_SIZE)
44 | // shared-memory data size: BATCH_NUM_PER_LOOP blocks (stated maximum: 1 GB)
45 | #define SHM_DATA_SIZE (BLOCK_SIZE * BATCH_NUM_PER_LOOP) 
46 | 
47 | #define SHM_CONTROL_SIZE 1024 // size of the shared-memory control area -- presumably bytes; confirm
48 | 
49 | #define STREAM_NUM 4 // Stream 0 port: PORT, Stream 1 port: PORT + 1, ...
50 | 
51 | #define PORT 8080 // base port; stream i uses PORT + i


--------------------------------------------------------------------------------
/FPGA/config_hls.mk:
--------------------------------------------------------------------------------
 1 | # $(TEMP_DIR)/${KRNL_NAME_2}.xo: kernel/user_krnl/src/C/*.cpp
 2 | # 	mkdir -p $(TEMP_DIR)
 3 | # 	$(VPP) $(CLFLAGS) -c -k ${KRNL_NAME_2} -o $(TEMP_DIR)/${KRNL_NAME_2}.xo --input_files kernel/user_krnl/src/C/*.cpp
 4 | 
 5 | # VIVADO := $(XILINX_VIVADO)/bin/vivado
 6 | # $(TEMP_DIR)/${KRNL_NAME_1}.xo: kernel/network_krnl.xml scripts/package_network_krnl.tcl scripts/gen_xo.tcl kernel/network_krnl/src/hdl/*.sv
 7 | # 	mkdir -p $(TEMP_DIR)
 8 | # 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_NAME_1}.xo ${KRNL_NAME_1} $(TARGET) $(DEVICE) $(XSA)
 9 | 
10 | # # $(TEMP_DIR)/${KRNL_NAME_2}.xo: kernel/user_krnl.xml scripts/package_user_krnl.tcl scripts/gen_xo.tcl kernel/user_krnl/src/hdl/*.sv
11 | # # 	mkdir -p $(TEMP_DIR)
12 | # # 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_NAME_2}.xo ${KRNL_NAME_2} $(TARGET) $(DEVICE) $(XSA)
13 | 
14 | 
15 | # $(TEMP_DIR)/${KRNL_NAME_3}.xo: kernel/cmac_krnl.xml scripts/package_cmac_krnl.tcl scripts/gen_xo.tcl kernel/cmac_krnl/src/hdl/*.sv
16 | # 	mkdir -p $(TEMP_DIR)
17 | # 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $(TEMP_DIR)/${KRNL_NAME_3}.xo ${KRNL_NAME_3} $(TARGET) $(DEVICE) $(XSA)
18 | 
19 | 
20 | # Vivado executable used to package the RTL kernels into .xo files.
21 | VIVADO := $(XILINX_VIVADO)/bin/vivado
22 | 
23 | # Network kernel (RTL): gen_xo.tcl receives the output .xo, the kernel name, target,
24 | # device, XSA, the kernel XML and the packaging script.
25 | $(TEMP_DIR)/$(KRNL_1).xo: kernel/network_krnl/network_krnl.xml scripts/package_network_krnl.tcl scripts/gen_xo.tcl kernel/network_krnl/src/hdl/*.sv
26 | 	mkdir -p $(TEMP_DIR)
27 | 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $@ $(KRNL_1) $(TARGET) $(DEVICE) $(XSA) kernel/network_krnl/network_krnl.xml ./scripts/package_network_krnl.tcl
28 | 
29 | # User kernel (HLS): compiled by $(VPP) from every .cpp file in its src/hls directory.
30 | $(TEMP_DIR)/$(KRNL_2).xo: kernel/user_krnl/$(KRNL_2)/src/hls/*.cpp
31 | 	mkdir -p $(TEMP_DIR)
32 | 	$(VPP) $(CLFLAGS) -c -k $(KRNL_2) -o $@ --input_files kernel/user_krnl/$(KRNL_2)/src/hls/*.cpp
33 | 
34 | # CMAC kernel (RTL): packaged the same way as the network kernel.
35 | $(TEMP_DIR)/$(KRNL_3).xo: kernel/cmac_krnl/cmac_krnl.xml scripts/package_cmac_krnl.tcl scripts/gen_xo.tcl kernel/cmac_krnl/src/hdl/*.sv
36 | 	mkdir -p $(TEMP_DIR)
37 | 	$(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs $@ $(KRNL_3) $(TARGET) $(DEVICE) $(XSA) kernel/cmac_krnl/cmac_krnl.xml ./scripts/package_cmac_krnl.tcl
38 | 


--------------------------------------------------------------------------------
/FPGA/kernel/network_krnl/src/hdl/axis_udp_meta_reg.sv:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | `timescale 1ns / 1ps
28 | `default_nettype none
29 | 
30 | // Register slice for the UDP metadata stream (wraps axis_register_slice_176)
31 | module axis_udp_meta_reg (
32 |     input wire          aclk,
33 |     input wire          aresetn,
34 |     axis_meta.slave     s_axis,
35 |     axis_meta.master    m_axis
36 | );
37 | axis_register_slice_176 slice_inst (
38 |     .aclk          (aclk),
39 |     .aresetn       (aresetn),
40 |     // input side, driven through the s_axis interface
41 |     .s_axis_tvalid (s_axis.valid),
42 |     .s_axis_tdata  (s_axis.data),
43 |     .s_axis_tready (s_axis.ready),
44 |     // output side, driving the m_axis interface
45 |     .m_axis_tvalid (m_axis.valid),
46 |     .m_axis_tdata  (m_axis.data),
47 |     .m_axis_tready (m_axis.ready)
48 | );
49 | endmodule
50 | `default_nettype wire
51 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/README.md:
--------------------------------------------------------------------------------
 1 | # Programs
 2 | 
 3 | Terminal 1: start the CUDA server first (`./run_cuda_server.sh`).
 4 | 
 5 | Terminal 2: then run the client to send data (`./run_client_sender.sh`). Remember to adjust the server IP address.
 6 | 
 7 | Correct Results:
 8 | 
 9 | Input Feature Size = 512 -> 68719476736
10 | 
11 | Input Feature Size = 1024 -> 137438953472
12 | 
13 | ## cuda_server.c
14 | 
15 | rm cuda_server
16 | 
17 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
18 | 
19 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
20 | 
21 | ## multiple_connections_network_client_sender.c
22 | 
23 | This program simulates an FPGA that opens 4 connections and sends data to the CUDA server.
24 | 
25 | Start CUDA server first, then client.
26 | 
27 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
28 | 
29 | ./multiple_connections_network_client_sender
30 | 
31 | 
32 | # Other programs (for building the final version)
33 | 
34 | ## pthread_test.c
35 | 
36 | https://www.geeksforgeeks.org/multithreading-c-2/
37 | 
38 | Pass port info and memory address space to the thread as a structure, and execute that thread.
39 | 
40 | gcc pthread_test.c -lpthread
41 | 
42 | ./a.out
43 | 
44 | ## single_connection_network_server_receiver.c
45 | 
46 | Start server first, then client.
47 | 
48 | gcc single_connection_network_server_receiver.c -lpthread -o single_connection_network_server_receiver
49 | 
50 | ./single_connection_network_server_receiver
51 | 
52 | ## single_connection_network_client_sender.c
53 | 
54 | Start server first, then client.
55 | 
56 | gcc single_connection_network_client_sender.c -lpthread -o single_connection_network_client_sender
57 | 
58 | ./single_connection_network_client_sender
59 | 
60 | 
61 | ## multiple_connections_network_server_receiver.c
62 | 
63 | Start server first, then client.
64 | 
65 | 4 TCP connections.
66 | 
67 | gcc multiple_connections_network_server_receiver.c -lpthread -o multiple_connections_network_server_receiver
68 | 
69 | ./multiple_connections_network_server_receiver
70 | 
71 | ## multiple_connections_network_client_sender.c
72 | 
73 | Start server first, then client.
74 | 
75 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
76 | 
77 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/README.md:
--------------------------------------------------------------------------------
 1 | # Programs
 2 | 
 3 | Terminal 1: start the CUDA server first (`./run_cuda_server.sh`).
 4 | 
 5 | Terminal 2: then run the client to send data (`./run_client_sender.sh`). Remember to adjust the server IP address.
 6 | 
 7 | Correct Results:
 8 | 
 9 | Input Feature Size = 512 -> 68719476736
10 | 
11 | Input Feature Size = 1024 -> 137438953472
12 | 
13 | ## cuda_server.c
14 | 
15 | rm cuda_server
16 | 
17 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
18 | 
19 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
20 | 
21 | ## multiple_connections_network_client_sender.c
22 | 
23 | This program simulates an FPGA that opens 4 connections and sends data to the CUDA server.
24 | 
25 | Start CUDA server first, then client.
26 | 
27 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
28 | 
29 | ./multiple_connections_network_client_sender
30 | 
31 | 
32 | # Other programs (for building the final version)
33 | 
34 | ## pthread_test.c
35 | 
36 | https://www.geeksforgeeks.org/multithreading-c-2/
37 | 
38 | Pass port info and memory address space to the thread as a structure, and execute that thread.
39 | 
40 | gcc pthread_test.c -lpthread
41 | 
42 | ./a.out
43 | 
44 | ## single_connection_network_server_receiver.c
45 | 
46 | Start server first, then client.
47 | 
48 | gcc single_connection_network_server_receiver.c -lpthread -o single_connection_network_server_receiver
49 | 
50 | ./single_connection_network_server_receiver
51 | 
52 | ## single_connection_network_client_sender.c
53 | 
54 | Start server first, then client.
55 | 
56 | gcc single_connection_network_client_sender.c -lpthread -o single_connection_network_client_sender
57 | 
58 | ./single_connection_network_client_sender
59 | 
60 | 
61 | ## multiple_connections_network_server_receiver.c
62 | 
63 | Start server first, then client.
64 | 
65 | 4 TCP connections.
66 | 
67 | gcc multiple_connections_network_server_receiver.c -lpthread -o multiple_connections_network_server_receiver
68 | 
69 | ./multiple_connections_network_server_receiver
70 | 
71 | ## multiple_connections_network_client_sender.c
72 | 
73 | Start server first, then client.
74 | 
75 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
76 | 
77 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/FPGA/common/utility/check_makefile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Check if all examples have correct Makefiles
 4 | 
 5 | echo "-----------------------"
 6 | echo "--  CHECKING MAKEFILES --"
 7 | echo "-----------------------"
 8 | 
 9 | FAIL=0
10 | 
11 | # _report_diff <generated> <json> <rc>: compare <generated> with the committed copy
12 | # that check_file moved to <generated>.check, report the result according to VERBOSE,
13 | # count a failure in FAIL on a difference or a non-zero generator status <rc>, and
14 | # finally move the committed copy back into place.
15 | _report_diff() {
16 | 	diff "$1" "$1.check" 2>/dev/null 1>&2
17 | 	if [[ $3 == 0 && $? == 0 ]]; then
18 | 		if [[ $VERBOSE == "true" ]]; then
19 | 			echo "PASS"
20 | 		fi
21 | 	else
22 | 		if [[ $VERBOSE == "true" ]]; then
23 | 			echo "FAIL"
24 | 			diff "$1" "$1.check"
25 | 		else
26 | 			echo "$2"
27 | 		fi
28 | 		(( FAIL += 1 ))
29 | 	fi
30 | 	mv "$1.check" "$1" > /dev/null 2>&1
31 | }
32 | 
33 | # check_file <description.json>: regenerate Makefile and utils.mk with makegen.py in
34 | # the file's directory and compare each with the committed version.
35 | # Environment: IGNORE (enables the skip test), VERBOSE ("true" prints per-file status).
36 | check_file() {
37 | 	local ignore=0 rc i
38 | 
39 | 	# NOTE(review): $i is unused, so every IGNORE entry tests the same pattern.
40 | 	for i in $IGNORE; do
41 | 		if [[ $1 =~ ^description.json ]]; then
42 | 			ignore=1
43 | 		fi
44 | 	done
45 | 
46 | 	[[ $VERBOSE == "true" ]] && echo -n "Checking $1 ... "
47 | 	if [[ $ignore == 1 ]]; then
48 | 		[[ $VERBOSE == "true" ]] && echo "SKIP"
49 | 		return 0
50 | 	fi
51 | 
52 | 	jsonDir=$(dirname "$(readlink -f "$1")")
53 | 	# Enter the file's directory (quoted); never run mv/makegen in the wrong place.
54 | 	if ! pushd "$jsonDir" > /dev/null; then
55 | 		echo "$1"
56 | 		(( FAIL += 1 ))
57 | 		return 0
58 | 	fi
59 | 
60 | 	# Move the committed files aside so makegen.py writes fresh ones.
61 | 	mv Makefile Makefile.check > /dev/null 2>&1
62 | 	mv utils.mk utils.mk.check > /dev/null 2>&1
63 | 	"$utilityDir/makefile_gen/makegen.py" "$1" > /dev/null 2>&1
64 | 	rc=$?
65 | 
66 | 	# Each call reports one file and restores its committed copy.
67 | 	_report_diff Makefile "$1" "$rc"
68 | 	_report_diff utils.mk "$1" "$rc"
69 | 	popd > /dev/null
70 | }
71 | 
72 | # Work from the directory above this script; git ls-files lists the tracked files below it.
73 | utilityDir=$(dirname "$(readlink -f "$0")")
74 | cd "$utilityDir/.." || exit 1
75 | VCS_FILES=$(git ls-files)
76 | 
77 | # Read one path per line on fd 3 so paths with spaces stay intact and stdin is untouched.
78 | while IFS= read -r f <&3; do
79 | 	if [[ ($f == */description.json) ]]; then
80 | 		if grep -q '"match_ini": "false"' "$f"; then
81 | 			echo "Manually Edited ini File ::" "$f"
82 | 		fi
83 | 		if grep -q '"match_makefile": "false"' "$f"; then
84 | 			echo "Ignoring ::" "$f"
85 | 		else
86 | 			check_file "$(readlink -f "$f")"
87 | 		fi
88 | 	fi
89 | done 3<<< "$VCS_FILES"
90 | 
91 | if [[ $FAIL != 0 ]]; then
92 | 	echo "ERROR: Makefile check failed"
93 | 	echo "ERROR: please fix the makefile in these files"
94 | fi
95 | exit $(( FAIL > 255 ? 255 : FAIL ))  # exit statuses wrap modulo 256; 256 failures must not exit 0
96 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/check_descr.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | #
 4 | # utility that lists down
 5 | # all the examples with respective unnecessary keywords mentioned
 6 | #
 7 | 
 8 | import glob
 9 | import json
10 | import re
11 | import sys
12 | import os
13 | 
14 | import os.path
15 | 
16 | for dirpath, dirnames, filenames in os.walk("../.././"):   
17 |         for filename in [f for f in filenames if (f.endswith("description.json") and f not in "../../common/.")]:
18 | 
19 |                 f = open(os.path.join(dirpath, filename), "r+")
20 |                 flag = 0
21 |                 t = 0
22 |                 string_check = ""
23 | 
24 |                 for txt in f:
25 |                         if ("keywords" in txt and flag == 0):
26 |                                 flag = 1
27 |                                 continue
28 | 
29 |                         if (flag):
30 |                                 if('}' in txt or ']' in txt):
31 |                                         break
32 | 
33 |                                 else:
34 |                                         c_list = txt.split("\"")
35 |                                         check_flag = 0
36 |                                         for c_dirpath, c_dirnames, c_filenames in os.walk(os.path.join(dirpath)):
37 |                                                 for check_filename in [c_f for c_f in c_filenames if (not (c_f.endswith(".md") or c_f.endswith("description.json")))]:
38 |                                                         c_f = open(os.path.join(c_dirpath, check_filename), "rb+")
39 | 
40 |                                                         for check_txt in c_f:
41 |                                                                 if (c_list[1].encode('utf-8') in check_txt):
42 |                                                                         check_flag = 1
43 |                                                                         break
44 | 						
45 |                                                         c_f.close()
46 | 
47 |                                         if (check_flag is 0):
48 |                                                 string_check = string_check + txt
49 |                                                 t = 1
50 | 
51 |                 if (t):
52 |                         print(os.path.join(dirpath))
53 |                         print(string_check)
54 | 		
55 |                 f.close()
56 | 


--------------------------------------------------------------------------------
/FPGA/scripts/network_ultrascale.tcl:
--------------------------------------------------------------------------------
 1 | # axis_sync_fifo: fifo_generator 13.2 with an AXI-Stream interface, 8-byte TDATA, TKEEP and TLAST, common-clock block RAM, data counts enabled.
 2 | create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name axis_sync_fifo 
 3 | set_property -dict [list CONFIG.INTERFACE_TYPE {AXI_STREAM} CONFIG.FIFO_Implementation_axis {Common_Clock_Block_RAM} CONFIG.TDATA_NUM_BYTES {8} CONFIG.TUSER_WIDTH {0} CONFIG.Enable_TLAST {true} CONFIG.HAS_TKEEP {true} CONFIG.Enable_Data_Counts_axis {true} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.TSTRB_WIDTH {8} CONFIG.TKEEP_WIDTH {8} CONFIG.FIFO_Implementation_wach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wach {15} CONFIG.Empty_Threshold_Assert_Value_wach {14} CONFIG.FIFO_Implementation_wrch {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_wrch {15} CONFIG.Empty_Threshold_Assert_Value_wrch {14} CONFIG.FIFO_Implementation_rach {Common_Clock_Distributed_RAM} CONFIG.Full_Threshold_Assert_Value_rach {15} CONFIG.Empty_Threshold_Assert_Value_rach {14}] [get_ips axis_sync_fifo]
 4 | 
 5 | # cmd_fifo_xgemac_rxif: fifo_generator 13.2, common-clock block RAM, 16-bit input and output data width.
 6 | create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name cmd_fifo_xgemac_rxif 
 7 | set_property -dict [list CONFIG.Fifo_Implementation {Common_Clock_Block_RAM} CONFIG.Input_Data_Width {16} CONFIG.Output_Data_Width {16} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.Use_Embedded_Registers {false} CONFIG.Full_Threshold_Assert_Value {1022} CONFIG.Full_Threshold_Negate_Value {1021} CONFIG.Enable_Safety_Circuit {false}] [get_ips cmd_fifo_xgemac_rxif]
 8 | 
 9 | # cmd_fifo_xgemac_txif: same kind of FIFO with 1-bit input and output data width.
10 | create_ip -name fifo_generator -vendor xilinx.com -library ip -version 13.2 -module_name cmd_fifo_xgemac_txif 
11 | set_property -dict [list CONFIG.Fifo_Implementation {Common_Clock_Block_RAM} CONFIG.Input_Data_Width {1} CONFIG.Output_Data_Width {1} CONFIG.Reset_Type {Asynchronous_Reset} CONFIG.Full_Flags_Reset_Value {1} CONFIG.Full_Threshold_Assert_Value {1022} CONFIG.Full_Threshold_Negate_Value {1021} CONFIG.Enable_Safety_Circuit {false}] [get_ips cmd_fifo_xgemac_txif]
12 | 
13 | # Disabled: HLS IP ethernet_frame_padding is not instantiated by this script.
14 | #create_ip -name ethernet_frame_padding -vendor ethz.systems.fpga -library hls -version 0.1 -module_name ethernet_frame_padding_ip 
15 | 
16 | # axis_pkg_fifo_512: axis_data_fifo 2.0 with 64-byte TDATA, TKEEP and TLAST; FIFO_MODE 2 is presumably packet mode -- confirm against the IP documentation.
17 | create_ip -name axis_data_fifo -vendor xilinx.com -library ip -version 2.0 -module_name axis_pkg_fifo_512 
18 | set_property -dict [list CONFIG.TDATA_NUM_BYTES {64} CONFIG.FIFO_MODE {2} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.Component_Name {axis_pkg_fifo_512}] [get_ips axis_pkg_fifo_512]
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/oclHelper/oclHelper.h:
--------------------------------------------------------------------------------
 1 | /**********
 2 | Copyright (c) 2019, Xilinx, Inc.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | **********/
29 | #ifndef _OCL_HELP_H_
30 | #define _OCL_HELP_H_
31 | 
32 | #include <CL/cl.h>
33 | 
34 | struct oclHardware {  // OpenCL platform/context/device/queue handles plus a version pair
35 |     cl_platform_id mPlatform;  // OpenCL platform handle
36 |     cl_context mContext;  // OpenCL context handle
37 |     cl_device_id mDevice;  // OpenCL device handle
38 |     cl_command_queue mQueue;  // OpenCL command-queue handle
39 |     short mMajorVersion;  // major version number -- presumably of the OpenCL platform/device; confirm
40 |     short mMinorVersion;  // minor version number (same caveat as above)
41 | };
42 | 
43 | struct oclSoftware {  // OpenCL program/kernel handles plus fixed-size text buffers
44 |     cl_program mProgram;  // OpenCL program handle
45 |     cl_kernel mKernel;  // OpenCL kernel handle
46 |     char mKernelName[128];  // kernel name, 128-byte buffer
47 |     char mFileName[1024];  // file name, 1024-byte buffer -- presumably the program source/binary path; confirm
48 |     char mCompileOptions[1024];  // compile options, 1024-byte buffer
49 | };
50 | 
51 | oclHardware getOclHardware(cl_device_type type);  // returns an oclHardware by value for the requested device type (implementation not in this header)
52 | 
53 | int getOclSoftware(oclSoftware &software, const oclHardware &hardware);  // `software` is passed by mutable reference, `hardware` read-only; meaning of the int result is not visible here -- confirm
54 | 
55 | void release(oclSoftware& software);  // overload for oclSoftware; presumably releases its OpenCL objects -- confirm
56 | 
57 | void release(oclHardware& hardware);  // overload for oclHardware; presumably releases its OpenCL objects -- confirm
58 | 
59 | const char *oclErrorCode(cl_int code);  // maps a cl_int code to a C string; presumably the error's name -- confirm
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/FPGA/kernel/cmac_krnl/src/hdl/axis_data_reg.sv:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | `timescale 1ns / 1ps
28 | `default_nettype none
29 | 
30 | // AXI-Stream register slice for the data path (wraps axis_register_slice_512)
31 | module axis_data_reg (
32 |     input wire          aclk,
33 |     input wire          aresetn,
34 |     axi_stream.slave    s_axis,
35 |     axi_stream.master   m_axis
36 | );
37 | axis_register_slice_512 slice_inst (
38 |     .aclk          (aclk),
39 |     .aresetn       (aresetn),
40 |     // input side, driven through the s_axis interface
41 |     .s_axis_tvalid (s_axis.valid),
42 |     .s_axis_tdata  (s_axis.data),
43 |     .s_axis_tkeep  (s_axis.keep),
44 |     .s_axis_tlast  (s_axis.last),
45 |     .s_axis_tready (s_axis.ready),
46 |     // output side, driving the m_axis interface
47 |     .m_axis_tvalid (m_axis.valid),
48 |     .m_axis_tdata  (m_axis.data),
49 |     .m_axis_tkeep  (m_axis.keep),
50 |     .m_axis_tlast  (m_axis.last),
51 |     .m_axis_tready (m_axis.ready)
52 | );
53 | endmodule
54 | `default_nettype wire
55 | 


--------------------------------------------------------------------------------
/FPGA/kernel/network_krnl/src/hdl/axis_data_reg.sv:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without modification,
 6 |  * are permitted provided that the following conditions are met:
 7 |  *
 8 |  * 1. Redistributions of source code must retain the above copyright notice,
 9 |  * this list of conditions and the following disclaimer.
10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
11 |  * this list of conditions and the following disclaimer in the documentation
12 |  * and/or other materials provided with the distribution.
13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
14 |  * may be used to endorse or promote products derived from this software
15 |  * without specific prior written permission.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |  */
27 | `timescale 1ns / 1ps
28 | `default_nettype none
29 | 
30 | // AXI-Stream register slice for the data path (wraps axis_register_slice_512)
31 | module axis_data_reg (
32 |     input wire          aclk,
33 |     input wire          aresetn,
34 |     axi_stream.slave    s_axis,
35 |     axi_stream.master   m_axis
36 | );
37 | axis_register_slice_512 slice_inst (
38 |     .aclk          (aclk),
39 |     .aresetn       (aresetn),
40 |     // input side, driven through the s_axis interface
41 |     .s_axis_tvalid (s_axis.valid),
42 |     .s_axis_tdata  (s_axis.data),
43 |     .s_axis_tkeep  (s_axis.keep),
44 |     .s_axis_tlast  (s_axis.last),
45 |     .s_axis_tready (s_axis.ready),
46 |     // output side, driving the m_axis interface
47 |     .m_axis_tvalid (m_axis.valid),
48 |     .m_axis_tdata  (m_axis.data),
49 |     .m_axis_tkeep  (m_axis.keep),
50 |     .m_axis_tlast  (m_axis.last),
51 |     .m_axis_tready (m_axis.ready)
52 | );
53 | endmodule
54 | `default_nettype wire
55 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/constant.h:
--------------------------------------------------------------------------------
 1 | // Input: (INPUT_FEATURE_LEN, BATCH_SIZE)
 2 | // Layer1: W1 * INPUT + B1 
 3 | //  -> W1 (HIDDEN_SIZE1, INPUT_FEATURE_LEN)
 4 | //  -> B1 (HIDDEN_SIZE1)
 5 | //  -> Result1 (HIDDEN_SIZE1, BATCH_SIZE)
 6 | // Layer2: W2 * Result1 + B2
 7 | //  -> W2 (HIDDEN_SIZE2, HIDDEN_SIZE1)
 8 | //  -> B2 (HIDDEN_SIZE2)
 9 | //  -> Result2 (HIDDEN_SIZE2, BATCH_SIZE)
10 | // Layer3: W3 * Result2 + B3
11 | //  -> W3 (HIDDEN_SIZE3, HIDDEN_SIZE2)
12 | //  -> B3 (HIDDEN_SIZE3)
13 | //  -> Result3 (HIDDEN_SIZE3, BATCH_SIZE)
14 | // Output Layer: W_OUT * Result3 + B_OUT
15 | //  -> W_OUT (OUTPUT_FEATURE_LEN, HIDDEN_SIZE3)
16 | //  -> B_OUT (OUTPUT_FEATURE_LEN)
17 | //  -> Output (OUTPUT_FEATURE_LEN, BATCH_SIZE)
18 | 
19 | ///////////// OPTION: small model 384 -> 512 /////////////
20 | ///////////// OPTION: large model 876 -> 1024 /////////////
21 | 
22 | /// TODO: CHANGE THIS
23 | // #define INPUT_FEATURE_LEN 1024
24 | 
25 | #define INPUT_FEATURE_LEN_RECEIVER 3968 // = 2 * 1952 + 64; presumably two FPGA senders plus one CPU sender (see the ports below) -- confirm
26 | #define INPUT_FEATURE_LEN_FPGA_SENDER 1952
27 | #define INPUT_FEATURE_LEN_CPU_SENDER 64
28 | 
29 | // Layer widths. BATCH_SIZE is defined further down in this file; TODO: revisit the constant definitions.
30 | #define HIDDEN_SIZE1 2048 // 1024
31 | #define HIDDEN_SIZE2 512 
32 | #define HIDDEN_SIZE3 256  
33 | #define OUTPUT_FEATURE_LEN 1 
34 | 
35 | /* constraint: SHM_DATA_SIZE === 1 GB */
36 | /* FLOAT_SIZE * BATCH_SIZE * INPUT_DIM * BATCH_NUM_PER_LOOP = 1024 **3; NOTE(review): stale -- this header defines neither SHM_DATA_SIZE nor BATCH_NUM_PER_LOOP */
37 | #define FLOAT_SIZE 4 // bytes per float
38 | #define BATCH_SIZE 1024 // 1024
39 | #define TOTAL_BATCH_NUM (1 * 1024 * 1024 / BATCH_SIZE) // 1024 batches when BATCH_SIZE is 1024
40 | 
41 | // #define BATCH_NUM_PER_THREAD (TOTAL_BATCH_NUM / THREAD_NUM)
42 | 
43 | /* matrix (batch * input_dim): 1024 * 1024, float: 4 byte, 1024 batches in queue */
44 | /* 4 GB in total, (1024 * 1024 * 4 * 1024); NOTE(review): these figures do not match the feature lengths defined above */
45 | 
46 | #define BLOCK_ENTRY_NUM_RECEIVER (BATCH_SIZE * INPUT_FEATURE_LEN_RECEIVER) // floats per batch on the receiver
47 | #define BLOCK_SIZE_RECEIVER (BLOCK_ENTRY_NUM_RECEIVER * FLOAT_SIZE) // the same batch in bytes
48 | 
49 | #define BLOCK_ENTRY_NUM_FPGA_SENDER (BATCH_SIZE * INPUT_FEATURE_LEN_FPGA_SENDER) // floats per batch for one FPGA sender
50 | #define BLOCK_SIZE_FPGA_SENDER (BLOCK_ENTRY_NUM_FPGA_SENDER * FLOAT_SIZE) // the same batch in bytes
51 | 
52 | #define BLOCK_ENTRY_NUM_CPU_SENDER (BATCH_SIZE * INPUT_FEATURE_LEN_CPU_SENDER) // floats per batch for the CPU sender
53 | #define BLOCK_SIZE_CPU_SENDER (BLOCK_ENTRY_NUM_CPU_SENDER * FLOAT_SIZE) // the same batch in bytes
54 | 
55 | #define THREAD_NUM 16
56 | 
57 | // Base port per sender; original note: stream 0 uses the base port, stream 1 the base port + 1, ...
58 | #define PORT_CPU_SENDER_0 7080
59 | #define PORT_FPGA_SENDER_0 8080
60 | #define PORT_FPGA_SENDER_1 9080
61 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/bitmap/bitmap.h:
--------------------------------------------------------------------------------
 1 | /**********
 2 | Copyright (c) 2019, Xilinx, Inc.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | **********/
29 | #ifndef BITMAP_DOT_H
30 | #define BITMAP_DOT_H
31 | 
32 | #include <stdlib.h>
33 | 
34 | class BitmapInterface  // Declares read/write access to one bitmap file plus its parsed header fields.
35 | {
36 |  private:
37 |   char* core ;  // presumably the raw core (file) header bytes -- confirm in the implementation
38 |   char* dib ;  // presumably the raw DIB header bytes -- confirm in the implementation
39 |   const char* filename ;  // NOTE(review): likely set from the constructor argument; verify
40 |   int* image ;  // pixel buffer exposed through bitmap()
41 | 
42 |   // Core header information
43 |   unsigned short magicNumber ;
44 |   unsigned int fileSize ;
45 |   unsigned int offsetOfImage ;
46 | 
47 |   // DIB information
48 |   int sizeOfDIB ;
49 |   int sizeOfImage ;  // numPixels() divides this by 3
50 |   int height ;
51 |   int width ;
52 | 
53 |  public:
54 |   BitmapInterface(const char* f) ;  // f: presumably the bitmap file name -- confirm in the implementation
55 |   ~BitmapInterface() ;
56 | 
57 |   bool readBitmapFile() ;  // returns bool; success/failure meaning is defined in the implementation
58 |   bool writeBitmapFile(int* otherImage = NULL);  // otherImage defaults to NULL; presumably the internal image is written then -- TODO confirm
59 | 
60 |   inline int* bitmap() { return image ; }  // returns the internal image pointer itself (no copy)
61 |   unsigned int numPixels() { return sizeOfImage/3 ; }  // integer division; presumably 3 bytes per pixel (24-bit) -- confirm
62 | 
63 |   inline int getHeight() { return height ; }  // accessor for the stored height field
64 |   inline int getWidth() { return width ; }  // accessor for the stored width field
65 | 
66 | } ;
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/FPGA/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.0)
 2 | project(vitis-network)
 3 | 
 4 | #
 5 | # Vivado
 6 | #
 7 | 
 8 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake)
 9 | 
10 | set(IPREPO_DIR ${CMAKE_BINARY_DIR}/fpga-network-stack/iprepo)
11 | file(MAKE_DIRECTORY ${IPREPO_DIR})
12 | 
13 | # Device
14 | if(FDEV_NAME STREQUAL "u280") # select per-device build settings; u280 is the only branch defined here
15 |     set(FPGA_PART "xcu280-fsvh2892-2L-e" CACHE STRING "FPGA device")
16 |     set(NETWORK_BANDWIDTH 100 CACHE STRING "Network bandwidth")
17 |     set(NETWORK_INTERFACE 100 CACHE STRING "Network interface") # docstring previously duplicated "Network bandwidth"
18 |     set(DATA_WIDTH 64 CACHE STRING "Width of data path in bytes")
19 |     set(CLOCK_PERIOD 3.2 CACHE STRING "Target clock period in nanoseconds")
20 |     set(DEVICE "/opt/xilinx/platforms/xilinx_u280_xdma_201920_3/xilinx_u280_xdma_201920_3.xpfm")
21 | else() # any other (or unset) FDEV_NAME aborts configuration
22 |     message(FATAL_ERROR "Target device not supported: FDEV_NAME='${FDEV_NAME}' (supported: u280).")
23 | endif()
24 | 
25 | # Config
26 | set(ROCE_STACK_EN 0 CACHE BOOL "Enable RDMA stack.")
27 | set(TCP_STACK_EN 0 CACHE BOOL "Enable TCP/IP stack")
28 | set(UDP_STACK_EN 1 CACHE BOOL "Enable UDP/IP stack")
29 | set(TCP_STACK_RX_DDR_BYPASS_EN 1 CACHE BOOL "Enabling DDR bypass on the RX path")
30 | 
31 | 
32 | 
33 | 
34 | # QSFP port     
35 | set(QSFP_PORT 1 CACHE STRING "Network traffic route.")
36 | 
37 | #
38 | # Network stack
39 | #
40 | 
41 | add_subdirectory(fpga-network-stack)
42 | 
43 | 
44 | #User kernel IPs
45 | add_subdirectory(kernel/user_krnl/scatter_krnl/src/hls)
46 | add_subdirectory(kernel/user_krnl/iperf_krnl/src/hls)
47 | 
48 | #
49 | # Find Vivado
50 | #
51 | 
52 | find_package(Vivado REQUIRED)
53 | if (NOT VIVADO_FOUND)
54 |    message(FATAL_ERROR "Vivado not found.")
55 | endif()
56 | 
57 | configure_file(${CMAKE_SOURCE_DIR}/scripts/package_network_krnl.tcl.in ${CMAKE_SOURCE_DIR}/scripts/package_network_krnl.tcl)
58 | configure_file(${CMAKE_SOURCE_DIR}/scripts/package_cmac_krnl.tcl.in ${CMAKE_SOURCE_DIR}/scripts/package_cmac_krnl.tcl)
59 | configure_file(${CMAKE_SOURCE_DIR}/scripts/package_scatter_krnl.tcl.in ${CMAKE_SOURCE_DIR}/scripts/package_scatter_krnl.tcl)
60 | configure_file(${CMAKE_SOURCE_DIR}/scripts/package_iperf_krnl.tcl.in ${CMAKE_SOURCE_DIR}/scripts/package_iperf_krnl.tcl)
61 | 
62 | configure_file(${CMAKE_SOURCE_DIR}/scripts/post_sys_link.tcl.in ${CMAKE_SOURCE_DIR}/scripts/post_sys_link.tcl)
63 | configure_file(${CMAKE_SOURCE_DIR}/kernel/common/types/network_types.svh.in ${CMAKE_SOURCE_DIR}/kernel/common/types/network_types.svh)
64 | 
65 | #configure_file(${CMAKE_SOURCE_DIR}/Makefile.in ${CMAKE_SOURCE_DIR}/Makefile)
66 | 
67 | 
68 | #add_custom_target(shell COMMAND ${VIVADO_BINARY} -mode tcl -source ${CMAKE_BINARY_DIR}/shell.tcl)
69 | 
70 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author:  David Sidler (david.sidler@inf.ethz.ch)
 2 | 
 3 | cmake_minimum_required(VERSION 3.0)
 4 | 
 5 | set (PROJECT_NAME scatter)
 6 | project(${PROJECT_NAME})
 7 | 
 8 | # Include custom Find<Module>.cmake scripts to enable searching for Vivado HLS
 9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake)
10 | 
11 | # Without this variable set, CMake will build tests when running install
12 | #set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY ON)
13 | 
14 | # Generate Doxygen if available
15 | #find_package(Doxygen)
16 | #if(Doxygen_FOUND)
17 | #  configure_file(${CMAKE_SOURCE_DIR}/Doxyfile.in Doxyfile)
18 | #  add_custom_target(doxygen ALL
19 | #      COMMAND ${DOXYGEN_EXECUTABLE} Doxyfile 
20 | #      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
21 | #endif()
22 | 
23 | 
24 | set(FPGA_PART "xcu280-fsvh2892-2L-e" CACHE STRING "FPGA device") # cache defaults; an existing cache entry takes precedence
25 | set(NETWORK_BANDWIDTH 100 CACHE STRING "Network bandwidth")
26 | set(NETWORK_INTERFACE 100 CACHE STRING "Network interface") # docstring previously duplicated "Network bandwidth"
27 | set(DATA_WIDTH 64 CACHE STRING "Width of data path in bytes")
28 | set(CLOCK_PERIOD 3.2 CACHE STRING "Target clock period in nanoseconds")
29 | 
30 | 
31 | # Find Xilinx Vivado HLS
32 | find_package(VivadoHLS REQUIRED)
33 | if (NOT VIVADO_HLS_FOUND)
34 |   message(FATAL_ERROR "Vivado HLS not found.")
35 | endif()
36 | 
37 | # Installation directory
38 | if (DEFINED ENV{IPREPO_DIR})
39 |    set(IPREPO_DIR $ENV{IPREPO_DIR})
40 | elseif(NOT  IPREPO_DIR)
41 |    set(IPREPO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/iprepo/)
42 | endif()
43 | 
44 | 
45 | include_directories(${CMAKE_CURRENT_BINARY_DIR})
46 | 
47 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/scatter_config.hpp.in scatter_config.hpp)
48 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/make.tcl.in make.tcl)
49 | 
50 | 
51 | set(EXAMPLE_HLS_DEPENDS
52 |     ${CMAKE_CURRENT_SOURCE_DIR}/scatter.cpp 
53 |     ${CMAKE_CURRENT_SOURCE_DIR}/scatter.hpp
54 |     ${CMAKE_CURRENT_SOURCE_DIR}/scatter_config.hpp.in
55 |     ${CMAKE_CURRENT_SOURCE_DIR}/test_scatter.cpp)
56 | 
57 | 
58 | #Setup HLS custom targets
59 | set(HLS_TARGETS synthesis csim ip installip)
60 | 
61 | foreach (target ${HLS_TARGETS})
62 |    if (NOT TARGET ${target})
63 |       add_custom_target(${target})
64 |    endif()
65 | 
66 |    add_custom_target(${target}.${PROJECT_NAME}
67 |       COMMAND ${VIVADO_HLS_BINARY} -f make.tcl -tclargs ${target}
68 |       DEPENDS ${EXAMPLE_HLS_DEPENDS})
69 |    add_dependencies(${target} ${target}.${PROJECT_NAME})
70 | endforeach()
71 | 
72 | #target dependencies
73 | add_dependencies(ip.${PROJECT_NAME} synthesis.${PROJECT_NAME})
74 | add_dependencies(installip.${PROJECT_NAME} ip.${PROJECT_NAME})
75 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/README.md:
--------------------------------------------------------------------------------
 1 | # Programs
 2 | 
 3 | First start Terminal 1: run cuda_server first (./run_cuda_server.sh)
 4 | 
 5 | Then start Terminal 2: run client to send data (./run_client_sender.sh) -> Remember to Adjust the Server IP Address
 6 | 
 7 | Correct Results:
 8 | 
 9 | Input Feature Size = 512 -> 68719476736
10 | 
11 | Input Feature Size = 1024 -> 137438953472
12 | 
13 | ## cuda_server.c
14 | 
15 | rm cuda_server
16 | 
17 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
18 | 
19 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
20 | 
21 | ## multiple_connections_network_client_sender.c
22 | 
23 | ### NOTE! The sender must send more data than the receiver expects, because the sender and receiver threads progress at different rates, e.g., receiver connection 1 may still be waiting for its last batch while the sender is already trying to send data through connection 2.
24 | 
25 | As a result, we use more sender data (2 * required)
26 | 
27 | This program simulates an FPGA that opens 4 connections and sends data to the CUDA server.
28 | 
29 | Start CUDA server first, then client.
30 | 
31 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
32 | 
33 | ./multiple_connections_network_client_sender
34 | 
35 | 
36 | # Other programs (for building the final version)
37 | 
38 | ## pthread_test.c
39 | 
40 | https://www.geeksforgeeks.org/multithreading-c-2/
41 | 
42 | Pass port info and memory address space to the thread as a structure, and execute that thread.
43 | 
44 | gcc pthread_test.c -lpthread
45 | 
46 | ./a.out
47 | 
48 | ## single_connection_network_server_receiver.c
49 | 
50 | Start server first, then client.
51 | 
52 | gcc single_connection_network_server_receiver.c -lpthread -o single_connection_network_server_receiver
53 | 
54 | ./single_connection_network_server_receiver
55 | 
56 | ## single_connection_network_client_sender.c
57 | 
58 | Start server first, then client.
59 | 
60 | gcc single_connection_network_client_sender.c -lpthread -o single_connection_network_client_sender
61 | 
62 | ./single_connection_network_client_sender
63 | 
64 | 
65 | ## multiple_connections_network_server_receiver.c
66 | 
67 | Start server first, then client.
68 | 
69 | 4 TCP connections.
70 | 
71 | gcc multiple_connections_network_server_receiver.c -lpthread -o multiple_connections_network_server_receiver
72 | 
73 | ./multiple_connections_network_server_receiver
74 | 
75 | ## multiple_connections_network_client_sender.c
76 | 
77 | Start server first, then client.
78 | 
79 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
80 | 
81 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/README.md:
--------------------------------------------------------------------------------
 1 | # Programs
 2 | 
 3 | First start Terminal 1: run cuda_server first (./run_cuda_server.sh)
 4 | 
 5 | Then start Terminal 2: run client to send data (./run_client_sender.sh) -> Remember to Adjust the Server IP Address
 6 | 
 7 | Correct Results:
 8 | 
 9 | Input Feature Size = 512 -> 68719476736
10 | 
11 | Input Feature Size = 1024 -> 137438953472
12 | 
13 | ## cuda_server.c
14 | 
15 | rm cuda_server
16 | 
17 | nvcc -l cublasLt -lpthread cuda_server.c -o cuda_server
18 | 
19 | nvprof -f --export-profile timeline.prof --concurrent-kernels on ./cuda_server
20 | 
21 | ## multiple_connections_network_client_sender.c
22 | 
23 | ### NOTE! The sender must send more data than the receiver expects, because the sender and receiver threads progress at different rates, e.g., receiver connection 1 may still be waiting for its last batch while the sender is already trying to send data through connection 2.
24 | 
25 | As a result, we use more sender data (2 * required)
26 | 
27 | This program simulates an FPGA that opens 4 connections and sends data to the CUDA server.
28 | 
29 | Start CUDA server first, then client.
30 | 
31 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
32 | 
33 | ./multiple_connections_network_client_sender
34 | 
35 | 
36 | # Other programs (for building the final version)
37 | 
38 | ## pthread_test.c
39 | 
40 | https://www.geeksforgeeks.org/multithreading-c-2/
41 | 
42 | Pass port info and memory address space to the thread as a structure, and execute that thread.
43 | 
44 | gcc pthread_test.c -lpthread
45 | 
46 | ./a.out
47 | 
48 | ## single_connection_network_server_receiver.c
49 | 
50 | Start server first, then client.
51 | 
52 | gcc single_connection_network_server_receiver.c -lpthread -o single_connection_network_server_receiver
53 | 
54 | ./single_connection_network_server_receiver
55 | 
56 | ## single_connection_network_client_sender.c
57 | 
58 | Start server first, then client.
59 | 
60 | gcc single_connection_network_client_sender.c -lpthread -o single_connection_network_client_sender
61 | 
62 | ./single_connection_network_client_sender
63 | 
64 | 
65 | ## multiple_connections_network_server_receiver.c
66 | 
67 | Start server first, then client.
68 | 
69 | 4 TCP connections.
70 | 
71 | gcc multiple_connections_network_server_receiver.c -lpthread -o multiple_connections_network_server_receiver
72 | 
73 | ./multiple_connections_network_server_receiver
74 | 
75 | ## multiple_connections_network_client_sender.c
76 | 
77 | Start server first, then client.
78 | 
79 | gcc multiple_connections_network_client_sender.c -lpthread -o multiple_connections_network_client_sender
80 | 
81 | ./multiple_connections_network_client_sender


--------------------------------------------------------------------------------
/FPGA/scripts/gen_xo.tcl:
--------------------------------------------------------------------------------
 1 | # /*******************************************************************************
 2 | # Copyright (c) 2018, Xilinx, Inc.
 3 | # All rights reserved.
 4 | # 
 5 | # Redistribution and use in source and binary forms, with or without modification,
 6 | # are permitted provided that the following conditions are met:
 7 | # 
 8 | # 1. Redistributions of source code must retain the above copyright notice,
 9 | # this list of conditions and the following disclaimer.
10 | # 
11 | # 
12 | # 2. Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | # 
16 | # 
17 | # 3. Neither the name of the copyright holder nor the names of its contributors
18 | # may be used to endorse or promote products derived from this software
19 | # without specific prior written permission.
20 | # 
21 | # 
22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE IMPLIED 
24 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
26 | # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
27 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
28 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
29 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
30 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | #
33 | # *******************************************************************************/
34 | 
35 | if { $::argc != 7 } {
36 |     puts "ERROR: Program \"$::argv0\" requires 7 arguments!\n"
37 |     puts "Usage: $::argv0 <xoname> <krnl_name> <target> <xpfm_path> <device> <xml_path> <package_tcl_path>\n"
38 |     exit 1 ;# non-zero status so the calling make/shell step sees the usage error
39 | }
40 | 
41 | set xoname  [lindex $::argv 0]
42 | set krnl_name [lindex $::argv 1]
43 | set target    [lindex $::argv 2]
44 | set xpfm_path [lindex $::argv 3]
45 | set device    [lindex $::argv 4]
46 | set xml_path [lindex $::argv 5]
47 | set package_tcl_path [lindex $::argv 6]
48 | 
49 | set suffix "${krnl_name}_${target}_${device}"
50 | 
51 | puts "INFO: ${xoname} ${krnl_name} ${target} ${xpfm_path} ${device}" 
52 | 
53 | source -notrace ${package_tcl_path}
54 | 
55 | if {[file exists "${xoname}"]} {
56 |     file delete -force "${xoname}"
57 | }
58 | 
59 | package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory ./packaged_kernel_${suffix} -kernel_xml ${xml_path}
60 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/simplebmp/simplebmp.h:
--------------------------------------------------------------------------------
 1 | /**********
 2 | Copyright (c) 2019, Xilinx, Inc.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | **********/
29 | //Read and write uncompressed 24 bit BMP image format image
30 | //based on http://en.wikipedia.org/wiki/BMP_file_format
31 | //Copyright Xilinx
32 | 
33 | 
34 | #ifndef __SIMPLE_BMP
35 | #define __SIMPLE_BMP
36 | 
37 | struct bmpheader_t{
38 |   //Header
39 |   char headerB;
40 |   char headerM;
41 |   uint32_t headerbmpsize;              
42 |   uint16_t headerapp0;
43 |   uint16_t headerapp1;
44 |   uint32_t headerpixelsoffset;
45 | 
46 |   //DIB header
47 |   uint32_t dibheadersize;
48 |   uint32_t dibwidth;
49 |   uint32_t dibheight;
50 |   uint16_t dibplane;
51 |   uint16_t dibdepth;
52 |   uint32_t dibcompression;
53 |   uint32_t dibsize; 
54 |   uint32_t dibhor;
55 |   uint32_t dibver;
56 |   uint32_t dibpal;
57 |   uint32_t dibimportant;
58 |   
59 | };
60 | 
61 | 
62 | struct bmp_t{
63 |   struct bmpheader_t header;
64 |   uint32_t width;
65 |   uint32_t height;
66 |   uint32_t *pixels;
67 | };
68 | 
69 | int writebmp(char *filename,struct bmp_t *bitmap);
70 | 
71 | int readbmp(char *filename,struct bmp_t *bitmap);
72 | //-1 file access error
73 | //-2 invalid BMP
74 | //-3 memory allocation error
75 |  
76 | 
77 | #endif
78 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/src/hls/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Author:  David Sidler (david.sidler@inf.ethz.ch)
 2 | 
 3 | cmake_minimum_required(VERSION 3.0)
 4 | 
 5 | set (PROJECT_NAME iperf_client)
 6 | project(${PROJECT_NAME})
 7 | 
 8 | # Include custom Find<Module>.cmake scripts to enable searching for Vivado HLS
 9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake)
10 | 
11 | # Without this variable set, CMake will build tests when running install
12 | #set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY ON)
13 | 
14 | # Generate Doxygen if available
15 | #find_package(Doxygen)
16 | #if(Doxygen_FOUND)
17 | #  configure_file(${CMAKE_SOURCE_DIR}/Doxyfile.in Doxyfile)
18 | #  add_custom_target(doxygen ALL
19 | #      COMMAND ${DOXYGEN_EXECUTABLE} Doxyfile 
20 | #      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
21 | #endif()
22 | 
23 | if (NOT hasParent)
24 |    if (DEVICE_NAME STREQUAL "u280")
25 |       set(FPGA_PART xcu280-fsvh2892-2L-e)
26 |       set(FPGA_FAMILY ultraplus)
27 |       set(NETWORK_BANDWIDTH 100 CACHE STRING "Network bandwidth")
28 |    endif()
29 | endif()
30 | set(DATA_WIDTH 8 CACHE STRING "Width of data path in bytes")
31 | set(CLOCK_PERIOD 6.4 CACHE STRING "Target clock period in nanoseconds")
32 | 
33 | 
34 | # Find Xilinx Vivado HLS
35 | find_package(VivadoHLS REQUIRED)
36 | if (NOT VIVADO_HLS_FOUND)
37 |   message(FATAL_ERROR "Vivado HLS not found.")
38 | endif()
39 | 
40 | 
41 | include_directories(${CMAKE_CURRENT_BINARY_DIR})
42 | 
43 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/iperf_client_config.hpp.in iperf_client_config.hpp)
44 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/make.tcl.in make.tcl)
45 | 
46 | 
47 | set(EXAMPLE_HLS_DEPENDS
48 |     ${CMAKE_CURRENT_SOURCE_DIR}/iperf_client.cpp 
49 |     ${CMAKE_CURRENT_SOURCE_DIR}/iperf_client.hpp
50 |     ${CMAKE_CURRENT_SOURCE_DIR}/iperf_client_config.hpp.in
51 |     ${CMAKE_CURRENT_SOURCE_DIR}/test_iperf_client.cpp)
52 | 
53 | 
54 | #Setup HLS custom targets
55 | set(HLS_TARGETS synthesis csim ip installip)
56 | 
57 | foreach (target ${HLS_TARGETS})
58 |    if (NOT TARGET ${target})
59 |       add_custom_target(${target})
60 |    endif()
61 | 
62 |    add_custom_target(${target}.${PROJECT_NAME}
63 |       COMMAND ${VIVADO_HLS_BINARY} -f make.tcl -tclargs ${target}
64 |       DEPENDS ${EXAMPLE_HLS_DEPENDS})
65 |    add_dependencies(${target} ${target}.${PROJECT_NAME})
66 | endforeach()
67 | 
68 | #target dependencies
69 | add_dependencies(ip.${PROJECT_NAME} synthesis.${PROJECT_NAME})
70 | add_dependencies(installip.${PROJECT_NAME} ip.${PROJECT_NAME})
71 | 
72 | # Installation
73 | if (DEFINED ENV{IPREPO_DIR})
74 |    set(IPREPO_DIR $ENV{IPREPO_DIR})
75 | elseif(NOT  IPREPO_DIR)
76 |    set(IPREPO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/iprepo/)
77 | endif()
78 | #install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}_prj/solution1/impl/ip/
79 | #   DESTINATION ${IPREPO_DIR}/${PROJECT_NAME}/)
80 |                                    
81 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_krnl/config_sp_embedding_krnl.txt:
--------------------------------------------------------------------------------
 1 | profile_kernel=data:embedding_krnl_1:all:all
 2 | 
 3 | [connectivity] 
 4 | sp=embedding_krnl_1.table_HBM0:HBM[0]
 5 | sp=embedding_krnl_1.table_HBM1:HBM[1]
 6 | sp=embedding_krnl_1.table_HBM2:HBM[2]
 7 | sp=embedding_krnl_1.table_HBM3:HBM[3]
 8 | sp=embedding_krnl_1.table_HBM4:HBM[4]
 9 | sp=embedding_krnl_1.table_HBM5:HBM[5]
10 | sp=embedding_krnl_1.table_HBM6:HBM[6]
11 | sp=embedding_krnl_1.table_HBM7:HBM[7]
12 | sp=embedding_krnl_1.table_HBM8:HBM[8]
13 | sp=embedding_krnl_1.table_HBM9:HBM[9]
14 | sp=embedding_krnl_1.table_HBM10:HBM[10]
15 | sp=embedding_krnl_1.table_HBM11:HBM[11]
16 | sp=embedding_krnl_1.table_HBM12:HBM[12]
17 | sp=embedding_krnl_1.table_HBM13:HBM[13]
18 | sp=embedding_krnl_1.table_HBM14:HBM[14]
19 | sp=embedding_krnl_1.table_HBM15:HBM[15]
20 | sp=embedding_krnl_1.table_HBM16:HBM[16]
21 | sp=embedding_krnl_1.table_HBM17:HBM[17]
22 | sp=embedding_krnl_1.table_HBM18:HBM[18]
23 | sp=embedding_krnl_1.table_HBM19:HBM[19]
24 | sp=embedding_krnl_1.table_HBM20:HBM[20]
25 | sp=embedding_krnl_1.table_HBM21:HBM[21]
26 | sp=embedding_krnl_1.table_HBM22:HBM[22]
27 | sp=embedding_krnl_1.table_HBM23:HBM[23]
28 | sp=embedding_krnl_1.table_HBM24:HBM[24]
29 | sp=embedding_krnl_1.table_HBM25:HBM[25]
30 | sp=embedding_krnl_1.table_HBM26:HBM[26]
31 | sp=embedding_krnl_1.table_HBM27:HBM[27]
32 | sp=embedding_krnl_1.table_DDR0:DDR[0]
33 | sp=embedding_krnl_1.table_DDR1:DDR[1]
34 | 
35 | sp=network_krnl_1.m00_axi:HBM[28]
36 | sp=network_krnl_1.m01_axi:HBM[29]
37 | 
38 | sc=network_krnl_1.m_axis_udp_rx:embedding_krnl_1.s_axis_udp_rx
39 | sc=network_krnl_1.m_axis_udp_rx_meta:embedding_krnl_1.s_axis_udp_rx_meta
40 | sc=network_krnl_1.m_axis_tcp_port_status:embedding_krnl_1.s_axis_tcp_port_status
41 | sc=network_krnl_1.m_axis_tcp_open_status:embedding_krnl_1.s_axis_tcp_open_status
42 | sc=network_krnl_1.m_axis_tcp_notification:embedding_krnl_1.s_axis_tcp_notification
43 | sc=network_krnl_1.m_axis_tcp_rx_meta:embedding_krnl_1.s_axis_tcp_rx_meta
44 | sc=network_krnl_1.m_axis_tcp_rx_data:embedding_krnl_1.s_axis_tcp_rx_data
45 | sc=network_krnl_1.m_axis_tcp_tx_status:embedding_krnl_1.s_axis_tcp_tx_status
46 | 
47 | sc=embedding_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
48 | sc=embedding_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
49 | sc=embedding_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
50 | sc=embedding_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
51 | sc=embedding_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
52 | sc=embedding_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
53 | sc=embedding_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
54 | sc=embedding_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
55 | 
56 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
57 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx


--------------------------------------------------------------------------------
/FPGA/utils.mk:
--------------------------------------------------------------------------------
 1 | #+-------------------------------------------------------------------------------
 2 | # The following parameters are assigned with default values. These parameters can
 3 | # be overridden through the make command line
 4 | #+-------------------------------------------------------------------------------
 5 | 
 6 | PROFILE := no
 7 | 
 8 | #Generates profile summary report
 9 | ifeq ($(PROFILE), yes)
10 | LDCLFLAGS += --profile_kernel data:all:all:all
11 | endif
12 | 
13 | DEBUG := no
14 | B_TEMP = `$(ABS_COMMON_REPO)/common/utility/parse_platform_list.py $(DEVICE)`
15 | 
16 | #Generates debug summary report
17 | ifeq ($(DEBUG), yes)
18 | LDCLFLAGS += --dk list_ports
19 | endif
20 | 
21 | #Setting Platform Path
22 | ifeq ($(findstring xpfm, $(DEVICE)), xpfm)
23 | 	B_NAME = $(shell dirname $(DEVICE))
24 | else
25 | 	B_NAME = $(B_TEMP)/$(DEVICE)
26 | endif
27 | 
28 | #Checks for XILINX_VITIS
29 | ifndef XILINX_VITIS
30 | $(error XILINX_VITIS variable is not set, please set correctly and rerun)
31 | endif
32 | 
33 | #Checks for Device Family
34 | ifeq ($(HOST_ARCH), aarch32)
35 | 	DEV_FAM = 7Series
36 | else ifeq ($(HOST_ARCH), aarch64)
37 | 	DEV_FAM = Ultrascale
38 | endif
39 | 
40 | #Checks for XILINX_XRT
41 | check-xrt:
42 | ifndef XILINX_XRT
43 | 	$(error XILINX_XRT variable is not set, please set correctly and rerun)
44 | endif
45 | 
46 | #Checks for Correct architecture
47 | ifneq ($(HOST_ARCH), $(filter $(HOST_ARCH),aarch64 aarch32 x86))
48 | $(error HOST_ARCH variable not set, please set correctly and rerun)
49 | endif
50 | 
51 | #Checks for SYSROOT
52 | ifneq ($(HOST_ARCH), x86)
53 | ifndef SYSROOT
54 | $(error SYSROOT variable is not set, please set correctly and rerun)
55 | endif
56 | endif
57 | 
58 | #Checks for g++
59 | ifeq ($(HOST_ARCH), x86)
60 | ifneq ($(shell expr $(shell g++ -dumpversion) \>= 5), 1)
61 | ifndef XILINX_VIVADO
62 | $(error [ERROR]: g++ version older. Please use 5.0 or above.)
63 | else
64 | CXX := $(XILINX_VIVADO)/tps/lnx64/gcc-6.2.0/bin/g++
65 | $(warning [WARNING]: g++ version older. Using g++ provided by the tool : $(CXX))
66 | endif
67 | endif
68 | else ifeq ($(HOST_ARCH), aarch64)
69 | CXX := $(XILINX_VITIS)/gnu/aarch64/lin/aarch64-linux/bin/aarch64-linux-gnu-g++
70 | else ifeq ($(HOST_ARCH), aarch32)
71 | CXX := $(XILINX_VITIS)/gnu/aarch32/lin/gcc-arm-linux-gnueabi/bin/arm-linux-gnueabihf-g++
72 | endif
73 | 
74 | check-devices:
75 | ifndef DEVICE
76 | 	$(error DEVICE not set. Please set the DEVICE properly and rerun. Run "make help" for more details.)
77 | endif
78 | 
79 | #   device2xsa - create a filesystem friendly name from device name
80 | #   $(1) - full name of device
81 | device2xsa = $(strip $(patsubst %.xpfm, % , $(shell basename $(DEVICE))))
82 | 
83 | # Cleaning stuff
84 | RM = rm -f
85 | RMDIR = rm -rf
86 | 
87 | ECHO:= @echo
88 | 
89 | docs: README.md
90 | 
91 | README.md: description.json
92 | 	$(ABS_COMMON_REPO)/common/utility/readme_gen/readme_gen.py description.json
93 | 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/out:
--------------------------------------------------------------------------------
 1 | rm: cannot remove ‘cuda_server’: No such file or directory
 2 | cuda_server.c: In function ‘thread_consume’:
 3 | cuda_server.c:412:53: warning: passing argument 2 of ‘clock_gettime’ from incompatible pointer type [-Wincompatible-pointer-types]
 4 |              clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &network_time[iter * BATCH_NUM_PER_LOOP + block_id]);
 5 |                                                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 6 | In file included from cuda_server.c:5:
 7 | /usr/include/time.h:342:66: note: expected ‘struct timespec *’ but argument is of type ‘timespec *’ {aka ‘struct <anonymous> *’}
 8 |  extern int clock_gettime (clockid_t __clock_id, struct timespec *__tp) __THROW;
 9 |                                                  ~~~~~~~~~~~~~~~~~^~~~
10 | cuda_server.c:421:28: warning: returning ‘int’ from a function with return type ‘void *’ makes pointer from integer without a cast [-Wint-conversion]
11 |                      return -1;
12 |                             ^
13 | cuda_server.c:432:50: warning: passing argument 2 of ‘clock_gettime’ from incompatible pointer type [-Wincompatible-pointer-types]
14 |           clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cuda_time[iter * BATCH_NUM_PER_LOOP + block_id]);
15 |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16 | In file included from cuda_server.c:5:
17 | /usr/include/time.h:342:66: note: expected ‘struct timespec *’ but argument is of type ‘timespec *’ {aka ‘struct <anonymous> *’}
18 |  extern int clock_gettime (clockid_t __clock_id, struct timespec *__tp) __THROW;
19 |                                                  ~~~~~~~~~~~~~~~~~^~~~
20 | cuda_server.c: In function ‘main’:
21 | cuda_server.c:537:10: error: redeclaration of ‘memcpy_time_ns’ with no linkage
22 |      long memcpy_time_ns[TOTAL_BATCH_NUM];
23 |           ^~~~~~~~~~~~~~
24 | cuda_server.c:535:10: note: previous declaration of ‘memcpy_time_ns’ was here
25 |      long memcpy_time_ns[TOTAL_BATCH_NUM];
26 |           ^~~~~~~~~~~~~~
27 | cuda_server.c:542:47: error: incompatible type for argument 1 of ‘diff’
28 |          memcpy_timespec[i] = diff(network_time[i], cuda_time[i]);
29 |                                    ~~~~~~~~~~~~^~~
30 | cuda_server.c:68:24: note: expected ‘timespec’ {aka ‘struct <anonymous>’} but argument is of type ‘timespec *’ {aka ‘struct <anonymous> *’}
31 |  timespec diff(timespec start, timespec end)
32 |                ~~~~~~~~~^~~~~
33 | cuda_server.c:542:61: error: incompatible type for argument 2 of ‘diff’
34 |          memcpy_timespec[i] = diff(network_time[i], cuda_time[i]);
35 |                                                     ~~~~~~~~~^~~
36 | cuda_server.c:68:40: note: expected ‘timespec’ {aka ‘struct <anonymous>’} but argument is of type ‘timespec *’ {aka ‘struct <anonymous> *’}
37 |  timespec diff(timespec start, timespec end)
38 |                                ~~~~~~~~~^~~
39 | ======== Error: application not found.
40 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_47_krnl/config_sp_embedding_47_krnl.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | [connectivity] 
 3 | slr=cmac_krnl_1:SLR2
 4 | 
 5 | sp=embedding_47_krnl_1.table_HBM0:HBM[0]
 6 | sp=embedding_47_krnl_1.table_HBM1:HBM[1]
 7 | sp=embedding_47_krnl_1.table_HBM2:HBM[2]
 8 | sp=embedding_47_krnl_1.table_HBM3:HBM[3]
 9 | sp=embedding_47_krnl_1.table_HBM4:HBM[4]
10 | sp=embedding_47_krnl_1.table_HBM5:HBM[5]
11 | sp=embedding_47_krnl_1.table_HBM6:HBM[6]
12 | sp=embedding_47_krnl_1.table_HBM7:HBM[7]
13 | sp=embedding_47_krnl_1.table_HBM8:HBM[8]
14 | sp=embedding_47_krnl_1.table_HBM9:HBM[9]
15 | sp=embedding_47_krnl_1.table_HBM10:HBM[10]
16 | sp=embedding_47_krnl_1.table_HBM11:HBM[11]
17 | sp=embedding_47_krnl_1.table_HBM12:HBM[12]
18 | sp=embedding_47_krnl_1.table_HBM13:HBM[13]
19 | sp=embedding_47_krnl_1.table_HBM14:HBM[14]
20 | sp=embedding_47_krnl_1.table_HBM15:HBM[15]
21 | sp=embedding_47_krnl_1.table_HBM16:HBM[16]
22 | sp=embedding_47_krnl_1.table_HBM17:HBM[17]
23 | sp=embedding_47_krnl_1.table_HBM18:HBM[18]
24 | sp=embedding_47_krnl_1.table_HBM19:HBM[19]
25 | sp=embedding_47_krnl_1.table_HBM20:HBM[20]
26 | sp=embedding_47_krnl_1.table_HBM21:HBM[21]
27 | sp=embedding_47_krnl_1.table_HBM22:HBM[22]
28 | sp=embedding_47_krnl_1.table_HBM23:HBM[23]
29 | sp=embedding_47_krnl_1.table_HBM24:HBM[24]
30 | sp=embedding_47_krnl_1.table_HBM25:HBM[25]
31 | sp=embedding_47_krnl_1.table_HBM26:HBM[26]
32 | sp=embedding_47_krnl_1.table_HBM27:HBM[27]
33 | sp=embedding_47_krnl_1.table_DDR0:DDR[0]
34 | sp=embedding_47_krnl_1.table_DDR1:DDR[1]
35 | 
36 | sp=network_krnl_1.m00_axi:HBM[28]
37 | sp=network_krnl_1.m01_axi:HBM[29]
38 | 
39 | sc=network_krnl_1.m_axis_udp_rx:embedding_47_krnl_1.s_axis_udp_rx
40 | sc=network_krnl_1.m_axis_udp_rx_meta:embedding_47_krnl_1.s_axis_udp_rx_meta
41 | sc=network_krnl_1.m_axis_tcp_port_status:embedding_47_krnl_1.s_axis_tcp_port_status
42 | sc=network_krnl_1.m_axis_tcp_open_status:embedding_47_krnl_1.s_axis_tcp_open_status
43 | sc=network_krnl_1.m_axis_tcp_notification:embedding_47_krnl_1.s_axis_tcp_notification
44 | sc=network_krnl_1.m_axis_tcp_rx_meta:embedding_47_krnl_1.s_axis_tcp_rx_meta
45 | sc=network_krnl_1.m_axis_tcp_rx_data:embedding_47_krnl_1.s_axis_tcp_rx_data
46 | sc=network_krnl_1.m_axis_tcp_tx_status:embedding_47_krnl_1.s_axis_tcp_tx_status
47 | 
48 | sc=embedding_47_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
49 | sc=embedding_47_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
50 | sc=embedding_47_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
51 | sc=embedding_47_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
52 | sc=embedding_47_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
53 | sc=embedding_47_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
54 | sc=embedding_47_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
55 | sc=embedding_47_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
56 | 
57 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
58 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_98_krnl/config_sp_embedding_98_krnl.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | [connectivity] 
 3 | slr=cmac_krnl_1:SLR2
 4 | 
 5 | sp=embedding_98_krnl_1.table_HBM0:HBM[0]
 6 | sp=embedding_98_krnl_1.table_HBM1:HBM[1]
 7 | sp=embedding_98_krnl_1.table_HBM2:HBM[2]
 8 | sp=embedding_98_krnl_1.table_HBM3:HBM[3]
 9 | sp=embedding_98_krnl_1.table_HBM4:HBM[4]
10 | sp=embedding_98_krnl_1.table_HBM5:HBM[5]
11 | sp=embedding_98_krnl_1.table_HBM6:HBM[6]
12 | sp=embedding_98_krnl_1.table_HBM7:HBM[7]
13 | sp=embedding_98_krnl_1.table_HBM8:HBM[8]
14 | sp=embedding_98_krnl_1.table_HBM9:HBM[9]
15 | sp=embedding_98_krnl_1.table_HBM10:HBM[10]
16 | sp=embedding_98_krnl_1.table_HBM11:HBM[11]
17 | sp=embedding_98_krnl_1.table_HBM12:HBM[12]
18 | sp=embedding_98_krnl_1.table_HBM13:HBM[13]
19 | sp=embedding_98_krnl_1.table_HBM14:HBM[14]
20 | sp=embedding_98_krnl_1.table_HBM15:HBM[15]
21 | sp=embedding_98_krnl_1.table_HBM16:HBM[16]
22 | sp=embedding_98_krnl_1.table_HBM17:HBM[17]
23 | sp=embedding_98_krnl_1.table_HBM18:HBM[18]
24 | sp=embedding_98_krnl_1.table_HBM19:HBM[19]
25 | sp=embedding_98_krnl_1.table_HBM20:HBM[20]
26 | sp=embedding_98_krnl_1.table_HBM21:HBM[21]
27 | sp=embedding_98_krnl_1.table_HBM22:HBM[22]
28 | sp=embedding_98_krnl_1.table_HBM23:HBM[23]
29 | sp=embedding_98_krnl_1.table_HBM24:HBM[24]
30 | sp=embedding_98_krnl_1.table_HBM25:HBM[25]
31 | sp=embedding_98_krnl_1.table_HBM26:HBM[26]
32 | sp=embedding_98_krnl_1.table_HBM27:HBM[27]
33 | sp=embedding_98_krnl_1.table_DDR0:DDR[0]
34 | sp=embedding_98_krnl_1.table_DDR1:DDR[1]
35 | 
36 | sp=network_krnl_1.m00_axi:HBM[28]
37 | sp=network_krnl_1.m01_axi:HBM[29]
38 | 
39 | sc=network_krnl_1.m_axis_udp_rx:embedding_98_krnl_1.s_axis_udp_rx
40 | sc=network_krnl_1.m_axis_udp_rx_meta:embedding_98_krnl_1.s_axis_udp_rx_meta
41 | sc=network_krnl_1.m_axis_tcp_port_status:embedding_98_krnl_1.s_axis_tcp_port_status
42 | sc=network_krnl_1.m_axis_tcp_open_status:embedding_98_krnl_1.s_axis_tcp_open_status
43 | sc=network_krnl_1.m_axis_tcp_notification:embedding_98_krnl_1.s_axis_tcp_notification
44 | sc=network_krnl_1.m_axis_tcp_rx_meta:embedding_98_krnl_1.s_axis_tcp_rx_meta
45 | sc=network_krnl_1.m_axis_tcp_rx_data:embedding_98_krnl_1.s_axis_tcp_rx_data
46 | sc=network_krnl_1.m_axis_tcp_tx_status:embedding_98_krnl_1.s_axis_tcp_tx_status
47 | 
48 | sc=embedding_98_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
49 | sc=embedding_98_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
50 | sc=embedding_98_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
51 | sc=embedding_98_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
52 | sc=embedding_98_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
53 | sc=embedding_98_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
54 | sc=embedding_98_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
55 | sc=embedding_98_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
56 | 
57 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
58 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx


--------------------------------------------------------------------------------
/FPGA/kernel/cmac_krnl/src/hdl/network_clk_cross.sv:
--------------------------------------------------------------------------------
 1 | `include "network_types.svh"
 2 | `include "network_intf.svh"
 3 | 
// network_clk_cross
//
// Carries two AXI-stream channels between the net_clk domain and the pcie_clk
// domain:
//   * RX: m_axis_net_rx_nclk (net_clk)  -> m_axis_net_rx_aclk (pcie_clk)
//   * TX: s_axis_net_tx_aclk (pcie_clk) -> s_axis_net_tx_nclk (net_clk)
// Each direction is: register array -> axis_data_fifo_cc_udp_data (write side
// on the source clock, read side on the destination clock) -> register array.
// NOTE(review): axis_data_fifo_cc_udp_data is defined outside this file;
// presumably a dual-clock AXI-stream FIFO IP -- confirm against the IP config.
module network_clk_cross (
    input  wire             net_clk,       // clock of the *_nclk streams
    input  wire             net_aresetn,   // active-low reset, sampled on net_clk below
    input  wire             pcie_clk,      // clock of the *_aclk streams
    input  wire             pcie_aresetn,  // active-low reset, sampled on pcie_clk below
    
    // NCLK: streams handled on net_clk.
    // Note the modports: m_axis_net_rx_nclk is declared .slave and is the
    // input of this module's RX path; s_axis_net_tx_nclk is declared .master
    // and is the output of the TX path.
    axi_stream.slave        m_axis_net_rx_nclk,
    axi_stream.master       s_axis_net_tx_nclk,

    // ACLK: streams handled on pcie_clk (RX output, TX input).
    axi_stream.master       m_axis_net_rx_aclk,
    axi_stream.slave        s_axis_net_tx_aclk
);


// Each reset is registered once in its own clock domain before it is fanned
// out to the register arrays and FIFOs. The registers initialise to 1'b1,
// i.e. the de-asserted level of the active-low reset.
reg net_aresetn_reg = 1'b1;
always @ (posedge net_clk) begin
  net_aresetn_reg <= net_aresetn;
end

reg pcie_aresetn_reg = 1'b1;
always @ (posedge pcie_clk) begin
  pcie_aresetn_reg <= pcie_aresetn;
end

//
// Crossings init
//

// Internal streams sitting between the register arrays and the FIFOs.
// *_nclk_r are on the net_clk side, *_aclk_r on the pcie_clk side.
axi_stream m_axis_net_rx_nclk_r ();
axi_stream s_axis_net_tx_nclk_r ();

axi_stream m_axis_net_rx_aclk_r ();
axi_stream s_axis_net_tx_aclk_r ();

// Might be an overkill
// Five register stages (N_STAGES = 5) are placed on both ends of both
// directions:
//   nclk1: RX module input     -> RX FIFO write side (net_clk)
//   nclk2: TX FIFO read side   -> TX module output   (net_clk)
//   aclk1: RX FIFO read side   -> RX module output   (pcie_clk)
//   aclk2: TX module input     -> TX FIFO write side (pcie_clk)
axis_data_reg_array #(.N_STAGES(5)) inst_reg_data_nclk1 (.aclk(net_clk), .aresetn(net_aresetn_reg), .s_axis(m_axis_net_rx_nclk), .m_axis(m_axis_net_rx_nclk_r));
axis_data_reg_array #(.N_STAGES(5)) inst_reg_data_nclk2 (.aclk(net_clk), .aresetn(net_aresetn_reg), .s_axis(s_axis_net_tx_nclk_r), .m_axis(s_axis_net_tx_nclk));
axis_data_reg_array #(.N_STAGES(5)) inst_reg_data_aclk1 (.aclk(pcie_clk), .aresetn(pcie_aresetn_reg), .s_axis(m_axis_net_rx_aclk_r), .m_axis(m_axis_net_rx_aclk));
axis_data_reg_array #(.N_STAGES(5)) inst_reg_data_aclk2 (.aclk(pcie_clk), .aresetn(pcie_aresetn_reg), .s_axis(s_axis_net_tx_aclk), .m_axis(s_axis_net_tx_aclk_r));

// Data
// RX crossing: written on net_clk, read on pcie_clk. Only the write-side
// reset (s_axis_aresetn) is connected on this instance.
axis_data_fifo_cc_udp_data inst_cc_udp_data_rx (
    .m_axis_aclk(pcie_clk),
    .s_axis_aclk(net_clk),
    .s_axis_aresetn(net_aresetn_reg),
    .s_axis_tvalid(m_axis_net_rx_nclk_r.valid),
    .s_axis_tready(m_axis_net_rx_nclk_r.ready),
    .s_axis_tdata(m_axis_net_rx_nclk_r.data),
    .s_axis_tlast(m_axis_net_rx_nclk_r.last),
    .s_axis_tkeep(m_axis_net_rx_nclk_r.keep),
    .m_axis_tvalid(m_axis_net_rx_aclk_r.valid),
    .m_axis_tready(m_axis_net_rx_aclk_r.ready),
    .m_axis_tdata(m_axis_net_rx_aclk_r.data),
    .m_axis_tlast(m_axis_net_rx_aclk_r.last),
    .m_axis_tkeep(m_axis_net_rx_aclk_r.keep)
);

// TX crossing: mirror image of the RX instance -- written on pcie_clk, read
// on net_clk, reset taken from the pcie_clk-side registered reset.
axis_data_fifo_cc_udp_data inst_cc_udp_data_tx (
    .m_axis_aclk(net_clk),
    .s_axis_aclk(pcie_clk),
    .s_axis_aresetn(pcie_aresetn_reg),
    .s_axis_tvalid(s_axis_net_tx_aclk_r.valid),
    .s_axis_tready(s_axis_net_tx_aclk_r.ready),
    .s_axis_tdata(s_axis_net_tx_aclk_r.data),
    .s_axis_tlast(s_axis_net_tx_aclk_r.last),
    .s_axis_tkeep(s_axis_net_tx_aclk_r.keep),
    .m_axis_tvalid(s_axis_net_tx_nclk_r.valid),
    .m_axis_tready(s_axis_net_tx_nclk_r.ready),
    .m_axis_tdata(s_axis_net_tx_nclk_r.data),
    .m_axis_tlast(s_axis_net_tx_nclk_r.last),
    .m_axis_tkeep(s_axis_net_tx_nclk_r.keep)
);



endmodule


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/out:
--------------------------------------------------------------------------------
 1 | concurrentKernels: 1
 2 | = 1: Concurrent Kernel Execution
 3 | asyncEngineCount: 3
 4 | > 0: Overlap of Data Transfer and Kernel Execution
 5 | = 2: Concurrent Data Transfers
 6 | Device 0 has compute capability 7.5.
 7 | Before Thread
 8 | Printing Port from Thread 8080
 9 | Printing Port from Thread 8084
10 | Printing Port from Thread 8088
11 | Successfully built connection.
12 | Successfully built connection.
13 | Successfully built connection.
14 | Consumed time: 0.230000 seconds, INCLUDING Waiting reader proceess
15 | Throughput: 0.008492 GB / secConsumed time: 0.230000 seconds, INCLUDING Waiting reader proceess
16 | Throughput: 0.259001 GB / secConsumed time: 0.210000 seconds, INCLUDING Waiting reader proceess
17 | Throughput: 0.283668 GB / sec0.000000 0.000000 0.000000 0.000000 0.000000 After Thread
18 | i = 0 FPGA0 time = 80380336 ns
19 | i = 0 FPGA1 time = 40231969 ns
20 | i = 0 CPU0 time = 97120297 ns
21 | i = 0 memcpy time = 60306152 ns
22 | i = 1 FPGA0 time = 40014650 ns
23 | i = 1 FPGA1 time = 41016614 ns
24 | i = 1 CPU0 time = 55752916 ns
25 | i = 1 memcpy time = 40515632 ns
26 | i = 2 FPGA0 time = 35439912 ns
27 | i = 2 FPGA1 time = 40787134 ns
28 | i = 2 CPU0 time = 50551573 ns
29 | i = 2 memcpy time = 38113523 ns
30 | i = 3 FPGA0 time = 26818740 ns
31 | i = 3 FPGA1 time = 35218254 ns
32 | i = 3 CPU0 time = 45141873 ns
33 | i = 3 memcpy time = 31018497 ns
34 | i = 4 FPGA0 time = 28601239 ns
35 | i = 4 FPGA1 time = 26737033 ns
36 | i = 4 CPU0 time = 37267314 ns
37 | i = 4 memcpy time = 27669136 ns
38 | i = 5 FPGA0 time = 28733950 ns
39 | i = 5 FPGA1 time = 26872342 ns
40 | i = 5 CPU0 time = 36780408 ns
41 | i = 5 memcpy time = 27803146 ns
42 | i = 6 FPGA0 time = 29757657 ns
43 | i = 6 FPGA1 time = 27512576 ns
44 | i = 6 CPU0 time = 37795140 ns
45 | i = 6 memcpy time = 28635116 ns
46 | i = 7 FPGA0 time = 30751565 ns
47 | i = 7 FPGA1 time = 27698084 ns
48 | i = 7 CPU0 time = 38149920 ns
49 | i = 7 memcpy time = 29224824 ns
50 | i = 8 FPGA0 time = 30113613 ns
51 | i = 8 FPGA1 time = 27005731 ns
52 | i = 8 CPU0 time = 37479690 ns
53 | i = 8 memcpy time = 28559672 ns
54 | i = 9 FPGA0 time = 29953231 ns
55 | i = 9 FPGA1 time = 26991300 ns
56 | i = 9 CPU0 time = 37454036 ns
57 | i = 9 memcpy time = 28472265 ns
58 | i = 10 FPGA0 time = 30321863 ns
59 | i = 10 FPGA1 time = 27749784 ns
60 | i = 10 CPU0 time = 37802879 ns
61 | i = 10 memcpy time = 29035823 ns
62 | i = 11 FPGA0 time = 30906107 ns
63 | i = 11 FPGA1 time = 27477201 ns
64 | i = 11 CPU0 time = 38108115 ns
65 | i = 11 memcpy time = 29191654 ns
66 | i = 12 FPGA0 time = 29478247 ns
67 | i = 12 FPGA1 time = 25947500 ns
68 | i = 12 CPU0 time = 36493962 ns
69 | i = 12 memcpy time = 27712873 ns
70 | i = 13 FPGA0 time = 22442197 ns
71 | i = 13 FPGA1 time = 19544818 ns
72 | i = 13 CPU0 time = 29770012 ns
73 | i = 13 memcpy time = 20993507 ns
74 | i = 14 FPGA0 time = 13310682 ns
75 | i = 14 FPGA1 time = 9783901 ns
76 | i = 14 CPU0 time = 20882263 ns
77 | i = 14 memcpy time = 11547291 ns
78 | i = 15 FPGA0 time = 6249740 ns
79 | i = 15 FPGA1 time = 4897785 ns
80 | i = 15 CPU0 time = 10993382 ns
81 | i = 15 memcpy time = 5573762 ns
82 | 
83 | Average memcpt time per batch: 0.029023 sec = 29.023304 ms = 29023.304688 us = 29023304.000000 ns
84 | 


--------------------------------------------------------------------------------
/FPGA/host/embedding_377_krnl/host.hpp:
--------------------------------------------------------------------------------
 1 | #define CL_HPP_CL_1_2_DEFAULT_BUILD
 2 | #define CL_HPP_TARGET_OPENCL_VERSION 120
 3 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120
 4 | #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
 5 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 6 | 
// OCL_CHECK(error, call): execute the statement `call`, then test `error`
// against CL_SUCCESS; on failure print the file, line, the stringified call
// and the error code, and terminate the process with EXIT_FAILURE.
//
// The expansion is two plain statements (not wrapped in do { } while (0)), so
// a variable declared inside `call` stays visible after the macro; the flip
// side is that the macro must not be used as the unbraced body of an if/for.
//
// Original caveat: OCL_CHECK doesn't work if call has templatized function call.
// NOTE(review): presumably this refers to template argument lists containing
// a comma, which the preprocessor would split into extra macro arguments.
#define OCL_CHECK(error,call)                                       \
    call;                                                           \
    if (error != CL_SUCCESS) {                                      \
      printf("%s:%d Error calling " #call ", error code is: %d\n",  \
              __FILE__,__LINE__, error);                            \
      exit(EXIT_FAILURE);                                           \
    }
15 | #include "constants.hpp"
16 | 
17 | #include <vector>
18 | #include <unistd.h>
19 | #include <iostream>
20 | #include <fstream>
21 | #include <CL/cl2.hpp>
22 | 
23 | // template <typename T>
24 | // struct aligned_allocator
25 | // {
26 | //   using value_type = T;
27 | //   T* allocate(std::size_t num)
28 | //   {
29 | //     void* ptr = nullptr;
30 | //     if (posix_memalign(&ptr,4096,num*sizeof(T)))
31 | //       throw std::bad_alloc();
32 | //     return reinterpret_cast<T*>(ptr);
33 | //   }
34 | //   void deallocate(T* p, std::size_t num)
35 | //   {
36 | //     free(p);
37 | //   }
38 | // };
39 | 
40 | std::vector<cl::Device> get_devices(const std::string& vendor_name) {
41 | 
42 |     size_t i;
43 |     cl_int err;
44 |     std::vector<cl::Platform> platforms;
45 |     OCL_CHECK(err, err = cl::Platform::get(&platforms));
46 |     cl::Platform platform;
47 |     for (i  = 0 ; i < platforms.size(); i++){
48 |         platform = platforms[i];
49 |         OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err));
50 |         if (platformName == vendor_name){
51 |             std::cout << "Found Platform" << std::endl;
52 |             std::cout << "Platform Name: " << platformName.c_str() << std::endl;
53 |             break;
54 |         }
55 |     }
56 |     if (i == platforms.size()) {
57 |         std::cout << "Error: Failed to find Xilinx platform" << std::endl;
58 |         exit(EXIT_FAILURE);
59 |     }
60 |    
61 |     //Getting ACCELERATOR Devices and selecting 1st such device 
62 |     std::vector<cl::Device> devices;
63 |     OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
64 |     return devices;
65 | }
66 |    
// Load the whole file `xclbin_file_name` into a freshly allocated buffer.
//
// Parameters:
//   xclbin_file_name  path of the binary to load
//   nb                receives the file size in bytes
// Returns:
//   A buffer of `nb` bytes allocated with new[]; the caller owns it and must
//   release it with delete[].
// Failure handling:
//   If the file is not readable, cannot be opened, has a size that cannot be
//   determined or does not fit in `unsigned`, or cannot be read completely,
//   an error is printed and the process exits with EXIT_FAILURE.
char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) 
{
    std::cout << "INFO: Reading " << xclbin_file_name << std::endl;

    if (access(xclbin_file_name.c_str(), R_OK) != 0) {
        printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    //Loading XCL Bin into char buffer 
    std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
    std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
    if (!bin_file.is_open()) {
        printf("ERROR: failed to open %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    // tellg() yields -1 on failure; assigning that straight to an unsigned
    // would request a multi-gigabyte allocation, so validate it first.
    bin_file.seekg(0, bin_file.end);
    const std::streamoff file_size = bin_file.tellg();
    const unsigned size_as_unsigned = static_cast<unsigned>(file_size);
    if (file_size < 0 || static_cast<std::streamoff>(size_as_unsigned) != file_size) {
        printf("ERROR: cannot determine a usable size for %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    nb = size_as_unsigned;
    bin_file.seekg(0, bin_file.beg);

    char *buf = new char [nb];
    bin_file.read(buf, nb);
    // A short read would otherwise hand a partially filled buffer to the caller.
    if (bin_file.gcount() != static_cast<std::streamsize>(nb)) {
        printf("ERROR: short read while loading %s\n", xclbin_file_name.c_str());
        delete[] buf;
        exit(EXIT_FAILURE);
    }
    return buf;
}
85 | 
86 | 


--------------------------------------------------------------------------------
/FPGA/host/embedding_47_krnl/host.hpp:
--------------------------------------------------------------------------------
 1 | #define CL_HPP_CL_1_2_DEFAULT_BUILD
 2 | #define CL_HPP_TARGET_OPENCL_VERSION 120
 3 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120
 4 | #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
 5 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 6 | 
// OCL_CHECK(error, call): execute the statement `call`, then test `error`
// against CL_SUCCESS; on failure print the file, line, the stringified call
// and the error code, and terminate the process with EXIT_FAILURE.
//
// The expansion is two plain statements (not wrapped in do { } while (0)), so
// a variable declared inside `call` stays visible after the macro; the flip
// side is that the macro must not be used as the unbraced body of an if/for.
//
// Original caveat: OCL_CHECK doesn't work if call has templatized function call.
// NOTE(review): presumably this refers to template argument lists containing
// a comma, which the preprocessor would split into extra macro arguments.
#define OCL_CHECK(error,call)                                       \
    call;                                                           \
    if (error != CL_SUCCESS) {                                      \
      printf("%s:%d Error calling " #call ", error code is: %d\n",  \
              __FILE__,__LINE__, error);                            \
      exit(EXIT_FAILURE);                                           \
    }
15 | #include "constants.hpp"
16 | 
17 | #include <vector>
18 | #include <unistd.h>
19 | #include <iostream>
20 | #include <fstream>
21 | #include <CL/cl2.hpp>
22 | 
23 | // template <typename T>
24 | // struct aligned_allocator
25 | // {
26 | //   using value_type = T;
27 | //   T* allocate(std::size_t num)
28 | //   {
29 | //     void* ptr = nullptr;
30 | //     if (posix_memalign(&ptr,4096,num*sizeof(T)))
31 | //       throw std::bad_alloc();
32 | //     return reinterpret_cast<T*>(ptr);
33 | //   }
34 | //   void deallocate(T* p, std::size_t num)
35 | //   {
36 | //     free(p);
37 | //   }
38 | // };
39 | 
40 | std::vector<cl::Device> get_devices(const std::string& vendor_name) {
41 | 
42 |     size_t i;
43 |     cl_int err;
44 |     std::vector<cl::Platform> platforms;
45 |     OCL_CHECK(err, err = cl::Platform::get(&platforms));
46 |     cl::Platform platform;
47 |     for (i  = 0 ; i < platforms.size(); i++){
48 |         platform = platforms[i];
49 |         OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err));
50 |         if (platformName == vendor_name){
51 |             std::cout << "Found Platform" << std::endl;
52 |             std::cout << "Platform Name: " << platformName.c_str() << std::endl;
53 |             break;
54 |         }
55 |     }
56 |     if (i == platforms.size()) {
57 |         std::cout << "Error: Failed to find Xilinx platform" << std::endl;
58 |         exit(EXIT_FAILURE);
59 |     }
60 |    
61 |     //Getting ACCELERATOR Devices and selecting 1st such device 
62 |     std::vector<cl::Device> devices;
63 |     OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
64 |     return devices;
65 | }
66 |    
// Load the whole file `xclbin_file_name` into a freshly allocated buffer.
//
// Parameters:
//   xclbin_file_name  path of the binary to load
//   nb                receives the file size in bytes
// Returns:
//   A buffer of `nb` bytes allocated with new[]; the caller owns it and must
//   release it with delete[].
// Failure handling:
//   If the file is not readable, cannot be opened, has a size that cannot be
//   determined or does not fit in `unsigned`, or cannot be read completely,
//   an error is printed and the process exits with EXIT_FAILURE.
char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) 
{
    std::cout << "INFO: Reading " << xclbin_file_name << std::endl;

    if (access(xclbin_file_name.c_str(), R_OK) != 0) {
        printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    //Loading XCL Bin into char buffer 
    std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
    std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
    if (!bin_file.is_open()) {
        printf("ERROR: failed to open %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    // tellg() yields -1 on failure; assigning that straight to an unsigned
    // would request a multi-gigabyte allocation, so validate it first.
    bin_file.seekg(0, bin_file.end);
    const std::streamoff file_size = bin_file.tellg();
    const unsigned size_as_unsigned = static_cast<unsigned>(file_size);
    if (file_size < 0 || static_cast<std::streamoff>(size_as_unsigned) != file_size) {
        printf("ERROR: cannot determine a usable size for %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    nb = size_as_unsigned;
    bin_file.seekg(0, bin_file.beg);

    char *buf = new char [nb];
    bin_file.read(buf, nb);
    // A short read would otherwise hand a partially filled buffer to the caller.
    if (bin_file.gcount() != static_cast<std::streamsize>(nb)) {
        printf("ERROR: short read while loading %s\n", xclbin_file_name.c_str());
        delete[] buf;
        exit(EXIT_FAILURE);
    }
    return buf;
}
85 | 
86 | 


--------------------------------------------------------------------------------
/FPGA/host/embedding_98_krnl/host.hpp:
--------------------------------------------------------------------------------
 1 | #define CL_HPP_CL_1_2_DEFAULT_BUILD
 2 | #define CL_HPP_TARGET_OPENCL_VERSION 120
 3 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120
 4 | #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
 5 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 6 | 
// OCL_CHECK(error, call): execute the statement `call`, then test `error`
// against CL_SUCCESS; on failure print the file, line, the stringified call
// and the error code, and terminate the process with EXIT_FAILURE.
//
// The expansion is two plain statements (not wrapped in do { } while (0)), so
// a variable declared inside `call` stays visible after the macro; the flip
// side is that the macro must not be used as the unbraced body of an if/for.
//
// Original caveat: OCL_CHECK doesn't work if call has templatized function call.
// NOTE(review): presumably this refers to template argument lists containing
// a comma, which the preprocessor would split into extra macro arguments.
#define OCL_CHECK(error,call)                                       \
    call;                                                           \
    if (error != CL_SUCCESS) {                                      \
      printf("%s:%d Error calling " #call ", error code is: %d\n",  \
              __FILE__,__LINE__, error);                            \
      exit(EXIT_FAILURE);                                           \
    }
15 | #include "constants.hpp"
16 | 
17 | #include <vector>
18 | #include <unistd.h>
19 | #include <iostream>
20 | #include <fstream>
21 | #include <CL/cl2.hpp>
22 | 
23 | // template <typename T>
24 | // struct aligned_allocator
25 | // {
26 | //   using value_type = T;
27 | //   T* allocate(std::size_t num)
28 | //   {
29 | //     void* ptr = nullptr;
30 | //     if (posix_memalign(&ptr,4096,num*sizeof(T)))
31 | //       throw std::bad_alloc();
32 | //     return reinterpret_cast<T*>(ptr);
33 | //   }
34 | //   void deallocate(T* p, std::size_t num)
35 | //   {
36 | //     free(p);
37 | //   }
38 | // };
39 | 
40 | std::vector<cl::Device> get_devices(const std::string& vendor_name) {
41 | 
42 |     size_t i;
43 |     cl_int err;
44 |     std::vector<cl::Platform> platforms;
45 |     OCL_CHECK(err, err = cl::Platform::get(&platforms));
46 |     cl::Platform platform;
47 |     for (i  = 0 ; i < platforms.size(); i++){
48 |         platform = platforms[i];
49 |         OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err));
50 |         if (platformName == vendor_name){
51 |             std::cout << "Found Platform" << std::endl;
52 |             std::cout << "Platform Name: " << platformName.c_str() << std::endl;
53 |             break;
54 |         }
55 |     }
56 |     if (i == platforms.size()) {
57 |         std::cout << "Error: Failed to find Xilinx platform" << std::endl;
58 |         exit(EXIT_FAILURE);
59 |     }
60 |    
61 |     //Getting ACCELERATOR Devices and selecting 1st such device 
62 |     std::vector<cl::Device> devices;
63 |     OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
64 |     return devices;
65 | }
66 |    
// Load the whole file `xclbin_file_name` into a freshly allocated buffer.
//
// Parameters:
//   xclbin_file_name  path of the binary to load
//   nb                receives the file size in bytes
// Returns:
//   A buffer of `nb` bytes allocated with new[]; the caller owns it and must
//   release it with delete[].
// Failure handling:
//   If the file is not readable, cannot be opened, has a size that cannot be
//   determined or does not fit in `unsigned`, or cannot be read completely,
//   an error is printed and the process exits with EXIT_FAILURE.
char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) 
{
    std::cout << "INFO: Reading " << xclbin_file_name << std::endl;

    if (access(xclbin_file_name.c_str(), R_OK) != 0) {
        printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    //Loading XCL Bin into char buffer 
    std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
    std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
    if (!bin_file.is_open()) {
        printf("ERROR: failed to open %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    // tellg() yields -1 on failure; assigning that straight to an unsigned
    // would request a multi-gigabyte allocation, so validate it first.
    bin_file.seekg(0, bin_file.end);
    const std::streamoff file_size = bin_file.tellg();
    const unsigned size_as_unsigned = static_cast<unsigned>(file_size);
    if (file_size < 0 || static_cast<std::streamoff>(size_as_unsigned) != file_size) {
        printf("ERROR: cannot determine a usable size for %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    nb = size_as_unsigned;
    bin_file.seekg(0, bin_file.beg);

    char *buf = new char [nb];
    bin_file.read(buf, nb);
    // A short read would otherwise hand a partially filled buffer to the caller.
    if (bin_file.gcount() != static_cast<std::streamsize>(nb)) {
        printf("ERROR: short read while loading %s\n", xclbin_file_name.c_str());
        delete[] buf;
        exit(EXIT_FAILURE);
    }
    return buf;
}
85 | 
86 | 


--------------------------------------------------------------------------------
/FPGA/host/embedding_krnl/host.hpp:
--------------------------------------------------------------------------------
 1 | #define CL_HPP_CL_1_2_DEFAULT_BUILD
 2 | #define CL_HPP_TARGET_OPENCL_VERSION 120
 3 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120
 4 | #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
 5 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
 6 | 
// OCL_CHECK(error, call): execute the statement `call`, then test `error`
// against CL_SUCCESS; on failure print the file, line, the stringified call
// and the error code, and terminate the process with EXIT_FAILURE.
//
// The expansion is two plain statements (not wrapped in do { } while (0)), so
// a variable declared inside `call` stays visible after the macro; the flip
// side is that the macro must not be used as the unbraced body of an if/for.
//
// Original caveat: OCL_CHECK doesn't work if call has templatized function call.
// NOTE(review): presumably this refers to template argument lists containing
// a comma, which the preprocessor would split into extra macro arguments.
#define OCL_CHECK(error,call)                                       \
    call;                                                           \
    if (error != CL_SUCCESS) {                                      \
      printf("%s:%d Error calling " #call ", error code is: %d\n",  \
              __FILE__,__LINE__, error);                            \
      exit(EXIT_FAILURE);                                           \
    }
15 | #include "constants.hpp"
16 | 
17 | #include <vector>
18 | #include <unistd.h>
19 | #include <iostream>
20 | #include <fstream>
21 | #include <CL/cl2.hpp>
22 | 
23 | // template <typename T>
24 | // struct aligned_allocator
25 | // {
26 | //   using value_type = T;
27 | //   T* allocate(std::size_t num)
28 | //   {
29 | //     void* ptr = nullptr;
30 | //     if (posix_memalign(&ptr,4096,num*sizeof(T)))
31 | //       throw std::bad_alloc();
32 | //     return reinterpret_cast<T*>(ptr);
33 | //   }
34 | //   void deallocate(T* p, std::size_t num)
35 | //   {
36 | //     free(p);
37 | //   }
38 | // };
39 | 
40 | std::vector<cl::Device> get_devices(const std::string& vendor_name) {
41 | 
42 |     size_t i;
43 |     cl_int err;
44 |     std::vector<cl::Platform> platforms;
45 |     OCL_CHECK(err, err = cl::Platform::get(&platforms));
46 |     cl::Platform platform;
47 |     for (i  = 0 ; i < platforms.size(); i++){
48 |         platform = platforms[i];
49 |         OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err));
50 |         if (platformName == vendor_name){
51 |             std::cout << "Found Platform" << std::endl;
52 |             std::cout << "Platform Name: " << platformName.c_str() << std::endl;
53 |             break;
54 |         }
55 |     }
56 |     if (i == platforms.size()) {
57 |         std::cout << "Error: Failed to find Xilinx platform" << std::endl;
58 |         exit(EXIT_FAILURE);
59 |     }
60 |    
61 |     //Getting ACCELERATOR Devices and selecting 1st such device 
62 |     std::vector<cl::Device> devices;
63 |     OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
64 |     return devices;
65 | }
66 |    
// Load the whole file `xclbin_file_name` into a freshly allocated buffer.
//
// Parameters:
//   xclbin_file_name  path of the binary to load
//   nb                receives the file size in bytes
// Returns:
//   A buffer of `nb` bytes allocated with new[]; the caller owns it and must
//   release it with delete[].
// Failure handling:
//   If the file is not readable, cannot be opened, has a size that cannot be
//   determined or does not fit in `unsigned`, or cannot be read completely,
//   an error is printed and the process exits with EXIT_FAILURE.
char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb) 
{
    std::cout << "INFO: Reading " << xclbin_file_name << std::endl;

    if (access(xclbin_file_name.c_str(), R_OK) != 0) {
        printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    //Loading XCL Bin into char buffer 
    std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
    std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
    if (!bin_file.is_open()) {
        printf("ERROR: failed to open %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    // tellg() yields -1 on failure; assigning that straight to an unsigned
    // would request a multi-gigabyte allocation, so validate it first.
    bin_file.seekg(0, bin_file.end);
    const std::streamoff file_size = bin_file.tellg();
    const unsigned size_as_unsigned = static_cast<unsigned>(file_size);
    if (file_size < 0 || static_cast<std::streamoff>(size_as_unsigned) != file_size) {
        printf("ERROR: cannot determine a usable size for %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    nb = size_as_unsigned;
    bin_file.seekg(0, bin_file.beg);

    char *buf = new char [nb];
    bin_file.read(buf, nb);
    // A short read would otherwise hand a partially filled buffer to the caller.
    if (bin_file.gcount() != static_cast<std::streamsize>(nb)) {
        printf("ERROR: short read while loading %s\n", xclbin_file_name.c_str());
        delete[] buf;
        exit(EXIT_FAILURE);
    }
    return buf;
}
85 | 
86 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/makefile_gen/descgen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | from sys import argv
  3 | import json
  4 | import glob
  5 | import os
  6 | import re
  7 | import subprocess
  8 | 
  9 | script, desc_file = argv
 10 | desc = open(desc_file, 'r')
 11 | data = json.load(desc)
 12 | desc.close()
 13 | 
 14 | #top level list : dict_new
 15 | dict_new = {}
 16 | if 'example' in data:
 17 | 	dict_new['name'] = data.pop('example')
 18 | if 'overview' in data:
 19 | 	dict_new['description'] = data.pop('overview')
 20 | if 'board' in data:
 21 | 	dict_new['device'] = data.pop('board')
 22 | if 'nboard' in data:
 23 | 	dict_new['ndevice'] = data.pop('nboard')
 24 | if 'sdx_gui' in data:
 25 | 	dict_new['gui'] = data.pop('sdx_gui')
 26 | 
 27 | 
 28 | 
 29 | #host_list = []
 30 | host_dict = {}
 31 | if 'host_exe' in data:
 32 | 	host_dict['host_exe'] = data.pop('host_exe')
 33 | 
 34 | #if 'host_srcs' in data:
 35 | #	srcs = data['host_srcs'].split(" ")
 36 | #	host_dict['sources'] = srcs
 37 | #	del data['host_srcs'] 
 38 | #if 'host_hdrs' in data:
 39 | #	hdrs = data['host_hdrs'].split(" ")
 40 | #	host_dict['sources'].extend(hdrs)
 41 | #	del data['host_hdrs']
 42 | 
 43 | #linker_list = []
 44 | #linker_dict = {}
 45 | #library_paths = []
 46 | #library_paths.append("REPO_DIR/common/libs/")
 47 | #linker_dict['librarypaths'] = library_paths
 48 | #if 'libs' in data:
 49 | #	library = []
 50 | #	for item in data['libs']:
 51 | #		library.append(item["name"])
 52 | #	linker_dict['libraries'] = library
 53 | 
 54 | if 'linker' in data:
 55 | 	linker_dict = {}
 56 | 	linker_dict.update(data['linker'])
 57 | 	del data['linker']
 58 | 	#linker_list.append(linker_dict)
 59 | 	host_dict['linker'] = linker_dict
 60 | 	 
 61 | 
 62 | 
 63 | if 'libs' or 'compiler' or 'host_srcs' or 'host_hdrs' in data:
 64 | 	#compiler_list = []
 65 | 	compiler_dict = {}
 66 | 	if 'libs' or 'host_srcs' or 'host_hdrs' in data:
 67 | 		srcs = []
 68 | 	if 'libs' in data:	
 69 | 		include_paths = []
 70 | 		for item in data['libs']:
 71 | 			include_paths.append('REPO_DIR/common/includes/'+ item)	
 72 | 			srcs.append('REPO_DIR/common/includes/'+ item)	
 73 | 			compiler_dict['includepaths'] = include_paths
 74 | 		del data['libs']
 75 | 	if 'compiler' in data:
 76 | 		compiler_dict.update(data['compiler'])
 77 | 	if 'host_srcs' in data:
 78 | 		srcs.extend(data['host_srcs'].split(" "))
 79 | 		#compiler_dict['sources'] = srcs
 80 | 		del data['host_srcs'] 
 81 | 	if 'host_hdrs' in data:
 82 | 		hdrs = data['host_hdrs'].split(" ")
 83 | 		srcs.extend(hdrs)
 84 | 		del data['host_hdrs']
 85 | 	
 86 | 	compiler_dict['sources'] = srcs
 87 | 
 88 | 		
 89 | 	#compiler_list.append(compiler_dict)
 90 | 	#host_dict['compiler'] = compiler_list
 91 | 	host_dict['compiler'] = compiler_dict
 92 | 
 93 | 
 94 | if 'cmd_args' in data:
 95 | 	launch_list = []
 96 | 	emu_cmd_dictny = {}
 97 | 	emu_cmd_dictny['name'] = 'generic launch for all flows'
 98 | 	emu_cmd_dictny['cmd_args'] = data.pop('cmd_args')
 99 | 	launch_list.append(emu_cmd_dictny)
100 | 	dict_new['launch'] = launch_list  
101 | 
102 | 
103 | #host_list.append(host_dict)
# Attach the assembled host section and the platform type, then merge the new
# keys into the (already pruned) original description.
dict_new['host'] = host_dict
dict_new['platform_type'] = "pcie"
data.update(dict_new)

# Write the result next to the script. The context manager closes the file
# even if json.dump raises; the previous bare open() never closed the handle.
with open('description.json', 'w') as target:
	json.dump(data, target, indent=4, sort_keys=False)
112 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/logger/logger.h:
--------------------------------------------------------------------------------
 1 | /**********
 2 | Copyright (c) 2019, Xilinx, Inc.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | **********/
29 | #ifndef LOGGER_H_
30 | #define LOGGER_H_
31 | 
32 | #include <iomanip>
33 | #include <iostream>
34 | #include <string>
35 | #include <vector>
36 | 
37 | 
// Compile-time logging switches.
// NOTE(review): nothing in this header reads them; presumably the logger
// implementation file does - confirm before changing the values.
#define ENABLE_LOG_TOFILE 1
#define ENABLE_LOG_TIME 1

//global logging
// Convenience wrappers around sda::LogWrapper that capture the call site's
// __FILE__ and __LINE__. The leading integer (0/1/2) follows the enumerator
// order of sda::LOGTYPE: etInfo, etWarning, etError.
// `desc` and the optional variadic arguments are forwarded unchanged;
// `##__VA_ARGS__` is the GNU extension that drops the trailing comma when no
// extra arguments are passed.
#define LogInfo(desc, ...) sda::LogWrapper(0, __FILE__, __LINE__, desc, ##__VA_ARGS__)
#define LogWarn(desc, ...) sda::LogWrapper(1, __FILE__, __LINE__, desc, ##__VA_ARGS__)
#define LogError(desc, ...) sda::LogWrapper(2, __FILE__, __LINE__, desc, ##__VA_ARGS__)
45 | 
// NOTE: a using-directive in a header leaks into every includer. It is kept
// only because existing code that includes this header may rely on it; the
// declarations below are qualified with std:: so they no longer depend on it.
using namespace std;

namespace sda {

    // Severity levels; the numeric values 0/1/2 are what the LogInfo/LogWarn/
    // LogError macros pass as `etype` to LogWrapper.
    enum LOGTYPE {etInfo, etWarning, etError};

    // The functions below are declarations only; their definitions live
    // outside this header.

    //string
    std::string& ltrim(std::string& s);
    std::string& rtrim(std::string& s);
    std::string& trim(std::string& s);
    std::string GetFileExt(const std::string& s);
    std::string GetFileTitleOnly(const std::string& s);

    std::string ToLower(const std::string& s);
    std::string ToUpper(const std::string& s);

    //time
    std::string GetTimeStamp();

    //paths
    std::string GetApplicationPath();


    //debug
    // Print the elements of `pod` to std::cout as "a, b, c, " followed by a
    // newline.
    //
    //   pod           - values to print; T must be streamable with operator<<.
    //   display_count - maximum number of elements to print; 0 (the default)
    //                   prints all of them.
    //   precision     - stream precision applied before each element. It is
    //                   not restored afterwards, so std::cout keeps it.
    template<typename T>
    void PrintPOD(const std::vector<T>& pod, size_t display_count = 0, const int precision = 4) {

        // Clamp by hand instead of std::min: this header does not include
        // <algorithm>, so the previous code only compiled when another header
        // pulled it in transitively.
        size_t count = pod.size();
        if(display_count > 0 && display_count < count)
            count = display_count;

        for(size_t i = 0; i < count; i++) {
            std::cout << std::setprecision(precision) << pod[i] << ", ";
        }
        std::cout << std::endl;
    }

    //logging
    // printf-style logging entry point used by the LogInfo/LogWarn/LogError
    // macros: `etype` is the severity, `file`/`line` identify the call site,
    // `desc` is followed by optional variadic arguments.
    void LogWrapper(int etype, const char* file, int line, const char* desc, ...);

}
87 | 
88 | 
89 | 
90 | #endif /* LOGGER_H_ */
91 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/hls_test_krnl/src/hls/in_casting_bench.hpp:
--------------------------------------------------------------------------------
 1 | /************************************************
 2 | Copyright (c) 2018, Systems Group, ETH Zurich.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | ************************************************/
29 | #pragma once
30 | 
31 | #include "axi_utils.hpp"
32 | #include "toe.hpp"
33 | 
// Width in bits of the net_axis data words used by rxData/txData below
// (64 * 8 = 512). NOTE(review): presumably 64 bytes per word - confirm.
const unsigned DATA_WIDTH = 64 * 8;
// #ifndef __SYNTHESIS__
// static const ap_uint<32> END_TIME		= 1000; //1000000;
// static const ap_uint<40> END_TIME_120	= 1000;

// #else
// static const ap_uint<32> END_TIME		= 1546546546;//1501501501;
// static const ap_uint<40> END_TIME_120	= 18750000000;
// #endif


// Declaration of in_casting_bench; the definition is not part of this header.
//
// The first twelve parameters are hls::stream references. Their element types
// (appNotification, appReadRequest, ipTuple, openStatus, appTxMeta, appTxRsp,
// net_axis) are not declared here and must come from the included headers.
// NOTE(review): the direction of each stream (read vs. written by this
// function) cannot be told from the prototype - check the definition.
//
// The remaining parameters are by-value ap_uint scalars: run/finish flags,
// connection/port/packet settings, a cycle delay and eleven 32-bit
// regIpAddress values. NOTE(review): exact semantics and units are defined by
// the implementation, not by this header.
void in_casting_bench(	hls::stream<ap_uint<16> >& listenPort,
					hls::stream<bool>& listenPortStatus,
					hls::stream<appNotification>& notifications,
					hls::stream<appReadRequest>& readRequest,
					hls::stream<ap_uint<16> >& rxMetaData,
					hls::stream<net_axis<DATA_WIDTH> >& rxData,
					hls::stream<ipTuple>& openConnection,
					hls::stream<openStatus>& openConStatus,
					hls::stream<ap_uint<16> >& closeConnection,
					hls::stream<appTxMeta>& txMetaData,
					hls::stream<net_axis<DATA_WIDTH> >& txData,
					hls::stream<appTxRsp>& txStatus,
					ap_uint<1>		runExperiment,
					ap_uint<16>		useConn,
					ap_uint<16>		useIpAddr,
					ap_uint<16>		pkgWordCount,
					ap_uint<16>		regBasePort,
					ap_uint<16>		usePort,
					ap_uint<16>		expectedRespInKBPerCon,
					ap_uint<1>		finishExperiment,
					ap_uint<32>		delayedCycles,
					ap_uint<32>		regIpAddress0,
					ap_uint<32>		regIpAddress1,
					ap_uint<32>		regIpAddress2,
					ap_uint<32>		regIpAddress3,
					ap_uint<32>		regIpAddress4,
					ap_uint<32>		regIpAddress5,
					ap_uint<32>		regIpAddress6,
					ap_uint<32>		regIpAddress7,
					ap_uint<32>		regIpAddress8,
					ap_uint<32>		regIpAddress9,
					ap_uint<32>		regIpAddress10);


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/constant.h:
--------------------------------------------------------------------------------
 1 | // Input: (INPUT_FEATURE_LEN, BATCH_SIZE)
 2 | // Layer1: W1 * INPUT + B1 
 3 | //  -> W1 (HIDDEN_SIZE1, INPUT_FEATURE_LEN)
 4 | //  -> B1 (HIDDEN_SIZE1)
 5 | //  -> Result1 (HIDDEN_SIZE1, BATCH_SIZE)
 6 | // Layer2: W2 * Result1 + B2
 7 | //  -> W2 (HIDDEN_SIZE2, HIDDEN_SIZE1)
 8 | //  -> B2 (HIDDEN_SIZE2)
 9 | //  -> Result2 (HIDDEN_SIZE2, BATCH_SIZE)
10 | // Layer3: W3 * Result2 + B3
11 | //  -> W3 (HIDDEN_SIZE3, HIDDEN_SIZE2)
12 | //  -> B3 (HIDDEN_SIZE3)
13 | //  -> Result3 (HIDDEN_SIZE3, BATCH_SIZE)
14 | // Output Layer: W_OUT * Result3 + B_OUT
15 | //  -> W_OUT (OUTPUT_FEATURE_LEN, HIDDEN_SIZE3)
16 | //  -> B_OUT (OUTPUT_FEATURE_LEN)
17 | //  -> Result_OUT (OUTPUT_FEATURE_LEN, BATCH_SIZE)
18 | 
19 | ///////////// OPTION: small model 384 -> 512 /////////////
20 | ///////////// OPTION: large model 876 -> 1024 /////////////
21 | 
22 | /// TODO: CHANGE THIS
23 | // #define INPUT_FEATURE_LEN 1024
24 | 
25 | #define INPUT_FEATURE_LEN_RECEIVER 3968
26 | #define INPUT_FEATURE_LEN_FPGA_SENDER 1952
27 | #define INPUT_FEATURE_LEN_CPU_SENDER 64
28 | 
29 | // BATCH_SIZE is defined further down in this file (constant.h); the constant definitions need to be revisited later
30 | #define HIDDEN_SIZE1 2048 // 1024
31 | #define HIDDEN_SIZE2 512 
32 | #define HIDDEN_SIZE3 256  
33 | #define OUTPUT_FEATURE_LEN 1 
34 | 
35 | /* constraint: SHM_DATA_SIZE === 1 GB */
36 | /* FLOAT_SIZE * BATCH_SIZE * INPUT_DIM * BATCH_NUM_PER_LOOP = 1024 **3 */
37 | #define FLOAT_SIZE 4 
38 | #define BATCH_SIZE 512 // 256
39 | // maintain the same FIFO memory size
40 | // if batch size = 256, using 1024 as FIFO size, if batch size = 512, FIFO size = 512, etc.
41 | // #define BATCH_NUM_PER_LOOP ((1024) / (BATCH_SIZE / 256))  // -> should be renamed as FIFO_BATCH_NUM
42 | #define BATCH_NUM_PER_LOOP 16//((1024 * 256) / (BATCH_SIZE))  // -> should be renamed as FIFO_BATCH_NUM
43 | 
44 | // LOOP = number of GBs to perform 
45 | #define LOOP_NUM 1// 16
46 | 
47 | #define TOTAL_BATCH_NUM (BATCH_NUM_PER_LOOP * LOOP_NUM)
48 | /* matrix (batch * input_dim): 1024 * 1024, float: 4 byte, 1024 batches in queue */
49 | /* 4 GB in total, (1024 * 1024 * 4 * 1024) */
50 | 
51 | /// TODO: CHANGE THIS
52 | // #define BLOCK_ENTRY_NUM (BATCH_SIZE * INPUT_FEATURE_LEN)
53 | // #define BLOCK_SIZE (BLOCK_ENTRY_NUM * FLOAT_SIZE)
54 | 
55 | #define BLOCK_ENTRY_NUM_RECEIVER (BATCH_SIZE * INPUT_FEATURE_LEN_RECEIVER)
56 | #define BLOCK_SIZE_RECEIVER (BLOCK_ENTRY_NUM_RECEIVER * FLOAT_SIZE)
57 | 
58 | #define BLOCK_ENTRY_NUM_FPGA_SENDER (BATCH_SIZE * INPUT_FEATURE_LEN_FPGA_SENDER)
59 | #define BLOCK_SIZE_FPGA_SENDER (BLOCK_ENTRY_NUM_FPGA_SENDER * FLOAT_SIZE)
60 | 
61 | #define BLOCK_ENTRY_NUM_CPU_SENDER (BATCH_SIZE * INPUT_FEATURE_LEN_CPU_SENDER)
62 | #define BLOCK_SIZE_CPU_SENDER (BLOCK_ENTRY_NUM_CPU_SENDER * FLOAT_SIZE)
63 | 
64 | // maximum shared memory size: 1 GB
65 | /// TODO: CHANGE THIS
66 | // #define SHM_DATA_SIZE (BLOCK_SIZE * BATCH_NUM_PER_LOOP)  // TODO: may need to use smaller BATCH_NUM_PER_LOOP
67 | 
68 | #define SHM_DATA_SIZE_FPGA (BLOCK_SIZE_FPGA_SENDER * BATCH_NUM_PER_LOOP)  // TODO: may need to use smaller BATCH_NUM_PER_LOOP
69 | #define SHM_DATA_SIZE_CPU (BLOCK_SIZE_CPU_SENDER * BATCH_NUM_PER_LOOP)  // TODO: may need to use smaller BATCH_NUM_PER_LOOP
70 | 
71 | #define SHM_CONTROL_SIZE 1024
72 | 
73 | #define THREAD_NUM 1 
74 | 
75 | /// TODO: CHANGE THIS
76 | // #define PORT 8080
77 | 
78 | // Stream 0 port: PORT, Stream 1 port: PORT + 1, ...
79 | #define PORT_FPGA_SENDER_0 8080
80 | #define PORT_FPGA_SENDER_1 8084
81 | #define PORT_CPU_SENDER_0 8088
82 | 
83 | #define PORT_RECEIVER 8080


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/scatter.hpp:
--------------------------------------------------------------------------------
 1 | /************************************************
 2 | Copyright (c) 2018, Systems Group, ETH Zurich.
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification,
 6 | are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | ************************************************/
29 | #pragma once
30 | 
31 | #include "scatter_config.hpp"
32 | #include "axi_utils.hpp"
33 | #include "packet.hpp"
34 | #include "toe.hpp"
35 | 
36 | // #ifndef __SYNTHESIS__
37 | // static const ap_uint<32> END_TIME		= 1000; //1000000;
38 | // static const ap_uint<40> END_TIME_120	= 1000;
39 | 
40 | // #else
41 | // static const ap_uint<32> END_TIME		= 1546546546;//1501501501;
42 | // static const ap_uint<40> END_TIME_120	= 18750000000;
43 | // #endif
44 | 
45 | 
// Declaration of scatter; the definition is not part of this header.
//
// The first twelve parameters are hls::stream references. Their element types
// (appNotification, appReadRequest, ipTuple, openStatus, appTxMeta, appTxRsp,
// net_axis) and DATA_WIDTH are not declared in this file and must come from
// the included headers. NOTE(review): DATA_WIDTH presumably comes from
// scatter_config.hpp - confirm.
// NOTE(review): the direction of each stream (read vs. written by this
// function) cannot be told from the prototype - check the definition.
//
// The remaining parameters are by-value ap_uint scalars: run/finish flags,
// connection/port/packet settings, a cycle delay, clientPkgNum and eleven
// 32-bit regIpAddress values. NOTE(review): exact semantics and units are
// defined by the implementation, not by this header.
void scatter(	hls::stream<ap_uint<16> >& listenPort,
					hls::stream<bool>& listenPortStatus,
					hls::stream<appNotification>& notifications,
					hls::stream<appReadRequest>& readRequest,
					hls::stream<ap_uint<16> >& rxMetaData,
					hls::stream<net_axis<DATA_WIDTH> >& rxData,
					hls::stream<ipTuple>& openConnection,
					hls::stream<openStatus>& openConStatus,
					hls::stream<ap_uint<16> >& closeConnection,
					hls::stream<appTxMeta>& txMetaData,
					hls::stream<net_axis<DATA_WIDTH> >& txData,
					hls::stream<appTxRsp>& txStatus,
					ap_uint<1>		runExperiment,
					ap_uint<16>		useConn,
					ap_uint<16>		useIpAddr,
					ap_uint<16>		pkgWordCount,
					ap_uint<16>		regBasePort,
					ap_uint<16>		usePort,
					ap_uint<16>		expectedRespInKBPerCon,
					ap_uint<1>		finishExperiment,
					ap_uint<32>		delayedCycles,
					ap_uint<32>		clientPkgNum,
					ap_uint<32>		regIpAddress0,
					ap_uint<32>		regIpAddress1,
					ap_uint<32>		regIpAddress2,
					ap_uint<32>		regIpAddress3,
					ap_uint<32>		regIpAddress4,
					ap_uint<32>		regIpAddress5,
					ap_uint<32>		regIpAddress6,
					ap_uint<32>		regIpAddress7,
					ap_uint<32>		regIpAddress8,
					ap_uint<32>		regIpAddress9,
					ap_uint<32>		regIpAddress10);


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/embedding_377_krnl/config_sp_embedding_377_krnl.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | [connectivity] 
 4 | slr=cmac_krnl_1:SLR2
 5 | 
 6 | sp=embedding_377_krnl_1.table_HBM0:HBM[0]
 7 | sp=embedding_377_krnl_1.table_HBM1:HBM[1]
 8 | sp=embedding_377_krnl_1.table_HBM2:HBM[2]
 9 | sp=embedding_377_krnl_1.table_HBM3:HBM[3]
10 | sp=embedding_377_krnl_1.table_HBM4:HBM[4]
11 | sp=embedding_377_krnl_1.table_HBM5:HBM[5]
12 | sp=embedding_377_krnl_1.table_HBM6:HBM[6]
13 | sp=embedding_377_krnl_1.table_HBM7:HBM[7]
14 | sp=embedding_377_krnl_1.table_HBM8:HBM[8]
15 | sp=embedding_377_krnl_1.table_HBM9:HBM[9]
16 | sp=embedding_377_krnl_1.table_HBM10:HBM[10]
17 | sp=embedding_377_krnl_1.table_HBM11:HBM[11]
18 | sp=embedding_377_krnl_1.table_HBM12:HBM[12]
19 | sp=embedding_377_krnl_1.table_HBM13:HBM[13]
20 | sp=embedding_377_krnl_1.table_HBM14:HBM[14]
21 | sp=embedding_377_krnl_1.table_HBM15:HBM[15]
22 | sp=embedding_377_krnl_1.table_HBM16:HBM[16]
23 | sp=embedding_377_krnl_1.table_HBM17:HBM[17]
24 | sp=embedding_377_krnl_1.table_HBM18:HBM[18]
25 | sp=embedding_377_krnl_1.table_HBM19:HBM[19]
26 | sp=embedding_377_krnl_1.table_HBM20:HBM[20]
27 | sp=embedding_377_krnl_1.table_HBM21:HBM[21]
28 | sp=embedding_377_krnl_1.table_HBM22:HBM[22]
29 | sp=embedding_377_krnl_1.table_HBM23:HBM[23]
30 | sp=embedding_377_krnl_1.table_HBM24:HBM[24]
31 | sp=embedding_377_krnl_1.table_HBM25:HBM[25]
32 | sp=embedding_377_krnl_1.table_HBM26:HBM[26]
33 | sp=embedding_377_krnl_1.table_HBM27:HBM[27]
34 | sp=embedding_377_krnl_1.table_DDR0:DDR[0]
35 | sp=embedding_377_krnl_1.table_DDR1:DDR[1]
36 | 
37 | sp=network_krnl_1.m00_axi:HBM[28]
38 | sp=network_krnl_1.m01_axi:HBM[29]
39 | 
40 | sc=network_krnl_1.m_axis_udp_rx:embedding_377_krnl_1.s_axis_udp_rx
41 | sc=network_krnl_1.m_axis_udp_rx_meta:embedding_377_krnl_1.s_axis_udp_rx_meta
42 | sc=network_krnl_1.m_axis_tcp_port_status:embedding_377_krnl_1.s_axis_tcp_port_status
43 | sc=network_krnl_1.m_axis_tcp_open_status:embedding_377_krnl_1.s_axis_tcp_open_status
44 | sc=network_krnl_1.m_axis_tcp_notification:embedding_377_krnl_1.s_axis_tcp_notification
45 | sc=network_krnl_1.m_axis_tcp_rx_meta:embedding_377_krnl_1.s_axis_tcp_rx_meta
46 | sc=network_krnl_1.m_axis_tcp_rx_data:embedding_377_krnl_1.s_axis_tcp_rx_data
47 | sc=network_krnl_1.m_axis_tcp_tx_status:embedding_377_krnl_1.s_axis_tcp_tx_status
48 | 
49 | sc=embedding_377_krnl_1.m_axis_udp_tx:network_krnl_1.s_axis_udp_tx
50 | sc=embedding_377_krnl_1.m_axis_udp_tx_meta:network_krnl_1.s_axis_udp_tx_meta
51 | sc=embedding_377_krnl_1.m_axis_tcp_listen_port:network_krnl_1.s_axis_tcp_listen_port
52 | sc=embedding_377_krnl_1.m_axis_tcp_open_connection:network_krnl_1.s_axis_tcp_open_connection
53 | sc=embedding_377_krnl_1.m_axis_tcp_close_connection:network_krnl_1.s_axis_tcp_close_connection
54 | sc=embedding_377_krnl_1.m_axis_tcp_read_pkg:network_krnl_1.s_axis_tcp_read_pkg
55 | sc=embedding_377_krnl_1.m_axis_tcp_tx_meta:network_krnl_1.s_axis_tcp_tx_meta
56 | sc=embedding_377_krnl_1.m_axis_tcp_tx_data:network_krnl_1.s_axis_tcp_tx_data
57 | 
58 | sc=cmac_krnl_1.axis_net_rx:network_krnl_1.axis_net_rx
59 | sc=network_krnl_1.axis_net_tx:cmac_krnl_1.axis_net_tx
60 | 
61 | 
62 | [vivado]
63 | #param=compiler.userPreSysLinkTcl=$(PWD)/tcl/plram.tcl 
64 | param=route.enableGlobalHoldIter=true
65 | prop=run.impl_1.STEPS.PLACE_DESIGN.ARGS.DIRECTIVE=SSI_SpreadLogic_high
66 | prop=run.impl_1.{STEPS.PHYS_OPT_DESIGN.IS_ENABLED}=true 
67 | prop=run.impl_1.STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE=ExploreWithHoldFix
68 | #prop=run.impl_1.{STEPS.PHYS_OPT_DESIGN.ARGS.MORE OPTIONS}={-hold_fix -slr_crossing_opt}
69 | prop=run.impl_1.STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE=AlternateCLBRouting
70 | #prop=run.impl_1.{STEPS.PHYS_OPT_DESIGN.ARGS.MORE OPTIONS}={-hold_fix}
71 | prop=run.impl_1.{STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED}=true 
72 | prop=run.impl_1.{STEPS.POST_ROUTE_PHYS_OPT_DESIGN.ARGS.MORE OPTIONS}={-sll_reg_hold_fix -hold_fix -slr_crossing_opt}
73 | 
74 | 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_3_nodes/single_connection_network_client_sender.c:
--------------------------------------------------------------------------------
  1 | // Client side C/C++ program to demonstrate Socket programming 
  2 | #include <stdio.h> 
  3 | #include <stdlib.h> 
  4 | #include <sys/socket.h> 
  5 | #include <arpa/inet.h> 
  6 | #include <unistd.h> 
  7 | #include <string.h> 
  8 | #include <unistd.h>
  9 | #include <time.h>
 10 | #include <pthread.h> 
 11 | 
 12 | #include "constant.h"
 13 | 
 14 | 
// Argument block handed to thread_send_packets through pthread_create.
struct Thread_info {
    int port;   // TCP port the thread connects to (converted with htons)
};
 18 | 
 19 | // A normal C function that is executed as a thread  
 20 | void *thread_send_packets(void* vargp) 
 21 | { 
 22 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
 23 |     printf("Printing Port from Thread %d\n", t_info -> port); 
 24 |     
 25 | 
 26 |     int sock = 0, valread; 
 27 |     struct sockaddr_in serv_addr; 
 28 | 
 29 |     float array_buf[BLOCK_ENTRY_NUM];
 30 |     for (int i = 0; i < BLOCK_ENTRY_NUM; i++) {
 31 |         array_buf[i] = 1;
 32 |     }
 33 | 
 34 |     if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) 
 35 |     { 
 36 |         printf("\n Socket creation error \n"); 
 37 |         return -1; 
 38 |     } 
 39 |    
 40 |     serv_addr.sin_family = AF_INET; 
 41 |     serv_addr.sin_port = htons(t_info -> port); 
 42 |        
 43 |     // Convert IPv4 and IPv6 addresses from text to binary form 
 44 |     if(inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr)<=0)  
 45 |     // if(inet_pton(AF_INET, "10.128.0.11", &serv_addr.sin_addr)<=0)  
 46 |     { 
 47 |         printf("\nInvalid address/ Address not supported \n"); 
 48 |         return -1; 
 49 |     } 
 50 |    
 51 |     if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr))<0) 
 52 |     { 
 53 |         printf("\nConnection Failed \n"); 
 54 |         return -1; 
 55 |     } 
 56 | 
 57 |     printf("Start sending data.\n");
 58 |     ////////////////   Data transfer   ////////////////
 59 |     int i = 0;
 60 |     float total_sent_bytes = 0.0;
 61 | 
 62 |     clock_t start = clock();
 63 | 
 64 |     for (int i = 0; i < LOOP_NUM * BATCH_NUM_PER_LOOP; i++) {
 65 | 
 66 |         int total_sent_bytes = 0;
 67 | 
 68 |         while (total_sent_bytes < BLOCK_SIZE) {
 69 |             int sent_bytes = send(sock, array_buf + total_sent_bytes, BLOCK_SIZE - total_sent_bytes, 0);
 70 |             total_sent_bytes += sent_bytes;
 71 |             if (sent_bytes == -1) {
 72 |                 printf("Sending data UNSUCCESSFUL!\n");
 73 |                 return -1;
 74 |             }
 75 |         }
 76 | 
 77 |         if (total_sent_bytes != BLOCK_SIZE) {
 78 |             printf("Sending error, sending more bytes than a block\n");
 79 |         }
 80 |     }
 81 | 
 82 |     clock_t end = clock();
 83 | 
 84 |     // Should wait until the server said all the data was sent correctly,
 85 |     // otherwise the sender may send packets yet the server did not receive.
 86 |     char msg[32];
 87 |     int recv_bytes = read(sock, msg, 32);
 88 |     printf("received from server: %s\n", msg);
 89 | 
 90 |     float total_size = (float)LOOP_NUM * BATCH_NUM_PER_LOOP * BLOCK_SIZE;
 91 |     printf("Data sent. Packet number:%d\tPacket size:%d bytes\tTotal data:%fGB\n",
 92 |         LOOP_NUM * BATCH_NUM_PER_LOOP, BLOCK_SIZE, total_size / (1024 * 1024 * 1024));   
 93 |     float elapsed_time = (end-start) / (float)CLOCKS_PER_SEC;
 94 |     printf("\nConsumed time: %f seconds\n", elapsed_time);
 95 |     printf("Transfer Throughput: %f GB / sec\n", total_size / elapsed_time / 1024 / 1024 / 1024); 
 96 | 
 97 |     return NULL; 
 98 | } 
 99 | 
100 | int main(int argc, char const *argv[]) 
101 | { 
102 | 
103 |     pthread_t thread_id; 
104 |     printf("Before Thread\n"); 
105 | 
106 |     struct Thread_info t_info_0;
107 |     t_info_0.port = PORT;
108 | 
109 |     pthread_create(&thread_id, NULL, thread_send_packets, (void*) &t_info_0); 
110 |     // pthread_create(&thread_id, NULL, thread_send_packets, NULL); 
111 |     pthread_join(thread_id, NULL); 
112 |     printf("After Thread\n"); 
113 | 
114 |     return 0; 
115 | } 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_1_node_no_FIFO_scatter/single_connection_network_client_sender.c:
--------------------------------------------------------------------------------
  1 | // Client side C/C++ program to demonstrate Socket programming 
  2 | #include <stdio.h> 
  3 | #include <stdlib.h> 
  4 | #include <sys/socket.h> 
  5 | #include <arpa/inet.h> 
  6 | #include <unistd.h> 
  7 | #include <string.h> 
  8 | #include <unistd.h>
  9 | #include <time.h>
 10 | #include <pthread.h> 
 11 | 
 12 | #include "constant.h"
 13 | 
 14 | 
// Argument block handed to thread_send_packets through pthread_create.
struct Thread_info {
    int port;   // TCP port the thread connects to (converted with htons)
};
 18 | 
 19 | // A normal C function that is executed as a thread  
 20 | void *thread_send_packets(void* vargp) 
 21 | { 
 22 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
 23 |     printf("Printing Port from Thread %d\n", t_info -> port); 
 24 |     
 25 | 
 26 |     int sock = 0, valread; 
 27 |     struct sockaddr_in serv_addr; 
 28 | 
 29 |     float array_buf[BLOCK_ENTRY_NUM];
 30 |     for (int i = 0; i < BLOCK_ENTRY_NUM; i++) {
 31 |         array_buf[i] = 1;
 32 |     }
 33 | 
 34 |     if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) 
 35 |     { 
 36 |         printf("\n Socket creation error \n"); 
 37 |         return -1; 
 38 |     } 
 39 |    
 40 |     serv_addr.sin_family = AF_INET; 
 41 |     serv_addr.sin_port = htons(t_info -> port); 
 42 |        
 43 |     // Convert IPv4 and IPv6 addresses from text to binary form 
 44 |     if(inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr)<=0)  
 45 |     // if(inet_pton(AF_INET, "10.128.0.11", &serv_addr.sin_addr)<=0)  
 46 |     { 
 47 |         printf("\nInvalid address/ Address not supported \n"); 
 48 |         return -1; 
 49 |     } 
 50 |    
 51 |     if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr))<0) 
 52 |     { 
 53 |         printf("\nConnection Failed \n"); 
 54 |         return -1; 
 55 |     } 
 56 | 
 57 |     printf("Start sending data.\n");
 58 |     ////////////////   Data transfer   ////////////////
 59 |     int i = 0;
 60 |     float total_sent_bytes = 0.0;
 61 | 
 62 |     clock_t start = clock();
 63 | 
 64 |     for (int i = 0; i < LOOP_NUM * BATCH_NUM_PER_LOOP; i++) {
 65 | 
 66 |         int total_sent_bytes = 0;
 67 | 
 68 |         while (total_sent_bytes < BLOCK_SIZE) {
 69 |             int sent_bytes = send(sock, array_buf + total_sent_bytes, BLOCK_SIZE - total_sent_bytes, 0);
 70 |             total_sent_bytes += sent_bytes;
 71 |             if (sent_bytes == -1) {
 72 |                 printf("Sending data UNSUCCESSFUL!\n");
 73 |                 return -1;
 74 |             }
 75 |         }
 76 | 
 77 |         if (total_sent_bytes != BLOCK_SIZE) {
 78 |             printf("Sending error, sending more bytes than a block\n");
 79 |         }
 80 |     }
 81 | 
 82 |     clock_t end = clock();
 83 | 
 84 |     // Should wait until the server said all the data was sent correctly,
 85 |     // otherwise the sender may send packets yet the server did not receive.
 86 |     char msg[32];
 87 |     int recv_bytes = read(sock, msg, 32);
 88 |     printf("received from server: %s\n", msg);
 89 | 
 90 |     float total_size = (float)LOOP_NUM * BATCH_NUM_PER_LOOP * BLOCK_SIZE;
 91 |     printf("Data sent. Packet number:%d\tPacket size:%d bytes\tTotal data:%fGB\n",
 92 |         LOOP_NUM * BATCH_NUM_PER_LOOP, BLOCK_SIZE, total_size / (1024 * 1024 * 1024));   
 93 |     float elapsed_time = (end-start) / (float)CLOCKS_PER_SEC;
 94 |     printf("\nConsumed time: %f seconds\n", elapsed_time);
 95 |     printf("Transfer Throughput: %f GB / sec\n", total_size / elapsed_time / 1024 / 1024 / 1024); 
 96 | 
 97 |     return NULL; 
 98 | } 
 99 | 
100 | int main(int argc, char const *argv[]) 
101 | { 
102 | 
103 |     pthread_t thread_id; 
104 |     printf("Before Thread\n"); 
105 | 
106 |     struct Thread_info t_info_0;
107 |     t_info_0.port = PORT;
108 | 
109 |     pthread_create(&thread_id, NULL, thread_send_packets, (void*) &t_info_0); 
110 |     // pthread_create(&thread_id, NULL, thread_send_packets, NULL); 
111 |     pthread_join(thread_id, NULL); 
112 |     printf("After Thread\n"); 
113 | 
114 |     return 0; 
115 | } 


--------------------------------------------------------------------------------
/GPU/final_network_cublasLt_3_nodes_no_FIFO_scatter/single_connection_network_client_sender.c:
--------------------------------------------------------------------------------
  1 | // Client side C/C++ program to demonstrate Socket programming 
  2 | #include <stdio.h> 
  3 | #include <stdlib.h> 
  4 | #include <sys/socket.h> 
  5 | #include <arpa/inet.h> 
  6 | #include <unistd.h> 
  7 | #include <string.h> 
  8 | #include <unistd.h>
  9 | #include <time.h>
 10 | #include <pthread.h> 
 11 | 
 12 | #include "constant.h"
 13 | 
 14 | 
// Argument block handed to thread_send_packets through pthread_create.
struct Thread_info {
    int port;   // TCP port the thread connects to (converted with htons)
};
 18 | 
 19 | // A normal C function that is executed as a thread  
 20 | void *thread_send_packets(void* vargp) 
 21 | { 
 22 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
 23 |     printf("Printing Port from Thread %d\n", t_info -> port); 
 24 |     
 25 | 
 26 |     int sock = 0, valread; 
 27 |     struct sockaddr_in serv_addr; 
 28 | 
 29 |     float array_buf[BLOCK_ENTRY_NUM];
 30 |     for (int i = 0; i < BLOCK_ENTRY_NUM; i++) {
 31 |         array_buf[i] = 1;
 32 |     }
 33 | 
 34 |     if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) 
 35 |     { 
 36 |         printf("\n Socket creation error \n"); 
 37 |         return -1; 
 38 |     } 
 39 |    
 40 |     serv_addr.sin_family = AF_INET; 
 41 |     serv_addr.sin_port = htons(t_info -> port); 
 42 |        
 43 |     // Convert IPv4 and IPv6 addresses from text to binary form 
 44 |     if(inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr)<=0)  
 45 |     // if(inet_pton(AF_INET, "10.128.0.11", &serv_addr.sin_addr)<=0)  
 46 |     { 
 47 |         printf("\nInvalid address/ Address not supported \n"); 
 48 |         return -1; 
 49 |     } 
 50 |    
 51 |     if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr))<0) 
 52 |     { 
 53 |         printf("\nConnection Failed \n"); 
 54 |         return -1; 
 55 |     } 
 56 | 
 57 |     printf("Start sending data.\n");
 58 |     ////////////////   Data transfer   ////////////////
 59 |     int i = 0;
 60 |     float total_sent_bytes = 0.0;
 61 | 
 62 |     clock_t start = clock();
 63 | 
 64 |     for (int i = 0; i < LOOP_NUM * BATCH_NUM_PER_LOOP; i++) {
 65 | 
 66 |         int total_sent_bytes = 0;
 67 | 
 68 |         while (total_sent_bytes < BLOCK_SIZE) {
 69 |             int sent_bytes = send(sock, array_buf + total_sent_bytes, BLOCK_SIZE - total_sent_bytes, 0);
 70 |             total_sent_bytes += sent_bytes;
 71 |             if (sent_bytes == -1) {
 72 |                 printf("Sending data UNSUCCESSFUL!\n");
 73 |                 return -1;
 74 |             }
 75 |         }
 76 | 
 77 |         if (total_sent_bytes != BLOCK_SIZE) {
 78 |             printf("Sending error, sending more bytes than a block\n");
 79 |         }
 80 |     }
 81 | 
 82 |     clock_t end = clock();
 83 | 
 84 |     // Should wait until the server said all the data was sent correctly,
 85 |     // otherwise the sender may send packets yet the server did not receive.
 86 |     char msg[32];
 87 |     int recv_bytes = read(sock, msg, 32);
 88 |     printf("received from server: %s\n", msg);
 89 | 
 90 |     float total_size = (float)LOOP_NUM * BATCH_NUM_PER_LOOP * BLOCK_SIZE;
 91 |     printf("Data sent. Packet number:%d\tPacket size:%d bytes\tTotal data:%fGB\n",
 92 |         LOOP_NUM * BATCH_NUM_PER_LOOP, BLOCK_SIZE, total_size / (1024 * 1024 * 1024));   
 93 |     float elapsed_time = (end-start) / (float)CLOCKS_PER_SEC;
 94 |     printf("\nConsumed time: %f seconds\n", elapsed_time);
 95 |     printf("Transfer Throughput: %f GB / sec\n", total_size / elapsed_time / 1024 / 1024 / 1024); 
 96 | 
 97 |     return NULL; 
 98 | } 
 99 | 
100 | int main(int argc, char const *argv[]) 
101 | { 
102 | 
103 |     pthread_t thread_id; 
104 |     printf("Before Thread\n"); 
105 | 
106 |     struct Thread_info t_info_0;
107 |     t_info_0.port = PORT;
108 | 
109 |     pthread_create(&thread_id, NULL, thread_send_packets, (void*) &t_info_0); 
110 |     // pthread_create(&thread_id, NULL, thread_send_packets, NULL); 
111 |     pthread_join(thread_id, NULL); 
112 |     printf("After Thread\n"); 
113 | 
114 |     return 0; 
115 | } 


--------------------------------------------------------------------------------
/GPU/measure_network_cuda_cp_latency_single_node/single_connection_network_client_sender.c:
--------------------------------------------------------------------------------
  1 | // Client side C/C++ program to demonstrate Socket programming 
  2 | #include <stdio.h> 
  3 | #include <stdlib.h> 
  4 | #include <sys/socket.h> 
  5 | #include <arpa/inet.h> 
  6 | #include <unistd.h> 
  7 | #include <string.h> 
  8 | #include <unistd.h>
  9 | #include <time.h>
 10 | #include <pthread.h> 
 11 | 
 12 | #include "constant.h"
 13 | 
 14 | 
// Arguments handed to thread_send_packets() through the void* parameter of
// pthread_create().
struct Thread_info {
    // TCP port the sender connects to (converted with htons() before use).
    int port;
};
 18 | 
 19 | // A normal C function that is executed as a thread  
 20 | void *thread_send_packets(void* vargp) 
 21 | { 
 22 |     struct Thread_info* t_info = (struct Thread_info*) vargp;
 23 |     printf("Printing Port from Thread %d\n", t_info -> port); 
 24 |     
 25 | 
 26 |     int sock = 0, valread; 
 27 |     struct sockaddr_in serv_addr; 
 28 | 
 29 |     float array_buf[BLOCK_ENTRY_NUM];
 30 |     for (int i = 0; i < BLOCK_ENTRY_NUM; i++) {
 31 |         array_buf[i] = 1;
 32 |     }
 33 | 
 34 |     if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) 
 35 |     { 
 36 |         printf("\n Socket creation error \n"); 
 37 |         return -1; 
 38 |     } 
 39 |    
 40 |     serv_addr.sin_family = AF_INET; 
 41 |     serv_addr.sin_port = htons(t_info -> port); 
 42 |        
 43 |     // Convert IPv4 and IPv6 addresses from text to binary form 
 44 |     if(inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr)<=0)  
 45 |     // if(inet_pton(AF_INET, "10.128.0.11", &serv_addr.sin_addr)<=0)  
 46 |     { 
 47 |         printf("\nInvalid address/ Address not supported \n"); 
 48 |         return -1; 
 49 |     } 
 50 |    
 51 |     if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr))<0) 
 52 |     { 
 53 |         printf("\nConnection Failed \n"); 
 54 |         return -1; 
 55 |     } 
 56 | 
 57 |     printf("Start sending data.\n");
 58 |     ////////////////   Data transfer   ////////////////
 59 |     int i = 0;
 60 |     float total_sent_bytes = 0.0;
 61 | 
 62 |     clock_t start = clock();
 63 | 
 64 |     for (int i = 0; i < LOOP_NUM * BATCH_NUM_PER_LOOP; i++) {
 65 | 
 66 |         int total_sent_bytes = 0;
 67 | 
 68 |         while (total_sent_bytes < BLOCK_SIZE) {
 69 |             int sent_bytes = send(sock, array_buf + total_sent_bytes, BLOCK_SIZE - total_sent_bytes, 0);
 70 |             total_sent_bytes += sent_bytes;
 71 |             if (sent_bytes == -1) {
 72 |                 printf("Sending data UNSUCCESSFUL!\n");
 73 |                 return -1;
 74 |             }
 75 |         }
 76 | 
 77 |         if (total_sent_bytes != BLOCK_SIZE) {
 78 |             printf("Sending error, sending more bytes than a block\n");
 79 |         }
 80 |     }
 81 | 
 82 |     clock_t end = clock();
 83 | 
 84 |     // Should wait until the server said all the data was sent correctly,
 85 |     // otherwise the sender may send packets yet the server did not receive.
 86 |     char msg[32];
 87 |     int recv_bytes = read(sock, msg, 32);
 88 |     printf("received from server: %s\n", msg);
 89 | 
 90 |     float total_size = (float)LOOP_NUM * BATCH_NUM_PER_LOOP * BLOCK_SIZE;
 91 |     printf("Data sent. Packet number:%d\tPacket size:%d bytes\tTotal data:%fGB\n",
 92 |         LOOP_NUM * BATCH_NUM_PER_LOOP, BLOCK_SIZE, total_size / (1024 * 1024 * 1024));   
 93 |     float elapsed_time = (end-start) / (float)CLOCKS_PER_SEC;
 94 |     printf("\nConsumed time: %f seconds\n", elapsed_time);
 95 |     printf("Transfer Throughput: %f GB / sec\n", total_size / elapsed_time / 1024 / 1024 / 1024); 
 96 | 
 97 |     return NULL; 
 98 | } 
 99 | 
100 | int main(int argc, char const *argv[]) 
101 | { 
102 | 
103 |     pthread_t thread_id; 
104 |     printf("Before Thread\n"); 
105 | 
106 |     struct Thread_info t_info_0;
107 |     t_info_0.port = PORT;
108 | 
109 |     pthread_create(&thread_id, NULL, thread_send_packets, (void*) &t_info_0); 
110 |     // pthread_create(&thread_id, NULL, thread_send_packets, NULL); 
111 |     pthread_join(thread_id, NULL); 
112 |     printf("After Thread\n"); 
113 | 
114 |     return 0; 
115 | } 


--------------------------------------------------------------------------------
/FPGA/common/includes/bitmap/bitmap.cpp:
--------------------------------------------------------------------------------
  1 | /**********
  2 | Copyright (c) 2019, Xilinx, Inc.
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without modification,
  6 | are permitted provided that the following conditions are met:
  7 | 
  8 | 1. Redistributions of source code must retain the above copyright notice,
  9 | this list of conditions and the following disclaimer.
 10 | 
 11 | 2. Redistributions in binary form must reproduce the above copyright notice,
 12 | this list of conditions and the following disclaimer in the documentation
 13 | and/or other materials provided with the distribution.
 14 | 
 15 | 3. Neither the name of the copyright holder nor the names of its contributors
 16 | may be used to endorse or promote products derived from this software
 17 | without specific prior written permission.
 18 | 
 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 | **********/
 29 | #include <fcntl.h>
 30 | #include <sys/stat.h>
 31 | #include <sys/types.h>
 32 | #include <unistd.h>
 33 | 
 34 | #include <iostream>
 35 | 
 36 | #include "bitmap.h"
 37 | 
 38 | BitmapInterface::BitmapInterface(const char *f) : filename(f) {
 39 |     core = NULL;
 40 |     dib = NULL;
 41 |     image = NULL;
 42 | 
 43 |     magicNumber = 0;
 44 |     fileSize = 0;
 45 |     offsetOfImage = 0;
 46 | 
 47 |     sizeOfDIB = 0;
 48 |     sizeOfImage = 0;
 49 | 
 50 |     height = -1;
 51 |     width = -1;
 52 | }
 53 | 
 54 | BitmapInterface::~BitmapInterface() {
 55 |     if (core != NULL)
 56 |         delete[] core;
 57 |     if (dib != NULL)
 58 |         delete[] dib;
 59 |     if (image != NULL)
 60 |         delete[] image;
 61 | }
 62 | 
 63 | bool BitmapInterface::readBitmapFile() {
 64 |     // First, open the bitmap file
 65 |     int fd;
 66 |     unsigned int fileSize;
 67 | 
 68 |     fd = open(filename, O_RDONLY);
 69 |     if (fd < 0) {
 70 |         std::cerr << "Cannot read image file " << filename << std::endl;
 71 |         return false;
 72 |     }
 73 | 
 74 |     core = new char[14];
 75 |     read(fd, core, 14);
 76 |     magicNumber = (*(unsigned short *)(&(core[0])));
 77 |     fileSize = (*(unsigned int *)(&(core[2])));
 78 |     offsetOfImage = (*(unsigned int *)(&(core[10])));
 79 | 
 80 |     // Just read in the DIB, but don't process it
 81 |     sizeOfDIB = offsetOfImage - 14;
 82 |     dib = new char[sizeOfDIB];
 83 |     read(fd, dib, sizeOfDIB);
 84 | 
 85 |     width = (*(int *)(&(dib[4])));
 86 |     height = (*(int *)(&(dib[8])));
 87 | 
 88 |     sizeOfImage = fileSize - 14 - sizeOfDIB;
 89 |     int numPixels = sizeOfImage / 3; // RGB
 90 | 
 91 |     image = new int[numPixels];
 92 | 
 93 |     for (int i = 0; i < numPixels; ++i) {
 94 |         // Use an integer for every pixel even though we might not need that
 95 |         //  much space (padding 0 bits in the rest of the integer)
 96 |         image[i] = 0;
 97 |         read(fd, &(image[i]), 3);
 98 |     }
 99 | 
100 |     return true;
101 | }
102 | 
103 | bool BitmapInterface::writeBitmapFile(int *otherImage) {
104 |     int fd;
105 |     fd = open("output.bmp", O_WRONLY | O_CREAT, 0644);
106 | 
107 |     if (fd < 0) {
108 |         std::cerr << "Cannot open output.bmp for writing!" << std::endl;
109 |         return false;
110 |     }
111 | 
112 |     write(fd, core, 14);
113 |     write(fd, dib, sizeOfDIB);
114 | 
115 |     int numPixels = sizeOfImage / 3;
116 | 
117 |     int *outputImage = otherImage != NULL ? otherImage : image;
118 | 
119 |     for (int i = 0; i < numPixels; ++i) {
120 |         write(fd, &(outputImage[i]), 3);
121 |     }
122 | 
123 |     return true;
124 | }
125 | 


--------------------------------------------------------------------------------
/FPGA/common/includes/xcl2/xcl2.cpp:
--------------------------------------------------------------------------------
  1 | /**********
  2 | Copyright (c) 2019, Xilinx, Inc.
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without modification,
  6 | are permitted provided that the following conditions are met:
  7 | 
  8 | 1. Redistributions of source code must retain the above copyright notice,
  9 | this list of conditions and the following disclaimer.
 10 | 
 11 | 2. Redistributions in binary form must reproduce the above copyright notice,
 12 | this list of conditions and the following disclaimer in the documentation
 13 | and/or other materials provided with the distribution.
 14 | 
 15 | 3. Neither the name of the copyright holder nor the names of its contributors
 16 | may be used to endorse or promote products derived from this software
 17 | without specific prior written permission.
 18 | 
 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 | **********/
 29 | 
 30 | #include "xcl2.hpp"
 31 | #include <limits.h>
 32 | #include <sys/stat.h>
 33 | #include <unistd.h>
 34 | 
 35 | namespace xcl {
 36 | std::vector<cl::Device> get_devices(const std::string &vendor_name) {
 37 |     size_t i;
 38 |     cl_int err;
 39 |     std::vector<cl::Platform> platforms;
 40 |     OCL_CHECK(err, err = cl::Platform::get(&platforms));
 41 |     cl::Platform platform;
 42 |     for (i = 0; i < platforms.size(); i++) {
 43 |         platform = platforms[i];
 44 |         OCL_CHECK(err,
 45 |                   std::string platformName =
 46 |                       platform.getInfo<CL_PLATFORM_NAME>(&err));
 47 |         if (platformName == vendor_name) {
 48 |             std::cout << "Found Platform" << std::endl;
 49 |             std::cout << "Platform Name: " << platformName.c_str() << std::endl;
 50 |             break;
 51 |         }
 52 |     }
 53 |     if (i == platforms.size()) {
 54 |         std::cout << "Error: Failed to find Xilinx platform" << std::endl;
 55 |         exit(EXIT_FAILURE);
 56 |     }
 57 |     //Getting ACCELERATOR Devices and selecting 1st such device
 58 |     std::vector<cl::Device> devices;
 59 |     OCL_CHECK(err,
 60 |               err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
 61 |     return devices;
 62 | }
 63 | 
 64 | std::vector<cl::Device> get_xil_devices() { return get_devices("Xilinx"); }
 65 | 
 66 | std::vector<unsigned char>
 67 | read_binary_file(const std::string &xclbin_file_name) {
 68 |     std::cout << "INFO: Reading " << xclbin_file_name << std::endl;
 69 | 
 70 |     if (access(xclbin_file_name.c_str(), R_OK) != 0) {
 71 |         printf("ERROR: %s xclbin not available please build\n",
 72 |                xclbin_file_name.c_str());
 73 |         exit(EXIT_FAILURE);
 74 |     }
 75 |     //Loading XCL Bin into char buffer
 76 |     std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
 77 |     std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
 78 |     bin_file.seekg(0, bin_file.end);
 79 |     auto nb = bin_file.tellg();
 80 |     bin_file.seekg(0, bin_file.beg);
 81 |     std::vector<unsigned char> buf;
 82 |     buf.resize(nb);
 83 |     bin_file.read(reinterpret_cast<char *>(buf.data()), nb);
 84 |     return buf;
 85 | }
 86 | 
 87 | bool is_emulation() {
 88 |     bool ret = false;
 89 |     char *xcl_mode = getenv("XCL_EMULATION_MODE");
 90 |     if (xcl_mode != NULL) {
 91 |         ret = true;
 92 |     }
 93 |     return ret;
 94 | }
 95 | 
 96 | bool is_hw_emulation() {
 97 |     bool ret = false;
 98 |     char *xcl_mode = getenv("XCL_EMULATION_MODE");
 99 |     if ((xcl_mode != NULL) && !strcmp(xcl_mode, "hw_emu")) {
100 |         ret = true;
101 |     }
102 |     return ret;
103 | }
104 | 
// True when `device_name` contains the substring "xpr" anywhere.
bool is_xpr_device(const char *device_name) {
    return strstr(device_name, "xpr") != NULL;
}
114 | }; // namespace xcl
115 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/iperf_krnl/src/hls/packet.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * 1. Redistributions of source code must retain the above copyright notice,
  9 |  * this list of conditions and the following disclaimer.
 10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 |  * this list of conditions and the following disclaimer in the documentation
 12 |  * and/or other materials provided with the distribution.
 13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 14 |  * may be used to endorse or promote products derived from this software
 15 |  * without specific prior written permission.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | #ifndef PACKET_HPP
 28 | #define PACKET_HPP
 29 | 
 30 | #include "stdint.h"
 31 | #include "axi_utils.hpp"
 32 | 
 33 | using namespace hls;
 34 | 
// Assembles a HEADER_SIZE-bit header from W-bit words (parseWord) or hands it
// out again W bits at a time (consumeWord), lowest bits first.
//   W           - width of one data word in bits
//   HEADER_SIZE - width of the header in bits
// NOTE(review): parseWord() and consumeWord() share the same `idx` counter, so
// presumably clear() is expected between parsing and consuming - confirm with
// the callers.
template <int W, int HEADER_SIZE>
class packetHeader {
public:
	// Set by parseWord() once the word that reaches bit HEADER_SIZE-1 has been stored.
	bool ready;
	// Index of the next W-bit word to be parsed or consumed.
	uint16_t idx;
	// Raw header bits; not initialized by the constructor.
	ap_uint<HEADER_SIZE> header;

public:
	// Starts with no words processed and the header not ready.
	packetHeader()
		:ready(false), idx(0) {}
	// Member-wise copy of ready, idx and header.
	packetHeader& operator=(const packetHeader& other)
	{
		ready = other.ready;
		idx = other.idx;
		header = other.header;
		return *this;
	}

	// Stores the next incoming word `w` at bit offset idx*W of the header and
	// advances idx. Does nothing once the header is ready.
	void parseWord(ap_uint<W>& w)
	{
		if (ready)
			return;

		if (idx*W+W < HEADER_SIZE)
		{
			// The word lies completely inside the header and is not the last one.
			header(idx*W+W-1, idx*W) = w;
		}
		else //(idx*W+W >= HEADER_SIZE)
		{
			// Last word: only the bits up to HEADER_SIZE-1 are assigned, then
			// the header is marked complete.
			header(HEADER_SIZE-1, idx*W) = w;
			ready = true;
		}
		idx++;
	}
	// Copies the next header word into `w` and advances idx.
	// Returns (HEADER_SIZE - idx*W) / 8 computed after the increment, i.e. the
	// number of header bytes still to be consumed, when a full word was
	// copied. Returns 0 when the final partial word was copied (only bits
	// (HEADER_SIZE % W)-1..0 of `w` are assigned in that case) or when nothing
	// is left, in which case `w` is not modified.
	ap_uint<8> consumeWord(ap_uint<W>& w)
	{
		if ((idx+1)*W <= HEADER_SIZE)
		{
			w = header(((idx+1)*W)-1, idx*W);
			idx++;
			return ((HEADER_SIZE - (idx*W)) / 8);
		}
		else if (idx*W < HEADER_SIZE)
		{
			w((HEADER_SIZE%W)-1, 0) = header(HEADER_SIZE-1, idx*W);
			idx++;
			return 0;//(HEADER_SIZE - (idx*W));
		}
		return 0;
	}
	// Replaces the raw header bits; ready and idx are left unchanged.
	void setRawHeader(ap_uint<HEADER_SIZE> h)
	{
		header = h;
	}
	// Returns a copy of the raw header bits.
	ap_uint<HEADER_SIZE> getRawHeader()
	{
		return header;
	}
	// True once parseWord() has stored the last word of the header.
	bool isReady()
	{
		return ready;
	}

	// Resets the parse/consume state (ready and idx). The header bits
	// themselves are left as they are; the reset below is commented out.
	void clear()
	{
#pragma HLS pipeline II=1
		//header = 0;
		ready = false;
		idx = 0;
	}
};
164 | 
165 | #endif
166 | 


--------------------------------------------------------------------------------
/FPGA/kernel/user_krnl/scatter_krnl/src/hls/packet.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2019, Systems Group, ETH Zurich
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without modification,
  6 |  * are permitted provided that the following conditions are met:
  7 |  *
  8 |  * 1. Redistributions of source code must retain the above copyright notice,
  9 |  * this list of conditions and the following disclaimer.
 10 |  * 2. Redistributions in binary form must reproduce the above copyright notice,
 11 |  * this list of conditions and the following disclaimer in the documentation
 12 |  * and/or other materials provided with the distribution.
 13 |  * 3. Neither the name of the copyright holder nor the names of its contributors
 14 |  * may be used to endorse or promote products derived from this software
 15 |  * without specific prior written permission.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 18 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 19 |  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 20 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 21 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 22 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 23 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 25 |  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 |  */
 27 | #ifndef PACKET_HPP
 28 | #define PACKET_HPP
 29 | 
 30 | #include "stdint.h"
 31 | #include "axi_utils.hpp"
 32 | 
 33 | using namespace hls;
 34 | 
// Assembles a HEADER_SIZE-bit header from W-bit words (parseWord) or hands it
// out again W bits at a time (consumeWord), lowest bits first.
//   W           - width of one data word in bits
//   HEADER_SIZE - width of the header in bits
// NOTE(review): parseWord() and consumeWord() share the same `idx` counter, so
// presumably clear() is expected between parsing and consuming - confirm with
// the callers.
template <int W, int HEADER_SIZE>
class packetHeader {
public:
	// Set by parseWord() once the word that reaches bit HEADER_SIZE-1 has been stored.
	bool ready;
	// Index of the next W-bit word to be parsed or consumed.
	uint16_t idx;
	// Raw header bits; not initialized by the constructor.
	ap_uint<HEADER_SIZE> header;

public:
	// Starts with no words processed and the header not ready.
	packetHeader()
		:ready(false), idx(0) {}
	// Member-wise copy of ready, idx and header.
	packetHeader& operator=(const packetHeader& other)
	{
		ready = other.ready;
		idx = other.idx;
		header = other.header;
		return *this;
	}

	// Stores the next incoming word `w` at bit offset idx*W of the header and
	// advances idx. Does nothing once the header is ready.
	void parseWord(ap_uint<W>& w)
	{
		if (ready)
			return;

		if (idx*W+W < HEADER_SIZE)
		{
			// The word lies completely inside the header and is not the last one.
			header(idx*W+W-1, idx*W) = w;
		}
		else //(idx*W+W >= HEADER_SIZE)
		{
			// Last word: only the bits up to HEADER_SIZE-1 are assigned, then
			// the header is marked complete.
			header(HEADER_SIZE-1, idx*W) = w;
			ready = true;
		}
		idx++;
	}
	// Copies the next header word into `w` and advances idx.
	// Returns (HEADER_SIZE - idx*W) / 8 computed after the increment, i.e. the
	// number of header bytes still to be consumed, when a full word was
	// copied. Returns 0 when the final partial word was copied (only bits
	// (HEADER_SIZE % W)-1..0 of `w` are assigned in that case) or when nothing
	// is left, in which case `w` is not modified.
	ap_uint<8> consumeWord(ap_uint<W>& w)
	{
		if ((idx+1)*W <= HEADER_SIZE)
		{
			w = header(((idx+1)*W)-1, idx*W);
			idx++;
			return ((HEADER_SIZE - (idx*W)) / 8);
		}
		else if (idx*W < HEADER_SIZE)
		{
			w((HEADER_SIZE%W)-1, 0) = header(HEADER_SIZE-1, idx*W);
			idx++;
			return 0;//(HEADER_SIZE - (idx*W));
		}
		return 0;
	}
	// Replaces the raw header bits; ready and idx are left unchanged.
	void setRawHeader(ap_uint<HEADER_SIZE> h)
	{
		header = h;
	}
	// Returns a copy of the raw header bits.
	ap_uint<HEADER_SIZE> getRawHeader()
	{
		return header;
	}
	// True once parseWord() has stored the last word of the header.
	bool isReady()
	{
		return ready;
	}

	// Resets the parse/consume state (ready and idx). The header bits
	// themselves are left as they are; the reset below is commented out.
	void clear()
	{
#pragma HLS pipeline II=1
		//header = 0;
		ready = false;
		idx = 0;
	}
};
164 | 
165 | #endif
166 | 


--------------------------------------------------------------------------------
/FPGA/common/utility/readme_gen/gs_summary_util.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import os, re
  4 | import fnmatch
  5 | import json
  6 | 
  7 | 
  8 | def get_testcases(dir):
  9 |   testcase_list = []
 10 |   for root, dirnames, filenames in os.walk(dir):
 11 |     for filename in fnmatch.filter(filenames, 'description.json'):
 12 |       testcase_list.append(os.path.join(root, filename))
 13 |   return testcase_list
 14 | 
 15 | def get_drives(dir):
 16 |   folders = []
 17 |   while 1:
 18 |     dir, folder = os.path.split(dir)
 19 |     if folder != "" and folder != ".":
 20 |         folders.append(folder)
 21 |     else:
 22 |         break
 23 |   folders.reverse()
 24 |   return folders
 25 | 
 26 | def get_immediate_subdirectories(dir):
 27 |     return [name for name in os.listdir(dir)
 28 |             if os.path.isdir(os.path.join(dir, name))]
 29 | 
 30 | def gen_category(dir ,outfile, subdircount):
 31 | 
 32 |   links = "[" + dir +"]:"+ dir + "\n"
 33 |   testcaselist = get_testcases(dir);
 34 |   testcaselist.sort();
 35 |   for testcase in testcaselist:
 36 |     drives = get_drives(testcase)
 37 |     link = ""
 38 |     if len(drives) <= subdircount : 
 39 |       continue
 40 |     for drive in drives:
 41 |       if drive == "description.json":
 42 |         continue
 43 |       link += drive +"/"
 44 |     links += "[" + link + "]:" + link + "\n"
 45 |     
 46 |     outfile.write("["+link+"][]")
 47 |     outfile.write("|")
 48 |     desc = open(testcase,'r')
 49 |     data = json.load(desc)
 50 |     outfile.write(('\n').join(data["description"]))
 51 |     outfile.write("|")
 52 |     if 'key_concepts' in data:
 53 |         outfile.write("__Key__ __Concepts__")
 54 |         key_concepts = data["key_concepts"]
 55 |         for i, kc in enumerate(key_concepts):
 56 |             outfile.write("<br>")
 57 |             outfile.write(" - ")
 58 |             outfile.write(kc)
 59 |         outfile.write("<br>")
 60 |     if 'keywords' in data:    
 61 |         outfile.write("__Keywords__")
 62 |         keywords = data["keywords"]
 63 |         for  i, kw in enumerate(keywords):
 64 |             outfile.write("<br>")
 65 |             outfile.write(" - ")
 66 |             outfile.write(kw)
 67 |     outfile.write("\n")
 68 |     desc.close()
 69 |   return links
 70 | 
 71 | def genReadMe(dir):
 72 |   desc = open(os.path.join(dir,"summary.json"),'r')
 73 |   data = json.load(desc)
 74 |   outfile = open(os.path.join(dir, "README.md"), "w")
 75 |   outfile.write(('\n').join((data["description"])))
 76 |   outfile.write("\n")
 77 |   outfile.write("==================================\n")
 78 |   outfile.write(('\n').join((data["description"])))
 79 |   outfile.write("\n")
 80 |   if 'subdirs' in data:
 81 |     subDirs = data['subdirs'];
 82 |   else:
 83 |     subDirs = get_immediate_subdirectories(dir);
 84 |     subDirs.sort();
 85 |   outfile.write("\nS.No.   | Category  | Description \n")
 86 |   outfile.write("--------|-----------|-----------------------------------------\n")
 87 |   counter = 1;
 88 |   links = ""
 89 |   
 90 |   for subDir in subDirs:
 91 |     desc_file = os.path.join(subDir,"summary.json")
 92 |     if os.path.exists(desc_file):
 93 |         subDirDesc = open(os.path.join(subDir,"summary.json"),'r')
 94 |         subDirData = json.load(subDirDesc)
 95 |         outfile.write(str(counter));
 96 |         outfile.write(" | [" +subDir +"][]      |")
 97 |         outfile.write(('\n').join(subDirData["description"]))
 98 |         outfile.write("\n")
 99 |         counter = counter + 1;
100 |       
101 |   outfile.write("\n __Examples Table__ \n")
102 |   table_header = """
103 | Example        | Description           | Key Concepts / Keywords 
104 | ---------------|-----------------------|-------------------------
105 | """
106 |   outfile.write(table_header)
107 |   for subDir in subDirs:
108 |     links = links + gen_category(subDir,outfile,2);
109 | 
110 |   outfile.write("\n")
111 |   outfile.write(links)
112 |   outfile.close();
113 | 
def genReadMe2(dir):
    """Generate ``README.md`` in *dir* for a single category.

    The README contains the ``overview`` of ``summary.json`` as title, its
    ``description`` as body, the examples table produced by
    ``gen_category(dir, outfile, 1)``, and the link definitions it returns.
    """
    with open(os.path.join(dir, "summary.json"), 'r') as desc:
        data = json.load(desc)

    with open(os.path.join(dir, "README.md"), "w") as outfile:
        outfile.write('\n'.join(data["overview"]))
        outfile.write("\n")
        outfile.write("==================================\n")
        outfile.write('\n'.join(data["description"]))
        outfile.write("\n")
        outfile.write("\n __Examples Table__ \n")
        table_header = """
Example        | Description           | Key Concepts / Keywords 
---------------|-----------------------|-------------------------
"""
        outfile.write(table_header)
        links = gen_category(dir, outfile, 1)
        outfile.write("\n")
        outfile.write(links)
133 | 
134 | 


--------------------------------------------------------------------------------