├── .clang-format ├── .clang-tidy ├── .cmake-format.py ├── .dockerignore ├── .github ├── scripts │ ├── install-xrt.sh │ └── upload-apt-repo.sh └── workflows │ ├── CI.yml │ ├── install.yml │ └── package.yml ├── .gitignore ├── CMakeLists.txt ├── Dockerfile.centos.7 ├── LICENSE ├── README.md ├── cmake ├── FRTConfig.cmake ├── FindSDx.cmake ├── FindTinyXML.cmake └── FindXRT.cmake ├── install.sh ├── src ├── frt.cpp ├── frt.h ├── frt │ ├── arg_info.cpp │ ├── arg_info.h │ ├── buffer.h │ ├── buffer_arg.h │ ├── buffer_test.cpp │ ├── device.h │ ├── devices │ │ ├── intel_opencl_device.cpp │ │ ├── intel_opencl_device.h │ │ ├── opencl_device.cpp │ │ ├── opencl_device.h │ │ ├── opencl_device_matcher.h │ │ ├── opencl_util.h │ │ ├── tapa_fast_cosim_device.cpp │ │ ├── tapa_fast_cosim_device.h │ │ ├── xilinx_environ.cpp │ │ ├── xilinx_environ.h │ │ ├── xilinx_opencl_device.cpp │ │ ├── xilinx_opencl_device.h │ │ ├── xilinx_opencl_stream.cpp │ │ └── xilinx_opencl_stream.h │ ├── stream.h │ ├── stream_interface.h │ ├── stream_wrapper.h │ └── tag.h ├── frt_get_xlnx_env.cpp └── frt_vpp_wrapper └── tests ├── qdma ├── CMakeLists.txt ├── qdma-host.cpp └── qdma-kernel.cpp └── xdma ├── CMakeLists.txt ├── xdma-host.cpp └── xdma-kernel.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IncludeBlocks: Preserve 3 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | Checks: 3 | - "-*" 4 | - "bugprone-*" 5 | - "clang-*" 6 | - "modernize-*" 7 | - "performance-*" 8 | - "readability-*" 9 | - "-modernize-use-trailing-return-type" 10 | AnalyzeTemporaryDtors: false 11 | FormatStyle: "file" 12 | CheckOptions: 13 | - key: cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor 14 | value: true 15 | -------------------------------------------------------------------------------- 
/.cmake-format.py: -------------------------------------------------------------------------------- 1 | additional_commands = { 2 | "add_xocc_compile_target": { 3 | "pargs": '+', 4 | "flags": ["SAVE_TEMPS",], 5 | "kwargs": { 6 | "TARGET": 1, 7 | "OUTPUT": 1, 8 | "REPORT_DIR": 1, 9 | "LOG_DIR": 1, 10 | "TEMP_DIR": 1, 11 | "INPUT": 1, 12 | } 13 | }, 14 | "add_xocc_link_target": { 15 | "pargs": '+', 16 | "flags": ["SAVE_TEMPS",], 17 | "kwargs": { 18 | "TARGET": 1, 19 | "OUTPUT": 1, 20 | "OPTIMIZE": 1, 21 | "REPORT_DIR": 1, 22 | "LOG_DIR": 1, 23 | "TEMP_DIR": 1, 24 | "INPUT": 1, 25 | } 26 | }, 27 | "add_xocc_hw_link_targets": { 28 | "pargs": '+', 29 | "flags": [], 30 | "kwargs": { 31 | "KERNEL": 1, 32 | "PLATFORM": 1, 33 | "INPUT": 1, 34 | "PREFIX": 1, 35 | "HW_XO": 1, 36 | "HW_EMU_XCLBIN": 1, 37 | "HW_XCLBIN": 1, 38 | "DRAM_MAPPING": "*", 39 | } 40 | }, 41 | "add_xocc_targets": { 42 | "pargs": 1, 43 | "flags": [], 44 | "kwargs": { 45 | "KERNEL": 1, 46 | "PLATFORM": 1, 47 | "INPUT": 1, 48 | "PREFIX": 1, 49 | "HLS_SRC": 1, 50 | "SW_EMU_XO": 1, 51 | "HW_XO": 1, 52 | "SW_EMU_XCLBIN": 1, 53 | "HW_EMU_XCLBIN": 1, 54 | "HW_XCLBIN": 1, 55 | "DRAM_MAPPING": "*", 56 | } 57 | }, 58 | "add_xocc_targets_with_alias": { 59 | "pargs": 1, 60 | "flags": [], 61 | "kwargs": { 62 | "KERNEL": 1, 63 | "PLATFORM": 1, 64 | "INPUT": 1, 65 | "PREFIX": 1, 66 | "HLS_SRC": 1, 67 | "SW_EMU_XO": 1, 68 | "HW_XO": 1, 69 | "SW_EMU_XCLBIN": 1, 70 | "HW_EMU_XCLBIN": 1, 71 | "HW_XCLBIN": 1, 72 | "DRAM_MAPPING": "*", 73 | } 74 | }, 75 | } 76 | 77 | with section('format'): 78 | max_lines_hwrap = 0 79 | dangle_parens = True 80 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | /.cache 2 | /.git 3 | /.mypy_cache 4 | /.vscode 5 | /build 6 | -------------------------------------------------------------------------------- /.github/scripts/install-xrt.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | version="$(grep --perl --only '(?<=VERSION_ID=").+(?=")' /etc/os-release)" 4 | if test "${version//.*/}" -le 22; then 5 | file="xrt_202220.2.14.354_${version}-amd64-xrt.deb" 6 | curl "https://www.xilinx.com/bin/public/openDownload?filename=${file}" \ 7 | --location --output "${file}" 8 | sudo apt-get update 9 | sudo apt-get install -y -f "./${file}" 10 | else 11 | sudo apt-get install -y \ 12 | build-essential \ 13 | cmake \ 14 | git \ 15 | libboost-filesystem-dev \ 16 | libboost-program-options-dev \ 17 | libcurl4-openssl-dev \ 18 | libdrm-dev \ 19 | libncurses-dev \ 20 | libprotobuf-dev \ 21 | libssl-dev \ 22 | libudev-dev \ 23 | libxml2-dev \ 24 | libyaml-dev \ 25 | lsb-release \ 26 | ocl-icd-dev \ 27 | ocl-icd-opencl-dev \ 28 | pkg-config \ 29 | protobuf-compiler \ 30 | uuid-dev \ 31 | 32 | git clone https://github.com/Xilinx/XRT.git 33 | XRT/build/build.sh -opt -noctest 34 | sudo apt-get install -y -f ./XRT/build/Release/xrt*-amd64-xrt.deb 35 | fi 36 | 37 | XILINX_XRT=/opt/xilinx/xrt 38 | echo "XILINX_XRT=${XILINX_XRT}" >>$GITHUB_ENV 39 | echo "CPATH=${XILINX_XRT}/include" >>$GITHUB_ENV 40 | echo "LD_LIBRARY_PATH=${XILINX_XRT}/lib" >>$GITHUB_ENV 41 | echo "${XILINX_XRT}/bin" >>$GITHUB_PATH 42 | -------------------------------------------------------------------------------- /.github/scripts/upload-apt-repo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | # check required parameters 4 | : ${GPG_KEY:?"required but not set"} 5 | : ${SSH_KEY:?"required but not set"} 6 | : ${BUILD_DIR:?"required but not set"} 7 | : ${CACHE_DIR:?"required but not set"} 8 | : ${LABEL:?"required but not set"} 9 | : ${GITHUB_SHA:?"required but not set"} 10 | 11 | # install packaging dependencies 12 | sudo apt-get install -y apt-utils gzip gnupg moreutils rsync 13 | 14 | # setup parameters 15 | arch=amd64 16 | 
codename="$(grep --perl --only '(?<=UBUNTU_CODENAME=).+' /etc/os-release)" 17 | repo_dir="$(pwd)" 18 | src_dir="${repo_dir}/${BUILD_DIR}" 19 | dest_dir="$(mktemp --directory)" 20 | cache_dir="${repo_dir}/${CACHE_DIR}" 21 | 22 | # setup ssh 23 | mkdir -p -m 700 "${HOME}/.ssh" 24 | (umask 077 && echo "${SSH_KEY}" >"${HOME}/.ssh/id_ed25519") 25 | 26 | # setup gpg 27 | export GNUPGHOME="$(mktemp --directory)" 28 | mkdir -p "${cache_dir}" 29 | echo "${GPG_KEY}" | gpg --no-tty --batch --allow-secret-key-import --import - 30 | 31 | # clone gh-pages 32 | git clone --single-branch --branch gh-pages \ 33 | "git@github.com:${GITHUB_REPOSITORY}" "${dest_dir}" 34 | 35 | # start packaging 36 | pushd "${dest_dir}" 37 | 38 | mkdir -p "pool/main/${codename}" 39 | rsync --recursive --include "*.deb" --exclude "*" \ 40 | --link-dest "${src_dir}/" "${src_dir}/" "pool/main/${codename}" 41 | mkdir -p "dists/${codename}/main/binary-${arch}" 42 | 43 | apt-ftparchive packages --db "${cache_dir}/${codename}.db" \ 44 | "pool/main/${codename}" | 45 | sponge "dists/${codename}/main/binary-${arch}/Packages" 46 | gzip --stdout <"dists/${codename}/main/binary-${arch}/Packages" | 47 | sponge "dists/${codename}/main/binary-${arch}/Packages.gz" 48 | apt-ftparchive contents --db "${cache_dir}/${codename}.db" \ 49 | "pool/main/${codename}" | 50 | sponge "dists/${codename}/main/Contents-${arch}" 51 | gzip --stdout "dists/${codename}/main/Contents-${arch}" | 52 | sponge "dists/${codename}/main/Contents-${arch}.gz" 53 | apt-ftparchive release "dists/${codename}/main/binary-${arch}" | 54 | sponge "dists/${codename}/main/binary-${arch}/Release" 55 | apt-ftparchive release \ 56 | -o="APT::FTPArchive::Release::Codename=${codename}" \ 57 | -o='APT::FTPArchive::Release::Components=main' \ 58 | -o="APT::FTPArchive::Release::Label=${LABEL}" \ 59 | -o="APT::FTPArchive::Release::Architectures=${arch}" \ 60 | "dists/${codename}" >"${dest_dir}/dists/${codename}/Release" 61 | 62 | gpg --no-tty --batch --armor --yes 
--output "dists/${codename}/Release.gpg" \ 63 | --digest-algo SHA256 --detach-sign "dists/${codename}/Release" 64 | gpg --no-tty --batch --armor --yes --output "dists/${codename}/InRelease" \ 65 | --digest-algo SHA256 --detach-sign --clearsign "dists/${codename}/Release" 66 | 67 | # upload gh-pages 68 | git add --all 69 | git -c user.name='Blaok' -c user.email='i@blaok.me' \ 70 | commit -m "actions update for ${codename}@${GITHUB_SHA}" 71 | git push 72 | 73 | # cleanup gpg and ssh 74 | rm -rf "${GNUPGHOME}" "${HOME}/.ssh/id_ed25519" 75 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | - push 5 | 6 | jobs: 7 | CI: 8 | runs-on: 9 | - self-hosted 10 | - Linux 11 | - xilinx-tools 12 | 13 | strategy: 14 | matrix: 15 | xocl-platform: 16 | - xilinx_u250_gen3x16_xdma_4_1_202210_1 17 | - xilinx_u280_gen3x16_xdma_1_202211_1 18 | xocl-version: 19 | - 2023.2 20 | 21 | env: 22 | # Cannot source Xilinx scripts because they mess up many system libraries. 23 | XILINX_HLS: /opt/tools/xilinx/Vitis_HLS/${{ matrix.xocl-version }} 24 | XILINX_VITIS: /opt/tools/xilinx/Vitis/${{ matrix.xocl-version }} 25 | XILINX_VIVADO: /opt/tools/xilinx/Vivado/${{ matrix.xocl-version }} 26 | 27 | steps: 28 | - name: Checkout myself 29 | uses: actions/checkout@v4 30 | - name: Install dependencies 31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install -y --no-install-recommends \ 34 | cmake \ 35 | libgflags-dev \ 36 | libgoogle-glog-dev \ 37 | libgtest-dev \ 38 | libtinyxml-dev \ 39 | opencl-headers \ 40 | python3-pip \ 41 | 42 | python3 -m pip install --user \ 43 | git+https://github.com/UCLA-VAST/tapa-fast-cosim.git@93646a999b786e94626982700dfaf708fe3deec0 44 | - name: Configure myself 45 | run: cmake -S. 
-Bbuild -DXRT_PLATFORM=${{ matrix.xocl-platform }} 46 | - name: Build myself 47 | run: cmake --build build --target all 48 | - name: Test myself 49 | working-directory: build 50 | run: | 51 | # Some Vitis versions have a bug where xclbinutil is not found without 52 | # manually sourcing the XRT setup script. 53 | source "${XILINX_XRT}"/setup.sh 54 | source "${XILINX_VITIS}"/settings64.sh 55 | # TODO: re-enable csim once fixed 56 | /usr/bin/ctest --verbose --stop-on-failure --exclude-regex '^xdma-csim$' 57 | -------------------------------------------------------------------------------- /.github/workflows/install.yml: -------------------------------------------------------------------------------- 1 | name: install 2 | 3 | on: 4 | - page_build 5 | - workflow_dispatch 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | container: 11 | image: ${{ matrix.os }} 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | os: 16 | - ubuntu:20.04 17 | - ubuntu:22.04 18 | 19 | steps: 20 | - name: Checkout myself 21 | uses: actions/checkout@v4 22 | - name: Install from script 23 | run: ./install.sh 24 | -------------------------------------------------------------------------------- /.github/workflows/package.yml: -------------------------------------------------------------------------------- 1 | name: package 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | branches-ignore: 8 | - "**/docs?" 9 | pull_request: 10 | branches-ignore: 11 | - "**/docs?" 
12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: 19 | - ubuntu-20.04 20 | - ubuntu-22.04 21 | steps: 22 | - name: Checkout myself 23 | uses: actions/checkout@v4 24 | - name: Install build dependencies 25 | run: | 26 | # https://bugs.launchpad.net/ubuntu/+source/google-glog/+bug/1991919 27 | sudo apt-get install -y libunwind-15-dev || true 28 | sudo apt-get install -y \ 29 | libgflags-dev \ 30 | libgoogle-glog-dev \ 31 | libgtest-dev \ 32 | libtinyxml-dev \ 33 | opencl-headers \ 34 | 35 | ./.github/scripts/install-xrt.sh 36 | - name: Configure myself 37 | run: cmake -S . -B build -D CMAKE_BUILD_TYPE=Release 38 | - name: Build myself 39 | run: cmake --build build --target all 40 | - name: Test myself 41 | run: ctest --exclude-regex '^.*-.*sim$' 42 | working-directory: build 43 | - name: Package myself 44 | run: cmake --build build --target package 45 | - name: Upload deb package 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: frt-${{ matrix.os }} 49 | path: build/*.deb 50 | 51 | release: 52 | if: github.event_name == 'push' && github.repository == 'Blaok/fpga-runtime' && startsWith(github.event.ref, 'refs/tags') 53 | runs-on: ${{ matrix.os }} 54 | concurrency: release 55 | strategy: 56 | matrix: 57 | os: 58 | - ubuntu-20.04 59 | - ubuntu-22.04 60 | needs: 61 | - build 62 | steps: 63 | - name: Checkout myself 64 | uses: actions/checkout@v4 65 | - name: Download artifacts 66 | uses: actions/download-artifact@v4 67 | with: 68 | name: frt-${{ matrix.os }} 69 | path: build 70 | - name: Cache APT database 71 | uses: actions/cache@v3 72 | with: 73 | path: apt-db-cache 74 | key: ${{ runner.os }}-apt-db-cache 75 | - name: Update APT repository 76 | run: ./.github/scripts/upload-apt-repo.sh 77 | env: 78 | BUILD_DIR: build 79 | CACHE_DIR: apt-db-cache 80 | LABEL: FPGA Runtime Repository 81 | GPG_KEY: ${{ secrets.GPG_KEY }} 82 | SSH_KEY: ${{ secrets.SSH_KEY }} 83 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | /.cache 3 | /.mypy_cache 4 | /.vscode 5 | /build 6 | /compile_commands.json 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13) 2 | cmake_policy(SET CMP0076 NEW) 3 | 4 | project(frt) 5 | 6 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") 7 | list(APPEND CMAKE_PROGRAM_PATH "${CMAKE_CURRENT_LIST_DIR}/src") 8 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 9 | find_package(gflags REQUIRED) 10 | find_package(Threads REQUIRED) 11 | find_package(OpenCL REQUIRED) 12 | find_package(TinyXML REQUIRED) 13 | find_package(XRT REQUIRED) 14 | 15 | set(DEFAULT_ENABLE_XOCL_STREAM ON) 16 | if(XRT_VERSION VERSION_GREATER_EQUAL 2.13) 17 | set(DEFAULT_ENABLE_XOCL_STREAM OFF) 18 | endif() 19 | option( 20 | ENABLE_XOCL_STREAM 21 | "Enable Xilinx streaming APIs" 22 | ${DEFAULT_ENABLE_XOCL_STREAM} 23 | ) 24 | 25 | include_directories(SYSTEM ${XRT_INCLUDE_DIR}) 26 | 27 | file( 28 | DOWNLOAD 29 | https://github.com/nlohmann/json/releases/download/v3.10.5/json.hpp 30 | ${CMAKE_CURRENT_BINARY_DIR}/include/nlohmann/json.hpp 31 | ) 32 | file( 33 | DOWNLOAD 34 | https://github.com/arun11299/cpp-subprocess/raw/v2.0/subprocess.hpp 35 | ${CMAKE_CURRENT_BINARY_DIR}/include/subprocess.hpp 36 | TLS_VERIFY ON 37 | ) 38 | execute_process( 39 | COMMAND 40 | sed -i "-es/map/unordered_map/" 41 | "-es/std::initializer_list/std::initializer_list/" 42 | "-es/waitpid(pid, \\&status, WNOHANG)/waitpid(pid, \\&status, 0)/" 43 | ${CMAKE_CURRENT_BINARY_DIR}/include/subprocess.hpp 44 | ) 45 | 46 | file( 47 | DOWNLOAD 48 | https://github.com/tfussell/miniz-cpp/raw/052335e4f7/zip_file.hpp 49 | ${CMAKE_CURRENT_BINARY_DIR}/include/zip_file.hpp 50 | 
TLS_VERIFY ON 51 | ) 52 | execute_process( 53 | COMMAND 54 | sed -i "-es/std::uint64_t/mz_uint64/" 55 | ${CMAKE_CURRENT_BINARY_DIR}/include/zip_file.hpp 56 | ) 57 | 58 | set(frt_sources 59 | src/frt.cpp 60 | src/frt/arg_info.cpp 61 | src/frt/devices/intel_opencl_device.cpp 62 | src/frt/devices/opencl_device.cpp 63 | src/frt/devices/tapa_fast_cosim_device.cpp 64 | src/frt/devices/xilinx_environ.cpp 65 | src/frt/devices/xilinx_opencl_device.cpp 66 | ) 67 | set(frt_compile_features 68 | cxx_std_17 69 | cxx_alias_templates 70 | cxx_auto_type 71 | cxx_decltype 72 | cxx_defaulted_functions 73 | cxx_nullptr 74 | cxx_override 75 | cxx_range_for 76 | cxx_right_angle_brackets 77 | cxx_rvalue_references 78 | cxx_strong_enums 79 | cxx_variadic_templates 80 | ) 81 | set(frt_compile_definitions 82 | CL_HPP_CL_1_2_DEFAULT_BUILD 83 | CL_HPP_TARGET_OPENCL_VERSION=120 84 | CL_HPP_MINIMUM_OPENCL_VERSION=120 85 | CL_TARGET_OPENCL_VERSION=120 86 | CL_MINIMUM_OPENCL_VERSION=120 87 | ) 88 | if(ENABLE_XOCL_STREAM) 89 | message(STATUS "Building FRT with Xilinx streaming APIs") 90 | list(APPEND frt_sources src/frt/devices/xilinx_opencl_stream.cpp) 91 | list(APPEND frt_compile_definitions FRT_ENABLE_XOCL_STREAM) 92 | endif() 93 | 94 | set(frt_private_link_libraries 95 | -l:libOpenCL.so.1 # Nvidia's OpenCL doesn't have libOpenCL.so 96 | gflags 97 | glog 98 | stdc++fs 99 | tinyxml 100 | ) 101 | set(frt_public_link_libraries Threads::Threads) 102 | 103 | add_library(frt_static STATIC) 104 | add_library(frt ALIAS frt_static) 105 | add_library(frt_shared SHARED) 106 | target_sources(frt_static PRIVATE ${frt_sources}) 107 | target_sources(frt_shared PRIVATE ${frt_sources}) 108 | target_compile_features(frt_static PUBLIC ${frt_compile_features}) 109 | target_compile_features(frt_shared PUBLIC ${frt_compile_features}) 110 | target_compile_definitions(frt_static PRIVATE ${frt_compile_definitions}) 111 | target_compile_definitions(frt_shared PRIVATE ${frt_compile_definitions}) 112 | 
set_target_properties(frt_static frt_shared PROPERTIES OUTPUT_NAME frt) 113 | set_target_properties(frt_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) 114 | set_target_properties(frt_static PROPERTIES EXPORT_NAME frt) 115 | target_include_directories( 116 | frt_static 117 | PUBLIC $ 118 | PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include 119 | ) 120 | target_include_directories( 121 | frt_shared 122 | PUBLIC $ 123 | PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include 124 | ) 125 | target_link_libraries( 126 | frt_static 127 | PRIVATE ${frt_private_link_libraries} 128 | PUBLIC ${frt_public_link_libraries} 129 | ) 130 | target_link_libraries( 131 | frt_shared 132 | PRIVATE ${frt_private_link_libraries} 133 | PUBLIC ${frt_public_link_libraries} 134 | ) 135 | 136 | add_executable(frt_get_xlnx_env) 137 | target_sources( 138 | frt_get_xlnx_env 139 | PRIVATE src/frt/devices/xilinx_environ.cpp src/frt_get_xlnx_env.cpp 140 | ) 141 | target_compile_features(frt_get_xlnx_env PRIVATE cxx_std_17) 142 | target_include_directories( 143 | frt_get_xlnx_env PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include 144 | ) 145 | target_link_libraries( 146 | frt_get_xlnx_env PRIVATE Threads::Threads -static-libgcc -static-libstdc++ 147 | ) 148 | 149 | include(GNUInstallDirs) 150 | install( 151 | TARGETS frt_static frt_shared frt_get_xlnx_env 152 | EXPORT FRTTargets 153 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 154 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 155 | ) 156 | install( 157 | DIRECTORY "${CMAKE_SOURCE_DIR}/src/" 158 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 159 | FILES_MATCHING 160 | PATTERN "devices" EXCLUDE 161 | PATTERN "*.h" 162 | ) 163 | install( 164 | PROGRAMS "${CMAKE_SOURCE_DIR}/src/frt_vpp_wrapper" 165 | DESTINATION ${CMAKE_INSTALL_BINDIR} 166 | ) 167 | 168 | export( 169 | EXPORT FRTTargets 170 | NAMESPACE frt:: 171 | FILE ${CMAKE_CURRENT_BINARY_DIR}/frt/FRTTargets.cmake 172 | ) 173 | set(ConfigPackageLocation lib/cmake/frt) 174 | install( 175 | EXPORT FRTTargets 176 | FILE 
FRTTargets.cmake 177 | NAMESPACE frt:: 178 | DESTINATION ${ConfigPackageLocation} 179 | ) 180 | install( 181 | FILES cmake/FRTConfig.cmake 182 | DESTINATION ${ConfigPackageLocation} 183 | COMPONENT Devel 184 | ) 185 | install( 186 | FILES cmake/FindXRT.cmake 187 | RENAME XRTConfig.cmake 188 | DESTINATION ${ConfigPackageLocation} 189 | COMPONENT Devel 190 | ) 191 | install( 192 | FILES cmake/FindSDx.cmake 193 | RENAME SDxConfig.cmake 194 | DESTINATION ${ConfigPackageLocation} 195 | COMPONENT Devel 196 | ) 197 | 198 | set(CPACK_PACKAGE_NAME libfrt-dev) 199 | set(CPACK_PACKAGE_VERSION_MAJOR 0) 200 | set(CPACK_PACKAGE_VERSION_MINOR 0) 201 | set(CPACK_PACKAGE_VERSION_PATCH 20240801.1) 202 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "FPGA Runtime for PCIe-Based FPGAs") 203 | 204 | string( 205 | JOIN 206 | ", " 207 | CPACK_DEBIAN_PACKAGE_DEPENDS 208 | "bash" 209 | "coreutils" 210 | "fuse" # for `fusermount` 211 | "libgflags-dev" 212 | "libgoogle-glog-dev" 213 | "libtinyxml-dev" 214 | "ocl-icd-libopencl1 | libopencl1" 215 | "procps" # for `ps` 216 | "util-linux" # for `setsid` 217 | ) 218 | string( 219 | JOIN 220 | ", " 221 | CPACK_DEBIAN_PACKAGE_RECOMMENDS 222 | "bindfs" 223 | "csh" 224 | "squashfs-tools" 225 | "squashfuse" 226 | "xrt" 227 | ) 228 | set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Blaok Chi") 229 | set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") 230 | set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT) 231 | 232 | string( 233 | JOIN 234 | ", " 235 | CPACK_RPM_PACKAGE_REQUIRES 236 | "bash" 237 | "coreutils" 238 | "fuse" # for `fusermount` 239 | "gflags" 240 | "glog" 241 | "ocl-icd-devel" 242 | "procps-ng" # for `ps` 243 | "tinyxml-devel" 244 | "util-linux" # for `setsid` 245 | ) 246 | set(CPACK_RPM_PACKAGE_NAME frt-devel) 247 | set(CPACK_RPM_FILE_NAME RPM-DEFAULT) 248 | 249 | set(CPACK_GENERATOR 250 | TGZ DEB 251 | CACHE STRING "Enable selected CPack generators" 252 | ) 253 | include(CPack) 254 | 255 | enable_testing() 256 | 257 | find_package(GTest) 258 | if(GTest_FOUND) 259 | 
include(GoogleTest) 260 | 261 | add_executable(buffer_test src/frt/buffer_test.cpp) 262 | target_link_libraries(buffer_test frt GTest::gtest_main) 263 | gtest_discover_tests(buffer_test) 264 | endif() 265 | 266 | add_subdirectory(tests/xdma) 267 | -------------------------------------------------------------------------------- /Dockerfile.centos.7: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | ARG XRT_VERSION=202110.2.11.634_7.6.1810 3 | RUN yum install --assumeyes --setopt=skip_missing_names_on_install=False \ 4 | "https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm" \ 5 | && yum clean all \ 6 | && rm -rf /var/cache/yum 7 | RUN yum install --assumeyes --setopt=skip_missing_names_on_install=False \ 8 | "https://www.xilinx.com/bin/public/openDownload?filename=xrt_${XRT_VERSION}-x86_64-xrt.rpm" \ 9 | cmake3 \ 10 | gcc-c++ \ 11 | ninja-build \ 12 | rpm-build \ 13 | tinyxml-devel \ 14 | && yum clean all \ 15 | && rm -rf /var/cache/yum 16 | ENV XILINX_XRT=/opt/xilinx/xrt 17 | WORKDIR /usr/src/frt 18 | COPY . . 19 | CMD \ 20 | cmake3 -GNinja -S. 
-Bbuild \ 21 | -DCMAKE_BUILD_TYPE=Release \ 22 | -DCPACK_GENERATOR=RPM \ 23 | && cmake3 --build build --target package 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Blaok 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FPGA Runtime 2 | 3 | This project provides a convenient runtime for PCIe-based FPGAs programmed under the OpenCL host-kernel model. 4 | Both Intel and Xilinx platforms are supported. 
5 | 6 | ## Prerequisites 7 | 8 | + Ubuntu 20.04+ 9 | 10 | ## Install from Binary 11 | 12 | ```bash 13 | ./install.sh 14 | ``` 15 | 16 | ## Usage 17 | 18 | ### Invoking 19 | 20 | ```C++ 21 | template <typename... Args> 22 | fpga::Instance Invoke(const std::string& bitstream, Args&&... args); 23 | ``` 24 | 25 | This invokes the kernel contained in file `bitstream`. 26 | `bitstream` should be a file that can be read via `ifstream` and can be a pipe with proper `EOF`. 27 | `args` are the arguments to the kernel. 28 | If an argument is not a scalar, it needs to be wrapped in one of the following wrappers: 29 | 30 | ```C++ 31 | ReadOnly(T* ptr, size_t n); 32 | WriteOnly(T* ptr, size_t n); 33 | ReadWrite(T* ptr, size_t n); 34 | ``` 35 | 36 | This will tell the runtime the data exchange direction and how many elements are allocated. 37 | The directions are with respect to the host, not the device (because *this is host code*). 38 | **Passing a host pointer directly will not work (doesn't even compile).** 39 | 40 | ### Device Selection 41 | 42 | By default, FRT selects devices using metadata from the `bitstream`. 43 | This may not always work as expected, often due to the following reasons: 44 | 45 | 1. Xilinx [2RP](https://docs.xilinx.com/r/en-US/ug1301-getting-started-guide-alveo-accelerator-cards/Programming-the-Shell-Partition-for-DFX-2RP-Platforms) shell platforms must be flashed by admin (`root`) before running any user logic. 46 | 2. FRT may not know how to match the device name in the `bitstream` and the runtime device name. If you encounter this issue, please feel free to [file a bug](https://github.com/Blaok/fpga-runtime/issues/new). 47 | 48 | #### Selecting Xilinx Device by PCIe BDF 49 | 50 | For Xilinx devices, it is possible to select the device by its 51 | [PCIe BDF](https://docs.xilinx.com/r/en-US/ug1531-vck5000-install/Obtaining-Card-BDF-Values). 52 | 53 | To do this, make sure you parse `gflags` in your main function: 54 | 55 | ```C++ 56 | #include <gflags/gflags.h> 57 | ... 
58 | int main(int argc, char* argv[]) { 59 | gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true); 60 | ... 61 | } 62 | ``` 63 | 64 | When running the host program, add `--xocl_bdf=<bdf>`, e.g., 65 | 66 | ```Bash 67 | ./host --xocl_bdf=0000:d8:00.1 ... 68 | ``` 69 | 70 | ### Profiling 71 | 72 | `Invoke` returns an `fpga::Instance` object that contains profiling information. 73 | 74 | ```C++ 75 | double Instance::LoadTimeSeconds(); 76 | double Instance::ComputeTimeSeconds(); 77 | double Instance::StoreTimeSeconds(); 78 | double Instance::LoadThroughputGbps(); 79 | double Instance::StoreThroughputGbps(); 80 | ``` 81 | 82 | ### Streaming 83 | 84 | Streaming is supported (on legacy Xilinx platforms). 85 | 86 | ```C++ 87 | class fpga::ReadStream; 88 | class fpga::WriteStream; 89 | ``` 90 | 91 | The streams need to be created and passed to `fpga::Invoke` as a parameter. 92 | If the arguments to `fpga::Invoke` contain a stream, 93 | it will not wait for the kernel to finish; 94 | instead, it will return an `fpga::Instance` object immediately. 95 | The host program can read from `fpga::ReadStream` and/or write to 96 | `fpga::WriteStream`. 97 | When all stream I/O is done, 98 | `instance.Finish()` should be invoked to wait until the kernel finishes. 
99 | -------------------------------------------------------------------------------- /cmake/FRTConfig.cmake: -------------------------------------------------------------------------------- 1 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 2 | find_package(Threads REQUIRED) 3 | find_package(XRT PATHS ${CMAKE_CURRENT_LIST_DIR}) 4 | find_package(SDx PATHS ${CMAKE_CURRENT_LIST_DIR}) 5 | 6 | include("${CMAKE_CURRENT_LIST_DIR}/FRTTargets.cmake") 7 | -------------------------------------------------------------------------------- /cmake/FindSDx.cmake: -------------------------------------------------------------------------------- 1 | if(NOT CMAKE_BUILD_TYPE) 2 | set(CMAKE_BUILD_TYPE Debug) 3 | endif() 4 | 5 | find_program(XOCC NAMES v++ xocc PATHS "$ENV{XILINX_VITIS}/bin" "$ENV{XILINX_SDX}/bin") 6 | find_program(VPP_WRAPPER frt_vpp_wrapper) 7 | 8 | include(FindPackageHandleStandardArgs) 9 | find_package_handle_standard_args(SDx 10 | FOUND_VAR 11 | SDx_FOUND 12 | REQUIRED_VARS 13 | XOCC) 14 | 15 | function(add_xocc_compile_target target_name) 16 | # Generates a `.xo` file via `xocc --compile`. 17 | # 18 | # The added target will have the following properties: 19 | # 20 | # * KERNEL 21 | # * PLATFORM 22 | # * TARGET 23 | # * FILE_NAME 24 | # * DRAM_MAPPING 25 | # 26 | # Positional Arguments: 27 | # 28 | # * target_name: Name of the added cmake target. 29 | # 30 | # Named Arguments: 31 | # 32 | # * TARGET: Synthesis target (sw_emu|hw_emu|hw) [--target]. 33 | # * OUTPUT: Output filename [--output]. 34 | # * REPORT_DIR: A directory to copy report files to [--report_dir]. 35 | # * LOG_DIR: A directory to copy internally generated log files to 36 | # [--log_dir]. 37 | # * TEMP_DIR: A directory to use for temporary files and directories 38 | # [--temp_dir]. 39 | # * INPUT: Input HLS source target. 40 | # * SAVE_TEMPS: Boolean, whether to keep intermediate files. 41 | # 42 | # Other Arguments: 43 | # 44 | # * Optional, will be passed to `xocc --compile` directly. 
45 | 46 | # parse arguments and extract information 47 | cmake_parse_arguments(XOCC_COMPILE 48 | "SAVE_TEMPS" 49 | "TARGET;OUTPUT;REPORT_DIR;LOG_DIR;TEMP_DIR;INPUT" 50 | "" 51 | ${ARGN}) 52 | set(target ${XOCC_COMPILE_TARGET}) 53 | set(output ${XOCC_COMPILE_OUTPUT}) 54 | set(report_dir ${XOCC_COMPILE_REPORT_DIR}) 55 | set(log_dir ${XOCC_COMPILE_LOG_DIR}) 56 | set(temp_dir ${XOCC_COMPILE_TEMP_DIR}) 57 | set(input ${XOCC_COMPILE_INPUT}) 58 | get_target_property(kernel ${input} KERNEL) 59 | get_target_property(platform ${input} PLATFORM) 60 | get_target_property(dram_mapping ${input} DRAM_MAPPING) 61 | get_target_property(input_file ${input} FILE_NAME) 62 | get_filename_component(output ${output} ABSOLUTE) 63 | get_filename_component(temp_dir ${temp_dir} ABSOLUTE) 64 | get_filename_component(input_file ${input_file} ABSOLUTE) 65 | 66 | 67 | # compose the xocc compile command 68 | if (VPP_WRAPPER) 69 | set(xocc_cmd ${VPP_WRAPPER}) 70 | else() 71 | set(xocc_cmd "") 72 | endif() 73 | list(APPEND xocc_cmd ${XOCC} --compile) 74 | list(APPEND xocc_cmd --output ${output}) 75 | list(APPEND xocc_cmd --kernel ${kernel}) 76 | list(APPEND xocc_cmd --platform ${platform}) 77 | list(APPEND xocc_cmd --target ${target}) 78 | list(APPEND xocc_cmd --report_level 2) 79 | list(APPEND xocc_cmd --report_dir ${report_dir}) 80 | list(APPEND xocc_cmd --log_dir ${log_dir}) 81 | list(APPEND xocc_cmd --temp_dir=${temp_dir}) 82 | list(APPEND xocc_cmd --xp prop:kernel.${kernel}.kernel_flags=-std=c++11) 83 | if(CMAKE_BUILD_TYPE MATCHES Debug) 84 | list(APPEND xocc_cmd --debug) 85 | endif() 86 | if(${XOCC_COMPILE_SAVE_TEMPS}) 87 | list(APPEND xocc_cmd --save-temps) 88 | endif() 89 | list(APPEND xocc_cmd ${input_file}) 90 | list(APPEND xocc_cmd ${XOCC_COMPILE_UNPARSED_ARGUMENTS}) 91 | 92 | add_custom_command(OUTPUT ${output} 93 | COMMAND ${xocc_cmd} 94 | DEPENDS ${input} ${input_file} 95 | VERBATIM) 96 | 97 | add_custom_target(${target_name} DEPENDS ${output}) 98 | 
set_target_properties(${target_name} 99 | PROPERTIES KERNEL 100 | ${kernel} 101 | PLATFORM 102 | ${platform} 103 | TARGET 104 | ${target} 105 | FILE_NAME 106 | ${output} 107 | DRAM_MAPPING 108 | "${dram_mapping}") 109 | endfunction() 110 | 111 | function(add_xocc_link_target target_name) 112 | # Generates a `.xclbin` file via `xocc --link`. 113 | # 114 | # The added target will have the following properties: 115 | # 116 | # * KERNEL 117 | # * PLATFORM 118 | # * TARGET 119 | # * FILE_NAME 120 | # * DRAM_MAPPING 121 | # 122 | # Positional Arguments: 123 | # 124 | # * target_name: Name of the added cmake target. 125 | # 126 | # Named Arguments: 127 | # 128 | # * TARGET: Synthesis target (sw_emu|hw_emu|hw) [--target]. 129 | # * OUTPUT: Output filename [--output]. 130 | # * OPTIMIZE: Optimize level [--optimize]. 131 | # * REPORT_DIR: A directory to copy report files to [--report_dir]. 132 | # * LOG_DIR: A directory to copy internally generated log files to 133 | # [--log_dir]. 134 | # * TEMP_DIR: A directory to use for temporary files and directories 135 | # [--temp_dir]. 136 | # * INPUT: Input target generated via add_xocc_compile_target. 137 | # * SAVE_TEMPS: Boolean, whether to keep intermediate files. 138 | # 139 | # Other Arguments: 140 | # 141 | # * Optional, will be passed to `xocc --link` directly. 
142 | 143 | # parse arguments and extract information 144 | cmake_parse_arguments( 145 | XOCC_LINK 146 | "SAVE_TEMPS" 147 | "TARGET;OUTPUT;OPTIMIZE;REPORT_DIR;LOG_DIR;TEMP_DIR;INPUT" 148 | "" 149 | ${ARGN}) 150 | set(target ${XOCC_LINK_TARGET}) 151 | set(output ${XOCC_LINK_OUTPUT}) 152 | set(optimize ${XOCC_LINK_OPTIMIZE}) 153 | set(report_dir ${XOCC_LINK_REPORT_DIR}) 154 | set(log_dir ${XOCC_LINK_LOG_DIR}) 155 | set(temp_dir ${XOCC_LINK_TEMP_DIR}) 156 | set(input ${XOCC_LINK_INPUT}) 157 | get_target_property(kernel ${input} KERNEL) 158 | get_target_property(platform ${input} PLATFORM) 159 | get_target_property(dram_mapping ${input} DRAM_MAPPING) 160 | get_target_property(input_file ${input} FILE_NAME) 161 | get_filename_component(output ${output} ABSOLUTE) 162 | get_filename_component(temp_dir ${temp_dir} ABSOLUTE) 163 | get_filename_component(input_file ${input_file} ABSOLUTE) 164 | 165 | # compose the xocc link command 166 | if (VPP_WRAPPER) 167 | set(xocc_cmd ${VPP_WRAPPER}) 168 | else() 169 | set(xocc_cmd "") 170 | endif() 171 | list(APPEND xocc_cmd ${XOCC} --link) 172 | list(APPEND xocc_cmd --output ${output}) 173 | list(APPEND xocc_cmd --kernel ${kernel}) 174 | list(APPEND xocc_cmd --platform ${platform}) 175 | list(APPEND xocc_cmd --target ${target}) 176 | list(APPEND xocc_cmd --report_level 2) 177 | list(APPEND xocc_cmd --report_dir ${report_dir}) 178 | list(APPEND xocc_cmd --log_dir ${log_dir}) 179 | list(APPEND xocc_cmd --temp_dir=${temp_dir}) 180 | list(APPEND xocc_cmd --optimize ${optimize}) 181 | list(APPEND xocc_cmd --connectivity.nk ${kernel}:1:${kernel}) 182 | foreach(map ${dram_mapping}) 183 | list(APPEND xocc_cmd --connectivity.sp ${kernel}.m_axi_${map}) 184 | endforeach() 185 | if(CMAKE_BUILD_TYPE MATCHES Debug) 186 | list(APPEND xocc_cmd --debug) 187 | endif() 188 | if(${XOCC_LINK_SAVE_TEMPS}) 189 | list(APPEND xocc_cmd --save-temps) 190 | endif() 191 | list(APPEND xocc_cmd ${input_file}) 192 | list(APPEND xocc_cmd 
${XOCC_LINK_UNPARSED_ARGUMENTS}) 193 | 194 | add_custom_command(OUTPUT ${output} 195 | COMMAND ${xocc_cmd} 196 | DEPENDS ${input} ${input_file} 197 | VERBATIM) 198 | 199 | add_custom_target(${target_name} DEPENDS ${output}) 200 | set_target_properties(${target_name} 201 | PROPERTIES KERNEL 202 | ${kernel} 203 | PLATFORM 204 | ${platform} 205 | TARGET 206 | ${target} 207 | FILE_NAME 208 | ${output} 209 | DRAM_MAPPING 210 | "${dram_mapping}") 211 | 212 | endfunction() 213 | 214 | function(add_xocc_hw_link_targets output_dir) 215 | # Add cmake targets for hardware simulation and hardware execution. 216 | # 217 | # Positional Arguments: 218 | # 219 | # * output_dir: Output directory. 220 | # 221 | # Required Named Arguments: 222 | # 223 | # * KERNEL: Kernel name [--kernel]. 224 | # * PLATFORM: Platform [--platform]. 225 | # * INPUT: Input filename. 226 | # 227 | # Optional Named Arguments: 228 | # 229 | # * PREFIX: Prefix of the generated targets. 230 | # * DRAM_MAPPING: A list of mappings from variable name to DDR banks (e.g. 231 | # gmem0:DDR[0]). 232 | # * HW_XO: Returns the name of the hw_xo_target. 233 | # * HW_EMU_XCLBIN: Returns the name of the hw_emu_xclbin_target. 234 | # * HW_XCLBIN: Returns the name of the hw_xclbin_target. 235 | # 236 | # Other Arguments: 237 | # 238 | # * Optional, will be passed to `xocc --link` directly. 239 | 240 | set(one_value_keywords KERNEL PLATFORM INPUT) 241 | list(APPEND one_value_keywords 242 | PREFIX 243 | HW_XO 244 | HW_EMU_XCLBIN 245 | HW_XCLBIN) 246 | cmake_parse_arguments(XOCC 247 | "" 248 | "${one_value_keywords}" 249 | "DRAM_MAPPING" 250 | ${ARGN}) 251 | if(XOCC_PREFIX) 252 | set(prefix ${XOCC_PREFIX}.) 
253 | endif() 254 | if(TARGET ${XOCC_INPUT}) 255 | get_target_property(kernel ${XOCC_INPUT} KERNEL) 256 | get_target_property(platform ${XOCC_INPUT} PLATFORM) 257 | get_target_property(input_file ${XOCC_INPUT} FILE_NAME) 258 | get_target_property(dram_mapping ${XOCC_INPUT} DRAM_MAPPING) 259 | set(prefix ${prefix}${kernel}.${platform}) 260 | set(hw_xo_target ${XOCC_INPUT}) 261 | else() 262 | set(kernel ${XOCC_KERNEL}) 263 | set(platform ${XOCC_PLATFORM}) 264 | set(input_file ${XOCC_INPUT}) 265 | set(dram_mapping ${XOCC_DRAM_MAPPING}) 266 | set(prefix ${prefix}${kernel}.${platform}) 267 | set(hw_xo_target ${prefix}.hw_xo) 268 | add_custom_target(${hw_xo_target} DEPENDS ${input_file}) 269 | set_target_properties(${hw_xo_target} 270 | PROPERTIES KERNEL 271 | ${kernel} 272 | PLATFORM 273 | ${platform} 274 | TARGET 275 | hw 276 | FILE_NAME 277 | ${input_file} 278 | DRAM_MAPPING 279 | "${dram_mapping}") 280 | endif() 281 | 282 | set(hw_emu_xclbin_target ${prefix}.hw_emu_xclbin) 283 | set(hw_xclbin_target ${prefix}.hw_xclbin) 284 | if(XOCC_HW_XO) 285 | set(${XOCC_HW_XO} ${hw_xo_target} PARENT_SCOPE) 286 | endif() 287 | if(XOCC_HW_EMU_XCLBIN) 288 | set(${XOCC_HW_EMU_XCLBIN} ${hw_emu_xclbin_target} PARENT_SCOPE) 289 | endif() 290 | if(XOCC_HW_XCLBIN) 291 | set(${XOCC_HW_XCLBIN} ${hw_xclbin_target} PARENT_SCOPE) 292 | endif() 293 | 294 | if(CMAKE_BUILD_TYPE MATCHES Debug) 295 | set(optimize quick) 296 | elseif(CMAKE_BUILD_TYPE MATCHES Release) 297 | set(optimize 3) 298 | else() 299 | set(optimize 0) 300 | endif() 301 | 302 | set(hw_emu_xclbin ${prefix}.hw_emu.xclbin) 303 | set(hw_xclbin ${prefix}.hw.xclbin) 304 | add_xocc_link_target(${hw_emu_xclbin_target} 305 | OUTPUT ${output_dir}/${hw_emu_xclbin} 306 | TARGET hw_emu 307 | OPTIMIZE ${optimize} 308 | REPORT_DIR ${output_dir}/${hw_emu_xclbin}.report 309 | LOG_DIR ${output_dir}/${hw_emu_xclbin}.log 310 | TEMP_DIR ${output_dir}/${hw_emu_xclbin}.temp 311 | INPUT ${hw_xo_target} 312 | SAVE_TEMPS 313 | ${XOCC_UNPARSED_ARGUMENTS}) 
314 | add_xocc_link_target(${hw_xclbin_target} 315 | OUTPUT ${output_dir}/${hw_xclbin} 316 | TARGET hw 317 | OPTIMIZE ${optimize} 318 | REPORT_DIR ${output_dir}/${hw_xclbin}.report 319 | LOG_DIR ${output_dir}/${hw_xclbin}.log 320 | TEMP_DIR ${output_dir}/${hw_xclbin}.temp 321 | INPUT ${hw_xo_target} 322 | SAVE_TEMPS 323 | ${XOCC_UNPARSED_ARGUMENTS}) 324 | endfunction() 325 | 326 | function(add_xocc_targets output_dir) 327 | # Add cmake targets for software / hardware simulation and hardware execution. 328 | # 329 | # Positional Arguments: 330 | # 331 | # * output_dir: Output directory. 332 | # 333 | # Required Named Arguments: 334 | # 335 | # * KERNEL: Kernel name [--kernel]. 336 | # * PLATFORM: Platform [--platform]. 337 | # * INPUT: Input filename. 338 | # 339 | # Optional Named Arguments: 340 | # 341 | # * PREFIX: Prefix of the generated targets. 342 | # * DRAM_MAPPING: A list of mappings from variable name to DDR banks (e.g. 343 | # gmem0:DDR[0]). 344 | # * HLS_SRC: Returns the name of the hls_src_target. 345 | # * SW_EMU_XO: Returns the name of the sw_emu_xo_target. 346 | # * HW_XO: Returns the name of the hw_xo_target. 347 | # * SW_EMU_XCLBIN: Returns the name of the sw_emu_xclbin_target. 348 | # * HW_EMU_XCLBIN: Returns the name of the hw_emu_xclbin_target. 349 | # * HW_XCLBIN: Returns the name of the hw_xclbin_target. 350 | 351 | set(one_value_keywords KERNEL PLATFORM INPUT) 352 | list(APPEND one_value_keywords 353 | PREFIX 354 | HLS_SRC 355 | SW_EMU_XO 356 | HW_XO 357 | SW_EMU_XCLBIN 358 | HW_EMU_XCLBIN 359 | HW_XCLBIN) 360 | cmake_parse_arguments(XOCC 361 | "" 362 | "${one_value_keywords}" 363 | "DRAM_MAPPING" 364 | ${ARGN}) 365 | set(kernel ${XOCC_KERNEL}) 366 | set(platform ${XOCC_PLATFORM}) 367 | set(input_file ${XOCC_INPUT}) 368 | set(dram_mapping ${XOCC_DRAM_MAPPING}) 369 | if(XOCC_PREFIX) 370 | set(prefix ${XOCC_PREFIX}.) 
371 | endif() 372 | set(prefix ${prefix}${kernel}.${platform}) 373 | set(hls_src_target ${prefix}.hls_src) 374 | set(sw_emu_xo_target ${prefix}.sw_emu_xo) 375 | set(hw_xo_target ${prefix}.hw_xo) 376 | set(sw_emu_xclbin_target ${prefix}.sw_emu_xclbin) 377 | set(hw_emu_xclbin_target ${prefix}.hw_emu_xclbin) 378 | set(hw_xclbin_target ${prefix}.hw_xclbin) 379 | if(XOCC_HLS_SRC) 380 | set(${XOCC_HLS_SRC} ${hls_src_target} PARENT_SCOPE) 381 | endif() 382 | if(XOCC_SW_EMU_XO) 383 | set(${XOCC_SW_EMU_XO} ${sw_emu_xo_target} PARENT_SCOPE) 384 | endif() 385 | if(XOCC_HW_XO) 386 | set(${XOCC_HW_XO} ${hw_xo_target} PARENT_SCOPE) 387 | endif() 388 | if(XOCC_SW_EMU_XCLBIN) 389 | set(${XOCC_SW_EMU_XCLBIN} ${sw_emu_xclbin_target} PARENT_SCOPE) 390 | endif() 391 | if(XOCC_HW_EMU_XCLBIN) 392 | set(${XOCC_HW_EMU_XCLBIN} ${hw_emu_xclbin_target} PARENT_SCOPE) 393 | endif() 394 | if(XOCC_HW_XCLBIN) 395 | set(${XOCC_HW_XCLBIN} ${hw_xclbin_target} PARENT_SCOPE) 396 | endif() 397 | add_custom_target(${hls_src_target} DEPENDS ${input_file}) 398 | set_target_properties(${hls_src_target} 399 | PROPERTIES KERNEL 400 | ${kernel} 401 | PLATFORM 402 | ${platform} 403 | FILE_NAME 404 | ${input_file} 405 | DRAM_MAPPING 406 | "${dram_mapping}") 407 | 408 | if(CMAKE_BUILD_TYPE MATCHES Debug) 409 | set(optimize quick) 410 | elseif(CMAKE_BUILD_TYPE MATCHES Release) 411 | set(optimize 3) 412 | else() 413 | set(optimize 0) 414 | endif() 415 | 416 | set(sw_emu_xo ${prefix}.sw_emu.xo) 417 | set(hw_xo ${prefix}.hw.xo) 418 | set(sw_emu_xclbin ${prefix}.sw_emu.xclbin) 419 | set(hw_emu_xclbin ${prefix}.hw_emu.xclbin) 420 | set(hw_xclbin ${prefix}.hw.xclbin) 421 | add_xocc_compile_target(${sw_emu_xo_target} 422 | OUTPUT ${output_dir}/${sw_emu_xo} 423 | TARGET sw_emu 424 | REPORT_DIR ${output_dir}/${sw_emu_xo}.report 425 | LOG_DIR ${output_dir}/${sw_emu_xo}.log 426 | TEMP_DIR ${output_dir}/${sw_emu_xo}.temp 427 | INPUT ${hls_src_target} 428 | SAVE_TEMPS) 429 | add_xocc_compile_target(${hw_xo_target} 430 | 
OUTPUT ${output_dir}/${hw_xo} 431 | TARGET hw 432 | REPORT_DIR ${output_dir}/${hw_xo}.report 433 | LOG_DIR ${output_dir}/${hw_xo}.log 434 | TEMP_DIR ${output_dir}/${hw_xo}.temp 435 | INPUT ${hls_src_target} 436 | SAVE_TEMPS) 437 | add_xocc_link_target(${sw_emu_xclbin_target} 438 | OUTPUT ${output_dir}/${sw_emu_xclbin} 439 | TARGET sw_emu 440 | OPTIMIZE ${optimize} 441 | REPORT_DIR ${output_dir}/${sw_emu_xclbin}.report 442 | LOG_DIR ${output_dir}/${sw_emu_xclbin}.log 443 | TEMP_DIR ${output_dir}/${sw_emu_xclbin}.temp 444 | INPUT ${sw_emu_xo_target} 445 | SAVE_TEMPS) 446 | add_xocc_link_target(${hw_emu_xclbin_target} 447 | OUTPUT ${output_dir}/${hw_emu_xclbin} 448 | TARGET hw_emu 449 | OPTIMIZE ${optimize} 450 | REPORT_DIR ${output_dir}/${hw_emu_xclbin}.report 451 | LOG_DIR ${output_dir}/${hw_emu_xclbin}.log 452 | TEMP_DIR ${output_dir}/${hw_emu_xclbin}.temp 453 | INPUT ${hw_xo_target} 454 | SAVE_TEMPS) 455 | add_xocc_link_target(${hw_xclbin_target} 456 | OUTPUT ${output_dir}/${hw_xclbin} 457 | TARGET hw 458 | OPTIMIZE ${optimize} 459 | REPORT_DIR ${output_dir}/${hw_xclbin}.report 460 | LOG_DIR ${output_dir}/${hw_xclbin}.log 461 | TEMP_DIR ${output_dir}/${hw_xclbin}.temp 462 | INPUT ${hw_xo_target} 463 | SAVE_TEMPS) 464 | endfunction() 465 | 466 | function(add_xocc_targets_with_alias) 467 | # Add cmake targets for software / hardware simulation and hardware execution, 468 | # with global alias hls, bitstream, sw_emu_xclbin, hw_emu_xclbin, and xclbins. 469 | # 470 | # Takes the same set of arguments as add_xocc_targets. 
471 | 472 | cmake_parse_arguments(XOCC 473 | "" 474 | "HW_XO;SW_EMU_XCLBIN;HW_EMU_XCLBIN;HW_XCLBIN" 475 | "" 476 | ${ARGV}) 477 | add_xocc_targets(${ARGV} 478 | HW_XO hw_xo_target 479 | SW_EMU_XCLBIN sw_emu_xclbin_target 480 | HW_EMU_XCLBIN hw_emu_xclbin_target 481 | HW_XCLBIN hw_xclbin_target) 482 | if(XOCC_HW_XO) 483 | set(${XOCC_HW_XO} ${hw_xo_target} PARENT_SCOPE) 484 | endif() 485 | if(XOCC_SW_EMU_XCLBIN) 486 | set(${XOCC_SW_EMU_XCLBIN} ${sw_emu_xclbin_target} PARENT_SCOPE) 487 | endif() 488 | if(XOCC_HW_EMU_XCLBIN) 489 | set(${XOCC_HW_EMU_XCLBIN} ${hw_emu_xclbin_target} PARENT_SCOPE) 490 | endif() 491 | if(XOCC_HW_XCLBIN) 492 | set(${XOCC_HW_XCLBIN} ${hw_xclbin_target} PARENT_SCOPE) 493 | endif() 494 | 495 | add_custom_target(hls DEPENDS ${hw_xo_target}) 496 | add_custom_target(sw_emu_xclbin DEPENDS ${sw_emu_xclbin_target}) 497 | add_custom_target(hw_emu_xclbin DEPENDS ${hw_emu_xclbin_target}) 498 | add_custom_target(bitstream DEPENDS ${hw_xclbin_target}) 499 | add_custom_target(xclbins DEPENDS sw_emu_xclbin hw_emu_xclbin bitstream) 500 | endfunction() 501 | -------------------------------------------------------------------------------- /cmake/FindTinyXML.cmake: -------------------------------------------------------------------------------- 1 | find_path(TinyXML_INCLUDE_DIR NAMES tinyxml.h) 2 | find_library(TinyXML_LIBRARY NAMES tinyxml) 3 | include(FindPackageHandleStandardArgs) 4 | find_package_handle_standard_args(TinyXML 5 | FOUND_VAR 6 | TinyXML_FOUND 7 | REQUIRED_VARS 8 | TinyXML_LIBRARY 9 | TinyXML_INCLUDE_DIR) 10 | 11 | mark_as_advanced(TinyXML_INCLUDE_DIR TinyXML_LIBRARY) 12 | # Imported target exposing the TinyXML library and the header directory located by find_path() above. 13 | if(TinyXML_FOUND AND NOT TARGET TinyXML::TinyXML) 14 | add_library(TinyXML::TinyXML IMPORTED INTERFACE) 15 | set_target_properties( 16 | TinyXML::TinyXML 17 | PROPERTIES INTERFACE_LINK_LIBRARIES "${TinyXML_LIBRARY}") 18 | set_target_properties(TinyXML::TinyXML 19 | PROPERTIES INTERFACE_INCLUDE_DIRECTORIES 20 | "${TinyXML_INCLUDE_DIR}") 21 | endif() 22 | 
-------------------------------------------------------------------------------- /cmake/FindXRT.cmake: -------------------------------------------------------------------------------- 1 | find_path( 2 | XRT_INCLUDE_DIR 3 | NAMES xclbin.h 4 | PATHS "$ENV{XILINX_XRT}/include") 5 | find_library( 6 | XILINXOPENCL_LIBRARY 7 | NAMES xilinxopencl 8 | PATHS "$ENV{XILINX_XRT}/lib") 9 | get_filename_component(XRT_VERSION "${XILINXOPENCL_LIBRARY}" REALPATH) 10 | string(REGEX REPLACE "^.*\\.so\\." "" XRT_VERSION "${XRT_VERSION}") 11 | 12 | include(FindPackageHandleStandardArgs) 13 | find_package_handle_standard_args( 14 | XRT 15 | FOUND_VAR XRT_FOUND 16 | VERSION_VAR XRT_VERSION 17 | REQUIRED_VARS XILINXOPENCL_LIBRARY XRT_INCLUDE_DIR 18 | ) 19 | # Hide the cache entries created by find_path()/find_library() above. 20 | mark_as_advanced(XRT_INCLUDE_DIR XILINXOPENCL_LIBRARY) 21 | 22 | if(XRT_FOUND AND NOT TARGET xrt::xrt) 23 | add_library(xrt::xrt IMPORTED INTERFACE) 24 | set_target_properties(xrt::xrt PROPERTIES INTERFACE_LINK_LIBRARIES 25 | ${XILINXOPENCL_LIBRARY}) 26 | set_target_properties(xrt::xrt PROPERTIES INTERFACE_INCLUDE_DIRECTORIES 27 | "${XRT_INCLUDE_DIR}") 28 | endif() 29 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | function install-frt-for-ubuntu() { 5 | local codename="$1" 6 | 7 | if ! which sudo >/dev/null; then 8 | apt-get update 9 | apt-get install -y sudo 10 | fi 11 | 12 | sudo apt-get update 13 | sudo apt-get install -y apt-transport-https gnupg wget 14 | 15 | wget -O- https://about.blaok.me/fpga-runtime/frt.gpg.key | gpg --dearmor | 16 | sudo tee /usr/share/keyrings/.frt.gpg.tmp >/dev/null 17 | sudo mv /usr/share/keyrings/.frt.gpg.tmp /usr/share/keyrings/frt.gpg 18 | sudo tee /etc/apt/sources.list.d/frt.list </dev/null 2>/dev/null; then 31 | yum install -y sudo 32 | fi 33 | 34 | if ! 
yum list installed epel-release >/dev/null 2>/dev/null; then 35 | sudo yum install -y --setopt=skip_missing_names_on_install=False \ 36 | "https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm" 37 | fi 38 | 39 | sudo yum install -y --setopt=skip_missing_names_on_install=False \ 40 | "https://github.com/Blaok/fpga-runtime/releases/latest/download/frt-devel.centos.${version}.x86_64.rpm" 41 | } 42 | 43 | source /etc/os-release 44 | 45 | case "${ID}.${VERSION_ID}" in 46 | ubuntu.20.04 | ubuntu.22.04) 47 | install-frt-for-ubuntu "${UBUNTU_CODENAME}" 48 | ;; 49 | centos.7) 50 | install-frt-for-centos "${VERSION_ID}" 51 | ;; 52 | *) 53 | echo "unsupported os" >&2 54 | exit 1 55 | ;; 56 | esac 57 | -------------------------------------------------------------------------------- /src/frt.cpp: -------------------------------------------------------------------------------- 1 | #include "frt.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "frt/devices/intel_opencl_device.h" 12 | #include "frt/devices/tapa_fast_cosim_device.h" 13 | #include "frt/devices/xilinx_opencl_device.h" 14 | 15 | namespace fpga { 16 | 17 | Instance::Instance(const std::string& bitstream) { 18 | LOG(INFO) << "Loading " << bitstream; 19 | cl::Program::Binaries binaries; 20 | { 21 | std::ifstream stream(bitstream, std::ios::binary); 22 | binaries = {{std::istreambuf_iterator(stream), 23 | std::istreambuf_iterator()}}; 24 | } 25 | 26 | if ((device_ = internal::XilinxOpenclDevice::New(binaries))) { 27 | return; 28 | } 29 | 30 | if ((device_ = internal::IntelOpenclDevice::New(binaries))) { 31 | return; 32 | } 33 | 34 | if ((device_ = internal::TapaFastCosimDevice::New( 35 | bitstream, 36 | std::string_view(reinterpret_cast(binaries.begin()->data()), 37 | binaries.begin()->size())))) { 38 | return; 39 | } 40 | 41 | LOG(FATAL) << "Unexpected bitstream file"; 42 | } 43 | 44 | size_t Instance::SuspendBuf(int index) { return 
device_->SuspendBuffer(index); } 45 | 46 | void Instance::WriteToDevice() { device_->WriteToDevice(); } 47 | 48 | void Instance::ReadFromDevice() { device_->ReadFromDevice(); } 49 | 50 | void Instance::Exec() { device_->Exec(); } 51 | 52 | void Instance::Finish() { device_->Finish(); } 53 | 54 | std::vector Instance::GetArgsInfo() const { 55 | return device_->GetArgsInfo(); 56 | } 57 | 58 | int64_t Instance::LoadTimeNanoSeconds() const { 59 | return device_->LoadTimeNanoSeconds(); 60 | } 61 | 62 | int64_t Instance::ComputeTimeNanoSeconds() const { 63 | return device_->ComputeTimeNanoSeconds(); 64 | } 65 | 66 | int64_t Instance::StoreTimeNanoSeconds() const { 67 | return device_->StoreTimeNanoSeconds(); 68 | } 69 | 70 | double Instance::LoadTimeSeconds() const { 71 | return static_cast(LoadTimeNanoSeconds()) * 1e-9; 72 | } 73 | 74 | double Instance::ComputeTimeSeconds() const { 75 | return static_cast(ComputeTimeNanoSeconds()) * 1e-9; 76 | } 77 | 78 | double Instance::StoreTimeSeconds() const { 79 | return static_cast(StoreTimeNanoSeconds()) * 1e-9; 80 | } 81 | 82 | double Instance::LoadThroughputGbps() const { 83 | return static_cast(device_->LoadBytes()) / 84 | static_cast(LoadTimeNanoSeconds()); 85 | } 86 | 87 | double Instance::StoreThroughputGbps() const { 88 | return static_cast(device_->StoreBytes()) / 89 | static_cast(StoreTimeNanoSeconds()); 90 | } 91 | 92 | void Instance::ConditionallyFinish(bool has_stream) { 93 | if (!has_stream) { 94 | VLOG(1) << "no stream found; waiting for command to finish"; 95 | Finish(); 96 | } 97 | } 98 | 99 | } // namespace fpga 100 | -------------------------------------------------------------------------------- /src/frt.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_H_ 2 | #define FPGA_RUNTIME_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 
"frt/arg_info.h" 17 | #include "frt/buffer.h" 18 | #include "frt/device.h" 19 | #include "frt/stream.h" 20 | #include "frt/stream_wrapper.h" 21 | #include "frt/tag.h" 22 | 23 | namespace fpga { 24 | 25 | template 26 | using ReadOnlyBuffer = internal::Buffer; 27 | template 28 | using WriteOnlyBuffer = internal::Buffer; 29 | template 30 | using ReadWriteBuffer = internal::Buffer; 31 | template 32 | using PlaceholderBuffer = internal::Buffer; 33 | 34 | template 35 | ReadOnlyBuffer ReadOnly(T* ptr, size_t n) { 36 | return ReadOnlyBuffer(ptr, n); 37 | } 38 | template 39 | WriteOnlyBuffer WriteOnly(T* ptr, size_t n) { 40 | return WriteOnlyBuffer(ptr, n); 41 | } 42 | template 43 | ReadWriteBuffer ReadWrite(T* ptr, size_t n) { 44 | return ReadWriteBuffer(ptr, n); 45 | } 46 | template 47 | PlaceholderBuffer Placeholder(T* ptr, size_t n) { 48 | return PlaceholderBuffer(ptr, n); 49 | } 50 | 51 | using ReadStream = internal::Stream; 52 | using WriteStream = internal::Stream; 53 | 54 | class Instance { 55 | public: 56 | Instance(const std::string& bitstream); 57 | 58 | // Sets a scalar argument. 59 | template 60 | void SetArg(int index, T arg) { 61 | device_->SetScalarArg(index, &arg, sizeof(arg)); 62 | } 63 | 64 | // Sets a buffer argument. 65 | template 66 | void SetArg(int index, internal::Buffer arg) { 67 | device_->SetBufferArg(index, tag, arg); 68 | } 69 | 70 | // Sets a stream argument. 71 | template 72 | void SetArg(int index, internal::Stream& arg) { 73 | device_->SetStreamArg(index, tag, arg); 74 | } 75 | 76 | // Sets all arguments. 77 | template 78 | void SetArgs(Args&&... args) { 79 | SetArg(0, std::forward(args)...); 80 | } 81 | 82 | // Allocates buffer for an argument. This function is now deprecated and its 83 | // original functionality is now part of `SetArg`. 
84 | template 85 | [[deprecated("'SetArg' is sufficient")]] void AllocBuf(int index, T arg) {} 86 | 87 | // Suspends a buffer from being transferred between host and device and 88 | // returns the number of transfer operations suspended. 89 | size_t SuspendBuf(int index); 90 | 91 | // Writes buffers to the device. 92 | void WriteToDevice(); 93 | 94 | // Reads buffers from the device. 95 | void ReadFromDevice(); 96 | 97 | // Executes the program on the device. 98 | void Exec(); 99 | 100 | // Waits for the program to finish. 101 | void Finish(); 102 | 103 | // Invokes the program on the device. This is a shortcut for `SetArgs`, 104 | // `WriteToDevice`, `Exec`, `ReadFromDevice`, and if there are no stream 105 | // arguments, `Finish` as well. 106 | template 107 | Instance& Invoke(Args&&... args) { 108 | SetArgs(std::forward(args)...); 109 | WriteToDevice(); 110 | Exec(); 111 | ReadFromDevice(); 112 | bool has_stream = false; 113 | bool _[sizeof...(Args)] = {( 114 | has_stream |= 115 | std::is_base_of::type>::value)...}; 117 | ConditionallyFinish(has_stream); 118 | return *this; 119 | } 120 | 121 | // Returns information of all args as a vector, sorted by the index. 122 | std::vector GetArgsInfo() const; 123 | 124 | // Returns the load time in nanoseconds. 125 | int64_t LoadTimeNanoSeconds() const; 126 | 127 | // Returns the compute time in nanoseconds. 128 | int64_t ComputeTimeNanoSeconds() const; 129 | 130 | // Returns the store time in nanoseconds. 131 | int64_t StoreTimeNanoSeconds() const; 132 | 133 | // Returns the load time in seconds. 134 | double LoadTimeSeconds() const; 135 | 136 | // Returns the compute time in seconds. 137 | double ComputeTimeSeconds() const; 138 | 139 | // Returns the store time in seconds. 140 | double StoreTimeSeconds() const; 141 | 142 | // Returns the load throughput in GB/s. 143 | double LoadThroughputGbps() const; 144 | 145 | // Returns the store throughput in GB/s. 
146 | double StoreThroughputGbps() const; 147 | 148 | private: 149 | template 150 | void SetArg(int index, T&& arg, Args&&... other_args) { 151 | SetArg(index, std::forward(arg)); 152 | SetArg(index + 1, std::forward(other_args)...); 153 | } 154 | 155 | void ConditionallyFinish(bool has_stream); 156 | 157 | std::unique_ptr device_; 158 | }; 159 | 160 | template 161 | Instance Invoke(const std::string& bitstream, Arg&& arg, Args&&... args) { 162 | return std::move(Instance(bitstream).Invoke(std::forward(arg), 163 | std::forward(args)...)); 164 | } 165 | 166 | } // namespace fpga 167 | 168 | #endif // FPGA_RUNTIME_H_ 169 | -------------------------------------------------------------------------------- /src/frt/arg_info.cpp: -------------------------------------------------------------------------------- 1 | #include "frt/arg_info.h" 2 | 3 | #include 4 | 5 | namespace fpga { 6 | 7 | std::ostream& operator<<(std::ostream& os, const ArgInfo::Cat& cat) { 8 | switch (cat) { 9 | case ArgInfo::kScalar: 10 | return os << "scalar"; 11 | case ArgInfo::kMmap: 12 | return os << "mmap"; 13 | case ArgInfo::kStream: 14 | return os << "stream"; 15 | } 16 | return os; 17 | } 18 | 19 | std::ostream& operator<<(std::ostream& os, const ArgInfo& arg) { 20 | os << "ArgInfo: {index: " << arg.index << ", name: '" << arg.name 21 | << "', type: '" << arg.type << "', category: " << arg.cat; 22 | os << "}"; 23 | return os; 24 | } 25 | 26 | } // namespace fpga 27 | -------------------------------------------------------------------------------- /src/frt/arg_info.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_ARG_INFO_H_ 2 | #define FPGA_RUNTIME_ARG_INFO_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace fpga { 8 | 9 | struct ArgInfo { 10 | enum Cat { 11 | kScalar = 0, 12 | kMmap = 1, 13 | kStream = 2, 14 | }; 15 | int index; 16 | std::string name; 17 | std::string type; 18 | Cat cat; 19 | }; 20 | 21 | std::ostream& 
operator<<(std::ostream& os, const ArgInfo::Cat& cat); 22 | std::ostream& operator<<(std::ostream& os, const ArgInfo& arg); 23 | 24 | } // namespace fpga 25 | 26 | #endif // FPGA_RUNTIME_ARG_INFO_H_ 27 | -------------------------------------------------------------------------------- /src/frt/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_BUFFER_H_ 2 | #define FPGA_RUNTIME_BUFFER_H_ 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | 10 | #include "frt/tag.h" 11 | 12 | namespace fpga { 13 | namespace internal { 14 | 15 | template 16 | class Buffer { 17 | public: 18 | Buffer(T* ptr, size_t n) : ptr_(ptr), n_(n) {} 19 | T* Get() const { return ptr_; } 20 | size_t Size() const { return n_; } 21 | size_t SizeInBytes() const { return n_ * sizeof(T); } 22 | 23 | template 24 | Buffer Reinterpret() const { 25 | static_assert(std::is_standard_layout::value, 26 | "T must have standard layout"); 27 | static_assert(std::is_standard_layout::value, 28 | "U must have standard layout"); 29 | 30 | if constexpr (sizeof(U) > sizeof(T)) { 31 | constexpr auto N = sizeof(U) / sizeof(T); 32 | CHECK_EQ(sizeof(U) % sizeof(T), 0) 33 | << "sizeof(U) must be a multiple of sizeof(T) when Buffer is " 34 | "reinterpreted as Buffer (i.e., `Reinterpret()`); got " 35 | "sizeof(U) = " 36 | << sizeof(U) << ", sizeof(T) = " << sizeof(T); 37 | CHECK_EQ(Size() % N, 0) 38 | << "size of Buffer must be a multiple of N (= " 39 | "sizeof(U)/sizeof(T)) when reinterpreted as Buffer (i.e., " 40 | "`Reinterpret()`); got size = " 41 | << Size() << ", N = " << sizeof(U) << " / " << sizeof(T) << " = " << N 42 | << ", but " << Size() << " % " << N << " != 0"; 43 | } else if constexpr (sizeof(U) < sizeof(T)) { 44 | CHECK_EQ(sizeof(T) % sizeof(U), 0) 45 | << "sizeof(T) must be a multiple of sizeof(U) when Buffer is " 46 | "reinterpreted as Buffer (i.e., `Reinterpret()`); got " 47 | "sizeof(T) = " 48 | << sizeof(T) << ", sizeof(U) = " << sizeof(U); 
49 | } 50 | CHECK_EQ(reinterpret_cast(Get()) % alignof(U), 0) 51 | << "data of Buffer must be " << alignof(U) 52 | << "-byte aligned when reinterpreted as Buffer (i.e., " 53 | "`Reinterpret()`) because alignof(U) = " 54 | << alignof(U); 55 | return Buffer(reinterpret_cast(Get()), 56 | Size() * sizeof(T) / sizeof(U)); 57 | } 58 | 59 | private: 60 | T* const ptr_; 61 | const size_t n_; 62 | }; 63 | 64 | } // namespace internal 65 | } // namespace fpga 66 | 67 | #endif // FPGA_RUNTIME_BUFFER_H_ 68 | -------------------------------------------------------------------------------- /src/frt/buffer_arg.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_BUFFER_ARG_H_ 2 | #define FPGA_RUNTIME_BUFFER_ARG_H_ 3 | 4 | #include 5 | 6 | #include "frt/buffer.h" 7 | #include "frt/tag.h" 8 | 9 | namespace fpga { 10 | namespace internal { 11 | 12 | class BufferArg { 13 | public: 14 | template 15 | BufferArg(Buffer buffer) 16 | : ptr_(const_cast(reinterpret_cast(buffer.Get()))), 17 | size_(sizeof(T)), 18 | n_(buffer.SizeInBytes() / sizeof(T)) {} 19 | 20 | BufferArg() = default; 21 | BufferArg(const BufferArg&) = default; 22 | BufferArg& operator=(const BufferArg&) = default; 23 | BufferArg(BufferArg&&) = default; 24 | BufferArg& operator=(BufferArg&&) = default; 25 | 26 | char* Get() const { return ptr_; } 27 | size_t SizeInCount() const { return n_; } 28 | size_t SizeInBytes() const { return size_ * n_; } 29 | 30 | private: 31 | char* ptr_; 32 | size_t size_; 33 | size_t n_; 34 | }; 35 | 36 | } // namespace internal 37 | } // namespace fpga 38 | 39 | #endif // FPGA_RUNTIME_BUFFER_ARG_H_ 40 | -------------------------------------------------------------------------------- /src/frt/buffer_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "frt/buffer.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 
"frt.h" 14 | #include "frt/tag.h" 15 | 16 | namespace fpga::internal { 17 | namespace { 18 | 19 | class BufferTest : public testing::Test { 20 | protected: 21 | void SetUp() override { elements_.resize(10); } 22 | 23 | std::vector elements_; 24 | }; 25 | 26 | TEST_F(BufferTest, SizeReturnsElementCount) { 27 | auto buf = fpga::Placeholder(elements_.data(), elements_.size()); 28 | 29 | EXPECT_EQ(buf.Size(), elements_.size()); 30 | } 31 | 32 | TEST_F(BufferTest, SizeInBytesReturnsByteCount) { 33 | auto buf = fpga::ReadOnly(elements_.data(), elements_.size()); 34 | 35 | EXPECT_EQ(buf.SizeInBytes(), elements_.size() * sizeof(elements_[0])); 36 | } 37 | 38 | TEST_F(BufferTest, ReinterpretAsWiderTypeSucceeds) { 39 | float* ptr = elements_.data(); 40 | int64_t size = elements_.size(); 41 | if (reinterpret_cast(ptr) % 2 != 0) { 42 | // Force alignment if necessary. 43 | ++ptr; 44 | size -= 2; 45 | ASSERT_GT(size, 0); 46 | ASSERT_EQ(size % 2, 0); 47 | } 48 | 49 | auto buf = fpga::WriteOnly(ptr, size).Reinterpret(); 50 | 51 | EXPECT_EQ(reinterpret_cast(buf.Get()), 52 | reinterpret_cast(ptr)); 53 | EXPECT_EQ(buf.Size(), size / 2); 54 | } 55 | 56 | TEST_F(BufferTest, ReinterpretAsNarrowerTypeSucceeds) { 57 | auto buf1 = fpga::ReadWrite(elements_.data(), elements_.size()); 58 | 59 | auto buf2 = buf1.Reinterpret(); 60 | 61 | EXPECT_EQ(reinterpret_cast(buf1.Get()), 62 | reinterpret_cast(buf2.Get())); 63 | EXPECT_EQ(buf1.SizeInBytes(), buf2.SizeInBytes()); 64 | } 65 | 66 | TEST_F(BufferTest, ReinterpretAsSameSizeTypeSucceeds) { 67 | auto buf1 = fpga::Placeholder(elements_.data(), elements_.size()); 68 | 69 | auto buf2 = buf1.Reinterpret(); 70 | 71 | EXPECT_EQ(reinterpret_cast(buf1.Get()), 72 | reinterpret_cast(buf2.Get())); 73 | EXPECT_EQ(buf1.SizeInBytes(), buf2.SizeInBytes()); 74 | } 75 | 76 | TEST_F(BufferTest, ReinterpretAsWiderTypeWithUnalignedTypeFails) { 77 | auto buf = fpga::ReadOnly(elements_.data(), elements_.size()); 78 | 79 | EXPECT_DEATH((buf.Reinterpret>()), 80 | 
"sizeof\\(U\\) must be a multiple of sizeof\\(T\\)"); 81 | } 82 | 83 | TEST_F(BufferTest, ReinterpretAsWiderTypeWithUnalignedSizeFails) { 84 | auto buf = fpga::WriteOnly(elements_.data(), elements_.size() - 1); 85 | 86 | EXPECT_DEATH(buf.Reinterpret(), 87 | "size of Buffer must be a multiple of N " 88 | "\\(= sizeof\\(U\\)/sizeof\\(T\\)\\)"); 89 | } 90 | 91 | TEST_F(BufferTest, ReinterpretAsNarrowerTypeWithUnalignedTypeFails) { 92 | auto buf = fpga::ReadWrite(elements_.data(), elements_.size()); 93 | 94 | EXPECT_DEATH((buf.Reinterpret>()), 95 | "sizeof\\(T\\) must be a multiple of sizeof\\(U\\)"); 96 | } 97 | 98 | TEST_F(BufferTest, ReinterpretAsWiderTypeWithUnalignedDataFails) { 99 | float* ptr = elements_.data(); 100 | int64_t size = elements_.size(); 101 | if (reinterpret_cast(ptr) % 2 == 0) { 102 | // Force alignment if necessary. 103 | ++ptr; 104 | size -= 2; 105 | ASSERT_GT(size, 0); 106 | ASSERT_EQ(size % 2, 0); 107 | } 108 | 109 | EXPECT_DEATH(fpga::Placeholder(ptr, size).Reinterpret(), 110 | "data of Buffer must be 8-byte aligned"); 111 | } 112 | 113 | } // namespace 114 | } // namespace fpga::internal 115 | -------------------------------------------------------------------------------- /src/frt/device.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_DEVICE_H_ 2 | #define FPGA_RUNTIME_DEVICE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "frt/arg_info.h" 10 | #include "frt/buffer_arg.h" 11 | #include "frt/stream_wrapper.h" 12 | #include "frt/tag.h" 13 | 14 | namespace fpga { 15 | namespace internal { 16 | 17 | class Device { 18 | public: 19 | virtual ~Device() = default; 20 | 21 | virtual void SetScalarArg(int index, const void* arg, int size) = 0; 22 | virtual void SetBufferArg(int index, Tag tag, const BufferArg& arg) = 0; 23 | virtual void SetStreamArg(int index, Tag tag, StreamWrapper& arg) = 0; 24 | virtual size_t SuspendBuffer(int index) = 0; 25 | 26 | virtual void 
WriteToDevice() = 0; 27 | virtual void ReadFromDevice() = 0; 28 | virtual void Exec() = 0; 29 | virtual void Finish() = 0; 30 | 31 | virtual std::vector GetArgsInfo() const = 0; 32 | virtual int64_t LoadTimeNanoSeconds() const = 0; 33 | virtual int64_t ComputeTimeNanoSeconds() const = 0; 34 | virtual int64_t StoreTimeNanoSeconds() const = 0; 35 | virtual size_t LoadBytes() const = 0; 36 | virtual size_t StoreBytes() const = 0; 37 | }; 38 | 39 | } // namespace internal 40 | } // namespace fpga 41 | 42 | #endif // FPGA_RUNTIME_DEVICE_H_ 43 | -------------------------------------------------------------------------------- /src/frt/devices/intel_opencl_device.cpp: -------------------------------------------------------------------------------- 1 | #include "frt/devices/intel_opencl_device.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "frt/devices/opencl_device_matcher.h" 14 | #include "frt/devices/opencl_util.h" 15 | #include "frt/stream_wrapper.h" 16 | #include "frt/tag.h" 17 | 18 | namespace fpga { 19 | namespace internal { 20 | 21 | namespace { 22 | 23 | class DeviceMatcher : public OpenclDeviceMatcher { 24 | public: 25 | explicit DeviceMatcher(std::string target_device_name) 26 | : target_device_name_(std::move(target_device_name)) {} 27 | 28 | std::string GetTargetName() const override { return target_device_name_; } 29 | 30 | std::string Match(cl::Device device) const override { 31 | const std::string device_name = device.getInfo(); 32 | LOG(INFO) << "Found device: " << device_name; 33 | 34 | // Intel devices contain a std::string that is unavailable from the binary. 
35 | const std::string prefix = target_device_name_ + " : "; 36 | if (device_name == target_device_name_ || 37 | device_name.substr(0, prefix.size()) == prefix) { 38 | return device_name; 39 | } 40 | return ""; 41 | } 42 | 43 | private: 44 | const std::string target_device_name_; 45 | }; 46 | 47 | } // namespace 48 | 49 | IntelOpenclDevice::IntelOpenclDevice(const cl::Program::Binaries& binaries) { 50 | std::string target_device_name; 51 | std::string vendor_name; 52 | std::vector kernel_names; 53 | std::vector kernel_arg_counts; 54 | int arg_count = 0; 55 | auto data = binaries.begin()->data(); 56 | if (data[EI_CLASS] == ELFCLASS32) { 57 | vendor_name = "Intel(R) FPGA SDK for OpenCL(TM)"; 58 | auto elf_header = reinterpret_cast(data); 59 | auto elf_section_headers = reinterpret_cast( 60 | (reinterpret_cast(elf_header) + elf_header->e_shoff)); 61 | auto elf_section = [&](int idx) -> const Elf32_Shdr* { 62 | return &elf_section_headers[idx]; 63 | }; 64 | auto elf_str_table = 65 | (elf_header->e_shstrndx == SHN_UNDEF) 66 | ? nullptr 67 | : reinterpret_cast(elf_header) + 68 | elf_section(elf_header->e_shstrndx)->sh_offset; 69 | for (int i = 0; i < elf_header->e_shnum; ++i) { 70 | auto section_header = elf_section(i); 71 | auto section_name = 72 | elf_str_table ? 
elf_str_table + section_header->sh_name : nullptr; 73 | if (strcmp(section_name, ".acl.kernel_arg_info.xml") == 0) { 74 | TiXmlDocument doc; 75 | doc.Parse(reinterpret_cast(elf_header) + 76 | section_header->sh_offset, 77 | 0, TIXML_ENCODING_UTF8); 78 | for (auto xml_kernel = 79 | doc.FirstChildElement("board")->FirstChildElement("kernel"); 80 | xml_kernel != nullptr; 81 | xml_kernel = xml_kernel->NextSiblingElement("kernel")) { 82 | kernel_names.push_back(xml_kernel->Attribute("name")); 83 | kernel_arg_counts.push_back(arg_count); 84 | for (auto xml_arg = xml_kernel->FirstChildElement("argument"); 85 | xml_arg != nullptr; 86 | xml_arg = xml_arg->NextSiblingElement("argument")) { 87 | auto& arg = arg_table_[arg_count]; 88 | arg.index = arg_count; 89 | ++arg_count; 90 | arg.name = xml_arg->Attribute("name"); 91 | arg.type = xml_arg->Attribute("type_name"); 92 | auto cat = atoi(xml_arg->Attribute("opencl_access_type")); 93 | switch (cat) { 94 | case 0: 95 | arg.cat = ArgInfo::kScalar; 96 | break; 97 | case 2: 98 | arg.cat = ArgInfo::kMmap; 99 | break; 100 | default: 101 | LOG(WARNING) << "Unknown argument category: " << cat; 102 | } 103 | } 104 | } 105 | } else if (strcmp(section_name, ".acl.board") == 0) { 106 | const std::string board_name(reinterpret_cast(elf_header) + 107 | section_header->sh_offset, 108 | section_header->sh_size); 109 | if (board_name == "EmulatorDevice") { 110 | setenv("CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA", "1", 0); 111 | LOG(INFO) << "Running hardware simulation with Intel OpenCL"; 112 | } else if (board_name == "SimulatorDevice") { 113 | setenv("CL_CONTEXT_MPSIM_DEVICE_INTELFPGA", "1", 0); 114 | LOG(INFO) << "Running software simulation with Intel OpenCL"; 115 | } else { 116 | LOG(INFO) << "Running on-board execution with Intel OpenCL"; 117 | } 118 | target_device_name = board_name; 119 | } 120 | } 121 | if (kernel_names.empty() || target_device_name.empty()) { 122 | LOG(FATAL) << "Unexpected ELF file"; 123 | } 124 | } else if 
(data[EI_CLASS] == ELFCLASS64) { 125 | vendor_name = "Intel(R) FPGA Emulation Platform for OpenCL(TM)"; 126 | target_device_name = "Intel(R) FPGA Emulation Device"; 127 | LOG(FATAL) << "Fast emulator not supported"; 128 | } else { 129 | LOG(FATAL) << "Unexpected ELF file"; 130 | } 131 | 132 | Initialize(binaries, vendor_name, DeviceMatcher(target_device_name), 133 | kernel_names, kernel_arg_counts); 134 | } 135 | 136 | std::unique_ptr IntelOpenclDevice::New( 137 | const cl::Program::Binaries& binaries) { 138 | if (binaries.size() != 1 || binaries.begin()->size() < SELFMAG || 139 | memcmp(binaries.begin()->data(), ELFMAG, SELFMAG) != 0) { 140 | return nullptr; 141 | } 142 | return std::make_unique(binaries); 143 | } 144 | 145 | void IntelOpenclDevice::SetStreamArg(int index, Tag tag, StreamWrapper& arg) { 146 | LOG(FATAL) << "Intel OpenCL device does not support streaming"; 147 | }; 148 | 149 | void IntelOpenclDevice::WriteToDevice() { 150 | load_event_.resize(load_indices_.size()); 151 | int i = 0; 152 | for (auto index : load_indices_) { 153 | auto buffer = buffer_table_[index]; 154 | CL_CHECK(cmd_.enqueueWriteBuffer( 155 | buffer, /* blocking = */ CL_FALSE, /* offset = */ 0, 156 | buffer.getInfo(), host_ptr_table_[index], 157 | /* events = */ nullptr, &load_event_[i])); 158 | ++i; 159 | } 160 | } 161 | 162 | void IntelOpenclDevice::ReadFromDevice() { 163 | store_event_.resize(store_indices_.size()); 164 | int i = 0; 165 | for (auto index : store_indices_) { 166 | auto buffer = buffer_table_[index]; 167 | cmd_.enqueueReadBuffer(buffer, /* blocking = */ CL_FALSE, 168 | /* offset = */ 0, buffer.getInfo(), 169 | host_ptr_table_[index], &compute_event_, 170 | &store_event_[i]); 171 | ++i; 172 | } 173 | } 174 | 175 | cl::Buffer IntelOpenclDevice::CreateBuffer(int index, cl_mem_flags flags, 176 | void* host_ptr, size_t size) { 177 | flags |= /* CL_MEM_HETEROGENEOUS_INTELFPGA = */ 1 << 19; 178 | host_ptr_table_[index] = host_ptr; 179 | host_ptr = nullptr; 180 | return 
OpenclDevice::CreateBuffer(index, flags, host_ptr, size); 181 | } 182 | 183 | } // namespace internal 184 | } // namespace fpga 185 | -------------------------------------------------------------------------------- /src/frt/devices/intel_opencl_device.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_INTEL_OPENCL_DEVICE_H_ 2 | #define FPGA_RUNTIME_INTEL_OPENCL_DEVICE_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "frt/devices/opencl_device.h" 12 | 13 | namespace fpga { 14 | namespace internal { 15 | 16 | class IntelOpenclDevice : public OpenclDevice { 17 | public: 18 | IntelOpenclDevice(const cl::Program::Binaries& binaries); 19 | 20 | static std::unique_ptr New(const cl::Program::Binaries& binaries); 21 | 22 | void SetStreamArg(int index, Tag tag, StreamWrapper& arg) override; 23 | void WriteToDevice() override; 24 | void ReadFromDevice() override; 25 | 26 | private: 27 | cl::Buffer CreateBuffer(int index, cl_mem_flags flags, void* host_ptr, 28 | size_t size) override; 29 | 30 | std::unordered_map host_ptr_table_; 31 | }; 32 | 33 | } // namespace internal 34 | } // namespace fpga 35 | 36 | #endif // FPGA_RUNTIME_INTEL_OPENCL_DEVICE_H_ 37 | -------------------------------------------------------------------------------- /src/frt/devices/opencl_device.cpp: -------------------------------------------------------------------------------- 1 | #include "frt/devices/opencl_device.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "frt/devices/opencl_device_matcher.h" 13 | #include "frt/devices/opencl_util.h" 14 | 15 | namespace fpga { 16 | namespace internal { 17 | 18 | namespace { 19 | 20 | template 21 | int64_t GetTime(const cl::Event& event) { 22 | cl_int err; 23 | int64_t time = event.getProfilingInfo(&err); 24 | CL_CHECK(err); 25 | return time; 26 | } 27 | 28 | template 29 | int64_t Earliest(const 
std::vector& events, 30 | int64_t default_value = 0) { 31 | if (events.size() != 0) { 32 | default_value = std::numeric_limits::max(); 33 | for (auto& event : events) { 34 | default_value = std::min(default_value, GetTime(event)); 35 | } 36 | } 37 | return default_value; 38 | } 39 | 40 | template 41 | int64_t Latest(const std::vector& events, 42 | int64_t default_value = 0) { 43 | if (events.size() != 0) { 44 | default_value = std::numeric_limits::min(); 45 | for (auto& event : events) { 46 | default_value = std::max(default_value, GetTime(event)); 47 | } 48 | } 49 | return default_value; 50 | } 51 | 52 | } // namespace 53 | 54 | void OpenclDevice::SetScalarArg(int index, const void* arg, int size) { 55 | auto pair = GetKernel(index); 56 | pair.second.setArg(pair.first, size, arg); 57 | } 58 | 59 | void OpenclDevice::SetBufferArg(int index, Tag tag, const BufferArg& arg) { 60 | cl_mem_flags flags = 0; 61 | switch (tag) { 62 | case Tag::kPlaceHolder: 63 | break; 64 | case Tag::kReadOnly: 65 | flags = CL_MEM_READ_ONLY; 66 | break; 67 | case Tag::kWriteOnly: 68 | flags = CL_MEM_WRITE_ONLY; 69 | break; 70 | case Tag::kReadWrite: 71 | flags = CL_MEM_READ_WRITE; 72 | break; 73 | } 74 | cl::Buffer buffer = CreateBuffer(index, flags, arg.Get(), arg.SizeInBytes()); 75 | if (tag == Tag::kReadOnly || tag == Tag::kReadWrite) { 76 | store_indices_.insert(index); 77 | } 78 | if (tag == Tag::kWriteOnly || tag == Tag::kReadWrite) { 79 | load_indices_.insert(index); 80 | } 81 | auto pair = GetKernel(index); 82 | pair.second.setArg(pair.first, buffer); 83 | } 84 | 85 | size_t OpenclDevice::SuspendBuffer(int index) { 86 | return load_indices_.erase(index) + store_indices_.erase(index); 87 | } 88 | 89 | void OpenclDevice::Exec() { 90 | compute_event_.resize(kernels_.size()); 91 | int i = 0; 92 | for (auto& pair : kernels_) { 93 | CL_CHECK(cmd_.enqueueNDRangeKernel(pair.second, cl::NullRange, 94 | cl::NDRange(1), cl::NDRange(1), 95 | &load_event_, &compute_event_[i])); 96 | ++i; 97 | } 
98 | } 99 | 100 | void OpenclDevice::Finish() { 101 | CL_CHECK(cmd_.flush()); 102 | CL_CHECK(cmd_.finish()); 103 | } 104 | 105 | std::vector OpenclDevice::GetArgsInfo() const { 106 | std::vector args; 107 | args.reserve(arg_table_.size()); 108 | for (const auto& arg : arg_table_) { 109 | args.push_back(arg.second); 110 | } 111 | std::sort(args.begin(), args.end(), 112 | [](const ArgInfo& lhs, const ArgInfo& rhs) { 113 | return lhs.index < rhs.index; 114 | }); 115 | return args; 116 | } 117 | 118 | int64_t OpenclDevice::LoadTimeNanoSeconds() const { 119 | return Latest(load_event_) - 120 | Earliest(load_event_); 121 | } 122 | int64_t OpenclDevice::ComputeTimeNanoSeconds() const { 123 | return Latest(compute_event_) - 124 | Earliest(compute_event_); 125 | } 126 | int64_t OpenclDevice::StoreTimeNanoSeconds() const { 127 | return Latest(store_event_) - 128 | Earliest(store_event_); 129 | } 130 | size_t OpenclDevice::LoadBytes() const { 131 | size_t total_size = 0; 132 | cl_int err; 133 | for (const auto& buffer : GetLoadBuffers()) { 134 | total_size += buffer.getInfo(&err); 135 | CL_CHECK(err); 136 | } 137 | return total_size; 138 | } 139 | size_t OpenclDevice::StoreBytes() const { 140 | size_t total_size = 0; 141 | cl_int err; 142 | for (const auto& buffer : GetStoreBuffers()) { 143 | total_size += buffer.getInfo(&err); 144 | CL_CHECK(err); 145 | } 146 | return total_size; 147 | } 148 | 149 | void OpenclDevice::Initialize(const cl::Program::Binaries& binaries, 150 | const std::string& vendor_name, 151 | const OpenclDeviceMatcher& device_matcher, 152 | const std::vector& kernel_names, 153 | const std::vector& kernel_arg_counts) { 154 | std::vector platforms; 155 | CL_CHECK(cl::Platform::get(&platforms)); 156 | cl_int err; 157 | for (const auto& platform : platforms) { 158 | std::string platformName = platform.getInfo(&err); 159 | CL_CHECK(err); 160 | LOG(INFO) << "Found platform: " << platformName.c_str(); 161 | if (platformName == vendor_name) { 162 | std::vector 
devices; 163 | CL_CHECK(platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices)); 164 | for (const auto& device : devices) { 165 | if (std::string device_name = device_matcher.Match(device); 166 | !device_name.empty()) { 167 | LOG(INFO) << "Using " << device_name; 168 | device_ = device; 169 | context_ = cl::Context(device, nullptr, nullptr, nullptr, &err); 170 | if (err == CL_DEVICE_NOT_AVAILABLE) { 171 | LOG(WARNING) << "Device '" << device_name << "' not available"; 172 | continue; 173 | } 174 | CL_CHECK(err); 175 | cmd_ = cl::CommandQueue(context_, device, 176 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | 177 | CL_QUEUE_PROFILING_ENABLE, 178 | &err); 179 | CL_CHECK(err); 180 | std::vector binary_status; 181 | program_ = 182 | cl::Program(context_, {device}, binaries, &binary_status, &err); 183 | for (auto status : binary_status) { 184 | CL_CHECK(status); 185 | } 186 | CL_CHECK(err); 187 | CL_CHECK(program_.build()); 188 | for (int i = 0; i < kernel_names.size(); ++i) { 189 | kernels_[kernel_arg_counts[i]] = 190 | cl::Kernel(program_, kernel_names[i].c_str(), &err); 191 | CL_CHECK(err); 192 | } 193 | return; 194 | } 195 | } 196 | LOG(FATAL) << "Target device '" << device_matcher.GetTargetName() 197 | << "' not found"; 198 | } 199 | } 200 | LOG(FATAL) << "Target platform '" + vendor_name + "' not found"; 201 | } 202 | 203 | cl::Buffer OpenclDevice::CreateBuffer(int index, cl_mem_flags flags, 204 | void* host_ptr, size_t size) { 205 | cl_int err; 206 | auto buffer = cl::Buffer(context_, flags, size, host_ptr, &err); 207 | CL_CHECK(err); 208 | buffer_table_[index] = buffer; 209 | return buffer; 210 | } 211 | 212 | std::vector OpenclDevice::GetLoadBuffers() const { 213 | std::vector buffers; 214 | buffers.reserve(load_indices_.size()); 215 | for (auto index : load_indices_) { 216 | buffers.push_back(buffer_table_.at(index)); 217 | } 218 | return buffers; 219 | } 220 | 221 | std::vector OpenclDevice::GetStoreBuffers() const { 222 | std::vector buffers; 223 | 
buffers.reserve(store_indices_.size()); 224 | for (auto index : store_indices_) { 225 | buffers.push_back(buffer_table_.at(index)); 226 | } 227 | return buffers; 228 | } 229 | 230 | std::pair OpenclDevice::GetKernel(int index) const { 231 | auto it = std::prev(kernels_.upper_bound(index)); 232 | return {index - it->first, it->second}; 233 | } 234 | 235 | } // namespace internal 236 | } // namespace fpga 237 | -------------------------------------------------------------------------------- /src/frt/devices/opencl_device.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_OPENCL_DEVICE_H_ 2 | #define FPGA_RUNTIME_OPENCL_DEVICE_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include "frt/arg_info.h" 16 | #include "frt/device.h" 17 | #include "frt/devices/opencl_device_matcher.h" 18 | #include "frt/stream_wrapper.h" 19 | #include "frt/tag.h" 20 | 21 | namespace fpga { 22 | namespace internal { 23 | 24 | class OpenclDevice : public Device { 25 | public: 26 | void SetScalarArg(int index, const void* arg, int size) override; 27 | void SetBufferArg(int index, Tag tag, const BufferArg& arg) override; 28 | size_t SuspendBuffer(int index) override; 29 | 30 | void Exec() override; 31 | void Finish() override; 32 | 33 | std::vector GetArgsInfo() const override; 34 | int64_t LoadTimeNanoSeconds() const override; 35 | int64_t ComputeTimeNanoSeconds() const override; 36 | int64_t StoreTimeNanoSeconds() const override; 37 | size_t LoadBytes() const override; 38 | size_t StoreBytes() const override; 39 | 40 | protected: 41 | void Initialize(const cl::Program::Binaries& binaries, 42 | const std::string& vendor_name, 43 | const OpenclDeviceMatcher& device_matcher, 44 | const std::vector& kernel_names, 45 | const std::vector& kernel_arg_counts); 46 | virtual cl::Buffer CreateBuffer(int index, cl_mem_flags flags, void* host_ptr, 47 | size_t 
size); 48 | 49 | std::vector GetLoadBuffers() const; 50 | std::vector GetStoreBuffers() const; 51 | std::pair GetKernel(int index) const; 52 | 53 | cl::Device device_; 54 | cl::Context context_; 55 | cl::CommandQueue cmd_; 56 | cl::Program program_; 57 | // Maps prefix sum of arg count to kernels. 58 | std::map kernels_; 59 | std::unordered_map buffer_table_; 60 | std::unordered_map arg_table_; 61 | std::unordered_set load_indices_; 62 | std::unordered_set store_indices_; 63 | std::vector load_event_; 64 | std::vector compute_event_; 65 | std::vector store_event_; 66 | }; 67 | 68 | } // namespace internal 69 | } // namespace fpga 70 | 71 | #endif // FPGA_RUNTIME_OPENCL_DEVICE_H_ 72 | -------------------------------------------------------------------------------- /src/frt/devices/opencl_device_matcher.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FPGA_RUNTIME_OPENCL_DEVICE_MATCHER_H_ 3 | #define FPGA_RUNTIME_OPENCL_DEVICE_MATCHER_H_ 4 | 5 | #include 6 | 7 | #include 8 | 9 | namespace fpga { 10 | namespace internal { 11 | 12 | class OpenclDeviceMatcher { 13 | public: 14 | // Returns the name of the target device. 15 | virtual std::string GetTargetName() const = 0; 16 | 17 | // Returns the name of matched device. Empty if not matched. 
18 | virtual std::string Match(cl::Device device) const = 0; 19 | }; 20 | 21 | } // namespace internal 22 | } // namespace fpga 23 | 24 | #endif // FPGA_RUNTIME_OPENCL_DEVICE_MATCHER_H_ 25 | -------------------------------------------------------------------------------- /src/frt/devices/opencl_util.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_OPENCL_UTIL_H_ 2 | #define FPGA_RUNTIME_OPENCL_UTIL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #define CASE(err) \ 8 | case err: \ 9 | return #err; 10 | inline const char* OpenclErrToString(cl_int err) { 11 | switch (err) { 12 | CASE(CL_SUCCESS); 13 | CASE(CL_DEVICE_NOT_FOUND); 14 | CASE(CL_DEVICE_NOT_AVAILABLE); 15 | CASE(CL_COMPILER_NOT_AVAILABLE); 16 | CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE); 17 | CASE(CL_OUT_OF_RESOURCES); 18 | CASE(CL_OUT_OF_HOST_MEMORY); 19 | CASE(CL_PROFILING_INFO_NOT_AVAILABLE); 20 | CASE(CL_MEM_COPY_OVERLAP); 21 | CASE(CL_IMAGE_FORMAT_MISMATCH); 22 | CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED); 23 | CASE(CL_BUILD_PROGRAM_FAILURE); 24 | CASE(CL_MAP_FAILURE); 25 | #ifdef CL_VERSION_1_1 26 | CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET); 27 | CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); 28 | #endif // CL_VERSION_1_1 29 | #ifdef CL_VERSION_1_2 30 | CASE(CL_COMPILE_PROGRAM_FAILURE); 31 | CASE(CL_LINKER_NOT_AVAILABLE); 32 | CASE(CL_LINK_PROGRAM_FAILURE); 33 | CASE(CL_DEVICE_PARTITION_FAILED); 34 | CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE); 35 | #endif // CL_VERSION_1_2 36 | 37 | CASE(CL_INVALID_VALUE); 38 | CASE(CL_INVALID_DEVICE_TYPE); 39 | CASE(CL_INVALID_PLATFORM); 40 | CASE(CL_INVALID_DEVICE); 41 | CASE(CL_INVALID_CONTEXT); 42 | CASE(CL_INVALID_QUEUE_PROPERTIES); 43 | CASE(CL_INVALID_COMMAND_QUEUE); 44 | CASE(CL_INVALID_HOST_PTR); 45 | CASE(CL_INVALID_MEM_OBJECT); 46 | CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); 47 | CASE(CL_INVALID_IMAGE_SIZE); 48 | CASE(CL_INVALID_SAMPLER); 49 | CASE(CL_INVALID_BINARY); 50 | CASE(CL_INVALID_BUILD_OPTIONS); 51 | 
// Evaluates `err` exactly once and logs a fatal error, naming the OpenCL
// status, if it is not CL_SUCCESS.
//
// The local uses a name callers are unlikely to have in scope. With the
// previous name (`error`), an argument mentioning a caller variable called
// `error` expanded to a self-initialization.
#define CL_CHECK(err)                              \
  do {                                             \
    const cl_int cl_check_status_ = (err);         \
    LOG_IF(FATAL, cl_check_status_ != CL_SUCCESS)  \
        << OpenclErrToString(cl_check_status_);    \
  } while (0)
// Returns the path of the input data file for argument `index` inside
// `work_dir`, i.e. "<work_dir>/<index>.bin".
std::string GetInputDataPath(const std::string& work_dir, int index) {
  std::string data_path = work_dir;
  data_path += "/";
  data_path += std::to_string(index);
  data_path += ".bin";
  return data_path;
}
constexpr std::string_view kSuffix = "/kernel.xml"; 83 | if (info.filename.size() >= kSuffix.size() && 84 | std::equal(kSuffix.rbegin(), kSuffix.rend(), info.filename.rbegin())) { 85 | kernel_xml = xo_file.read(info); 86 | break; 87 | } 88 | } 89 | LOG_IF(FATAL, kernel_xml.empty()) 90 | << "Missing 'kernel.xml' in '" << xo_path << "'"; 91 | 92 | TiXmlDocument doc; 93 | doc.Parse(kernel_xml.data(), nullptr, TIXML_ENCODING_UTF8); 94 | for (const TiXmlElement* xml_arg = doc.FirstChildElement("root") 95 | ->FirstChildElement("kernel") 96 | ->FirstChildElement("args") 97 | ->FirstChildElement("arg"); 98 | xml_arg != nullptr; xml_arg = xml_arg->NextSiblingElement("arg")) { 99 | ArgInfo arg; 100 | arg.index = atoi(xml_arg->Attribute("id")); 101 | LOG_IF(FATAL, arg.index != args_.size()) 102 | << "Expecting argument #" << args_.size() << ", got argument #" 103 | << arg.index << " in the metadata"; 104 | arg.name = xml_arg->Attribute("name"); 105 | arg.type = xml_arg->Attribute("type"); 106 | switch (int cat = atoi(xml_arg->Attribute("addressQualifier")); cat) { 107 | case 0: 108 | arg.cat = ArgInfo::kScalar; 109 | break; 110 | case 1: 111 | arg.cat = ArgInfo::kMmap; 112 | break; 113 | case 4: 114 | arg.cat = ArgInfo::kStream; 115 | break; 116 | default: 117 | LOG(WARNING) << "Unknown argument category: " << cat; 118 | } 119 | args_.push_back(arg); 120 | } 121 | 122 | LOG(INFO) << "Running hardware simulation with TAPA fast cosim"; 123 | } 124 | 125 | TapaFastCosimDevice::~TapaFastCosimDevice() { 126 | if (FLAGS_xosim_work_dir.empty()) { 127 | fs::remove_all(work_dir); 128 | } 129 | } 130 | 131 | std::unique_ptr TapaFastCosimDevice::New(std::string_view path, 132 | std::string_view content) { 133 | constexpr std::string_view kZipMagic("PK\3\4", 4); 134 | if (content.size() < kZipMagic.size() || 135 | memcmp(content.data(), kZipMagic.data(), kZipMagic.size()) != 0) { 136 | return nullptr; 137 | } 138 | return std::make_unique(path); 139 | } 140 | 141 | void 
TapaFastCosimDevice::SetScalarArg(int index, const void* arg, int size) { 142 | LOG_IF(FATAL, index >= args_.size()) 143 | << "Cannot set argument #" << index << "; there are only " << args_.size() 144 | << " arguments"; 145 | LOG_IF(FATAL, args_[index].cat != ArgInfo::kScalar) 146 | << "Cannot set argument '" << args_[index].name 147 | << "' as a scalar; it is a " << args_[index].cat; 148 | std::basic_string_view arg_str( 149 | reinterpret_cast(arg), size); 150 | std::stringstream ss; 151 | ss << "'h"; 152 | // Assuming litten-endian. 153 | for (auto it = arg_str.crbegin(); it < arg_str.crend(); ++it) { 154 | ss << std::setfill('0') << std::setw(2) << std::hex << int(*it); 155 | } 156 | scalars_[index] = ss.str(); 157 | } 158 | 159 | void TapaFastCosimDevice::SetBufferArg(int index, Tag tag, 160 | const BufferArg& arg) { 161 | LOG_IF(FATAL, index >= args_.size()) 162 | << "Cannot set argument #" << index << "; there are only " << args_.size() 163 | << " arguments"; 164 | LOG_IF(FATAL, args_[index].cat != ArgInfo::kMmap) 165 | << "Cannot set argument '" << args_[index].name 166 | << "' as an mmap; it is a " << args_[index].cat; 167 | buffer_table_.insert({index, arg}); 168 | if (tag == Tag::kReadOnly || tag == Tag::kReadWrite) { 169 | store_indices_.insert(index); 170 | } 171 | if (tag == Tag::kWriteOnly || tag == Tag::kReadWrite) { 172 | load_indices_.insert(index); 173 | } 174 | } 175 | 176 | void TapaFastCosimDevice::SetStreamArg(int index, Tag tag, StreamWrapper& arg) { 177 | LOG(FATAL) << "TAPA fast cosim device does not support streaming"; 178 | } 179 | 180 | size_t TapaFastCosimDevice::SuspendBuffer(int index) { 181 | return load_indices_.erase(index) + store_indices_.erase(index); 182 | } 183 | 184 | void TapaFastCosimDevice::WriteToDevice() { 185 | // All buffers must have a data file. 
186 | auto tic = clock::now(); 187 | for (const auto& [index, buffer_arg] : buffer_table_) { 188 | std::ofstream(GetInputDataPath(work_dir, index), 189 | std::ios::out | std::ios::binary) 190 | .write(buffer_arg.Get(), buffer_arg.SizeInBytes()); 191 | } 192 | load_time_ = clock::now() - tic; 193 | } 194 | 195 | void TapaFastCosimDevice::ReadFromDevice() { 196 | auto tic = clock::now(); 197 | for (int index : store_indices_) { 198 | auto buffer_arg = buffer_table_.at(index); 199 | std::ifstream(GetOutputDataPath(work_dir, index), 200 | std::ios::in | std::ios::binary) 201 | .read(buffer_arg.Get(), buffer_arg.SizeInBytes()); 202 | } 203 | store_time_ = clock::now() - tic; 204 | } 205 | 206 | void TapaFastCosimDevice::Exec() { 207 | auto tic = clock::now(); 208 | 209 | nlohmann::json json; 210 | json["xo_path"] = xo_path; 211 | auto& scalar_to_val = json["scalar_to_val"]; 212 | for (const auto& [index, scalar] : scalars_) { 213 | scalar_to_val[std::to_string(index)] = scalar; 214 | } 215 | auto& axi_to_c_array_size = json["axi_to_c_array_size"]; 216 | auto& axi_to_data_file = json["axi_to_data_file"]; 217 | for (const auto& [index, content] : buffer_table_) { 218 | axi_to_c_array_size[std::to_string(index)] = content.SizeInCount(); 219 | axi_to_data_file[std::to_string(index)] = GetInputDataPath(work_dir, index); 220 | } 221 | std::ofstream(GetConfigPath(work_dir)) << json.dump(2); 222 | 223 | std::vector argv = { 224 | "python3", 225 | "-m", 226 | "tapa_fast_cosim.main", 227 | "--config_path=" + GetConfigPath(work_dir), 228 | "--tb_output_dir=" + work_dir + "/output", 229 | "--launch_simulation", 230 | }; 231 | if (FLAGS_xosim_start_gui) { 232 | argv.push_back("--start_gui"); 233 | } 234 | if (FLAGS_xosim_save_waveform) { 235 | argv.push_back("--save_waveform"); 236 | } 237 | int rc = 238 | subprocess::Popen(argv, subprocess::environment(xilinx::GetEnviron())) 239 | .wait(); 240 | LOG_IF(FATAL, rc != 0) << "TAPA fast cosim failed"; 241 | 242 | compute_time_ = 
clock::now() - tic; 243 | } 244 | 245 | void TapaFastCosimDevice::Finish() { 246 | // Not implemented. 247 | } 248 | 249 | std::vector TapaFastCosimDevice::GetArgsInfo() const { return args_; } 250 | 251 | int64_t TapaFastCosimDevice::LoadTimeNanoSeconds() const { 252 | return load_time_.count(); 253 | } 254 | 255 | int64_t TapaFastCosimDevice::ComputeTimeNanoSeconds() const { 256 | return compute_time_.count(); 257 | } 258 | 259 | int64_t TapaFastCosimDevice::StoreTimeNanoSeconds() const { 260 | return store_time_.count(); 261 | } 262 | 263 | size_t TapaFastCosimDevice::LoadBytes() const { 264 | size_t total_size = 0; 265 | for (auto& [index, buffer_arg] : buffer_table_) { 266 | total_size += buffer_arg.SizeInBytes(); 267 | } 268 | return total_size; 269 | } 270 | 271 | size_t TapaFastCosimDevice::StoreBytes() const { 272 | size_t total_size = 0; 273 | for (int index : store_indices_) { 274 | auto buffer_arg = buffer_table_.at(index); 275 | total_size += buffer_arg.SizeInBytes(); 276 | } 277 | return total_size; 278 | } 279 | 280 | } // namespace internal 281 | } // namespace fpga 282 | -------------------------------------------------------------------------------- /src/frt/devices/tapa_fast_cosim_device.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_TAPA_FAST_COSIM_ 2 | #define FPGA_RUNTIME_TAPA_FAST_COSIM_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include "frt/buffer.h" 14 | #include "frt/device.h" 15 | 16 | namespace fpga { 17 | namespace internal { 18 | 19 | class TapaFastCosimDevice : public Device { 20 | public: 21 | TapaFastCosimDevice(std::string_view bitstream); 22 | TapaFastCosimDevice(const TapaFastCosimDevice&) = delete; 23 | TapaFastCosimDevice& operator=(const TapaFastCosimDevice&) = delete; 24 | TapaFastCosimDevice(TapaFastCosimDevice&&) = delete; 25 | TapaFastCosimDevice& operator=(TapaFastCosimDevice&&) = delete; 26 
| 27 | ~TapaFastCosimDevice() override; 28 | 29 | static std::unique_ptr New(std::string_view path, 30 | std::string_view content); 31 | 32 | void SetScalarArg(int index, const void* arg, int size) override; 33 | void SetBufferArg(int index, Tag tag, const BufferArg& arg) override; 34 | void SetStreamArg(int index, Tag tag, StreamWrapper& arg) override; 35 | size_t SuspendBuffer(int index) override; 36 | 37 | void WriteToDevice() override; 38 | void ReadFromDevice() override; 39 | void Exec() override; 40 | void Finish() override; 41 | 42 | std::vector GetArgsInfo() const override; 43 | int64_t LoadTimeNanoSeconds() const override; 44 | int64_t ComputeTimeNanoSeconds() const override; 45 | int64_t StoreTimeNanoSeconds() const override; 46 | size_t LoadBytes() const override; 47 | size_t StoreBytes() const override; 48 | 49 | const std::string xo_path; 50 | const std::string work_dir; 51 | 52 | private: 53 | std::unordered_map scalars_; 54 | std::unordered_map buffer_table_; 55 | std::vector args_; 56 | std::unordered_set load_indices_; 57 | std::unordered_set store_indices_; 58 | 59 | std::chrono::nanoseconds load_time_; 60 | std::chrono::nanoseconds compute_time_; 61 | std::chrono::nanoseconds store_time_; 62 | }; 63 | 64 | } // namespace internal 65 | } // namespace fpga 66 | 67 | #endif // FPGA_RUNTIME_TAPA_FAST_COSIM_ 68 | -------------------------------------------------------------------------------- /src/frt/devices/xilinx_environ.cpp: -------------------------------------------------------------------------------- 1 | #include "xilinx_environ.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace fpga::xilinx { 10 | 11 | namespace { 12 | 13 | void UpdateEnviron(std::string_view script, Environ& environ) { 14 | subprocess::OutBuffer output = subprocess::check_output( 15 | { 16 | "bash", 17 | "-c", 18 | "source \"$0\" >/dev/null 2>&1 && env -0", 19 | script, 20 | }, 21 | subprocess::environment(environ)); 22 | 23 | for (size_t n = 0; 
n < output.length;) { 24 | std::string_view line = output.buf.data() + n; 25 | n += line.size() + 1; 26 | 27 | auto pos = line.find('='); 28 | environ[std::string(line.substr(0, pos))] = line.substr(pos + 1); 29 | } 30 | } 31 | 32 | } // namespace 33 | 34 | Environ GetEnviron() { 35 | std::string xilinx_tool; 36 | for (const char* env : { 37 | "XILINX_VITIS", 38 | "XILINX_SDX", 39 | "XILINX_HLS", 40 | "XILINX_VIVADO", 41 | }) { 42 | if (const char* value = getenv(env)) { 43 | xilinx_tool = value; 44 | break; 45 | } 46 | } 47 | 48 | if (xilinx_tool.empty()) { 49 | for (std::string hls : {"vitis_hls", "vivado_hls"}) { 50 | subprocess::OutBuffer buf = subprocess::check_output({ 51 | "bash", 52 | "-c", 53 | "\"$0\" -version -help -l /dev/null 2>/dev/null", 54 | hls, 55 | }); 56 | std::istringstream lines(std::string(buf.buf.data(), buf.length)); 57 | for (std::string line; getline(lines, line);) { 58 | std::string_view prefix = "source "; 59 | std::string suffix = "/scripts/" + hls + "/hls.tcl -notrace"; 60 | if (line.size() > prefix.size() + suffix.size() && 61 | line.compare(0, prefix.size(), prefix) == 0 && 62 | line.compare(line.size() - suffix.size(), suffix.size(), suffix) == 63 | 0) { 64 | xilinx_tool = line.substr( 65 | prefix.size(), line.size() - prefix.size() - suffix.size()); 66 | break; 67 | } 68 | } 69 | } 70 | } 71 | 72 | Environ environ; 73 | UpdateEnviron(xilinx_tool + "/settings64.sh", environ); 74 | if (const char* xrt = getenv("XILINX_XRT")) { 75 | UpdateEnviron(std::string(xrt) + "/setup.sh", environ); 76 | } 77 | return environ; 78 | } 79 | 80 | } // namespace fpga::xilinx 81 | -------------------------------------------------------------------------------- /src/frt/devices/xilinx_environ.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace fpga::xilinx { 5 | 6 | using Environ = std::unordered_map; 7 | 8 | Environ GetEnviron(); 9 | 10 | } // namespace fpga::xilinx 11 | 
/// Splits `text` at `delimiter`, performing at most `maxsplit` splits.
///
/// The result always contains at least one piece; once the split limit is
/// reached the remainder of `text` is returned unsplit as the last piece.
/// Adjacent delimiters produce empty pieces. The returned views point into the
/// storage behind `text`, which must outlive them.
std::vector<std::string_view> Split(std::string_view text, char delimiter,
                                    size_t maxsplit = static_cast<size_t>(-1)) {
  std::vector<std::string_view> pieces;
  while (pieces.size() < maxsplit) {
    const size_t pos = text.find(delimiter);
    if (pos == std::string_view::npos) break;
    pieces.push_back(text.substr(0, pos));
    text.remove_prefix(pos + 1);
  }
  pieces.push_back(text);
  return pieces;
}

/// Returns true if `text` begins with `prefix` (an empty prefix always
/// matches).
bool StartsWith(std::string_view text, std::string_view prefix) {
  return text.size() >= prefix.size() &&
         text.substr(0, prefix.size()) == prefix;
}

/// Concatenates `pieces` into a single string with one allocation.
std::string Concat(std::initializer_list<std::string_view> pieces) {
  size_t total_length = 0;
  for (auto piece : pieces) total_length += piece.size();
  std::string text;
  text.reserve(total_length);
  for (auto piece : pieces) text += piece;
  return text;
}
123 | // For 2), this is {"xilinx", "u200", "gen3x16", "xdma", "1_202110_1"}. 124 | if (target_device_name_pieces_.size() < 5) return ""; 125 | 126 | // For 1), this is {"xilinx", "u250", "gen3x16", "xdma", "shell", "3_1"}. 127 | // For 2), this is {"xilinx", "u200", "gen3x16", "xdma", "base", "1"}. 128 | std::vector device_name_pieces = 129 | Split(device_name, /*delimiter=*/'_', /*maxsplit=*/5); 130 | if (device_name_pieces.size() < 6) return ""; 131 | 132 | for (int i = 0; i < 4; ++i) { 133 | if (device_name_pieces[i] != target_device_name_pieces_[i]) return ""; 134 | } 135 | 136 | if (StartsWith(target_device_name_pieces_[4], device_name_pieces[5])) { 137 | return device_name_and_bdf; 138 | } 139 | 140 | return ""; 141 | } 142 | 143 | private: 144 | const std::string target_device_name_; 145 | const std::vector target_device_name_pieces_; 146 | }; 147 | 148 | } // namespace 149 | 150 | XilinxOpenclDevice::XilinxOpenclDevice(const cl::Program::Binaries& binaries) { 151 | std::string target_device_name; 152 | std::vector kernel_names; 153 | std::vector kernel_arg_counts; 154 | int arg_count = 0; 155 | const auto axlf_top = reinterpret_cast(binaries.begin()->data()); 156 | switch (axlf_top->m_header.m_mode) { 157 | case XCLBIN_FLAT: 158 | case XCLBIN_PR: 159 | case XCLBIN_TANDEM_STAGE2: 160 | case XCLBIN_TANDEM_STAGE2_WITH_PR: 161 | break; 162 | case XCLBIN_HW_EMU: 163 | setenv("XCL_EMULATION_MODE", "hw_emu", 0); 164 | break; 165 | case XCLBIN_SW_EMU: 166 | setenv("XCL_EMULATION_MODE", "sw_emu", 0); 167 | break; 168 | default: 169 | LOG(FATAL) << "Unknown xclbin mode"; 170 | } 171 | target_device_name = 172 | reinterpret_cast(axlf_top->m_header.m_platformVBNV); 173 | LOG_IF(FATAL, target_device_name.empty()) 174 | << "Cannot determine target device name from binary"; 175 | if (auto metadata = xclbin::get_axlf_section(axlf_top, EMBEDDED_METADATA)) { 176 | TiXmlDocument doc; 177 | doc.Parse( 178 | reinterpret_cast(axlf_top) + metadata->m_sectionOffset, 179 | 
nullptr, TIXML_ENCODING_UTF8); 180 | auto xml_core = doc.FirstChildElement("project") 181 | ->FirstChildElement("platform") 182 | ->FirstChildElement("device") 183 | ->FirstChildElement("core"); 184 | std::string target_meta = xml_core->Attribute("target"); 185 | for (auto xml_kernel = xml_core->FirstChildElement("kernel"); 186 | xml_kernel != nullptr; 187 | xml_kernel = xml_kernel->NextSiblingElement("kernel")) { 188 | kernel_names.push_back(xml_kernel->Attribute("name")); 189 | kernel_arg_counts.push_back(arg_count); 190 | for (auto xml_arg = xml_kernel->FirstChildElement("arg"); 191 | xml_arg != nullptr; xml_arg = xml_arg->NextSiblingElement("arg")) { 192 | auto& arg = arg_table_[arg_count]; 193 | arg.index = arg_count; 194 | ++arg_count; 195 | arg.name = xml_arg->Attribute("name"); 196 | arg.type = xml_arg->Attribute("type"); 197 | auto cat = atoi(xml_arg->Attribute("addressQualifier")); 198 | switch (cat) { 199 | case 0: 200 | arg.cat = ArgInfo::kScalar; 201 | break; 202 | case 1: 203 | arg.cat = ArgInfo::kMmap; 204 | break; 205 | case 4: 206 | arg.cat = ArgInfo::kStream; 207 | break; 208 | default: 209 | LOG(WARNING) << "Unknown argument category: " << cat; 210 | } 211 | } 212 | } 213 | // m_mode doesn't always work 214 | if (target_meta == "hw_em") { 215 | setenv("XCL_EMULATION_MODE", "hw_emu", 0); 216 | } else if (target_meta == "csim") { 217 | setenv("XCL_EMULATION_MODE", "sw_emu", 0); 218 | } 219 | } else { 220 | LOG(FATAL) << "Cannot determine kernel name from binary"; 221 | } 222 | 223 | if (const char* xcl_emulation_mode = getenv("XCL_EMULATION_MODE")) { 224 | for (const auto& [name, value] : xilinx::GetEnviron()) { 225 | setenv(name.c_str(), value.c_str(), /* __replace = */ 1); 226 | } 227 | 228 | const auto uid = std::to_string(geteuid()); 229 | 230 | // Vitis software simulation stucks without $USER. 
231 | setenv("USER", uid.c_str(), /* __replace = */ 0); 232 | 233 | const char* tmpdir_or_null = getenv("TMPDIR"); 234 | std::string tmpdir = tmpdir_or_null ? tmpdir_or_null : "/tmp"; 235 | tmpdir += "/.frt." + uid; 236 | if (mkdir(tmpdir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR) && errno != EEXIST) { 237 | LOG(FATAL) << "Cannot create FRT tmpdir '" << tmpdir 238 | << "': " << strerror(errno); 239 | } 240 | 241 | // If SDACCEL_EM_RUN_DIR is not set, use a per-use tmpdir for `.run`. 242 | setenv("SDACCEL_EM_RUN_DIR", tmpdir.c_str(), 0); 243 | 244 | // If EMCONFIG_PATH is not set, use a per-user and per-device tmpdir to 245 | // cache `emconfig.json`. 246 | fs::path emconfig_dir; 247 | if (const char* emconfig_dir_or_null = getenv("EMCONFIG_PATH")) { 248 | emconfig_dir = emconfig_dir_or_null; 249 | } else { 250 | emconfig_dir = tmpdir; 251 | emconfig_dir /= "emconfig." + target_device_name; 252 | setenv("EMCONFIG_PATH", emconfig_dir.c_str(), 0); 253 | } 254 | 255 | // Detect if emconfig already exists. 256 | bool is_emconfig_ready = false; 257 | if (fs::path emconfig_path = emconfig_dir / "emconfig.json"; 258 | fs::is_regular_file(emconfig_path)) { 259 | nlohmann::json json = nlohmann::json::parse(std::ifstream(emconfig_path)); 260 | try { 261 | for (const auto& board : json.at("Platform").at("Boards")) { 262 | for (const auto& device : board.at("Devices")) { 263 | if (device.at("Name") == target_device_name) { 264 | is_emconfig_ready = true; 265 | } 266 | } 267 | } 268 | } catch (const nlohmann::json::out_of_range&) { 269 | } 270 | } 271 | 272 | // Generate `emconfig.json` when necessary. 273 | if (!is_emconfig_ready) { 274 | fs::path emconfig_dir_per_pid = emconfig_dir; 275 | emconfig_dir_per_pid += "." 
+ std::to_string(getpid()); 276 | int return_code = subprocess::call({ 277 | "emconfigutil", 278 | "--platform", 279 | target_device_name, 280 | "--od", 281 | emconfig_dir_per_pid.native(), 282 | }); 283 | LOG_IF(FATAL, return_code != 0) << "emconfigutil failed"; 284 | 285 | // Use `rename` to create the emconfig directory atomically. 286 | fs::path emconfig_dir_per_pid_tmp = emconfig_dir_per_pid; 287 | emconfig_dir_per_pid_tmp += ".tmp"; 288 | fs::create_directory_symlink( 289 | emconfig_dir_per_pid.filename(), // Use relative path for symlink. 290 | emconfig_dir_per_pid_tmp); 291 | fs::rename(emconfig_dir_per_pid_tmp, emconfig_dir); 292 | } 293 | if (xcl_emulation_mode == std::string_view("sw_emu")) { 294 | LOG(INFO) << "Running software simulation with Xilinx OpenCL"; 295 | } else if (xcl_emulation_mode == std::string_view("hw_emu")) { 296 | LOG(INFO) << "Running hardware simulation with Xilinx OpenCL"; 297 | } else { 298 | LOG(FATAL) << "Unexpected XCL_EMULATION_MODE: " << xcl_emulation_mode; 299 | } 300 | } else { 301 | LOG(INFO) << "Running on-board execution with Xilinx OpenCL"; 302 | } 303 | 304 | Initialize(binaries, /*vendor_name=*/"Xilinx", 305 | DeviceMatcher(target_device_name), kernel_names, 306 | kernel_arg_counts); 307 | } 308 | 309 | std::unique_ptr XilinxOpenclDevice::New( 310 | const cl::Program::Binaries& binaries) { 311 | if (binaries.size() != 1 || binaries.begin()->size() < 8 || 312 | memcmp(binaries.begin()->data(), "xclbin2", 8) != 0) { 313 | return nullptr; 314 | } 315 | return std::make_unique(binaries); 316 | } 317 | 318 | void XilinxOpenclDevice::SetStreamArg(int index, Tag tag, StreamWrapper& arg) { 319 | #ifdef FRT_ENABLE_XOCL_STREAM 320 | auto pair = GetKernel(index); 321 | arg.Attach(std::make_unique( 322 | arg.name, device_, pair.second, pair.first, tag)); 323 | #else // FRT_ENABLE_XOCL_STREAM 324 | LOG(FATAL) << "Xilinx OpenCL streaming is disabled"; 325 | #endif // FRT_ENABLE_XOCL_STREAM 326 | } 327 | 328 | void 
XilinxOpenclDevice::WriteToDevice() { 329 | if (!load_indices_.empty()) { 330 | load_event_.resize(1); 331 | CL_CHECK(cmd_.enqueueMigrateMemObjects(GetLoadBuffers(), /* flags = */ 0, 332 | /* events = */ nullptr, 333 | load_event_.data())); 334 | } else { 335 | load_event_.clear(); 336 | } 337 | } 338 | 339 | void XilinxOpenclDevice::ReadFromDevice() { 340 | if (!store_indices_.empty()) { 341 | store_event_.resize(1); 342 | CL_CHECK(cmd_.enqueueMigrateMemObjects( 343 | GetStoreBuffers(), CL_MIGRATE_MEM_OBJECT_HOST, &compute_event_, 344 | store_event_.data())); 345 | } else { 346 | store_event_.clear(); 347 | } 348 | } 349 | 350 | cl::Buffer XilinxOpenclDevice::CreateBuffer(int index, cl_mem_flags flags, 351 | void* host_ptr, size_t size) { 352 | flags |= CL_MEM_USE_HOST_PTR; 353 | return OpenclDevice::CreateBuffer(index, flags, host_ptr, size); 354 | } 355 | 356 | } // namespace internal 357 | } // namespace fpga 358 | -------------------------------------------------------------------------------- /src/frt/devices/xilinx_opencl_device.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_XILINX_OPENCL_DEVICE_H_ 2 | #define FPGA_RUNTIME_XILINX_OPENCL_DEVICE_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "frt/devices/opencl_device.h" 13 | 14 | namespace fpga { 15 | namespace internal { 16 | 17 | class XilinxOpenclDevice : public OpenclDevice { 18 | public: 19 | XilinxOpenclDevice(const cl::Program::Binaries& binaries); 20 | 21 | static std::unique_ptr New(const cl::Program::Binaries& binaries); 22 | 23 | void SetStreamArg(int index, Tag tag, StreamWrapper& arg) override; 24 | void WriteToDevice() override; 25 | void ReadFromDevice() override; 26 | 27 | private: 28 | cl::Buffer CreateBuffer(int index, cl_mem_flags flags, void* host_ptr, 29 | size_t size) override; 30 | }; 31 | 32 | } // namespace internal 33 | } // namespace fpga 34 | 35 | #endif // 
FPGA_RUNTIME_XILINX_OPENCL_DEVICE_H_ 36 | -------------------------------------------------------------------------------- /src/frt/devices/xilinx_opencl_stream.cpp: -------------------------------------------------------------------------------- 1 | #include "frt/devices/xilinx_opencl_stream.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "frt/devices/opencl_util.h" 10 | 11 | // Link against libxilinxopencl only if necessary. 12 | #pragma weak clCreateStream 13 | #pragma weak clReadStream 14 | #pragma weak clReleaseStream 15 | #pragma weak clWriteStream 16 | 17 | namespace fpga { 18 | namespace internal { 19 | 20 | XilinxOpenclStream::~XilinxOpenclStream() { 21 | if (stream_ != nullptr) { 22 | auto err = clReleaseStream(stream_); 23 | if (err != CL_SUCCESS) { 24 | LOG(ERROR) << "clReleaseStream: " << OpenclErrToString(err); 25 | } 26 | } 27 | } 28 | 29 | XilinxOpenclStream::XilinxOpenclStream(const std::string& name, 30 | cl::Device device, cl::Kernel kernel, 31 | int index, Tag tag) 32 | : name_(name), kernel_(std::move(kernel)), device_(std::move(device)) { 33 | cl_stream_flags flags; 34 | switch (tag) { 35 | case Tag::kReadOnly: 36 | flags = 37 | #ifdef XCL_STREAM_WRITE_ONLY 38 | XCL_STREAM_WRITE_ONLY 39 | #else // XCL_STREAM_WRITE_ONLY 40 | CL_STREAM_READ_ONLY 41 | #endif // XCL_STREAM_WRITE_ONLY 42 | ; 43 | break; 44 | case Tag::kWriteOnly: 45 | flags = 46 | #ifdef XCL_STREAM_READ_ONLY 47 | XCL_STREAM_READ_ONLY 48 | #else // XCL_STREAM_READ_ONLY 49 | CL_STREAM_WRITE_ONLY 50 | #endif // XCL_STREAM_READ_ONLY 51 | ; 52 | break; 53 | default: 54 | LOG(FATAL) << "Invalid argument"; 55 | } 56 | 57 | VLOG(1) << "Stream '" << name_ << "' attached to argument #" << index; 58 | cl_mem_ext_ptr_t ext; 59 | ext.flags = index; 60 | ext.param = kernel_.get(); 61 | ext.obj = nullptr; 62 | cl_int err; 63 | stream_ = clCreateStream(device_.get(), flags, CL_STREAM, &ext, &err); 64 | CL_CHECK(err); 65 | } 66 | 67 | void 
XilinxOpenclStream::Read(void* host_ptr, size_t size, bool eot) { 68 | if (stream_ == nullptr) { 69 | LOG(FATAL) << "Cannot read from null stream"; 70 | } 71 | cl_stream_xfer_req req{0}; 72 | if (eot) { 73 | req.flags = CL_STREAM_EOT; 74 | } 75 | req.priv_data = const_cast(name_.c_str()); 76 | cl_int err; 77 | clReadStream(stream_, host_ptr, size, &req, &err); 78 | CL_CHECK(err); 79 | } 80 | 81 | void XilinxOpenclStream::Write(const void* host_ptr, size_t size, bool eot) { 82 | if (stream_ == nullptr) { 83 | LOG(FATAL) << "Cannot write to null stream"; 84 | } 85 | cl_stream_xfer_req req{0}; 86 | if (eot) { 87 | req.flags = CL_STREAM_EOT; 88 | } 89 | req.priv_data = const_cast(name_.c_str()); 90 | cl_int err; 91 | clWriteStream(stream_, const_cast(host_ptr), size, &req, &err); 92 | CL_CHECK(err); 93 | } 94 | 95 | } // namespace internal 96 | } // namespace fpga 97 | -------------------------------------------------------------------------------- /src/frt/devices/xilinx_opencl_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGA_RUNTIME_XILINX_OPENCL_STREAM_H_ 2 | #define FPGA_RUNTIME_XILINX_OPENCL_STREAM_H_ 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include "frt/stream_interface.h" 9 | #include "frt/tag.h" 10 | 11 | extern "C" { 12 | struct _cl_stream; 13 | } // extern "C" 14 | 15 | namespace fpga { 16 | namespace internal { 17 | 18 | class XilinxOpenclStream : public StreamInterface { 19 | public: 20 | XilinxOpenclStream(const std::string& name, cl::Device device, 21 | cl::Kernel kernel, int index, Tag tag); 22 | XilinxOpenclStream(const XilinxOpenclStream&) = delete; 23 | XilinxOpenclStream& operator=(const XilinxOpenclStream&) = delete; 24 | XilinxOpenclStream(XilinxOpenclStream&&) = delete; 25 | XilinxOpenclStream& operator=(XilinxOpenclStream&&) = delete; 26 | ~XilinxOpenclStream() override; 27 | 28 | void Read(void* ptr, size_t size, bool eot) override; 29 | void Write(const void* ptr, size_t size, bool 
namespace fpga {
namespace internal {

/// Tag attached to kernel arguments. In the visible code, `kReadOnly` and
/// `kWriteOnly` select which `Stream` specialization is used.
enum class Tag {
  kPlaceHolder = 0,
  kReadOnly = 1,
  kWriteOnly = 2,
  kReadWrite = 3,
};

/// Backend interface of a stream. Sizes are in bytes; `eot` is forwarded to
/// the backend with each transfer.
class StreamInterface {
 public:
  virtual ~StreamInterface() = default;
  virtual void Read(void* ptr, size_t size, bool eot) = 0;
  virtual void Write(const void* ptr, size_t size, bool eot) = 0;
};

/// Named handle that owns the backend stream once one is attached.
class StreamWrapper {
 public:
  /// Takes ownership of `stream`, replacing any previously attached backend.
  void Attach(std::unique_ptr<StreamInterface>&& stream) {
    stream_ = std::move(stream);
  }
  const std::string name;

 protected:
  StreamWrapper(const std::string& name) : name(name) {}
  std::unique_ptr<StreamInterface> stream_;
};

template <Tag tag>
class Stream;

/// Stream the host reads from. A backend must be attached before `Read`.
template <>
class Stream<Tag::kReadOnly> : public StreamWrapper {
 public:
  Stream(const std::string& name) : StreamWrapper(name) {}

  /// Reads `size` elements of `T` (`size * sizeof(T)` bytes) into `host_ptr`.
  template <typename T>
  void Read(T* host_ptr, size_t size, bool eot = true) {
    stream_->Read(host_ptr, size * sizeof(T), eot);
  }
};

/// Stream the host writes to. A backend must be attached before `Write`.
template <>
class Stream<Tag::kWriteOnly> : public StreamWrapper {
 public:
  Stream(const std::string& name) : StreamWrapper(name) {}

  /// Writes `size` elements of `T` (`size * sizeof(T)` bytes) from `host_ptr`.
  template <typename T>
  void Write(const T* host_ptr, size_t size, bool eot = true) {
    stream_->Write(host_ptr, size * sizeof(T), eot);
  }
};

}  // namespace internal
}  // namespace fpga
| fpga::xilinx::Environ environ = fpga::xilinx::GetEnviron(); 19 | 20 | // If arguments are specified, only print specified environment variables. 21 | if (argc > 1) { 22 | for (int i = 1; i < argc; ++i) { 23 | std::string key = argv[i]; 24 | WriteEnv(key, environ[key]); 25 | } 26 | return 0; 27 | } 28 | 29 | for (const auto& [key, value] : environ) { 30 | WriteEnv(key, value); 31 | } 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /src/frt_vpp_wrapper: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This is a drop-in replacement `v++` wrapper that moves the `--temp_dir` to a 4 | # local filesystem when possible. 5 | # 6 | # This script is designed to avoid remote filesystem I/O and thus improve run 7 | # time considerably, without changing the user experience significantly. 8 | # 9 | # If `--temp_dir=/path/to/dir` is specified and `/path/to/dir` is on a remote 10 | # filesystem (e.g., ceph, cifs, or nfs), this script will try to create a local 11 | # temporary directory using `mktemp`, bind-mount that directory (using 12 | # `mount --bind` or `bindfs`), and pack the results to `/path/to/dir.sqfs` 13 | # using `mksquashfs`. If possible, this script will also try to mount the packed 14 | # squashfs to `/path/to/dir`. Note that `--temp_dir /path/to/dir` does not 15 | # trigger this optimization. If `--temp_dir /path/to/dir1` and 16 | # `--temp_dir=/path/to/dir2` are mixed, the last `--temp_dir=` takes precedence, 17 | # but it may not be honored by the wrapped command. 18 | # 19 | # The exit code is designed to always reflect the result of the wrapped command. 
#
# Possible outcomes from this script:
#   1) No behavior is modified, if `--temp_dir` is not specified, or
#      `--temp_dir` is specified using ' ' as the separator, instead of '=', or
#      the local temporary directory cannot be efficiently packed and saved,
#      e.g., `mksquashfs` is unavailable, or `/path/to/dir.sqfs` is not
#      writable.
#   2) `temp_dir` is successfully packed to `/path/to/dir.sqfs`, and
#      `/path/to/dir` is mounted read-only from `/path/to/dir.sqfs`. The
#      wrapped command may or may not succeed. The local temporary directory is
#      removed.
#   3) `temp_dir` is successfully packed, but `/path/to/dir` is not mounted
#      from `/path/to/dir.sqfs`. The wrapped command may or may not succeed. In
#      this case, the user may mount it manually (recommended), or `unsquashfs`
#      it (not recommended). The local temporary directory is removed. This can
#      happen, e.g., if `squashfuse` is unavailable.
#   4) `temp_dir` cannot be packed successfully. The local temporary directory
#      is kept as-is. This is not supposed to happen unless the state of system
#      changes while the wrapped command was running.

set -e

# The single-quoted pieces are expanded when a log alias is used, not here.
declare -r log_prefix='${BASHPID} ${BASH_SOURCE##*/}:${LINENO}]'
declare -r log_datetime='$(date +"%m%d %T.%N")'
declare -r verbose_ps4="+ I\D{%m%d %T} ${log_prefix} "
declare stderr_fd
declare temp_dir
declare local_temp_dir
declare sid

shopt -s expand_aliases
alias log_info="log \"I${log_datetime} ${log_prefix}\""
alias log_warning="log \"W${log_datetime} ${log_prefix}\""
alias log_error="log \"E${log_datetime} ${log_prefix}\""

# Writes all arguments to the saved stderr file descriptor.
function log() {
  echo -e "$@" >&${stderr_fd}
}

# May set `temp_dir` globally. The last `--temp_dir=` argument wins.
function parse_temp_dir() {
  local arg
  local stripped_arg
  for arg in "$@"; do
    stripped_arg="${arg#--temp_dir=}"
    if [[ "${arg}" != "${stripped_arg}" ]]; then
      temp_dir="${stripped_arg}"
    fi
  done
}

# Returns 0 if "$1" is on one of the network filesystems listed below.
function is_network_filesystem() {
  local fstype
  fstype="$(stat "$1" --file-system --format=%T)"

  # Try to unmount a previously mounted squashfs.
  case "${fstype}" in
    squashfs | fuseblk)
      if umount -l "$1" || fusermount -uz "$1"; then
        fstype="$(stat "$1" --file-system --format=%T)"
      fi
      ;;
  esac
  readonly fstype

  case "${fstype}" in
    acfs | \
      afs | \
      ceph | \
      cifs | \
      coda | \
      fhgfs | \
      gfs/gfs2 | \
      gpfs | \
      ibrix | \
      k-afs | \
      lustre | \
      novell | \
      nfs | \
      ocfs2 | \
      panfs | \
      prl_fs | \
      smb | \
      smb2 | \
      snfs)
      log_info "temp_dir '$1' is on a ${fstype} filesystem"
      return 0
      ;;
  esac
  return 1
}

# Prints a command that restores the current PS4 and xtrace setting; callers
# install it as a RETURN trap before enabling xtrace.
function push_xtrace() {
  local trap_cmd
  trap_cmd="PS4=${PS4@Q}"
  if [[ ! -o xtrace ]]; then
    trap_cmd="set +x; ${trap_cmd}"
  fi
  echo "${trap_cmd}"
}

# Kills whatever is left in the wrapped command's session, then packs the local
# temporary directory (if one was bound) and tries to mount the result.
function clean_up() {
  trap "$(push_xtrace)" RETURN
  PS4="${verbose_ps4}"
  set -x

  kill -9 $(ps --sid "${sid}" --format pid=) || true

  # Only pack when a local directory was actually created and bound; otherwise
  # `mksquashfs` would be run on an empty source path.
  if [[ -n "${temp_dir}" && -n "${local_temp_dir}" ]]; then
    declare -r temp_sqfs="${temp_dir}.sqfs"
    if mksquashfs "${local_temp_dir}" "${temp_sqfs}" -noappend; then
      rm -rf "${local_temp_dir}" || true
      log_info \
        "temp_dir '${temp_dir}' is packed as '${temp_dir}.sqfs'," \
        "which can be manually mounted using one of the following commands:\n" \
        " sudo mount ${temp_sqfs@Q} ${temp_dir@Q}\n" \
        " squashfuse -o allow_other ${temp_sqfs@Q} ${temp_dir@Q}\n" \
        " squashfuse ${temp_sqfs@Q} ${temp_dir@Q}"
      if umount -l "${temp_dir}" || fusermount -uz "${temp_dir}"; then
        if mount "${temp_sqfs}" "${temp_dir}" ||
          squashfuse -o allow_other "${temp_sqfs}" "${temp_dir}" ||
          squashfuse "${temp_sqfs}" "${temp_dir}"; then
          log_info "temp_dir '${temp_dir}' is mounted read-only from '${temp_dir}.sqfs'"
        fi
      else
        log_warning "failed to unmount ${temp_dir}"
        return 0
      fi
    fi
  else
    rm -rf "${local_temp_dir}" || true
  fi
}

# Binds `temp_dir` to a fresh local directory when it is on a network
# filesystem. Unsets `temp_dir` whenever the binding is not set up.
function wrap() {
  trap "$(push_xtrace)" RETURN
  PS4="${verbose_ps4}"
  set -x

  mkdir --parent "${temp_dir}"
  if is_network_filesystem "${temp_dir}"; then
    local_temp_dir="$(mktemp --directory --suffix=.frt_vpp_wrapper)"
    readonly local_temp_dir

    if ! mksquashfs -version; then
      log_warning "not binding temp_dir due to the lack of 'squashfs-tools'"
      unset temp_dir
      return 0
    fi

    declare -r temp_sqfs="${temp_dir}.sqfs"
    if ! touch "${temp_sqfs}" || [[ ! -w "${temp_sqfs}" ]]; then
      log_warning "not binding temp_dir because '${temp_dir}.sqfs' is not writable"
      unset temp_dir
      return 0
    fi

    if mount --bind "${local_temp_dir}" "${temp_dir}" ||
      bindfs "${local_temp_dir}" "${temp_dir}" ||
      bindfs --no-allow-other "${local_temp_dir}" "${temp_dir}"; then
      log_info \
        "temp_dir '${temp_dir}' is bound from '${local_temp_dir}'" \
        "to speedup I/O"
    else
      log_warning \
        "failed to mount '${temp_dir}' on a local filesystem;" \
        "I/O can be very slow"
      unset temp_dir
    fi
  fi
  return 0
}

# Saves stderr, parses `--temp_dir=`, sets up the xtrace log, runs `wrap`, and
# installs the clean-up trap.
function prepare() {
  exec {stderr_fd}>&2
  parse_temp_dir "$@"
  if [[ -n "${temp_dir}" ]]; then
    if [[ -o xtrace ]]; then
      if [[ -z "${BASH_XTRACEFD}" ]]; then
        BASH_XTRACEFD=2
      fi
    else
      if ! exec {BASH_XTRACEFD}>"${temp_dir}.frt_vpp_wrapper.log"; then
        log_warning "failed to log details to '${temp_dir}.frt_vpp_wrapper.log'"
        exec {BASH_XTRACEFD}>/dev/null
      fi
    fi
    wrap >&${BASH_XTRACEFD} 2>&1
  fi
  # BASH_XTRACEFD is unset when no temp_dir was given; fall back to stderr
  # explicitly rather than relying on how an empty redirect target parses.
  trap "clean_up >&${BASH_XTRACEFD:-2} 2>&1" SIGINT SIGTERM SIGPIPE SIGCHLD EXIT
}

function main() {
  prepare "$@" || true

  # The wrapped command often has dangling processes writing `temp_dir` after
  # the main process exits, so we wait for all subprocesses. Some versions of
  # the wrapped command crash on non-C locales, thus we force the locale
  # settings here.
  setsid env LC_ALL=C "$@" &
  sid=$!
  wait "${sid}"
  local -a pids
  while pids=($(ps --sid "${sid}" --format pid=)) && ((${#pids[@]})); do
    # The first process may disappear just before `tail`, but there can be
    # others left dangling, so we ignore errors here. Bash arrays are
    # zero-indexed, so the first process is `pids[0]`.
    tail /dev/null --follow --pid="${pids[0]}" || true
  done
}

main "$@"
int main(int argc, char* argv[]) { 12 | if (argc < 3) { 13 | clog << "Usage: " << argv[0] << " " << endl; 14 | return 1; 15 | } 16 | uint64_t n = (atoi(argv[2]) / 1024 + 1) * 1024; 17 | auto a = reinterpret_cast(aligned_alloc(4096, sizeof(float) * n)); 18 | auto b = reinterpret_cast(aligned_alloc(4096, sizeof(float) * n)); 19 | auto c = reinterpret_cast(aligned_alloc(4096, sizeof(float) * n)); 20 | auto c_base = new float[n]; 21 | for (int i = 0; i < n; ++i) { 22 | a[i] = i * i % 10; 23 | b[i] = i * i % 9; 24 | c[i] = 0; 25 | c_base[i] = a[i] + b[i]; 26 | } 27 | 28 | fpga::WriteStream a_stream("a"); 29 | fpga::WriteStream b_stream("b"); 30 | fpga::ReadStream c_stream("c"); 31 | auto instance = fpga::Invoke(argv[1], a_stream, b_stream, c_stream); 32 | const uint64_t kBatchSize = 1ULL << 29; 33 | auto t1 = std::thread([&]() { 34 | for (uint64_t i = 0; i < n; i += kBatchSize) { 35 | a_stream.Write(a + i, std::min(kBatchSize, n - i), !(i + kBatchSize < n)); 36 | } 37 | }); 38 | auto t2 = std::thread([&]() { 39 | for (uint64_t i = 0; i < n; i += kBatchSize) { 40 | b_stream.Write(b + i, std::min(kBatchSize, n - i), !(i + kBatchSize < n)); 41 | } 42 | }); 43 | auto t3 = std::thread([&]() { 44 | for (uint64_t i = 0; i < n; i += kBatchSize) { 45 | c_stream.Read(c + i, std::min(kBatchSize, n - i), !(i + kBatchSize < n)); 46 | } 47 | }); 48 | t1.join(); 49 | t2.join(); 50 | t3.join(); 51 | instance.Finish(); 52 | 53 | clog << "Compute latency: " << instance.ComputeTimeSeconds() << " s" << endl; 54 | for (int i = 0; i < n; ++i) { 55 | if (c[i] != c_base[i]) { 56 | clog << "FAIL: " << c[i] << " != " << c_base[i] << endl; 57 | return 1; 58 | } 59 | } 60 | clog << "PASS!" 
<< endl; 61 | free(a); 62 | free(b); 63 | free(c); 64 | delete[] c_base; 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /tests/qdma/qdma-kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using pkt = qdma_axis<32, 0, 0, 0>; 6 | 7 | extern "C" { 8 | 9 | void VecAdd(hls::stream& a, hls::stream& b, hls::stream& c) { 10 | #pragma HLS interface axis port = a 11 | #pragma HLS interface axis port = b 12 | #pragma HLS interface axis port = c 13 | #pragma HLS interface s_axilite port = return bundle = control 14 | for (bool eos = false; !eos;) { 15 | #pragma HLS pipeline 16 | pkt a_pkt = a.read(); 17 | pkt b_pkt = b.read(); 18 | ap_uint<32> a_raw = a_pkt.get_data(); 19 | ap_uint<32> b_raw = b_pkt.get_data(); 20 | eos = a_pkt.get_last() | b_pkt.get_last(); 21 | float a_val = reinterpret_cast(a_raw); 22 | float b_val = reinterpret_cast(b_raw); 23 | 24 | float c_val = a_val + b_val; 25 | ap_uint<32> c_raw = reinterpret_cast&>(c_val); 26 | 27 | pkt c_pkt; 28 | c_pkt.set_data(c_raw); 29 | c_pkt.set_last(eos); 30 | c_pkt.set_keep(-1); 31 | c.write(c_pkt); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /tests/xdma/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(../../cmake/FindSDx.cmake) 2 | 3 | add_executable(xdma-vadd) 4 | target_sources(xdma-vadd 5 | PRIVATE 6 | xdma-host.cpp 7 | xdma-kernel.cpp) 8 | target_compile_features(xdma-vadd PRIVATE cxx_auto_type) 9 | target_link_libraries(xdma-vadd PRIVATE frt) 10 | 11 | if(NOT XRT_PLATFORM) 12 | set(XRT_PLATFORM xilinx_u250_xdma_201830_2) 13 | endif() 14 | set(KERNEL VecAdd) 15 | 16 | if (${XRT_PLATFORM} MATCHES "_(u50|u280)_") 17 | set(DRAM HBM) 18 | else() 19 | set(DRAM DDR) 20 | endif() 21 | add_xocc_targets(${CMAKE_CURRENT_BINARY_DIR} 22 | PREFIX xdma 23 | KERNEL 
${KERNEL} 24 | PLATFORM ${XRT_PLATFORM} 25 | INPUT xdma-kernel.cpp 26 | DRAM_MAPPING gmem0:${DRAM}[0] gmem1:${DRAM}[1] gmem2:${DRAM}[2] 27 | SW_EMU_XCLBIN sw_emu_xclbin 28 | HW_EMU_XCLBIN hw_emu_xclbin 29 | HW_XCLBIN hw_xclbin) 30 | 31 | file( 32 | DOWNLOAD 33 | https://github.com/Licheng-Guo/tapa-fast-cosim/raw/02ecd87c4d24d6513d8c0d15af8046d5ee05a2ae/example/vadd/vadd.xo 34 | ${CMAKE_CURRENT_BINARY_DIR}/vadd.xo 35 | TLS_VERIFY ON 36 | ) 37 | 38 | add_custom_target(xdma-csim 39 | COMMAND xdma-vadd 40 | $ 1000000 41 | DEPENDS xdma-vadd ${sw_emu_xclbin} 42 | WORKING_DIRECTORY ${CMAKE_PROJECT_DIR}) 43 | add_custom_target(xdma-xosim 44 | COMMAND xdma-vadd ${CMAKE_CURRENT_BINARY_DIR}/vadd.xo 1000 45 | DEPENDS xdma-vadd ${CMAKE_CURRENT_BINARY_DIR}/vadd.xo 46 | WORKING_DIRECTORY ${CMAKE_PROJECT_DIR}) 47 | add_custom_target(xdma-cosim 48 | COMMAND xdma-vadd 49 | $ 1000 50 | DEPENDS xdma-vadd ${hw_emu_xclbin} 51 | WORKING_DIRECTORY ${CMAKE_PROJECT_DIR}) 52 | add_custom_target(xdma-hw 53 | COMMAND xdma-vadd $ 54 | 10000000 55 | DEPENDS xdma-vadd ${hw_xclbin} 56 | WORKING_DIRECTORY ${CMAKE_PROJECT_DIR}) 57 | add_custom_target(xdma-emu DEPENDS xdma-csim xdma-xosim xdma-cosim) 58 | 59 | add_test(NAME xdma-csim 60 | COMMAND ${CMAKE_COMMAND} 61 | --build 62 | ${CMAKE_BINARY_DIR} 63 | --target 64 | xdma-csim) 65 | add_test(NAME xdma-xosim 66 | COMMAND ${CMAKE_COMMAND} 67 | --build 68 | ${CMAKE_BINARY_DIR} 69 | --target 70 | xdma-xosim) 71 | add_test(NAME xdma-cosim 72 | COMMAND ${CMAKE_COMMAND} 73 | --build 74 | ${CMAKE_BINARY_DIR} 75 | --target 76 | xdma-cosim) 77 | -------------------------------------------------------------------------------- /tests/xdma/xdma-host.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "frt.h" 9 | 10 | using std::clog; 11 | using std::endl; 12 | 13 | extern "C" { 14 | void VecAdd(const float* a, const float* b, float* c, uint64_t n); 15 
| } 16 | 17 | int main(int argc, char* argv[]) { 18 | gflags::ParseCommandLineFlags(&argc, &argv, /* remove_flags = */ true); 19 | 20 | if (argc < 3) { 21 | clog << "Usage: " << argv[0] << " " << endl; 22 | return 1; 23 | } 24 | uint64_t n = (atoi(argv[2]) / 1024 + 1) * 1024; 25 | auto a = reinterpret_cast<float*>(aligned_alloc(4096, sizeof(float) * n)); 26 | auto b = reinterpret_cast<float*>(aligned_alloc(4096, sizeof(float) * n)); 27 | auto c = reinterpret_cast<float*>(aligned_alloc(4096, sizeof(float) * n)); 28 | auto c_base = new float[n]; 29 | for (uint64_t i = 0; i < n; ++i) { 30 | a[i] = i * i % 10; 31 | b[i] = i * i % 9; 32 | c[i] = -1; 33 | c_base[i] = 1; 34 | } 35 | auto instance = fpga::Invoke(argv[1], fpga::WriteOnly(a, n), 36 | fpga::WriteOnly(b, n), fpga::ReadOnly(c, n), n); 37 | for (const auto& arg : instance.GetArgsInfo()) { 38 | clog << arg << "\n"; 39 | } 40 | clog << "Load throughput: " << instance.LoadThroughputGbps() << " GB/s\n"; 41 | clog << "Compute latency: " << instance.ComputeTimeSeconds() << " s" << endl; 42 | clog << "Store throughput: " << instance.StoreThroughputGbps() << " GB/s\n"; 43 | VecAdd(a, b, c_base, n); 44 | for (uint64_t i = 0; i < n; ++i) { 45 | if (c[i] != c_base[i]) { 46 | clog << "FAIL: " << c[i] << " != " << c_base[i] << endl; 47 | return 1; 48 | } 49 | } 50 | clog << "PASS!" 
<< endl; 51 | free(a); 52 | free(b); 53 | free(c); 54 | delete[] c_base; 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /tests/xdma/xdma-kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" { 4 | 5 | void VecAdd(const float* a, const float* b, float* c, uint64_t n) { 6 | #pragma HLS interface m_axi port = a offset = slave bundle = gmem0 7 | #pragma HLS interface m_axi port = b offset = slave bundle = gmem1 8 | #pragma HLS interface m_axi port = c offset = slave bundle = gmem2 9 | #pragma HLS interface s_axilite port = a bundle = control 10 | #pragma HLS interface s_axilite port = b bundle = control 11 | #pragma HLS interface s_axilite port = c bundle = control 12 | #pragma HLS interface s_axilite port = n bundle = control 13 | #pragma HLS interface s_axilite port = return bundle = control 14 | for (uint64_t i = 0; i < n; ++i) { 15 | #pragma HLS pipeline 16 | c[i] = a[i] + b[i]; 17 | } 18 | } 19 | } 20 | --------------------------------------------------------------------------------