├── Creating_AFI.md ├── Optimization_lab.md ├── README.md ├── debug_lab.md ├── images ├── Fig-binary_container.png ├── Fig-build.png ├── Fig-hw_button.png ├── Fig-refresh.png ├── Fig-run.png ├── SDX_IDE.png ├── connecting_lab │ ├── FigConnectingLab-1.png │ ├── FigConnectingLab-10.png │ ├── FigConnectingLab-11.png │ ├── FigConnectingLab-12.png │ ├── FigConnectingLab-13-1.png │ ├── FigConnectingLab-13-2.png │ ├── FigConnectingLab-14.png │ ├── FigConnectingLab-15.png │ ├── FigConnectingLab-16.png │ ├── FigConnectingLab-17.png │ ├── FigConnectingLab-2.png │ ├── FigConnectingLab-3.png │ ├── FigConnectingLab-4-1.png │ ├── FigConnectingLab-4-2.png │ ├── FigConnectingLab-5.png │ ├── FigConnectingLab-6.png │ ├── FigConnectingLab-7.png │ ├── FigConnectingLab-8.png │ ├── FigConnectingLab-9.png │ └── nimbix │ │ ├── connect_to_instance.png │ │ ├── linux_desktop.png │ │ ├── select_desktop_mode.png │ │ ├── select_instance.png │ │ └── select_instance_config.png ├── debug_lab │ ├── FigDebugLab-10.png │ ├── FigDebugLab-11.png │ ├── FigDebugLab-12.png │ ├── FigDebugLab-13.png │ ├── FigDebugLab-14.png │ ├── FigDebugLab-15.png │ ├── FigDebugLab-16.png │ ├── FigDebugLab-17.png │ ├── FigDebugLab-18.png │ ├── FigDebugLab-2.png │ ├── FigDebugLab-3.png │ ├── FigDebugLab-4.png │ ├── FigDebugLab-5.png │ ├── FigDebugLab-6.png │ ├── FigDebugLab-7.png │ ├── FigDebugLab-8.png │ ├── FigDebugLab-9.png │ ├── add_virtual_cable.png │ ├── enable_chipscope.png │ ├── hw_manager_open_target.png │ ├── localhost_connected.png │ ├── rtl_kernel_exe_properties.png │ ├── run_trigger_immediate.png │ ├── set_virtual_cable_port.png │ ├── trigger_button.png │ └── turn_off_autobuild.png ├── f1_platform.png ├── helloworld │ ├── FigGUIflowLab-10.png │ ├── FigGUIflowLab-11.png │ ├── FigGUIflowLab-13.png │ ├── FigGUIflowLab-14.png │ ├── FigGUIflowLab-15-1.png │ ├── FigGUIflowLab-15-2.png │ ├── FigGUIflowLab-15-3.png │ ├── FigGUIflowLab-16.png │ ├── FigGUIflowLab-18.png │ ├── FigGUIflowLab-19.png │ ├── 
FigGUIflowLab-20.png │ ├── FigGUIflowLab-21-1.png │ ├── FigGUIflowLab-21.png │ ├── FigGUIflowLab-22.png │ ├── FigGUIflowLab-23.png │ ├── FigGUIflowLab-24.png │ ├── FigGUIflowLab-25.png │ ├── FigGUIflowLab-26.png │ ├── FigGUIflowLab-27.png │ ├── FigGUIflowLab-28.png │ ├── FigGUIflowLab-29.png │ ├── FigGUIflowLab-30.png │ ├── FigGUIflowLab-31.png │ ├── FigGUIflowLab-6.png │ ├── FigGUIflowLab-8.png │ ├── FigGUIflowLab-9.png │ ├── add_xclbin_argument.png │ ├── empty_application_project.png │ ├── file_permissions.png │ ├── import_from_dir.png │ ├── import_srcs.png │ ├── sdx_hello_world_ide.png │ ├── select_srcs.png │ ├── select_u200_platform.png │ ├── select_vector_add_fn.png │ └── sys_estimate.png ├── makefile_lab │ ├── FigMakefileLab-1.png │ ├── FigMakefileLab-2.png │ ├── FigMakefileLab-3.png │ ├── FigMakefileLab-4.png │ ├── FigMakefileLab-5.png │ ├── FigMakefileLab-6.png │ └── linker_flag.png ├── nice_dcv.png ├── nice_dcv_desktop.png ├── optimization_lab │ ├── FigOptimizationLab-11.png │ ├── FigOptimizationLab-12.png │ ├── FigOptimizationLab-13.png │ ├── FigOptimizationLab-14.png │ ├── FigOptimizationLab-15-1.png │ ├── FigOptimizationLab-15.png │ ├── FigOptimizationLab-16.png │ ├── FigOptimizationLab-17.png │ ├── FigOptimizationLab-18.png │ ├── FigOptimizationLab-19.png │ ├── FigOptimizationLab-20-1.png │ ├── FigOptimizationLab-20.png │ ├── FigOptimizationLab-21.png │ ├── FigOptimizationLab-22-1.png │ ├── FigOptimizationLab-22.png │ ├── FigOptimizationLab-23.png │ ├── FigOptimizationLab-5.png │ ├── FigOptimizationLab-6.png │ ├── FigOptimizationLab-7.png │ ├── FigOptimizationLab-8.png │ ├── application_timeline_after_host_optimiaztion.png │ ├── application_timeline_before_host_optimiaztion.png │ ├── compute_unit_settings.png │ ├── localhost_connected.png │ └── zoon_buttons.png ├── putty_dcv.png ├── putty_ip4.png └── rtlkernel_lab │ ├── FigRTLKernelLab-10.png │ ├── FigRTLKernelLab-11.png │ ├── FigRTLKernelLab-12.png │ ├── FigRTLKernelLab-13.png │ ├── 
FigRTLKernelLab-14.png │ ├── FigRTLKernelLab-15.png │ ├── FigRTLKernelLab-16.png │ ├── FigRTLKernelLab-17.png │ ├── FigRTLKernelLab-18.png │ ├── FigRTLKernelLab-19.png │ ├── FigRTLKernelLab-20.png │ ├── FigRTLKernelLab-21.png │ ├── FigRTLKernelLab-22.png │ ├── FigRTLKernelLab-4.png │ ├── FigRTLKernelLab-5.png │ ├── FigRTLKernelLab-6.png │ ├── FigRTLKernelLab-7.png │ ├── FigRTLKernelLab-8.png │ ├── FigRTLKernelLab-9.png │ ├── hardware_emulation_application_timeline.png │ └── hw_emulation_completed_successfully.png ├── rtl_kernel_wizard_lab.md ├── sdx_introduction.md ├── setup_aws.md ├── setup_local_computer.md ├── setup_nimbix.md ├── setup_sdx.md ├── setup_xup_aws_workshop.md ├── slides ├── 01_Course_Intro.pdf ├── 02_Intro_to_AWS_EC2_F1.pdf ├── 03_SDAccel_Tool_Overview.pdf ├── 04_SDAccel_Flows.pdf ├── 05_Optimization_Techniques.pdf ├── 06_RTL_Kernel_Wizard.pdf └── 07_Debugging.pdf ├── solutions ├── hello_world │ ├── aws │ │ └── awsf1_2xlarge_18_3_hello_world_sol.sdx.zip │ └── u200 │ │ ├── u200_nimbix_ubuntu16_04_hello_world_sol.sdx.zip │ │ └── u200_rh7_5_hello_world_sol_.sdx.zip └── optimization_lab │ ├── aws │ └── aws_2xlarge_18_3_optimization_lab.sdx.zip │ └── u200 │ ├── u200_nimbix_ubuntu16_04_optimization_lab_sol.sdx.zip │ └── u200_rh7_5_optimization_lab_sol.sdx.zip └── sources ├── debug ├── aws │ └── awsf1_2xlarge_18_3_debug.sdx.zip └── u200 │ ├── u200_nimbix_ubuntu16_04_rtl_kernel.sdx.zip │ └── u200_rh7_5_debug.sdx.zip ├── helloworld_ocl ├── command_line.ipynb └── src │ ├── krnl_vadd.cl │ ├── vadd.cpp │ ├── vadd.h │ ├── xcl.cpp │ └── xcl.h └── optimization_lab ├── idct.cpp └── krnl_idct.cpp /Creating_AFI.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Creating an Amazon FPGA Image (AFI) 16 | 17 | This document guides you through the steps to create an AWS Amazon FPGA Image (AFI) which can be run AWS EC2 F1 instance to verify the deign works in hardware. It assumes that a full system (SDx project) is built which consists of an *host* application (.exe) and an FPGA binary file (.xclbin). 18 | 19 | ### Create an AFI 20 | 21 | To execute the application on F1, the following files are needed: 22 | 23 | - Host application (exe) 24 | - Amazon FPGA Image (awsxclbin) 25 | 26 | The awsxclbin is an Amazon specific version of the FPGA binary file (xclbin) produced by the SDx software. 27 | 28 | The awsxclbin can be created by running the *create\_sdaccel\_afi.sh* script which is included in the aws-fpga Git repository. 29 | 30 | The script can be found in the following location in the aws-fpga repository: 31 | 32 | ``` 33 | ./aws-fpga/SDAccel/tools/create_sdaccel_afi.sh 34 | ``` 35 | 36 | Before running the commands below, make sure the SDx setup script has been sourced (the following command assumes the aws-fpga Git repository is cloned to the user home area) 37 | 38 | ``` 39 | source ~/aws-fpga/sdaccel_setup.sh 40 | ``` 41 | 42 | * Create an AFI by running the create\_sdaccel\_afi.sh script and wait for the completion of the AFI creation process 43 | 44 | ``` 45 | $SDAccel/tools/create_sdaccel_afi.sh –xclbin=.xclbin –s3_bucket= -s3_dcp_key= -s3_logs_key= 46 | ``` 47 | In the above command, set the *xclbin* ; the Amazon S3 <bucket-name>, <dcp-folder-name>, and <logs-folder-name> with the names you had given when running CLI script. You can choose any valid folder name for the dcp and logs folder. The Amazon S3 bucket name should match an S3 bucket you have set up. 
48 | 49 | Learn more about setting up S3 buckets at [https://github.com/aws/aws-fpga/blob/master/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md](https://github.com/aws/aws-fpga/blob/master/SDAccel/docs/Setup_AWS_CLI_and_S3_Bucket.md) 50 | 51 | The create\_sdaccel\_afi.sh script does the following: 52 | 53 | - Starts a background process to create the AFI 54 | - Generates a \_afi\_id.txt which contains the FPGA Image Identifier (or AFI ID) and Global FPGA Image Identifier (or AGFI ID) of the generated AFIs 55 | - Creates the \*.awsxclbin AWS FPGA binary file which is passed to the host application to determine which AFI should be loaded to the FPGA. 56 | - Uploads the xclbin to the AWS cloud for processing. 57 | ## Check the AFI status 58 | 59 | The AFI will become available after some time in the AWS cloud and can then be used to program the FPGA in an AWS EC2 F1 instance. To check the AFI status, the AFI ID is required. 60 | 61 | - In the directory the *create_sdaccel_afi.sh* script was run, enter the following command to find the AFI ID 62 | 63 | ``` 64 | cat *afi_id.txt 65 | ``` 66 | * Enter the **describe-fpga-images** API command to check the status of the AFI generation process: 67 | 68 | ``` 69 | aws ec2 describe-fpga-images --fpga-image-ids 70 | ``` 71 | * For example, 72 | 73 | ``` 74 | aws ec2 describe-fpga-images --fpga-image-ids afi-0b9167434a1c74ba9 75 | ``` 76 | 77 | Note: When AFI creation is in progress, the *State* will be pending. When the AFI creation is finished, the output should show *available*: 78 | 79 | ``` 80 | ... 81 | "State": { 82 | "Code": "available" 83 | }, 84 | 85 | ... 86 | ``` 87 | 88 | Wait until the AFI becomes available before proceeding to execute on the F1 instance. 89 | 90 | -------------------------------------------------------------------------------- /Optimization_lab.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Optimization Lab 16 | 17 | ## Introduction 18 | 19 | In this lab you will create an SDx project and analyze the design to optimize the host code and kernel code to improve the performance of the design. 20 | 21 | ## Objectives 22 | 23 | After completing this lab, you will be able to: 24 | 25 | - Analyze the design and read project reports 26 | - Optimize the kernel code to improve throughput 27 | - Optimize the host code to improve the data transfer rate 28 | - Verify the functionality of the design in hardware 29 | 30 | ### Create an SDx Project 31 | 32 | * Start SDx and select the default workspace (or continue with the workspace from the previous lab) 33 | 34 | * Click on the **Create Application Project** 35 | 36 | * In the _New Project_'s page enter **optimization\_lab** in the _Project name:_ field and click **Next** 37 | 38 | * Select your target platform and click **Next** 39 | 40 | * Select **Empty Application** and click **Finish** 41 | 42 | ### Import the source files into the project 43 | 44 | * In the _Project Explorer_ expand the *optimization_lab* folder if necessary, and right-click on the **src** folder and select **Import…** 45 | 46 | * Select **General > File System**, click **Next**, browse to the source directory at **~/compute_acceleration/sources/optimization\_lab** and click **OK** 47 | 48 | * Select the **idct.cpp** and **krnl\_idct.cpp** files and click **Finish** 49 | 50 | * Expand the **src** folder in the _Project Explorer_ and note the two added files 51 | 52 | ### Add a function as a hardware kernel 53 | 54 | * Click on the _Add Hardware Function_ button icon (![alt tag](./images/Fig-hw_button.png)) in the **Hardware Functions** tab to see functions available for implementation in hardware. 
55 | 56 | * Select _krnl\_idct_ function and click **OK** 57 | 58 | ![](./images/optimization_lab/FigOptimizationLab-6.png) 59 | 60 | * Notice a **binary\_container\_1** folder is created automatically under which the _kml\_idct_ function is added 61 | 62 | ### Analyze the source files 63 | 64 | * From the *Project Explorer* open the **src>krnl\_idct.cpp** file 65 | 66 | * The **Outline** panel should be visible. It displays an outline of the code of the source file that is currently in scope. If you can't see it, go to **Window > Show View > Outline**. 67 | 68 | The outline view can be used to navigate the source file. For example, function names are displayed in the outline view, and clicking on a function will jump to the line of code where the function is defined. 69 | 70 | ![](./images/optimization_lab/FigOptimizationLab-7.png) 71 | 72 | * In the _Outline_ viewer, click **idct** to look up the function 73 | 74 | The `idct()` function is the core algorithm in the kernel. It is a computationally intensive function that can be highly parallelized on the FPGA, providing significant acceleration over a CPU-based implementation. 75 | 76 | * Review the code 77 | 78 | - **krnl\_idct** : Top-level function for the hardware kernel. Interface properties for the kernel are specified in this function 79 | - **krnl\_idct\_dataflow** : Called by the **krnl\_idct** function and encapsulates the main functions of the kernel 80 | - **read\_blocks** : Reads data from global memory data sent by the host application and streams to the *execute* function 81 | - **execute** : For each 8x8 block received, calls the **idct** function to perform the actual IDCT computation 82 | - **write\_blocks** : Receives results from the **execute** function and writes them back to global memory for the host application 83 | 84 | * Open the **idct.cpp** file. 
Again, use the _Outline_ viewer to quickly look up and inspect the important functions of the host application: 85 | - **main** : Initializes the test vectors, sets-up OpenCL resources, runs the reference model, runs the hardware kernel, releases the OpenCL resources, and compares the results of the reference IDCT model with the hardware implementation 86 | - **runFPGA** : Takes in a vector of inputs and for each 8x8 block calls the hardware accelerated IDCT using the **write** , **run** , **read** , and **finish** helper functions. These function use OpenCL API calls to communicate with the FPGA 87 | - **runCPU** : Takes in a vector of inputs and, for each 8x8 block, calls **idctSoft** , a reference implementation of the IDCT 88 | - **idctSoft** : Software implementation of the IDCT algorithm, used to check the results from the FPGA 89 | - **oclDct** : This class is used to encapsulate the OpenCL runtime calls to interact with the kernel in the FPGA 90 | - **aligned\_allocator** , **smalloc** , **load\_file\_to\_memory** : These are small helper functions used during test vector generation and OpenCL setup 91 | * Look at the code around line number 580 of the **idct.cpp** file by pressing Ctrl+l (small L) and entering 496. 92 | This section of code is where the OpenCL environment is setup in the host application. It is typical of most SDx application and will look very familiar to developers with prior OpenCL experience. This body of code can often be reused as-is from project to project. 93 | 94 | To setup the OpenCL environment, the following API calls are made: 95 | 96 | - **clGetPlatformIDs** : Queries the system to identify any available OpenCL platforms. 
It is called twice as it first checks the number of available platforms before extracting the list of platforms 97 | - **clGetPlatformInfo** : Get information about the OpenCL platform, such as vendor name and platform name 98 | - **clGetDeviceIDs** : Obtain a list of devices available on a platform 99 | - **clCreateContext** : Creates an OpenCL context, which manages the runtime objects 100 | - **clGetDeviceInfo** : Get information about an OpenCL device like the device name 101 | - **clCreateProgramWithBinary** : Creates a program object for a context, and loads specified binary data into the program object. The actual program is obtained before this call through the `load_file_to_memory()` function 102 | - **clCreateKernel** : Creates a kernel object 103 | - **clCreateCommandQueue** : Create a command-queue on a specific device 104 | 105 | Note: all objects accessed through a **clCreate...** function call should be released before terminating the program by calling a corresponding **clRelease...** This avoids memory leakage and clears the locks on the device 106 | 107 | ### Configure the System Port options 108 | 109 | #### Configure the System Port in the SDx GUI 110 | 111 | In the *idct.cpp* file, locate lines 286-297. Note that two memory buffers, *mInBuffer* and *mOutBuffer* are being used. The memory buffers will be located in external DRAM. The kernel will have one or more ports connected to the memory bank(s). By default, the compiler will connect all ports to BANK0 or DDR[0]. For Alveo, memory interfaces can be configured from the SDx GUI, or via a "System Port" switch (--sp) that is passed to the XOCC Kernel Linker. For AWS only the "System Port" switches are currently supported. 
112 | 113 | * For Alveo, in the *Assistant view, right click on Emulation-HW and click **Settings*** 114 | * In the *Hardware Function Settings* expand *optimization_lab > Emulation-SW > binary_container_1* and select **krnl_idct** 115 | 116 | * Under *Compute Unit Settings* expand *krnl_idct* and krnl_idct_1 117 | 118 | * From the dropdown block under *Memory* select the following: 119 | 120 | * block: DDR[0] 121 | * q: DDR[0] 122 | * voutp: DDR[1] 123 | 124 | ![](./images/optimization_lab/compute_unit_settings.png) 125 | 126 | Click **Apply and Close** 127 | 128 | #### Configure the System Port command line switch 129 | 130 | For AWS, you can set memory interfaces by passing the --sp compiler switch to the XOCC kernel linker: 131 | 132 | ``` 133 | --sp .: 134 | ``` 135 | 136 | Where: 137 | 138 | * **** is the instance name of the kernel 139 | * **** is the name of the memory interface 140 | 141 | The interface names can be found in the **Emulation-SW (or Emulation-HW) > binary_container_1 > reports > link >binary_container_1.xclbin.info** log file 142 | 143 | * In this case the linker switches would be 144 | 145 | ```console 146 | --sp krnl_idct_1.m_axi_gmem0:bank0 147 | --sp krnl_idct_1.m_axi_gmem1:bank0 148 | --sp krnl_idct_1.m_axi_gmem2:bank1 149 | ``` 150 | 151 | * In the Project Explorer pane, right-click the project **optimization_lab_example** and select the **C/C++ Settings** 152 | 153 | * Select **C/C++ Build** > **Settings** in the left pane 154 | * Select the **Miscellaneous** under **SDx XOCC Kernel Linker** 155 | 156 | ![](./images/optimization_lab/FigOptimizationLab-8.png) 157 | 158 | The switches can be added individually, or all on one line. 
159 | 160 | ### Build and run software emulation (Emulation-SW) 161 | 162 | * Make sure the **project.sdx** under _Optimization\_lab_ in the **Project Explorer** tab is selected 163 | * Select **Emulation-SW** as the *Active Build Configuration* 164 | * Build the project (![alt tag](./images/Fig-build.png)) button 165 | * In the Project Explorer pane, right-click the project **optimization\_lab** and select **Run As** > **Run Configurations…** 166 | * Select the **Arguments** tab 167 | * Click on the **Automatically add binary container(s) to arguments** check box 168 | This will add **../binary\_container\_1.xclbin** 169 | * Click **Apply** and then click **Run** 170 | The application will be run and the output will be displayed in the Console tab 171 | 172 | ```console 173 | [Console output redirected to file:/home/cmccabe/workspace/optimization_lab/Emulation-SW/optimization_lab-Default.launch.log] 174 | FPGA number of 64*int16_t blocks per transfer: 256 175 | DEVICE: xilinx_u200_xdma_201830_1 176 | Loading Bitstream: ../binary_container_1.xclbin 177 | INFO: Loaded file 178 | Create Kernel: krnl_idct 179 | Create Compute Unit 180 | Setup complete 181 | Running CPU version 182 | Running FPGA version 183 | Runs complete validating results 184 | TEST PASSED 185 | RUN COMPLETE 186 | ``` 187 | 188 | ### Review the software emulation reports 189 | * In the **Assistant** tab, expand **optimization\_lab** > **Emulation-SW** > **optimization\_lab-Default** 190 | 191 | There will be two files generated by the tool after running the software emulation: Profile Summary and Application Timeline 192 | 193 | ![](./images/optimization_lab/FigOptimizationLab-11.png) 194 | 195 | * Double-click the **Profile Summary** report and review it 196 | 197 | ![](./images/optimization_lab/FigOptimizationLab-12.png) 198 | 199 | * Click on each of tabs and review the report: 200 | - **Top Operations** : Shows all the major top operations of memory transfer between the host and kernel to 
global memory, and kernel execution. This allows you to identify throughput bottlenecks when transferring data. Efficient transfer of data to the kernel/host allows for faster execution times 201 | - **Kernels & Compute Units** : Shows the number of times the kernel was executed. Includes the total, minimum, average, and maximum run times. If the design has multiple compute units, it will show each compute unit's utilization. When accelerating an algorithm, the faster the kernel executes, the higher the throughput which can be achieved. It is best to optimize the kernel to be as fast as it can be with the data it requires 202 | - **Data Transfers** : This tab has no bearing in software emulation as no actual data transfers are emulated across the host to the platform. In hardware emulation, this shows the throughput and bandwidth of the read/writes to the global memory that the host and kernel share 203 | - **OpenCL APIs** : Shows all the OpenCL API command executions, how many time each was executed, and how long they take to execute 204 | 205 | * Double-click the **Application Timeline** report and review it 206 | 207 | ![](./images/optimization_lab/FigOptimizationLab-13.png) 208 | 209 | The **Application Timeline** collects and displays host and device events on a common timeline to help you understand and visualize the overall health and performance of your systems. These events include OpenCL API calls from the host code: when they happen and how long each of them takes. 
210 | 211 | ### Perform HW Emulation 212 | * Click on the drop-down button of _Active build configuration_ and select **Emulation-HW** 213 | 214 | * Assign the System Ports as you did in the Emulation-SW mode 215 | 216 | * In the *Assistant view* > right click on *Emulation-HW* and click **Settings** 217 | 218 | * Expand *optimization_lab > Emulation-HW > binary_container_1* and select **krnl_idct** 219 | * Under *Compute Unit Settings* expand *krnl_idct* and krnl_idct_1 220 | * Select the following: 221 | 222 | - mInBuffer: DDR[0] 223 | - mOutBuffer: DDR[1] 224 | 225 | * Build the project (![alt tag](./images/Fig-build.png)) 226 | 227 | * Select **Run > Run Configurations…** to open the configurations window 228 | 229 | * In the *Main* tab, click to select **Use waveform for kernel debugging** and **Launch live waveform** 230 | 231 | ![](./images/optimization_lab/FigOptimizationLab-14.png) 232 | 233 | * Click on the **Arguments** tab and make sure the _binary\_container\_1.xclbin_ is already assigned 234 | 235 | If no argument was assigned click to select **Automatically add binary container(s) to arguments**, and click **Apply** 236 | 237 | * Click **Run** to run the application 238 | 239 | The Console tab shows that the test was completed successfully along with the data transfer rate 240 | 241 | ``` 242 | [Console output redirected to file:/home/nimbix/workspace/optimization_lab/Emulation-HW/optimization_lab-Default.launch.log] 243 | FPGA number of 64*int16_t blocks per transfer: 256 244 | DEVICE: xilinx_u200_xdma_201830_1 245 | Loading Bitstream: ../binary_container_1.xclbin 246 | INFO: Loaded file 247 | INFO: [SDx-EM 01] Hardware emulation runs simulation underneath. Using a large data set will result in long simulation times. It is recommended that a small dataset is used for faster execution. This flow does not use cycle accurate models and hence the performance data generated is approximate. 
248 | Create Kernel: krnl_idct 249 | Create Compute Unit 250 | Setup complete 251 | Running CPU version 252 | Running FPGA version 253 | Runs complete validating results 254 | TEST PASSED 255 | RUN COMPLETE 256 | INFO: [SDx-EM 22] [Wall clock time: 14:53, Emulation time: 0.075225 ms] Data transfer between kernel(s) and global memory(s) 257 | krnl_idct_1:m_axi_gmem-DDR[0] RD = 128.000 KB WR = 0.000 KB 258 | krnl_idct_1:m_axi_gmem1-DDR[0] RD = 0.500 KB WR = 0.000 KB 259 | krnl_idct_1:m_axi_gmem2-DDR[1] RD = 0.000 KB WR = 128.000 KB 260 | 261 | INFO: [SDx-EM 06-0] Waiting for the simulator process to exit 262 | ``` 263 | 264 | Notice that Vivado was started and the simulation waveform window is updated. 265 | 266 | * Click on the Zoom full button and scroll down the waveform window to see activities taking place in the kernel 267 | 268 | Notice that the execution is sequential 269 | 270 | ![](./images/optimization_lab/FigOptimizationLab-15-1.png) 271 | 272 | You can close Vivado when you are ready. We will not examine the transactions in detail. 273 | 274 | ### Understand the HLS Report, profile summary, and Application Timeline 275 | 276 | * In the **Assistant** tab, expand **optimization\_lab** > **Emulation-HW** > **optimization\_lab-Default** 277 | 278 | * Double-click the **Profile Summary** report and review it 279 | 280 | ![](./images/optimization_lab/FigOptimizationLab-16.png) 281 | 282 | * Click on the **Kernels & Compute Units** tab of the Profile Summary report 283 | 284 | * Review the Kernel **Total Time (ms)** 285 | 286 | This number will serve as a baseline (reference point) to compare against after optimization. 
287 | 288 | ![](./images/optimization_lab/FigOptimizationLab-17.png) 289 | 290 | * In the **Assistant** tab, expand **optimization\_lab** > **Emulation-HW** > **binary\_container\_1** > **krnl\_idct** 291 | 292 | * Double-click the **HLS Report** and review it 293 | 294 | ![](./images/optimization_lab/FigOptimizationLab-18.png) 295 | 296 | * In the **Performance Estimates** section, expand the **Latency (clock cycles)** > **Summary** and note the following numbers: 297 | 298 | - Latency (min/max): ~6000 299 | - Interval (min/max): ~6000 300 | 301 | The numbers may vary slightly depending on the target hardware you selected. 302 | The numbers will serve as a baseline for comparison against optimized versions of the kernel 303 | 304 | * In the HLS report, expand **Latency (clock cycles)** > **Detail** > **Instance** 305 | 306 | - Note that the 3 sub-functions read, execute and write have roughly the same latency and that their sum total is equivalent to the total Interval reported in the Summary table 307 | - This indicates that the three sub-functions are executing sequentially, hinting to an optimization opportunity 308 | 309 | * Close all the reports 310 | 311 | ### Analyze the kernel code and apply the DATAFLOW directive 312 | 313 | * Open the **src > krnl\_idct.cpp** file 314 | 315 | * Using the **Outline** viewer, navigate to the **krnl\_idct\_dataflow** function 316 | Observe that the three functions are communicating using **hls::streams** objects. These objects model a FIFO-based communication scheme. This is the recommended coding style which should be used whenever possible to exhibit streaming behavior and allow **DATAFLOW** optimization 317 | 318 | * Enable the DATAFLOW optimization by uncommenting the **#pragma HLS DATAFLOW** present in the krnl\_idct\_dataflow function (line 319). 319 | 320 | The DATAFLOW optimization allows each of the subsequent functions to execute as independent processes. 
This results in overlapping and pipelined execution of the read, execute and write functions instead of sequential execution. The FIFO channels between the different processes do not need to buffer the complete dataset anymore but can directly stream the data to the next block. 321 | 322 | * Comment the three **#pragma HLS stream** statements on lines 324, 325 and 326 323 | 324 | * Save the file 325 | 326 | ### Build the project in Hardware emulation configuration and analyze the HLS report 327 | 328 | * Make sure the active configuration is **Emulation-HW** 329 | * Click on the Build button (![alt tag](./images/Fig-build.png)) to build the project 330 | * In the **Assistant** tab, expand **optimization\_lab** > **Emulation-HW** > **binary\_container\_1** > **krnl\_idct** 331 | * Double-click the **HLS Report** and review it 332 | 333 | ()[./images/optimization_lab/FigOptimizationLab-19.png] 334 | 335 | * In the **Performance Estimates** section, expand the **Latency (clock cycles)** > **Summary** and note the following numbers: 336 | - Latency (min/max): ~2000 337 | - Interval (min/max): ~2000 338 | 339 | ### Run the Hardware Emulation 340 | 341 | * Run the application by clicking the Run button (![alt tag](./images/Fig-run.png)) and wait for the run to finish with RUN COMPLETE message 342 | Notice the affect of the dataflow optimization in the VIvado simulation waveform view. Execution of reading, writing, pipelining and kernel is not concurrent. 343 | 344 | ![](./images/optimization_lab/FigOptimizationLab-20-1.png) 345 | 346 | * In the **Assistant** tab, expand **optimization\_lab > Emulation-HW > optimization\_lab-Default** and double-click the **Profile Summary** report 347 | 348 | * Select the **Kernels & Compute Units** tab. 
349 | Compare the **Kernel Total Time (ms)** with the results from the un-optimized run (numbers may vary slightly to the results displayed below) 350 | 351 | ![](./images/optimization_lab/FigOptimizationLab-20.png) 352 | 353 | ### Analyze the host code 354 | 355 | * Open the **src > idct.cpp** file 356 | 357 | * Using the **Outline** viewer, navigate to the **runFPGA** function 358 | 359 | For each block of 8x8 values, the **runFPGA** function writes data to the FPGA, runs the kernel, and reads results back. Communication with the FPGA is handled by the OpenCL API calls made within the `cu.write()`, `cu.run()` and `cu.read()` function calls 360 | 361 | - `clEnqueueMigrateMemObjects()` schedules the transfer of data to or from the FPGA 362 | - `clEnqueueTask()` schedules the executing of the kernel 363 | These OpenCL functions use events to signal their completion and synchronize execution 364 | 365 | * Open the **Application Timeline** of the _Emulation-HW_ run 366 | The green segments at the bottom indicate when the IDCT kernel is running 367 | ![](./images/optimization_lab/application_timeline_before_host_optimiaztion.png) 368 | 369 | * Notice that there are gaps between each of the green segments indicating that the operations are not overlapping 370 | 371 | * Zoom in by performing a left mouse drag across one of these gaps to get a more detailed view 372 | - The two green segments correspond to two consecutive invocations of the IDCT kernel 373 | - The gap between the two segments is indicative of the kernel idle time between these two invocations 374 | - The **Data Transfer** section of the timeline shows that **Read** and **Write** operations are happening when the kernel is idle 375 | - The Read operation is to retrieve the results from the execution which just finished and the Write operation is to send inputs for the next execution 376 | - This represents a sequential execution flow of each iteration 377 | 378 | * Close the **Application Timeline** 379 | 
380 | * In the **idct.cpp** file, go to the `oclDct::write()` function (line ~260) 381 | - Notice on line ~274, the function synchronizes on the **outEvVec** event through a call to `clWaitForEvents()` 382 | 383 | ``` 384 | clWaitForEvents(1, &outEvVec[mCount]); 385 | ``` 386 | 387 | - This event is generated by the completion of the `clEnqueueMigrateMemObjects()` call in the `oclDct::read()` function (line ~360) 388 | - Effectively the next execution of the `oclDct::write()` function is gated by the completion of the previous `oclDct::read()` function, resulting in the sequential behavior observed in the **Application Timeline** 389 | 390 | * Use the **Outline** viewer to locate the definition of the **NUM\_SCHED** macro in the **idct.cpp** file 391 | - This macro defines the depth of the event queue 392 | - The value of 1 explains the observed behavior: new tasks (write, run, read) are only enqueued when the previous has completed effectively synchronizing each loop iteration 393 | - By increasing the value of the **NUM\_SCHED** macro, we increase the depth of the event queue and enable more blocks to be enqueued for processing, which may result in the write, run and read tasks to overlap and allow the kernel to execute continuously or at least more frequently 394 | - This technique is called software pipelining 395 | 396 | * Modify line 152 to increase the value of **NUM\_SCHED** to 6 as follows 397 | 398 | ``` 399 | #define NUM_SCHED 6 400 | ``` 401 | 402 | * Save the file 403 | 404 | ### Run Hardware Emulation 405 | 406 | * Change the run configuration by unchecking the **Use waveform for kernel debugging** option, click **Apply**, and then click **Close** 407 | 408 | * Run the application by clicking the Run button (![](./images/Fig-run.png)) 409 | - Since only the idct.cpp file was changed, the incremental makefile rebuilds only the host code before running emulation 410 | - This should be much faster than also recompiling the kernel to hardware 411 | * In 
the **Assistant** tab, expand **optimization\_lab > Emulation-HW > optimization\_lab-Default** 412 | * Double-click the **Application Timeline** report 413 | 414 | Observe how **software pipelining** enables overlapping of data transfers and kernel execution. 415 | ​ 416 | ![](./images/optimization_lab/application_timeline_after_host_optimiaztion.png) 417 | 418 | Note: system tasks might slow down communication between the application and the hardware simulation, impacting on the performance results. The effect of software pipelining is considerably higher when running on the actual hardware. 419 | 420 | ### Run the Application in hardware 421 | 422 | As before, building the FPGA hardware takes some time, and a precompiled solution is provided. 423 | 424 | For AWS, execute the following in a new terminal, as this needs to be run as sudo 425 | 426 | ``` 427 | sudo sh 428 | source /opt/Xilinx/SDx/2018.3.op2405991/settings64.sh 429 | source /opt/xilinx/xrt/setup.sh 430 | export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/SDAccel/aws_platform/xilinx_aws-vu9p-f1-04261818_dynamic_5_0 431 | ``` 432 | 433 | - Start SDx (execute ```sdx``` from the terminal) and from the SDx file menu, select **import** 434 | - Expand *Xilinx* and select **SDx Project** and click **Next** 435 | - Choose *SDx project exported zip* file and click **Next** 436 | 437 | - Browse to **~/compute_acceleration/solutions/optimization_lab/[aws|u200]/** and select the corresponding ***.sdx.zip** for your OS, and click **OK** 438 | 439 | You should see a new *optimiaztion_lab* folder in the Project Explorer 440 | 441 | #### Set the executable file permissions 442 | 443 | Zip files do not preserve Unix file permissions, so the executable permissions must be modified manually. 
- Expand *optimization_lab > System* and right click on **optimization_lab.exe**
RTL-Kernel Wizard Lab](rtl_kernel_wizard_lab.md) 486 | 487 | --------------------------------------- 488 | 489 | 490 | ## Appendix Build Full Hardware 491 | 492 | **Set the build configuration to System and build the system (Note that since the building of the project takes over two hours skip this step in the workshop environment).** 493 | 494 | * Either select **Project > Build Configurations > Set Active > System** or click on the drop-down button of _Active build configuration_ and select **System** 495 | * Set the XOCC Kernel Linker flag as before 496 | * Either select **Project > Build Project** or click on the (![alt tag](./images/Fig-build.png)) button 497 | This will build the project under the **System** directory. The built project will include **optimization\_lab.exe** file along with **binary\_container\_1.xclbin** file. This step takes about two hours 498 | 499 | ### AWS-F1 500 | 501 | Once the full system is built, you can create an AWS F1 AFI 502 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | ### Introduction 16 | 17 | Welcome to the XUP SDx tutorial. These labs will provide hands-on experience using the SDx software tools with Xilinx FPGA hardware. 18 | 19 | The SDx tools can be run on your local machine, or in the cloud (e.g. AWS or Nimbix). 20 | 21 | The tutorial instructions target the following hardware and software: 22 | 23 | * SDx 2018.3 24 | * AWS EC2 F1 f1.2xlarge (cloud) 25 | * Alveo U200 (local board, or cloud) 26 | 27 | You can build designs locally or in the cloud, and test on hardware locally (if you have an Alveo board) or in the cloud (AWS EC2 F1, Nimbix Alveo U200). 28 | 29 | You will learn how to develop applications using the SDx development environment that supports OpenCL/C/C++ and RTL kernels. 30 | 31 | #### Overview of the SDx flow 32 | 33 | ![alt tag](./images/f1_platform.png) 34 | 35 | 36 | 1. SDx is the development environment used to create host applications and hardware accelerators. It includes host CPU and FPGA compilers, and profiling and debugging tools. 37 | 2. The host application can be written in C or C++ and uses the OpenCL API or the XRT (Xilinx Runtime Library) to interact with the accelerated hardware functions running in the FPGA. The accelerated hardware functions (also referred to as kernels) can be written in C, C++, OpenCL or RTL. 38 | 39 | 40 | #### Tutorial overview 41 | 42 | It is recommended to complete each lab before proceeding to the next. 43 | 44 | * [**Setup SDx**](setup_sdx.md) 45 | This will show you how to setup SDx on your own machine, or how to use it in the cloud (instructions are provided for AWS and Nimbix clouds). 46 | 47 | * [**Introduction to SDx**](sdx_introduction.md) 48 | This lab guides you through the steps involved in using a GUI flow to create an SDx project. After creating a project, you will run CPU and hardware emulation to verify the functionality. You will then use an AWS F1 instance to validate the design on F1. 
49 | 50 | * [**Optimization**](Optimization_lab.md) 51 | This lab guides you through the steps involved in creating a project and adding a kernel function. After creating a project, you will run CPU and hardware emulation to verify the functionality, analyze various generated reports and then apply techniques both on host and kernel side to improve throughput and data transfer rate. 52 | 53 | * [**RTL-Kernel Wizard**](rtl_kernel_wizard_lab.md) 54 | This lab guides you through the steps involved in using a RTL Kernel wizard to wrap a user RTL-based IP so the generated IP can be used in SDx project. 55 | 56 | * [**Debug**](debug_lab.md) 57 | This lab will show you how to carry out host application debug, and debug of the hardware kernel. 58 | 59 | * [**SDx command line**](command_line.md) 60 | This lab guides you through the steps involved in using the SDx command line (using a Makefile) to build and perform CPU and hardware emulation of your design. You will then test the design using FPGA hardware. 61 | 62 | --------------------------------------- 63 | 64 | 65 | When you have setup your instance, go to the first lab [Introduction to SDx](sdx_introduction.md) 66 | 67 | -------------------------------------------------------------------------------- /debug_lab.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Hardware/Software Debugging 16 | 17 | ## Introduction 18 | 19 | This lab is a continuation of the previous (**RTL-Kernel Wizard Lab**) lab. You will use ChipScope to monitor signals at the kernel interface level and perform software debugging using SDx. Note that this lab is not currently supported on Nimbix as the Xilinx Virtual Cable (XVC is not supported) 20 | 21 | ## Objectives 22 | 23 | After completing this lab, you will be able to: 24 | 25 | * Add ChipScope cores to an SDx design 26 | * Use ChipScope to monitor signals at the kernel interface 27 | * Debug a software application in SDx 28 | 29 | ## Steps 30 | 31 | ### Open SDx and import the project 32 | 33 | To save time on compilation, a precompiled project will be provided with the Chipscope debug cores already included in the design. 34 | 35 | * Open SDx 36 | Make sure your target platform has already been imported. You will see an error if the platform used by the precompiled project is not available. 37 | * From the SDx *File* menu, select **Import** 38 | * In the *Import Wizard*, expand *Xilinx* and select **SDX Project** and click **Next** 39 | * Select **SDx project exported zip file** and click **Next** 40 | * Browse to the appropriate *folder* for your target and click **OK** You should see a .sdx.zip file in the folder you select. 
41 | ~/compute_acceleration/sources/debug/ 42 | * Select the appropriate project archive for your target and click **Next** 43 | 44 | #### Set permissions on imported executable 45 | 46 | * Once the project has been imported, in *Project Explorer* expand **debug>System** 47 | * Right click on *debug.exe* and select **Properties** 48 | * Tick the box to add **Execute** to the *Owner* permissions 49 | 50 | ![](./images/debug_lab/rtl_kernel_exe_properties.png) 51 | 52 | 53 | 54 | * If you don't see an option to set the permissions, open a terminal, browse to the directory containing the debug.exe, and run the following command to change the permissions to make the file executable: 55 | 56 | ``` 57 | chmod 777 debug.exe 58 | ``` 59 | 60 | ### Hardware Debugging 61 | 62 | #### Review Appendix-I section to understand how to add the ChipScope Debug bridge core and build the project. The debug core has been included in the precompiled sources provided 63 | 64 | #### Run the application 65 | 66 | * Open the project.sdx and select **System** as the Active build configuration. 67 | 68 | * From the **Run** menu, select **Run Configurations** 69 | * Expand *OpenCL* and select *debug-Default* 70 | * For Alveo, in the *Arguments* tab make sure **Automatically add binary container(s) to arguments** is selected. For AWS, make sure the *.awsxclbin filename is listed as an argument 71 | * Click **Run** 72 | 73 | The host application will start executing, load the bitstream, and wait for user input (press any key to continue) 74 | 75 | ![](./images/debug_lab/FigDebugLab-3.png) 76 | 77 | ### Set up the Xilinx Virtual Cable (XVC) 78 | 79 | The Xilinx Virtual Cable (XVC) is a virtual device that gives you JTAG debug capabilities over PCIe to the target device. XVC will be used to debug the design. 80 | 81 | #### For Alveo U200 82 | 83 | For an Alveo board, you need to determine the XVC device in your system. XVC is installed as part of the SDx and XRT installation. 
84 | 85 | ``` 86 | ls /dev/xvc_pub* 87 | ``` 88 | 89 | This will report something similar to the output below: 90 | 91 | ``` 92 | /dev/xvc_pub.u513 93 | ``` 94 | 95 | Each computer may have a different value for *xvc_pub.\** so you will need to check the name for your computer. 96 | 97 | * In a terminal window, start a virtual jtag connection 98 | 99 | Run the following command (where _u513_ should be the value your obtained from the previous command): 100 | 101 | ``` 102 | sdx_debug_hw --xvc_pcie /dev/xvc_pub.u513 --hw_server 103 | ``` 104 | 105 | ``` 106 | launching xvc_pcie... 107 | xvc_pcie -d /dev/xvc_pub.u513 -s TCP::10200 108 | launching hw_server... 109 | hw_server -sTCP::3121 110 | 111 | **************************** 112 | *** Press Ctrl-C to exit *** 113 | **************************** 114 | ``` 115 | 116 | The Virtual JTAG XVC Server will start listening to TCP port **10200** in this case. This is the port you will need to connect to from Vivado (below). Note the *hw_server* is listening to TCP port 3121. 
117 | 118 | Skip the next section and continue with [Connecting Vivado to the XVC](#connect_vivado_to_xvc) 119 | 120 | #### For AWS 121 | For AWS run the following script which will manage setup of the XVC: 122 | 123 | ``` 124 | sudo fpga-start-virtual-jtag -P 10200 -S 0 125 | ``` 126 | 127 | ![](./images/debug_lab/FigDebugLab-4.png) 128 | 129 | 130 | 131 | ### Connecting Vivado to the XVC 132 | 133 | * Start Vivado from another terminal 134 | 135 | ``` 136 | vivado 137 | ``` 138 | 139 | * Click on **Open Hardware Manager** link 140 | * Click **Open Target > Autoconnect** 141 | 142 | ![](./images/debug_lab/hw_manager_open_target.png) 143 | 144 | * Right click on *localhost (0)* and select **Add Xilinx Virtual Cable (XVC)** 145 | 146 | ![](./images/debug_lab/add_virtual_cable.png) 147 | 148 | * Enter **localhost** as the *host name*, and **10200** as the port (or the *port number* for your machine obtained previously) and click **OK** 149 | 150 | ![](./images/debug_lab/set_virtual_cable_port.png) 151 | 152 | * Right click on the *debug_bridge* and select **Refresh Device**. 153 | 154 | The Vivado Hardware Manager should open showing _Hardware_, _Waveform_, _Settings-hw_, _Trigger-Setup_ windows. The _Hardware_ window also shows the detected ILA cores (*hw_ila_\**), inserted in the design. The Alveo design will have one ILA. The AWS design will have two ILAs, one monitoring the AWS shell interface. 
155 | 156 | ![](./images/debug_lab/FigDebugLab-7.png) 157 | 158 | 159 | * Select the *debug_bridge* in the Hardware panel 160 | * In the _Hardware Device Properties_ view, click on the browse button beside **Probes file** 161 | * Browse to the project's **./workspace/debug/System** folder, select the **.ltx** file and click **OK** 162 | * Select the *hw_ila_1* tab, and notice four (Slot_0 to Slot_3) probes are filled in the Waveform window 163 | * Click on the **Run Trigger immediate** button ![](./images/debug_lab/run_trigger_immediate.png) and observe the waveform window is fills with data showing that the four channels were _Inactive_ for the duration of the signal capture. 164 | 165 | ![](./images/debug_lab/FigDebugLab-8.png) 166 | 167 | * Expand **slot_1 : KVAdd_1_m01_axi : Interface** , then find and expand **slot_1 : KVAdd_1_m01_axi : W Channel** in the Waveform window. 168 | * Select the **WVALID** signal and drag it to the Trigger Setup - hw window 169 | 170 | ![](./images/debug_lab/FigDebugLab-9.png) 171 | 172 | * Click on drop-down button of the Value field and select trigger condition value as 1 173 | 174 | ![](./images/debug_lab/FigDebugLab-10.png) 175 | 176 | * Click on the _Run trigger_ button ![](./images/debug_lab/trigger_button.png)and observe the _hw_ila_1_ probe is waiting for the trigger condition to occur 177 | 178 | ![](./images/debug_lab/FigDebugLab-11.png) 179 | 180 | * Switch to the SDx window select the *Console* window and press the **Enter key** to allow the program to continue executing 181 | Observe that the program completes displaying **INFO: Test completed successfully** in the Console window 182 | * Switch back to Vivado and notice that because the trigger condition was met, the waveform window has been populated with new captured data. 
183 | 184 | ![](./images/debug_lab/FigDebugLab-12.png) 185 | 186 | * Expand **Slot_0, slot_1,** and **slot_2** groups, zoom in to the region around samples _450 to 1000_, and observe the data transfers taking place on each channels. Also note the addresses from where data are read and where the results are written to. 187 | 188 | ![](./images/debug_lab/FigDebugLab-13.png) 189 | 190 | * Zoom in on one of the transactions and hover your mouse at each successive sample and notice the data content changing 191 | * When you are finished, close Vivado by selecting **File > Exit** 192 | * Close the jtag probe by switching to its terminal window and pressing _Ctrl-C_ 193 | 194 | ### Perform Software Debugging 195 | 196 | * Switch to the SDx GUI 197 | 198 | * From the **Run** menu, select **Debug Configurations** 199 | 200 | * Make sure that the **Arguments** tab shows **../binary_container_1.xclbin** 201 | 202 | * Click **Debug** 203 | 204 | * Click **Yes** when prompted to switch to the _Debug perspective_ 205 | The bitstream will be downloaded to the FPGA and the host application will start executing, halting at **main()** entry point 206 | 207 | * In _host_example.cpp_ view scroll down to line ~262 and double-click on the left border to set a breakpoint At this point, three buffers would have been created 208 | 209 | ![](./images/debug_lab/FigDebugLab-14.png) 210 | 211 | * Click on the **Resume** button or press **F8** 212 | 213 | * When prompted click in the console and press *Enter* 214 | The program will resume executing and stop when it reaches the breakpoint 215 | At this point you can click on the various monitoring tabs (*Variables, Command Queue, Memory Buffers* etc.) 
and see the contents currently in scope 216 | SDx debug allows command queues and memory buffers to be examined as the program execution progresses 217 | 218 | * Click back to select *Debug.exe > #Thread 1* in the Debug panel 219 | 220 | * Click on the **Step Over** button or press **F6** 221 | 222 | 223 | The execution will progress one statement at a time 224 | 225 | * Continue pressing **F6** until you reach line ~326 at which point kernel will finish executing 226 | 227 | * Select the **Memory Buffers** tab 228 | Notice that three buffers are allocated, their IDs, DDR memory address, and sizes 229 | 230 | ![](./images/debug_lab/FigDebugLab-15.png) 231 | 232 | * Select the **Command Queue** tab and notice that there no commands enqueued. 233 | 234 | ![](./images/debug_lab/FigDebugLab-16.png) 235 | 236 | Lines ~326-330 creates commands to read the data and results 237 | 238 | ``` 239 | err |= clEnqueueReadBuffer( ... ); 240 | ``` 241 | 242 | * Press **F6** to execute the first `clEnqueueReadBuffer()` to create a read buffer command for reading operand _d\_A_ 243 | Notice the Command Queue tab shows one command submitted 244 | 245 | ![](./images/debug_lab/FigDebugLab-17.png) 246 | 247 | * Press **F6** to execute the next `clEnqueueReadBuffer()` for _d\_B_ 248 | Notice the Command Queue tab shows two commands submitted 249 | 250 | ![](./images/debug_lab/FigDebugLab-18.png) 251 | 252 | * Set a breakpoint at line ~384 (`clReleaseKernel()`) and press **F8** to resume the execution 253 | Notice that the Command Queue tab still shows entries 254 | * Press **F6** to execute `clReleaseKernel()` 255 | Notice the Memory Buffers tab is empty as all memories are released 256 | * Click **F8** to complete the execution 257 | * Close the SDx program 258 | 259 | ## Conclusion 260 | 261 | In this lab, you used the ChipScope Debug bridge and cores to perform hardware debugging. You also performed software debugging using the SDx GUI. 
262 | 263 | ## Appendix-I 264 | 265 | ### Steps to Add ChipScope Debug core and build the design 266 | 267 | * In the **Assistant** tab, expand **System > binary_container_1 > KVadd** 268 | * Select **KVAdd**, right-click and select **Settings...** 269 | * In the **Hardware Function Settings** window, click **Refresh**, and then click on the _ChipScope Debug_ option for the _KVAdd_ kernel 270 | 271 | ![](./images/debug_lab/enable_chipscope.png) 272 | 273 | * Click **Apply and close** 274 | * In the **Project Explorer** tab, expand **src > sdx_debug > KVAdd** and double-click on the **host_example.cpp** to open it in the editor window 275 | * Around line 240 (after the _clCreateKernel_ section) enter the following lines of code and save the file. This will pause the host software execution after creating kernel but before allocating buffer 276 | ``` 277 | printf("\nPress ENTER to continue after setting up ILA trigger..."); 278 | getc(stdin); 279 | ``` 280 | 281 | ![](./images/debug_lab/FigDebugLab-2.png) 282 | 283 | * Build the design 284 | 285 | ### Disable automatic rebuilding of the design 286 | 287 | When you export a project, and re-import it, the file modified dates may change and cause SDx to make the output executable and hardware kernel "out-of-date". This may cause the design to be automatically recompiled when an attempt is made to run the application from the GUI. 288 | 289 | * To disable automatic rebuilding, right click on the project folder, and select **C/C++ Build Settings** 290 | 291 | * Select **C/C++ Build** and click on the **Behavior** tab 292 | 293 | * Uncheck the following: 294 | * Build on resource save (Auto Build) 295 | * Build (Incremental build) 296 | * Clean 297 | 298 | When you export a project, and re-import it, these settings stop the bitstream being automatically rebuilt. 299 | 300 | ![](./images/debug_lab/turn_off_autobuild.png) 301 | 302 | If you need to rebuild the project, you can re-enable these settings. 
If you only need to update the host application, you can run the following command in a terminal in the project folder to rebuild the .exe only (where *debug.exe* is the name of the executable): 303 | 304 | ``` 305 | cd ./workspace/debug/System 306 | make debug.exe 307 | ``` 308 | 309 | ### References 310 | 311 | [SDx Debug techniques](https://www.xilinx.com/html_docs/xilinx2018_3/sdaccel_doc/dtp1532068222773.html) 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | -------------------------------------------------------------------------------- /images/Fig-binary_container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/Fig-binary_container.png -------------------------------------------------------------------------------- /images/Fig-build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/Fig-build.png -------------------------------------------------------------------------------- /images/Fig-hw_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/Fig-hw_button.png -------------------------------------------------------------------------------- /images/Fig-refresh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/Fig-refresh.png -------------------------------------------------------------------------------- /images/Fig-run.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/Fig-run.png -------------------------------------------------------------------------------- /images/SDX_IDE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/SDX_IDE.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-1.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-10.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-11.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-12.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-13-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-13-1.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-13-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-13-2.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-14.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-15.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-16.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-17.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-17.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-2.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-3.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-4-1.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-4-2.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-5.png 
-------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-6.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-7.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-8.png -------------------------------------------------------------------------------- /images/connecting_lab/FigConnectingLab-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/FigConnectingLab-9.png -------------------------------------------------------------------------------- /images/connecting_lab/nimbix/connect_to_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/nimbix/connect_to_instance.png -------------------------------------------------------------------------------- /images/connecting_lab/nimbix/linux_desktop.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/nimbix/linux_desktop.png -------------------------------------------------------------------------------- /images/connecting_lab/nimbix/select_desktop_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/nimbix/select_desktop_mode.png -------------------------------------------------------------------------------- /images/connecting_lab/nimbix/select_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/nimbix/select_instance.png -------------------------------------------------------------------------------- /images/connecting_lab/nimbix/select_instance_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/connecting_lab/nimbix/select_instance_config.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-10.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-11.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-11.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-12.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-13.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-14.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-15.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-16.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-17.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-17.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-18.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-2.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-3.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-4.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-5.png -------------------------------------------------------------------------------- 
/images/debug_lab/FigDebugLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-6.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-7.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-8.png -------------------------------------------------------------------------------- /images/debug_lab/FigDebugLab-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/FigDebugLab-9.png -------------------------------------------------------------------------------- /images/debug_lab/add_virtual_cable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/add_virtual_cable.png -------------------------------------------------------------------------------- /images/debug_lab/enable_chipscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/enable_chipscope.png 
-------------------------------------------------------------------------------- /images/debug_lab/hw_manager_open_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/hw_manager_open_target.png -------------------------------------------------------------------------------- /images/debug_lab/localhost_connected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/localhost_connected.png -------------------------------------------------------------------------------- /images/debug_lab/rtl_kernel_exe_properties.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/rtl_kernel_exe_properties.png -------------------------------------------------------------------------------- /images/debug_lab/run_trigger_immediate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/run_trigger_immediate.png -------------------------------------------------------------------------------- /images/debug_lab/set_virtual_cable_port.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/set_virtual_cable_port.png -------------------------------------------------------------------------------- /images/debug_lab/trigger_button.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/trigger_button.png -------------------------------------------------------------------------------- /images/debug_lab/turn_off_autobuild.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/debug_lab/turn_off_autobuild.png -------------------------------------------------------------------------------- /images/f1_platform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/f1_platform.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-10.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-11.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-13.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-14.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-14.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-15-1.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-15-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-15-2.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-15-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-15-3.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-16.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-18.png 
-------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-19.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-20.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-21-1.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-21.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-22.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-23.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-23.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-24.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-25.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-26.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-27.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-28.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-29.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-29.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-30.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-31.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-6.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-8.png -------------------------------------------------------------------------------- /images/helloworld/FigGUIflowLab-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/FigGUIflowLab-9.png 
-------------------------------------------------------------------------------- /images/helloworld/add_xclbin_argument.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/add_xclbin_argument.png -------------------------------------------------------------------------------- /images/helloworld/empty_application_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/empty_application_project.png -------------------------------------------------------------------------------- /images/helloworld/file_permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/file_permissions.png -------------------------------------------------------------------------------- /images/helloworld/import_from_dir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/import_from_dir.png -------------------------------------------------------------------------------- /images/helloworld/import_srcs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/import_srcs.png -------------------------------------------------------------------------------- /images/helloworld/sdx_hello_world_ide.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/sdx_hello_world_ide.png -------------------------------------------------------------------------------- /images/helloworld/select_srcs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/select_srcs.png -------------------------------------------------------------------------------- /images/helloworld/select_u200_platform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/select_u200_platform.png -------------------------------------------------------------------------------- /images/helloworld/select_vector_add_fn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/select_vector_add_fn.png -------------------------------------------------------------------------------- /images/helloworld/sys_estimate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/helloworld/sys_estimate.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-1.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-2.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-3.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-4.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-5.png -------------------------------------------------------------------------------- /images/makefile_lab/FigMakefileLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/FigMakefileLab-6.png -------------------------------------------------------------------------------- /images/makefile_lab/linker_flag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/makefile_lab/linker_flag.png 
-------------------------------------------------------------------------------- /images/nice_dcv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/nice_dcv.png -------------------------------------------------------------------------------- /images/nice_dcv_desktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/nice_dcv_desktop.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-11.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-12.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-13.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-14.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-14.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-15-1.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-15.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-16.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-17.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-18.png 
-------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-19.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-20-1.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-20.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-21.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-22-1.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-22.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-22.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-23.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-5.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-6.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-7.png -------------------------------------------------------------------------------- /images/optimization_lab/FigOptimizationLab-8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/FigOptimizationLab-8.png -------------------------------------------------------------------------------- /images/optimization_lab/application_timeline_after_host_optimiaztion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/application_timeline_after_host_optimiaztion.png -------------------------------------------------------------------------------- /images/optimization_lab/application_timeline_before_host_optimiaztion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/application_timeline_before_host_optimiaztion.png -------------------------------------------------------------------------------- /images/optimization_lab/compute_unit_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/compute_unit_settings.png -------------------------------------------------------------------------------- /images/optimization_lab/localhost_connected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/localhost_connected.png -------------------------------------------------------------------------------- /images/optimization_lab/zoon_buttons.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/optimization_lab/zoon_buttons.png -------------------------------------------------------------------------------- /images/putty_dcv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/putty_dcv.png -------------------------------------------------------------------------------- /images/putty_ip4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/putty_ip4.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-10.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-11.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-12.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-13.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-13.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-14.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-15.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-16.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-17.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-18.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-18.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-19.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-20.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-21.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-22.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-4.png 
-------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-5.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-6.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-7.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-8.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/FigRTLKernelLab-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/FigRTLKernelLab-9.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/hardware_emulation_application_timeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/hardware_emulation_application_timeline.png -------------------------------------------------------------------------------- /images/rtlkernel_lab/hw_emulation_completed_successfully.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/images/rtlkernel_lab/hw_emulation_completed_successfully.png -------------------------------------------------------------------------------- /rtl_kernel_wizard_lab.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx | 2. Introduction to SDx | 3. Optimization | 4. RTL Kernel Wizard | 5. Debugging | 6. SDx command line
14 | 15 | # Using the RTL Kernel Wizard 16 | 17 | ## Introduction 18 | 19 | This lab guides you through the steps involved in using the SDx RTL Kernel wizard. This allows RTL code to be used in an SDx design. 20 | 21 | ## Objectives 22 | 23 | After completing this lab, you will be able to: 24 | 25 | - Understand how to use the RTL Kernel wizard available in SDx 26 | - Create a new RTL based IP 27 | - Add the new IP to an application 28 | - Verify the functionality of the design in hardware 29 | 30 | ## Steps 31 | 32 | ### Create an SDx Project 33 | 34 | * Start SDx and select the default workspace (or continue with the workspace from the previous lab) 35 | 36 | * Click on the **Create Application Project** link on the _Welcome_ page 37 | 38 | * In the _New Project_'s page enter **rtl\_kernel** in the _Project name:_ field and click **Next** 39 | 40 | * Select your target platform and click **Next** 41 | 42 | * Select **Empty Application** and click **Finish** 43 | 44 | ### Create RTL\_Kernel Project using RTL Kernel Wizard 45 | 46 | * Make sure the **project.sdx** under _rtl\_kernel\_example_ in the **Project Explorer** tab is selected 47 | 48 | * Select **Xilinx > RTL Kernel Wizard…** 49 | 50 | ![](./images/rtlkernel_lab/FigRTLKernelLab-4.png) 51 | 52 | * Click **Next** 53 | 54 | * Change _Kernel_ name to **KVAdd**, (for Kernel Vector Addition), _Kernel vendor_ to **Xilinx** leaving the _Kernel library_ and _Number of clocks_ to the default values 55 |

56 | 57 |

58 |

59 | Setting general settings including name and number of clocks 60 |

61 | 62 | * Click **Next** 63 | 64 | * Leave _Number of scalar kernel input arguments_ set to the default value of **1** and the _Argument type_ as **uint** and click **Next** 65 |

66 | 67 |

68 |

69 | Selecting number of scalar arguments 70 |

71 | 72 | * We will have three arguments to the kernel (2 input and 1 output) which will be passed through Global Memory. Set _Number of AXI master interfaces_ to be **3** 73 | 74 | * Keep the width of each AXI master data width to **64** (note this is specified in bytes so this will give a width of 512 bits for each interface), name **A** as the argument name for *m00\_axi*, **B** for *m01\_axi*, and **Res** for *m02\_axi* 75 |

76 | 77 |

78 |

79 | Selecting number of AXI master interfaces, their widths, and naming them 80 |

81 | 82 | * Click **Next** and the summary page will be displayed showing a function prototype and register map for the kernel. 83 | 84 | Note the control register and the scalar operand are accessed via the S\_AXI\_CONTROL interface. The control register is at offset 0x0 and the scalar operand is at offset 0x10. 85 | 86 | ![](./images/rtlkernel_lab/FigRTLKernelLab-8.png) 87 | 88 | * Click **OK** to close the wizard 89 | 90 | Notice that a Vivado Project will be created and opened 91 | 92 | ![](./images/rtlkernel_lab/FigRTLKernelLab-9.png) 93 | 94 | ### Analyze the design created by the RTL Kernel wizard 95 | 96 | * Expand the hierarchy of the Design Sources in the Sources window and notice all the design sources, constraint file, and the basic testbench generated by the wizard 97 | 98 | ![](./images/rtlkernel_lab/FigRTLKernelLab-10.png) 99 | 100 | There is one module to handle the control signals (ap_start, ap_done, and ap_idle) and three master AXI channels to read source operands from, and write the result to DDR. The expanded m02_axi module shows *adder*, *read*, *write* instances. 101 | * Select **Flow Navigator > RTL ANALYSIS > Open Elaborated Design** which will analyze the design and open a schematic view. Click **OK** 102 | * You should see two top-level blocks: example and control as seen below 103 | 104 | ![](./images/rtlkernel_lab/FigRTLKernelLab-11.png) 105 | 106 | Notice the AXI Master interfaces are 64 bytes (or 512 bits) wide as specified earlier. 107 | 108 | * Double-click on the example block and observe the three hierarchical Master AXI blocks 109 | 110 | ![](./images/rtlkernel_lab/FigRTLKernelLab-12.png) 111 | 112 | * Zoom in into the top section and see the control logic the wizard has generated the ap_start, ap_idle, and ap_done control signals 113 | 114 | ![](./images/rtlkernel_lab/FigRTLKernelLab-13.png) 115 | 116 | * Traverse through one of the AXI interface blocks (e.g. 
m02) and observe that the design consists of a Read Master, Write Master, and an Adder. (Click on the image to download an enlarged version if necessary) 117 | 118 | ![](./images/rtlkernel_lab/FigRTLKernelLab-14.png) 119 | 120 | * Close the elaborated view by selecting **File > Close Elaborated Design** 121 | * Click **OK** 122 | 123 | ### Generate the RTL Kernel 124 | 125 | * Select **Flow > Generate RTL Kernel** 126 | * Click **OK** using the default option (Sources-only kernel) 127 | The packager will be run, generating the xo file which will be used in the design. 128 | * Click **OK**, and **Yes** to exit Vivado and return to SDx. 129 | 130 | ### Analyze the RTL kernel added to the SDx project 131 | 132 | * Expand the _src_ folder under the **rtl\_kernel\_example** 133 | Notice that the _sdx\_rtl\_kernel\_wizard_ folder has been added to the project. Expanding this folder shows the kernel (.xo) and a C++ file have been included. 134 | 135 | ![](./images/rtlkernel_lab/FigRTLKernelLab-15.png) 136 | 137 | * Double-click on the **host_example.cpp** to open it. 138 | * The _main_ function is defined around line 60. The number of words it transfers is 4096. 139 | * Notice around line 94 the source operands and expected results are initialized. 140 | * Around line 200 (from the `clCreateProgramWithBinary()` function) shows the loading of the xclbin and creating the OpenCL kernel (`clCreateKernel()`). 141 | * The following lines show how the buffers are created in the device memory and enqueued (`clCreateBuffer()`, `clEnqueueWriteBuffer()`). 142 | * Around lines 290, the arguments to the kernel are set (`clSetKernelArg()`), and the kernel is enqueued to be executed (`clEnqueueTask()`)). 143 | * Around line 320 results are read back (`clEnqueueReadBuffer()`) and compared to the expected results. 144 | * The _Shutdown and cleanup section_ shows releasing of the memory, program, and kernel. 
145 | 146 | ### Add the design as a hardware kernel, and build the project 147 | 148 | * Select **project.sdx** in the _Project Explorer_ tab to see the project settings page 149 | 150 | * Click on the **Add Hardware Function button** (![](./images/Fig-hw_button.png)) and select _KVAdd_ 151 | 152 | * Select **Emulation-HW** on the drop-down button of _Active build configuration_ 153 | 154 | ![](./images/rtlkernel_lab/FigRTLKernelLab-17.png) 155 | 156 | * Select **Project > Build Project** or click on the (![](./images/Fig-build.png)) button 157 | 158 | This will build the project including rtl\_kernel\_example.exe file under the Emulation-HW directory 159 | 160 | * Select **Run > Run Configurations…** to open the configurations window 161 | 162 | * Click on the **Arguments** tab and select **Automatically add binary container(s) to arguments** 163 | 164 | * Click **Apply**, and then click **Run** to run the application 165 | 166 | ![](./images/rtlkernel_lab/FigRTLKernelLab-19.png) 167 | 168 | * The Console tab shows that the test was completed successfully along with the data transfer rate 169 | 170 | ![](./images/rtlkernel_lab/hw_emulation_completed_successfully.png) 171 | 172 | * In the **Assistant** tab, expand **Emulation-HW > rtl_kernel_example-Default**, and double-click on the **Application Timeline** entry, expand all entries in the timeline graph, zoom appropriately and observe the transactions 173 | 174 | 175 | 176 | 177 | 178 | ![](./images/rtlkernel_lab/hardware_emulation_application_timeline.png) 179 | 180 | This example will be used as the starting point for the next lab, so it is not necessary to test the design in hardware now. 181 | 182 | ## Conclusion 183 | 184 | In this lab, you used the RTL Kernel wizard to create an example RTL adder application. You configured the template and saw the example code that was generated. You performed HW emulation and analyzed the application timeline. 
185 | 186 | --------------------------------------- 187 | 188 | Start the next lab: [6. Debug Lab](debug_lab.md) 189 | -------------------------------------------------------------------------------- /sdx_introduction.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx | 2. Introduction to SDx | 3. Optimization | 4. RTL Kernel Wizard | 5. Debugging | 6. SDx command line
14 | 15 | # SDx 16 | 17 | ## Introduction 18 | 19 | This lab guides you through the steps involved in creating an SDx project. After creating the project you will run software and hardware emulation to verify the functionality of the design. You can also test the design in hardware. 20 | 21 | ### Description of example application 22 | 23 | The source code for the example design will be provided. The design consists of a C++ host application and an OpenCL kernel. The OpenCL kernel is a simple vector addition. The elements of 2 vectors (A & B) will be added together, and the result returned in a third array (C). The host application will initialize the two input arrays, send data to the kernel, and read back the result. The first elements of the arrays will be initialised with 0, or a value passed to the function. The initial value for each subsequent element will be incremented. E.g. If the initialization value is 0, A & B will have the initial values [0,1,2,3 ...]. This will result in the following values returned to C [0,2,4,6 ...] 24 | 25 | You will compile and check a software only version of the application. The *vector add* OpenCL kernel will then be implemented as a hardware kernel. You will first build an emulation version of the design and run a simulation of the hardware kernel. You will then test the application with the kernel running in the FPGA. 26 | 27 | ## Objectives 28 | 29 | After completing this lab, you will be able to: 30 | 31 | - Create an project using the SDx GUI 32 | - Run Software Emulation to verify the functionality of a design 33 | - Run Hardware Emulation to verify the functionality of the generated hardware 34 | - Build the system and test it in hardware 35 | - Perform profile and application timeline analysis on the design running in hardware 36 | 37 | 38 | ## Steps 39 | ### Create an SDx Project 40 | 41 | * Launch SDx by executing **sdx** in a terminal window, or click on the **SDX** desktop icon if available. 
42 | You will be prompted to select a workspace directory 43 | 44 | * Click Launch to accept the default (usually ~/workspace) 45 | The Xilinx SDx IDE window will be displayed. 46 | 47 | ![](./images/SDX_IDE.png) 48 | 49 | * Click on **Create Application Project** 50 | The _Project Type_ page will be displayed 51 | 52 | * Enter **hello_world** as the _Project name_ and click **Next** 53 | 54 | * Select the target platform and click **Next** 55 | 56 | ![](./images/helloworld/select_u200_platform.png) 57 | 58 | * Select **Empty Application** and click **Finish** 59 | 60 | ![](./images/helloworld/empty_application_project.png) 61 | 62 | The SDx *development* view should open: 63 | 64 | ![](./images/helloworld/sdx_hello_world_ide.png) 65 | 66 | There are different *view* settings in SDx, and all perspectives are customizable. Panels can be added or removed, and the layout can be changed. For example, the *Debug* perspective has a different default layout and views. 67 | 68 | There are six main windows in this perspective: Project Explorer, Main panel (SDx Application Project Settings in the image above) , Assistant, Outline, multi-tab console, and Emulation Console. 
69 | 70 | * In the Project explorer, expand the *hello_world* folder if necessary, and *right click* on the **src** folder, and select **import** 71 | 72 | * In the *Import* window, select **General>File System** and click **Next** 73 | 74 | ![](./images/helloworld/import_srcs.png) 75 | 76 | * Browse to the ~/compute_acceleration/sources/helloworld_ocl/src directory, and click **OK** to select this directory 77 | 78 | ![](./images/helloworld/import_from_dir.png) 79 | 80 | * Select all five source files **krnl_vadd.cl**, **vadd.cpp**, **vadd.h**, **xcl.cpp**, **xcl.h** and click **Finish** 81 | 82 | ![](./images/helloworld/select_srcs.png) 83 | 84 | * **krnl_vadd.cl** is the OpenCL source for a simple vector addition kernel 85 | * **vadd.cpp/.h** are the host application source and header 86 | * **xcl.cpp/.h** are the source and header for some helpful OpenCL function wrappers 87 | 88 | ### Perform Software Emulation 89 | 90 | * Make sure the *SDx Application Project Settings* are visible in the main panel. If they are not, open **project.sdx** under _hello\_world_ in the **Project Explorer** tab. 91 | 92 | * In the project settings, in the **Hardware Functions** section, click on the _Add Hardware Function_ button icon (![alt tag](./images/Fig-hw_button.png)). 93 | 94 | SDx will automatically parse the source files, and will list functions defined in the design here. 95 | 96 | This design has only one function `krnl_vadd()` that is a candidate for hardware acceleration. 97 | 98 | ![](./images/helloworld/select_vector_add_fn.png) 99 | 100 | * Select the `krnl_vadd()` function and click **OK** 101 | 102 | This will automatically add a *binary container* and include the function inside it. 103 | 104 | * Click on the drop-down button of _Active build configuration_ and make sure **Emulation-SW** is selected. 
Alternatively, this can also be set from the **Project** menu **Build Configurations > Set Active > Emulation-SW** 105 | 106 | ![](./images/helloworld/FigGUIflowLab-6.png) 107 | 108 | * Click on the build (![alt tag](./images/Fig-build.png)) button or alternatively, select **Project > Build Project** 109 | 110 | This will build the project and generate hello_world.exe under the *Emulation-SW* directory 111 | 112 | * From the SDx menu, select **Run > Run Configurations…** to open the configurations window 113 | 114 | From here, arguments can be passed to the host application. The binary container needs to be passed to the host. 115 | 116 | As mentioned in the introduction, the two arrays that will be added together can be initialized by passing a value to the host application. This initialization value can also be set here. 117 | 118 | * Click on the **Arguments** tab and select **Automatically add binary container(s) arguments** 119 | 120 | **../binary_container_1.xclbin** should be automatically added to the text area. Add the initialization value for the arrays by typing a number into the box. In this case, type **0** after *../binary_container_1.xclbin* (with a space in between the two arguments). 121 | 122 | ![](./images/helloworld/FigGUIflowLab-9.png) 123 | 124 | * Click **Run** to save the configuration and run the application 125 | 126 | The application can also be run by clicking the Run button (![alt tag](./images/Fig-run.png)). 127 | 128 | The application will be run and the output will be displayed in the Console tab 129 | 130 | You should see a **TEST PASSED** message, preceded by the values of the elements of the arrays, and the result of the addition. 131 | 132 | ``` 133 | TEST PASSED 134 | ``` 135 | 136 | You can go back and change the initialization value passed to the host in the *Run Configuration*, and rerun the application to see different numerical results. 
137 | 138 | 139 | ### Perform Hardware Emulation 140 | 141 | The *Software Emulation* flow checks functional correctness of the software application, but it does not guarantee the correctness of the design on the FPGA target. The *Hardware (HW) Emulation* flow can be used to verify the functionality of the generated logic. This flow invokes the hardware simulator in the SDx environment. As a consequence, the HW Emulation flow will take a little longer to build and run than the SW Emulation flow. 142 | 143 | The Hardware Emulation flow is not cycle accurate, but provides more detailed profiling information than software emulation. It can be used to do some analysis and optimization of the performance of the application. 144 | 145 | * Click on the drop-down button of _Active build configuration_ and select **Emulation-HW** 146 | 147 | ![](./images/helloworld/FigGUIflowLab-8.png) 148 | 149 | * Click on the (![alt tag](./images/Fig-build.png)) button. 150 | 151 | This will build the project including hello\_world.exe file under the Emulation-HW directory 152 | 153 | * Select **Run > Run Configurations…** to open the configurations window 154 | * Click on the **Arguments** tab and check if _binary\_container\_1.xclbin_ is already assigned. Optionally set an initialization value as before. 
155 | 156 | * Click **Run** to run the application 157 | * The Console tab shows that the test was completed successfully along with the data transfer rate 158 | 159 | ```console 160 | TEST PASSED 161 | INFO: [SDx-EM 22] [Wall clock time: 11:36, Emulation time: 0.0418116 ms] 162 | Data transfer between kernel(s) and global memory(s) 163 | krnl_vadd_1:m_axi_gmem-DDR RD = 32.000 KB WR = 16.000 KB 164 | ``` 165 | 166 | ### Review the HLS Report 167 | 168 | * In the **Assistant** tab, under **Emulation-HW > binary\_container\_1 > krnl\_vadd** double-click on the **HLS Report** 169 | 170 | ![](./images/helloworld/FigGUIflowLab-11.png) 171 | 172 | The window will open showing the Synthesis report for the **krnl_vadd** accelerator. 173 | 174 | * Scroll down the window and observe the timing, latency, and loop performance results. 175 | 176 | Observe the target clock period. This will be 3.33 (ns) or 4.00 (ns) depending on the target (AWS/Alveo) you are using. You will see how to set the clock later. Check the estimated actual clock period, which should be less than the target, indicating that the timing has been met. 177 | 178 | ![](./images/helloworld/FigGUIflowLab-13.png) 179 | 180 | * Scroll down further and observe the resource utilization by the accelerator (again the numbers may be different to your results) 181 | 182 | ![](./images/helloworld/FigGUIflowLab-14.png) 183 | 184 | ### Review the profile summary report 185 | 186 | * In the **Assistant** tab under **Emulation-HW > hello\_world-Default** double-click on the **Profile Summary** entry 187 | 188 | The numbers in this report will vary depending on the value of the DATA_SIZE constant in vadd.cpp. When you have completed this section, you can go back and change DATA_SIZE, recompile and rerun the application, and check the updated profiling results. 189 | 190 | Notice the report window has four tabs: **Top Operations, Kernels and Compute Units, Data Transfers, OpenCL APIs**. 
191 | 192 | The *Top Operations* tab summarizes the profiling information for the design. There is only one kernel in this design. Arrays A and B are transferred to memory, then to the kernel, and the result C is written back from the kernel to memory. 193 | 194 | The PCIe interface between the host and FPGA is 512 bits, or 64 bytes. This will determine the average bytes per transfer, and the transfer efficiency. 195 | 196 | ![](./images/helloworld/FigGUIflowLab-16.png) 197 | 198 | * Click on the **Kernels & Compute Units** tab and observe the number of Enqueues (1), and the kernel execution time`. 199 | 200 | * Click on the **Data Transfers** tab. 201 | 202 | ​ ![](./images/helloworld/FigGUIflowLab-18.png) 203 | 204 | * Look at the transfers between *HOST and Global Memory* 205 | 206 | In the host application, A and B are written from *HOST* memory to *GLOBAL* memory and C is read back to *HOST* memory from *GLOBAL* memory. This is why the *WRITE* data is ~2x the amount of the *READ* data. There is only one transfer as the arrays are transferred as one block of data. 207 | 208 | * Look at the transfers between *Kernels and Global Memory*. In this example there is only one kernel. 209 | 210 | The kernel *READS* A and B, and *WRITES* C. This is why there are 2x the number of read transfers compared to write transfers. There are multiple transfers as each element of the array is read into the design sequentially. 211 | 212 | Observe the average size of the data transferred, and the estimated transfer rates. These profile summary will be useful when trying to optimize your own designs. 
213 | 214 | ### Review the System Estimate report 215 | 216 | * Double-click on the **System Estimate** entry under the **Emulation-HW > binary_container_1 > krnl_vadd** in the **Assistant** tab 217 | The report shows the estimated frequency and the resource utilization for the given kernel (krnl\_vadd) 218 | 219 | ![](./images/helloworld/sys_estimate.png) 220 | 221 | * Close SDx 222 | 223 | ### System Build 224 | 225 | At this stage, you could build the project, but as it takes some time to compile you can skip this step for now. See the Appendix below for instructions on how to build the hardware. 226 | 227 | ## Run the precompiled solution (Optional) 228 | 229 | As building the FPGA hardware takes some time, a precompiled solution is provided. The results should not be any different to the HW and SW emulation, so this step is optional. 230 | 231 | For AWS, execute the following in a new terminal, as this needs to be run as sudo 232 | 233 | ``` 234 | sudo sh 235 | source /opt/Xilinx/SDx/2018.3.op2405991/settings64.sh 236 | source /opt/xilinx/xrt/setup.sh 237 | export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/SDAccel/aws_platform/xilinx_aws-vu9p-f1-04261818_dynamic_5_0 238 | ``` 239 | 240 | * Start SDx (execute ```sdx``` from the terminal) and from the SDx file menu, select **import** 241 | * Again from the SDx file menu, select **import** 242 | * Expand *Xilinx* and select **SDx Project** and click **Next** 243 | * Choose *SDx project exported zip file* and click **Next** 244 | * Browse to **~/compute_acceleration/solutions/hello_world/[aws|u200]/hello_world_sol.zip** and click **OK** 245 | 246 | You should see a new *hello_world_* folder in the Project Explorer 247 | 248 | #### Set the executable file permissions 249 | 250 | Zip files do not preserve Unix file permissions, so the executable permissions must be modified manually. 
251 | 252 | * Expand *hello_world_sol > System* and right click on **hello_world_sol.exe** 253 | 254 | * Select **Execute** for the *Owner* permissions and click **Apply and Close** 255 | 256 | ![](./images/helloworld/file_permissions.png) 257 | 258 | #### Disable Auto building 259 | 260 | * Right click on the project folder, select C/C++ Build Settings 261 | * In C/C++ Build, in the **Behavior** tab make sure the following are unchecked: *Build on resource save (Auto Build)*, *Build (incremental build)* and *Clean* 262 | * Click **Apply and Close** 263 | 264 | #### Run the application 265 | 266 | * Open the project.sdx and select **System** as the *Active build configuration* 267 | * In the SDx *Run* menu, select **Run Configurations** 268 | * Expand OpenCL if necessary, and select the **hello_world-Default** configuration 269 | * Check that the *binary container* has been included in the *Arguments* tab and click **Run** 270 | * Note for AWS, don't click the box to automatically include the binary container. This will change the argument to ../binary_container_1.xclbin instead of ../binary_container_1.**aws**xclbin binary required for AWS. 271 | 272 | You should see the application output in the console. The output should be similar to what you saw for the SW and HW Emulation runs. 273 | 274 | ## Conclusion 275 | 276 | In this lab, you used SDx to create a project. You ran the design using the software and hardware emulation flows, verified the output, and reviewed the reports. 277 | 278 | --------------------------------------- 279 | 280 | Continue to the [optimization Lab](./Optimization_lab.md) 281 | 282 | --------------------------------------- 283 | 284 | ## Appendix: Build Full Hardware 285 | 286 | Note that building the project can take around two hours. Skip this step in a tutorial environment. 
287 | 288 | * Click on the drop-down button of _Active build configuration_ and select **System** or select **Project > Build Configurations > Set Active > System** 289 | 290 | * Click on the (![alt tag](./images/Fig-build.png)) button or select **Project > Build Project** 291 | This will build the project under the **System** directory. The built project will include **hello\_world.exe** file along with **binary\_container\_1.xclbin** file. 292 | 293 | ### Test on Alveo 294 | 295 | * Once the project is built, you can click on the *run* button (![alt tag](./images/Fig-run.png)) and verify you see the same results as before. For AWS, see the next step. 296 | 297 | ### Test on AWS (create AFI) 298 | 299 | Before the design can be run on AWS, an AFI (Amazon FPGA Image) is required. 300 | 301 | Once the full system is built, you can create an AFI by following the steps listed in [Creating an AFI](./Creating_AFI.md) 302 | 303 | -------------------------------------------------------------------------------- /setup_aws.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Connecting to AWS 16 | 17 | To get started with AWS, you will need an Amazon account. You will also need AWS credit to run the tutorial. If you are a professor or a student, you may be eligible for free credit by registering with [AWS educate](https://aws.amazon.com/education/awseducate/). 18 | 19 | 20 | 21 | ## Set up an AWS instance 22 | 23 | Use the following guide to set up an AWS instance. 24 | 25 | 26 | 27 | Make sure to use the FPGA Developer AMI. Version 1.6.0 includes the Xilinx SDx 2018.3 tools that this tutorial is based on: 28 | 29 | ### Logging in to AWS and starting an F1 instance 30 | 31 | 1. Once you have an account, log in to the EC2 AWS Console: 32 | 33 | https://console.aws.amazon.com/ec2 34 | 35 | This should bring you to the EC2 dashboard (Elastic Compute). 36 | 37 | In the EC2 dashboard, select Launch Instance. From here you should be able to start your instance. 38 | 39 | ## Additional setup 40 | 41 | You may want to do some additional setup to allow you to VNC to your instance. You can also follow the instructions in [Setup XUP AWS Workshop](setup_xup_aws_workshop.md) to connect to your instance. 42 | 43 | ### VNC server setup 44 | 45 | When setting up an instance for the first time, you need to install vncserver software. 46 | 47 | #### Install VNC server 48 | In a terminal, execute the following commands 49 | 50 | ``` 51 | sudo yum install -y tigervnc-server 52 | sudo yum groupinstall -y "Server with GUI" 53 | ``` 54 | 55 | When installing vncserver, you will be prompted to set up a password that you will need later. 56 | 57 | ### Start vncserver 58 | 59 | Each time you start an instance, you will need to start vncserver 60 | 61 | 62 | ``` 63 | vncserver -geometry 1920x1080 64 | ``` 65 | 66 | You can choose your preferred geometry (screensize) 67 | 68 | You should see a status message in the terminal once *vncserver* has started. 69 | 70 | Take note of the number after the “:” 71 | 72 | In this case, 1.
This is the port the VNC viewer will connect to on the VNC server and needs to be specified as a two digit number below: 01. 73 | 74 | Connect to AWS instance from VNC viewer 75 | From VNC viewer, specify the IP address of your AWS instance, followed by the VNC port number (as identified above), in this case :1 76 | 77 | When prompted, enter the VNC server password set up earlier. 78 | 79 | You should then be connected to the AWS instance. 80 | 81 | ### Verify XRT and SDx tools 82 | 83 | Open a terminal and verify the Xilinx SDx tools have been preinstalled and are on the path: 84 | 85 | ``` 86 | which sdx 87 | ``` 88 | 89 | Note that the XRT tools are installed (/opt/xilinx/xrt) but are not included on the path by default. 90 | 91 | ``` 92 | sudo chmod 777 /opt/xilinx/xrt/setup.sh 93 | ``` 94 | 95 | Execute the following to add `source /opt/xilinx/xrt/setup.sh` to ~/.bashrc (or manually edit ~/.bashrc and add the line). 96 | 97 | ``` 98 | echo "source /opt/xilinx/xrt/setup.sh" >> ~/.bashrc 99 | ``` 100 | 101 | AWS_FPGA_REPO_DIR is defined in /etc/profile.d/aws-f1.sh 102 | 103 | 104 | ``` 105 | cd ~/src/project_data 106 | git clone https://github.com/aws/aws-fpga 107 | cd $AWS_FPGA_REPO_DIR 108 | source sdaccel_setup.sh 109 | echo "export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/SDAccel/aws_platform/xilinx_aws-vu9p-f1-04261818_dynamic_5_0" >> ~/.bashrc 110 | 111 | ``` 112 | 113 | For more details see: 114 | 115 | https://github.com/aws/aws-fpga/blob/master/SDAccel/README.md 116 | 117 | 118 | -------------------------------------------------------------------------------- /setup_local_computer.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Setup SDx on your own computer 16 | 17 | To run (or build) these labs on your own computer, install SDAccel and the SDAccel license. For non-commercial/academic use, SDAccel licenses are available from the [Xilinx University Program](https://www.xilinx.com/university). 18 | 19 | [Download SDAccel 2018.3](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/sdaccel-development-environment/2018-3.html) and install the tools. 20 | 21 | [Download XRT and the U200 package](https://www.xilinx.com/products/boards-and-kits/alveo/u200.html#gettingStarted) for your computer, and install both packages. 22 | 23 | ### Setup the tools 24 | 25 | Add the following to your environment setup. 26 | 27 | ```csh 28 | source /opt/xilinx/xrt/setup.(c)sh 29 | source $XILINX_SDX/settings64.(c)sh 30 | setenv PLATFORM_REPO_PATHS 31 | ``` 32 | 33 | ```bash 34 | export PLATFORM_REPO_PATHS=$ALVEO_PLATFORM_INSTALLATION_DIRECTORY 35 | ``` -------------------------------------------------------------------------------- /setup_nimbix.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Connecting to Nimbix 16 | 17 | * Log in to Nimbix: https://platform.jarvice.com/ 18 | 19 | * Click *Compute* in the top left menu to select a compute instance 20 | 21 | * Type Xilinx to filter the list of instances 22 | 23 | ![](./images/connecting_lab/nimbix/select_instance.png) 24 | 25 | * Select the *Xilinx SDAccel Development* instance 26 | 27 | * Click on Desktop mode 28 | 29 | ![](./images/connecting_lab/nimbix/select_desktop_mode.png) 30 | 31 | * Select the instance you prefer. 32 | 33 | The smallest instance can be used for the labs. For the first part of the labs, you don't need to select an instance with Alveo hardware. 34 | 35 | ![](./images/connecting_lab/nimbix/select_instance_config.png) 36 | 37 | When the instance is ready, you will see the option to *Click here to connect*. 38 | 39 | * Click on the link to connect 40 | 41 | ![](./images/connecting_lab/nimbix/connect_to_instance.png) 42 | 43 | A Linux desktop will open in a new tab in your browser. 44 | 45 | ![](./images/connecting_lab/nimbix/linux_desktop.png) -------------------------------------------------------------------------------- /setup_sdx.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Setup SDx 16 | 17 | There are two main parts to this tutorial - using the [Xilinx SDx software](https://www.xilinx.com/products/design-tools/software-zone/sdaccel.html) and building (compiling) designs, and using and testing those designs in hardware. 18 | 19 | You can run this tutorial in different ways. 20 | 21 | * If you have an Alveo board, you can run all parts of the tutorial on a local machine. 22 | 23 | * You can use the SDx software in the cloud, with hardware in the cloud (AWS or Nimbix). 24 | 25 | * You can use the SDx software on a local machine for building designs, and only switch to the cloud to test in hardware. 26 | 27 | This tutorial shows how to use SDx with either AWS EC2 F1 or Alveo U200 (locally, or in the Nimbix cloud). Sources and precompiled solutions are provided for AWS EC2 F1 (f1.2xlarge) and Alveo U200. You may be able to use the SDx tutorial instructions with other cloud providers, and other hardware. 28 | 29 | Once you have decided how you want to run the tutorial, follow the appropriate instructions below. 30 | 31 | ## Local computer 32 | 33 | To use your own computer, [install and set up SDx and install the Alveo U200 packages](./setup_local_computer.md) 34 | 35 | ## Use Nimbix (Alveo) 36 | 37 | The Xilinx SDx tools and Alveo U200 hardware are available in the Nimbix cloud. Use the following instructions to [connect to a Nimbix Alveo instance](./setup_nimbix.md). A [free 100 hr Alveo trial](https://www.nimbix.net/alveo/) is currently available from Nimbix. This is the easiest way to work through this tutorial with Alveo U200 hardware. However, please note the debug lab is not currently supported on Nimbix as the Xilinx Virtual Cable is not available. 38 | 39 | ## AWS EC2 F1 40 | 41 | An [FPGA Developer AMI](https://aws.amazon.com/marketplace/pp/B06VVYBLZZ) (Amazon Machine Image) is available with the Xilinx SDx software preinstalled. This can be used to target AWS EC2 F1 hardware. An AMI is like a Virtual Machine image.
You can use this AMI and the following instructions to [set up and connect to an AWS instance](./setup_aws.md) 42 | 43 | You can also install Xilinx SDx on your local machine, build design offline, and use AWS F1 hardware for testing. See the Amazon guide to using [AWS EC2 FPGA Development Kit](https://github.com/aws/aws-fpga) for details on setting up your machine. 44 | 45 | ## XUP AWS Tutorial 46 | 47 | If you are attending a live instructor-led XUP AWS tutorial, preconfigured AWS F1 instances will be provided for you. Use the following instructions to [connect to your assigned AWS XUP tutorial instance](./setup_xup_aws_workshop.md) 48 | 49 | # Getting started with the tutorials 50 | 51 | Once you have setup your computer/cloud instance, you can *git clone* this repository to get started running the tutorial. The repository includes these instructions, and also a copy of source files, and solutions you will need for the tutorial. 52 | 53 | The tutorial assumes you will clone this repository to your Linux home area. If you choose to clone it somewhere else, you will need to adjust the path where specified in the tutorial instructions. 54 | 55 | ```console 56 | cd ~ 57 | git clone https://github.com/xupgit/compute_acceleration 58 | ``` 59 | 60 | Proceed to the first lab [introducing SDx](sdx_introduction.md) 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /setup_xup_aws_workshop.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

XUP SDx Labs (2018.3)

1. Setup SDx2. Introduction to SDx3. Optimization4. RTL Kernel Wizard5. Debugging6. SDx command line
14 | 15 | # Connecting to AWS 16 | 17 | ## Introduction 18 | 19 | The following instructions are for attendees on a live instructor-led workshop where an AWS F1 instance has been set up for you, and you have been provided with log-in details. If you are not attending a live workshop, go back to the [Setup SDx](./setup_sdx.md) page and follow one of the other options to work through these labs. 20 | 21 | This lab will guide you through the steps involved in connecting to a Xilinx workshop AWS EC2 F1 instance, and starting and stopping the instance. 22 | 23 | ## Objectives 24 | 25 | After completing this lab, you will be able to: 26 | 27 | - Connect to an AWS EC2 F1 instance using the provided credentials 28 | - Start an instance 29 | - Start an RDP (Remote Desktop Protocol) session 30 | - Close the RDP session 31 | - Stop the instance 32 | 33 | ## Steps 34 | Each registered participant in the Xilinx workshop has been allocated a pre-configured EC2 F1 instance and should have received an email with the following details: 35 | 36 | - Account ID, 37 | - IAM username, 38 | - Link to access a pre-configured EC2 F1 instance 39 | 40 | ### Logging in to AWS and starting an F1 instance 41 | 42 | * Follow the link provided by your instructor, or go to [https://console.aws.amazon.com/ec2](https://console.aws.amazon.com/ec2) to open a login page 43 | If you used the link, you should see a login page similar to the one shown here: 44 | 45 |

46 | 47 |

48 |

49 | Login page accessed through the provided link 50 |

51 | 52 | * Use the log in details provided by your instructor. 53 | 54 | * In the top right corner, using the drop-down button, select a region with F1 instances, such as **N. Virginia (US East)** or the region indicated by your instructor 55 | 56 |

57 | 58 |

59 |

60 | Selecting a region 61 |

62 | If you select the wrong region you may not see your instance. 63 | 64 | * Click on the **EC2** link on the dashboard or if not visible, then click on the _Services_ drop-down button and then click on **EC2** 65 | 66 |

67 | 68 | 69 |

70 |

71 | Accessing EC2 service 72 |

73 | 74 | * Click on the **Instances** link on the left panel 75 |

76 | 77 |

78 |

79 | Accessing Instances 80 |

81 | You may see several instances 82 | * Enter your username in the filter field just below the **Launch Instance** button and hit enter 83 |

84 | 85 |

86 |

87 | Filtering your instance 88 |

89 | * Making sure that your instance is selected, click on the **Actions > Instance State > Start** 90 |

91 | 92 |

93 |

94 | Starting an instance 95 |

96 | * Click on the **Yes, Start** button 97 | * Click on the refresh button(![alt tag](./images/Fig-refresh.png)) to see the updated status to _Running_ 98 |

99 | 100 |

101 |

102 | Running state 103 |

104 | * Make a note of the Public DNS and IPv4 Public IP which will be used by PuTTy and Remote Desktop (RDP) 105 |

106 | 107 |

108 |

109 | Assigned IP to the running instance 110 |

111 | 112 | 113 | 114 | ### Connecting to AWS instance using NICE DCV 115 | 116 | NICE DCV, as recommended by Amazon, will be used to remote desktop to the instance. 117 | 118 | * Download and install the appropriate NICE DCV client if necessary from here: https://download.nice-dcv.com 119 | 120 | The NICE DCV session has already been started on the instance provided. See the Appendix for details on how to set up a session. 121 | 122 | ### Start NICE DCV 123 | 124 | * Open the NICE DCV application, enter the *IPv4 Public IP* from the Amazon console and click **Open** 125 | 126 | ![](./images/nice_dcv.png) 127 | 128 | * When prompted, enter the username and password provided by your instructor to connect to the instance. 129 | 130 | ![](./images/nice_dcv_desktop.png) 131 | 132 | 133 | 134 | # Verify XRT and SDx tools 135 | 136 | - Right-click on the desktop and select **Open Terminal** and verify the Xilinx SDx tools have been preinstalled and are on the path by executing the following command: 137 | 138 | ``` 139 | which sdx 140 | ``` 141 | 142 | The XRT (Xilinx Run Time) tools are installed (/opt/xilinx/xrt) but are not included on the path by default. 143 | 144 | * Execute the following to change the permissions of the XRT setup file, and to automatically source the XRT tools. Make sure to only execute this once. 145 | 146 | 147 | ``` 148 | sudo chmod 774 /opt/xilinx/xrt/setup.sh 149 | echo "source /opt/xilinx/xrt/setup.sh" >> ~/.bashrc 150 | ``` 151 | 152 | For your reference, in the commands below, $AWS_FPGA_REPO_DIR has already been defined in the environment from: /etc/profile.d/aws-f1.sh 153 | 154 | * Execute the following to clone the *aws-fpga* repository and set up the Xilinx tools.
aws-fpga includes the AWS F1 tools, HDK and documentation: 155 | 156 | ``` 157 | cd ~/src/project_data 158 | git clone https://github.com/aws/aws-fpga 159 | cd $AWS_FPGA_REPO_DIR 160 | source sdaccel_setup.sh 161 | echo "export PLATFORM_REPO_PATHS=/home/centos/src/project_data/aws-fpga/SDAccel/aws_platform/xilinx_aws-vu9p-f1-04261818_dynamic_5_0" >> ~/.bashrc 162 | 163 | ``` 164 | 165 | For more details see: 166 | 167 | https://github.com/aws/aws-fpga/blob/master/SDAccel/README.md 168 | 169 | --------------------------------------- 170 | 171 | Return to [Setup SDx](./setup_sdx.md) and go to the **Getting started with the tutorials** section to finish setting up by cloning the tutorial labs. 172 | 173 | --------------------------------------- 174 | 175 | ## Appendix 176 | 177 | ## Set up the NICE DCV session 178 | 179 | Open PuTTY, enter the IPv4 Public IP address from the Amazon console, and click open 180 | 181 | ![](./images/putty_ip4.png) 182 | 183 | This should open a terminal to the AWS instance. 184 | 185 | * In the terminal, enter the following command to start the DCV server: 186 | 187 | ``` 188 | dcv create-session --type virtual --user centos centos 189 | ``` 190 | 191 | ![](./images/putty_dcv.png) 192 | 193 | * Stop the firewall 194 | 195 | ``` 196 | sudo systemctl disable firewalld 197 | sudo systemctl stop firewalld 198 | ``` 199 | 200 | 201 | 202 | ## Interacting with the Instance using Putty 203 | 204 | * Start **PuTTY** or your preferred SSH client 205 | 206 | * Enter _centos@<public\_dns\_entry>_ in the **Host Name** field and **22** in the _Port_ field 207 | Make sure that SSH is selected as the Connection type 208 |

209 | 210 |

211 |

212 | Session settings in PuTTY 213 |

214 | 215 | * Expand **SSH** under the _Connection_ in the left panel and click **Auth** 216 | 217 | * Click on the **Browse…** button, browse to where the private key has been stored 218 | If you don't have the private key file (as in workshop) you can skip this step 219 | 220 | * Click **Open** 221 |

222 | 223 |

224 |

225 | Selecting private key file 226 |

227 | 228 | * Click **Yes** 229 | The PuTTY window will open. It will ask for the password (in case of the workshop). Enter the provided password 230 | 231 |

232 | 233 |

234 |

235 | The PuTTY window showing the connection 236 |

237 | 238 | * Set a password for the RDP connection with the following command 239 | 240 | ``` 241 | sudo passwd 242 | ``` 243 | 244 | You will use the same password in the RDP connection. 245 | 246 | * Enter **exit** to close the session 247 | 248 | 249 | 250 | ### Connect using RDP (deprecated for 2018.3) 251 | 252 | **You can communicate with the instance using command line through PuTTY or Git Bash, and using GUI through remote desktop (RDP) connection.** 253 | 254 | - Start a remote desktop session 255 | 256 | - Enter the _IPv4_ address 257 | 258 | - Click on the **Show Options** 259 | 260 | ![](G:/cathalmccabe/awslabs/images/connecting_lab/FigConnectingLab-10.png) 261 | 262 | - Select the **Display** tab and select _True Color (24 bit)_ and click **Connect** 263 | 264 | ![](G:/cathalmccabe/awslabs/images/connecting_lab/FigConnectingLab-11.png) 265 | 266 | - A certificate warning will be displayed. Click **Yes** to open the RDP session 267 | 268 | - Enter centos as the username and enter the provided password and click **OK** 269 | 270 | ![](./images/connecting_lab/FigConnectingLab-12.png) -------------------------------------------------------------------------------- /slides/01_Course_Intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/01_Course_Intro.pdf -------------------------------------------------------------------------------- /slides/02_Intro_to_AWS_EC2_F1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/02_Intro_to_AWS_EC2_F1.pdf -------------------------------------------------------------------------------- /slides/03_SDAccel_Tool_Overview.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/03_SDAccel_Tool_Overview.pdf -------------------------------------------------------------------------------- /slides/04_SDAccel_Flows.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/04_SDAccel_Flows.pdf -------------------------------------------------------------------------------- /slides/05_Optimization_Techniques.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/05_Optimization_Techniques.pdf -------------------------------------------------------------------------------- /slides/06_RTL_Kernel_Wizard.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/06_RTL_Kernel_Wizard.pdf -------------------------------------------------------------------------------- /slides/07_Debugging.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/slides/07_Debugging.pdf -------------------------------------------------------------------------------- /solutions/hello_world/aws/awsf1_2xlarge_18_3_hello_world_sol.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/hello_world/aws/awsf1_2xlarge_18_3_hello_world_sol.sdx.zip -------------------------------------------------------------------------------- 
/solutions/hello_world/u200/u200_nimbix_ubuntu16_04_hello_world_sol.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/hello_world/u200/u200_nimbix_ubuntu16_04_hello_world_sol.sdx.zip -------------------------------------------------------------------------------- /solutions/hello_world/u200/u200_rh7_5_hello_world_sol_.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/hello_world/u200/u200_rh7_5_hello_world_sol_.sdx.zip -------------------------------------------------------------------------------- /solutions/optimization_lab/aws/aws_2xlarge_18_3_optimization_lab.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/optimization_lab/aws/aws_2xlarge_18_3_optimization_lab.sdx.zip -------------------------------------------------------------------------------- /solutions/optimization_lab/u200/u200_nimbix_ubuntu16_04_optimization_lab_sol.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/optimization_lab/u200/u200_nimbix_ubuntu16_04_optimization_lab_sol.sdx.zip -------------------------------------------------------------------------------- /solutions/optimization_lab/u200/u200_rh7_5_optimization_lab_sol.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/solutions/optimization_lab/u200/u200_rh7_5_optimization_lab_sol.sdx.zip 
-------------------------------------------------------------------------------- /sources/debug/aws/awsf1_2xlarge_18_3_debug.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/sources/debug/aws/awsf1_2xlarge_18_3_debug.sdx.zip -------------------------------------------------------------------------------- /sources/debug/u200/u200_nimbix_ubuntu16_04_rtl_kernel.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/sources/debug/u200/u200_nimbix_ubuntu16_04_rtl_kernel.sdx.zip -------------------------------------------------------------------------------- /sources/debug/u200/u200_rh7_5_debug.sdx.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xupgit/compute_acceleration/425ead9a60dd28f48f6b63d0abd8c4cd3d3976e8/sources/debug/u200/u200_rh7_5_debug.sdx.zip -------------------------------------------------------------------------------- /sources/helloworld_ocl/src/krnl_vadd.cl: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Vendor: Xilinx 3 | Associated Filename: krnl_vadd.cl 4 | Purpose: SDx vector addition example 5 | ******************************************************************************* 6 | Copyright (C) 2017 XILINX, Inc. 7 | 8 | This file contains confidential and proprietary information of Xilinx, Inc. and 9 | is protected under U.S. and international copyright and other intellectual 10 | property laws. 11 | 12 | DISCLAIMER 13 | This disclaimer is not a license and does not grant any rights to the materials 14 | distributed herewith. 
Except as otherwise provided in a valid license issued to 15 | you by Xilinx, and to the maximum extent permitted by applicable law: 16 | (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX 17 | HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, 18 | INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR 19 | FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether 20 | in contract or tort, including negligence, or under any other theory of 21 | liability) for any loss or damage of any kind or nature related to, arising under 22 | or in connection with these materials, including for any direct, or any indirect, 23 | special, incidental, or consequential loss or damage (including loss of data, 24 | profits, goodwill, or any type of loss or damage suffered as a result of any 25 | action brought by a third party) even if such damage or loss was reasonably 26 | foreseeable or Xilinx had been advised of the possibility of the same. 27 | 28 | CRITICAL APPLICATIONS 29 | Xilinx products are not designed or intended to be fail-safe, or for use in any 30 | application requiring fail-safe performance, such as life-support or safety 31 | devices or systems, Class III medical devices, nuclear facilities, applications 32 | related to the deployment of airbags, or any other applications that could lead 33 | to death, personal injury, or severe property or environmental damage 34 | (individually and collectively, "Critical Applications"). Customer assumes the 35 | sole risk and liability of any use of Xilinx products in Critical Applications, 36 | subject only to applicable laws and regulations governing limitations on product 37 | liability. 38 | 39 | THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT 40 | ALL TIMES. 
41 | 42 | *******************************************************************************/ 43 | 44 | //------------------------------------------------------------------------------ 45 | // 46 | // kernel: vadd 47 | // 48 | // Purpose: Demonstrate Vector Add in OpenCL 49 | // 50 | 51 | #define BUFFER_SIZE 256 52 | kernel __attribute__((reqd_work_group_size(1, 1, 1))) 53 | void krnl_vadd( 54 | global const int* a, 55 | global const int* b, 56 | global int* c, 57 | const int n_elements) 58 | { 59 | int arrayA[BUFFER_SIZE]; 60 | for (int i = 0 ; i < n_elements ; i += BUFFER_SIZE) 61 | { 62 | int size = BUFFER_SIZE; 63 | //boundary check 64 | if (i + size > n_elements) size = n_elements - i; 65 | 66 | //Burst reading A 67 | readA: for (int j = 0 ; j < size ; j++) 68 | arrayA[j] = a[i+j]; 69 | 70 | //Burst reading B and calculating C and Burst writing 71 | // to Global memory 72 | vadd_wrteC: for (int j = 0 ; j < size ; j++) 73 | c[i+j] = arrayA[j] + b[i+j]; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /sources/helloworld_ocl/src/vadd.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Vendor: Xilinx 3 | Associated Filename: vadd.cpp 4 | Purpose: SDAccel vector addition 5 | 6 | ******************************************************************************* 7 | Copyright (C) 2017 XILINX, Inc. 8 | 9 | This file contains confidential and proprietary information of Xilinx, Inc. and 10 | is protected under U.S. and international copyright and other intellectual 11 | property laws. 12 | 13 | DISCLAIMER 14 | This disclaimer is not a license and does not grant any rights to the materials 15 | distributed herewith. 
Except as otherwise provided in a valid license issued to 16 | you by Xilinx, and to the maximum extent permitted by applicable law: 17 | (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX 18 | HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, 19 | INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR 20 | FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether 21 | in contract or tort, including negligence, or under any other theory of 22 | liability) for any loss or damage of any kind or nature related to, arising under 23 | or in connection with these materials, including for any direct, or any indirect, 24 | special, incidental, or consequential loss or damage (including loss of data, 25 | profits, goodwill, or any type of loss or damage suffered as a result of any 26 | action brought by a third party) even if such damage or loss was reasonably 27 | foreseeable or Xilinx had been advised of the possibility of the same. 28 | 29 | CRITICAL APPLICATIONS 30 | Xilinx products are not designed or intended to be fail-safe, or for use in any 31 | application requiring fail-safe performance, such as life-support or safety 32 | devices or systems, Class III medical devices, nuclear facilities, applications 33 | related to the deployment of airbags, or any other applications that could lead 34 | to death, personal injury, or severe property or environmental damage 35 | (individually and collectively, "Critical Applications"). Customer assumes the 36 | sole risk and liability of any use of Xilinx products in Critical Applications, 37 | subject only to applicable laws and regulations governing limitations on product 38 | liability. 39 | 40 | THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT 41 | ALL TIMES. 
42 | 43 | *******************************************************************************/ 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include "vadd.h" 51 | 52 | static const int DATA_SIZE = 4096; 53 | 54 | int main(int argc, char* argv[]) { 55 | 56 | const char *kernel_name = "krnl_vadd"; // Open CL Kernel name 57 | int init_value; // Initialization value for vector arrays 58 | std::vector devices; // OpenCL devices 59 | cl::Device device; 60 | 61 | if(argc != 2 and argc != 3) { 62 | std::cout << "Usage: " << argv[0] <<" [vector initialization value]\n" << std::endl; 63 | return EXIT_FAILURE; 64 | } 65 | char* xclbinFilename = argv[1]; 66 | char* p; 67 | 68 | // Check if vector init value was passed, and convert to int, otherwise default to 0 69 | if(argc == 3){ 70 | errno = 0; 71 | long conv = strtol(argv[2], &p, 10); 72 | if(errno !=0 || *p != '\0' || conv > INT_MAX){ 73 | printf("Invalid vector initialization value %s\nValue should be an Integer\nExiting\n", argv[2]); 74 | return -1; 75 | }else{ 76 | init_value = conv; 77 | } 78 | }else{ 79 | init_value = 0; 80 | } 81 | 82 | // Compute the size of array in bytes 83 | size_t size_in_bytes = DATA_SIZE * sizeof(int); 84 | 85 | // Creates a vector of DATA_SIZE elements 86 | // using customized allocator for getting buffer alignment to 4k boundary 87 | std::vector> source_a(DATA_SIZE); 88 | std::vector> source_b(DATA_SIZE); 89 | std::vector> source_results(DATA_SIZE); 90 | 91 | // Read in a user defined initial value for the arrays 92 | 93 | printf("Init arrays\n"); 94 | // Initialize the arrays 95 | std::iota (std::begin(source_a), std::end(source_a), init_value); 96 | std::iota (std::begin(source_b), std::end(source_b), init_value); 97 | 98 | // Check for the Xilinx device on the current platform 99 | std::cout << "Get Xilinx platform" << std::endl; 100 | get_xilinx_platform(&device, &devices); 101 | 102 | // Creating Context and Command Queue for selected device 103 | 
cl::Context context(device); 104 | cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE); 105 | cl::Kernel krnl_vector_add; 106 | 107 | krnl_vector_add = load_xcl_bin(kernel_name, xclbinFilename, &context, &devices); 108 | 109 | // Allocate memory on the Device. The cl::Buffer objects can 110 | // be used to reference the memory locations on the device. 111 | cl::Buffer buffer_a(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 112 | size_in_bytes, source_a.data()); 113 | cl::Buffer buffer_b(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 114 | size_in_bytes, source_b.data()); 115 | cl::Buffer buffer_result(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, 116 | size_in_bytes, source_results.data()); 117 | 118 | // Data will be transferred from host memory over PCIe to the FPGA on-board 119 | // DDR memory. 120 | q.enqueueMigrateMemObjects({buffer_a,buffer_b},0/* 0 means from host*/); 121 | 122 | // set the kernel Arguments 123 | int narg=0; 124 | krnl_vector_add.setArg(narg++,buffer_a); 125 | krnl_vector_add.setArg(narg++,buffer_b); 126 | krnl_vector_add.setArg(narg++,buffer_result); 127 | krnl_vector_add.setArg(narg++,DATA_SIZE); 128 | 129 | /* 130 | Launch the Kernel 131 | */ 132 | q.enqueueTask(krnl_vector_add); 133 | 134 | // Get the results: Transfer data from FPGA DDR to host memory "source_results" 135 | q.enqueueMigrateMemObjects({buffer_result},CL_MIGRATE_MEM_OBJECT_HOST); 136 | q.finish(); 137 | 138 | // Verify the result 139 | int match = 0; 140 | for (int i = 0; i < DATA_SIZE; i++) { 141 | int host_result = source_a[i] + source_b[i]; 142 | printf(results_message.c_str(), source_a[i], source_b[i], source_results[i]); 143 | if (source_results[i] != host_result) { 144 | printf(error_message.c_str(), i, host_result, source_results[i]); 145 | match = 1; 146 | break; 147 | } 148 | } 149 | 150 | std::cout << "TEST " << (match ? "FAILED" : "PASSED") << std::endl; 151 | return (match ? 
EXIT_FAILURE : EXIT_SUCCESS); 152 | 153 | } 154 | -------------------------------------------------------------------------------- /sources/helloworld_ocl/src/vadd.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Vendor: Xilinx 3 | Associated Filename: vadd.h 4 | Purpose: SDAccel vector addition 5 | Revision History: January 28, 2016 6 | 7 | ******************************************************************************* 8 | Copyright (C) 2016 XILINX, Inc. 9 | 10 | This file contains confidential and proprietary information of Xilinx, Inc. and 11 | is protected under U.S. and international copyright and other intellectual 12 | property laws. 13 | 14 | DISCLAIMER 15 | This disclaimer is not a license and does not grant any rights to the materials 16 | distributed herewith. Except as otherwise provided in a valid license issued to 17 | you by Xilinx, and to the maximum extent permitted by applicable law: 18 | (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX 19 | HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, 20 | INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR 21 | FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether 22 | in contract or tort, including negligence, or under any other theory of 23 | liability) for any loss or damage of any kind or nature related to, arising under 24 | or in connection with these materials, including for any direct, or any indirect, 25 | special, incidental, or consequential loss or damage (including loss of data, 26 | profits, goodwill, or any type of loss or damage suffered as a result of any 27 | action brought by a third party) even if such damage or loss was reasonably 28 | foreseeable or Xilinx had been advised of the possibility of the same. 
29 | 30 | CRITICAL APPLICATIONS 31 | Xilinx products are not designed or intended to be fail-safe, or for use in any 32 | application requiring fail-safe performance, such as life-support or safety 33 | devices or systems, Class III medical devices, nuclear facilities, applications 34 | related to the deployment of airbags, or any other applications that could lead 35 | to death, personal injury, or severe property or environmental damage 36 | (individually and collectively, "Critical Applications"). Customer assumes the 37 | sole risk and liability of any use of Xilinx products in Critical Applications, 38 | subject only to applicable laws and regulations governing limitations on product 39 | liability. 40 | 41 | THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT 42 | ALL TIMES. 43 | 44 | *******************************************************************************/ 45 | 46 | #pragma once 47 | 48 | #define CL_HPP_CL_1_2_DEFAULT_BUILD 49 | #define CL_HPP_TARGET_OPENCL_VERSION 120 50 | #define CL_HPP_MINIMUM_OPENCL_VERSION 120 51 | #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1 52 | 53 | #include 54 | 55 | //TARGET_DEVICE macro needs to be passed from gcc command line 56 | #if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE) 57 | #define STR_VALUE(arg) #arg 58 | #define GET_STRING(name) STR_VALUE(name) 59 | #define TARGET_DEVICE GET_STRING(SDX_PLATFORM) 60 | #endif 61 | 62 | static const std::string error_message = 63 | "Error: Result mismatch:\n" 64 | "i = %d CPU result = %d Device result = %d\n"; 65 | 66 | static const std::string results_message = 67 | "%d + %d = %d\n"; 68 | 69 | //Customized buffer allocation for 4K boundary alignment 70 | template 71 | struct aligned_allocator 72 | { 73 | using value_type = T; 74 | T* allocate(std::size_t num) 75 | { 76 | void* ptr = nullptr; 77 | if (posix_memalign(&ptr,4096,num*sizeof(T))) 78 | throw std::bad_alloc(); 79 | return reinterpret_cast(ptr); 80 | } 81 | void 
deallocate(T* p, std::size_t num) 82 | { 83 | free(p); 84 | } 85 | }; 86 | 87 | 88 | int get_xilinx_platform(cl::Device *device, std::vector *devices){ 89 | 90 | //TARGET_DEVICE macro needs to be passed from gcc command line 91 | const char *target_device_name = TARGET_DEVICE; 92 | 93 | std::vector platforms; 94 | bool found_device = false; 95 | //traversing all Platforms To find Xilinx Platform and targeted 96 | //Device in Xilinx Platform 97 | cl::Platform::get(&platforms); 98 | for(size_t i = 0; (i < platforms.size() ) & (found_device == false) ;i++){ 99 | cl::Platform platform = platforms[i]; 100 | std::string platformName = platform.getInfo(); 101 | if ( platformName == "Xilinx"){ 102 | devices->clear(); 103 | platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, devices); 104 | 105 | //Traversing All Devices of Xilinx Platform 106 | for (size_t j = 0 ; j < devices->size() ; j++){ 107 | *device = (*devices)[j]; 108 | std::string deviceName = device->getInfo(); 109 | if (deviceName == target_device_name){ 110 | found_device = true; 111 | std::cout << "Found: " << deviceName << std::endl; 112 | break; 113 | } 114 | } 115 | } 116 | } 117 | if (found_device == false){ 118 | std::cout << "Error: Unable to find Target Device " 119 | << target_device_name << std::endl; 120 | return EXIT_FAILURE; 121 | } 122 | return 0; 123 | } 124 | 125 | cl::Kernel load_xcl_bin(const char* kernel_name, char* xclbinFilename, cl::Context* context, std::vector *devices){ 126 | // Load xclbin 127 | std::cout << "Loading: '" << xclbinFilename << "'\n"; 128 | std::ifstream bin_file(xclbinFilename, std::ifstream::binary); 129 | bin_file.seekg (0, bin_file.end); 130 | unsigned nb = bin_file.tellg(); 131 | bin_file.seekg (0, bin_file.beg); 132 | char *buf = new char [nb]; 133 | bin_file.read(buf, nb); 134 | 135 | // Creating Program from Binary File 136 | cl::Program::Binaries bins; 137 | bins.push_back({buf,nb}); 138 | devices->resize(1); 139 | cl::Program program(*context, *devices, bins); 140 | 
// This call will get the kernel object from program. A kernel is an 141 | // OpenCL function that is executed on the FPGA. 142 | cl::Kernel krnl_vector_add(program, kernel_name); 143 | return krnl_vector_add; 144 | } 145 | 146 | -------------------------------------------------------------------------------- /sources/helloworld_ocl/src/xcl.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | Vendor: Xilinx 3 | Associated Filename: xcl.c 4 | Purpose: SDAccel histogram equalization example 5 | Revision History: December 6, 2015 6 | 7 | ******************************************************************************* 8 | Copyright (C) 2015 XILINX, Inc. 9 | 10 | This file contains confidential and proprietary information of Xilinx, Inc. and 11 | is protected under U.S. and international copyright and other intellectual 12 | property laws. 13 | 14 | DISCLAIMER 15 | This disclaimer is not a license and does not grant any rights to the materials 16 | distributed herewith. 
Except as otherwise provided in a valid license issued to 17 | you by Xilinx, and to the maximum extent permitted by applicable law: 18 | (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX 19 | HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, 20 | INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR 21 | FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether 22 | in contract or tort, including negligence, or under any other theory of 23 | liability) for any loss or damage of any kind or nature related to, arising under 24 | or in connection with these materials, including for any direct, or any indirect, 25 | special, incidental, or consequential loss or damage (including loss of data, 26 | profits, goodwill, or any type of loss or damage suffered as a result of any 27 | action brought by a third party) even if such damage or loss was reasonably 28 | foreseeable or Xilinx had been advised of the possibility of the same. 29 | 30 | CRITICAL APPLICATIONS 31 | Xilinx products are not designed or intended to be fail-safe, or for use in any 32 | application requiring fail-safe performance, such as life-support or safety 33 | devices or systems, Class III medical devices, nuclear facilities, applications 34 | related to the deployment of airbags, or any other applications that could lead 35 | to death, personal injury, or severe property or environmental damage 36 | (individually and collectively, "Critical Applications"). Customer assumes the 37 | sole risk and liability of any use of Xilinx products in Critical Applications, 38 | subject only to applicable laws and regulations governing limitations on product 39 | liability. 40 | 41 | THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT 42 | ALL TIMES. 
43 | 44 | *******************************************************************************/ 45 | 46 | #include 47 | #include 48 | #include 49 | 50 | #include 51 | #include 52 | 53 | #include "xcl.h" 54 | 55 | static void* smalloc(size_t size) { 56 | void* ptr; 57 | 58 | ptr = malloc(size); 59 | 60 | if (ptr == NULL) { 61 | printf("Error: Cannot allocate memory\n"); 62 | printf("Test failed\n"); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | return ptr; 67 | } 68 | 69 | static int load_file_to_memory(const char *filename, char **result) { 70 | unsigned int size; 71 | 72 | FILE *f = fopen(filename, "rb"); 73 | if (f == NULL) { 74 | *result = NULL; 75 | printf("Error: Could not read file %s\n", filename); 76 | exit(EXIT_FAILURE); 77 | } 78 | 79 | fseek(f, 0, SEEK_END); 80 | size = ftell(f); 81 | fseek(f, 0, SEEK_SET); 82 | 83 | *result = (char *) smalloc(sizeof(char)*(size+1)); 84 | 85 | if (size != fread(*result, sizeof(char), size, f)) { 86 | free(*result); 87 | printf("Error: read of kernel failed\n"); 88 | exit(EXIT_FAILURE); 89 | } 90 | 91 | fclose(f); 92 | (*result)[size] = 0; 93 | 94 | return size; 95 | } 96 | 97 | xcl_world xcl_world_single(cl_device_type device_type, const char *target_vendor, 98 | const char *target_device) { 99 | int err; 100 | xcl_world world; 101 | cl_uint num_platforms; 102 | 103 | err = clGetPlatformIDs(0, NULL, &num_platforms); 104 | if (err != CL_SUCCESS) { 105 | printf("Error: no platforms available or OpenCL install broken"); 106 | printf("Test failed\n"); 107 | exit(EXIT_FAILURE); 108 | } 109 | 110 | cl_platform_id *platform_ids = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms); 111 | 112 | if (platform_ids == NULL) { 113 | printf("Error: Out of Memory\n"); 114 | printf("Test failed\n"); 115 | exit(EXIT_FAILURE); 116 | } 117 | 118 | err = clGetPlatformIDs(num_platforms, platform_ids, NULL); 119 | if (err != CL_SUCCESS) { 120 | printf("Error: Failed to find an OpenCL platform!\n"); 121 | printf("Test failed\n"); 122 | 
exit(EXIT_FAILURE); 123 | } 124 | 125 | int i; 126 | char cl_platform_vendor[1001]; 127 | //find target vendor if target_vendor is specified 128 | if (target_vendor != NULL) { 129 | for(i = 0; i < num_platforms; i++) { 130 | err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, 1000, (void *)cl_platform_vendor,NULL); 131 | if (err != CL_SUCCESS) { 132 | printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n"); 133 | printf("Test failed\n"); 134 | exit(EXIT_FAILURE); 135 | } 136 | if ((target_vendor != NULL) && (strcmp(cl_platform_vendor, target_vendor) == 0)) { 137 | printf("INFO: Selected platform %d from %s\n", i, cl_platform_vendor); 138 | world.platform_id = platform_ids[i]; 139 | break; 140 | } 141 | } 142 | } else { 143 | for(i = 0; i < num_platforms; i++) { 144 | err = clGetDeviceIDs(platform_ids[i], device_type, 145 | 1, &world.device_id, NULL); 146 | if (err == CL_SUCCESS) { 147 | world.platform_id = platform_ids[i]; 148 | break; 149 | } 150 | } 151 | } 152 | free(platform_ids); 153 | if (i == num_platforms) { 154 | printf("Error: Failed to find a platform\n"); 155 | printf("Test failed\n"); 156 | exit(EXIT_FAILURE); 157 | } 158 | 159 | if (target_device != NULL) { 160 | //find target device 161 | cl_device_id devices[16]; // compute device id 162 | cl_uint num_devices; 163 | char cl_device_name[100]; 164 | err = clGetDeviceIDs(world.platform_id, CL_DEVICE_TYPE_ACCELERATOR, 165 | 16, devices, &num_devices); 166 | if (err != CL_SUCCESS) { 167 | printf("Error: Failed to create a device group!\n"); 168 | printf("Test failed\n"); 169 | exit(EXIT_FAILURE); 170 | } 171 | 172 | //iterate all devices to select the target device. 
173 | for (i=0; i 51 | 52 | typedef struct { 53 | cl_platform_id platform_id; 54 | cl_device_id device_id; 55 | cl_context context; 56 | cl_command_queue command_queue; 57 | } xcl_world; 58 | 59 | /* xcl_world_single 60 | * 61 | * Description: 62 | * Setup an xcl_world for the case when there is a single 63 | * device in the system. 64 | * 65 | * Inputs: 66 | * device_type - the type of device (i.e. CL_DEVICE_TYPE_ACCELERATOR) 67 | * 68 | * Returns: 69 | * A struct containing the platform_id, device_id, context, and command 70 | * queue. 71 | */ 72 | xcl_world xcl_world_single(cl_device_type device_type, 73 | const char *target_vendor, 74 | const char *target_device); 75 | 76 | /* xcl_release_world 77 | * 78 | * Description: 79 | * Release memory used by xcl_world struct. 80 | * 81 | * Inputs: 82 | * world - xcl_world to release memory from. 83 | */ 84 | void xcl_release_world(xcl_world world); 85 | 86 | /* xcl_import_binary 87 | * 88 | * Description: 89 | * Import precompiled program (as commonly created by the Xilinx OpenCL 90 | * flow). 91 | * 92 | * Inputs: 93 | * world - xcl_world to import into. 94 | * krnl_file - file name of the kernel to import. 95 | * krnl_name - name of kernel. 96 | * 97 | * Returns: 98 | * An opencl kernel object that was created from krnl_name file. 99 | */ 100 | cl_kernel xcl_import_binary(xcl_world world, const char *krnl_file, const char *krnl_name); 101 | 102 | /* xcl_import_source 103 | * 104 | * Description: 105 | * Import opencl source code. 106 | * 107 | * Inputs: 108 | * world - xcl_world to import into. 109 | * krnl_file - file name of the kernel to import. 110 | * krnl_name - name of kernel. 111 | * 112 | * Returns: 113 | * An opencl kernel object that was created from krnl_name file. 
114 | */ 115 | cl_kernel xcl_import_source(xcl_world world, const char *krnl_file, const char *krnl_name); 116 | 117 | /* xcl_set_kernel_arg 118 | * 119 | * Description: 120 | * Set kernel arguments 121 | * 122 | * Inputs: 123 | * krnl - kernel to set values for 124 | * num - which kernel arg to set 125 | * size - size of argument 126 | * ptr - address of value 127 | */ 128 | void xcl_set_kernel_arg(cl_kernel krnl, cl_uint num, size_t size, const void *ptr); 129 | 130 | /* xcl_malloc 131 | * 132 | * Description: 133 | * Allocate memory for a buffer on the FPGA device. Exit program on 134 | * error. 135 | * 136 | * Inputs: 137 | * world - xcl_world of the device to create buffer on. 138 | * flags - passed to clCreateBuffer. 139 | * size - buffer size in bytes (like malloc). 140 | */ 141 | cl_mem xcl_malloc(xcl_world world, cl_mem_flags flags, size_t size); 142 | 143 | /* xcl_memcpy_to_device/xcl_memcpy_from_device 144 | * 145 | * Description: 146 | * Copy memory from the host to the FPGA device (or vice a verse.) The 147 | * memory on the FPGA must be allocated with xcl_malloc (or the lower 148 | * level opencl functions) 149 | * 150 | * Inputs: 151 | * world - xcl_world to copy the buffer into. 152 | * dest - memory address on the FPGA to copy to. 153 | * src - memory address on the host to copy from. 154 | * size - number of bytes in src to copy to dest. 155 | */ 156 | void xcl_memcpy_to_device(xcl_world world, cl_mem dest, void* src, 157 | size_t size); 158 | void xcl_memcpy_from_device(xcl_world world, void* dest, cl_mem src, 159 | size_t size); 160 | 161 | /* xcl_run_kernel3d 162 | * 163 | * Description: 164 | * Run a kernel with a 3 dimensional thread array. In this configuration, 165 | * there will be x*y*z threads created with a rank in each dimension. 166 | * 167 | * Inputs: 168 | * world - xcl_world to use for running the kernel. 169 | * krnl - application to run on the device. 170 | * x - number of threads in the x direction. 
171 | * y - number of threads in the y direction. 172 | * z - number of threads in the z direction. 173 | * 174 | * Returns: 175 | * For purposes of benchmarking, the return of this program is the length of 176 | * time that the kernel took to run to completion. 177 | */ 178 | unsigned long xcl_run_kernel3d(xcl_world world, cl_kernel krnl, 179 | size_t x, size_t y, size_t z); 180 | -------------------------------------------------------------------------------- /sources/optimization_lab/idct.cpp: -------------------------------------------------------------------------------- 1 | /********** 2 | Copyright (c) 2018, Xilinx, Inc. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********/ 29 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS 30 | #include "CL/opencl.h" 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | typedef short int16_t; 43 | typedef unsigned short uint16_t; 44 | 45 | void idctSoft(const int16_t block[64], const uint16_t q[64], int16_t outp[64], bool ignore_dc); 46 | 47 | /* *************************************************************************** 48 | 49 | aligned_allocator 50 | 51 | This struct provides an 4k alligned memory allocator. Using this 52 | allocator allows data objects to be aligned for efficient data 53 | transfer to the kernel. 54 | 55 | The struct provides an allocate and deallocate function 56 | 57 | *************************************************************************** */ 58 | template 59 | struct aligned_allocator 60 | { 61 | using value_type = T; 62 | T* allocate(std::size_t num) 63 | { 64 | void* ptr = nullptr; 65 | if (posix_memalign(&ptr,4096,num*sizeof(T))) 66 | throw std::bad_alloc(); 67 | return reinterpret_cast(ptr); 68 | } 69 | void deallocate(T* p, std::size_t num) 70 | { 71 | free(p); 72 | } 73 | }; 74 | 75 | /* *************************************************************************** 76 | 77 | smalloc 78 | 79 | Simple helper function to malloc memory of a specifc size. 
/* ***************************************************************************

   smalloc

   Simple helper function to malloc memory of a specific size. The
   function will exit the program if the memory can not be successfully
   allocated.

   *************************************************************************** */
static void* smalloc(size_t size) {
    void* ptr = malloc(size);

    if (ptr == NULL) {
        printf("Error: Cannot allocate memory\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

/* ***************************************************************************

   load_file_to_memory

   This function reads the file (filename) -- an xclbin -- into memory.
   The data is returned through *result as a NUL-terminated buffer; the
   return value is the file size in bytes (excluding the appended NUL).
   On any error the process exits.

   *************************************************************************** */
static int load_file_to_memory(const char *filename, char **result) {
    FILE *f = fopen(filename, "rb");
    if (f == NULL) {
        *result = NULL;
        printf("Error: Could not read file %s\n", filename);
        exit(EXIT_FAILURE);
    }

    fseek(f, 0, SEEK_END);
    // BUG FIX: ftell() returns -1L on failure; the original stored it
    // straight into an unsigned int, turning an error into a huge size.
    long size = ftell(f);
    if (size < 0) {
        fclose(f);
        printf("Error: Could not determine size of file %s\n", filename);
        exit(EXIT_FAILURE);
    }
    fseek(f, 0, SEEK_SET);

    *result = (char *) smalloc(sizeof(char) * (size + 1));

    if ((size_t)size != fread(*result, sizeof(char), (size_t)size, f)) {
        free(*result);
        fclose(f);  // BUG FIX: the original leaked the FILE* on this path
        printf("Error: read of kernel failed\n");
        exit(EXIT_FAILURE);
    }

    fclose(f);
    (*result)[size] = 0;

    return (int)size;
}

/* ***************************************************************************

   oclDct

   This class encapsulates all runtime kernel interaction through openCL.
   After the class is constructed, the objects are supposed to be
   initialized (init), before kernel communication and execution can be
   triggered through calls to write, run, and read. Once all transactions
   are enqueued, the user is expected to call finish to ensure all
   transactions are completed.

   *************************************************************************** */
144 | 145 | The class manages the synchronization events and allows to bulk 146 | enqueue transactions. All buffer management is performed in the oclDct 147 | class. 148 | 149 | *************************************************************************** */ 150 | class oclDct { 151 | 152 | #define NUM_SCHED 1 153 | 154 | public: 155 | oclDct(); 156 | ~oclDct(); 157 | 158 | void init(cl_context context, 159 | cl_device_id device, 160 | cl_kernel krnl, 161 | cl_command_queue q, 162 | size_t blocks); 163 | 164 | void write( 165 | size_t start, 166 | std::vector> *blocks, 167 | std::vector> *q, 168 | std::vector> *out, 169 | bool ignore_dc 170 | ); 171 | void run(); 172 | void read(); 173 | void finish(); 174 | private: 175 | cl_context mContext; 176 | cl_device_id mDevice; 177 | cl_kernel mKernel; 178 | cl_command_queue mQ; 179 | 180 | unsigned int mNumBlocks64; 181 | bool mInit; 182 | unsigned int mCount; 183 | bool mHasRun; 184 | 185 | cl_mem mInBufferVec[NUM_SCHED][2]; 186 | cl_mem mOutBufferVec[NUM_SCHED][1]; 187 | 188 | cl_mem *mInBuffer; 189 | cl_mem *mOutBuffer; 190 | int m_dev_ignore_dc; 191 | 192 | cl_event inEvVec[NUM_SCHED]; 193 | cl_event runEvVec[NUM_SCHED]; 194 | cl_event outEvVec[NUM_SCHED]; 195 | 196 | }; 197 | 198 | 199 | /* *************************************************************************** 200 | 201 | oclDct Constructor 202 | 203 | *************************************************************************** */ 204 | oclDct::oclDct() { 205 | mInit = false; 206 | mNumBlocks64 = 0; 207 | } 208 | 209 | 210 | /* *************************************************************************** 211 | 212 | oclDct Destructor 213 | 214 | *************************************************************************** */ 215 | oclDct::~oclDct() { 216 | } 217 | 218 | 219 | /* *************************************************************************** 220 | 221 | oclDct::init 222 | 223 | OclDct object initialization. 
This sets the internal state of the 224 | kernel interaction class. All general openCL objects are expected to 225 | be allocated externally and provided to the kernel interaction class. 226 | 227 | *************************************************************************** */ 228 | void oclDct::init(cl_context context, 229 | cl_device_id device, 230 | cl_kernel krnl, 231 | cl_command_queue q, 232 | size_t numBlocks64) 233 | { 234 | mContext = context; 235 | mDevice = device; 236 | mKernel = krnl; 237 | mQ = q; 238 | 239 | mNumBlocks64 = numBlocks64; 240 | 241 | assert(mNumBlocks64 == numBlocks64); // check that there was not a truncation 242 | mInit = true; 243 | mCount = 0; 244 | mHasRun = false; 245 | 246 | mInit = true; 247 | } 248 | 249 | 250 | /* *************************************************************************** 251 | 252 | oclDct::write 253 | 254 | This function manages the buffer allocation for the openCL kernel 255 | interaction before actually enqueuing the operands for kernel 256 | processing. Note all buffer and event management for a complete 257 | transaction is managed in this function. 
258 | 259 | *************************************************************************** */ 260 | void oclDct::write( 261 | size_t start, 262 | std::vector> *blocks, 263 | std::vector> *q, 264 | std::vector> *out, 265 | bool ignore_dc 266 | ) { 267 | 268 | if(mCount == NUM_SCHED) { 269 | mHasRun = true; 270 | mCount = 0; 271 | } 272 | 273 | if(mHasRun) { 274 | clWaitForEvents(1, &outEvVec[mCount]); 275 | 276 | clReleaseMemObject(mOutBufferVec[mCount][0]); 277 | clReleaseMemObject(mInBufferVec[mCount][0]); 278 | clReleaseMemObject(mInBufferVec[mCount][1]); 279 | 280 | clReleaseEvent(outEvVec[mCount]); 281 | clReleaseEvent(inEvVec[mCount]); 282 | clReleaseEvent(runEvVec[mCount]); 283 | 284 | } 285 | 286 | mInBuffer = &(mInBufferVec[mCount][0]); 287 | mOutBuffer = &(mOutBufferVec[mCount][0]); 288 | 289 | cl_int err; 290 | // Move Buffer over input vector 291 | mInBuffer[0] = clCreateBuffer(mContext, 292 | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 293 | mNumBlocks64*64*sizeof(int16_t), 294 | blocks->data() + mNumBlocks64*64*start, 295 | &err); 296 | 297 | mInBuffer[1] = clCreateBuffer(mContext, 298 | CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 299 | 64*sizeof(uint16_t), 300 | q->data(), 301 | &err); 302 | 303 | // Move Buffer over output vector 304 | mOutBuffer[0] =clCreateBuffer(mContext, 305 | CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, 306 | mNumBlocks64*64*sizeof(int16_t), 307 | out->data() + mNumBlocks64*64*start, 308 | &err); 309 | 310 | // Prepare Kernel to run 311 | m_dev_ignore_dc = ignore_dc ? 1 : 0; 312 | 313 | } 314 | 315 | 316 | /* *************************************************************************** 317 | 318 | oclDct::run 319 | 320 | This function sets the kernel arguments and enqueues the kernel 321 | execution. 
322 | 323 | *************************************************************************** */ 324 | void oclDct::run() { 325 | // Set the kernel arguments 326 | clSetKernelArg(mKernel, 0, sizeof(cl_mem), &mInBuffer[0]); 327 | clSetKernelArg(mKernel, 1, sizeof(cl_mem), &mInBuffer[1]); 328 | clSetKernelArg(mKernel, 2, sizeof(cl_mem), &mOutBuffer[0]); 329 | clSetKernelArg(mKernel, 3, sizeof(int), &m_dev_ignore_dc); 330 | clSetKernelArg(mKernel, 4, sizeof(unsigned int), &mNumBlocks64); 331 | 332 | // Schedule actual writing of data 333 | clEnqueueMigrateMemObjects(mQ, 2, mInBuffer, 0, 0, nullptr, &inEvVec[mCount]); 334 | 335 | clEnqueueTask(mQ, mKernel, 1, &inEvVec[mCount], &runEvVec[mCount]); 336 | } 337 | 338 | 339 | /* *************************************************************************** 340 | 341 | oclDct::read 342 | 343 | This function enqueues the read back operation of the results of the idct. 344 | 345 | *************************************************************************** */ 346 | void oclDct::read() { 347 | clEnqueueMigrateMemObjects(mQ, 1, mOutBuffer, CL_MIGRATE_MEM_OBJECT_HOST, 1, &runEvVec[mCount], &outEvVec[mCount]); 348 | mCount++; 349 | } 350 | 351 | 352 | /* *************************************************************************** 353 | 354 | oclDct::finish 355 | 356 | This function ensures kernel processing has completed for all 357 | transactions and it releases the allocated opencl objects. 
358 | 359 | *************************************************************************** */ 360 | void oclDct::finish() { 361 | clFinish(mQ); 362 | unsigned int delCount = mCount-1; 363 | if(mHasRun) { 364 | delCount = NUM_SCHED; 365 | } 366 | for(unsigned int i = 0; i< delCount; i++) { 367 | clReleaseMemObject(mOutBufferVec[i][0]); 368 | clReleaseMemObject(mInBufferVec[i][0]); 369 | clReleaseMemObject(mInBufferVec[i][1]); 370 | 371 | clReleaseEvent(inEvVec[i]); 372 | clReleaseEvent(runEvVec[i]); 373 | clReleaseEvent(outEvVec[i]); 374 | } 375 | } 376 | 377 | 378 | /* *************************************************************************** 379 | 380 | runFPGA 381 | 382 | This function guides the kernel execution of the idct algorithm. 383 | 384 | *************************************************************************** */ 385 | void runFPGA( 386 | size_t blocks, 387 | std::vector> &source_block, 388 | std::vector> &source_q, 389 | std::vector> &result_vpout, 390 | cl_command_queue q, 391 | bool ignore_dc, 392 | oclDct &cu, 393 | unsigned int numBlocks64 394 | ) { 395 | for(size_t j = 0; j < blocks/numBlocks64; j++) { 396 | cu.write(j, &source_block, &source_q, &result_vpout, ignore_dc); 397 | cu.run(); 398 | cu.read(); 399 | } 400 | 401 | cu.finish(); 402 | } 403 | 404 | 405 | 406 | /* *************************************************************************** 407 | 408 | runCPU 409 | 410 | This function performs the host code computation of the idct 411 | algorithm. 
412 | 413 | *************************************************************************** */ 414 | void runCPU( 415 | size_t blocks, 416 | std::vector> &source_block, 417 | std::vector> &source_q, 418 | std::vector> &golden_vpout, 419 | bool ignore_dc 420 | ) { 421 | for(size_t i = 0; i < blocks; i++){ 422 | idctSoft(&source_block[i*64], &source_q[0], &golden_vpout[i*64], ignore_dc); 423 | } 424 | } 425 | 426 | 427 | 428 | /* *************************************************************************** 429 | 430 | main 431 | 432 | This function is the main function of the idct program. It illustrates 433 | the basic opencl hostcode setup, followed by the idct execution on 434 | host (CPU) and an accelerated flow (FPGA). With a functional 435 | comparison between host and fpga exectuion. 436 | 437 | *************************************************************************** */ 438 | int main(int argc, char* argv[]) { 439 | 440 | char *xcl_mode = getenv("XCL_EMULATION_MODE"); 441 | 442 | if (argc != 2) { 443 | printf("Usage: %s \n", argv[0]); 444 | return EXIT_FAILURE; 445 | } 446 | 447 | char* binaryName = argv[1]; 448 | 449 | 450 | // *********** Allocate and initialize test vectors ********** 451 | 452 | // Blocks of 64 of int16_t 453 | size_t blocks = 1024*1024*4; 454 | 455 | // Limit blocks for emulation modes 456 | if (xcl_mode != NULL) { 457 | blocks = 1024; 458 | } 459 | 460 | bool ignore_dc = true; 461 | 462 | // Create input 463 | std::vector> source_block(64*blocks); 464 | std::vector> source_q(64); 465 | std::vector> golden_vpout(64*blocks); 466 | std::vector> result_vpout(64*blocks); 467 | 468 | for(size_t i = 0; i < blocks; i++){ 469 | for(size_t j = 0; j < 64; j++) { 470 | source_block[i*64 + j] = j; 471 | } 472 | } 473 | 474 | for(size_t j = 0; j < 64; j++) { 475 | source_q[j] = j; 476 | } 477 | 478 | 479 | // *********** Communication Parameters ********** 480 | int banks = 1; 481 | const size_t cus = banks; 482 | const size_t threads = cus; 483 | size_t 
numBlocks64 = 512; 484 | 485 | if (xcl_mode != NULL) { 486 | numBlocks64 = 256; 487 | } 488 | 489 | std::cout << "FPGA number of 64*int16_t blocks per transfer: " << numBlocks64 << std::endl; 490 | if(blocks%(threads*numBlocks64) != 0) { 491 | std::cout << "Error: The current implementation supports only full banks to be transfered" 492 | << " per thread" << std::endl; 493 | exit(1); 494 | } 495 | 496 | // *********** OpenCL Host Code Setup ********** 497 | 498 | // Connect to first platform 499 | int err; 500 | char cl_platform_vendor[1001]; 501 | char cl_platform_name[1001]; 502 | char cl_device_name[1001]; 503 | 504 | cl_platform_id platform_id; // platform id 505 | cl_device_id device_id; // compute device id 506 | cl_context context; // compute context 507 | 508 | // Get number of platforms 509 | cl_uint platform_count; 510 | clGetPlatformIDs(0, nullptr, &platform_count); 511 | 512 | // get all platforms 513 | std::vector platforms(platform_count); 514 | clGetPlatformIDs(platform_count, platforms.data(), nullptr); 515 | 516 | bool found = false; 517 | for (int p = 0; p < (int)platform_count; ++p) { 518 | platform_id = platforms[p]; 519 | clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL); 520 | clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL); 521 | if(!strcmp(cl_platform_vendor,"Xilinx")) { 522 | found = true; 523 | break; 524 | } 525 | } 526 | if (!found){ 527 | std::cout << "Platform Not Found\n"; 528 | return err; 529 | } 530 | 531 | err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ACCELERATOR, 1, &device_id, NULL); 532 | if (err != CL_SUCCESS) { 533 | std::cout << "FAILED TEST - Device\n"; 534 | return err; 535 | } 536 | 537 | context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); 538 | if (!context || (err != CL_SUCCESS)) { 539 | std::cout << "FAILED TEST - Context \n"; 540 | return err; 541 | } 542 | 543 | clGetDeviceInfo(device_id, CL_DEVICE_NAME, 1000, (void*)cl_device_name, 
NULL); 544 | 545 | std::cout << "DEVICE: " << cl_device_name << std::endl; 546 | 547 | std::cout << "Loading Bitstream: " << binaryName << std::endl; 548 | char *krnl_bin; 549 | size_t krnl_size; 550 | krnl_size = load_file_to_memory(binaryName, &krnl_bin); 551 | 552 | printf("INFO: Loaded file\n"); 553 | 554 | cl_program program = clCreateProgramWithBinary(context, 1, 555 | (const cl_device_id* ) &device_id, &krnl_size, 556 | (const unsigned char**) &krnl_bin, 557 | NULL, &err); 558 | 559 | 560 | // Create Kernel 561 | std::cout << "Create Kernel: krnl_idct" << std::endl; 562 | cl_kernel krnl = clCreateKernel(program, "krnl_idct", &err); 563 | 564 | // Create Command Queue 565 | cl_command_queue q = clCreateCommandQueue(context, device_id, 566 | CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); 567 | 568 | // Create compute units 569 | std::cout << "Create Compute Unit" << std::endl; 570 | oclDct cu; 571 | cu.init(context, device_id, krnl, q, numBlocks64); 572 | 573 | std::cout << "Setup complete" << std::endl; 574 | 575 | 576 | // *********** Host (CPU) execution ********** 577 | std::cout << "Running CPU version" << std::endl; 578 | auto cpu_begin = std::chrono::high_resolution_clock::now(); 579 | runCPU(blocks, source_block, source_q, golden_vpout, ignore_dc); 580 | auto cpu_end = std::chrono::high_resolution_clock::now(); 581 | 582 | 583 | // *********** Accelerator execution ********** 584 | std::cout << "Running FPGA version" << std::endl; 585 | auto fpga_begin = std::chrono::high_resolution_clock::now(); 586 | runFPGA(blocks, 587 | source_block, 588 | source_q, 589 | result_vpout, 590 | q, 591 | ignore_dc, 592 | cu, 593 | numBlocks64); 594 | auto fpga_end = std::chrono::high_resolution_clock::now(); 595 | 596 | 597 | // *********** OpenCL Host Code cleanup ********** 598 | 599 | clReleaseCommandQueue(q); 600 | clReleaseKernel(krnl); 601 | clReleaseProgram(program); 602 | clReleaseContext(context); 603 | 604 | 605 | // *********** 
Comparison (Host to Acceleration) ********** 606 | 607 | std::cout << "Runs complete validating results" << std::endl; 608 | 609 | int krnl_match = 0; 610 | for(size_t i = 0; i < 64*blocks; i++){ 611 | if(result_vpout[i] != golden_vpout[i]){ 612 | printf("Error: Result mismatch\n"); 613 | printf("i = %d CPU result = %d Krnl Result = %d\n", 614 | (int) i, golden_vpout[i], result_vpout[i]); 615 | krnl_match = 1; 616 | break; 617 | } 618 | } 619 | 620 | std::cout << "TEST " << (krnl_match ? "FAILED" : "PASSED") << std::endl; 621 | 622 | // *********** Computational Statistics ********** 623 | // 624 | // Only reported in the HW execution mode as wall clock time is meaningless in 625 | // emulation. 626 | // 627 | if (xcl_mode == NULL) { 628 | std::chrono::duration cpu_duration = cpu_end - cpu_begin; 629 | std::chrono::duration fpga_duration = fpga_end - fpga_begin; 630 | 631 | std::cout << "CPU Time: " << cpu_duration.count() << " s" << std::endl; 632 | std::cout << "CPU Throughput: " 633 | << (double) blocks*128 / cpu_duration.count() / (1024.0*1024.0) 634 | << " MB/s" << std::endl; 635 | std::cout << "FPGA Time: " << fpga_duration.count() << " s" << std::endl; 636 | std::cout << "FPGA Throughput: " 637 | << (double) blocks*128 / fpga_duration.count() / (1024.0*1024.0) 638 | << " MB/s" << std::endl; 639 | std::cout << "FPGA PCIe Throughput: " 640 | << (2*(double) blocks*128 + 128) / fpga_duration.count() / (1024.0*1024.0) 641 | << " MB/s" << std::endl; 642 | } else { 643 | std::cout << "RUN COMPLETE" << std::endl; 644 | } 645 | 646 | return (krnl_match ? EXIT_FAILURE : EXIT_SUCCESS); 647 | } 648 | 649 | 650 | 651 | /* *************************************************************************** 652 | 653 | idctSoft 654 | 655 | Original software implementation of IDCT algorithm used to generate 656 | golden reference data. 
*************************************************************************** */
// Reference (non-synthesized) 2-D 8x8 IDCT, computed as a row pass followed
// by a column pass of 1-D IDCTs in 32-bit fixed point.
//
// Parameters:
//   block     - 64 input DCT coefficients (8x8, row-major)
//   q         - 64 per-coefficient dequantization factors, applied on load
//   outp      - 64 output samples (8x8, row-major)
//   ignore_dc - when true, coefficient [0] is treated as zero in the
//               horizontal pass (the ternary on x0 below)
void idctSoft(const int16_t block[64],
              const uint16_t q[64],
              int16_t outp[64],
              bool ignore_dc) {
  // Row-pass results, kept at higher precision for the column pass.
  int32_t intermed[64];

  // Fixed-point cosine constants, scaled by 2048 (11 fractional bits).
  const uint16_t w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16)
  const uint16_t w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16)
  const uint16_t w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16)
  const uint16_t w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16)
  const uint16_t w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16)
  const uint16_t w7 = 565;  // 2048*sqrt(2)*cos(7*pi/16)

  // Precomputed sums/differences used by the butterfly stages.
  const uint16_t w1pw7 = w1 + w7;
  const uint16_t w1mw7 = w1 - w7;
  const uint16_t w2pw6 = w2 + w6;
  const uint16_t w2mw6 = w2 - w6;
  const uint16_t w3pw5 = w3 + w5;
  const uint16_t w3mw5 = w3 - w5;

  const uint16_t r2 = 181; // 256/sqrt(2)

  // Horizontal 1-D IDCT.
  for (int y = 0; y < 8; ++y) {
    int y8 = y * 8;
    // Load coefficients in butterfly order, dequantize, and prescale the
    // DC/4th terms by 2^11; +128 provides rounding for the later >>8.
    int32_t x0 = (((ignore_dc && y == 0)
                   ? 0 : (block[y8 + 0] * q[y8 + 0]) << 11)) + 128;
    int32_t x1 = (block[y8 + 4] * q[y8 + 4]) << 11;
    int32_t x2 = block[y8 + 6] * q[y8 + 6];
    int32_t x3 = block[y8 + 2] * q[y8 + 2];
    int32_t x4 = block[y8 + 1] * q[y8 + 1];
    int32_t x5 = block[y8 + 7] * q[y8 + 7];
    int32_t x6 = block[y8 + 5] * q[y8 + 5];
    int32_t x7 = block[y8 + 3] * q[y8 + 3];
    // If all the AC components are zero, then the IDCT is trivial.
    if (x1 ==0 && x2 == 0 && x3 == 0 && x4 == 0 && x5 == 0 && x6 == 0 && x7 == 0) {
      int32_t dc = (x0 - 128) >> 8; // coefficients[0] << 3
      intermed[y8 + 0] = dc;
      intermed[y8 + 1] = dc;
      intermed[y8 + 2] = dc;
      intermed[y8 + 3] = dc;
      intermed[y8 + 4] = dc;
      intermed[y8 + 5] = dc;
      intermed[y8 + 6] = dc;
      intermed[y8 + 7] = dc;
      continue;
    }

    // Prescale.

    // Stage 1.
    int32_t x8 = w7 * (x4 + x5);
    x4 = x8 + w1mw7*x4;
    x5 = x8 - w1pw7*x5;
    x8 = w3 * (x6 + x7);
    x6 = x8 - w3mw5*x6;
    x7 = x8 - w3pw5*x7;

    // Stage 2.
    x8 = x0 + x1;
    x0 -= x1;
    x1 = w6 * (x3 + x2);
    x2 = x1 - w2pw6*x2;
    x3 = x1 + w2mw6*x3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    // Stage 3.
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (r2*(x4+x5) + 128) >> 8;
    x4 = (r2*(x4-x5) + 128) >> 8;

    // Stage 4: final butterflies; >>8 removes the constant scaling.
    intermed[y8+0] = (x7 + x1) >> 8;
    intermed[y8+1] = (x3 + x2) >> 8;
    intermed[y8+2] = (x0 + x4) >> 8;
    intermed[y8+3] = (x8 + x6) >> 8;
    intermed[y8+4] = (x8 - x6) >> 8;
    intermed[y8+5] = (x0 - x4) >> 8;
    intermed[y8+6] = (x3 - x2) >> 8;
    intermed[y8+7] = (x7 - x1) >> 8;
  }

  // Vertical 1-D IDCT.
  for (int32_t x = 0; x < 8; ++x) {
    // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
    // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
    // we do not bother to check for the all-zero case.

    // Prescale. +8192 = rounding bias for the final >>11 shifts.
    int32_t y0 = (intermed[8*0+x] << 8) + 8192;
    int32_t y1 = intermed[8*4+x] << 8;
    int32_t y2 = intermed[8*6+x];
    int32_t y3 = intermed[8*2+x];
    int32_t y4 = intermed[8*1+x];
    int32_t y5 = intermed[8*7+x];
    int32_t y6 = intermed[8*5+x];
    int32_t y7 = intermed[8*3+x];

    // Stage 1. (+4 then >>3 rounds the intermediate products.)
    int32_t y8 = w7*(y4+y5) + 4;
    y4 = (y8 + w1mw7*y4) >> 3;
    y5 = (y8 - w1pw7*y5) >> 3;
    y8 = w3*(y6+y7) + 4;
    y6 = (y8 - w3mw5*y6) >> 3;
    y7 = (y8 - w3pw5*y7) >> 3;

    // Stage 2.
    y8 = y0 + y1;
    y0 -= y1;
    y1 = w6*(y3+y2) + 4;
    y2 = (y1 - w2pw6*y2) >> 3;
    y3 = (y1 + w2mw6*y3) >> 3;
    y1 = y4 + y6;
    y4 -= y6;
    y6 = y5 + y7;
    y5 -= y7;

    // Stage 3.
    y7 = y8 + y3;
    y8 -= y3;
    y3 = y0 + y2;
    y0 -= y2;
    y2 = (r2*(y4+y5) + 128) >> 8;
    y4 = (r2*(y4-y5) + 128) >> 8;

    // Stage 4: final outputs, scaled back down by >>11.
    outp[8*0+x] = (y7 + y1) >> 11;
    outp[8*1+x] = (y3 + y2) >> 11;
    outp[8*2+x] = (y0 + y4) >> 11;
    outp[8*3+x] = (y8 + y6) >> 11;
    outp[8*4+x] = (y8 - y6) >> 11;
    outp[8*5+x] = (y0 - y4) >> 11;
    outp[8*6+x] = (y3 - y2) >> 11;
    outp[8*7+x] = (y7 - y1) >> 11;
  }
}
--------------------------------------------------------------------------------
/sources/optimization_lab/krnl_idct.cpp:
--------------------------------------------------------------------------------
/**********
Copyright (c) 2018, Xilinx, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 27 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | **********/ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | typedef short int16_t; 36 | typedef unsigned short uint16_t; 37 | typedef int int32_t; 38 | 39 | /* *************************************************************************** 40 | 41 | reg 42 | 43 | Simple bridge function which is prohibited to be inlined during 44 | synthesis which forces the insertion of registers. 45 | 46 | *************************************************************************** */ 47 | template 48 | reg_t reg(reg_t x) { 49 | #pragma HLS INLINE off 50 | return x; 51 | } 52 | 53 | 54 | 55 | /* *************************************************************************** 56 | 57 | idct 58 | 59 | Idct algorithm description used to describe the actual synthesizable 60 | idct behavior. 
61 | 62 | *************************************************************************** */ 63 | void idct(const int16_t block[64], 64 | const uint16_t q[64], 65 | int16_t outp[64], 66 | bool ignore_dc) { 67 | #pragma HLS INLINE 68 | 69 | int32_t intermed[64]; 70 | 71 | const uint16_t w1 = 2841; // 2048*sqrt(2)*cos(1*pi/16) 72 | const uint16_t w2 = 2676; // 2048*sqrt(2)*cos(2*pi/16) 73 | const uint16_t w3 = 2408; // 2048*sqrt(2)*cos(3*pi/16) 74 | const uint16_t w5 = 1609; // 2048*sqrt(2)*cos(5*pi/16) 75 | const uint16_t w6 = 1108; // 2048*sqrt(2)*cos(6*pi/16) 76 | const uint16_t w7 = 565; // 2048*sqrt(2)*cos(7*pi/16) 77 | 78 | const uint16_t w1pw7 = w1 + w7; 79 | const uint16_t w1mw7 = w1 - w7; 80 | const uint16_t w2pw6 = w2 + w6; 81 | const uint16_t w2mw6 = w2 - w6; 82 | const uint16_t w3pw5 = w3 + w5; 83 | const uint16_t w3mw5 = w3 - w5; 84 | 85 | const uint16_t r2 = 181; // 256/sqrt(2) 86 | 87 | // Horizontal 1-D IDCT. 88 | for (int y = 0; y < 8; ++y) { 89 | int y8 = y * 8; 90 | int32_t x0 = (((ignore_dc && y == 0) 91 | ? 0 : (block[y8 + 0] * q[y8 + 0]) << 11)) + 128; 92 | int32_t x1 = (block[y8 + 4] * q[y8 + 4]) << 11; 93 | int32_t x2 = block[y8 + 6] * q[y8 + 6]; 94 | int32_t x3 = block[y8 + 2] * q[y8 + 2]; 95 | int32_t x4 = block[y8 + 1] * q[y8 + 1]; 96 | int32_t x5 = block[y8 + 7] * q[y8 + 7]; 97 | int32_t x6 = block[y8 + 5] * q[y8 + 5]; 98 | int32_t x7 = block[y8 + 3] * q[y8 + 3]; 99 | // If all the AC components are zero, then the IDCT is trivial. 100 | if (x1 ==0 && x2 == 0 && x3 == 0 && x4 == 0 && x5 == 0 && x6 == 0 && x7 == 0) { 101 | int32_t dc = (x0 - 128) >> 8; // coefficients[0] << 3 102 | intermed[y8 + 0] = dc; 103 | intermed[y8 + 1] = dc; 104 | intermed[y8 + 2] = dc; 105 | intermed[y8 + 3] = dc; 106 | intermed[y8 + 4] = dc; 107 | intermed[y8 + 5] = dc; 108 | intermed[y8 + 6] = dc; 109 | intermed[y8 + 7] = dc; 110 | continue; 111 | } 112 | 113 | // Prescale. 114 | 115 | // Stage 1. 
116 | int32_t x8 = w7 * (x4 + x5); 117 | x4 = x8 + w1mw7*x4; 118 | x5 = x8 - w1pw7*x5; 119 | x8 = w3 * (x6 + x7); 120 | x6 = x8 - w3mw5*x6; 121 | x7 = x8 - w3pw5*x7; 122 | 123 | // Stage 2. 124 | x8 = x0 + x1; 125 | x0 -= x1; 126 | x1 = w6 * (x3 + x2); 127 | x2 = x1 - w2pw6*x2; 128 | x3 = x1 + w2mw6*x3; 129 | x1 = x4 + x6; 130 | x4 -= x6; 131 | x6 = x5 + x7; 132 | x5 -= x7; 133 | 134 | // Stage 3. 135 | x7 = x8 + x3; 136 | x8 -= x3; 137 | x3 = x0 + x2; 138 | x0 -= x2; 139 | x2 = (r2*(x4+x5) + 128) >> 8; 140 | x4 = (r2*(x4-x5) + 128) >> 8; 141 | 142 | // Stage 4. 143 | intermed[y8+0] = (x7 + x1) >> 8; 144 | intermed[y8+1] = (x3 + x2) >> 8; 145 | intermed[y8+2] = (x0 + x4) >> 8; 146 | intermed[y8+3] = (x8 + x6) >> 8; 147 | intermed[y8+4] = (x8 - x6) >> 8; 148 | intermed[y8+5] = (x0 - x4) >> 8; 149 | intermed[y8+6] = (x3 - x2) >> 8; 150 | intermed[y8+7] = (x7 - x1) >> 8; 151 | } 152 | 153 | // Vertical 1-D IDCT. 154 | for (int32_t x = 0; x < 8; ++x) { 155 | // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial. 156 | // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so 157 | // we do not bother to check for the all-zero case. 158 | 159 | // Prescale. 160 | int32_t y0 = (intermed[8*0+x] << 8) + 8192; 161 | int32_t y1 = intermed[8*4+x] << 8; 162 | int32_t y2 = intermed[8*6+x]; 163 | int32_t y3 = intermed[8*2+x]; 164 | int32_t y4 = intermed[8*1+x]; 165 | int32_t y5 = intermed[8*7+x]; 166 | int32_t y6 = intermed[8*5+x]; 167 | int32_t y7 = intermed[8*3+x]; 168 | 169 | // Stage 1. 170 | int32_t y8 = reg(w7*reg(y4+y5)) + 4; 171 | y4 = (y8 + reg(w1mw7*y4)) >> 3; 172 | y5 = (y8 - reg(w1pw7*y5)) >> 3; 173 | y8 = reg(w3*reg(y6+y7)) + 4; 174 | y6 = (y8 - reg(w3mw5*y6)) >> 3; 175 | y7 = (y8 - reg(w3pw5*y7)) >> 3; 176 | 177 | // Stage 2. 
178 | y8 = y0 + y1; 179 | y0 -= y1; 180 | y1 = reg(w6*reg(y3+y2)) + 4; 181 | y2 = (y1 - reg(w2pw6*y2)) >> 3; 182 | y3 = (y1 + reg(w2mw6*y3)) >> 3; 183 | y1 = y4 + y6; 184 | y4 -= y6; 185 | y6 = y5 + y7; 186 | y5 -= y7; 187 | 188 | // Stage 3. 189 | y7 = y8 + y3; 190 | y8 -= y3; 191 | y3 = y0 + y2; 192 | y0 -= y2; 193 | y2 = (reg(r2*reg(y4+y5)) + 128) >> 8; 194 | y4 = (reg(r2*reg(y4-y5)) + 128) >> 8; 195 | 196 | // Stage 4. 197 | outp[8*0+x] = (y7 + y1) >> 11; 198 | outp[8*1+x] = (y3 + y2) >> 11; 199 | outp[8*2+x] = (y0 + y4) >> 11; 200 | outp[8*3+x] = (y8 + y6) >> 11; 201 | outp[8*4+x] = (y8 - y6) >> 11; 202 | outp[8*5+x] = (y0 - y4) >> 11; 203 | outp[8*6+x] = (y3 - y2) >> 11; 204 | outp[8*7+x] = (y7 - y1) >> 11; 205 | } 206 | } 207 | 208 | typedef ap_uint<512> uint512_t; 209 | typedef ap_int<512> int512_t; 210 | 211 | 212 | 213 | /* *************************************************************************** 214 | 215 | read_blocks 216 | 217 | Dataflow block used to interface from input memory to streaming input 218 | channels. 219 | 220 | *************************************************************************** */ 221 | template 222 | void read_blocks(const out_t *in, hls::stream &out, unsigned int blocks) { 223 | for(unsigned int i = 0; i < blocks*2; i++) { 224 | #pragma HLS loop_tripcount min=2048 max=2048 225 | #pragma HLS PIPELINE II=1 226 | out.write(in[i]); 227 | } 228 | } 229 | 230 | 231 | 232 | /* *************************************************************************** 233 | 234 | execute 235 | 236 | Dataflow block used to manage full block computation. It uses wide 237 | arrays for single block computation to allow efficient access with 238 | ii=2 for the 8x8 data elements. 
239 | 240 | *************************************************************************** */ 241 | void execute(hls::stream &iblock, 242 | hls::stream &iq, 243 | hls::stream &ivoutp, 244 | bool ignore_dc, 245 | unsigned int blocks) { 246 | for(unsigned int i = 0; i < blocks; i++) { 247 | /* Use II=2 here as we this will equalize all the dataflow processes and 248 | * save resources */ 249 | #pragma HLS loop_tripcount min=1024 max=1024 250 | #pragma HLS PIPELINE II=2 251 | 252 | int16_t iiblock[64]; 253 | uint16_t iiq[64]; 254 | int16_t iivoutp[64]; 255 | 256 | for(short j = 0; j < 64/32; j++) { 257 | if(i==0) { 258 | ap_uint<512> tmp; 259 | tmp = iq.read(); 260 | for(short k = 0; k < 32; k++) { 261 | iiq[j*32+k] = tmp(16*(k+1)-1, 16*k); 262 | } 263 | } 264 | } 265 | 266 | for(short j = 0; j < 64/32; j++) { 267 | ap_int<512> tmp; 268 | tmp = iblock.read(); 269 | for(short k = 0; k < 32; k++) { 270 | iiblock[j*32+k] = tmp(16*(k+1)-1, 16*k); 271 | } 272 | } 273 | 274 | idct(iiblock, iiq, iivoutp, ignore_dc); 275 | 276 | for(short j = 0; j < 64/32; j++) { 277 | ap_int<512> tmp; 278 | for(short k = 0; k < 32; k++) { 279 | tmp(16*(k+1)-1, 16*k) = iivoutp[j*32+k]; 280 | } 281 | ivoutp.write(tmp); 282 | } 283 | } 284 | } 285 | 286 | 287 | 288 | /* *************************************************************************** 289 | 290 | write_blocks 291 | 292 | Dataflow block used to interface from streaming output channel to 293 | output memory. 
294 | 295 | *************************************************************************** */ 296 | void write_blocks(ap_int<512> *out, hls::stream &in, unsigned int blocks) { 297 | for(unsigned int i = 0; i < blocks*2; i++) { 298 | #pragma HLS loop_tripcount min=2048 max=2048 299 | #pragma HLS PIPELINE II=1 300 | out[i] = in.read(); 301 | } 302 | } 303 | 304 | 305 | 306 | /* *************************************************************************** 307 | 308 | krnl_idct_dataflow 309 | 310 | Top idct kernel function, used to clearly isolate and identify 311 | dataflow blocks. 312 | 313 | *************************************************************************** */ 314 | void krnl_idct_dataflow(const ap_int<512> *block, 315 | const ap_uint<512> *q, 316 | ap_int<512> *voutp, 317 | int ignore_dc, 318 | unsigned int blocks) { 319 | //#pragma HLS DATAFLOW 320 | 321 | hls::stream iblock("input_stream1"); 322 | hls::stream iq("input_stream2"); 323 | hls::stream ivoutp("output_stream"); 324 | #pragma HLS stream variable=iblock depth=512 325 | #pragma HLS stream variable=iq depth=2 326 | #pragma HLS stream variable=ivoutp depth=512 327 | 328 | 329 | read_blocks(q, iq, 1); 330 | read_blocks(block, iblock, blocks); 331 | execute(iblock, iq, ivoutp, ignore_dc ? true : false, blocks); 332 | write_blocks(voutp, ivoutp, blocks); 333 | } 334 | 335 | 336 | 337 | /* *************************************************************************** 338 | 339 | krnl_idct 340 | 341 | Kernel idct interface definition. 
342 | 343 | *************************************************************************** */ 344 | extern "C" { 345 | void krnl_idct(const ap_int<512> *block, 346 | const ap_uint<512> *q, 347 | ap_int<512> *voutp, 348 | int ignore_dc, 349 | unsigned int blocks) { 350 | #pragma HLS INTERFACE m_axi port=block offset=slave bundle=gmem0 351 | #pragma HLS INTERFACE s_axilite port=block bundle=control 352 | #pragma HLS INTERFACE m_axi port=q offset=slave bundle=gmem1 353 | #pragma HLS INTERFACE s_axilite port=q bundle=control 354 | #pragma HLS INTERFACE m_axi port=voutp offset=slave bundle=gmem2 355 | #pragma HLS INTERFACE s_axilite port=voutp bundle=control 356 | #pragma HLS INTERFACE s_axilite port=ignore_dc bundle=control 357 | #pragma HLS INTERFACE s_axilite port=blocks bundle=control 358 | #pragma HLS INTERFACE s_axilite port=return bundle=control 359 | 360 | krnl_idct_dataflow(block, q, voutp, ignore_dc, blocks); 361 | } 362 | 363 | } 364 | --------------------------------------------------------------------------------