├── .clang-format ├── .gitignore ├── INSTALL.md ├── LICENSE.txt ├── README.md ├── bin ├── cloc.sh ├── gputable.txt ├── mygpu └── mymcpu ├── docs └── atmi-reference.pdf ├── examples ├── README.md ├── c_extension │ ├── depends │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ ├── csquares.cpp │ │ └── csquares_kernels.cl │ ├── eps │ │ ├── Makefile │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ ├── eps.cpp │ │ └── nullKernel.cl │ ├── fibonacci │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ └── fibonacci.cpp │ ├── helloworld │ │ ├── HelloWorld.cpp │ │ ├── Makefile │ │ └── hw.cl │ ├── helloworld_dGPU │ │ ├── HelloWorld.cpp │ │ ├── Makefile │ │ └── hw.cl │ └── kps │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ ├── kps.cpp │ │ └── nullKernel.cl ├── c_extension_denq │ ├── helloworld │ │ ├── HelloWorld.cpp │ │ ├── Makefile │ │ └── hw.cl │ ├── kps │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ ├── kps.cpp │ │ └── nullKernel.cl │ └── reduction │ │ ├── Makefile │ │ ├── Reduction.cpp │ │ ├── buildrun.sh │ │ ├── cleanup.sh │ │ └── reduction.cl ├── interop │ ├── globalsymbol │ │ ├── Makefile │ │ ├── globalsymbol.cl │ │ └── globalsymbol.cpp │ └── hsainfo │ │ ├── Makefile │ │ └── hsainfo.cpp ├── runtime │ ├── dlbench_multi_agent │ │ ├── build.atmi.sh │ │ ├── dlbench.atmi.c │ │ ├── dlbench.h │ │ └── grayscale.cl │ ├── eps │ │ ├── Makefile │ │ ├── eps.cpp │ │ └── nullKernel.cl │ ├── fibonacci │ │ ├── Makefile │ │ └── fibonacci.cpp │ ├── helloworld │ │ ├── Makefile │ │ ├── hw.cl │ │ ├── hw.cpp │ │ └── hw_structs.h │ ├── helloworld_dGPU │ │ ├── Makefile │ │ ├── hw.cl │ │ └── hw.cpp │ ├── helloworld_dGPU_async │ │ ├── Makefile │ │ ├── hw.cl │ │ └── hw.cpp │ ├── helloworld_dGPU_sync │ │ ├── Makefile │ │ ├── hw.cl │ │ └── hw.cpp │ ├── helloworld_printf │ │ ├── Makefile │ │ ├── hw.h │ │ ├── hw_cpu.c │ │ ├── hw_gpu.cl │ │ └── hw_host.cpp │ ├── kps │ │ ├── Makefile │ │ ├── kps.cpp │ │ └── nullKernel.cl │ ├── needleman-wunsch │ │ ├── Makefile │ │ ├── nw.cl │ │ ├── nw.cpp │ │ └── nw.h │ ├── needleman-wunsch_dGPU │ │ 
├── Makefile │ │ ├── nw.cl │ │ ├── nw.cpp │ │ └── nw.h │ └── pcie_bw │ │ ├── Makefile │ │ └── pcie_bw.cpp └── runtime_denq │ ├── helloworld │ ├── Makefile │ ├── hw.cl │ └── hw.cpp │ ├── kps │ ├── Makefile │ ├── kps.cpp │ └── nullKernel.cl │ └── reduction │ ├── Makefile │ ├── reduction.cl │ └── reduction.cpp ├── include ├── atmi.h ├── atmi_c_ext.h ├── atmi_interop_hsa.h ├── atmi_kl.h └── atmi_runtime.h └── src ├── CMakeLists.txt ├── atmi-backward-compat.cmake ├── cmake_modules ├── FindLibElf.cmake ├── FindROCm.cmake └── utils.cmake ├── compiler ├── CMakeLists.txt ├── atl_pifgen_plugin.c ├── atl_synckernel.c └── include │ ├── atl_pifgen.h │ └── hsa_cl.h ├── device_runtime ├── CMakeLists.txt ├── bc.cmake ├── device_rt.cl ├── device_rt.cpp ├── device_rt.h └── include │ ├── device_amd_hsa.h │ └── hsa.h └── runtime ├── CMakeLists.txt ├── core ├── CMakeLists.txt ├── atmi.cpp ├── cputask.cpp ├── data.cpp ├── kernel.cpp ├── machine.cpp ├── queue.cpp ├── system.cpp ├── task.cpp ├── taskgroup.cpp └── utils.cpp ├── include ├── data.h ├── device_rt_internal.h ├── internal.h ├── kernel.h ├── machine.h ├── machine.tcc ├── queue.h ├── realtimer.h ├── rt.h ├── task.h └── taskgroup.h └── interop ├── CMakeLists.txt └── hsa ├── CMakeLists.txt └── atmi_interop_hsa.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: Google 4 | AccessModifierOffset: -1 5 | AlignAfterOpenBracket: Align 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlinesLeft: true 9 | AlignOperands: true 10 | AlignTrailingComments: true 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: All 15 | AllowShortIfStatementsOnASingleLine: true 16 | AllowShortLoopsOnASingleLine: true 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | 
AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterClass: false 25 | AfterControlStatement: false 26 | AfterEnum: false 27 | AfterFunction: false 28 | AfterNamespace: false 29 | AfterObjCDeclaration: false 30 | AfterStruct: false 31 | AfterUnion: false 32 | BeforeCatch: false 33 | BeforeElse: false 34 | IndentBraces: false 35 | BreakBeforeBinaryOperators: None 36 | BreakBeforeBraces: Attach 37 | BreakBeforeTernaryOperators: true 38 | BreakConstructorInitializersBeforeComma: false 39 | ColumnLimit: 80 40 | CommentPragmas: '^ IWYU pragma:' 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: true 46 | DisableFormat: false 47 | ExperimentalAutoDetectBinPacking: false 48 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 49 | IncludeCategories: 50 | - Regex: '^<.*\.h>' 51 | Priority: 1 52 | - Regex: '^<.*' 53 | Priority: 2 54 | - Regex: '.*' 55 | Priority: 3 56 | IndentCaseLabels: true 57 | IndentWidth: 2 58 | IndentWrappedFunctionNames: false 59 | KeepEmptyLinesAtTheStartOfBlocks: false 60 | MacroBlockBegin: '' 61 | MacroBlockEnd: '' 62 | MaxEmptyLinesToKeep: 1 63 | NamespaceIndentation: None 64 | ObjCBlockIndentWidth: 2 65 | ObjCSpaceAfterProperty: false 66 | ObjCSpaceBeforeProtocolList: false 67 | PenaltyBreakBeforeFirstCallParameter: 1 68 | PenaltyBreakComment: 300 69 | PenaltyBreakFirstLessLess: 120 70 | PenaltyBreakString: 1000 71 | PenaltyExcessCharacter: 1000000 72 | PenaltyReturnTypeOnItsOwnLine: 200 73 | PointerAlignment: Left 74 | ReflowComments: true 75 | SortIncludes: true 76 | SpaceAfterCStyleCast: false 77 | SpaceBeforeAssignmentOperators: true 78 | SpaceBeforeParens: ControlStatements 79 | SpaceInEmptyParentheses: false 80 | SpacesBeforeTrailingComments: 2 
81 | SpacesInAngles: false 82 | SpacesInContainerLiterals: true 83 | SpacesInCStyleCastParentheses: false 84 | SpacesInParentheses: false 85 | SpacesInSquareBrackets: false 86 | Standard: Auto 87 | TabWidth: 8 88 | UseTab: Never 89 | ... 90 | 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # executables 2 | hello 3 | a.out 4 | 5 | # tag files 6 | tags 7 | 8 | # temp files 9 | *.i 10 | *.bc 11 | *.o 12 | *.hsaco 13 | *.swp 14 | *.swo 15 | *.swn 16 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ATMI Install Instructions 2 | ========================= 3 | 4 | - [1. Prepare System for ATMI Installation](#Prepare) 5 | - [2. Install/Build ATMI](#ATMI) 6 | - [3. Build/Test ATMI Examples](#Examples) 7 | 8 | 9 | 10 | ## Prepare System for ATMI Installation 11 | 12 | ATMI works on all platforms that are supported by ROCm, but has been tested mainly for the 18.04.2 LTS (Bionic Beaver) platform. 13 | See [here](https://github.com/RadeonOpenCompute/ROCm) for details on all supported hardware/OS configurations and instructions on how to install ROCm for your system. 14 | 15 | 16 | 17 | ## Install/Build ATMI 18 | ATMI can be installed either from the ROCm apt server or built from the source. 
19 | 20 | #### Install from the ROCm apt server 21 | 22 | ``` 23 | sudo apt-get install atmi 24 | ``` 25 | 26 | #### Build from the source 27 | 28 | ``` 29 | mkdir -p ~/git 30 | cd ~/git 31 | git clone https://github.com/RadeonOpenCompute/atmi.git 32 | mkdir ~/git/atmi/src/build 33 | cd ~/git/atmi/src/build 34 | # export all GFX target architectures for the ATMI device runtime 35 | export GFXLIST="gfx900 gfx906" # e.g.: gfx900 is for AMD Vega GPUs 36 | # ensure you have cmake (version >= 2.8) 37 | cmake \ 38 | -DCMAKE_INSTALL_PREFIX=/path/to/install \ 39 | -DCMAKE_BUILD_TYPE={Debug|Release} \ 40 | -DLLVM_DIR=/path/to/llvm \ # compiler to build ATMI device runtime and user GPU kernels 41 | -DDEVICE_LIB_DIR= \ # root of ROCm Device Library to link 42 | -DATMI_DEVICE_RUNTIME=ON \ # (optional) to build ATMI device runtime (default: OFF) 43 | -DATMI_HSA_INTEROP=ON \ # (optional) to build ATMI with HSA interop functionality (default: OFF) 44 | -DROCM_DIR=/path/to/hsa \ # (optional) root of ROCm/HSA runtime (default: /opt/rocm) 45 | .. 46 | # make all components (Host runtime and device runtime) 47 | make 48 | make install 49 | export LD_LIBRARY_PATH=/path/to/install/lib:$LD_LIBRARY_PATH # (optional) 50 | ``` 51 | 52 | 53 | 54 | ## Build/Test ATMI Examples 55 | 56 | ATMI runtime works with any high level compiler that generates AMD GCN code objects. 57 | The examples here use the OpenCL kernel language and ATMI as the host runtime, but ATMI can also work with any high level 58 | kernel language like HIP or OpenMP as long as they are compiled to AMD GCN code objects. 59 | In this example set, the host code and device code are compiled separately, 60 | and the ATMI host runtime explicitly loads the device module before launching tasks. 61 | ATMI currently supports loading AMD GCN (HSA code objects). 62 | ATMI ships with the CLOC (CL Offline Compiler) utility script, which is a thin wrapper around Clang to help compile CL kernels. 
63 | 64 | ``` 65 | # Building a simple helloworld example on a two GPU system 66 | cd /path/to/atmi/examples/runtime/helloworld_dGPU 67 | make 68 | # If make does not work, then check the different flags in make to point to the right installed locations of ROCm, 69 | # or directly run cloc.sh with the following options 70 | # /opt/rocm/atmi/bin/cloc.sh -aomp /opt/rocm/llvm -triple amdgcn-amd-amdhsa -libgcn /opt/rocm -clopts "-I/opt/rocm/atmi/include -I/opt/rocm/hsa/include -I. -O2 -v" -opt 2 hw.cl 71 | make test 72 | env LD_LIBRARY_PATH=/opt/rocm/atmi/lib:/opt/rocm/hsa/lib: ./hello 73 | Choosing GPU 0/2 74 | Output from the GPU: Hello HSA World 75 | Output from the CPU: Hello HSA World 76 | ``` 77 | 78 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright © 2019 Advanced Micro Devices, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 6 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 7 | persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 12 | PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 13 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ATMI (Asynchronous Task and Memory Interface) 2 | ============================================= 3 | 4 | Asynchronous Task and Memory Interface, or ATMI, is a runtime framework for efficient task management in heterogeneous 5 | CPU-GPU systems. It provides a consistent API to create and launch tasks from both CPUs and GPUs (integrated and discrete). 6 | In ATMI, the high-level task configurations can be simply described by using a few predefined C-style structures before launching the tasks. 7 | The task configuration includes the task dimensions and granularity, dependencies to other tasks, data requirements and so on. The ATMI runtime, based on the overall task 8 | graph and individual task configurations, will perform task scheduling and memory management that is optimal for the underlying platform. ATMI provides a rich and flexible 9 | user interface so that the end user can relinquish scheduling to the runtime (default behavior) or take full control of scheduling and 10 | mapping, if desired. The target audience for ATMI is application programmers or middleware developers for high-level languages. 11 | 12 | ## Deprecation Notice ## 13 | ATMI is not being released as part of AMD ROCm software after ROCm 5.6. Previously released branches are still available for reference. 14 | ATMI will henceforth be maintained as a separate research project at https://github.com/AMDResearch/atmi. 
15 | 16 | ## ATMI (v0.7) Feature List 17 | - ATMI-RT: host runtime library to manage tasks 18 | - ATMI-DEVRT: device runtime library for managing task enqueue from the GPU to both the CPU and other GPUs in the system 19 | - ATMI-C (experimental): Declarative task-based programming model using C language extensions (works only with GCC) 20 | - A comprehensive machine model for CPUs, integrated GPU (APU) and discrete GPU systems. 21 | - Consistent task management API for CPU tasks and GPU tasks 22 | - GPU kernel language: Any language (e.g, CL and HIP) that can be compiled to AMD GCN code objects 23 | - CPU tasks: support for multi-dimensional task grids (similar to GPU tasks) 24 | - Task dependencies 25 | - Task groups 26 | - (Experimental) Dependencies between task groups 27 | - Recursive tasks (tasks creating other tasks) 28 | - Efficient resource management 29 | - Low latency signaling among dependent tasks 30 | - Kernel argument memory regions 31 | - Reuse of task handles 32 | - Efficient task to work queue scheduling 33 | - Data movement API (synchronous and asynchronous options) 34 | - Asynchronous data movement is treated as an ATMI task in the task graph 35 | - Interoperability with HSA/ROCm: Map between ATMI handles and ROCm data structures for expert programmers 36 | - Supported platforms: all devices that are supported by [ROCm](https://github.com/RadeonOpenCompute/ROCm) are supported by ATMI 37 | - Supported runtime: ROCm v2.1+ 38 | - Several miscellaneous code refactoring and bug fixes 39 | 40 | ## Compilation and Runtime Workflow 41 | The below figure depicts the ATMI runtime workflow with CLOC as the compiler utility. 42 | ![atmi-workflow](https://user-images.githubusercontent.com/996564/44241414-a28b8480-a178-11e8-917a-70c8ccbc83a1.png) 43 | 44 | ## License 45 | 46 | MIT License 47 | 48 | Copyright © 2019 Advanced Micro Devices, Inc. 
49 | 50 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 51 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 52 | persons to whom the Software is furnished to do so, subject to the following conditions: 53 | 54 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 55 | 56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 57 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 58 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 59 | -------------------------------------------------------------------------------- /bin/mygpu: -------------------------------------------------------------------------------- 1 | mymcpu -------------------------------------------------------------------------------- /docs/atmi-reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/atmi/f11878cbb5e696c9e83dbbdac04dd73047d9ed23/docs/atmi-reference.pdf -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | Examples are categorized into many types depending on the ATMI interface that 3 | is chosen by the programmer: ATMI-C (C extensions), ATMI-RT (Runtime library 4 | interface) or either of these interface with device enqueue (_denq). 
There is 5 | also a directory to showcase ATMI with HSA interoperability. 6 | ``` 7 | 8 | ---------- 9 | MIT License 10 | 11 | Copyright © 2019 Advanced Micro Devices, Inc. 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 14 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 15 | persons to whom the Software is furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 20 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | ``` 23 | -------------------------------------------------------------------------------- /examples/c_extension/depends/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 
5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | #this software and associated documentation files (the "Software"), to deal in 8 | #the Software 9 | #without restriction, including without limitation the rights to use, copy, 10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 11 | #Software, and to permit 12 | #persons to whom the Software is furnished to do so, subject to the following 13 | #conditions: 14 | # 15 | #The above copyright notice and this permission notice shall be included in all 16 | #copies or substantial portions of the Software. 17 | # 18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | #FITNESS FOR A PARTICULAR 21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 23 | #CONTRACT, TORT OR 24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 25 | #OR OTHER DEALINGS IN THE SOFTWARE. 26 | 27 | set -e 28 | # Set HSA Environment variables 29 | [ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/rocm/hsa 30 | [ -z $HSA_LIBHSAIL_PATH ] && HSA_LIBHSAIL_PATH=/opt/rocm/hsa/lib 31 | [ -z $HSA_LLVM_PATH ] && HSA_LLVM_PATH=/usr/bin 32 | [ -z $ATMI_RUNTIME_PATH ] && ATMI_RUNTIME_PATH=/opt/amd/atmi 33 | ATMI_INC=$ATMI_RUNTIME_PATH/include 34 | 35 | export LD_LIBRARY_PATH=$HSA_RUNTIME_PATH/lib:$ATMI_RUNTIME_PATH/lib:$LD_LIBRARY_PATH 36 | #echo $LD_LIBRARY_PATH 37 | 38 | # Do not compile accelerated functions separately. This script will be invoked by the GCC plugin itself. 
39 | echo 40 | #if [ -f csquares_kernels.o ] ; then rm csquares_kernels.o ; fi 41 | 42 | # Compile Main and generate the PIF definitions for host and accelerated functions in csquares.cpp.pifdefs.c 43 | echo 44 | if [ -f csquares ] ; then rm csquares ; fi 45 | echo g++ -o csquares.o -c csquares.cpp -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=csquares_kernels.cl -I$ATMI_INC 46 | g++ -o csquares.o -c csquares.cpp -std=c++11 -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=csquares_kernels.cl -I$ATMI_INC 47 | 48 | echo g++ -o csquares csquares.o csquares.cpp.pifdefs.c -latmi_runtime -L$ATMI_RUNTIME_PATH/lib -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -lelf -I$ATMI_INC -I$HSA_RUNTIME_PATH/include 49 | g++ -o csquares csquares.o csquares.cpp.pifdefs.c -std=c++11 -latmi_runtime -L$ATMI_RUNTIME_PATH/lib -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -lelf -I$ATMI_INC -I$HSA_RUNTIME_PATH/include 50 | 51 | # Execute 52 | echo 53 | echo ./csquares 54 | ./csquares 55 | -------------------------------------------------------------------------------- /examples/c_extension/depends/cleanup.sh: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright © 2016 Advanced Micro Devices, Inc. 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 
16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | rm *.o 27 | rm csquares.cpp.pifdefs.c 28 | rm csquares 29 | 30 | -------------------------------------------------------------------------------- /examples/c_extension/depends/csquares.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | // 7 | // csquares.cpp : Demo of ATMI task dependencies 8 | // 9 | // Creates a diamond DAG using three kernels. 10 | // Demo only, not intended as efficient algorithm. 
11 | // 12 | // Init N values: (init) on CPU 13 | // / \ 14 | // |/ \| 15 | // Do 1/4 N each: (even_squares) (even_squares) 16 | // on CPU \ / on GPU 17 | // \| |/ 18 | // Do odd 1/2 N: (odd_squares) on GPU 19 | // 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "atmi_c_ext.h" 26 | using namespace std; 27 | typedef int TYPE; 28 | 29 | static const int N = 16; /* multiple of 4 for demo */ 30 | 31 | /* ---------------- Kernel declarations -------------*/ 32 | // Declare init_kernel as the PIF for the init_kernel_cpu and init_kernel_gpu subroutines 33 | extern "C" void init_kernel_cpu(int *in) __attribute__((atmi_kernel("init_kernel", "cpu"))); 34 | __kernel void init_kernel_gpu(__global int *in) __attribute__((atmi_kernel("init_kernel", "gpu"))); 35 | 36 | 37 | // Declare even_squares_kernel as the PIF for the even_squares_kernel_cpu and even_squares_kernel_gpu subroutines 38 | extern "C" void even_squares_kernel_cpu( 39 | const int *in , int *out) __attribute__((atmi_kernel("even_squares_kernel", "cpu"))); 40 | __kernel void even_squares_kernel_gpu( 41 | __global const int *in , __global int *out) __attribute__((atmi_kernel("even_squares_kernel", "gpu"))); 42 | 43 | 44 | // Declare odd_squares_kernel as the PIF for the odd_squares_kernel_cpu and odd_squares_kernel_gpu subroutines 45 | extern "C" void odd_squares_kernel_cpu(int *out) __attribute__((atmi_kernel("odd_squares_kernel", "cpu"))); 46 | __kernel void odd_squares_kernel_gpu(__global int *out) __attribute__((atmi_kernel("odd_squares_kernel", "gpu"))); 47 | 48 | 49 | 50 | /* ---------------- Kernel definitions -------------*/ 51 | extern "C" void init_kernel_cpu(int *in) { /* CPU variant of init_kernel: writes in[i] = i for every i in [0, N) */ 52 | int i; 53 | for(i = 0; i < N; i++) { 54 | in[i] = (int) i; 55 | } 56 | } 57 | 58 | /* Middle children calculate squares for even numbers: out[i] = in[i]*in[i] for the N/4 even offsets i = 0, 2, ..., N/2-2 from the pointers passed in */ 59 | extern "C" void even_squares_kernel_cpu(const int *in , int *out) 60 | { 61 | int ctr; 62 | for(ctr = 0; ctr < N/4; ctr++) { 63 | int i = ctr*2; 64 | out[i] = in[i] * in[i]; 65 | } 66 | } 67 | 68 | /* The last child calculates squares 
for odd numbers 69 | using squares from even numbers because. 70 | (X-1)**2 = X**2 - 2X + 1 71 | so X**2 = ((X-1)**2) + 2X - 1 72 | */ 73 | extern "C" void odd_squares_kernel_cpu(int *out) 74 | { 75 | int ctr; 76 | for(ctr = 0; ctr < N/2; ctr++) { 77 | int i = (ctr*2) + 1; 78 | out[i] = out[i-1] + (2*i) - 1; 79 | } 80 | } 81 | 82 | 83 | /* -------------- main ------------------*/ 84 | int main(int argc, char *argv[]) { 85 | TYPE *inArray = new TYPE[N]; // NOTE(review): no matching delete[] is visible for inArray/outArray in this listing 86 | TYPE *outArray = new TYPE[N]; 87 | 88 | // Create launch parameters with thread counts 89 | ATMI_LPARM_1D(init_lp,N); 90 | // Each even task calculates 1/4 of the squares 91 | ATMI_LPARM_1D(even_lp,N/4); 92 | // The final odd task does 1/2 of the squares 93 | ATMI_LPARM_1D(odd_lp,N/2); 94 | 95 | atmi_task_handle_t init_tasks[1]; 96 | atmi_task_handle_t even_tasks[2]; 97 | 98 | // Dispatch init_kernel (CPU variant) and set even_lp to require init to complete 99 | init_lp->kernel_id = K_ID_init_kernel_cpu; 100 | init_tasks[0] = init_kernel(init_lp, inArray); 101 | 102 | even_lp->num_required = 1; 103 | even_lp->requires = init_tasks; 104 | // Dispatch 2 even_squares kernels and build dependency list for odd_squares. 
105 | even_lp->kernel_id = K_ID_even_squares_kernel_gpu; 106 | even_tasks[0] = even_squares_kernel(even_lp, inArray, outArray); // Half of even kernels go to the GPU 107 | even_lp->kernel_id = K_ID_even_squares_kernel_cpu; 108 | even_tasks[1] = even_squares_kernel(even_lp, &inArray[N/2], &outArray[N/2]); // Other half goes to the CPU 109 | odd_lp->num_required = 2; 110 | odd_lp->requires = even_tasks; 111 | odd_lp->kernel_id = K_ID_odd_squares_kernel_gpu; 112 | // Now dispatch odd_squares kernel dependent on BOTH even_squares 113 | // kernel_id is set explicitly to the GPU variant above; per the original note, the default kernel_id = 0 would be odd_squares_kernel_cpu by virtue of declaration order 114 | atmi_task_handle_t ret_task = odd_squares_kernel(odd_lp, outArray); 115 | 116 | // Wait for all kernels to complete 117 | SYNC_TASK(ret_task); 118 | // Check results 119 | bool passed = true; 120 | for (int i=0; i " << outArray[i] << endl; 123 | passed = false; 124 | } 125 | } 126 | cout << endl << (passed ? "PASSED" : "FAILED") << endl; 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /examples/c_extension/depends/csquares_kernels.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | /* 7 | File: csquares_kernels.cl 8 | 9 | 3 Kernels for csquares.cpp. 10 | This is not intended to be efficient. 11 | It is just a simple demo of dependencies. 
12 | */ 13 | /* Parent kernel initializes input array: each work-item writes its own global id, in[i] = i */ 14 | __kernel void init_kernel_gpu(__global int *in) { 15 | int i = get_global_id(0); 16 | in[i] = (int) i; 17 | } 18 | 19 | /* Middle children calculate squares for even numbers: work-item g handles even offset i = 2*g */ 20 | __kernel void even_squares_kernel_gpu( 21 | __global const int *in , __global int *out) 22 | { 23 | int i = get_global_id(0)*2; 24 | out[i] = in[i] * in[i]; 25 | } 26 | 27 | /* The last child calculates squares for odd numbers 28 | using squares from even numbers because: 29 | (X-1)**2 = X**2 - 2X + 1 30 | so X**2 = ((X-1)**2) + 2X - 1 31 | Work-item g handles odd offset i = 2*g + 1 and reads out[i-1]. */ 32 | __kernel void odd_squares_kernel_gpu(__global int *out) 33 | { 34 | int i = (get_global_id(0)*2) + 1; 35 | out[i] = out[i-1] + (2*i) - 1; 36 | } 37 | -------------------------------------------------------------------------------- /examples/c_extension/eps/Makefile: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright © 2016 Advanced Micro Devices, Inc. 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | #Set HSA Environment variables 27 | HSA_RUNTIME_PATH ?= /opt/hsa 28 | HSA_LIBHSAIL_PATH ?= /opt/hsa/lib 29 | HSA_LLVM_PATH ?= /opt/amd/cloc/bin 30 | ATMI_RUNTIME_PATH ?= /opt/amd/atmi 31 | ATMI_INC=${ATMI_RUNTIME_PATH}/include 32 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so 33 | 34 | CC=g++ 35 | CFLAGS=-O3 -g 36 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I. 37 | 38 | LIBS=-latmi_runtime -lhsa-runtime64 39 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 40 | 41 | OBJS = eps 42 | 43 | .PHONY: clean all 44 | 45 | all: $(OBJS) 46 | 47 | eps: eps.cpp 48 | $(CC) -c -o nullKernel.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS) 49 | #$(CC) -c -o nullKernel.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS) 50 | $(CC) -o $@ nullKernel.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 51 | 52 | clean: 53 | rm -rf *.o *.hsaco *pifdefs.c* $(OBJS) 54 | 55 | run: eps 56 | ATMI_DEPENDENCY_SYNC_TYPE=ATMI_SYNC_CALLBACK ATMI_MAX_HSA_SIGNALS=8 ./eps 2 15 57 | -------------------------------------------------------------------------------- /examples/c_extension/eps/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 
5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | #this software and associated documentation files (the "Software"), to deal in 8 | #the Software 9 | #without restriction, including without limitation the rights to use, copy, 10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 11 | #Software, and to permit 12 | #persons to whom the Software is furnished to do so, subject to the following 13 | #conditions: 14 | # 15 | #The above copyright notice and this permission notice shall be included in all 16 | #copies or substantial portions of the Software. 17 | # 18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | #FITNESS FOR A PARTICULAR 21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 23 | #CONTRACT, TORT OR 24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 25 | #OR OTHER DEALINGS IN THE SOFTWARE. 
26 | 27 | set -e 28 | # Set HSA Environment variables 29 | [ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/hsa-nov15 30 | #[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/home/aaji/opt/hsa 31 | #[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/hsa 32 | [ -z $HSA_LIBHSAIL_PATH ] && HSA_LIBHSAIL_PATH=/opt/hsa/lib 33 | [ -z $HSA_LLVM_PATH ] && HSA_LLVM_PATH=/opt/amd/cloc/bin 34 | [ -z $ATMI_RUNTIME_PATH ] && ATMI_RUNTIME_PATH=/opt/amd/atmi 35 | ATMI_INC=$ATMI_RUNTIME_PATH/include 36 | 37 | echo 38 | #export VT_MODE="STAT:TRACE" 39 | if [ -f eps ] ; then rm eps ; fi 40 | echo g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC 41 | #vtc++ -vt:inst compinst -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC 42 | #g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-jitcompile=false -O3 -I$ATMI_INC 43 | g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC 44 | # Link against the ATMI runtime under ATMI_RUNTIME_PATH (the same root ATMI_INC is derived from); ATMI_PATH is never set by this script 45 | echo g++ -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include 46 | #vtc++ -vt:inst compinst -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include 47 | g++ -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include 48 | 49 | # Execute 50 | echo 51 | echo ./eps 52 | ./eps 53 | 54 | -------------------------------------------------------------------------------- /examples/c_extension/eps/cleanup.sh: -------------------------------------------------------------------------------- 1 | 
#MIT License 2 | # 3 | #Copyright © 2016 Advanced Micro Devices, Inc. 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | rm *.o 27 | rm *pifdefs.c* 28 | rm eps 29 | 30 | -------------------------------------------------------------------------------- /examples/c_extension/eps/nullKernel.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | 7 | __kernel void nullKernel_impl(int i){} 8 | -------------------------------------------------------------------------------- /examples/c_extension/fibonacci/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | #this software and associated documentation files (the "Software"), to deal in 8 | #the Software 9 | #without restriction, including without limitation the rights to use, copy, 10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 11 | #Software, and to permit 12 | #persons to whom the Software is furnished to do so, subject to the following 13 | #conditions: 14 | # 15 | #The above copyright notice and this permission notice shall be included in all 16 | #copies or substantial portions of the Software. 17 | # 18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | #FITNESS FOR A PARTICULAR 21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 23 | #CONTRACT, TORT OR 24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 25 | #OR OTHER DEALINGS IN THE SOFTWARE. 

set -e

# Default install locations. A value already present in the environment wins;
# the default is applied only when the variable is unset or empty. Quoting the
# expansions keeps paths that contain spaces intact.
: "${HSA_RUNTIME_PATH:=/opt/hsa}"
: "${HSA_LIBHSAIL_PATH:=/opt/hsa/lib}"
: "${HSA_LLVM_PATH:=/opt/amd/cloc/bin}"
: "${ATMI_RUNTIME_PATH:=/opt/amd/atmi}"

ATMI_INC=$ATMI_RUNTIME_PATH/include
# Let the freshly linked binary find the HSA and ATMI shared libraries.
# The previous LD_LIBRARY_PATH is appended only when it is non-empty: an
# unconditional trailing ':' would add an empty entry, which the dynamic
# loader treats as the current directory.
export LD_LIBRARY_PATH="$HSA_RUNTIME_PATH/lib:$ATMI_RUNTIME_PATH/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Print a command line and then run exactly that command line, so the echoed
# text can never drift from what is executed.
run() {
    echo "$@"
    "$@"
}

echo
# Drop any previous binary so a failed build cannot leave a stale one behind.
if [ -f fibonacci ] ; then rm fibonacci ; fi

# Step 1: compile the host source with the atmi_pifgen plugin loaded.
run g++ -c -o fibonacci.o fibonacci.cpp -g -fplugin=atmi_pifgen.so -O3 -I"$ATMI_INC"

# Step 2: link the object together with fibonacci.cpp.pifdefs.c against the
# ATMI and HSA runtimes.
run g++ -o fibonacci fibonacci.o fibonacci.cpp.pifdefs.c -O3 -lelf \
    -L"$ATMI_RUNTIME_PATH/lib" -latmi_runtime \
    -L"$HSA_RUNTIME_PATH/lib" -lhsa-runtime64 \
    -I"$ATMI_INC" -I"$HSA_RUNTIME_PATH/include"


# Execute, forwarding the caller's arguments (the first one selects N).
# "$@" expands to nothing when no argument was given, so the program still
# falls back to its built-in default.
echo
run ./fibonacci "$@"
--------------------------------------------------------------------------------
/examples/c_extension/fibonacci/cleanup.sh:
--------------------------------------------------------------------------------
#MIT License
#
#Copyright © 2016 Advanced Micro Devices, Inc.
4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | rm fibonacci.cpp.pifdefs.c 27 | rm *.o 28 | rm fibonacci 29 | 30 | -------------------------------------------------------------------------------- /examples/c_extension/fibonacci/fibonacci.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "atmi_c_ext.h" 13 | 14 | using namespace std; 15 | 16 | bool is_null_task(atmi_task_handle_t t) { 17 | if(t == 0ull) { 18 | return true; 19 | } 20 | else { 21 | return false; 22 | } 23 | } 24 | 25 | extern "C" void sum_cpu(int *a, int *b, int *c) __attribute__((atmi_kernel("sum", "CPU"))); 26 | 27 | extern "C" void sum_cpu(int *a, int *b, int *c) { 28 | *c = *a + *b; 29 | delete a; 30 | delete b; 31 | } 32 | 33 | /* Recursive Fibonacci */ 34 | void fib(const int n , int *result , atmi_task_handle_t *my_sum_task) { 35 | if (n < 2) { 36 | *result = n; 37 | *my_sum_task = NULL_TASK; 38 | } else { 39 | atmi_task_handle_t task_sum1; 40 | atmi_task_handle_t task_sum2; 41 | int *result1 = new int; 42 | int *result2 = new int; 43 | fib(n-1,result1,&task_sum1); 44 | fib(n-2,result2,&task_sum2); 45 | ATMI_LPARM(lparm_child); 46 | lparm_child->num_required = 0; 47 | atmi_task_handle_t requires[2]; 48 | if (!is_null_task(task_sum1)) { 49 | requires[lparm_child->num_required]=task_sum1; 50 | lparm_child->num_required +=1; 51 | } 52 | if (!is_null_task(task_sum2)) { 53 | requires[lparm_child->num_required]=task_sum2; 54 | lparm_child->num_required +=1; 55 | } 56 | lparm_child->requires = requires; 57 | *my_sum_task = sum(lparm_child,result1,result2,result); 58 | } 59 | } 60 | 61 | int main(int argc, char *argv[]) { 62 | int N = 10; 63 | if(argc > 1) { 64 | N = atoi(argv[1]); 65 | } 66 | int result; 67 | 68 | atmi_task_handle_t root_sum_task; 69 | fib(N,&result,&root_sum_task); 70 | if(!is_null_task(root_sum_task)) SYNC_TASK(root_sum_task); 71 | cout << "Fib(" << N << ") = " << result << endl; 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /examples/c_extension/helloworld/HelloWorld.cpp: 
-------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | #include "atmi.h" 11 | 12 | // Declare decode as the PIF for the CPU kernel decode_cpu 13 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) __attribute__((atmi_kernel("decode", "cpu"))); 14 | 15 | // Declare decode as the PIF for the GPU kernel decode_gpu 16 | __kernel void decode_gpu(__global const char* in, __global char *out, const size_t strlength) __attribute__((atmi_kernel("decode", "gpu"))); 17 | 18 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) { 19 | int num; 20 | for (num = 0; num < strlength; num++) { 21 | out[num] = in[num] + 1; 22 | } 23 | } 24 | 25 | int main(int argc, char* argv[]) { 26 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 27 | size_t strlength = strlen(input); 28 | char *output_cpu = (char*) malloc(strlength + 1); 29 | char *output_gpu = (char*) malloc(strlength + 1); 30 | 31 | ATMI_LPARM_1D(lparm, strlength); 32 | lparm->synchronous = ATMI_TRUE; 33 | 34 | lparm->kernel_id = K_ID_decode_gpu; 35 | decode(lparm, input, output_gpu, strlength); 36 | output_gpu[strlength] = '\0'; 37 | 38 | lparm->kernel_id = K_ID_decode_cpu; 39 | lparm->WORKITEMS = 1; 40 | decode(lparm, input, output_cpu, strlength); 41 | output_cpu[strlength] = '\0'; 42 | 43 | cout << "Output from the CPU: " << output_cpu << endl; 44 | cout << "Output from the GPU: " << output_gpu << endl; 45 | free(output_cpu); 46 | free(output_gpu); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- 
/examples/c_extension/helloworld/Makefile: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright © 2016 Advanced Micro Devices, Inc. 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | #Set HSA Environment variables 27 | HSA_RUNTIME_PATH ?= /opt/rocm 28 | ATMI_RUNTIME_PATH ?= /opt/rocm/libatmi 29 | ATMI_INC=${ATMI_RUNTIME_PATH}/include 30 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so 31 | 32 | CC=g++ 33 | CFLAGS=-O3 -g 34 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/hsa/include -I. 

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = hello

.PHONY: clean all

all: $(OBJS)

# Two-step build:
#   1. compile HelloWorld.cpp with the atmi_pifgen plugin, which is pointed at
#      hw.cl and asked to write pifdefs.cpp;
#   2. link the object with pifdefs.cpp against the ATMI runtime.
# hw.cl is listed as a prerequisite so that editing the kernel triggers a
# rebuild; $< (first prerequisite only) keeps it off the compiler command line.
hello: HelloWorld.cpp hw.cl
	$(CC) -c -o HelloWorld.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
#$(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS)
#$(CC) -o $@ HelloWorld.o hw.c $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
	$(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove objects, .hsaco files, the generated pifdefs source and the binary.
clean:
	rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)

--------------------------------------------------------------------------------
/examples/c_extension/helloworld/hw.cl:
--------------------------------------------------------------------------------
/*===--------------------------------------------------------------------------
 * ATMI (Asynchronous Task and Memory Interface)
 *
 * This file is distributed under the MIT License. See LICENSE.txt for details.
 *===------------------------------------------------------------------------*/

/*
 * GPU implementation of decode: each work-item handles the character at its
 * own global id and writes in[id] + 1 to out[id]. Work-items whose id is at
 * or beyond strlength do nothing.
 */
__kernel void decode_gpu(__global const char* in, __global char* out, const size_t strlength) {
    /* get_global_id returns size_t; keeping that type avoids narrowing to int
       and a signed/unsigned comparison against strlength. */
    size_t num = get_global_id(0);
    if (num < strlength)
        out[num] = in[num] + 1;
}
--------------------------------------------------------------------------------
/examples/c_extension/helloworld_dGPU/HelloWorld.cpp:
--------------------------------------------------------------------------------
/*===--------------------------------------------------------------------------
 * ATMI (Asynchronous Task and Memory Interface)
 *
 * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include 8 | #include 9 | #include 10 | using namespace std; 11 | #include "atmi.h" 12 | #include "atmi_runtime.h" 13 | 14 | // Declare decode as the PIF for the CPU kernel decode_cpu 15 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) __attribute__((atmi_kernel("decode", "cpu"))); 16 | 17 | // Declare decode as the PIF for the GPU kernel decode_gpu 18 | __kernel void decode_gpu(__global const char* in, __global char *out, const size_t strlength) __attribute__((atmi_kernel("decode", "gpu"))); 19 | 20 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) { 21 | int num; 22 | for (num = 0; num < strlength; num++) { 23 | out[num] = in[num] + 1; 24 | } 25 | } 26 | 27 | int main(int argc, char* argv[]) { 28 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 29 | if(err != ATMI_STATUS_SUCCESS) return -1; 30 | 31 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 32 | size_t strlength = strlen(input); 33 | atmi_mem_place_t place = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); 34 | char *input_gpu; 35 | atmi_malloc((void **)&input_gpu, strlength + 1, place); 36 | memcpy(input_gpu, input, strlength); 37 | input_gpu[strlength] = 0; 38 | 39 | char *output_cpu = (char*) malloc(strlength + 1); 40 | char *output_gpu; 41 | atmi_malloc((void **)&output_gpu, strlength + 1, place); 42 | 43 | ATMI_LPARM_1D(lparm, strlength); 44 | lparm->synchronous = ATMI_TRUE; 45 | 46 | lparm->kernel_id = K_ID_decode_gpu; 47 | lparm->place = ATMI_PLACE_GPU(0, 0); 48 | decode(lparm, input_gpu, output_gpu, strlength); 49 | output_gpu[strlength] = '\0'; 50 | 51 | lparm->kernel_id = K_ID_decode_cpu; 52 | lparm->place = ATMI_PLACE_CPU(0, 0); 53 | lparm->WORKITEMS = 1; 54 | decode(lparm, input, output_cpu, strlength); 55 | output_cpu[strlength] = '\0'; 56 | 57 | cout << "Output from the CPU: " << output_cpu << endl; 58 | cout << "Output from the GPU: " << output_gpu << endl; 
59 | free(output_cpu); 60 | atmi_free(output_gpu); 61 | atmi_free(input_gpu); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /examples/c_extension/helloworld_dGPU/Makefile: -------------------------------------------------------------------------------- 1 | #MIT License 2 | # 3 | #Copyright © 2016 Advanced Micro Devices, Inc. 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | #Set HSA Environment variables 27 | HSA_RUNTIME_PATH ?= /opt/rocm 28 | ATMI_RUNTIME_PATH ?= /opt/rocm/libatmi 29 | ATMI_INC=${ATMI_RUNTIME_PATH}/include 30 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so 31 | 32 | CC=g++ 33 | CFLAGS=-O3 -g -std=c++11 34 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/hsa/include -I. 

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = hello

.PHONY: clean all

all: $(OBJS)

# Two-step build:
#   1. compile HelloWorld.cpp with the atmi_pifgen plugin, which is pointed at
#      hw.cl and asked to write pifdefs.cpp;
#   2. link the object with pifdefs.cpp against the ATMI runtime.
# hw.cl is listed as a prerequisite so that editing the kernel triggers a
# rebuild; $< (first prerequisite only) keeps it off the compiler command line.
hello: HelloWorld.cpp hw.cl
	$(CC) -c -o HelloWorld.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
#$(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS)
#$(CC) -o $@ HelloWorld.o hw.c $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
	$(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove objects, .hsaco files, the generated pifdefs source and the binary.
clean:
	rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)

--------------------------------------------------------------------------------
/examples/c_extension/helloworld_dGPU/hw.cl:
--------------------------------------------------------------------------------
/*===--------------------------------------------------------------------------
 * ATMI (Asynchronous Task and Memory Interface)
 *
 * This file is distributed under the MIT License. See LICENSE.txt for details.
 *===------------------------------------------------------------------------*/

/*
 * GPU implementation of decode: each work-item handles the character at its
 * own global id and writes in[id] + 1 to out[id]. Work-items whose id is at
 * or beyond strlength do nothing.
 */
__kernel void decode_gpu(__global const char* in, __global char* out, const size_t strlength) {
    /* get_global_id returns size_t; keeping that type avoids narrowing to int
       and a signed/unsigned comparison against strlength. */
    size_t num = get_global_id(0);
    if (num < strlength)
        out[num] = in[num] + 1;
}
--------------------------------------------------------------------------------
/examples/c_extension/kps/buildrun.sh:
--------------------------------------------------------------------------------
#MIT License
#
#Copyright © 2016 Advanced Micro Devices, Inc.
4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 

#!/bin/bash
# NOTE(review): a shebang only takes effect on the very first line of a file.
# Placed below the license header it is an ordinary comment, so the script body
# sticks to POSIX sh constructs.
set -e

# Set HSA Environment variables. A value already present in the environment
# wins; the default is applied only when the variable is unset or empty.
# NOTE(review): the HSA_RUNTIME_PATH default points into a developer's home
# directory - confirm whether /opt/hsa (the commented alternative) is intended.
: "${HSA_RUNTIME_PATH:=/home/aaji/opt/hsa-nov15}"
#[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/hsa
: "${HSA_LIBHSAIL_PATH:=/opt/hsa/lib}"
: "${HSA_LLVM_PATH:=/opt/amd/cloc/bin}"
: "${ATMI_RUNTIME_PATH:=/opt/amd/atmi}"
ATMI_INC=$ATMI_RUNTIME_PATH/include
# The link step used to pass -L$ATMI_PATH/lib, but nothing in this script sets
# ATMI_PATH, so it expanded to -L/lib. An ATMI_PATH supplied by the caller is
# still honoured; otherwise the ATMI runtime location is used.
ATMI_LIB_DIR=${ATMI_PATH:-$ATMI_RUNTIME_PATH}/lib

# Print a command line and then run exactly that command line, so the echoed
# text can never drift from what is executed.
run() {
    echo "$@"
    "$@"
}

echo
#export VT_MODE="STAT:TRACE"
# Drop any previous binary so a failed build cannot leave a stale one behind.
if [ -f kps ] ; then rm kps ; fi

# Step 1: compile the host source with the atmi_pifgen plugin pointed at
# nullKernel.cl.
#vtc++ -vt:inst compinst -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
run g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so \
    -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I"$ATMI_INC"

# Step 2: link the object together with kps.cpp.pifdefs.c against the ATMI
# and HSA runtimes.
#vtc++ -vt:inst compinst -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
run g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf \
    -L"$ATMI_LIB_DIR" -latmi_runtime \
    -L"$HSA_RUNTIME_PATH/lib" -lhsa-runtime64 \
    -I"$ATMI_INC" -I"$HSA_RUNTIME_PATH/include"

# Execute
echo
run ./kps

--------------------------------------------------------------------------------
/examples/c_extension/kps/cleanup.sh:
--------------------------------------------------------------------------------
#MIT License
#
#Copyright © 2016 Advanced Micro Devices, Inc.
4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | #this software and associated documentation files (the "Software"), to deal in 7 | #the Software 8 | #without restriction, including without limitation the rights to use, copy, 9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | #Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following 12 | #conditions: 13 | # 14 | #The above copyright notice and this permission notice shall be included in all 15 | #copies or substantial portions of the Software. 16 | # 17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | #FITNESS FOR A PARTICULAR 20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 | #CONTRACT, TORT OR 23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 24 | #OR OTHER DEALINGS IN THE SOFTWARE. 25 | 26 | rm *.o 27 | rm *pifdefs.c* 28 | rm kps 29 | 30 | -------------------------------------------------------------------------------- /examples/c_extension/kps/nullKernel.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | 7 | __kernel void nullKernel_impl(long int kcalls){} 8 | -------------------------------------------------------------------------------- /examples/c_extension_denq/helloworld/HelloWorld.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include 8 | #include 9 | using namespace std; 10 | #include "atmi_c_ext.h" 11 | #include "atmi_kl.h" 12 | 13 | __kernel void mainTask_gpu(__global atmi_task_handle_t thisTask, int numTasks) __attribute__((atmi_kernel("mainTask", "gpu"))); 14 | 15 | __kernel void subTask_gpu(__global atmi_task_handle_t thisTask, int taskId) __attribute__((atmi_kernel("subTask", "gpu"))); 16 | 17 | extern "C" void print_taskId_cpu(__global atmi_task_handle_t thisTask, int taskId) __attribute__((atmi_kernel("print", "cpu"))); 18 | 19 | extern "C" void print_taskId_cpu(__global atmi_task_handle_t thisTask, int taskId) 20 | { 21 | //cout << "Leaf Sub-task ID" << endl; 22 | cout << "Leaf Sub-task ID" << ": " << taskId << endl; 23 | } 24 | 25 | extern atmi_klist_t *atmi_klist; 26 | int main(int argc, char* argv[]) { 27 | int numTasks = 16; 28 | 29 | ATMI_LPARM_1D(lparm, numTasks); 30 | lparm->synchronous = ATMI_TRUE; 31 | lparm->groupable = ATMI_TRUE; 32 | 33 | lparm->kernel_id = K_ID_mainTask_gpu; 34 | //for(int i = 0; i < numTasks; i++) 35 | mainTask(lparm, numTasks); 36 | 37 | //SYNC_STREAM(0); 38 | cout << "Number: " << *(int *)atmi_klist << endl; 39 | cout << "Number: " << (void *)atmi_klist->tasks << endl; 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- 
/examples/c_extension_denq/helloworld/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | #persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | # 12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | # 16 | #Set HSA Environment variables 17 | HSA_RUNTIME_PATH ?= /opt/hsa 18 | HSA_LIBHSAIL_PATH ?= ${HSA_RUNTIME_PATH}/lib 19 | CLOC_PATH ?= ${HOME}/git/CLOC/bin 20 | ATMI_RUNTIME_PATH ?= ${HOME}/git/atmi 21 | ATMI_INC=${ATMI_RUNTIME_PATH}/include 22 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so 23 | 24 | CC=g++ 25 | CFLAGS=-O3 -g 26 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I. 

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib

OBJS = hello

.PHONY: clean all

all: $(OBJS)

# Two-step build:
#   1. compile HelloWorld.cpp with the atmi_pifgen plugin, which is pointed at
#      hw.cl and asked to write pifdefs.cpp; HSA_LLVM_PATH is set for this
#      command only;
#   2. link the object with pifdefs.cpp against the ATMI runtime.
# hw.cl stays a prerequisite so kernel edits trigger a rebuild, but only the
# first prerequisite ($<) is handed to the compiler: $^ also put hw.cl on the
# g++ command line as a stray input.
# $(CLOC_PATH) is the variable defined at the top of this Makefile; its default
# is the path that used to be spelled out here, and overrides are now honoured.
hello: HelloWorld.cpp hw.cl
	HSA_LLVM_PATH=$(CLOC_PATH) $(CC) -c -o HelloWorld.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
	$(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove objects, .hsaco files, the generated pifdefs.cpp and the binary.
clean:
	rm -rf *.o *.hsaco pifdefs.cpp $(OBJS)

--------------------------------------------------------------------------------
/examples/c_extension_denq/helloworld/hw.cl:
--------------------------------------------------------------------------------
/*===--------------------------------------------------------------------------
 * ATMI (Asynchronous Task and Memory Interface)
 *
 * This file is distributed under the MIT License. See LICENSE.txt for details.
 *===------------------------------------------------------------------------*/

#include "atmi.h"

/* GPU sub-task: enqueues one print task, implemented by print_taskId_cpu,
   passing its taskId along. */
__kernel void subTask_gpu(atmi_task_handle_t thisTask, int taskId) {
    ATMI_KLPARM_1D(klparm, 1, thisTask);
    klparm->kernel_id = K_ID_print_taskId_cpu; //tell print to use print_taskId_cpu
    print(klparm, taskId);
}

/* GPU main task: every work-item enqueues one subTask and passes its own
   global id as the task id. */
__kernel void mainTask_gpu(atmi_task_handle_t thisTask, int numTasks) {
    int gid = get_global_id(0);
    ATMI_KLPARM_1D(klparm, 1, thisTask);
    klparm->kernel_id = K_ID_subTask_gpu; //tell subTask to use the subTask_gpu kernel
    subTask(klparm, gid);
    //klparm->kernel_id = K_ID_print_taskId_cpu; //tell print_taskId to use print_taskId_cpu
    //print(klparm, gid);
}
--------------------------------------------------------------------------------
/examples/c_extension_denq/kps/buildrun.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
#MIT License
#
#Copyright © 2016 Advanced Micro
Devices, Inc. 6 | # 7 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 8 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 9 | #persons to whom the Software is furnished to do so, subject to the following conditions: 10 | # 11 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 12 | # 13 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 14 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 15 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#
set -e

# Set HSA Environment variables. A value already present in the environment
# wins; the default is applied only when the variable is unset or empty.
: "${HSA_RUNTIME_PATH:=/opt/hsa}"
: "${HSA_LIBHSAIL_PATH:=${HSA_RUNTIME_PATH}/lib}"
: "${HSA_LLVM_PATH:=/opt/amd/cloc/bin}"
: "${ATMI_RUNTIME_PATH:=${HOME}/git/atmi}"
ATMI_INC=$ATMI_RUNTIME_PATH/include
# The link step used to pass -L$ATMI_PATH/lib, but nothing in this script sets
# ATMI_PATH, so it expanded to -L/lib. An ATMI_PATH supplied by the caller is
# still honoured; otherwise the ATMI runtime location is used.
ATMI_LIB_DIR=${ATMI_PATH:-$ATMI_RUNTIME_PATH}/lib

# Print a command line and then run exactly that command line, so the echoed
# text can never drift from what is executed.
run() {
    echo "$@"
    "$@"
}

echo
#export VT_MODE="STAT:TRACE"
# Drop any previous binary so a failed build cannot leave a stale one behind.
if [ -f kps ] ; then rm kps ; fi

# Step 1: compile the host source with the atmi_pifgen plugin pointed at
# nullKernel.cl. The echo and the command are kept as a pair here because the
# command carries an environment prefix that the echoed line never showed.
# NOTE(review): the prefix overrides whatever HSA_LLVM_PATH holds, including
# the default set above - confirm that pinning ${HOME}/git/CLOC/bin is intended.
echo g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I"$ATMI_INC"
#vtc++ -vt:inst compinst -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
HSA_LLVM_PATH="${HOME}/git/CLOC/bin" g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I"$ATMI_INC"
#HSA_LLVM_PATH=${HOME}/git/CLOC/bin g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-jitcompile=false -O3 -I$ATMI_INC

# Step 2: link the object together with kps.cpp.pifdefs.c against the ATMI
# and HSA runtimes.
#vtc++ -vt:inst compinst -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
run g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf \
    -L"$ATMI_LIB_DIR" -latmi_runtime \
    -L"$HSA_RUNTIME_PATH/lib" -lhsa-runtime64 \
    -I"$ATMI_INC" -I"$HSA_RUNTIME_PATH/include"

# Execute
# The previous LD_LIBRARY_PATH is appended only when it is non-empty: an
# unconditional trailing ':' would add an empty entry, which the dynamic
# loader treats as the current directory.
# NOTE(review): the library directory is hard-coded to /opt/hsa-nov15/lib
# rather than derived from HSA_RUNTIME_PATH - confirm which one is intended.
RUN_LIBRARY_PATH="/opt/hsa-nov15/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
echo
echo LD_LIBRARY_PATH="$RUN_LIBRARY_PATH" ./kps
LD_LIBRARY_PATH="$RUN_LIBRARY_PATH" ./kps

--------------------------------------------------------------------------------
/examples/c_extension_denq/kps/cleanup.sh:
-------------------------------------------------------------------------------- 1 | # 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | #persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | # 12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | # Remove the build outputs of the kps example; -f keeps a repeated cleanup from failing on files that do not exist. 16 | rm -f *.o 17 | rm -f *pifdefs.c* 18 | rm -f kps 19 | 20 | -------------------------------------------------------------------------------- /examples/c_extension_denq/kps/nullKernel.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | /* Device-side kernels of the kps example; each one enqueues further tasks through the subTask/mainTask launch calls. NOTE(review): ATMI_KLPARM_1D and the K_ID_* constants are not defined in this file - presumably supplied by atmi.h and the pifgen plugin; confirm. */ 7 | #include "atmi.h" 8 | __kernel void subTask_gpu(atmi_task_handle_t thisTask) { 9 | return; /* returns immediately, so the launch code below is never reached */ 10 | ATMI_KLPARM_1D(klparm, 1, thisTask); 11 | klparm->kernel_id = K_ID_subTask_cpu; 12 | subTask(klparm); 13 | } 14 | /* Every work-item launches subTask numTasks times, selecting the subTask_gpu implementation. */ 15 | __kernel void mainTask_gpu(atmi_task_handle_t thisTask, int numTasks) { 16 | int gid = get_global_id(0); /* not used below */ 17 | ATMI_KLPARM_1D(klparm, 1, thisTask); 18 | klparm->kernel_id = K_ID_subTask_gpu; 19 | int i; 20 | for(i = 0; i < numTasks; i++) 21 | subTask(klparm); 22 | } 23 | /* While numTasks > 1 every work-item re-launches mainTask with the recursive implementation; afterwards every work-item launches one subTask. */ 24 | __kernel void mainTask_recursive_gpu(atmi_task_handle_t thisTask, int numTasks) { 25 | int gid = get_global_id(0); 26 | int gsize = get_global_size(0); 27 | //if(gid == 0) { 28 | if(numTasks > 1) { 29 | int new_numTasks;; 30 | int new_workitems; 31 | if(gsize >= numTasks) { /* global size already covers numTasks: child gets numTasks work-items and a count of 1 */ 32 | new_numTasks = 1; 33 | new_workitems = numTasks; 34 | } 35 | else { /* otherwise keep gsize work-items and divide the count by gsize */ 36 | new_numTasks = numTasks/gsize; 37 | new_workitems = gsize; 38 | } 39 | ATMI_KLPARM_1D(klparm, new_workitems, thisTask); 40 | klparm->kernel_id = K_ID_mainTask_recursive_gpu; 41 | mainTask(klparm, new_numTasks); 42 | } 43 | ATMI_KLPARM_1D(klparm_sub, 1, thisTask); 44 | klparm_sub->kernel_id = K_ID_subTask_gpu; 45 | subTask(klparm_sub); 46 | //} 47 | } 48 | /* Launches two child mainTask instances (numTasks/2 and numTasks/2 - 1) when numTasks > 1. NOTE(review): kernel_id is K_ID_mainTask_recursive_gpu rather than a binary-tree id - confirm this is intended. */ 49 | __kernel void mainTask_binary_tree_gpu(atmi_task_handle_t thisTask, int numTasks) { 50 | int gid = get_global_id(0); 51 | //if(gid == 0) { 52 | ATMI_KLPARM_1D(klparm, 1, thisTask); 53 | klparm->kernel_id = K_ID_mainTask_recursive_gpu; 54 | if(numTasks > 1) { 55 | mainTask(klparm, numTasks/2); 56 | mainTask(klparm, numTasks/2 - 1); 57 | } 58 | //} 59 | } 60 | /* One work-item out of every 64 (global id divisible by 64) launches a single subTask; numTasks is not read. */ 61 | __kernel void mainTask_flat_gpu(atmi_task_handle_t thisTask, int numTasks) { 62 | if(get_global_id(0) % 64 == 0) { 63 | ATMI_KLPARM_1D(klparm, 1, thisTask); 64 | klparm->kernel_id = K_ID_subTask_gpu; 65 | subTask(klparm); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- 
/examples/c_extension_denq/reduction/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | #persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | # 12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | # 16 | #Set HSA Environment variables 17 | HSA_RUNTIME_PATH ?= /opt/hsa-nov15 18 | HSA_LIBHSAIL_PATH ?= ${HSA_RUNTIME_PATH}/lib 19 | HSA_LLVM_PATH ?= /opt/amd/cloc/bin 20 | ATMI_RUNTIME_PATH ?= /opt/amd/atmi 21 | ATMI_INC=${ATMI_RUNTIME_PATH}/include 22 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so 23 | 24 | CC=g++ 25 | CFLAGS=-O3 -g 26 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I. 
27 | 28 | LIBS=-latmi_runtime -lhsa-runtime64 -lelf 29 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 30 | 31 | OBJS = reduction 32 | 33 | .PHONY: clean all 34 | 35 | all: $(OBJS) 36 | # Only Reduction.cpp ($<) is handed to g++; reduction.cl stays a prerequisite because the pifgen plugin reads it through -fplugin-arg-atmi_pifgen-clfile. 37 | reduction: Reduction.cpp reduction.cl 38 | $(CC) -c -o Reduction.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS) 39 | $(CC) -o $@ Reduction.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 40 | 41 | clean: 42 | rm -rf *.o *pifdefs.c* $(OBJS) 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/c_extension_denq/reduction/Reduction.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include <stdio.h> 8 | #include <stdlib.h> 9 | #include <iostream> 10 | using namespace std; 11 | #include "atmi.h" 12 | /* reduction_cpu: in-place pairwise sum. The first pass reads in[0 .. 2*length-1], so the buffer must hold at least 2*length ints; for a power-of-two length the total ends up in in[0]. */ 13 | // Declare reduction as the PIF for the CPU kernel reduction_cpu 14 | extern "C" void reduction_cpu(atmi_task_handle_t thisTask, int* in, int length) __attribute__((atmi_kernel("reduction", "cpu"))); 15 | extern "C" void reduction_cpu(atmi_task_handle_t thisTask, int* in, int length) { 16 | int num; 17 | for (num = length; num > 0; num >>= 1) { 18 | int j; 19 | for(j = 0; j < num; j++) 20 | { 21 | in[j] += in[j + num]; 22 | } 23 | } 24 | } 25 | 26 | // Declare reduction as the PIF for the GPU kernel implementation reduction_gpu 27 | __kernel void reduction_gpu(atmi_task_handle_t thisTask, __global int* in, int length) __attribute__((atmi_kernel("reduction", "gpu"))); 28 | /* main: fills two 1024-element arrays with ones, reduces one through the GPU implementation and one through the CPU implementation (each launched with length/2), and prints element 0 of both. */ 29 | int main(int argc, char* argv[]) { 30 | int length = 1024; 31 | int *input_gpu = (int*) malloc(sizeof(int)*(length)); 32 | int *input_cpu = (int*) malloc(sizeof(int)*(length)); 33 | if(input_gpu == NULL || input_cpu == NULL) { free(input_gpu); free(input_cpu); return 1; } /* do not write through a failed allocation */ 34 | for(int ii = 0; ii < length; ii++) 35 | { 36 | input_cpu[ii] = input_gpu[ii] = 1; 37 | } 38 | 39 | ATMI_LPARM_1D(lparm_gpu, length >> 1); 40 | lparm_gpu->synchronous = ATMI_TRUE; 41 | lparm_gpu->kernel_id = K_ID_reduction_gpu; 42 | 43 | reduction(lparm_gpu, input_gpu, length >> 1); 44 | 45 | ATMI_LPARM_1D(lparm_cpu, length >> 1); 46 | lparm_cpu->synchronous = ATMI_TRUE; 47 | lparm_cpu->kernel_id = K_ID_reduction_cpu; 48 | reduction(lparm_cpu, input_cpu, length >> 1); 49 | 50 | printf("GPU Sum: %d\n", input_gpu[0]); 51 | printf("CPU Sum: %d\n", input_cpu[0]); 52 | free(input_gpu); 53 | free(input_cpu); 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /examples/c_extension_denq/reduction/buildrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #MIT License 4 | # 5 | #Copyright © 2016 Advanced Micro Devices, 
Inc. 6 | # 7 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 8 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 9 | #persons to whom the Software is furnished to do so, subject to the following conditions: 10 | # 11 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 12 | # 13 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 14 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 15 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | # 17 | set -e 18 | # Set HSA Environment variables 19 | [ -z $HSA_TEST_RUNTIME_PATH ] && HSA_TEST_RUNTIME_PATH=/opt/hsa.1_1T/ 20 | [ -z $HSA_LIBHSAIL_PATH ] && HSA_LIBHSAIL_PATH=$HSA_TEST_RUNTIME_PATH/lib 21 | [ -z $ATMI_RUNTIME_PATH ] && ATMI_RUNTIME_PATH=/opt/amd/atmi 22 | ATMI_INC=$ATMI_RUNTIME_PATH/include 23 | 24 | # Do not compile accelerated functions separately. This script will be invoked by the GCC plugin itself. 
25 | 26 | # Compile Main and generate the PIF definitions for host and accelerated functions 27 | # in pifdefs.cpp (the name given by -fplugin-arg-atmi_pifgen-pifgenfile) 28 | #echo 29 | if [ -f reduction ] ; then rm reduction ; fi 30 | echo g++ -c -o Reduction.o Reduction.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -O3 -I$ATMI_INC 31 | g++ -c -o Reduction.o Reduction.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -O3 -I$ATMI_INC 32 | 33 | echo g++ -o reduction Reduction.o pifdefs.cpp -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_TEST_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_TEST_RUNTIME_PATH/include 34 | g++ -o reduction Reduction.o pifdefs.cpp -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_TEST_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_TEST_RUNTIME_PATH/include 35 | 36 | # Execute 37 | echo 38 | echo LD_LIBRARY_PATH=$HSA_TEST_RUNTIME_PATH/lib:$LD_LIBRARY_PATH ./reduction 39 | LD_LIBRARY_PATH=$HSA_TEST_RUNTIME_PATH/lib:$LD_LIBRARY_PATH ./reduction 40 | #gdb reduction 41 | -------------------------------------------------------------------------------- /examples/c_extension_denq/reduction/cleanup.sh: -------------------------------------------------------------------------------- 1 | # 2 | #MIT License 3 | # 4 | #Copyright © 2016 Advanced Micro Devices, Inc. 
5 | # 6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | #persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 11 | # 12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | # Remove the build outputs of the reduction example; -f keeps a repeated cleanup from failing on files that do not exist. 16 | rm -f *.o 17 | rm -f *pifdefs.c* 18 | rm -f reduction 19 | -------------------------------------------------------------------------------- /examples/c_extension_denq/reduction/reduction.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | /* reduction_gpu: one pairwise-add step of an in-place reduction. Work-item num adds in[num + length] into in[num]; work-item 0 then halves length and launches the next step. */ 7 | #include "atmi.h" 8 | __kernel void reduction_gpu(atmi_task_handle_t thisTask, __global int* in, int length) { 9 | int num = get_global_id(0); 10 | /* reads index num + length, so in must hold at least (global size + length) ints */ 11 | in[num] += in[num + length]; 12 | /* NOTE(review): an OpenCL barrier synchronizes only the work-items of one work-group - confirm the launch never spans several groups before relying on it here */ 13 | barrier(CLK_GLOBAL_MEM_FENCE); 14 | /* only work-item 0 enqueues the follow-up task */ 15 | if(num == 0) 16 | { 17 | length = length >> 1; 18 | ATMI_KLPARM_1D(klparm, length, thisTask); 19 | if(length > 8) /* more than 8 elements left: stay on the GPU implementation, otherwise hand over to reduction_cpu */ 20 | klparm->kernel_id = K_ID_reduction_gpu; 21 | else 22 | klparm->kernel_id = K_ID_reduction_cpu; 23 | reduction(klparm, in, length); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/interop/globalsymbol/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | SHELL=/bin/bash 7 | 8 | #BUILDROOT 9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//') 10 | #$(info BUILDROOT $(BUILDROOT)) 11 | -include $(BUILDROOT)/atmi-config.mak 12 | 13 | #Set ATMI Environment variables 14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi 15 | 16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin 17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include 18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib 19 | 20 | #Set HSA Environment variables 21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa 22 | #Set ROCM device environment variables 23 | ROCM_DEVICE_PATH ?= /opt/rocm 24 | 25 | #Set ROCM device lib environment variables 26 | ROCM_DEVICE_PATH ?= /opt/rocm 27 | 28 | #Set LC Environment variables 29 | AMDLLVM ?= /opt/amd/llvm 30 | 31 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa 32 | 33 | #MCPU 34 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu) 35 | #$(info MCPU $(MCPU)) 36 | 37 | # Kernel compiler 38 | CLC ?= 1 39 | 40 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include/hsa -I. 
41 | 42 | # CLOC 43 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin 44 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} 45 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v" 46 | 47 | # ROCm-Device-lib 48 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib 49 | 50 | # GPU compiler 51 | CLCC=$(AMDLLVM)/bin/clang 52 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header 53 | CLCFLAGS += -target amdgcn--amdhsa 54 | CLCFLAGS += -mcpu=$(MCPU) 55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc 56 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc 57 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc 58 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc 59 | 60 | # Host compiler 61 | CXX = g++ 62 | CXXFLAGS =-O3 -g -std=c++11 63 | #CXXFLAGS += -v 64 | 65 | LIBS=-latmi_runtime 66 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 67 | 68 | OBJS = globalsymbol 69 | # test only runs the built binary and creates no file, so it is declared phony together with clean and all 70 | .PHONY: clean all test 71 | 72 | all: $(OBJS) 73 | 74 | globalsymbol: globalsymbol.cpp globalsymbol.cl 75 | ifeq ($(CLC),1) 76 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 globalsymbol.cl 77 | else 78 | $(CLCC) ${CLCFLAGS} -o globalsymbol.hsaco globalsymbol.cl 79 | endif 80 | $(CXX) -o $@ globalsymbol.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 81 | 82 | clean: 83 | rm -rf *.o *.hsaco $(OBJS) 84 | 85 | test: 86 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./globalsymbol 87 | -------------------------------------------------------------------------------- /examples/interop/globalsymbol/globalsymbol.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. 
See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | int multiplier = 4; 8 | 9 | __kernel void multiply_gpu(__global float *a, size_t sz) { 10 | int gid = get_global_id(0); 11 | if(gid < sz) 12 | a[gid] *= multiplier; 13 | } 14 | -------------------------------------------------------------------------------- /examples/interop/globalsymbol/globalsymbol.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi_runtime.h" 8 | #include "atmi_interop_hsa.h" 9 | #include <stdio.h> 10 | #include <stdlib.h> 11 | #include <string.h> 12 | #include <iostream> 13 | using namespace std; 14 | #ifdef __cplusplus 15 | #define _CPPSTRING_ "C" 16 | #endif 17 | #ifndef __cplusplus 18 | #define _CPPSTRING_ 19 | #endif 20 | 21 | enum { 22 | GPU_IMPL = 42 23 | }; 24 | /* main: registers globalsymbol.hsaco, runs multiply_gpu over a 16-float array, then overwrites the device symbol "multiplier" and runs the kernel again. An optional first argument selects the GPU. */ 25 | int main(int argc, char **argv) { 26 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 27 | if(err != ATMI_STATUS_SUCCESS) return -1; 28 | const char *module = "globalsymbol.hsaco"; 29 | atmi_platform_type_t module_type = AMDGCN; 30 | atmi_module_register(&module, &module_type, 1); 31 | 32 | atmi_kernel_t kernel; 33 | const unsigned int num_args = 2; 34 | size_t arg_sizes[] = {sizeof(float *), sizeof(size_t)}; 35 | atmi_kernel_create_empty(&kernel, num_args, arg_sizes); 36 | atmi_kernel_add_gpu_impl(kernel, "multiply_gpu", GPU_IMPL); 37 | 38 | size_t a_len = 16; 39 | float *a = (float *) malloc(sizeof(float) * a_len); 40 | // init a 41 | cout << "Original array values" << endl; 42 | for(int i = 0; i < a_len; i++) { 43 | a[i] = i + 1; 44 | cout << a[i] << " "; 45 | } 46 | cout << endl; 47 | 48 | int gpu_id = 0; 49 | atmi_machine_t *machine = 
atmi_machine_get_info(); 50 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU]; 51 | if(gpu_count > 0 && argv[1] != NULL) gpu_id = (((atoi(argv[1]) % gpu_count) + gpu_count) % gpu_count); 52 | /* the modulo is skipped when no GPU is reported (gpu_count would be a zero divisor); the second modulo folds a negative argument back into [0, gpu_count) */ 53 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0); 54 | 55 | void *d_a; 56 | atmi_malloc(&d_a, sizeof(float) * a_len, gpu); 57 | atmi_memcpy(d_a, a, sizeof(float) * a_len); 58 | 59 | /* setup launch params */ 60 | void *gpu_args[] = {&d_a, &a_len}; 61 | ATMI_LPARM_1D(lparm, a_len); 62 | lparm->synchronous = ATMI_TRUE; 63 | lparm->kernel_id = GPU_IMPL; 64 | lparm->place = ATMI_PLACE_GPU(0, gpu_id); 65 | 66 | /* launch and wait for kernel */ 67 | atmi_task_launch(lparm, kernel, gpu_args); 68 | atmi_memcpy(a, d_a, sizeof(float) * a_len); 69 | cout << "With default multiplier (4)" << endl; 70 | for(int i = 0; i < a_len; i++) { 71 | cout << a[i] << " "; 72 | } 73 | cout << endl; 74 | 75 | /* change the multiplier */ 76 | int new_multiplier = 10; 77 | void *mul_addr; 78 | unsigned int mul_size; 79 | atmi_interop_hsa_get_symbol_info(gpu, "multiplier", &mul_addr, &mul_size); 80 | atmi_memcpy(mul_addr, &new_multiplier, mul_size); 81 | 82 | /* launch with new multiplier and wait for kernel */ 83 | atmi_task_launch(lparm, kernel, gpu_args); 84 | atmi_memcpy(a, d_a, sizeof(float) * a_len); 85 | cout << "With modified multiplier (" << new_multiplier << ")" << endl; 86 | for(int i = 0; i < a_len; i++) { 87 | cout << a[i] << " "; 88 | } 89 | cout << endl; 90 | 91 | /* cleanup */ 92 | free(a); 93 | atmi_free(d_a); 94 | atmi_kernel_release(kernel); 95 | atmi_finalize(); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /examples/interop/hsainfo/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. 
See LICENSE.txt for details. 5 | # ===-------------------------------------------------------------------------- 6 | SHELL=/bin/bash 7 | 8 | #BUILDROOT 9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//') 10 | #$(info BUILDROOT $(BUILDROOT)) 11 | -include $(BUILDROOT)/atmi-config.mak 12 | 13 | #Set ATMI Environment variables 14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi 15 | 16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin 17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include 18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib 19 | 20 | #Set HSA Environment variables 21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa 22 | #Set ROCM device environment variables 23 | ROCM_DEVICE_PATH ?= /opt/rocm 24 | 25 | #Set ROCM device lib environment variables 26 | ROCM_DEVICE_PATH ?= /opt/rocm 27 | 28 | #Set LC Environment variables 29 | AMDLLVM ?= /opt/amd/llvm 30 | 31 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa 32 | 33 | #MCPU 34 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu) 35 | #$(info MCPU $(MCPU)) 36 | 37 | # Host compiler 38 | CXX = g++ 39 | CXXFLAGS =-O3 -g -std=c++11 40 | #CXXFLAGS += -v 41 | 42 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include/hsa -I. 43 | 44 | LIBS=-latmi_runtime -lhsa-runtime64 45 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 46 | 47 | OBJS = hsainfo 48 | # test only runs the built binary and creates no file, so it is declared phony together with clean and all 49 | .PHONY: clean all test 50 | 51 | all: $(OBJS) 52 | 53 | hsainfo: hsainfo.cpp 54 | $(CXX) -o $@ hsainfo.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 55 | 56 | clean: 57 | rm -rf *.o $(OBJS) 58 | 59 | test: 60 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hsainfo 61 | -------------------------------------------------------------------------------- /examples/runtime/dlbench_multi_agent/build.atmi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #!/bin/bash 4 | # 5 | #MIT License 6 | # 7 | #Copyright © 2016 Advanced Micro Devices, Inc. 
8 | # 9 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software 10 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 11 | #persons to whom the Software is furnished to do so, subject to the following conditions: 12 | # 13 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 16 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
18 | # 19 | # Builds (default) or cleans the dlbench ATMI benchmark; options are parsed below and unset ones fall back to defaults. 20 | if [ $# -eq 1 ] && [ "$1" = "--help" ]; then 21 | echo "usage: ./build.atmi.sh -l <layout> -a <alloc> -t <agent> -c -v -m <mode>" 22 | exit 0 23 | fi 24 | 25 | [ -z "${HSA_RUNTIME_PATH}" ] && HSA_RUNTIME_PATH=/opt/rocm/hsa 26 | 27 | ATMIPATH=/opt/rocm/libatmi 28 | CLOC_PATH=/usr/bin 29 | CC=/usr/bin/gcc 30 | CXX=g++ 31 | 32 | while [ $# -gt 0 ]; do 33 | key="$1" 34 | case "$key" in 35 | -l|--layout) 36 | layout="$2" 37 | shift # option has parameter 38 | ;; 39 | -m|--mode) 40 | mode="$2" 41 | shift 42 | ;; 43 | -c|--copy) 44 | copy="COPY" 45 | ;; 46 | -v|--verbose) 47 | verbose="VERBOSE" 48 | ;; 49 | -a|--alloc) 50 | alloc="$2" 51 | shift 52 | ;; 53 | -t|--agent) 54 | agent="$2" 55 | shift 56 | ;; 57 | *) 58 | echo "Unknown option:" "$key" 59 | exit 1 # report the bad option to the caller instead of signalling success 60 | ;; 61 | esac 62 | shift 63 | done 64 | 65 | [ "$layout" ] || { layout="AOS"; } 66 | [ "$mode" ] || { mode="build";} 67 | [ "$agent" ] || { agent="DEVICE";} 68 | [ "$copy" ] || { copy="NOCOPY";} 69 | [ "$alloc" ] || { alloc="FINE";} 70 | [ "$verbose" ] || { verbose="CURT";} 71 | [ "$module_type" ] || { module_type="MODULE_GCN";} 72 | 73 | host=`hostname` 74 | case "$host" in 75 | xn0|xn1|xn2|xn3|xn4|xn5|xn6|xn7|xn8|xn9) 76 | node="kaveri" 77 | ;; 78 | c0|c1|c2|c3) 79 | node="carrizo" 80 | ;; 81 | t1|ROCNREDLINE) 82 | node="fiji" 83 | ;; 84 | *) 85 | echo "unknown host node" "$host" 86 | exit 1 87 | esac 88 | 89 | 90 | if [ "$mode" = "clean" ]; then 91 | rm -rf *.o *~ grayscale_hsaco.h kernel.[ch] 92 | for l in AOS DA; do 93 | rm -rf dlbench.atmi_${l} 94 | done 95 | fi 96 | 97 | # build from C source 98 | if [ "$mode" = "build" ]; then 99 | echo "${CLOC_PATH}/cloc.sh -mcpu fiji -opt 2 grayscale.cl" 100 | ${CLOC_PATH}/cloc.sh -mcpu fiji -opt 2 grayscale.cl 101 | echo "${CXX} -g -I${ATMIPATH}/include -I${HSA_RUNTIME_PATH}/include -D${module_type} -D${alloc} -D${layout} -D${verbose} -I. -c dlbench.atmi.c -std=c++11" 102 | ${CXX} -g -I${ATMIPATH}/include -I${HSA_RUNTIME_PATH}/include -D${module_type} -D${alloc} -D${layout} -D${verbose} -I. 
-c dlbench.atmi.c -std=c++11 103 | echo "${CXX} -o dlbench.atmi_${layout} dlbench.atmi.o ${LFLAGS} -L${ATMIPATH}/lib -latmi_runtime " 104 | ${CXX} -o dlbench.atmi_${layout} dlbench.atmi.o ${LFLAGS} -L${ATMIPATH}/lib -latmi_runtime 105 | fi 106 | 107 | -------------------------------------------------------------------------------- /examples/runtime/dlbench_multi_agent/dlbench.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | /* Problem-size macros and argument structs shared by the dlbench host code and grayscale.cl. */ 7 | #ifdef TUNE 8 | #define NUM_IMGS __NUM_IMGS_FROM_TUNER 9 | #define PIXELS_PER_IMG __PIXELS_PER_IMG_FROM_TUNER 10 | #define DATA_ITEM_TYPE __DATA_ITEM_TYPE_FROM_TUNER 11 | #else 12 | #define DATA_ITEM_TYPE float 13 | #define NUM_IMGS 1000 14 | #define PIXELS_PER_IMG 1024 15 | #endif 16 | 17 | typedef struct pixel_type { 18 | float r; 19 | float g; 20 | float b; 21 | float x; 22 | } pixel; 23 | 24 | 25 | typedef struct arg_aos_struct_type { 26 | pixel *src; 27 | pixel *dst; 28 | int start_index; 29 | int end_index; 30 | } args_aos; 31 | 32 | 33 | typedef struct arg_da_struct_type { 34 | float *r; 35 | float *g; 36 | float *b; 37 | float *x; 38 | float *d_r; 39 | float *d_g; 40 | float *d_b; 41 | float *d_x; 42 | int start_index; 43 | int end_index; 44 | } args_da; 45 | 46 | #define ITERS 1 47 | 48 | #define DEVICES 2 49 | #define CPU_THREADS 4 50 | 51 | #define THREADS PIXELS_PER_IMG 52 | #define WORKGROUP 256 53 | 54 | #define STREAMS 8 55 | #define FLOP 6 // floating-point ops in one iteration of kernel loop 56 | /* parenthesized so the product stays one operand wherever the macro is expanded (e.g. x / ERROR_THRESH) */ 57 | #define ERROR_THRESH (NUM_IMGS * 0.01) // relaxed FP-precision checking 58 | 59 | #ifdef HETERO 60 | #define HOST 61 | #define DEVICE 62 | #endif 63 | 
-------------------------------------------------------------------------------- /examples/runtime/dlbench_multi_agent/grayscale.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "dlbench.h" 8 | /* grayscale_aos: array-of-structs variant. Work-item i handles pixel i of every image (stride PIXELS_PER_IMG) and writes the same weighted sum of r, g, b and x into all four destination channels. There is no bounds check on i - presumably the launch size equals PIXELS_PER_IMG; confirm against the host code. */ 9 | __kernel void grayscale_aos(__global pixel *src_images, __global pixel *dst_images, int num_imgs) { 10 | int i = get_global_id(0); 11 | DATA_ITEM_TYPE gs; 12 | for (int k = 0; k < ITERS; k++) { 13 | for (int j = 0; j < num_imgs * PIXELS_PER_IMG; j = j + PIXELS_PER_IMG) { 14 | gs = (0.3 * src_images[i + j].r + 0.59 * 15 | src_images[i + j].g + 0.11 * src_images[i + j].b + 1.0 * 16 | src_images[i + j].x); 17 | dst_images[i + j].r = gs; 18 | dst_images[i + j].g = gs; 19 | dst_images[i + j].b = gs; 20 | dst_images[i + j].x = gs; 21 | } 22 | } 23 | } 24 | 25 | /* grayscale_da: same computation with one separate array per channel (r, g, b, x in; d_r, d_g, d_b, d_x out). */ 26 | __kernel void grayscale_da(__global DATA_ITEM_TYPE *r, __global DATA_ITEM_TYPE *g, 27 | __global DATA_ITEM_TYPE *b, __global DATA_ITEM_TYPE *x, __global DATA_ITEM_TYPE *d_r, 28 | __global DATA_ITEM_TYPE *d_g, __global 29 | DATA_ITEM_TYPE *d_b, __global DATA_ITEM_TYPE *d_x, 30 | int num_imgs) { 31 | size_t i = get_global_id(0); 32 | DATA_ITEM_TYPE gs; 33 | for (int k = 0; k < ITERS; k++) { 34 | for (int j = 0; j < num_imgs * PIXELS_PER_IMG; j = j + PIXELS_PER_IMG) { 35 | gs = (0.3 * r[i + j] + 0.59 * g[i + j] + 0.11 * b[i + j] + 1.0 * x[i + j]); 36 | d_r[i + j] = gs; 37 | d_g[i + j] = gs; 38 | d_b[i + j] = gs; 39 | d_x[i + j] = gs; 40 | } 41 | } 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /examples/runtime/eps/Makefile: -------------------------------------------------------------------------------- 1 | 
# ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 5 | # ===-------------------------------------------------------------------------- 6 | SHELL=/bin/bash 7 | 8 | #BUILDROOT 9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//') 10 | #$(info BUILDROOT $(BUILDROOT)) 11 | -include $(BUILDROOT)/atmi-config.mak 12 | 13 | #Set ATMI Environment variables 14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi 15 | 16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin 17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include 18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib 19 | 20 | #Set HSA Environment variables 21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa 22 | #Set ROCM device environment variables 23 | ROCM_DEVICE_PATH ?= /opt/rocm 24 | 25 | #Set LC Environment variables 26 | AMDLLVM ?= /opt/amd/llvm 27 | 28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa 29 | 30 | #MCPU 31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu) 32 | #$(info MCPU $(MCPU)) 33 | 34 | # Kernel compiler 35 | CLC ?= 1 36 | 37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I. 
38 | 39 | # CLOC 40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin 41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} 42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v" 43 | 44 | # ROCm-Device-lib 45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib 46 | 47 | # GPU compiler 48 | CLCC=$(AMDLLVM)/bin/clang 49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header 50 | CLCFLAGS += -target amdgcn--amdhsa 51 | CLCFLAGS += -mcpu=$(MCPU) 52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc 53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc 54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc 55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc 56 | 57 | # Host compiler 58 | CXX = g++ 59 | CXXFLAGS =-O3 -g -std=c++11 60 | #CXXFLAGS += -v 61 | 62 | LIBS=-latmi_runtime 63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 64 | 65 | 66 | OBJS = eps 67 | # test only runs the built binary and creates no file, so it is declared phony together with clean and all 68 | .PHONY: clean all test 69 | 70 | all: $(OBJS) 71 | 72 | eps: eps.cpp nullKernel.cl 73 | ifeq ($(CLC),1) 74 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl 75 | else 76 | $(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl 77 | endif 78 | $(CXX) -o $@ eps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 79 | 80 | clean: 81 | rm -rf *.o *.hsaco $(OBJS) 82 | 83 | test: 84 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ATMI_DEPENDENCY_SYNC_TYPE=ATMI_SYNC_CALLBACK ./eps 2 15 85 | -------------------------------------------------------------------------------- /examples/runtime/eps/nullKernel.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 |  *===------------------------------------------------------------------------*/
6 | /* Kernel with an empty body. */
7 | __kernel void nullKernel_impl(){}
8 |
--------------------------------------------------------------------------------
/examples/runtime/fibonacci/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 | # BUILDROOT is the current directory with everything from "examples" onward removed.
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 | # The leading '-' on the include above makes atmi-config.mak optional.
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 | # All paths here use ?=, so a value that is already defined is kept.
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 | # NOTE(review): ATMI_BIN/ATMI_INC/ATMI_LIB are not referenced again in this Makefile.
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): CLCFLAGS further down uses -target amdgcn--amdhsa; confirm "amdgpu" is intended here.
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 | # NOTE(review): the ifeq on CLC in the fibonacci rule is commented out, so CLC has no effect in this Makefile.
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -v"
43 | # NOTE(review): ROCMLIB is not set in this Makefile; it has to come from the environment or atmi-config.mak.
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 | # Link against the ATMI runtime; the ATMI and HSA lib directories are on the search path.
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = fibonacci
66 | # 'test' is phony as well, so a file named "test" cannot suppress the run target.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Host-only build: the kernel compile steps below are commented out.
71 | fibonacci: fibonacci.cpp
72 | #ifeq ($(CLC),1)
73 | # $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 fibonacci.cl
74 | #else
75 | # $(CLCC) ${CLCFLAGS} -o fibonacci.hsaco fibonacci.cl
76 | #endif
77 | 	$(CXX) -o $@ fibonacci.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 |
79 | clean:
80 | 	rm -rf *.o *.hsaco $(OBJS)
81 | # Runs the binary with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | 	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./fibonacci
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 | # BUILDROOT is the current directory with everything from "examples" onward removed.
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 | # The leading '-' on the include above makes atmi-config.mak optional.
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 | # All paths here use ?=, so a value that is already defined is kept.
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 | # NOTE(review): ATMI_BIN/ATMI_INC/ATMI_LIB are not referenced again in this Makefile.
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): CLCFLAGS below uses -target amdgcn--amdhsa; confirm "amdgpu" is intended here.
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 | # CLC=1 builds the kernel with cloc.sh; any other value invokes $(CLCC) directly (see the ifeq in the build rule).
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 | # NOTE(review): ROCMLIB is not set in this Makefile; it has to come from the environment or atmi-config.mak.
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 | # Link against the ATMI runtime; the ATMI and HSA lib directories are on the search path.
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = 
hello
66 | # 'test' is phony as well, so a file named "test" cannot suppress the run target.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Builds hw.hsaco from hw.cl (cloc.sh when CLC is 1, otherwise $(CLCC)), then the host binary.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | 	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | 	$(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | 	$(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 |
79 | clean:
80 | 	rm -rf *.o *.hsaco $(OBJS)
81 | # Runs the binary with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | 	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 |  *===------------------------------------------------------------------------*/
6 | /* GPU kernel: each work-item writes in[id] + 1 to out[id] for id < strlength. */
7 | #include "hw_structs.h"
8 | __kernel void decode_gpu(__global void *args) {
9 |     decode_args_t *gpu_args = (decode_args_t *)args;
10 |     size_t strlength = gpu_args->strlength;
11 |     const char *in = gpu_args->in;
12 |     char *out = gpu_args->out;
13 |     size_t num = get_global_id(0);  /* size_t, so the bound check below is not a signed/unsigned mix */
14 |     if(num < strlength)
15 |         out[num] = in[num] + 1;
16 | }
17 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi.h" 8 | #include "atmi_runtime.h" 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #ifdef __cplusplus 14 | #define _CPPSTRING_ "C" 15 | #endif 16 | #ifndef __cplusplus 17 | #define _CPPSTRING_ 18 | #endif 19 | 20 | #include "hw_structs.h" 21 | #define ErrorCheck(status) \ 22 | if (status != ATMI_STATUS_SUCCESS) { \ 23 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \ 24 | exit(1); \ 25 | } 26 | 27 | extern _CPPSTRING_ void decode_cpu(void **args) { 28 | decode_args_t *cpu_args = *(decode_args_t **)args; 29 | size_t strlength = cpu_args->strlength; 30 | const char *in = cpu_args->in; 31 | char *out = cpu_args->out; 32 | int num = get_global_id(0); 33 | if(num < strlength) 34 | out[num] = in[num] + 1; 35 | } 36 | 37 | int main(int argc, char **argv) { 38 | ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL)); 39 | const char *module = "hw.hsaco"; 40 | atmi_platform_type_t module_type = AMDGCN; 41 | ErrorCheck(atmi_module_register(&module, &module_type, 1)); 42 | 43 | atmi_kernel_t kernel; 44 | const unsigned int num_args = 1; 45 | size_t arg_sizes[num_args]; 46 | arg_sizes[0] = sizeof(void *); 47 | ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes, 48 | 2, 49 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu, 50 | ATMI_DEVTYPE_GPU, "decode_gpu")); 51 | 52 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 53 | size_t strlength = strlen(input); 54 | char *output_gpu = (char*) malloc(strlength + 1); 55 | char *output_cpu = (char*) malloc(strlength + 1); 56 | 57 | decode_args_t decode_gpu_args = {.in=input, .out=output_gpu, .strlength=strlength}; 58 | decode_args_t decode_cpu_args = {.in=input, .out=output_cpu, .strlength=strlength}; 59 | 60 | void *gpu_args[num_args]; 61 | void *cpu_args[num_args]; 62 | 63 | void *tmp_gpu = &decode_gpu_args; 64 | gpu_args[0] = &tmp_gpu; 65 | void *tmp_cpu = &decode_cpu_args; 66 | cpu_args[0] = &tmp_cpu; 67 | 68 | 
ATMI_LPARM_1D(lparm, strlength); 69 | lparm->synchronous = ATMI_TRUE; 70 | 71 | lparm->WORKITEMS = strlength; 72 | lparm->place = ATMI_PLACE_GPU(0, 0); 73 | atmi_task_handle_t task = atmi_task_launch(lparm, kernel, gpu_args); 74 | if(task == ATMI_NULL_TASK_HANDLE) { 75 | fprintf(stderr, "GPU Task Launch/Execution Error.\n"); 76 | exit(1); 77 | } 78 | output_gpu[strlength] = '\0'; 79 | 80 | lparm->place = ATMI_PLACE_CPU(0, 0); 81 | task = atmi_task_launch(lparm, kernel, cpu_args); 82 | if(task == ATMI_NULL_TASK_HANDLE) { 83 | fprintf(stderr, "GPU Task Launch/Execution Error.\n"); 84 | exit(1); 85 | } 86 | output_cpu[strlength] = '\0'; 87 | 88 | cout << "Output from the GPU: " << output_gpu << endl; 89 | cout << "Output from the CPU: " << output_cpu << endl; 90 | free(output_cpu); 91 | free(output_gpu); 92 | 93 | ErrorCheck(atmi_kernel_release(kernel)); 94 | ErrorCheck(atmi_finalize()); 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /examples/runtime/helloworld/hw_structs.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | 8 | typedef struct decode_args_s { 9 | const char* in; 10 | char* out; 11 | const size_t strlength; 12 | } decode_args_t; 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_dGPU/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. 
See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 | # BUILDROOT is the current directory with everything from "examples" onward removed.
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 | # The leading '-' on the include above makes atmi-config.mak optional.
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 | # All paths here use ?=, so a value that is already defined is kept.
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 | # NOTE(review): ATMI_BIN/ATMI_INC/ATMI_LIB are not referenced again in this Makefile.
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): CLCFLAGS further down uses -target amdgcn--amdhsa; confirm "amdgpu" is intended here.
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 | # CLC=1 builds the kernel with cloc.sh; any other value invokes $(CLCC) directly (see the ifeq in the build rule).
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 | # NOTE(review): ROCMLIB is not set in this Makefile; it has to come from the environment or atmi-config.mak.
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 | # Link against the ATMI runtime; the ATMI and HSA lib directories are on the search path.
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 | # 'test' is phony as well, so a file named "test" cannot suppress the run target.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Builds hw.hsaco from hw.cl (cloc.sh when CLC is 1, otherwise $(CLCC)), then the host binary.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | 	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | 	$(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | 	$(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 |
79 | clean:
80 | 	rm -rf *.o *.hsaco $(OBJS)
81 | # Runs the binary with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | 	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | 7 | kernel void decode_gpu(global const char *in, int strlength, global char *out) { 8 | int num = get_global_id(0); 9 | if(num < strlength) 10 | out[num] = in[num] + 1; 11 | } 12 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_dGPU/hw.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi_runtime.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #ifdef __cplusplus 14 | #define _CPPSTRING_ "C" 15 | #endif 16 | #ifndef __cplusplus 17 | #define _CPPSTRING_ 18 | #endif 19 | 20 | #define ErrorCheck(status) \ 21 | if (status != ATMI_STATUS_SUCCESS) { \ 22 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \ 23 | exit(1); \ 24 | } 25 | 26 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, int strlength) { 27 | int num = get_global_id(0); 28 | if(num < strlength) 29 | out[num] = in[num] + 1; 30 | } 31 | 32 | extern _CPPSTRING_ void decode_cpu(const char **in, int *strlength, char **out) { 33 | decode_cpu_fn(*in, *out, *strlength); 34 | } 35 | 36 | 37 | int main(int argc, char **argv) { 38 | ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL)); 39 | 40 | const char *module = "hw.hsaco"; 41 | atmi_platform_type_t module_type = AMDGCN; 42 | ErrorCheck(atmi_module_register(&module, &module_type, 1)); 43 | 44 | atmi_machine_t *machine = atmi_machine_get_info(); 45 | 46 | atmi_kernel_t kernel; 47 | const unsigned int num_args = 3; 48 | size_t arg_sizes[] = {sizeof(const char *), sizeof(int), sizeof(char *)}; 49 | 
ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes, 50 | 2, 51 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu, 52 | ATMI_DEVTYPE_GPU, "decode_gpu")); 53 | 54 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 55 | int strlength = strlen(input); 56 | char *output_cpu = (char*) malloc(strlength + 1); 57 | char *output_gpu = (char*) malloc(strlength + 1); 58 | 59 | int gpu_id = 0; 60 | int cpu_id = 0; 61 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU]; 62 | if(argv[1] != NULL) gpu_id = (atoi(argv[1]) % gpu_count); 63 | printf("Choosing GPU %d/%d\n", gpu_id, gpu_count); 64 | 65 | /* Run HelloWorld on GPU */ 66 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0); 67 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0); 68 | 69 | void *d_input; 70 | ErrorCheck(atmi_malloc(&d_input, strlength+1, gpu)); 71 | ErrorCheck(atmi_memcpy(d_input, input, strlength+1)); 72 | 73 | void *h_input; 74 | ErrorCheck(atmi_malloc(&h_input, strlength+1, cpu)); 75 | ErrorCheck(atmi_memcpy(h_input, input, strlength+1)); 76 | 77 | void *d_output; 78 | ErrorCheck(atmi_malloc(&d_output, strlength+1, gpu)); 79 | 80 | void *h_output; 81 | ErrorCheck(atmi_malloc(&h_output, strlength+1, cpu)); 82 | 83 | void *gpu_args[] = {&d_input, &strlength, &d_output}; 84 | void *cpu_args[] = {&h_input, &strlength, &h_output}; 85 | 86 | ATMI_LPARM_1D(lparm, strlength); 87 | lparm->synchronous = ATMI_TRUE; 88 | 89 | lparm->place = ATMI_PLACE_GPU(0, gpu_id); 90 | volatile atmi_task_handle_t task = atmi_task_launch(lparm, kernel, gpu_args); 91 | if(task == ATMI_NULL_TASK_HANDLE) { 92 | fprintf(stderr, "GPU Task Launch/Execution Error.\n"); 93 | exit(1); 94 | } 95 | 96 | lparm->place = ATMI_PLACE_CPU(0, cpu_id); 97 | task = atmi_task_launch(lparm, kernel, cpu_args); 98 | if(task == ATMI_NULL_TASK_HANDLE) { 99 | fprintf(stderr, "GPU Task Launch/Execution Error.\n"); 100 | exit(1); 101 | } 102 | 103 | ErrorCheck(atmi_memcpy(output_gpu, d_output, strlength+1)); 
104 | output_gpu[strlength] = '\0'; 105 | ErrorCheck(atmi_memcpy(output_cpu, h_output, strlength+1)); 106 | output_cpu[strlength] = '\0'; 107 | 108 | cout << "Output from the GPU: " << output_gpu << endl; 109 | cout << "Output from the CPU: " << output_cpu << endl; 110 | 111 | /* cleanup */ 112 | free(output_gpu); 113 | free(output_cpu); 114 | ErrorCheck(atmi_free(h_input)); 115 | ErrorCheck(atmi_free(h_output)); 116 | ErrorCheck(atmi_free(d_input)); 117 | ErrorCheck(atmi_free(d_output)); 118 | ErrorCheck(atmi_kernel_release(kernel)); 119 | ErrorCheck(atmi_finalize()); 120 | return 0; 121 | } 122 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_dGPU_async/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 | # BUILDROOT is the current directory with everything from "examples" onward removed.
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 | # The leading '-' on the include above makes atmi-config.mak optional.
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 | # All paths here use ?=, so a value that is already defined is kept.
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 | # NOTE(review): ATMI_BIN/ATMI_INC/ATMI_LIB are not referenced again in this Makefile.
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): CLCFLAGS below uses -target amdgcn--amdhsa; confirm "amdgpu" is intended here.
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 | # CLC=1 builds the kernel with cloc.sh; any other value invokes $(CLCC) directly (see the ifeq in the build rule).
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 | # NOTE(review): ROCMLIB is not set in this Makefile; it has to come from the environment or atmi-config.mak.
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 | # Link against the ATMI runtime; the ATMI and HSA lib directories are on the search path.
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = 
hello
66 | # 'test' is phony as well, so a file named "test" cannot suppress the run target.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Builds hw.hsaco from hw.cl (cloc.sh when CLC is 1, otherwise $(CLCC)), then the host binary.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | 	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | 	$(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | 	$(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 |
79 | clean:
80 | 	rm -rf *.o *.hsaco $(OBJS)
81 | # Runs the binary with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | 	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_async/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 |  *===------------------------------------------------------------------------*/
6 | /* GPU kernel: each work-item writes in[id] + 1 to out[id] for id < strlength. */
7 | kernel void decode_gpu(global const char *in, global char *out, ulong strlength) {
8 |     size_t num = get_global_id(0);  /* unsigned, so the bound check is not a signed/unsigned mix */
9 |     if(num < strlength)
10 |         out[num] = in[num] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_async/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi_runtime.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #ifdef __cplusplus 14 | #define _CPPSTRING_ "C" 15 | #endif 16 | #ifndef __cplusplus 17 | #define _CPPSTRING_ 18 | #endif 19 | 20 | #define ErrorCheck(status) \ 21 | if (status != ATMI_STATUS_SUCCESS) { \ 22 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \ 23 | exit(1); \ 24 | } 25 | 26 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, size_t strlength) { 27 | int num = get_global_id(0); 28 | if(num < strlength) 29 | out[num] = in[num] + 1; 30 | } 31 | 32 | extern _CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength) { 33 | decode_cpu_fn(*in, *out, *strlength); 34 | } 35 | 36 | int main(int argc, char **argv) { 37 | ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL)); 38 | 39 | const char *module = "hw.hsaco"; 40 | atmi_platform_type_t module_type = AMDGCN; 41 | ErrorCheck(atmi_module_register(&module, &module_type, 1)); 42 | 43 | atmi_machine_t *machine = atmi_machine_get_info(); 44 | 45 | atmi_kernel_t kernel; 46 | const unsigned int num_args = 3; 47 | size_t arg_sizes[] = {sizeof(const char *), sizeof(char *), sizeof(size_t)}; 48 | ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes, 49 | 2, 50 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu, 51 | ATMI_DEVTYPE_GPU, "decode_gpu")); 52 | 53 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 54 | size_t strlength = strlen(input); 55 | char *output_cpu = (char*) malloc(strlength + 1); 56 | char *output_gpu = (char*) malloc(strlength + 1); 57 | 58 | int gpu_id = 0; 59 | int cpu_id = 0; 60 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU]; 61 | if(argv[1] != NULL) gpu_id = (atoi(argv[1]) % gpu_count); 62 | printf("Choosing GPU %d/%d\n", gpu_id, gpu_count); 63 | 64 | /* Run HelloWorld on GPU */ 65 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0); 66 | 
void *d_input, *d_output; 67 | ErrorCheck(atmi_malloc(&d_input, strlength+1, gpu)); 68 | ErrorCheck(atmi_malloc(&d_output, strlength+1, gpu)); 69 | 70 | ATMI_CPARM(cparm_gpu); 71 | atmi_task_handle_t h2d_gpu = atmi_memcpy_async(cparm_gpu, d_input, input, strlength+1); 72 | 73 | ATMI_LPARM_GPU_1D(lparm_gpu, gpu_id, strlength); 74 | ATMI_PARM_SET_DEPENDENCIES(lparm_gpu, h2d_gpu); 75 | void *gpu_args[] = {&d_input, &d_output, &strlength}; 76 | atmi_task_handle_t k_gpu = atmi_task_launch(lparm_gpu, kernel, gpu_args); 77 | 78 | ATMI_PARM_SET_DEPENDENCIES(cparm_gpu, k_gpu); 79 | atmi_task_handle_t d2h_gpu = atmi_memcpy_async(cparm_gpu, output_gpu, d_output, strlength+1); 80 | 81 | // wait only for the last task in the chain 82 | ErrorCheck(atmi_task_wait(d2h_gpu)); 83 | output_gpu[strlength] = '\0'; 84 | cout << "Output from the GPU: " << output_gpu << endl; 85 | 86 | /* Run HelloWorld on CPU */ 87 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0); 88 | void *h_input, *h_output; 89 | ErrorCheck(atmi_malloc(&h_input, strlength+1, cpu)); 90 | ErrorCheck(atmi_malloc(&h_output, strlength+1, cpu)); 91 | 92 | ATMI_CPARM(cparm_cpu); 93 | atmi_task_handle_t h2d_cpu = atmi_memcpy_async(cparm_cpu, h_input, input, strlength+1); 94 | 95 | ATMI_LPARM_CPU_1D(lparm_cpu, cpu_id, strlength); 96 | ATMI_PARM_SET_DEPENDENCIES(lparm_cpu, h2d_cpu); 97 | void *cpu_args[] = {&h_input, &h_output, &strlength}; 98 | atmi_task_handle_t k_cpu = atmi_task_launch(lparm_cpu, kernel, cpu_args); 99 | 100 | ATMI_PARM_SET_DEPENDENCIES(cparm_cpu, k_cpu); 101 | atmi_task_handle_t d2h_cpu = atmi_memcpy_async(cparm_cpu, output_cpu, h_output, strlength+1); 102 | 103 | // wait only for the last task in the chain 104 | ErrorCheck(atmi_task_wait(d2h_cpu)); 105 | output_cpu[strlength] = '\0'; 106 | cout << "Output from the CPU: " << output_cpu << endl; 107 | 108 | /* cleanup */ 109 | free(output_gpu); 110 | free(output_cpu); 111 | ErrorCheck(atmi_free(h_input)); 112 | 
ErrorCheck(atmi_free(h_output)); 113 | ErrorCheck(atmi_free(d_input)); 114 | ErrorCheck(atmi_free(d_output)); 115 | ErrorCheck(atmi_kernel_release(kernel)); 116 | ErrorCheck(atmi_finalize()); 117 | return 0; 118 | } 119 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_dGPU_sync/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 5 | # ===-------------------------------------------------------------------------- 6 | SHELL=/bin/bash 7 | 8 | #BUILDROOT 9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//') 10 | #$(info BUILDROOT $(BUILDROOT)) 11 | -include $(BUILDROOT)/atmi-config.mak 12 | 13 | #Set ATMI Environment variables 14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi 15 | 16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin 17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include 18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib 19 | 20 | #Set HSA Environment variables 21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa 22 | #Set ROCM device environment variables 23 | ROCM_DEVICE_PATH ?= /opt/rocm 24 | 25 | #Set LC Environment variables 26 | AMDLLVM ?= /opt/amd/llvm 27 | 28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa 29 | 30 | #MCPU 31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu) 32 | #$(info MCPU $(MCPU)) 33 | 34 | # Kernel compiler 35 | CLC ?= 1 36 | 37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I. 
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 | # NOTE(review): ROCMLIB is not set in this Makefile; it has to come from the environment or atmi-config.mak.
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 | # Link against the ATMI runtime; the ATMI and HSA lib directories are on the search path.
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 | # 'test' is phony as well, so a file named "test" cannot suppress the run target.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Builds hw.hsaco from hw.cl (cloc.sh when CLC is 1, otherwise $(CLCC)), then the host binary.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | 	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | 	$(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | 	$(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 |
79 | clean:
80 | 	rm -rf *.o *.hsaco $(OBJS)
81 | # Runs the binary with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | 	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_sync/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 |  * ATMI (Asynchronous Task and Memory Interface)
3 |  *
4 |  * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | 7 | __kernel void decode_gpu(__global const char *in, __global char *out, size_t strlength) { 8 | int num = get_global_id(0); 9 | if(num < strlength) 10 | out[num] = in[num] + 1; 11 | } 12 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_dGPU_sync/hw.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi_runtime.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #ifdef __cplusplus 14 | #define _CPPSTRING_ "C" 15 | #endif 16 | #ifndef __cplusplus 17 | #define _CPPSTRING_ 18 | #endif 19 | 20 | enum { 21 | CPU_IMPL = 10565, 22 | GPU_IMPL = 42 23 | }; 24 | 25 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, size_t strlength) { 26 | int num = get_global_id(0); 27 | if(num < strlength) 28 | out[num] = in[num] + 1; 29 | } 30 | 31 | extern _CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength) { 32 | decode_cpu_fn(*in, *out, *strlength); 33 | } 34 | 35 | 36 | int main(int argc, char **argv) { 37 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc"; 38 | size_t strlength = strlen(input); 39 | 40 | char *output_gpu = (char*) malloc(strlength + 1); 41 | char *output_cpu = (char*) malloc(strlength + 1); 42 | 43 | // Init ATMI 44 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 45 | if(err != ATMI_STATUS_SUCCESS) 46 | return -1; 47 | 48 | // Register module 49 | const char *module = "hw.hsaco"; 50 | atmi_platform_type_t module_type = AMDGCN; 51 | atmi_module_register(&module, &module_type, 1); 52 | 53 | { 
54 | // Create kernel 55 | atmi_kernel_t kernel; 56 | const unsigned int num_args = 3; 57 | size_t arg_sizes[] = {sizeof(const char *), sizeof(char *), sizeof(size_t)}; 58 | atmi_kernel_create_empty(&kernel, num_args, arg_sizes); 59 | 60 | atmi_kernel_add_cpu_impl(kernel, (atmi_generic_fp)decode_cpu, CPU_IMPL); 61 | atmi_kernel_add_gpu_impl(kernel, "decode_gpu", GPU_IMPL); 62 | 63 | // Select GPU 64 | int gpu_id = 0; 65 | { 66 | atmi_machine_t *machine = atmi_machine_get_info(); 67 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU]; 68 | if(argv[1] != NULL) { 69 | gpu_id = (atoi(argv[1]) % gpu_count); 70 | printf("Choosing GPU %d/%d\n", gpu_id, gpu_count); 71 | } 72 | } 73 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0); 74 | // Run on GPU 75 | { 76 | void *d_input; 77 | void *d_output; 78 | 79 | // Alloc 80 | atmi_malloc(&d_input, strlength+1, gpu); 81 | atmi_malloc(&d_output, strlength+1, gpu); 82 | 83 | // Copy 84 | atmi_memcpy(d_input, input, strlength+1); 85 | 86 | void *gpu_args[] = {&d_input, &d_output, &strlength}; 87 | 88 | // Launch 89 | ATMI_LPARM_1D(lparm, strlength); 90 | lparm->synchronous = ATMI_TRUE; 91 | lparm->kernel_id = GPU_IMPL; 92 | lparm->place = ATMI_PLACE_GPU(0, gpu_id); 93 | atmi_task_launch(lparm, kernel, gpu_args); 94 | 95 | // Copy 96 | atmi_memcpy(output_gpu, d_output, strlength+1); 97 | 98 | output_gpu[strlength] = '\0'; 99 | cout << "Output from the GPU: " << output_gpu << endl; 100 | 101 | // Free 102 | atmi_free(d_output); 103 | atmi_free(d_input); 104 | } 105 | 106 | // Select CPU 107 | int cpu_id = 0; 108 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0); 109 | // Run on CPU 110 | { 111 | void *h_input; 112 | void *h_output; 113 | 114 | // Alloc 115 | atmi_malloc(&h_input, strlength+1, cpu); 116 | atmi_malloc(&h_output, strlength+1, cpu); 117 | 118 | // Copy 119 | atmi_memcpy(h_input, input, strlength+1); 120 | 121 | void *cpu_args[] = {&h_input, &h_output, &strlength}; 122 | 
123 | // Launch 124 | ATMI_LPARM_1D(lparm, strlength); 125 | lparm->synchronous = ATMI_TRUE; 126 | lparm->kernel_id = CPU_IMPL; 127 | lparm->place = ATMI_PLACE_CPU(0, cpu_id); 128 | atmi_task_launch(lparm, kernel, cpu_args); 129 | 130 | // Copy 131 | atmi_memcpy(output_cpu, h_output, strlength+1); 132 | 133 | output_cpu[strlength] = '\0'; 134 | cout << "Output from the CPU: " << output_cpu << endl; 135 | 136 | // Free 137 | atmi_free(h_output); 138 | atmi_free(h_input); 139 | } 140 | 141 | // Release kernel 142 | atmi_kernel_release(kernel); 143 | } 144 | 145 | atmi_finalize(); 146 | 147 | /* cleanup */ 148 | free(output_gpu); 149 | free(output_cpu); 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_printf/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
# Builds the helloworld_printf example: the GPU kernel (hw_gpu.cl -> .hsaco),
# the CPU kernel (hw_cpu.c) and the host program (hw_host.cpp), linked into
# ./hello.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
#$(info MCPU $(MCPU))

# Kernel compiler (1: cloc.sh wrapper, anything else: clang directly)
CLC ?= 1

# Temp files (non-zero keeps the intermediate .bc/.o files of the kernel build)
SAVETEMP ?= 0

INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.

# Headers every object depends on. Both hw_cpu.c and hw_gpu.cl include hw.h;
# leaving this undefined meant header edits never triggered a rebuild.
# ?= keeps any value already supplied by atmi-config.mak or the command line.
INC_FILES ?= hw.h

# CLOC
CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin

# CL options
CLOPTS = $(INC_FLAGS)
CLOPTS += -v
# Frontend optimization
ifneq ($(NOOPT),1)
CLOPTS += -O2
endif
# Temp file
ifneq ($(SAVETEMP),0)
CLOPTS += -save-temps
endif

# cloc options
CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
CLOCOPTS += -clopts "$(CLOPTS)"

# ROCm-Device-lib
BITCODE_LIB ?= ${ROCMLIB}/dist/lib

# GPU compiler
CLCC=$(AMDLLVM)/bin/clang

LLK=$(AMDLLVM)/bin/llvm-link
LLC=$(AMDLLVM)/bin/llc
LLD=$(AMDLLVM)/bin/lld

CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
CLCFLAGS += -target amdgcn--amdhsa

ifeq ($(SAVETEMP),0)
# Single clang invocation: compile and link the device libraries in one step.
CLCFLAGS += -mcpu=$(MCPU)

CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
else
# Staged build (clang -> llvm-link -> llc -> lld) that leaves each
# intermediate file on disk.
CLCFLAGS += -emit-llvm -c

#LLFLAGS = -suppress-warnings
LLFLAGS += $(BITCODE_LIB)/opencl.amdgcn.bc
LLFLAGS += $(BITCODE_LIB)/ockl.amdgcn.bc
LLFLAGS += $(BITCODE_LIB)/ocml.amdgcn.bc
LLFLAGS += $(BITCODE_LIB)/irif.amdgcn.bc

LCFLAGS = -O2
LCFLAGS += -filetype=obj
LCFLAGS += -mtriple amdgcn--amdhsa
LCFLAGS += -mcpu=$(MCPU)

LDFLAGS = -flavor gnu -shared
endif


# CPU compiler
CC=$(AMDLLVM)/bin/clang
CCFLAGS = -g
#CCFLAGS += -v

# Host compiler
CXX = g++
CXXFLAGS = -g -std=c++11
#CXXFLAGS += -v

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = hello

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

%.hsaco: %.cl $(INC_FILES)
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 -o $@ $<
else
ifeq ($(SAVETEMP),0)
	$(CLCC) ${CLCFLAGS} -o $@ $<
else
	$(CLCC) ${CLCFLAGS} -o $*.bc $<
	$(LLK) -o $*.linked.bc $*.bc ${LLFLAGS}
	$(LLC) ${LCFLAGS} -o $*.o $*.linked.bc
	$(LLD) ${LDFLAGS} -o $@ $*.o
endif
endif
	@echo

%.o: %.cpp $(INC_FILES)
	$(CXX) $(CXXFLAGS) $(INC_FLAGS) -o $@ -c $<
	@echo

%.o: %.c $(INC_FILES)
	$(CC) $(CCFLAGS) $(INC_FLAGS) -o $@ -c $<
	@echo

# The .hsaco is a prerequisite only; it is not passed to the linker.
hello: hw_gpu.hsaco hw_cpu.o hw_host.o
	$(CXX) -o $@ hw_host.o hw_cpu.o $(LIBS) $(LIB_FLAGS)

clean:
	rm -rf *.o *.i *.bc *.hsaco $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | // Have printf natively 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | // But no OpenCL header 14 | #include "atmi_runtime.h" 15 | 16 | // CPU implementation using function pointer. 17 | static void decode_cpu_fn(const char *in, char *out, size_t strlength, char *extra); 18 | 19 | _CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength, char **extra) { 20 | decode_cpu_fn(*in, *out, *strlength, *extra); 21 | } 22 | 23 | /**********************************************************************************/ 24 | //#include "atmi_device.h" 25 | #include "hw.h" 26 | /**********************************************************************************/ 27 | 28 | void decode_cpu_fn( 29 | const char *in, 30 | char *out, 31 | size_t strlength, 32 | char *extra 33 | ) { 34 | 35 | int num = get_global_id(0); 36 | 37 | if(num < strlength) 38 | out[num] = in[num] + 1; 39 | 40 | #if 1 41 | if (!num) { 42 | printf("hello world from CPU, %d, %f\n", num, 1.0); 43 | } 44 | #endif 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /examples/runtime/helloworld_printf/hw_gpu.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
// Do not have printf

// OpenCL header include by default through CL frontend

/**********************************************************************************/
//#include "atmi_device.h"
#include "hw.h"
/**********************************************************************************/

// GPU implementation of the decode kernel. Each work-item handles the
// character at its own global index: it stores in[idx] + 1 into out[idx] when
// the index is below strlength. Work-item 0 additionally prints a greeting.
// `extra` is part of the kernel signature but is not touched here.
kernel void decode_gpu(global const char *in,
                       global char *out,
                       ulong strlength,
                       global char *extra)
{
    const int idx = get_global_id(0);

    if ((ulong)idx < strlength) {
        out[idx] = in[idx] + 1;
    }

#if 1
    if (idx == 0) {
        printf("hello world from GPU, %d, %f\n", idx, 1.0);
    }
#endif
}
# Builds the kps example: nullKernel.cl is compiled to nullKernel.hsaco and
# kps.cpp is compiled and linked against the ATMI runtime into ./kps.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
#$(info MCPU $(MCPU))

# Kernel compiler (1: cloc.sh wrapper, anything else: clang directly)
CLC ?= 1

INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.

# CLOC
CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"

# ROCm-Device-lib
BITCODE_LIB ?= ${ROCMLIB}/dist/lib

# GPU compiler
CLCC=$(AMDLLVM)/bin/clang
CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
CLCFLAGS += -target amdgcn--amdhsa
CLCFLAGS += -mcpu=$(MCPU)
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc

# Host compiler
CXX = g++
CXXFLAGS =-O3 -g -std=c++11
#CXXFLAGS += -v

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib


OBJS = kps

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

# One rule builds both the device code object and the host binary.
kps: kps.cpp nullKernel.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl
else
	$(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl
endif
	$(CXX) -o $@ kps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

clean:
	rm -rf *.o *.hsaco $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./kps
# Builds the needleman-wunsch example: nw.cl is compiled to nw.hsaco and
# nw.cpp is compiled and linked against the ATMI runtime into ./nw.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
#$(info MCPU $(MCPU))

# Kernel compiler (1: cloc.sh wrapper, anything else: clang directly)
CLC ?= 1

INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.

# CLOC
CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"

# ROCm-Device-lib
BITCODE_LIB ?= ${ROCMLIB}/dist/lib

# GPU compiler
CLCC=$(AMDLLVM)/bin/clang
CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
CLCFLAGS += -target amdgcn--amdhsa
CLCFLAGS += -mcpu=$(MCPU)
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc

# Host compiler
CXX = g++
CXXFLAGS =-O3 -g -std=c++11
#CXXFLAGS += -v

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = nw

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

# One rule builds both the device code object and the host binary.
nw: nw.cpp nw.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nw.cl
else
	$(CLCC) ${CLCFLAGS} -o nw.hsaco nw.cl
endif
	$(CXX) -o $@ nw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

clean:
	rm -rf *.o *.hsaco $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./nw 512 10 10
# Builds the needleman-wunsch_dGPU example: nw.cl is compiled to nw.hsaco and
# nw.cpp is compiled and linked against the ATMI runtime into ./nw.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
#$(info MCPU $(MCPU))

# Kernel compiler (1: cloc.sh wrapper, anything else: clang directly)
CLC ?= 1

INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.

# CLOC
CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"

# ROCm-Device-lib
BITCODE_LIB ?= ${ROCMLIB}/dist/lib

# GPU compiler
CLCC=$(AMDLLVM)/bin/clang
CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
CLCFLAGS += -target amdgcn--amdhsa
CLCFLAGS += -mcpu=$(MCPU)
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc

# Host compiler
CXX = g++
CXXFLAGS =-O3 -g -std=c++11
#CXXFLAGS += -v

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = nw

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

# One rule builds both the device code object and the host binary.
nw: nw.cpp nw.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nw.cl
else
	$(CLCC) ${CLCFLAGS} -o nw.hsaco nw.cl
endif
	$(CXX) -o $@ nw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

clean:
	rm -rf *.o *.hsaco $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./nw 512 10 10
# Builds the pcie_bw example, a host-only program (no device kernel) linked
# against the ATMI runtime. Pass BIBW=1 to compile with -DBIBW.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
#$(info MCPU $(MCPU))

# BIBW=1 defines the BIBW preprocessor symbol for pcie_bw.cpp.
BIBW ?= 0
ifeq ($(BIBW),1)
BIBW_FLAG=-DBIBW
else
BIBW_FLAG=
endif

# Host compiler
CXX = g++
CXXFLAGS =-O3 -g -std=c++11 $(BIBW_FLAG)
#CXXFLAGS += -v

INC_FLAGS=-I${ATMI_INC} -I.

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = pcie_bw

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

pcie_bw: pcie_bw.cpp
	$(CXX) -o $@ pcie_bw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

clean:
	rm -rf *.o $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./pcie_bw
// Aborts the program with the source location when an ATMI call does not
// return ATMI_STATUS_SUCCESS. Wrapped in do { } while (0) so the macro
// behaves as a single statement (safe inside un-braced if/else), and the
// argument is parenthesised so any expression can be passed.
#define ErrorCheck(status) \
  do { \
    if ((status) != ATMI_STATUS_SUCCESS) { \
      printf("Error at [%s:%d]\n", __FILE__, __LINE__); \
      exit(1); \
    } \
  } while (0)
#define NSECPERSEC 1000000000L
#define NTIMERS 13

// Returns the time elapsed from start_time to end_time in nanoseconds.
// A negative tv_nsec difference needs no special case: borrowing one second
// and adding NSECPERSEC back, as the previous two-branch version did, yields
// exactly the same sum as the single expression below.
long int get_nanosecs( struct timespec start_time, struct timespec end_time) {
  const long int secs = (long int) end_time.tv_sec - (long int) start_time.tv_sec;
  const long int nsecs = (long int) end_time.tv_nsec - (long int) start_time.tv_nsec;
  return secs * NSECPERSEC + nsecs;
}
// Root GPU task. Every work-item whose global index is a multiple of 64
// enqueues one single-work-item child task (K_ID_subTask_gpu) on GPU 0,
// passing its own global index as the child's argument. All other work-items
// return immediately. numTasks is not read by this kernel.
__kernel void mainTask_gpu(long int numTasks) {
    const int work_item = get_global_id(0);
    if(work_item % 64 != 0)
        return;

    ATMI_LPARM_1D(lparm, 1);
    lparm->place = (atmi_place_t)ATMI_PLACE_GPU(0, 0);
    // default case for kernel enqueue: lparm->groupable = ATMI_TRUE;
    args_t child_args;
    child_args.arg1 = work_item;

    atmid_task_launch(lparm, K_ID_subTask_gpu, (void *)&child_args, sizeof(args_t));
}

// GPU implementation of the print task: intentionally empty. The host code
// also registers a CPU implementation under the same kernel.
__kernel void print_taskId_gpu(long int taskId) {
}
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include 8 | #include 9 | using namespace std; 10 | #include "atmi_runtime.h" 11 | 12 | enum { 13 | GPU_IMPL = 42, 14 | CPU_IMPL = 10565 15 | }; 16 | 17 | extern "C" void print_taskId_cpu(long int *taskId) 18 | { 19 | cout << "Leaf Sub-task ID" << ": " << *taskId << endl; 20 | } 21 | 22 | int main(int argc, char* argv[]) { 23 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 24 | if(err != ATMI_STATUS_SUCCESS) return -1; 25 | const char *module = "hw.hsaco"; 26 | atmi_platform_type_t module_type = AMDGCN; 27 | err = atmi_module_register(&module, &module_type, 1); 28 | 29 | atmi_kernel_t main_kernel, sub_kernel, print_kernel; 30 | const unsigned int num_args = 1; 31 | size_t arg_sizes[] = { sizeof(long int) }; 32 | atmi_kernel_create(&main_kernel, num_args, arg_sizes, 33 | 1, 34 | ATMI_DEVTYPE_GPU, "mainTask_gpu"); 35 | atmi_kernel_create(&print_kernel, num_args, arg_sizes, 36 | 2, 37 | ATMI_DEVTYPE_GPU, "print_taskId_gpu", 38 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)print_taskId_cpu); 39 | atmi_kernel_create(&sub_kernel, num_args, arg_sizes, 40 | 1, 41 | ATMI_DEVTYPE_GPU, "subTask_gpu"); 42 | 43 | unsigned long int numTasks = 4; 44 | ATMI_LPARM_1D(lparm, 64 * numTasks); 45 | //lparm->WORKITEMS = numTasks; 46 | //lparm->groupDim[0] = numTasks; 47 | lparm->synchronous = ATMI_TRUE; 48 | lparm->place = ATMI_PLACE_GPU(0, 0); 49 | lparm->groupable = ATMI_TRUE; 50 | //lparm->kernel_id = 0;//GPU_IMPL; 51 | 52 | void *args[] = { &numTasks }; 53 | atmi_task_launch(lparm, main_kernel, args); 54 | 55 | printf("Done!\n"); 56 | 57 | atmi_kernel_release(main_kernel); 58 | atmi_kernel_release(print_kernel); 59 | atmi_kernel_release(sub_kernel); 60 | atmi_finalize(); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /examples/runtime_denq/kps/Makefile: -------------------------------------------------------------------------------- 1 | # 
# Builds the runtime_denq kps example: nullKernel.cl is compiled to
# nullKernel.hsaco and kps.cpp is compiled and linked against the ATMI runtime
# into ./kps.
SHELL=/bin/bash

#BUILDROOT
BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
#$(info BUILDROOT $(BUILDROOT))
-include $(BUILDROOT)/atmi-config.mak

#Set ATMI Environment variables
ATMI_RUNTIME_PATH ?= /opt/rocm/atmi

ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib

#Set HSA Environment variables
HSA_RUNTIME_PATH ?= /opt/rocm/hsa
#Set ROCM device environment variables
ROCM_DEVICE_PATH ?= /opt/rocm

#Set LC Environment variables
AMDLLVM ?= /opt/amd/llvm

AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa

#MCPU
# NOTE(review): unlike the sibling examples this looks the helper up on PATH
# rather than under ${ATMI_RUNTIME_PATH}/bin - confirm that is intended.
MCPU ?= $(shell mymcpu)

# Kernel compiler (1: cloc.sh wrapper, anything else: clang directly)
CLC ?= 1

INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.

# CLOC
CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} -atmipath ${ATMI_RUNTIME_PATH}
CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"

# ROCm-Device-lib
BITCODE_LIB ?= ${ROCMLIB}/dist/lib

# GPU compiler
CLCC=$(AMDLLVM)/bin/clang
CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
CLCFLAGS += -target amdgcn--amdhsa
CLCFLAGS += -mcpu=$(MCPU)
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc

# Host compiler
CXX = g++
CXXFLAGS =-O3 -g -std=c++11
#CXXFLAGS += -v

LIBS=-latmi_runtime
LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib

OBJS = kps

# test is listed too: it produces no file, and a stray file named "test"
# would otherwise make `make test` a silent no-op.
.PHONY: clean all test

all: $(OBJS)

# One rule builds both the device code object and the host binary.
kps: kps.cpp nullKernel.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl
else
	$(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl
endif
	$(CXX) -o $@ kps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

clean:
	rm -rf *.o *.hsaco $(OBJS)

test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./kps
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "stdio.h" 8 | #include "stdlib.h" 9 | #include "string.h" 10 | #include 11 | #include 12 | #include 13 | #include "atmi_runtime.h" 14 | #define NSECPERSEC 1000000000L 15 | 16 | void print_timing(const char *title, 17 | int kcalls, 18 | struct timespec *start_time, 19 | struct timespec *end_launch_time, 20 | struct timespec *end_time); 21 | 22 | static int count = 0; 23 | extern "C" void subTask_cpu() { 24 | //static int count = 0; 25 | printf("Counter: %d\n", count++); 26 | } 27 | 28 | enum { 29 | K_ID_mainTask_gpu = 0, 30 | K_ID_mainTask_recursive_gpu, 31 | K_ID_mainTask_binary_tree_gpu, 32 | K_ID_mainTask_flat_gpu 33 | }; 34 | 35 | enum { 36 | K_ID_subTask_gpu = 0, 37 | K_ID_subTask_cpu 38 | }; 39 | 40 | int main(int argc, char *argv[]) { 41 | struct timespec start_time; 42 | struct timespec end_time; 43 | struct timespec end_launch_time; 44 | long int nanosecs; 45 | float kps; 46 | 47 | long int kcalls = 16; 48 | 49 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 50 | if(err != ATMI_STATUS_SUCCESS) return -1; 51 | const char *module = "nullKernel.hsaco"; 52 | atmi_platform_type_t module_type = AMDGCN; 53 | err = atmi_module_register(&module, &module_type, 1); 54 | 55 | atmi_kernel_t main_kernel, sub_kernel; 56 | const unsigned int main_num_args = 1; 57 | size_t main_arg_sizes[] = { sizeof(long int) }; 58 | atmi_kernel_create(&main_kernel, main_num_args, main_arg_sizes, 59 | 1, 60 | ATMI_DEVTYPE_GPU, "mainTask_gpu" 61 | //ATMI_DEVTYPE_GPU, "mainTask_recursive_gpu", 62 | //ATMI_DEVTYPE_GPU, "mainTask_binary_tree_gpu", 63 | //ATMI_DEVTYPE_GPU, "mainTask_flat_gpu" 64 | ); 65 | atmi_kernel_create(&sub_kernel, 0, NULL, 66 | 2, 67 | ATMI_DEVTYPE_GPU, "subTask_gpu", 68 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)subTask_cpu); 69 | 70 | 71 | ATMI_LPARM_1D(lparm, 1); 72 | //lparm->WORKITEMS = numTasks; 73 | lparm->groupDim[0] = 64; 74 | lparm->synchronous = ATMI_TRUE; 75 | 
lparm->place = ATMI_PLACE_GPU(0, 0); 76 | lparm->groupable = ATMI_TRUE; 77 | //lparm->kernel_id = K_ID_subTask_gpu; 78 | atmi_task_launch(lparm, sub_kernel, NULL); 79 | 80 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time); 81 | lparm->WORKITEMS = kcalls * 64; 82 | //lparm->kernel_id = K_ID_mainTask_gpu; 83 | void *args[] = { &kcalls }; 84 | atmi_task_launch(lparm, main_kernel, args); 85 | clock_gettime(CLOCK_MONOTONIC_RAW,&end_launch_time); 86 | //atmi_taskgroup_wait(stream); 87 | clock_gettime(CLOCK_MONOTONIC_RAW,&end_time); 88 | print_timing("Synchronous Flat Execution (DP)", 89 | kcalls, &start_time, 90 | &end_launch_time, &end_time); 91 | 92 | lparm->WORKITEMS = 64; 93 | //lparm->kernel_id = K_ID_subTask_gpu; 94 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time); 95 | for(int i=0; iplace = (atmi_place_t)ATMI_PLACE_GPU(0, 0); 23 | // default case for kernel enqueue: lparm->groupable = ATMI_TRUE; 24 | for(long int i = 0; i < numTasks/num_wavefronts; i++) 25 | atmid_task_launch(lparm, K_ID_subTask, NULL, 0); 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /examples/runtime_denq/reduction/Makefile: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | SHELL=/bin/bash 7 | 8 | #BUILDROOT 9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//') 10 | #$(info BUILDROOT $(BUILDROOT)) 11 | -include $(BUILDROOT)/atmi-config.mak 12 | 13 | #Set ATMI Environment variables 14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi 15 | 16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin 17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include 18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib 19 | 20 | #Set HSA Environment variables 21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa 22 | #Set ROCM device environment variables 23 | ROCM_DEVICE_PATH ?= /opt/rocm 24 | 25 | #Set LC Environment variables 26 | AMDLLVM ?= /opt/amd/llvm 27 | 28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa 29 | 30 | #MCPU 31 | MCPU ?= $(shell mymcpu) 32 | 33 | # Kernel compiler 34 | CLC ?= 1 35 | 36 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I. 37 | 38 | # CLOC 39 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin 40 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} -atmipath ${ATMI_RUNTIME_PATH} 41 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v" 42 | 43 | # ROCm-Device-lib 44 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib 45 | 46 | # GPU compiler 47 | CLCC=$(AMDLLVM)/bin/clang 48 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header 49 | CLCFLAGS += -target amdgcn--amdhsa 50 | CLCFLAGS += -mcpu=$(MCPU) 51 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc 52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc 53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc 54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc 55 | 56 | # Host compiler 57 | CXX = g++ 58 | CXXFLAGS =-O3 -g -std=c++11 59 | #CXXFLAGS += -v 60 | 61 | LIBS=-latmi_runtime 62 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib 63 | 64 | OBJS = reduction 65 | 66 | 
.PHONY: clean all 67 | 68 | all: $(OBJS) 69 | 70 | reduction: reduction.cpp reduction.cl 71 | ifeq ($(CLC),1) 72 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 reduction.cl 73 | else 74 | $(CLCC) ${CLCFLAGS} -o reduction.hsaco reduction.cl 75 | endif 76 | $(CXX) -o $@ reduction.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS) 77 | 78 | clean: 79 | rm -rf *.o *.hsaco $(OBJS) 80 | 81 | test: 82 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./reduction 83 | 84 | -------------------------------------------------------------------------------- /examples/runtime_denq/reduction/reduction.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include "atmi_kl.h" 8 | 9 | enum { 10 | reduction_task = 0, 11 | }; 12 | 13 | typedef struct args_r { 14 | int *in; 15 | unsigned long length; 16 | } args_t; 17 | 18 | kernel void reduction_gpu(__global int* in, unsigned long length) { 19 | int num = get_global_id(0); 20 | 21 | in[num] += in[num + length]; 22 | 23 | barrier(CLK_GLOBAL_MEM_FENCE); 24 | 25 | if(num == 0) 26 | { 27 | length = length >> 1; 28 | ATMI_LPARM_1D(lparm, length); 29 | if(length > 8) 30 | lparm->place = (atmi_place_t)ATMI_PLACE_GPU(0, 0); 31 | else 32 | lparm->place = (atmi_place_t)ATMI_PLACE_CPU(0, 0); 33 | args_t args; 34 | args.in = in; 35 | args.length = length; 36 | atmid_task_launch(lparm, reduction_task, (void *)&args, sizeof(args_t)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /examples/runtime_denq/reduction/reduction.cpp: -------------------------------------------------------------------------------- 1 | 
/*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | 7 | #include 8 | #include 9 | #include 10 | using namespace std; 11 | #include "atmi_runtime.h" 12 | 13 | extern "C" void reduction_cpu(int **in_ptr, long unsigned *length_ptr) { 14 | int *in = *in_ptr; 15 | long unsigned length = *length_ptr; 16 | int num; 17 | for (num = length; num > 0; num >>= 1) 18 | for(int j = 0; j < num; j++) 19 | in[j] += in[j + num]; 20 | } 21 | /* Host driver: registers reduction.hsaco, fills a 1024-int buffer with 0..1023, launches the reduction kernel synchronously on GPU 0 and prints input[0]. Returns -1 if ATMI initialization or module registration fails. */ 22 | int main(int argc, char* argv[]) { 23 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL); 24 | if(err != ATMI_STATUS_SUCCESS) return -1; 25 | const char *module = "reduction.hsaco"; 26 | atmi_platform_type_t module_type = AMDGCN; 27 | err = atmi_module_register(&module, &module_type, 1); 28 | if(err != ATMI_STATUS_SUCCESS) { atmi_finalize(); return -1; } /* without the module the GPU kernel cannot be resolved; stop here instead of launching */ 29 | atmi_kernel_t reduction_kernel; 30 | const unsigned int num_args = 2; 31 | size_t arg_sizes[] = { sizeof(int *), sizeof(long unsigned) }; 32 | atmi_kernel_create(&reduction_kernel, num_args, arg_sizes, 33 | 2, 34 | ATMI_DEVTYPE_GPU, "reduction_gpu", 35 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)reduction_cpu); 36 | 37 | long unsigned length = 1024; 38 | int *input; 39 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, 0, 0); 40 | atmi_malloc((void **)&input, sizeof(int) * length, cpu); 41 | /* index uses the same unsigned type as length to avoid a signed/unsigned comparison */ 42 | for(long unsigned ii = 0; ii < length; ii++) 43 | input[ii] = static_cast<int>(ii); 44 | 45 | ATMI_LPARM_1D(lparm, length >> 1); 46 | lparm->synchronous = ATMI_TRUE; 47 | lparm->place = ATMI_PLACE_GPU(0, 0); 48 | lparm->groupable = ATMI_TRUE; 49 | //lparm->kernel_id = K_ID_reduction_gpu; 50 | 51 | long unsigned arg_length = length >> 1; 52 | void *args[] = { &input, &arg_length }; 53 | atmi_task_launch(lparm, reduction_kernel, args); 54 | 55 | printf("Sum: %d\n", input[0]); 56 | 57 | atmi_free(input); 58 | 
atmi_kernel_release(reduction_kernel); 59 | atmi_finalize(); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /include/atmi_c_ext.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef INCLUDE_ATMI_C_EXT_H_ 7 | #define INCLUDE_ATMI_C_EXT_H_ 8 | 9 | #include 10 | /** \defgroup Helper macros when using ATMI C Extension feature. 11 | * @{ 12 | */ 13 | #ifdef __cplusplus 14 | #define _CPPSTRING_ "C" 15 | #endif 16 | #ifndef __cplusplus 17 | #define _CPPSTRING_ 18 | #endif 19 | /** 20 | * @brief \deprecated Predefined function calling a null CPU task. 21 | */ 22 | extern _CPPSTRING_ atmi_task_handle_t __sync_kernel_pif(atmi_lparm_t* lparm); 23 | 24 | /** 25 | * @brief \deprecated Helper macros calling a 26 | * null CPU task under specific conditions. 
27 | */ 28 | #define SYNC_STREAM(s) \ 29 | { \ 30 | ATMI_LPARM(__lparm_sync_kernel); \ 31 | __lparm_sync_kernel->synchronous = ATMI_TRUE; \ 32 | __lparm_sync_kernel->groupable = ATMI_TRUE; \ 33 | __lparm_sync_kernel->group = s; \ 34 | __sync_kernel_pif(__lparm_sync_kernel); \ 35 | } 36 | 37 | #define SYNC_TASK(t) \ 38 | { \ 39 | ATMI_LPARM(__lparm_sync_kernel); \ 40 | __lparm_sync_kernel->synchronous = ATMI_TRUE; \ 41 | __lparm_sync_kernel->num_required = 1; \ 42 | __lparm_sync_kernel->requires = &t; \ 43 | __sync_kernel_pif(__lparm_sync_kernel); \ 44 | } 45 | /** 46 | * @} 47 | */ 48 | 49 | #endif // INCLUDE_ATMI_C_EXT_H_ 50 | -------------------------------------------------------------------------------- /include/atmi_interop_hsa.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef INCLUDE_ATMI_INTEROP_HSA_H_ 7 | #define INCLUDE_ATMI_INTEROP_HSA_H_ 8 | 9 | #include "atmi_runtime.h" 10 | #include "hsa/hsa.h" 11 | #include "hsa/hsa_ext_amd.h" 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | /** \defgroup interop_hsa_functions ATMI-HSA Interop 18 | * @{ 19 | */ 20 | /** 21 | * @brief Get the HSA compute agent from the ATMI compute place. 22 | * 23 | * @details Use this function to query more details about the underlying HSA 24 | * agent. 25 | * 26 | * @param[in] proc The ATMI compute place 27 | * 28 | * @param[out] agent Pointer to a non-NULL @p hsa_agent_t structure that will 29 | * hold the 30 | * return value. 31 | * 32 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. 
33 | * 34 | * @retval ::ATMI_STATUS_ERROR If @p proc is an invalid location in the current 35 | * node, or 36 | * if ATMI is not initialized. 37 | * 38 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. 39 | */ 40 | atmi_status_t atmi_interop_hsa_get_agent(atmi_place_t proc, hsa_agent_t *agent); 41 | 42 | /** 43 | * @brief Get the HSA memory pool handle from the ATMI memory place. 44 | * 45 | * @details Use this function to query more details about the underlying HSA 46 | * memory 47 | * pool handle. 48 | * 49 | * @param[in] memory The ATMI memory place 50 | * 51 | * @param[out] pool Pointer to a non-NULL @p hsa_amd_memory_pool_t structure that 52 | * will 53 | * hold the return value. 54 | * 55 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. 56 | * 57 | * @retval ::ATMI_STATUS_ERROR If @p memory is an invalid location in the 58 | * current node, or 59 | * if ATMI is not initialized. 60 | * 61 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. 62 | */ 63 | atmi_status_t atmi_interop_hsa_get_memory_pool(atmi_mem_place_t memory, 64 | hsa_amd_memory_pool_t *pool); 65 | 66 | /** 67 | * @brief Get the device address and size of an HSA global symbol 68 | * 69 | * @details Use this function to query the device address and size of an HSA 70 | * global symbol. 71 | * The symbol can be set by the compiler or by the application writer in a 72 | * language-specific manner. This function is meaningful only after calling one 73 | * of the @p atmi_module_register functions. 74 | * 75 | * @param[in] place The ATMI memory place 76 | * 77 | * @param[in] symbol Pointer to a non-NULL global symbol name 78 | * 79 | * @param[out] var_addr Pointer to a non-NULL @p void* variable that will 80 | * hold the device address of the global symbol object. 81 | * 82 | * @param[out] var_size Pointer to a non-NULL unsigned int variable that will 83 | * hold the size of the global symbol object. 
84 | * 85 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. 86 | * 87 | * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are 88 | * invalid, if @p place is an invalid 89 | * location in the current node, or if ATMI is not initialized. 90 | * 91 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. 92 | */ 93 | atmi_status_t atmi_interop_hsa_get_symbol_info(atmi_mem_place_t place, 94 | const char *symbol, 95 | void **var_addr, 96 | unsigned int *var_size); 97 | 98 | /** 99 | * @brief Get the HSA-specific kernel info from a kernel name 100 | * 101 | * @details Use this function to query the HSA-specific kernel info from the 102 | * kernel name. 103 | * This function is meaningful only after calling one 104 | * of the @p atmi_module_register functions. 105 | * 106 | * @param[in] place The ATMI memory place 107 | * 108 | * @param[in] kernel_name Pointer to a char array with the kernel name 109 | * 110 | * @param[in] info The different possible kernel properties 111 | * 112 | * @param[out] value Pointer to a non-NULL @p uint32_t variable that will 113 | * hold the return value of the kernel property. 114 | * 115 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. 116 | * 117 | * @retval ::ATMI_STATUS_ERROR If @p kernel_name or @p value are 118 | * invalid, if @p place is an invalid 119 | * location in the current node, or if ATMI is not initialized. 120 | * 121 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. 
122 | */ 123 | atmi_status_t atmi_interop_hsa_get_kernel_info( 124 | atmi_mem_place_t place, const char *kernel_name, 125 | hsa_executable_symbol_info_t info, uint32_t *value); 126 | /** @} */ 127 | 128 | #ifdef __cplusplus 129 | } 130 | #endif 131 | 132 | #endif // INCLUDE_ATMI_INTEROP_HSA_H_ 133 | -------------------------------------------------------------------------------- /include/atmi_kl.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef INCLUDE_ATMI_KL_H_ 7 | #define INCLUDE_ATMI_KL_H_ 8 | 9 | #include 10 | 11 | extern void atmid_task_launch(atmi_lparm_t *lp, unsigned long kernel_id, 12 | void *args_region, 13 | unsigned long args_region_size); 14 | 15 | #endif // INCLUDE_ATMI_KL_H_ 16 | -------------------------------------------------------------------------------- /src/cmake_modules/FindLibElf.cmake: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | 7 | # Below are the variables that will be set at the end of this file 8 | # LIBELF_FOUND 9 | # LIBELF_INCLUDE_DIRS 10 | # LIBELF_LIBRARIES 11 | 12 | 13 | find_path (LIBELF_INCLUDE_DIRS 14 | NAMES 15 | libelf.h 16 | PATHS 17 | /usr/include 18 | /usr/local/include 19 | ENV CPATH) 20 | 21 | find_library (LIBELF_LIBRARIES 22 | NAMES 23 | elf 24 | PATHS 25 | /usr/lib/x86_64-linux-gnu 26 | /usr/lib 27 | /usr/local/lib 28 | ENV LIBRARY_PATH 29 | ENV LD_LIBRARY_PATH) 30 | 31 | # set LIBELF_FOUND to TRUE if the below variables are true, 32 | # i.e. header and lib files are found 33 | include (FindPackageHandleStandardArgs) 34 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibElf DEFAULT_MSG 35 | LIBELF_LIBRARIES 36 | LIBELF_INCLUDE_DIRS) 37 | 38 | mark_as_advanced(LIBELF_INCLUDE_DIRS LIBELF_LIBRARIES) 39 | -------------------------------------------------------------------------------- /src/cmake_modules/FindROCm.cmake: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | 7 | # Below are the variables that will be set at the end of this file 8 | # ROCM_FOUND 9 | # ROCM_LIBRARIES 10 | # ROCM_INCLUDE_DIRS 11 | # ROCM_VERSION 12 | # ROCM_VERSION_MAJOR 13 | # ROCM_VERSION_MINOR 14 | # ROCM_VERSION_PATCH 15 | # ROCM_VERSION_STRING 16 | 17 | find_path( 18 | ROCM_INCLUDE_DIRS 19 | hsa/hsa.h 20 | HINTS 21 | ${ROC_DIR}/include 22 | ${ROCR_DIR}/include 23 | /opt/rocm/include 24 | ENV CPATH 25 | ) 26 | 27 | find_library( 28 | ROCR_LIBRARY 29 | hsa-runtime64 30 | HINTS 31 | ${ROC_DIR}/lib 32 | ${ROC_DIR} 33 | ${ROCR_DIR}/lib 34 | ${ROCR_DIR} 35 | /opt/rocm/lib 36 | /usr/local/lib 37 | /usr/lib/x86_64-linux-gnu 38 | /usr/lib 39 | ENV LIBRARY_PATH 40 | ENV LD_LIBRARY_PATH 41 | ) 42 | find_library( 43 | ROCT_LIBRARY 44 | hsakmt 45 | HINTS 46 | ${ROC_DIR}/lib 47 | ${ROC_DIR} 48 | ${ROCT_DIR}/lib 49 | ${ROCT_DIR} 50 | /opt/rocm/lib 51 | /usr/local/lib 52 | /usr/lib/x86_64-linux-gnu 53 | /usr/lib 54 | ENV LIBRARY_PATH 55 | ENV LD_LIBRARY_PATH 56 | ) 57 | get_filename_component (ROCM_LIBRARIES_DIR ${ROCR_LIBRARY} DIRECTORY) 58 | set(ROCM_LIBRARIES ${ROCR_LIBRARY} ${ROCT_LIBRARY}) 59 | #message(STATUS "ROCm libraries: ${ROCM_LIBRARIES}") 60 | #message(STATUS "ROCm libraries dir: ${ROCM_LIBRARIES_DIR}") 61 | 62 | if(NOT ROCM_VERSION) 63 | # Do not use the metapackage version number because it is error-prone. 64 | # Use ROCr version number directly if there is a way to infer it. 65 | # Until then, set the ROCm version to 0.0.0 as default. 
66 | # file(GLOB version_files 67 | # LIST_DIRECTORIES false 68 | # /opt/rocm/.info/version* 69 | # ) 70 | # list(GET version_files 0 version_file) 71 | # # Compute the version 72 | # execute_process( 73 | # COMMAND cat ${version_file} 74 | # OUTPUT_VARIABLE _rocm_version 75 | # ERROR_VARIABLE _rocm_error 76 | # OUTPUT_STRIP_TRAILING_WHITESPACE 77 | # ERROR_STRIP_TRAILING_WHITESPACE 78 | # ) 79 | # if(NOT _rocm_error) 80 | # set(ROCM_VERSION ${_rocm_version} CACHE STRING "Version of ROCm as found in /opt/rocm/.info/version*") 81 | # else() 82 | # set(ROCM_VERSION "0.0.0" CACHE STRING "Version of ROCm set to default") 83 | # endif() 84 | set(ROCM_VERSION "0.0.0" CACHE STRING "Version of ROCm set to default") 85 | mark_as_advanced(ROCM_VERSION) 86 | endif() 87 | 88 | string(REPLACE "." ";" _rocm_version_list "${ROCM_VERSION}") 89 | list(GET _rocm_version_list 0 ROCM_VERSION_MAJOR) 90 | list(GET _rocm_version_list 1 ROCM_VERSION_MINOR) 91 | list(GET _rocm_version_list 2 ROCM_VERSION_PATCH) 92 | set(ROCM_VERSION_STRING "${ROCM_VERSION}") 93 | 94 | # set ROCM_FOUND to TRUE if the below variables are true, 95 | # i.e. header and lib files are found 96 | include(FindPackageHandleStandardArgs) 97 | find_package_handle_standard_args(ROCM DEFAULT_MSG 98 | ROCM_LIBRARIES 99 | ROCM_INCLUDE_DIRS 100 | ROCM_VERSION 101 | ROCM_VERSION_STRING 102 | ) 103 | 104 | mark_as_advanced( 105 | ROCM_LIBRARIES 106 | ROCM_INCLUDE_DIRS 107 | ROCM_VERSION 108 | ROCM_VERSION_MAJOR 109 | ROCM_VERSION_MINOR 110 | ROCM_VERSION_PATCH 111 | ROCM_VERSION_STRING 112 | ) 113 | -------------------------------------------------------------------------------- /src/cmake_modules/utils.cmake: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | 7 | 8 | ## Parses the VERSION_STRING variable and places 9 | ## the first, second and third number values in 10 | ## the major, minor and patch variables. 11 | function( parse_version VERSION_STRING ) 12 | 13 | string ( FIND ${VERSION_STRING} "-" STRING_INDEX ) 14 | 15 | if ( ${STRING_INDEX} GREATER -1 ) 16 | math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" ) 17 | string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD ) 18 | endif () 19 | 20 | string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} ) 21 | list ( LENGTH VERSIONS VERSION_COUNT ) 22 | 23 | if ( ${VERSION_COUNT} GREATER 0) 24 | list ( GET VERSIONS 0 MAJOR ) 25 | set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE ) 26 | set ( TEMP_VERSION_STRING "${MAJOR}" ) 27 | endif () 28 | 29 | if ( ${VERSION_COUNT} GREATER 1 ) 30 | list ( GET VERSIONS 1 MINOR ) 31 | set ( VERSION_MINOR ${MINOR} PARENT_SCOPE ) 32 | set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" ) 33 | endif () 34 | 35 | if ( ${VERSION_COUNT} GREATER 2 ) 36 | list ( GET VERSIONS 2 PATCH ) 37 | set ( VERSION_PATCH ${PATCH} PARENT_SCOPE ) 38 | set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" ) 39 | endif () 40 | 41 | if ( DEFINED VERSION_BUILD ) 42 | set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE ) 43 | endif () 44 | 45 | set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE ) 46 | 47 | endfunction () 48 | 49 | ## Gets the current version of the repository 50 | ## using versioning tags and git describe. 51 | ## Passes back a packaging version string 52 | ## and a library version string. 
53 | function ( get_version DEFAULT_VERSION_STRING ) 54 | 55 | parse_version ( ${DEFAULT_VERSION_STRING} ) 56 | 57 | find_program ( GIT NAMES git ) 58 | 59 | if ( GIT ) 60 | 61 | execute_process ( COMMAND git describe --dirty --tags --long --match atmi-[0-9]* 62 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 63 | OUTPUT_VARIABLE GIT_TAG_STRING 64 | OUTPUT_STRIP_TRAILING_WHITESPACE 65 | RESULT_VARIABLE RESULT ) 66 | 67 | if ( ${RESULT} EQUAL 0 ) 68 | 69 | parse_version ( ${GIT_TAG_STRING} ) 70 | 71 | endif () 72 | 73 | endif () 74 | 75 | set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE ) 76 | set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE ) 77 | set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE ) 78 | set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE ) 79 | set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE ) 80 | 81 | endfunction() 82 | -------------------------------------------------------------------------------- /src/compiler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 5 | # ===-------------------------------------------------------------------------- 6 | 7 | string( TOLOWER "${ATMI_C_EXTENSION}" ATMI_C_EXTENSION_VAR ) 8 | if(NOT ATMI_C_EXTENSION_VAR MATCHES on ) 9 | libatmi_runtime_say("Not building ATMI C Extension. 
Use -DATMI_C_EXTENSION=on in your cmake options to enable.") 10 | return() 11 | endif() 12 | 13 | if(ROCM_FOUND) 14 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux") 15 | set (CMAKE_C_COMPILER g++) 16 | set (EXECUTE_COMMAND ${CMAKE_C_COMPILER} -print-file-name=plugin) 17 | execute_process(COMMAND ${EXECUTE_COMMAND} RESULT_VARIABLE rv OUTPUT_VARIABLE ov) 18 | # run the command -print-file-name=plugin to determine 19 | # the location of the GCC Plugin. Strip it out of whitespaces before 20 | # and after the string to determine if the plugin has been installed or 21 | # not. If it has been installed, the returned string provides the 22 | # location of the GCC plugin 23 | string(STRIP ${ov} new_ov) 24 | 25 | if(new_ov STREQUAL "plugin") 26 | libatmi_runtime_say("GCC Plugin not found") 27 | set(PLUGIN_FOUND 0) 28 | else() 29 | if(EXISTS "${new_ov}/include/gcc-plugin.h" 30 | AND EXISTS "${new_ov}/include/print-tree.h") 31 | libatmi_runtime_say("GCC Plugin found. Preparing to build ATMI C extensions.") 32 | include_directories(${new_ov}/include) 33 | set(PLUGIN_FOUND 1) 34 | else() 35 | libatmi_runtime_say("GCC Plugin (gcc-plugin.h or print-tree.h) not found") 36 | set(PLUGIN_FOUND 0) 37 | endif() 38 | endif() 39 | 40 | if(PLUGIN_FOUND) 41 | # Enable support for C++11? 42 | #add_definitions(-std=c++11) 43 | 44 | # If building this library in debug mode, we define a macro to enable 45 | # dumping progress messages at runtime. 
46 | string( TOLOWER "${CMAKE_BUILD_TYPE}" ATMI_CMAKE_BUILD_TYPE) 47 | if(ATMI_CMAKE_BUILD_TYPE MATCHES debug) 48 | add_definitions(-DDEBUG) 49 | add_definitions(-g) 50 | add_definitions(-O0) 51 | else() 52 | add_definitions(-g) 53 | add_definitions(-O2) 54 | endif() 55 | add_definitions(-c) 56 | add_definitions(-fpic) 57 | 58 | add_library(atmi_cplugin SHARED 59 | atl_pifgen_plugin.c 60 | atl_synckernel.c 61 | ) 62 | 63 | include_directories(${ROCM_INCLUDE_DIRS}) 64 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 65 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) 66 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include) 67 | 68 | # Install plugin under the lib destination folder. 69 | if(CMAKE_BUILD_TYPE MATCHES Debug) 70 | install(TARGETS atmi_cplugin LIBRARY DESTINATION "lib-debug" COMPONENT cplugin ) 71 | else() 72 | install(TARGETS atmi_cplugin LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT cplugin ) 73 | endif() 74 | # The public header lives in the top-level include directory, two levels above src/compiler (same path as the include_directories call above). 75 | INSTALL(FILES 76 | ${CMAKE_CURRENT_SOURCE_DIR}/../../include/atmi_c_ext.h 77 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 78 | COMPONENT cplugin 79 | ) 80 | 81 | ## CPack component info 82 | set(CPACK_COMPONENT_CPLUGIN_DISPLAY_NAME "ATMI C Plugin") 83 | set(CPACK_COMPONENT_CPLUGIN_DEPENDS runtime) 84 | 85 | 86 | target_link_libraries( 87 | atmi_cplugin 88 | ${ROCM_LIBRARIES} 89 | -L${ROCM_LIBRARIES_DIR} 90 | -Wl,--enable-new-dtags 91 | ) 92 | 93 | if (NOT CMAKE_INSTALL_RPATH) 94 | set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../hsa/lib;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64") 95 | endif () 96 | 97 | else() 98 | libatmi_runtime_say("Not building ATMI C Extension: GCC Plugin not found.") 99 | endif() 100 | else() 101 | libatmi_runtime_say("Not building ATMI C Extension: only support ATMI in Linux x86_64, ppc64le or aarch64 hosts.") 102 | endif() 103 | else() 104 | libatmi_runtime_say("Not building ATMI C Extension: libhsa-runtime64 not found") 105 | endif() 106 | 
-------------------------------------------------------------------------------- /src/compiler/atl_synckernel.c: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #include 7 | #include "atl_internal.h" 8 | #include "atmi.h" 9 | 10 | /* Null kernel */ 11 | #ifdef __cplusplus 12 | #define _CPPSTRING_ "C" 13 | #endif 14 | #ifndef __cplusplus 15 | #define _CPPSTRING_ 16 | #endif 17 | extern _CPPSTRING_ void __sync_kernel() {} 18 | extern _CPPSTRING_ void __sync_kernel_wrapper() { __sync_kernel(); } 19 | static int cpu_initalized = 0; 20 | 21 | typedef struct pif_kernel_table_s { 22 | atmi_devtype_t devtype; 23 | atmi_generic_fp cpu_kernel; 24 | const char *gpu_kernel; 25 | } pif_kernel_table_t; 26 | 27 | pif_kernel_table_t __sync_kernel_pif_fn_table[] = { 28 | {.devtype = ATMI_DEVTYPE_CPU, 29 | .cpu_kernel = (atmi_generic_fp)__sync_kernel_wrapper, 30 | .gpu_kernel = 0}, 31 | }; 32 | 33 | static int __sync_kernel_CPU_FK = 0; 34 | static atmi_kernel_t __sync_kernel_obj; 35 | /* Launch the null CPU synchronization kernel with the given launch 36 | * parameters. On first use this creates the kernel object, registers 37 | * __sync_kernel_wrapper as its CPU implementation and calls 38 | * atmi_init(ATMI_DEVTYPE_CPU). Returns the handle produced by 39 | * atmi_task_launch, or ATMI_NULL_TASK_HANDLE when the selected table 40 | * entry is not a CPU kernel and nothing was launched. */ 41 | extern _CPPSTRING_ atmi_task_handle_t __sync_kernel_pif(atmi_lparm_t *lparm) { 42 | int k_id = lparm->kernel_id; 43 | assert(k_id == 0); 44 | atmi_devtype_t devtype = __sync_kernel_pif_fn_table[k_id].devtype; 45 | if (devtype == ATMI_DEVTYPE_GPU) { 46 | } else if (devtype == ATMI_DEVTYPE_CPU) { 47 | /* Kernel initialization has to be done before kernel arguments are 48 | * set/inspected */ 49 | const char *kernel_name = "__sync_kernel"; 50 | const int num_args = 0; 51 | if (__sync_kernel_CPU_FK == 0) { 52 | atmi_kernel_create_empty(&__sync_kernel_obj, num_args, NULL); 53 | atmi_kernel_add_cpu_impl( 54 | __sync_kernel_obj, 55 | 
(atmi_generic_fp)(__sync_kernel_pif_fn_table[0].cpu_kernel), 0); 56 | __sync_kernel_CPU_FK = 1; 57 | } 58 | if (cpu_initalized == 0) { 59 | atmi_init(ATMI_DEVTYPE_CPU); 60 | cpu_initalized = 1; 61 | } 62 | return atmi_task_launch(lparm, __sync_kernel_obj, NULL); 63 | } 64 | /* No CPU implementation was selected, so no task was launched; return an explicit null handle instead of flowing off the end of a non-void function. NOTE(review): ATMI_NULL_TASK_HANDLE is expected to come from atmi.h - confirm. */ 65 | return ATMI_NULL_TASK_HANDLE; 66 | } 67 | -------------------------------------------------------------------------------- /src/compiler/include/atl_pifgen.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef __ATMI_PIFGEN_PLUGIN__ 7 | #define __ATMI_PIFGEN_PLUGIN__ 8 | struct cl_decoded_option 9 | { 10 | /* The index of this option, or an OPT_SPECIAL_* value for 11 | * non-options and unknown options. */ 12 | size_t opt_index; 13 | 14 | /* Any warning to give for use of this option, or NULL if none. */ 15 | const char *warn_message; 16 | 17 | /* The string argument, or NULL if none. For OPT_SPECIAL_* cases, 18 | * the option or non-option command-line argument. */ 19 | const char *arg; 20 | 21 | /* The original text of option plus arguments, with separate argv 22 | * elements concatenated into one string with spaces separating 23 | * them. This is for such uses as diagnostics and 24 | * -frecord-gcc-switches. */ 25 | const char *orig_option_with_args_text; 26 | 27 | /* The canonical form of the option and its argument, for when it is 28 | * necessary to reconstruct argv elements (in particular, for 29 | * processing specs and passing options to subprocesses from the 30 | * driver). */ 31 | const char *canonical_option[4]; 32 | 33 | /* The number of elements in the canonical form of the option and 34 | * arguments; always at least 1. 
*/ 35 | size_t canonical_option_num_elements; 36 | 37 | /* For a boolean option, 1 for the true case and 0 for the "no-" 38 | * case. For an unsigned integer option, the value of the 39 | * argument. 1 in all other cases. */ 40 | int value; 41 | 42 | /* Any flags describing errors detected in this option. */ 43 | int errors; 44 | }; 45 | 46 | /* Decoded options, and number of such options. */ 47 | extern struct cl_decoded_option *save_decoded_options; 48 | extern unsigned int save_decoded_options_count; 49 | 50 | #endif // __ATMI_PIFGEN_PLUGIN__ 51 | -------------------------------------------------------------------------------- /src/device_runtime/device_rt.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef SRC_DEVICE_RUNTIME_DEVICE_RT_H_ 7 | #define SRC_DEVICE_RUNTIME_DEVICE_RT_H_ 8 | 9 | #include "rt.h" 10 | 11 | namespace core { 12 | 13 | class DeviceRuntime : public Runtime { 14 | public: 15 | static DeviceRuntime &getInstance() { 16 | static DeviceRuntime instance; 17 | return instance; 18 | } 19 | 20 | // init/finalize 21 | virtual atmi_status_t Initialize(atmi_devtype_t); 22 | virtual atmi_status_t Finalize(); 23 | // kernels 24 | virtual atmi_status_t CreateKernel(atmi_kernel_t *, const int, const size_t *, 25 | const int, va_list); 26 | virtual atmi_status_t ReleaseKernel(atmi_kernel_t); 27 | 28 | // bool initialized() const { return initialized_; } 29 | // void set_initialized(const bool val) { initialized_ = val; } 30 | private: 31 | DeviceRuntime() = default; 32 | ~DeviceRuntime() = default; 33 | DeviceRuntime(const DeviceRuntime &) = delete; 34 | DeviceRuntime &operator=(const DeviceRuntime &) = delete; 
35 | // bool initialized_; 36 | }; 37 | 38 | } // namespace core 39 | 40 | #endif // SRC_DEVICE_RUNTIME_DEVICE_RT_H_ 41 | -------------------------------------------------------------------------------- /src/device_runtime/include/device_amd_hsa.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #ifndef DEVICE_AMD_HSA_H 9 | #define DEVICE_AMD_HSA_H 10 | 11 | typedef char int8_t; 12 | typedef unsigned char uint8_t; 13 | typedef short int16_t; 14 | typedef unsigned short uint16_t; 15 | typedef int int32_t; 16 | typedef unsigned int uint32_t; 17 | typedef long int64_t; 18 | typedef unsigned long uint64_t; 19 | 20 | #ifdef __LP64__ 21 | #undef __LP64__ 22 | #endif 23 | #define __LP64__ 24 | #define DEVICE_COMPILER 25 | #define LITTLEENDIAN_CPU 26 | #include "hsa.h" 27 | // below includes are unnecessary for ATMI 28 | //#include "amd_hsa_common.h" 29 | //#include "amd_hsa_elf.h" 30 | //#include "amd_hsa_kernel_code.h" 31 | //#include "amd_hsa_queue.h" 32 | //#include "amd_hsa_signal.h" 33 | //#include "device_amd_hsa.h" 34 | #undef DEVICE_COMPILER 35 | 36 | #endif // DEVICE_AMD_HSA_H 37 | -------------------------------------------------------------------------------- /src/runtime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | # ===-------------------------------------------------------------------------- 6 | 7 | if(ROCM_FOUND) 8 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux") 9 | libatmi_runtime_say("Preparing to build runtime/core") 10 | add_subdirectory(core) 11 | add_subdirectory(interop) 12 | else() 13 | libatmi_runtime_say("Not building ATMI Runtime: only support ATMI in Linux x86_64 or ppc64le hosts.") 14 | endif() 15 | else() 16 | libatmi_runtime_say("Not building ATMI Runtime: libhsa-runtime64 not found") 17 | endif() 18 | -------------------------------------------------------------------------------- /src/runtime/core/queue.cpp: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #include "queue.h" 7 | #include "atmi.h" 8 | #include "hsa/hsa_ext_amd.h" 9 | 10 | bool equalsPlace(const atmi_place_t &l, const atmi_place_t &r) { 11 | bool val = false; 12 | if (l.node_id == r.node_id && l.type == r.type && 13 | l.device_id == r.device_id && l.cu_mask == r.cu_mask) 14 | val = true; 15 | return val; 16 | } 17 | 18 | hsa_status_t ATLGPUQueue::set_place(atmi_place_t place) { 19 | hsa_status_t val = HSA_STATUS_SUCCESS; 20 | if (!equalsPlace(place_, place)) { 21 | place_ = place; 22 | val = hsa_amd_queue_cu_set_mask( 23 | queue_, 2, reinterpret_cast(&(place_.cu_mask))); 24 | } 25 | return val; 26 | } 27 | 28 | hsa_status_t ATLCPUQueue::set_place(atmi_place_t place) { 29 | hsa_status_t val = HSA_STATUS_SUCCESS; 30 | if (!equalsPlace(place_, place)) { 31 | place_ = place; 32 | // change pthread-to-core binding based on cpu_set. 
If number of bits that 33 | // are set on cpu_set is >1 then choose the first non-zero bit and place 34 | // the thread on that core. 35 | // TODO(ashwinma): Any other scheduling algorithms based on load, task group 36 | // annotations, and so on... 37 | } 38 | return val; 39 | } 40 | -------------------------------------------------------------------------------- /src/runtime/include/data.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | #ifndef SRC_RUNTIME_INCLUDE_DATA_H_ 7 | #define SRC_RUNTIME_INCLUDE_DATA_H_ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "atmi.h" 14 | // #define USE_ROCR_PTR_INFO 15 | // There seems to be a bug with ROCr's hsa_amd_pointer_info_set_userdata for 16 | // variable 17 | // symbols. If/when that bug is fixed, we can uncomment the above line to 18 | // USE_ROCR_PTR_INFO. 19 | // Until then, we maintain our own mapping of device addr to a user specified 20 | // data object 21 | namespace core { 22 | // Internal representation of any data that is created and managed by ATMI. 23 | // Data can be located on any device memory or host memory. 
24 | class ATLData { 25 | public: 26 | ATLData(void *ptr, size_t size, atmi_mem_place_t place, atmi_arg_type_t type) 27 | : ptr_(ptr), 28 | host_aliasptr_(NULL), 29 | size_(size), 30 | place_(place), 31 | arg_type_(type) {} 32 | 33 | ATLData(void *ptr, void *hostptr, size_t size, atmi_mem_place_t place, 34 | atmi_arg_type_t type) 35 | : ptr_(ptr), 36 | host_aliasptr_(hostptr), 37 | size_(size), 38 | place_(place), 39 | arg_type_(type) {} 40 | 41 | void *ptr() const { return ptr_; } 42 | void *host_aliasptr() const { return host_aliasptr_; } 43 | size_t size() const { return size_; } 44 | atmi_mem_place_t place() const { return place_; } 45 | atmi_arg_type_t arg_type() const { return arg_type_; } 46 | 47 | private: 48 | // make this a vector of pointers? 49 | void *ptr_; 50 | void *host_aliasptr_; 51 | size_t size_; 52 | atmi_mem_place_t place_; 53 | atmi_arg_type_t arg_type_; 54 | }; 55 | 56 | #ifndef USE_ROCR_PTR_INFO 57 | //--- 58 | struct ATLMemoryRange { 59 | const void *base_pointer; 60 | const void *end_pointer; 61 | ATLMemoryRange(const void *bp, size_t size_bytes) 62 | : base_pointer(bp), 63 | end_pointer(reinterpret_cast(bp) + size_bytes - 64 | 1) {} 65 | }; 66 | 67 | // Functor to compare ranges: 68 | struct ATLMemoryRangeCompare { 69 | // Return true is LHS range is less than RHS - used to order the ranges 70 | bool operator()(const ATLMemoryRange &lhs, const ATLMemoryRange &rhs) const { 71 | return lhs.end_pointer < rhs.base_pointer; 72 | } 73 | }; 74 | 75 | //------------------------------------------------------------------------------------------------- 76 | // This structure tracks information for each pointer. 77 | // Uses memory-range-based lookups - so pointers that exist anywhere in the 78 | // range of hostPtr + size 79 | // will find the associated ATLPointerInfo. 80 | // The insertions and lookups use a self-balancing binary tree and should 81 | // support O(logN) lookup speed. 
82 | // The structure is thread-safe - writers obtain a mutex before modifying the 83 | // tree. Multiple simulatenous readers are supported. 84 | class ATLPointerTracker { 85 | typedef std::map 86 | MapTrackerType; 87 | 88 | public: 89 | void insert(void *pointer, ATLData *data); 90 | void remove(void *pointer); 91 | ATLData *find(const void *pointer); 92 | 93 | private: 94 | MapTrackerType tracker_; 95 | std::mutex mutex_; 96 | // std::shared_timed_mutex _mut; 97 | }; 98 | 99 | extern ATLPointerTracker g_data_map; // Track all am pointer allocations. 100 | #endif 101 | 102 | enum class Direction { ATMI_H2D, ATMI_D2H, ATMI_D2D, ATMI_H2H }; 103 | 104 | hsa_agent_t get_mem_agent(atmi_mem_place_t place); 105 | hsa_agent_t get_compute_agent(atmi_place_t place); 106 | } // namespace core 107 | #endif // SRC_RUNTIME_INCLUDE_DATA_H_ 108 | -------------------------------------------------------------------------------- /src/runtime/include/device_rt_internal.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | #ifndef SRC_RUNTIME_INCLUDE_DEVICE_RT_INTERNAL_H_ 7 | #define SRC_RUNTIME_INCLUDE_DEVICE_RT_INTERNAL_H_ 8 | 9 | #ifdef __OPENCL_C_VERSION__ 10 | #include "device_amd_hsa.h" 11 | #else 12 | #include 13 | #endif 14 | 15 | #define MAX_NUM_KERNELS (1024 * 16) 16 | /*typedef struct atmi_task_impl_s { 17 | unsigned long int signal; 18 | unsigned char reserved[376]; 19 | } atmi_task_impl_t; 20 | */ 21 | typedef struct atmi_implicit_args_s { 22 | unsigned long offset_x; 23 | unsigned long offset_y; 24 | unsigned long offset_z; 25 | unsigned long hostcall_ptr; 26 | char num_gpu_queues; 27 | unsigned long gpu_queue_ptr; 28 | char num_cpu_queues; 29 | unsigned long cpu_worker_signals; 30 | unsigned long cpu_queue_ptr; 31 | unsigned long kernarg_template_ptr; 32 | // possible TODO: send signal pool to be used by DAGs on GPU 33 | // uint8_t num_signals; 34 | // unsigned long signal_ptr; 35 | } atmi_implicit_args_t; 36 | 37 | typedef struct atmi_kernel_enqueue_template_s { 38 | unsigned long kernel_handle; 39 | hsa_kernel_dispatch_packet_t k_packet; 40 | hsa_agent_dispatch_packet_t a_packet; 41 | unsigned long kernarg_segment_size; 42 | void *kernarg_regions; 43 | } atmi_kernel_enqueue_template_t; 44 | 45 | #endif // SRC_RUNTIME_INCLUDE_DEVICE_RT_INTERNAL_H_ 46 | -------------------------------------------------------------------------------- /src/runtime/include/machine.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | #ifndef SRC_RUNTIME_INCLUDE_MACHINE_H_ 7 | #define SRC_RUNTIME_INCLUDE_MACHINE_H_ 8 | #include 9 | #include 10 | #include 11 | #include "atmi.h" 12 | #include "internal.h" 13 | 14 | class ATLMemory; 15 | 16 | class ATLProcessor { 17 | public: 18 | explicit ATLProcessor(hsa_agent_t agent, 19 | atmi_devtype_t type = ATMI_DEVTYPE_ALL) 20 | : next_best_queue_id_(0), agent_(agent), type_(type) { 21 | queues_.clear(); 22 | memories_.clear(); 23 | } 24 | void addMemory(const ATLMemory &p); 25 | hsa_agent_t agent() const { return agent_; } 26 | // TODO(ashwinma): Do we need this or are we building the machine structure 27 | // just once in the program? 28 | // void removeMemory(ATLMemory &p); 29 | const std::vector &memories() const; 30 | atmi_devtype_t type() const { return type_; } 31 | 32 | virtual void createQueues(const int count) {} 33 | virtual void destroyQueues(); 34 | virtual hsa_queue_t *getQueueAt(const int index); 35 | std::vector queues() const { return queues_; } 36 | virtual hsa_queue_t *getBestQueue(atmi_scheduler_t sched); 37 | int num_cus() const; 38 | int wavefront_size() const; 39 | 40 | protected: 41 | hsa_agent_t agent_; 42 | atmi_devtype_t type_; 43 | std::vector queues_; 44 | // schedule queues by setting this to best queue ID 45 | unsigned int next_best_queue_id_; 46 | std::vector memories_; 47 | }; 48 | 49 | class ATLCPUProcessor : public ATLProcessor { 50 | public: 51 | explicit ATLCPUProcessor(hsa_agent_t agent) 52 | : ATLProcessor(agent, ATMI_DEVTYPE_CPU) { 53 | thread_agents_.clear(); 54 | } 55 | void createQueues(const int count); 56 | 57 | thread_agent_t *getThreadAgentAt(const int index); 58 | const std::vector &thread_agents() const { 59 | return thread_agents_; 60 | } 61 | // misc helper functions needed by ATMI DP 62 | hsa_signal_t *get_worker_sig(hsa_queue_t *q); 63 | 64 | private: 65 | std::vector thread_agents_; 66 | }; 67 | 68 | class 
ATLGPUProcessor : public ATLProcessor { 69 | public: 70 | explicit ATLGPUProcessor(hsa_agent_t agent, 71 | atmi_devtype_t type = ATMI_DEVTYPE_dGPU) 72 | : ATLProcessor(agent, type) {} 73 | void createQueues(const int count); 74 | }; 75 | 76 | class ATLDSPProcessor : public ATLProcessor { 77 | public: 78 | explicit ATLDSPProcessor(hsa_agent_t agent) 79 | : ATLProcessor(agent, ATMI_DEVTYPE_DSP) {} 80 | void createQueues(const int count); 81 | }; 82 | 83 | class ATLMemory { 84 | public: 85 | ATLMemory(hsa_amd_memory_pool_t pool, ATLProcessor p, atmi_memtype_t t) 86 | : memory_pool_(pool), processor_(p), type_(t) {} 87 | ATLProcessor &processor() { return processor_; } 88 | hsa_amd_memory_pool_t memory() const { return memory_pool_; } 89 | 90 | atmi_memtype_t type() const { return type_; } 91 | // uint32_t access_type() { return fine of coarse grained? ;} 92 | /* memory alloc/free */ 93 | void *alloc(size_t s); 94 | void free(void *p); 95 | // atmi_task_handle_t copy(ATLMemory &m, bool async = false); 96 | private: 97 | hsa_amd_memory_pool_t memory_pool_; 98 | ATLProcessor processor_; 99 | atmi_memtype_t type_; 100 | }; 101 | 102 | class ATLMachine { 103 | public: 104 | ATLMachine() { 105 | cpu_processors_.clear(); 106 | gpu_processors_.clear(); 107 | dsp_processors_.clear(); 108 | } 109 | template 110 | void addProcessor(const T &p); 111 | template 112 | std::vector &processors(); 113 | template 114 | size_t processorCount() { 115 | return processors().size(); 116 | } 117 | 118 | private: 119 | std::vector cpu_processors_; 120 | std::vector gpu_processors_; 121 | std::vector dsp_processors_; 122 | }; 123 | 124 | hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc, 125 | const int mem_id); 126 | 127 | #include "machine.tcc" 128 | 129 | #endif // SRC_RUNTIME_INCLUDE_MACHINE_H_ 130 | -------------------------------------------------------------------------------- /src/runtime/include/machine.tcc: 
-------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 5 | *===------------------------------------------------------------------------*/ 6 | extern ATLMachine g_atl_machine; 7 | template 8 | T& get_processor(atmi_place_t place) { 9 | int dev_id = place.device_id; 10 | if(dev_id == -1) { 11 | // user is asking runtime to pick a device 12 | // TODO(ashwinma): best device of this type? pick 0 for now 13 | dev_id = 0; 14 | } 15 | return g_atl_machine.processors()[dev_id]; 16 | } 17 | -------------------------------------------------------------------------------- /src/runtime/include/queue.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | #ifndef SRC_RUNTIME_INCLUDE_QUEUE_H_ 7 | #define SRC_RUNTIME_INCLUDE_QUEUE_H_ 8 | 9 | #include "atmi.h" 10 | #include "hsa/hsa.h" 11 | class ATLQueue { 12 | public: 13 | explicit ATLQueue(hsa_queue_t *q, atmi_place_t p = ATMI_PLACE_ANY(0)) 14 | : queue_(q), place_(p) {} 15 | hsa_queue_t *queue() const { return queue_; } 16 | atmi_place_t place() const { return place_; } 17 | 18 | hsa_status_t set_place(atmi_place_t place); 19 | 20 | protected: 21 | hsa_queue_t *queue_; 22 | atmi_place_t place_; 23 | }; 24 | 25 | class ATLCPUQueue : public ATLQueue { 26 | public: 27 | explicit ATLCPUQueue(hsa_queue_t *q, atmi_place_t p = ATMI_PLACE_ANY_CPU(0)) 28 | : ATLQueue(q, p) {} 29 | hsa_status_t set_place(atmi_place_t place); 30 | }; 31 | 32 | class ATLGPUQueue : public ATLQueue { 33 | public: 34 | explicit ATLGPUQueue(hsa_queue_t *q, atmi_place_t p = ATMI_PLACE_ANY_GPU(0)) 35 | : ATLQueue(q, p) {} 36 | hsa_status_t set_place(atmi_place_t place); 37 | }; 38 | 39 | #endif // SRC_RUNTIME_INCLUDE_QUEUE_H_ 40 | -------------------------------------------------------------------------------- /src/runtime/include/taskgroup.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ATMI (Asynchronous Task and Memory Interface) 3 | * 4 | * This file is distributed under the MIT License. See LICENSE.txt for details. 
5 | *===------------------------------------------------------------------------*/ 6 | 7 | #ifndef SRC_RUNTIME_INCLUDE_TASKGROUP_H_ 8 | #define SRC_RUNTIME_INCLUDE_TASKGROUP_H_ 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "atmi.h" 18 | #include "internal.h" 19 | #include "machine.h" 20 | #include "task.h" 21 | 22 | namespace core { 23 | class TaskgroupImpl { 24 | public: 25 | TaskgroupImpl(bool, atmi_place_t); 26 | ~TaskgroupImpl(); 27 | void sync(); 28 | 29 | template 30 | hsa_queue_t *chooseQueueFromPlace(atmi_place_t place) { 31 | hsa_queue_t *ret_queue = NULL; 32 | atmi_scheduler_t sched = ordered_ ? ATMI_SCHED_NONE : ATMI_SCHED_RR; 33 | ProcType &proc = get_processor(place); 34 | if (ordered_) { 35 | // Get the taskgroup's CPU or GPU queue depending on the task. If 36 | // taskgroup is 37 | // ordered, it will have just one GPU queue for its GPU tasks and just one 38 | // CPU 39 | // queue for its CPU tasks. If a taskgroup has interleaved CPU and GPU 40 | // tasks, then 41 | // a corresponding barrier packet or dependency edge will capture the 42 | // relationship 43 | // between the two queues. 44 | hsa_queue_t *generic_queue = 45 | (place.type == ATMI_DEVTYPE_GPU) ? 
gpu_queue_ : cpu_queue_; 46 | if (generic_queue == NULL) { 47 | generic_queue = proc.getQueueAt(id_); 48 | // put the chosen queue as the taskgroup's designated CPU or GPU queue 49 | if (place.type == ATMI_DEVTYPE_GPU) 50 | gpu_queue_ = generic_queue; 51 | else if (place.type == ATMI_DEVTYPE_CPU) 52 | cpu_queue_ = generic_queue; 53 | } 54 | ret_queue = generic_queue; 55 | } else { 56 | ret_queue = proc.getQueueAt(getBestQueueID(sched)); 57 | } 58 | DEBUG_PRINT("Returned Queue: %p\n", ret_queue); 59 | return ret_queue; 60 | } 61 | 62 | hsa_signal_t signal() const { return group_signal_; } 63 | 64 | private: 65 | atmi_status_t clearSavedTasks(); 66 | int getBestQueueID(atmi_scheduler_t sched); 67 | 68 | public: 69 | uint32_t id_; 70 | bool ordered_; 71 | TaskImpl *last_task_; 72 | hsa_queue_t *gpu_queue_; 73 | hsa_queue_t *cpu_queue_; 74 | atmi_devtype_t last_device_type_; 75 | int next_best_queue_id_; 76 | atmi_place_t place_; 77 | // int next_gpu_qid; 78 | // int next_cpu_qid; 79 | // dependent tasks for the entire task group 80 | TaskImplVecTy and_successors_; 81 | hsa_signal_t group_signal_; 82 | std::atomic task_count_; 83 | pthread_mutex_t group_mutex_; 84 | // the below vectors are collections of tasks of 85 | // a certain type (grouped or ordered). 86 | // TODO(ashwinma): check if some of the below containers 87 | // can be removed 88 | std::deque running_ordered_tasks_; 89 | std::vector running_default_tasks_; 90 | std::vector running_groupable_tasks_; 91 | 92 | // the below vectors are needed by task dependency 93 | // resolution logic where tasks are moved from one 94 | // queue to another depending on their execution state 95 | std::deque created_tasks_; 96 | std::vector dispatched_tasks_; 97 | std::set dispatched_sink_tasks_; 98 | std::atomic first_created_tasks_dispatched_; 99 | 100 | std::queue ready_tasks_; // ReadyTaskQueue 101 | // TODO(ashwinma): for now, all waiting tasks (groupable and individual) are 102 | // placed in a single queue. 
does it make sense to have groupable waiting 103 | // tasks separately waiting in their own queue? perhaps not for now. 104 | // Should revisit if there are more than one callback threads 105 | // std::vector waiting_groupable_tasks; 106 | std::atomic_flag callback_started_; 107 | 108 | // int maxsize; /**< Number of tasks allowed in group */ 109 | // atmi_full_policy_t full_policy;/**< What to do if maxsize reached */ 110 | }; // class TaskgroupImpl 111 | } // namespace core 112 | #endif // SRC_RUNTIME_INCLUDE_TASKGROUP_H_ 113 | -------------------------------------------------------------------------------- /src/runtime/interop/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 5 | # ===-------------------------------------------------------------------------- 6 | 7 | # compile HSA interop layer if specified explicitly 8 | string( TOLOWER "${ATMI_HSA_INTEROP}" ATMI_HSA_INTEROP_VAR ) 9 | if(ATMI_HSA_INTEROP_VAR MATCHES on ) 10 | add_subdirectory(hsa) 11 | endif() 12 | -------------------------------------------------------------------------------- /src/runtime/interop/hsa/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ===-------------------------------------------------------------------------- 2 | # ATMI (Asynchronous Task and Memory Interface) 3 | # 4 | # This file is distributed under the MIT License. See LICENSE.txt for details. 
# ===--------------------------------------------------------------------------

libatmi_runtime_say("Preparing to build runtime/interop/hsa")
target_sources(atmi_runtime PRIVATE
    ${PROJECT_SOURCE_DIR}/runtime/interop/hsa/atmi_interop_hsa.cpp
    )

include_directories(${ROCM_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../../include)

# the public interop header that is copied to the build tree and installed
set (ATMI_INTEROP_HSA_HEADER
     ${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/atmi_interop_hsa.h)

# set output dir for .h files
set (OUTPUT_INC_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../../include)

# create output dir for include files
# (CMake's own command instead of a hard-coded /bin/mkdir)
file(MAKE_DIRECTORY "${OUTPUT_INC_DIRECTORY}")

# Copy the header into the build tree. The copy uses CMake's portable
# command-line tool rather than /usr/bin/rsync, and the rule depends on the
# one file it copies: DEPENDS does not expand wildcards such as *.h.
add_custom_command(
  OUTPUT ${OUTPUT_INC_DIRECTORY}/atmi_interop_hsa.h
  COMMAND ${CMAKE_COMMAND} -E copy ${ATMI_INTEROP_HSA_HEADER} ${OUTPUT_INC_DIRECTORY}/atmi_interop_hsa.h
  DEPENDS ${ATMI_INTEROP_HSA_HEADER}
  )

add_custom_target(interop_header ALL DEPENDS ${OUTPUT_INC_DIRECTORY}/atmi_interop_hsa.h)

INSTALL(FILES
        ${ATMI_INTEROP_HSA_HEADER}
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/atmi"
        COMPONENT runtime
       )

--------------------------------------------------------------------------------
/src/runtime/interop/hsa/atmi_interop_hsa.cpp:
--------------------------------------------------------------------------------
/*===--------------------------------------------------------------------------
 * ATMI (Asynchronous Task and Memory Interface)
 *
 * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/ 6 | #include "atmi_interop_hsa.h" 7 | #include "device_rt_internal.h" 8 | #include "internal.h" 9 | using core::atl_is_atmi_initialized; 10 | using core::get_compute_agent; 11 | using core::get_memory_pool_by_mem_place; 12 | 13 | atmi_status_t atmi_interop_hsa_get_agent(atmi_place_t proc, 14 | hsa_agent_t *agent) { 15 | if (!atl_is_atmi_initialized()) return ATMI_STATUS_ERROR; 16 | if (!agent) return ATMI_STATUS_ERROR; 17 | 18 | *agent = get_compute_agent(proc); 19 | return ATMI_STATUS_SUCCESS; 20 | } 21 | 22 | atmi_status_t atmi_interop_hsa_get_memory_pool(atmi_mem_place_t memory, 23 | hsa_amd_memory_pool_t *pool) { 24 | if (!atl_is_atmi_initialized()) return ATMI_STATUS_ERROR; 25 | if (!pool) return ATMI_STATUS_ERROR; 26 | 27 | *pool = get_memory_pool_by_mem_place(memory); 28 | return ATMI_STATUS_SUCCESS; 29 | } 30 | 31 | atmi_status_t atmi_interop_hsa_get_symbol_info(atmi_mem_place_t place, 32 | const char *symbol, 33 | void **var_addr, 34 | unsigned int *var_size) { 35 | /* 36 | // Typical usage: 37 | void *var_addr; 38 | size_t var_size; 39 | atmi_interop_hsa_get_symbol_addr(gpu_place, "symbol_name", &var_addr, 40 | &var_size); 41 | atmi_memcpy(host_add, var_addr, var_size); 42 | */ 43 | 44 | if (!atl_is_atmi_initialized()) return ATMI_STATUS_ERROR; 45 | atmi_machine_t *machine = atmi_machine_get_info(); 46 | if (!symbol || !var_addr || !var_size || !machine) return ATMI_STATUS_ERROR; 47 | if (place.dev_id < 0 || 48 | place.dev_id >= machine->device_count_by_type[place.dev_type]) 49 | return ATMI_STATUS_ERROR; 50 | 51 | // get the symbol info 52 | std::string symbolStr = std::string(symbol); 53 | if (SymbolInfoTable[place.dev_id].find(symbolStr) != 54 | SymbolInfoTable[place.dev_id].end()) { 55 | atl_symbol_info_t info = SymbolInfoTable[place.dev_id][symbolStr]; 56 | *var_addr = reinterpret_cast(info.addr); 57 | *var_size = info.size; 58 | return ATMI_STATUS_SUCCESS; 59 | } else 
{ 60 | *var_addr = NULL; 61 | *var_size = 0; 62 | return ATMI_STATUS_ERROR; 63 | } 64 | } 65 | 66 | atmi_status_t atmi_interop_hsa_get_kernel_info( 67 | atmi_mem_place_t place, const char *kernel_name, 68 | hsa_executable_symbol_info_t kernel_info, uint32_t *value) { 69 | /* 70 | // Typical usage: 71 | uint32_t value; 72 | atmi_interop_hsa_get_kernel_addr(gpu_place, "kernel_name", 73 | HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, 74 | &val); 75 | */ 76 | 77 | if (!atl_is_atmi_initialized()) return ATMI_STATUS_ERROR; 78 | atmi_machine_t *machine = atmi_machine_get_info(); 79 | if (!kernel_name || !value || !machine) return ATMI_STATUS_ERROR; 80 | if (place.dev_id < 0 || 81 | place.dev_id >= machine->device_count_by_type[place.dev_type]) 82 | return ATMI_STATUS_ERROR; 83 | 84 | atmi_status_t status = ATMI_STATUS_SUCCESS; 85 | // get the kernel info 86 | std::string kernelStr = std::string(kernel_name); 87 | if (KernelInfoTable[place.dev_id].find(kernelStr) != 88 | KernelInfoTable[place.dev_id].end()) { 89 | atl_kernel_info_t info = KernelInfoTable[place.dev_id][kernelStr]; 90 | switch (kernel_info) { 91 | case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE: 92 | *value = info.group_segment_size; 93 | break; 94 | case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE: 95 | *value = info.private_segment_size; 96 | break; 97 | case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE: 98 | // return the size for non-implicit args 99 | *value = info.kernel_segment_size - sizeof(atmi_implicit_args_t); 100 | break; 101 | default: 102 | *value = 0; 103 | status = ATMI_STATUS_ERROR; 104 | break; 105 | } 106 | } else { 107 | *value = 0; 108 | status = ATMI_STATUS_ERROR; 109 | } 110 | 111 | return status; 112 | } 113 | --------------------------------------------------------------------------------