├── .clang-format
├── .gitignore
├── INSTALL.md
├── LICENSE.txt
├── README.md
├── bin
├── cloc.sh
├── gputable.txt
├── mygpu
└── mymcpu
├── docs
└── atmi-reference.pdf
├── examples
├── README.md
├── c_extension
│ ├── depends
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ ├── csquares.cpp
│ │ └── csquares_kernels.cl
│ ├── eps
│ │ ├── Makefile
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ ├── eps.cpp
│ │ └── nullKernel.cl
│ ├── fibonacci
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ └── fibonacci.cpp
│ ├── helloworld
│ │ ├── HelloWorld.cpp
│ │ ├── Makefile
│ │ └── hw.cl
│ ├── helloworld_dGPU
│ │ ├── HelloWorld.cpp
│ │ ├── Makefile
│ │ └── hw.cl
│ └── kps
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ ├── kps.cpp
│ │ └── nullKernel.cl
├── c_extension_denq
│ ├── helloworld
│ │ ├── HelloWorld.cpp
│ │ ├── Makefile
│ │ └── hw.cl
│ ├── kps
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ ├── kps.cpp
│ │ └── nullKernel.cl
│ └── reduction
│ │ ├── Makefile
│ │ ├── Reduction.cpp
│ │ ├── buildrun.sh
│ │ ├── cleanup.sh
│ │ └── reduction.cl
├── interop
│ ├── globalsymbol
│ │ ├── Makefile
│ │ ├── globalsymbol.cl
│ │ └── globalsymbol.cpp
│ └── hsainfo
│ │ ├── Makefile
│ │ └── hsainfo.cpp
├── runtime
│ ├── dlbench_multi_agent
│ │ ├── build.atmi.sh
│ │ ├── dlbench.atmi.c
│ │ ├── dlbench.h
│ │ └── grayscale.cl
│ ├── eps
│ │ ├── Makefile
│ │ ├── eps.cpp
│ │ └── nullKernel.cl
│ ├── fibonacci
│ │ ├── Makefile
│ │ └── fibonacci.cpp
│ ├── helloworld
│ │ ├── Makefile
│ │ ├── hw.cl
│ │ ├── hw.cpp
│ │ └── hw_structs.h
│ ├── helloworld_dGPU
│ │ ├── Makefile
│ │ ├── hw.cl
│ │ └── hw.cpp
│ ├── helloworld_dGPU_async
│ │ ├── Makefile
│ │ ├── hw.cl
│ │ └── hw.cpp
│ ├── helloworld_dGPU_sync
│ │ ├── Makefile
│ │ ├── hw.cl
│ │ └── hw.cpp
│ ├── helloworld_printf
│ │ ├── Makefile
│ │ ├── hw.h
│ │ ├── hw_cpu.c
│ │ ├── hw_gpu.cl
│ │ └── hw_host.cpp
│ ├── kps
│ │ ├── Makefile
│ │ ├── kps.cpp
│ │ └── nullKernel.cl
│ ├── needleman-wunsch
│ │ ├── Makefile
│ │ ├── nw.cl
│ │ ├── nw.cpp
│ │ └── nw.h
│ ├── needleman-wunsch_dGPU
│ │ ├── Makefile
│ │ ├── nw.cl
│ │ ├── nw.cpp
│ │ └── nw.h
│ └── pcie_bw
│ │ ├── Makefile
│ │ └── pcie_bw.cpp
└── runtime_denq
│ ├── helloworld
│ ├── Makefile
│ ├── hw.cl
│ └── hw.cpp
│ ├── kps
│ ├── Makefile
│ ├── kps.cpp
│ └── nullKernel.cl
│ └── reduction
│ ├── Makefile
│ ├── reduction.cl
│ └── reduction.cpp
├── include
├── atmi.h
├── atmi_c_ext.h
├── atmi_interop_hsa.h
├── atmi_kl.h
└── atmi_runtime.h
└── src
├── CMakeLists.txt
├── atmi-backward-compat.cmake
├── cmake_modules
├── FindLibElf.cmake
├── FindROCm.cmake
└── utils.cmake
├── compiler
├── CMakeLists.txt
├── atl_pifgen_plugin.c
├── atl_synckernel.c
└── include
│ ├── atl_pifgen.h
│ └── hsa_cl.h
├── device_runtime
├── CMakeLists.txt
├── bc.cmake
├── device_rt.cl
├── device_rt.cpp
├── device_rt.h
└── include
│ ├── device_amd_hsa.h
│ └── hsa.h
└── runtime
├── CMakeLists.txt
├── core
├── CMakeLists.txt
├── atmi.cpp
├── cputask.cpp
├── data.cpp
├── kernel.cpp
├── machine.cpp
├── queue.cpp
├── system.cpp
├── task.cpp
├── taskgroup.cpp
└── utils.cpp
├── include
├── data.h
├── device_rt_internal.h
├── internal.h
├── kernel.h
├── machine.h
├── machine.tcc
├── queue.h
├── realtimer.h
├── rt.h
├── task.h
└── taskgroup.h
└── interop
├── CMakeLists.txt
└── hsa
├── CMakeLists.txt
└── atmi_interop_hsa.cpp
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | # BasedOnStyle: Google
4 | AccessModifierOffset: -1
5 | AlignAfterOpenBracket: Align
6 | AlignConsecutiveAssignments: false
7 | AlignConsecutiveDeclarations: false
8 | AlignEscapedNewlinesLeft: true
9 | AlignOperands: true
10 | AlignTrailingComments: true
11 | AllowAllParametersOfDeclarationOnNextLine: true
12 | AllowShortBlocksOnASingleLine: false
13 | AllowShortCaseLabelsOnASingleLine: false
14 | AllowShortFunctionsOnASingleLine: All
15 | AllowShortIfStatementsOnASingleLine: true
16 | AllowShortLoopsOnASingleLine: true
17 | AlwaysBreakAfterDefinitionReturnType: None
18 | AlwaysBreakAfterReturnType: None
19 | AlwaysBreakBeforeMultilineStrings: true
20 | AlwaysBreakTemplateDeclarations: true
21 | BinPackArguments: true
22 | BinPackParameters: true
23 | BraceWrapping:
24 | AfterClass: false
25 | AfterControlStatement: false
26 | AfterEnum: false
27 | AfterFunction: false
28 | AfterNamespace: false
29 | AfterObjCDeclaration: false
30 | AfterStruct: false
31 | AfterUnion: false
32 | BeforeCatch: false
33 | BeforeElse: false
34 | IndentBraces: false
35 | BreakBeforeBinaryOperators: None
36 | BreakBeforeBraces: Attach
37 | BreakBeforeTernaryOperators: true
38 | BreakConstructorInitializersBeforeComma: false
39 | ColumnLimit: 80
40 | CommentPragmas: '^ IWYU pragma:'
41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
42 | ConstructorInitializerIndentWidth: 4
43 | ContinuationIndentWidth: 4
44 | Cpp11BracedListStyle: true
45 | DerivePointerAlignment: true
46 | DisableFormat: false
47 | ExperimentalAutoDetectBinPacking: false
48 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
49 | IncludeCategories:
50 | - Regex: '^<.*\.h>'
51 | Priority: 1
52 | - Regex: '^<.*'
53 | Priority: 2
54 | - Regex: '.*'
55 | Priority: 3
56 | IndentCaseLabels: true
57 | IndentWidth: 2
58 | IndentWrappedFunctionNames: false
59 | KeepEmptyLinesAtTheStartOfBlocks: false
60 | MacroBlockBegin: ''
61 | MacroBlockEnd: ''
62 | MaxEmptyLinesToKeep: 1
63 | NamespaceIndentation: None
64 | ObjCBlockIndentWidth: 2
65 | ObjCSpaceAfterProperty: false
66 | ObjCSpaceBeforeProtocolList: false
67 | PenaltyBreakBeforeFirstCallParameter: 1
68 | PenaltyBreakComment: 300
69 | PenaltyBreakFirstLessLess: 120
70 | PenaltyBreakString: 1000
71 | PenaltyExcessCharacter: 1000000
72 | PenaltyReturnTypeOnItsOwnLine: 200
73 | PointerAlignment: Left
74 | ReflowComments: true
75 | SortIncludes: true
76 | SpaceAfterCStyleCast: false
77 | SpaceBeforeAssignmentOperators: true
78 | SpaceBeforeParens: ControlStatements
79 | SpaceInEmptyParentheses: false
80 | SpacesBeforeTrailingComments: 2
81 | SpacesInAngles: false
82 | SpacesInContainerLiterals: true
83 | SpacesInCStyleCastParentheses: false
84 | SpacesInParentheses: false
85 | SpacesInSquareBrackets: false
86 | Standard: Auto
87 | TabWidth: 8
88 | UseTab: Never
89 | ...
90 |
91 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # executables
2 | hello
3 | a.out
4 |
5 | # tag files
6 | tags
7 |
8 | # temp files
9 | *.i
10 | *.bc
11 | *.o
12 | *.hsaco
13 | *.swp
14 | *.swo
15 | *.swn
16 |
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ATMI Install Instructions
2 | =========================
3 |
4 | - [1. Prepare System for ATMI Installation](#prepare-system-for-atmi-installation)
5 | - [2. Install/Build ATMI](#installbuild-atmi)
6 | - [3. Build/Test ATMI Examples](#buildtest-atmi-examples)
7 |
8 |
9 |
10 | ## Prepare System for ATMI Installation
11 |
12 | ATMI works on all platforms that are supported by ROCm, but has been tested mainly on Ubuntu 18.04.2 LTS (Bionic Beaver).
13 | See [here](https://github.com/RadeonOpenCompute/ROCm) for details on all supported hardware/OS configurations and instructions on how to install ROCm for your system.
14 |
15 |
16 |
17 | ## Install/Build ATMI
18 | ATMI can be installed either from the ROCm apt server or built from the source.
19 |
20 | #### Install from the ROCm apt server
21 |
22 | ```
23 | sudo apt-get install atmi
24 | ```
25 |
26 | #### Build from the source
27 |
28 | ```
29 | mkdir -p ~/git
30 | cd ~/git
31 | git clone https://github.com/RadeonOpenCompute/atmi.git
32 | mkdir ~/git/atmi/src/build
33 | cd ~/git/atmi/src/build
34 | # export all GFX target architectures for the ATMI device runtime
35 | export GFXLIST="gfx900 gfx906" # e.g.: gfx900 is for AMD Vega GPUs
36 | # ensure you have cmake (version >= 2.8)
37 | cmake \
38 | -DCMAKE_INSTALL_PREFIX=/path/to/install \
39 | -DCMAKE_BUILD_TYPE={Debug|Release} \
40 | -DLLVM_DIR=/path/to/llvm \ # compiler to build ATMI device runtime and user GPU kernels
41 | -DDEVICE_LIB_DIR=/path/to/device-libs \ # root of ROCm Device Library to link
42 | -DATMI_DEVICE_RUNTIME=ON \ # (optional) to build ATMI device runtime (default: OFF)
43 | -DATMI_HSA_INTEROP=ON \ # (optional) to build ATMI with HSA interop functionality (default: OFF)
44 | -DROCM_DIR=/path/to/hsa \ # (optional) root of ROCm/HSA runtime (default: /opt/rocm)
45 | ..
46 | # make all components (Host runtime and device runtime)
47 | make
48 | make install
49 | export LD_LIBRARY_PATH=/path/to/install/lib:$LD_LIBRARY_PATH # (optional)
50 | ```
51 |
52 |
53 |
54 | ## Build/Test ATMI Examples
55 |
56 | ATMI runtime works with any high level compiler that generates AMD GCN code objects.
57 | The examples here use the OpenCL kernel language and ATMI as the host runtime, but ATMI can also work with any high level
58 | kernel language like HIP or OpenMP as long as they are compiled to AMD GCN code objects.
59 | In this example set, the host code and device code are compiled separately,
60 | and the ATMI host runtime explicitly loads the device module before launching tasks.
61 | ATMI currently supports loading AMD GCN (HSA code objects).
62 | ATMI ships with the CLOC (CL Offline Compiler) utility script, which is a thin wrapper around Clang to help compile CL kernels.
63 |
64 | ```
65 | # Building a simple helloworld example on a two GPU system
66 | cd /path/to/atmi/examples/runtime/helloworld_dGPU
67 | make
68 | # If make does not work, then check the different flags in make to point to the right installed locations of ROCm,
69 | # or directly run cloc.sh with the following options
70 | # /opt/rocm/atmi/bin/cloc.sh -aomp /opt/rocm/llvm -triple amdgcn-amd-amdhsa -libgcn /opt/rocm -clopts "-I/opt/rocm/atmi/include -I/opt/rocm/hsa/include -I. -O2 -v" -opt 2 hw.cl
71 | make test
72 | env LD_LIBRARY_PATH=/opt/rocm/atmi/lib:/opt/rocm/hsa/lib: ./hello
73 | Choosing GPU 0/2
74 | Output from the GPU: Hello HSA World
75 | Output from the CPU: Hello HSA World
76 | ```
77 |
78 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright © 2019 Advanced Micro Devices, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
6 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
7 | persons to whom the Software is furnished to do so, subject to the following conditions:
8 |
9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10 |
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
12 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
13 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ATMI (Asynchronous Task and Memory Interface)
2 | =============================================
3 |
4 | Asynchronous Task and Memory Interface, or ATMI, is a runtime framework for efficient task management in heterogeneous
5 | CPU-GPU systems. It provides a consistent API to create and launch tasks from both CPUs and GPUs (integrated and discrete).
6 | In ATMI, the high-level task configurations can be simply described by using a few predefined C-style structures before launching the tasks.
7 | The task configuration includes the task dimensions and granularity, dependencies to other tasks, data requirements and so on. The ATMI runtime, based on the overall task
8 | graph and individual task configurations, will perform task scheduling and memory management that is optimal for the underlying platform. ATMI provides a rich and flexible
9 | user interface so that the end user can relinquish scheduling to the runtime (default behavior) or take full control of scheduling and
10 | mapping, if desired. The target audience for ATMI is application programmers or middleware developers for high-level languages.
11 |
12 | ## Deprecation Notice ##
13 | ATMI is not being released as part of AMD ROCm software after ROCm 5.6. Previously released branches are still available for reference.
14 | ATMI will henceforth be maintained as a separate research project at https://github.com/AMDResearch/atmi.
15 |
16 | ## ATMI (v0.7) Feature List
17 | - ATMI-RT: host runtime library to manage tasks
18 | - ATMI-DEVRT: device runtime library for managing task enqueue from the GPU to both the CPU and other GPUs in the system
19 | - ATMI-C (experimental): Declarative task-based programming model using C language extensions (works only with GCC)
20 | - A comprehensive machine model for CPUs, integrated GPU (APU) and discrete GPU systems.
21 | - Consistent task management API for CPU tasks and GPU tasks
22 | - GPU kernel language: Any language (e.g., CL and HIP) that can be compiled to AMD GCN code objects
23 | - CPU tasks: support for multi-dimensional task grids (similar to GPU tasks)
24 | - Task dependencies
25 | - Task groups
26 | - (Experimental) Dependencies between task groups
27 | - Recursive tasks (tasks creating other tasks)
28 | - Efficient resource management
29 | - Low latency signaling among dependent tasks
30 | - Kernel argument memory regions
31 | - Reuse of task handles
32 | - Efficient task to work queue scheduling
33 | - Data movement API (synchronous and asynchronous options)
34 | - Asynchronous data movement is treated as an ATMI task in the task graph
35 | - Interoperability with HSA/ROCm: Map between ATMI handles and ROCm data structures for expert programmers
36 | - Supported platforms: all devices that are supported by [ROCm](https://github.com/RadeonOpenCompute/ROCm) are supported by ATMI
37 | - Supported runtime: ROCm v2.1+
38 | - Miscellaneous code refactoring and bug fixes
39 |
40 | ## Compilation and Runtime Workflow
41 | The below figure depicts the ATMI runtime workflow with CLOC as the compiler utility.
42 | 
43 |
44 | ## License
45 |
46 | MIT License
47 |
48 | Copyright © 2019 Advanced Micro Devices, Inc.
49 |
50 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
51 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
52 | persons to whom the Software is furnished to do so, subject to the following conditions:
53 |
54 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
55 |
56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
57 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
58 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
59 |
--------------------------------------------------------------------------------
/bin/mygpu:
--------------------------------------------------------------------------------
1 | mymcpu
--------------------------------------------------------------------------------
/docs/atmi-reference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ROCm/atmi/f11878cbb5e696c9e83dbbdac04dd73047d9ed23/docs/atmi-reference.pdf
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | ```
2 | Examples are categorized into several types depending on the ATMI interface that
3 | is chosen by the programmer: ATMI-C (C extensions), ATMI-RT (Runtime library
4 | interface) or either of these interfaces with device enqueue (_denq). There is
5 | also a directory to showcase ATMI with HSA interoperability.
6 | ```
7 |
8 | ----------
9 | MIT License
10 |
11 | Copyright © 2019 Advanced Micro Devices, Inc.
12 |
13 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
14 | without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
15 | persons to whom the Software is furnished to do so, subject to the following conditions:
16 |
17 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
18 |
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
20 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/examples/c_extension/depends/buildrun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | #this software and associated documentation files (the "Software"), to deal in
8 | #the Software
9 | #without restriction, including without limitation the rights to use, copy,
10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | #Software, and to permit
12 | #persons to whom the Software is furnished to do so, subject to the following
13 | #conditions:
14 | #
15 | #The above copyright notice and this permission notice shall be included in all
16 | #copies or substantial portions of the Software.
17 | #
18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | #FITNESS FOR A PARTICULAR
21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 | #CONTRACT, TORT OR
24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
25 | #OR OTHER DEALINGS IN THE SOFTWARE.
26 |
27 | set -e
28 | # Install locations of HSA and ATMI; a non-empty value already present in the environment wins.
29 | : "${HSA_RUNTIME_PATH:=/opt/rocm/hsa}"
30 | : "${HSA_LIBHSAIL_PATH:=/opt/rocm/hsa/lib}"
31 | : "${HSA_LLVM_PATH:=/usr/bin}"
32 | : "${ATMI_RUNTIME_PATH:=/opt/amd/atmi}"
33 | ATMI_INC="$ATMI_RUNTIME_PATH/include"
34 | # Prepend the runtime libraries; the old value is appended only when non-empty so no empty (current-directory) entry is created.
35 | export LD_LIBRARY_PATH="$HSA_RUNTIME_PATH/lib:$ATMI_RUNTIME_PATH/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
36 | #echo $LD_LIBRARY_PATH
37 |
38 | # Do not compile accelerated functions separately. This script will be invoked by the GCC plugin itself.
39 | echo
40 | #if [ -f csquares_kernels.o ] ; then rm csquares_kernels.o ; fi
41 |
42 | # Compile Main and generate the PIF definitions for host and accelerated functions in csquares.cpp.pifdefs.c
43 | echo
44 | if [ -f csquares ] ; then rm csquares ; fi
45 | echo g++ -o csquares.o -c csquares.cpp -std=c++11 -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=csquares_kernels.cl "-I$ATMI_INC"
46 | g++ -o csquares.o -c csquares.cpp -std=c++11 -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=csquares_kernels.cl "-I$ATMI_INC"
47 |
48 | echo g++ -o csquares csquares.o csquares.cpp.pifdefs.c -std=c++11 -latmi_runtime "-L$ATMI_RUNTIME_PATH/lib" "-L$HSA_RUNTIME_PATH/lib" -lhsa-runtime64 -lelf "-I$ATMI_INC" "-I$HSA_RUNTIME_PATH/include"
49 | g++ -o csquares csquares.o csquares.cpp.pifdefs.c -std=c++11 -latmi_runtime "-L$ATMI_RUNTIME_PATH/lib" "-L$HSA_RUNTIME_PATH/lib" -lhsa-runtime64 -lelf "-I$ATMI_INC" "-I$HSA_RUNTIME_PATH/include"
50 |
51 | # Execute
52 | echo
53 | echo ./csquares
54 | ./csquares
55 |
--------------------------------------------------------------------------------
/examples/c_extension/depends/cleanup.sh:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 | # Remove the build products of this example; -f keeps the script quiet and successful when they are already gone.
26 | rm -f -- *.o
27 | rm -f csquares.cpp.pifdefs.c
28 | rm -f csquares
29 |
30 |
--------------------------------------------------------------------------------
/examples/c_extension/depends/csquares.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | //
7 | // csquares.cpp : Demo of ATMI task dependencies
8 | //
9 | // Creates a diamond DAG using three kernels.
10 | // Demo only, not intended as efficient algorithm.
11 | //
12 | // Init N values: (init) on CPU
13 | // / \
14 | // |/ \|
15 | // Do 1/4 N each: (even_squares) (even_squares)
16 | // on CPU \ / on GPU
17 | // \| |/
18 | // Do odd 1/2 N: (odd_squares) on GPU
19 | //
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include "atmi_c_ext.h"
26 | using namespace std;
27 | typedef int TYPE;
28 |
29 | static const int N = 16; /* multiple of 4 for demo */
30 |
31 | /* ---------------- Kernel declarations -------------*/
32 | // Declare init_kernel as the PIF; init_kernel_cpu is registered as its "cpu" implementation and init_kernel_gpu as its "gpu" implementation
33 | extern "C" void init_kernel_cpu(int *in) __attribute__((atmi_kernel("init_kernel", "cpu")));
34 | __kernel void init_kernel_gpu(__global int *in) __attribute__((atmi_kernel("init_kernel", "gpu")));
35 |
36 |
37 | // Declare even_squares_kernel as the PIF; even_squares_kernel_cpu is its "cpu" implementation and even_squares_kernel_gpu its "gpu" implementation
38 | extern "C" void even_squares_kernel_cpu(
39 | const int *in , int *out) __attribute__((atmi_kernel("even_squares_kernel", "cpu")));
40 | __kernel void even_squares_kernel_gpu(
41 | __global const int *in , __global int *out) __attribute__((atmi_kernel("even_squares_kernel", "gpu")));
42 |
43 |
44 | // Declare odd_squares_kernel as the PIF; odd_squares_kernel_cpu is its "cpu" implementation and odd_squares_kernel_gpu its "gpu" implementation
45 | extern "C" void odd_squares_kernel_cpu(int *out) __attribute__((atmi_kernel("odd_squares_kernel", "cpu")));
46 | __kernel void odd_squares_kernel_gpu(__global int *out) __attribute__((atmi_kernel("odd_squares_kernel", "gpu")));
47 |
48 |
49 |
50 | /* ---------------- Kernel definitions -------------*/
51 | extern "C" void init_kernel_cpu(int *in) {
52 | // CPU implementation of init_kernel: store each index 0..N-1 at its own position in `in`.
53 | for (int idx = 0; idx < N; ++idx) {
54 | in[idx] = idx;
55 | }
56 | }
57 |
58 | /* Middle children: write in[i]*in[i] to out[i] for the first N/4 even indices i = 0, 2, 4, ... */
59 | extern "C" void even_squares_kernel_cpu(const int *in , int *out)
60 | {
61 | // Step directly over the even indices instead of deriving them from a counter.
62 | for (int idx = 0; idx < 2 * (N/4); idx += 2) {
63 | const int value = in[idx];
64 | out[idx] = value * value;
65 | }
66 | }
67 |
68 | /* The last child calculates the squares of the odd numbers from the
69 | squares of the even numbers just below them, because
70 | (X-1)**2 = X**2 - 2X + 1
71 | so X**2 = ((X-1)**2) + 2X - 1
72 | */
73 | extern "C" void odd_squares_kernel_cpu(int *out)
74 | {
75 | // Visit the odd indices 1, 3, ..., 2*(N/2)-1; out[odd-1] must already hold (odd-1)**2.
76 | for (int odd = 1; odd < 2 * (N/2); odd += 2) {
77 | const int prev_square = out[odd - 1];
78 | out[odd] = prev_square + (2 * odd) - 1;
79 | }
80 | }
81 |
82 |
83 | /* -------------- main ------------------*/
84 | int main(int argc, char *argv[]) {
85 | TYPE *inArray = new TYPE[N];
86 | TYPE *outArray = new TYPE[N];
87 |
88 | // Create launch parameters with thread counts
89 | ATMI_LPARM_1D(init_lp,N);
90 | // Each even task calculates 1/4 of the squares
91 | ATMI_LPARM_1D(even_lp,N/4);
92 | // The final odd task does 1/2 of the squares
93 | ATMI_LPARM_1D(odd_lp,N/2);
94 |
95 | atmi_task_handle_t init_tasks[1];
96 | atmi_task_handle_t even_tasks[2];
97 |
98 | // Dispatch init_kernel (CPU implementation selected via kernel_id); even_lp is set below to require it to complete
99 | init_lp->kernel_id = K_ID_init_kernel_cpu;
100 | init_tasks[0] = init_kernel(init_lp, inArray);
101 |
102 | even_lp->num_required = 1;
103 | even_lp->requires = init_tasks;
104 | // Dispatch 2 even_squares kernels and build dependency list for odd_squares.
105 | even_lp->kernel_id = K_ID_even_squares_kernel_gpu;
106 | even_tasks[0] = even_squares_kernel(even_lp, inArray, outArray); // First half of the arrays: GPU implementation selected above
107 | even_lp->kernel_id = K_ID_even_squares_kernel_cpu;
108 | even_tasks[1] = even_squares_kernel(even_lp, &inArray[N/2], &outArray[N/2]); // Second half (starting at N/2): CPU implementation selected above
109 | odd_lp->num_required = 2;
110 | odd_lp->requires = even_tasks;
111 | odd_lp->kernel_id = K_ID_odd_squares_kernel_gpu;
112 | // Now dispatch odd_squares kernel dependent on BOTH even_squares
113 | // kernel_id was set explicitly to the GPU implementation above (the earlier note here said the default kernel_id = 0 is odd_squares_kernel_cpu by virtue of declaration order)
114 | atmi_task_handle_t ret_task = odd_squares_kernel(odd_lp, outArray);
115 |
116 | // Wait for all kernels to complete
117 | SYNC_TASK(ret_task);
118 | // Check results. NOTE(review): the verification loop below looks truncated in this listing - confirm against the original file
119 | bool passed = true;
120 | for (int i=0; i " << outArray[i] << endl;
123 | passed = false;
124 | }
125 | }
126 | cout << endl << (passed ? "PASSED" : "FAILED") << endl;
127 | return 0;
128 | }
129 |
--------------------------------------------------------------------------------
/examples/c_extension/depends/csquares_kernels.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | /*
7 | File: csquares_kernels.cl
8 |
9 | 3 Kernels for csquares.cpp.
10 | This is not intended to be efficient.
11 | It is just a simple demo of dependencies.
12 | */
13 | /* Parent kernel: each work-item stores its own global id into the input array */
14 | __kernel void init_kernel_gpu(__global int *in) {
15 | const int gid = get_global_id(0);
16 | in[gid] = gid;
17 | }
18 |
19 | /* Middle children: each work-item squares the element at index 2 * (its global id) */
20 | __kernel void even_squares_kernel_gpu(
21 | __global const int *in , __global int *out)
22 | {
23 | const int even = get_global_id(0)*2;
24 | out[even] = in[even] * in[even];
25 | }
26 |
27 | /* The last child calculates the squares of the odd numbers from the
28 | squares of the even numbers just below them, because
29 | (X-1)**2 = X**2 - 2X + 1
30 | so X**2 = ((X-1)**2) + 2X - 1
31 | */
32 | __kernel void odd_squares_kernel_gpu(__global int *out)
33 | {
34 | const int odd = (get_global_id(0)*2) + 1;
35 | out[odd] = out[odd-1] + (2*odd) - 1;
36 | }
37 |
--------------------------------------------------------------------------------
/examples/c_extension/eps/Makefile:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 |
26 | # Install locations; each ?= default can be overridden from the environment or the make command line
27 | HSA_RUNTIME_PATH ?= /opt/hsa
28 | HSA_LIBHSAIL_PATH ?= /opt/hsa/lib
29 | HSA_LLVM_PATH ?= /opt/amd/cloc/bin
30 | ATMI_RUNTIME_PATH ?= /opt/amd/atmi
31 | ATMI_INC=${ATMI_RUNTIME_PATH}/include
32 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so
33 | # Compiler, compile flags and header search path used by the eps rule
34 | CC=g++
35 | CFLAGS=-O3 -g
36 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I.
37 | # Libraries to link and the directories searched for them
38 | LIBS=-latmi_runtime -lhsa-runtime64
39 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
40 | # Programs built by 'all' and removed by 'clean'
41 | OBJS = eps
42 |
43 | .PHONY: clean all run
44 | # Default goal: build every program listed in OBJS
45 | all: $(OBJS)
46 | # Compile eps.cpp through the pifgen plugin (which also names pifdefs.cpp as its output), then link it with pifdefs.cpp
47 | eps: eps.cpp
48 | $(CC) -c -o nullKernel.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
49 | #$(CC) -c -o nullKernel.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS)
50 | $(CC) -o $@ nullKernel.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
51 | # Remove object files, *.hsaco files, generated pifdefs sources and the built programs
52 | clean:
53 | rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)
54 | # Build eps if needed, then run it with the environment settings and arguments given in the recipe
55 | run: eps
56 | ATMI_DEPENDENCY_SYNC_TYPE=ATMI_SYNC_CALLBACK ATMI_MAX_HSA_SIGNALS=8 ./eps 2 15
57 |
--------------------------------------------------------------------------------
/examples/c_extension/eps/buildrun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | #this software and associated documentation files (the "Software"), to deal in
8 | #the Software
9 | #without restriction, including without limitation the rights to use, copy,
10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | #Software, and to permit
12 | #persons to whom the Software is furnished to do so, subject to the following
13 | #conditions:
14 | #
15 | #The above copyright notice and this permission notice shall be included in all
16 | #copies or substantial portions of the Software.
17 | #
18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | #FITNESS FOR A PARTICULAR
21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 | #CONTRACT, TORT OR
24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
25 | #OR OTHER DEALINGS IN THE SOFTWARE.
26 |
27 | set -e
28 | # Default each tool location, but keep any value already present in the environment.
29 | [ -z "$HSA_RUNTIME_PATH" ] && HSA_RUNTIME_PATH=/opt/hsa-nov15
30 | #[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/home/aaji/opt/hsa
31 | #[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/hsa
32 | [ -z "$HSA_LIBHSAIL_PATH" ] && HSA_LIBHSAIL_PATH=/opt/hsa/lib
33 | [ -z "$HSA_LLVM_PATH" ] && HSA_LLVM_PATH=/opt/amd/cloc/bin
34 | [ -z "$ATMI_RUNTIME_PATH" ] && ATMI_RUNTIME_PATH=/opt/amd/atmi
35 | ATMI_INC=$ATMI_RUNTIME_PATH/include
36 | # Every build command below is echoed exactly as it is then executed.
37 | echo
38 | #export VT_MODE="STAT:TRACE"
39 | if [ -f eps ] ; then rm eps ; fi
40 | echo g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
41 | #vtc++ -vt:inst compinst -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
42 | #g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-jitcompile=false -O3 -I$ATMI_INC
43 | g++ -c -o nullKernel.o eps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
44 | # The library search path uses ATMI_RUNTIME_PATH; ATMI_PATH is never assigned in this script.
45 | echo g++ -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
46 | #vtc++ -vt:inst compinst -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
47 | g++ -o eps nullKernel.o eps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
48 |
49 | # Execute
50 | echo
51 | echo ./eps
52 | ./eps
53 |
54 |
--------------------------------------------------------------------------------
/examples/c_extension/eps/cleanup.sh:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 | # Remove build products; -f keeps rm from failing when a file is already gone.
26 | rm -f *.o
27 | rm -f *pifdefs.c*
28 | rm -f eps
29 |
30 |
--------------------------------------------------------------------------------
/examples/c_extension/eps/nullKernel.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | /* Kernel with an empty body: the int argument is accepted and never read. */
7 | __kernel void nullKernel_impl(int i){}
8 |
--------------------------------------------------------------------------------
/examples/c_extension/fibonacci/buildrun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
7 | #this software and associated documentation files (the "Software"), to deal in
8 | #the Software
9 | #without restriction, including without limitation the rights to use, copy,
10 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | #Software, and to permit
12 | #persons to whom the Software is furnished to do so, subject to the following
13 | #conditions:
14 | #
15 | #The above copyright notice and this permission notice shall be included in all
16 | #copies or substantial portions of the Software.
17 | #
18 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | #FITNESS FOR A PARTICULAR
21 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 | #CONTRACT, TORT OR
24 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
25 | #OR OTHER DEALINGS IN THE SOFTWARE.
26 |
27 | set -e
28 | # Give each path variable a default install location when it is empty or unset.
29 | [ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/opt/hsa
30 | [ -z $HSA_LIBHSAIL_PATH ] && HSA_LIBHSAIL_PATH=/opt/hsa/lib
31 | [ -z $HSA_LLVM_PATH ] && HSA_LLVM_PATH=/opt/amd/cloc/bin
32 | [ -z $ATMI_RUNTIME_PATH ] && ATMI_RUNTIME_PATH=/opt/amd/atmi
33 | # Header directory for the compile step; both lib directories are prepended to the loader path.
34 | ATMI_INC=$ATMI_RUNTIME_PATH/include
35 | export LD_LIBRARY_PATH=$HSA_RUNTIME_PATH/lib:$ATMI_RUNTIME_PATH/lib:$LD_LIBRARY_PATH
36 | # Drop any previous binary, then compile; each command is echoed before it runs.
37 | echo
38 | if [ -f fibonacci ] ; then rm fibonacci ; fi
39 | echo g++ -c -o fibonacci.o fibonacci.cpp -g -fplugin=atmi_pifgen.so -O3 -I$ATMI_INC
40 | g++ -c -o fibonacci.o fibonacci.cpp -g -fplugin=atmi_pifgen.so -O3 -I$ATMI_INC
41 | # Link the object together with fibonacci.cpp.pifdefs.c.
42 | echo g++ -o fibonacci fibonacci.o fibonacci.cpp.pifdefs.c -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
43 | g++ -o fibonacci fibonacci.o fibonacci.cpp.pifdefs.c -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
44 |
45 |
46 | # Run the binary, forwarding this script's first argument.
47 | echo
48 | echo ./fibonacci $1
49 | ./fibonacci $1
50 |
--------------------------------------------------------------------------------
/examples/c_extension/fibonacci/cleanup.sh:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 | # Remove build products; -f keeps rm from failing when a file is already gone.
26 | rm -f fibonacci.cpp.pifdefs.c
27 | rm -f *.o
28 | rm -f fibonacci
29 |
30 |
--------------------------------------------------------------------------------
/examples/c_extension/fibonacci/fibonacci.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include "atmi_c_ext.h"
13 |
14 | using namespace std;
15 |
16 | // True exactly when the handle compares equal to zero (the value this
17 | // file treats as "no task").
18 | bool is_null_task(atmi_task_handle_t t) {
19 |   const bool handle_is_zero = (t == 0ull);
20 |   return handle_is_zero;
21 | }
24 | // sum_cpu is tagged with atmi_kernel("sum", "CPU"); fib() below invokes it through sum(...).
25 | extern "C" void sum_cpu(int *a, int *b, int *c) __attribute__((atmi_kernel("sum", "CPU")));
26 |
27 | extern "C" void sum_cpu(int *a, int *b, int *c) {
28 | *c = *a + *b; // write the sum through c
29 | delete a; // both operands are freed here; fib() allocates them with new
30 | delete b;
31 | }
32 |
33 | /* Recursive Fibonacci: n < 2 is written directly, larger n goes through sum(). */
34 | void fib(const int n , int *result , atmi_task_handle_t *my_sum_task) {
35 |   if (n < 2) {
36 |     // Base case: the value is n itself and no task is reported.
37 |     *result = n;
38 |     *my_sum_task = NULL_TASK;
39 |     return;
40 |   }
41 |
42 |   // Heap-allocated operands; sum_cpu deletes both after adding them.
43 |   int *lhs = new int;
44 |   int *rhs = new int;
45 |   atmi_task_handle_t child_tasks[2];
46 |   fib(n - 1, lhs, &child_tasks[0]);
47 |   fib(n - 2, rhs, &child_tasks[1]);
48 |
49 |   // Only children that reported a non-null task become prerequisites.
50 |   ATMI_LPARM(lparm_child);
51 |   atmi_task_handle_t deps[2];
52 |   int dep_count = 0;
53 |   for (int i = 0; i < 2; ++i) {
54 |     if (!is_null_task(child_tasks[i])) deps[dep_count++] = child_tasks[i];
55 |   }
56 |   lparm_child->num_required = dep_count;
57 |   lparm_child->requires = deps;
58 |   *my_sum_task = sum(lparm_child, lhs, rhs, result);
59 | }
60 |
61 | int main(int argc, char *argv[]) {
62 |   // Fibonacci index: the first command-line argument when given, otherwise 10.
63 |   const int N = (argc > 1) ? atoi(argv[1]) : 10;
64 |   int result;
65 |   atmi_task_handle_t root_sum_task;
66 |   fib(N, &result, &root_sum_task);
67 |   // When a root task was reported, wait on it before reading result.
68 |   if (!is_null_task(root_sum_task)) {
69 |     SYNC_TASK(root_sum_task);
70 |   }
71 |   cout << "Fib(" << N << ") = " << result << endl;
72 |   return 0;
73 | }
74 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld/HelloWorld.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #include
7 | #include
8 | #include
9 | using namespace std;
10 | #include "atmi.h"
11 |
12 | // Declare decode as the PIF for the CPU kernel decode_cpu
13 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) __attribute__((atmi_kernel("decode", "cpu")));
14 |
15 | // Declare decode as the PIF for the GPU kernel decode_gpu
16 | __kernel void decode_gpu(__global const char* in, __global char *out, const size_t strlength) __attribute__((atmi_kernel("decode", "gpu")));
17 |
18 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) {
19 |   // Writes in[i] + 1 to out[i] for the first strlength bytes; nothing is
20 |   // written when strlength is 0. The index is size_t so it has the same
21 |   // type as strlength (no signed/unsigned comparison, no int overflow).
22 |   for (size_t num = 0; num < strlength; ++num) out[num] = in[num] + 1;
23 | }
24 |
25 | int main(int argc, char* argv[]) {
26 |   // Encoded input; both decode implementations add 1 to every byte.
27 |   const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
28 |   size_t strlength = strlen(input);
29 |   char *output_cpu = (char*) malloc(strlength + 1);
30 |   char *output_gpu = (char*) malloc(strlength + 1);
31 |   // malloc can fail; stop before a null buffer reaches a kernel.
32 |   if (!output_cpu || !output_gpu) { free(output_cpu); free(output_gpu); return 1; }
33 |
34 |   ATMI_LPARM_1D(lparm, strlength);
35 |   lparm->synchronous = ATMI_TRUE;
36 |   lparm->kernel_id = K_ID_decode_gpu;  // first run: GPU implementation
37 |   decode(lparm, input, output_gpu, strlength);
38 |   output_gpu[strlength] = '\0';
39 |   lparm->kernel_id = K_ID_decode_cpu;  // second run: CPU implementation, one work-item
40 |   lparm->WORKITEMS = 1;
41 |   decode(lparm, input, output_cpu, strlength);
42 |   output_cpu[strlength] = '\0';
43 |   cout << "Output from the CPU: " << output_cpu << endl;
44 |   cout << "Output from the GPU: " << output_gpu << endl;
45 |   free(output_cpu);
46 |   free(output_gpu);
47 |   return 0;
48 | }
49 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld/Makefile:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 |
26 | # Install prefixes; `?=` keeps any value the caller already provided.
27 | HSA_RUNTIME_PATH ?= /opt/rocm
28 | ATMI_RUNTIME_PATH ?= /opt/rocm/libatmi
29 | ATMI_INC=${ATMI_RUNTIME_PATH}/include
30 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so
31 |
32 | CC=g++
33 | CFLAGS=-O3 -g
34 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/hsa/include -I.
35 |
36 | LIBS=-latmi_runtime
37 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
38 |
39 | OBJS = hello
40 | # clean and all do not name files they create.
41 | .PHONY: clean all
42 |
43 | all: $(OBJS)
44 | # Compile HelloWorld.cpp with the pifgen plugin (hw.cl and pifdefs.cpp are passed as plugin arguments), then link HelloWorld.o with pifdefs.cpp.
45 | hello: HelloWorld.cpp
46 | $(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
47 | #$(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS)
48 | #$(CC) -o $@ HelloWorld.o hw.c $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
49 | $(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
50 |
51 | clean:
52 | rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)
53 |
54 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | /* Each work-item with global id below strlength writes in[id] + 1 to out[id]; the id is kept as size_t, the type of both get_global_id() and strlength. */
7 | __kernel void decode_gpu(__global const char* in, __global char* out, const size_t strlength) {
8 |   size_t num = get_global_id(0);
9 |   if(num < strlength)
10 |     out[num] = in[num] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld_dGPU/HelloWorld.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | #include
10 | using namespace std;
11 | #include "atmi.h"
12 | #include "atmi_runtime.h"
13 |
14 | // Declare decode as the PIF for the CPU kernel decode_cpu
15 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) __attribute__((atmi_kernel("decode", "cpu")));
16 |
17 | // Declare decode as the PIF for the GPU kernel decode_gpu
18 | __kernel void decode_gpu(__global const char* in, __global char *out, const size_t strlength) __attribute__((atmi_kernel("decode", "gpu")));
19 |
20 | extern "C" void decode_cpu(const char* in, char* out, const size_t strlength) {
21 |   // Writes in[i] + 1 to out[i] for the first strlength bytes; nothing is
22 |   // written when strlength is 0. The index is size_t so it has the same
23 |   // type as strlength (no signed/unsigned comparison, no int overflow).
24 |   for (size_t num = 0; num < strlength; ++num) out[num] = in[num] + 1;
25 | }
26 |
27 | int main(int argc, char* argv[]) {
28 |   atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
29 |   if(err != ATMI_STATUS_SUCCESS) return -1;
30 |
31 |   const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
32 |   size_t strlength = strlen(input);
33 |   atmi_mem_place_t place = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
34 |   // Buffers given to the GPU kernel come from atmi_malloc; each allocation is checked before use.
35 |   char *input_gpu = NULL;
36 |   err = atmi_malloc((void **)&input_gpu, strlength + 1, place);
37 |   if(err != ATMI_STATUS_SUCCESS || !input_gpu) return -1;
38 |   memcpy(input_gpu, input, strlength + 1);  // the +1 copies the terminating NUL as well
39 |   char *output_cpu = (char*) malloc(strlength + 1);
40 |   char *output_gpu = NULL;
41 |   err = atmi_malloc((void **)&output_gpu, strlength + 1, place);
42 |   if(err != ATMI_STATUS_SUCCESS || !output_gpu || !output_cpu) return -1;
43 |
44 |   ATMI_LPARM_1D(lparm, strlength);
45 |   lparm->synchronous = ATMI_TRUE;
46 |   // First run: GPU implementation, reading and writing the atmi_malloc'ed buffers.
47 |   lparm->kernel_id = K_ID_decode_gpu;
48 |   lparm->place = ATMI_PLACE_GPU(0, 0);
49 |   decode(lparm, input_gpu, output_gpu, strlength);
50 |   output_gpu[strlength] = '\0';
51 |   // Second run: CPU implementation with a single work-item.
52 |   lparm->kernel_id = K_ID_decode_cpu;
53 |   lparm->place = ATMI_PLACE_CPU(0, 0);
54 |   lparm->WORKITEMS = 1;
55 |   decode(lparm, input, output_cpu, strlength);
56 |   output_cpu[strlength] = '\0';
57 |   cout << "Output from the CPU: " << output_cpu << endl;
58 |   cout << "Output from the GPU: " << output_gpu << endl;
59 |   free(output_cpu);
60 |   atmi_free(output_gpu);
61 |   atmi_free(input_gpu);
62 |   return 0;
63 | }
64 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld_dGPU/Makefile:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 |
26 | # Install prefixes; `?=` keeps any value the caller already provided.
27 | HSA_RUNTIME_PATH ?= /opt/rocm
28 | ATMI_RUNTIME_PATH ?= /opt/rocm/libatmi
29 | ATMI_INC=${ATMI_RUNTIME_PATH}/include
30 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so
31 |
32 | CC=g++
33 | CFLAGS=-O3 -g -std=c++11
34 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/hsa/include -I.
35 |
36 | LIBS=-latmi_runtime
37 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
38 |
39 | OBJS = hello
40 | # clean and all do not name files they create.
41 | .PHONY: clean all
42 |
43 | all: $(OBJS)
44 | # Compile HelloWorld.cpp with the pifgen plugin (hw.cl and pifdefs.cpp are passed as plugin arguments), then link HelloWorld.o with pifdefs.cpp.
45 | hello: HelloWorld.cpp
46 | $(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
47 | #$(CC) -c -o HelloWorld.o $^ -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -fplugin-arg-atmi_pifgen-jitcompile=false $(CFLAGS) $(INC_FLAGS)
48 | #$(CC) -o $@ HelloWorld.o hw.c $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
49 | $(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
50 |
51 | clean:
52 | rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)
53 |
54 |
--------------------------------------------------------------------------------
/examples/c_extension/helloworld_dGPU/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | /* Each work-item with global id below strlength writes in[id] + 1 to out[id]; the id is kept as size_t, the type of both get_global_id() and strlength. */
7 | __kernel void decode_gpu(__global const char* in, __global char* out, const size_t strlength) {
8 |   size_t num = get_global_id(0);
9 |   if(num < strlength)
10 |     out[num] = in[num] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/c_extension/kps/buildrun.sh:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 | # NOTE: the "#!" line below is not the first line of the file, so it does not select the interpreter.
26 | #!/bin/bash
27 | set -e
28 | # Default each tool location, but keep any value already present in the environment.
29 | [ -z "$HSA_RUNTIME_PATH" ] && HSA_RUNTIME_PATH=/opt/hsa
30 | #[ -z $HSA_RUNTIME_PATH ] && HSA_RUNTIME_PATH=/home/aaji/opt/hsa-nov15
31 | [ -z "$HSA_LIBHSAIL_PATH" ] && HSA_LIBHSAIL_PATH=/opt/hsa/lib
32 | [ -z "$HSA_LLVM_PATH" ] && HSA_LLVM_PATH=/opt/amd/cloc/bin
33 | [ -z "$ATMI_RUNTIME_PATH" ] && ATMI_RUNTIME_PATH=/opt/amd/atmi
34 | ATMI_INC=$ATMI_RUNTIME_PATH/include
35 | # Every build command below is echoed before it is executed.
36 | echo
37 | #export VT_MODE="STAT:TRACE"
38 | if [ -f kps ] ; then rm kps ; fi
39 | echo g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
40 | #vtc++ -vt:inst compinst -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
41 | g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
42 | # The library search path uses ATMI_RUNTIME_PATH; ATMI_PATH is never assigned in this script.
43 | echo g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
44 | #vtc++ -vt:inst compinst -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
45 | g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
46 |
47 | # Execute
48 | echo
49 | echo ./kps
50 | ./kps
51 |
52 |
--------------------------------------------------------------------------------
/examples/c_extension/kps/cleanup.sh:
--------------------------------------------------------------------------------
1 | #MIT License
2 | #
3 | #Copyright © 2016 Advanced Micro Devices, Inc.
4 | #
5 | #Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | #this software and associated documentation files (the "Software"), to deal in
7 | #the Software
8 | #without restriction, including without limitation the rights to use, copy,
9 | #modify, merge, publish, distribute, sublicense, and/or sell copies of the
10 | #Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following
12 | #conditions:
13 | #
14 | #The above copyright notice and this permission notice shall be included in all
15 | #copies or substantial portions of the Software.
16 | #
17 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | #FITNESS FOR A PARTICULAR
20 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 | #BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22 | #CONTRACT, TORT OR
23 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
24 | #OR OTHER DEALINGS IN THE SOFTWARE.
25 | # Remove build products; -f keeps rm from failing when a file is already gone.
26 | rm -f *.o
27 | rm -f *pifdefs.c*
28 | rm -f kps
29 |
30 |
--------------------------------------------------------------------------------
/examples/c_extension/kps/nullKernel.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | /* Kernel with an empty body: the long int argument is accepted and never read. */
7 | __kernel void nullKernel_impl(long int kcalls){}
8 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/helloworld/HelloWorld.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | using namespace std;
10 | #include "atmi_c_ext.h"
11 | #include "atmi_kl.h"
12 |
13 | __kernel void mainTask_gpu(__global atmi_task_handle_t thisTask, int numTasks) __attribute__((atmi_kernel("mainTask", "gpu")));
14 |
15 | __kernel void subTask_gpu(__global atmi_task_handle_t thisTask, int taskId) __attribute__((atmi_kernel("subTask", "gpu")));
16 |
17 | extern "C" void print_taskId_cpu(__global atmi_task_handle_t thisTask, int taskId) __attribute__((atmi_kernel("print", "cpu")));
18 |
19 | // CPU kernel tagged as "print": writes one line with the given taskId to stdout; thisTask is not used.
20 | extern "C" void print_taskId_cpu(__global atmi_task_handle_t thisTask, int taskId)
21 | {
22 |   std::cout << "Leaf Sub-task ID" << ": " << taskId << std::endl;
23 | }
24 |
25 | extern atmi_klist_t *atmi_klist;
26 | int main(int argc, char* argv[]) {
27 |   // Work-item count for the single mainTask launch; also passed as its argument.
28 |   const int task_count = 16;
29 |
30 |   ATMI_LPARM_1D(lparm, task_count);
31 |   lparm->synchronous = ATMI_TRUE;
32 |   lparm->groupable = ATMI_TRUE;
33 |   lparm->kernel_id = K_ID_mainTask_gpu;
34 |   mainTask(lparm, task_count);
35 |
36 |   // Debug output: the first int stored at atmi_klist, then the pointer held in its tasks member.
37 |   std::cout << "Number: " << *(int *)atmi_klist << std::endl;
38 |   std::cout << "Number: " << (void *)atmi_klist->tasks << std::endl;
39 |   return 0;
40 | }
42 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/helloworld/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | #persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | #
12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | #
16 | # Install locations; `?=` leaves any value already supplied by the caller untouched.
17 | HSA_RUNTIME_PATH ?= /opt/hsa
18 | HSA_LIBHSAIL_PATH ?= ${HSA_RUNTIME_PATH}/lib
19 | CLOC_PATH ?= ${HOME}/git/CLOC/bin
20 | ATMI_RUNTIME_PATH ?= ${HOME}/git/atmi
21 | ATMI_INC=${ATMI_RUNTIME_PATH}/include
22 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so
23 |
24 | CC=g++
25 | CFLAGS=-O3 -g
26 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I.
27 |
28 | LIBS=-latmi_runtime
29 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib
30 |
31 | OBJS = hello
32 |
33 | .PHONY: clean all
34 |
35 | all: $(OBJS)
36 | # hw.cl stays a prerequisite so edits to it trigger a rebuild, but only the first prerequisite ($<) is given to the compiler. HSA_LLVM_PATH follows CLOC_PATH so overriding CLOC_PATH takes effect.
37 | hello: HelloWorld.cpp hw.cl
38 | HSA_LLVM_PATH=$(CLOC_PATH) $(CC) -c -o HelloWorld.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=hw.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
39 | $(CC) -o $@ HelloWorld.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
40 | # The *pifdefs.c* pattern also removes the pifdefs.cpp named in the compile step.
41 | clean:
42 | rm -rf *.o *.hsaco *pifdefs.c* $(OBJS)
43 |
44 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/helloworld/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi.h"
8 | __kernel void subTask_gpu(atmi_task_handle_t thisTask, int taskId) {
9 | ATMI_KLPARM_1D(klparm, 1, thisTask);
10 | klparm->kernel_id = K_ID_print_taskId_cpu; // tell print to use the print_taskId_cpu kernel
11 | print(klparm, taskId); // pass this task's id on to print
12 | }
13 |
14 | __kernel void mainTask_gpu(atmi_task_handle_t thisTask, int numTasks) { // each work-item launches one subTask; numTasks is not read here
15 | int gid = get_global_id(0);
16 | ATMI_KLPARM_1D(klparm, 1, thisTask);
17 | klparm->kernel_id = K_ID_subTask_gpu; //tell subTask to use the subTask_gpu kernel
18 | subTask(klparm, gid); // the work-item's global id becomes the child's taskId
19 | //klparm->kernel_id = K_ID_print_taskId_cpu; //tell print_taskId to use print_taskId_cpu
20 | //print(klparm, gid);
21 | }
22 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/kps/buildrun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #MIT License
4 | #
5 | #Copyright © 2016 Advanced Micro Devices, Inc.
6 | #
7 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
8 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
9 | #persons to whom the Software is furnished to do so, subject to the following conditions:
10 | #
11 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | #
13 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
15 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16 | #
17 | set -e # stop at the first failing build step
18 | # Set HSA Environment variables (each default applies only when the variable is unset or empty)
19 | [ -z "$HSA_RUNTIME_PATH" ] && HSA_RUNTIME_PATH=/opt/hsa
20 | [ -z "$HSA_LIBHSAIL_PATH" ] && HSA_LIBHSAIL_PATH=${HSA_RUNTIME_PATH}/lib
21 | [ -z "$HSA_LLVM_PATH" ] && HSA_LLVM_PATH=/opt/amd/cloc/bin
22 | [ -z "$ATMI_RUNTIME_PATH" ] && ATMI_RUNTIME_PATH=${HOME}/git/atmi
23 | ATMI_INC=$ATMI_RUNTIME_PATH/include
24 |
25 | echo
26 | #export VT_MODE="STAT:TRACE"
27 | if [ -f kps ] ; then rm kps ; fi
28 | echo g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
29 | #vtc++ -vt:inst compinst -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
30 | HSA_LLVM_PATH=${HOME}/git/CLOC/bin g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -O3 -I$ATMI_INC
31 | #HSA_LLVM_PATH=${HOME}/git/CLOC/bin g++ -c -o nullKernel.o kps.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=nullKernel.cl -fplugin-arg-atmi_pifgen-jitcompile=false -O3 -I$ATMI_INC
32 |
33 | echo g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
34 | #vtc++ -vt:inst compinst -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
35 | g++ -o kps nullKernel.o kps.cpp.pifdefs.c -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_RUNTIME_PATH/include
36 |
37 | # Execute
38 | echo
39 | echo LD_LIBRARY_PATH=/opt/hsa-nov15/lib:$LD_LIBRARY_PATH ./kps
40 | LD_LIBRARY_PATH=/opt/hsa-nov15/lib:$LD_LIBRARY_PATH ./kps
41 |
42 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/kps/cleanup.sh:
--------------------------------------------------------------------------------
1 | #
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | #persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | #
12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | #
16 | rm -f *.o # -f: a missing artifact is not an error, so cleanup can be re-run safely
17 | rm -f *pifdefs.c*
18 | rm -f kps
19 |
20 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/kps/nullKernel.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi.h"
8 | __kernel void subTask_gpu(atmi_task_handle_t thisTask) { // leaf task: returns immediately
9 | return; // NOTE(review): makes the launch code below unreachable — looks deliberate for a "null kernel"; confirm
10 | ATMI_KLPARM_1D(klparm, 1, thisTask);
11 | klparm->kernel_id = K_ID_subTask_cpu;
12 | subTask(klparm);
13 | }
14 |
15 | __kernel void mainTask_gpu(atmi_task_handle_t thisTask, int numTasks) {
16 |     // Each work-item launches subTask (subTask_gpu implementation) numTasks times, one after another.
17 |     ATMI_KLPARM_1D(klparm, 1, thisTask);
18 |     klparm->kernel_id = K_ID_subTask_gpu;
19 |     int i;
20 |     for(i = 0; i < numTasks; i++)
21 |         subTask(klparm);
22 | }
23 |
24 | __kernel void mainTask_recursive_gpu(atmi_task_handle_t thisTask, int numTasks) { // re-launches itself with a smaller numTasks, then launches one subTask
25 | int gid = get_global_id(0);
26 | int gsize = get_global_size(0);
27 | //if(gid == 0) {
28 | if(numTasks > 1) { // with the gid guard commented out, every work-item takes this path
29 | int new_numTasks;;
30 | int new_workitems;
31 | if(gsize >= numTasks) { // enough work-items already: last level, numTasks work-items with 1 task each
32 | new_numTasks = 1;
33 | new_workitems = numTasks;
34 | }
35 | else { // NOTE(review): if gsize == 1 this leaves new_numTasks == numTasks, so the re-launch would not shrink — confirm callers never use a global size of 1
36 | new_numTasks = numTasks/gsize;
37 | new_workitems = gsize;
38 | }
39 | ATMI_KLPARM_1D(klparm, new_workitems, thisTask);
40 | klparm->kernel_id = K_ID_mainTask_recursive_gpu;
41 | mainTask(klparm, new_numTasks);
42 | }
43 | ATMI_KLPARM_1D(klparm_sub, 1, thisTask);
44 | klparm_sub->kernel_id = K_ID_subTask_gpu;
45 | subTask(klparm_sub); // launched regardless of numTasks
46 | //}
47 | }
48 |
49 | __kernel void mainTask_binary_tree_gpu(atmi_task_handle_t thisTask, int numTasks) { // splits numTasks across two child mainTask launches
50 | int gid = get_global_id(0);
51 | //if(gid == 0) {
52 | ATMI_KLPARM_1D(klparm, 1, thisTask);
53 | klparm->kernel_id = K_ID_mainTask_recursive_gpu; // NOTE(review): children run the *recursive* kernel, not this binary-tree one — confirm this is intended
54 | if(numTasks > 1) {
55 | mainTask(klparm, numTasks/2); // first child gets numTasks/2
56 | mainTask(klparm, numTasks/2 - 1); // second child gets one fewer
57 | }
58 | //}
59 | }
60 |
61 | __kernel void mainTask_flat_gpu(atmi_task_handle_t thisTask, int numTasks) { // flat launch: numTasks is not read here
62 | if(get_global_id(0) % 64 == 0) { // only every 64th work-item launches a subTask
63 | ATMI_KLPARM_1D(klparm, 1, thisTask);
64 | klparm->kernel_id = K_ID_subTask_gpu;
65 | subTask(klparm);
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/reduction/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | #persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | #
12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | #
16 | #Set HSA Environment variables
17 | HSA_RUNTIME_PATH ?= /opt/hsa-nov15
18 | HSA_LIBHSAIL_PATH ?= ${HSA_RUNTIME_PATH}/lib
19 | HSA_LLVM_PATH ?= /opt/amd/cloc/bin
20 | ATMI_RUNTIME_PATH ?= /opt/amd/atmi
21 | ATMI_INC=${ATMI_RUNTIME_PATH}/include
22 | PLUGIN_LIB = ${ATMI_RUNTIME_PATH}/lib/atmi_pifgen.so
23 |
24 | CC=g++
25 | CFLAGS=-O3 -g
26 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I.
27 |
28 | LIBS=-latmi_runtime -lhsa-runtime64 -lelf
29 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
30 |
31 | OBJS = reduction
32 |
33 | .PHONY: clean all
34 |
35 | all: $(OBJS)
36 |
37 | reduction: Reduction.cpp reduction.cl # reduction.cl is a rebuild dependency only; the plugin is pointed at it via -clfile
38 | 	$(CC) -c -o Reduction.o $< -fplugin=$(PLUGIN_LIB) -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp $(CFLAGS) $(INC_FLAGS)
39 | 	$(CC) -o $@ Reduction.o pifdefs.cpp $(CFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
40 |
41 | clean:
42 | rm -rf *.o *pifdefs.c* $(OBJS)
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/reduction/Reduction.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | #include
10 | using namespace std;
11 | #include "atmi.h"
12 |
13 | // Declare reduction as the PIF for the CPU kernel reduction_cpu
14 | extern "C" void reduction_cpu(atmi_task_handle_t thisTask, int* in, int length) __attribute__((atmi_kernel("reduction", "cpu")));
15 | extern "C" void reduction_cpu(atmi_task_handle_t thisTask, int* in, int length) {
16 |     // Pairwise fold: each pass adds the element `stride` slots ahead into the first `stride` slots, then halves the stride.
17 |     // Reads in[0 .. 2*length-1]; for a power-of-two length the total ends up in in[0].
18 |     int stride = length;
19 |     while (stride > 0) {
20 |         for (int idx = 0; idx < stride; ++idx)
21 |             in[idx] += in[idx + stride];
22 |         stride >>= 1;
23 |     }
24 | }
25 |
26 | // Declare reduction as the PIF for the GPU kernel implementation reduction_gpu
27 | __kernel void reduction_gpu(atmi_task_handle_t thisTask, __global int* in, int length) __attribute__((atmi_kernel("reduction", "gpu")));
28 |
29 | int main(int argc, char* argv[]) { // runs the reduction PIF once on the GPU and once on the CPU, then prints both sums
30 |     int length = 1024;
31 |     int *input_gpu = (int*) malloc(sizeof(int)*(length));
32 |     int *input_cpu = (int*) malloc(sizeof(int)*(length));
33 |     if (input_gpu == NULL || input_cpu == NULL) { // do not dereference a failed allocation
34 |         fprintf(stderr, "Failed to allocate input buffers\n");
35 |         free(input_gpu); free(input_cpu); // free(NULL) is a no-op
36 |         return 1;
37 |     }
38 |     for(int ii = 0; ii < length; ii++) input_cpu[ii] = input_gpu[ii] = 1; // every element is 1
39 |     ATMI_LPARM_1D(lparm_gpu, length >> 1);
40 |     lparm_gpu->synchronous = ATMI_TRUE;
41 |     lparm_gpu->kernel_id = K_ID_reduction_gpu;
42 |     // Both launches are given half the buffer length: each step folds in[i + length] into in[i].
43 |     reduction(lparm_gpu, input_gpu, length >> 1);
44 |
45 |     ATMI_LPARM_1D(lparm_cpu, length >> 1);
46 |     lparm_cpu->synchronous = ATMI_TRUE;
47 |     lparm_cpu->kernel_id = K_ID_reduction_cpu;
48 |     reduction(lparm_cpu, input_cpu, length >> 1);
49 |
50 |     printf("GPU Sum: %d\n", input_gpu[0]);
51 |     printf("CPU Sum: %d\n", input_cpu[0]);
52 |     free(input_gpu);
53 |     free(input_cpu);
54 |     return 0;
55 | }
56 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/reduction/buildrun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #MIT License
4 | #
5 | #Copyright © 2016 Advanced Micro Devices, Inc.
6 | #
7 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
8 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
9 | #persons to whom the Software is furnished to do so, subject to the following conditions:
10 | #
11 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | #
13 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
15 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16 | #
17 | set -e # stop at the first failing build step
18 | # Set HSA Environment variables (each default applies only when the variable is unset or empty)
19 | [ -z "$HSA_TEST_RUNTIME_PATH" ] && HSA_TEST_RUNTIME_PATH=/opt/hsa.1_1T/
20 | [ -z "$HSA_LIBHSAIL_PATH" ] && HSA_LIBHSAIL_PATH=$HSA_TEST_RUNTIME_PATH/lib
21 | [ -z "$ATMI_RUNTIME_PATH" ] && ATMI_RUNTIME_PATH=/opt/amd/atmi
22 | ATMI_INC=$ATMI_RUNTIME_PATH/include
23 |
24 | # Do not compile accelerated functions separately. This script will be invoked by the GCC plugin itself.
25 |
26 | # Compile Main and generate the PIF definitions for host and accelerated functions
27 | # in pifdefs.cpp (the name passed via -fplugin-arg-atmi_pifgen-pifgenfile)
28 | #echo
29 | if [ -f reduction ] ; then rm reduction ; fi
30 | echo g++ -c -o Reduction.o Reduction.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -O3 -I$ATMI_INC
31 | g++ -c -o Reduction.o Reduction.cpp -g -fplugin=atmi_pifgen.so -fplugin-arg-atmi_pifgen-clfile=reduction.cl -fplugin-arg-atmi_pifgen-pifgenfile=pifdefs.cpp -O3 -I$ATMI_INC
32 |
33 | echo g++ -o reduction Reduction.o pifdefs.cpp -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_TEST_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_TEST_RUNTIME_PATH/include
34 | g++ -o reduction Reduction.o pifdefs.cpp -g -O3 -lelf -L$ATMI_RUNTIME_PATH/lib -latmi_runtime -L$HSA_TEST_RUNTIME_PATH/lib -lhsa-runtime64 -I$ATMI_INC -I$HSA_TEST_RUNTIME_PATH/include
35 |
36 | # Execute
37 | echo
38 | echo LD_LIBRARY_PATH=$HSA_TEST_RUNTIME_PATH/lib:$LD_LIBRARY_PATH ./reduction
39 | LD_LIBRARY_PATH=$HSA_TEST_RUNTIME_PATH/lib:$LD_LIBRARY_PATH ./reduction
40 | #gdb reduction
41 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/reduction/cleanup.sh:
--------------------------------------------------------------------------------
1 | #
2 | #MIT License
3 | #
4 | #Copyright © 2016 Advanced Micro Devices, Inc.
5 | #
6 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
7 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | #persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | #
12 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
13 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
14 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | #
16 | rm -f *.o # -f: a missing artifact is not an error, so cleanup can be re-run safely
17 | rm -f *pifdefs.c*
18 | rm -f reduction
19 |
--------------------------------------------------------------------------------
/examples/c_extension_denq/reduction/reduction.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi.h"
8 | __kernel void reduction_gpu(atmi_task_handle_t thisTask, __global int* in, int length) { // one halving step, then launches the next step
9 | int num = get_global_id(0);
10 |
11 | in[num] += in[num + length]; // fold the element `length` slots ahead into this work-item's slot
12 |
13 | barrier(CLK_GLOBAL_MEM_FENCE); // NOTE(review): barrier() only synchronizes one work-group; if a launch spans several, the child launched below may start before every partial sum is written — confirm
14 |
15 | if(num == 0) // only global work-item 0 launches the next step
16 | {
17 | length = length >> 1;
18 | ATMI_KLPARM_1D(klparm, length, thisTask);
19 | if(length > 8) // stay on the GPU while the halved length exceeds 8, otherwise hand the rest to reduction_cpu
20 | klparm->kernel_id = K_ID_reduction_gpu;
21 | else
22 | klparm->kernel_id = K_ID_reduction_cpu;
23 | reduction(klparm, in, length);
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/examples/interop/globalsymbol/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set ROCM device lib environment variables
26 | ROCM_DEVICE_PATH ?= /opt/rocm
27 |
28 | #Set LC Environment variables
29 | AMDLLVM ?= /opt/amd/llvm
30 |
31 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
32 |
33 | #MCPU
34 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
35 | #$(info MCPU $(MCPU))
36 |
37 | # Kernel compiler
38 | CLC ?= 1
39 |
40 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include/hsa -I.
41 |
42 | # CLOC
43 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
44 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
45 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
46 |
47 | # ROCm-Device-lib
48 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
49 |
50 | # GPU compiler
51 | CLCC=$(AMDLLVM)/bin/clang
52 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
53 | CLCFLAGS += -target amdgcn--amdhsa
54 | CLCFLAGS += -mcpu=$(MCPU)
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
56 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
57 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
58 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
59 |
60 | # Host compiler
61 | CXX = g++
62 | CXXFLAGS =-O3 -g -std=c++11
63 | #CXXFLAGS += -v
64 |
65 | LIBS=-latmi_runtime
66 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
67 |
68 | OBJS = globalsymbol
69 |
70 | .PHONY: clean all test # test is a command-only target too; keep a stray file named "test" from masking it
71 |
72 | all: $(OBJS)
73 |
74 | globalsymbol: globalsymbol.cpp globalsymbol.cl
75 | ifeq ($(CLC),1)
76 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 globalsymbol.cl
77 | else
78 | $(CLCC) ${CLCFLAGS} -o globalsymbol.hsaco globalsymbol.cl
79 | endif
80 | $(CXX) -o $@ globalsymbol.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
81 |
82 | clean:
83 | rm -rf *.o *.hsaco $(OBJS)
84 |
85 | test:
86 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./globalsymbol
87 |
--------------------------------------------------------------------------------
/examples/interop/globalsymbol/globalsymbol.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | int multiplier = 4;
8 |
9 | __kernel void multiply_gpu(__global float *a, size_t sz) { // scales a[0..sz) in place by the program-scope `multiplier`
10 |     size_t gid = get_global_id(0); // size_t matches both get_global_id() and sz: no signed/unsigned comparison
11 |     if(gid < sz) // work-items past the end of the array do nothing
12 |         a[gid] *= multiplier;
13 | }
14 |
--------------------------------------------------------------------------------
/examples/interop/globalsymbol/globalsymbol.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_runtime.h"
8 | #include "atmi_interop_hsa.h"
9 | #include
10 | #include
11 | #include
12 | #include
13 | using namespace std;
14 | #ifdef __cplusplus
15 | #define _CPPSTRING_ "C"
16 | #endif
17 | #ifndef __cplusplus
18 | #define _CPPSTRING_
19 | #endif
20 |
21 | enum {
22 | GPU_IMPL = 42
23 | };
24 |
25 | int main(int argc, char **argv) { // runs multiply_gpu twice: default device-side multiplier, then one overwritten from the host
26 |     atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
27 |     if(err != ATMI_STATUS_SUCCESS) return -1;
28 |     const char *module = "globalsymbol.hsaco";
29 |     atmi_platform_type_t module_type = AMDGCN;
30 |     atmi_module_register(&module, &module_type, 1);
31 |
32 |     atmi_kernel_t kernel;
33 |     const unsigned int num_args = 2;
34 |     size_t arg_sizes[] = {sizeof(float *), sizeof(size_t)};
35 |     atmi_kernel_create_empty(&kernel, num_args, arg_sizes);
36 |     atmi_kernel_add_gpu_impl(kernel, "multiply_gpu", GPU_IMPL);
37 |     int gpu_id = 0; // optional argv[1] selects the GPU, wrapped into [0, gpu_count)
38 |     atmi_machine_t *machine = atmi_machine_get_info();
39 |     int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU];
40 |     if(gpu_count <= 0) { // nothing to run on; also guards the modulo below against division by zero
41 |         cerr << "No GPU devices available" << endl;
42 |         atmi_kernel_release(kernel); atmi_finalize();
43 |         return -1;
44 |     }
45 |     if(argv[1] != NULL) gpu_id = ((atoi(argv[1]) % gpu_count) + gpu_count) % gpu_count; // negative ids stay in range
46 |     size_t a_len = 16;
47 |     float *a = (float *) malloc(sizeof(float) * a_len);
48 |     cout << "Original array values" << endl; // init a
49 |     for(size_t i = 0; i < a_len; i++) {
50 |         a[i] = i + 1;
51 |         cout << a[i] << " ";
52 |     }
53 |     cout << endl;
54 |     atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0);
55 |     void *d_a;
56 |     atmi_malloc(&d_a, sizeof(float) * a_len, gpu);
57 |     atmi_memcpy(d_a, a, sizeof(float) * a_len);
58 |
59 |     /* setup launch params */
60 |     void *gpu_args[] = {&d_a, &a_len};
61 |     ATMI_LPARM_1D(lparm, a_len);
62 |     lparm->synchronous = ATMI_TRUE;
63 |     lparm->kernel_id = GPU_IMPL;
64 |     lparm->place = ATMI_PLACE_GPU(0, gpu_id);
65 |
66 |     /* launch and wait for kernel */
67 |     atmi_task_launch(lparm, kernel, gpu_args);
68 |     atmi_memcpy(a, d_a, sizeof(float) * a_len);
69 |     cout << "With default multiplier (4)" << endl;
70 |     for(size_t i = 0; i < a_len; i++) {
71 |         cout << a[i] << " ";
72 |     }
73 |     cout << endl;
74 |
75 |     /* change the multiplier */
76 |     int new_multiplier = 10;
77 |     void *mul_addr;
78 |     unsigned int mul_size;
79 |     atmi_interop_hsa_get_symbol_info(gpu, "multiplier", &mul_addr, &mul_size);
80 |     atmi_memcpy(mul_addr, &new_multiplier, mul_size); // NOTE(review): copies mul_size bytes from an int; assumes the device symbol is int-sized — confirm
81 |
82 |     /* launch with new multiplier and wait for kernel */
83 |     atmi_task_launch(lparm, kernel, gpu_args);
84 |     atmi_memcpy(a, d_a, sizeof(float) * a_len);
85 |     cout << "With modified multiplier (" << new_multiplier << ")" << endl;
86 |     for(size_t i = 0; i < a_len; i++) {
87 |         cout << a[i] << " ";
88 |     }
89 |     cout << endl;
90 |
91 |     /* cleanup */
92 |     free(a);
93 |     atmi_free(d_a);
94 |     atmi_kernel_release(kernel);
95 |     atmi_finalize();
96 |     return 0;
97 | }
98 |
--------------------------------------------------------------------------------
/examples/interop/hsainfo/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set ROCM device lib environment variables
26 | ROCM_DEVICE_PATH ?= /opt/rocm
27 |
28 | #Set LC Environment variables
29 | AMDLLVM ?= /opt/amd/llvm
30 |
31 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
32 |
33 | #MCPU
34 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
35 | #$(info MCPU $(MCPU))
36 |
37 | # Host compiler
38 | CXX = g++
39 | CXXFLAGS =-O3 -g -std=c++11
40 | #CXXFLAGS += -v
41 |
42 | INC_FLAGS=-I${ATMI_INC} -I${HSA_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include/hsa -I.
43 |
44 | LIBS=-latmi_runtime -lhsa-runtime64
45 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
46 |
47 | OBJS = hsainfo
48 |
49 | .PHONY: clean all test # test is a command-only target too; keep a stray file named "test" from masking it
50 |
51 | all: $(OBJS)
52 |
53 | hsainfo: hsainfo.cpp
54 | $(CXX) -o $@ hsainfo.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
55 |
56 | clean:
57 | rm -rf *.o $(OBJS)
58 |
59 | test:
60 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hsainfo
61 |
--------------------------------------------------------------------------------
/examples/runtime/dlbench_multi_agent/build.atmi.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #!/bin/bash
4 | #
5 | #MIT License
6 | #
7 | #Copyright © 2016 Advanced Micro Devices, Inc.
8 | #
9 | #Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
10 | #without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
11 | #persons to whom the Software is furnished to do so, subject to the following conditions:
12 | #
13 | #The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 | #
15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16 | #PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 | #OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18 | #
19 |
20 | if [ $# -eq 1 ] && [ "$1" = "--help" ]; then
21 | echo "usage: ./build.sh -l -a -t -c -v -m "
22 | exit 0
23 | fi
24 |
25 | [ -z ${HSA_RUNTIME_PATH} ] && HSA_RUNTIME_PATH=/opt/rocm/hsa
26 |
27 | ATMIPATH=/opt/rocm/libatmi
28 | CLOC_PATH=/usr/bin
29 | CC=/usr/bin/gcc
30 | CXX=g++
31 |
32 | while [ $# -gt 0 ]; do # walk the options left to right; value-taking options consume one extra argument
33 |     key="$1"
34 |     case "$key" in
35 |         -l|--layout)
36 |             layout="$2"
37 |             shift # option has parameter
38 |             ;;
39 |         -m|--mode)
40 |             mode="$2"
41 |             shift
42 |             ;;
43 |         -c|--copy)
44 |             copy="COPY"
45 |             ;;
46 |         -v|--verbose)
47 |             verbose="VERBOSE"
48 |             ;;
49 |         -a|--alloc)
50 |             alloc="$2"
51 |             shift
52 |             ;;
53 |         -t|--agent)
54 |             agent="$2"
55 |             shift
56 |             ;;
57 |         *)
58 |             echo "Unknown option:" "$key" >&2
59 |             exit 1 # non-zero status so callers can detect the usage error
60 |             ;;
61 |     esac
62 |     shift
63 | done
64 |
65 | [ "$layout" ] || { layout="AOS"; }
66 | [ "$mode" ] || { mode="build";}
67 | [ "$agent" ] || { agent="DEVICE";}
68 | [ "$copy" ] || { copy="NOCOPY";}
69 | [ "$alloc" ] || { alloc="FINE";}
70 | [ "$verbose" ] || { verbose="CURT";}
71 | [ "$module_type" ] || { module_type="MODULE_GCN";}
72 |
73 | host=`hostname` # map known host names to a GPU node name
74 | case "$host" in
75 |     xn0|xn1|xn2|xn3|xn4|xn5|xn6|xn7|xn8|xn9)
76 |         node="kaveri"
77 |         ;;
78 |     c0|c1|c2|c3)
79 |         node="carrizo"
80 |         ;;
81 |     t1|ROCNREDLINE)
82 |         node="fiji"
83 |         ;;
84 |     *)
85 |         echo "unknown host node" "$host" >&2
86 |         exit 1 # unsupported host is a failure, not a success
87 | esac
88 |
89 |
90 | if [ $mode = "clean" ]; then
91 | rm -rf *.o *~ grayscale_hsaco.h kernel.[ch]
92 | for l in AOS DA; do
93 | rm -rf dlbench.atmi_${l}
94 | done
95 | fi
96 |
97 | # build from C source
98 | if [ $mode = "build" ]; then
99 | echo "${CLOC_PATH}/cloc.sh -mcpu fiji -opt 2 grayscale.cl"
100 | ${CLOC_PATH}/cloc.sh -mcpu fiji -opt 2 grayscale.cl
101 | echo "${CXX} -g -I${ATMIPATH}/include -I${HSA_RUNTIME_PATH}/include -D${module_type} -D${alloc} -D${layout} -D${verbose} -I. -c dlbench.atmi.c -std=c++11"
102 | ${CXX} -g -I${ATMIPATH}/include -I${HSA_RUNTIME_PATH}/include -D${module_type} -D${alloc} -D${layout} -D${verbose} -I. -c dlbench.atmi.c -std=c++11
103 | echo "${CXX} -o dlbench.atmi_${layout} dlbench.atmi.o ${LFLAGS} -L${ATMIPATH}/lib -latmi_runtime "
104 | ${CXX} -o dlbench.atmi_${layout} dlbench.atmi.o ${LFLAGS} -L${ATMIPATH}/lib -latmi_runtime
105 | fi
106 |
107 |
--------------------------------------------------------------------------------
/examples/runtime/dlbench_multi_agent/dlbench.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #ifdef TUNE
8 | #define NUM_IMGS __NUM_IMGS_FROM_TUNER
9 | #define PIXELS_PER_IMG __PIXELS_PER_IMG_FROM_TUNER
10 | #define DATA_ITEM_TYPE __DATA_ITEM_TYPE_FROM_TUNER
11 | #else
12 | #define DATA_ITEM_TYPE float
13 | #define NUM_IMGS 1000
14 | #define PIXELS_PER_IMG 1024
15 | #endif
16 |
17 | typedef struct pixel_type {
18 | float r;
19 | float g;
20 | float b;
21 | float x;
22 | } pixel;
23 |
24 |
25 | typedef struct arg_aos_struct_type {
26 | pixel *src;
27 | pixel *dst;
28 | int start_index;
29 | int end_index;
30 | } args_aos;
31 |
32 |
33 | typedef struct arg_da_struct_type {
34 | float *r;
35 | float *g;
36 | float *b;
37 | float *x;
38 | float *d_r;
39 | float *d_g;
40 | float *d_b;
41 | float *d_x;
42 | int start_index;
43 | int end_index;
44 | } args_da;
45 |
46 | #define ITERS 1
47 |
48 | #define DEVICES 2
49 | #define CPU_THREADS 4
50 |
51 | #define THREADS PIXELS_PER_IMG
52 | #define WORKGROUP 256
53 |
54 | #define STREAMS 8
55 | #define FLOP 6 // floating-point ops in one iteration of kernel loop
56 |
57 | #define ERROR_THRESH (NUM_IMGS * 0.01) // relaxed FP-precision checking; parenthesized so it expands safely inside larger expressions
58 |
59 | #ifdef HETERO
60 | #define HOST
61 | #define DEVICE
62 | #endif
63 |
--------------------------------------------------------------------------------
/examples/runtime/dlbench_multi_agent/grayscale.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "dlbench.h"
8 |
9 | __kernel void grayscale_aos(__global pixel *src_images, __global pixel *dst_images, int num_imgs) {
10 |   const int px = get_global_id(0);  // this work-item's offset inside every PIXELS_PER_IMG-sized image
11 |   DATA_ITEM_TYPE gray;
12 |   for (int rep = 0; rep < ITERS; ++rep) {
13 |     for (int base = 0; base < num_imgs * PIXELS_PER_IMG; base += PIXELS_PER_IMG) {
14 |       const int at = px + base;  // same offset, next image
15 |       gray = (0.3 * src_images[at].r + 0.59 * src_images[at].g +
16 |               0.11 * src_images[at].b + 1.0 * src_images[at].x);
17 |       dst_images[at].r = gray;  // the weighted sum is stored in all four output components
18 |       dst_images[at].g = gray;
19 |       dst_images[at].b = gray;
20 |       dst_images[at].x = gray;
21 |     }
22 |   }
23 | }
24 |
25 |
26 | __kernel void grayscale_da(__global DATA_ITEM_TYPE *r, __global DATA_ITEM_TYPE *g,
27 |                            __global DATA_ITEM_TYPE *b, __global DATA_ITEM_TYPE *x,
28 |                            __global DATA_ITEM_TYPE *d_r, __global DATA_ITEM_TYPE *d_g,
29 |                            __global DATA_ITEM_TYPE *d_b, __global DATA_ITEM_TYPE *d_x, int num_imgs) {
30 |   const size_t px = get_global_id(0);  // this work-item's offset inside every PIXELS_PER_IMG-sized image
31 |   DATA_ITEM_TYPE gray;
32 |   for (int rep = 0; rep < ITERS; ++rep) {
33 |     for (int base = 0; base < num_imgs * PIXELS_PER_IMG; base += PIXELS_PER_IMG) {
34 |       const size_t at = px + base;  // same offset, next image
35 |       gray = (0.3 * r[at] + 0.59 * g[at] + 0.11 * b[at] + 1.0 * x[at]);
36 |       d_r[at] = gray;  // the weighted sum is stored in all four output arrays
37 |       d_g[at] = gray;
38 |       d_b[at] = gray;
39 |       d_x[at] = gray;
40 |     }
41 |   }
42 | }
43 |
44 |
45 |
--------------------------------------------------------------------------------
/examples/runtime/eps/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | # BUILDROOT: the current directory path with everything from "examples" onward removed
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | # ATMI install locations; "?=" keeps any value that is already set
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | # HSA runtime install location (feeds the include and library search paths below)
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | # ROCm root, handed to cloc.sh through -libgcn
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | # LLVM toolchain root: handed to cloc.sh through -aomp and used to locate clang (CLCC)
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): cloc.sh receives this triple (amdgpu--amdhsa) while CLCFLAGS targets amdgcn--amdhsa; confirm the difference is intended
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | # MCPU: GPU target name, read from the output of the mygpu script unless already set
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler selector: 1 builds the kernel with cloc.sh, anything else calls clang (CLCC) directly
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # cloc.sh location and the options passed to it
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # Directory of the device bitcode libraries named in CLCFLAGS. NOTE(review): ROCMLIB is not assigned in this Makefile; confirm where it comes from
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # Direct clang kernel build: OpenCL 2.0 source, amdgcn--amdhsa target, device bitcode libraries linked in
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler and flags for the C++ driver program
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 |
66 | OBJS = eps
67 | # all, clean and test name actions rather than files, so all three are declared phony.
68 | .PHONY: clean all test
69 |
70 | all: $(OBJS)
71 | # Build the kernel (cloc.sh when CLC is 1, otherwise clang into nullKernel.hsaco), then the host program.
72 | eps: eps.cpp nullKernel.cl
73 | ifeq ($(CLC),1)
74 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl
75 | else
76 | $(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl
77 | endif
78 | $(CXX) -o $@ eps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
79 | # Delete object files, kernel code objects and the host binary.
80 | clean:
81 | rm -rf *.o *.hsaco $(OBJS)
82 | # Run ./eps with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
83 | test:
84 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ATMI_DEPENDENCY_SYNC_TYPE=ATMI_SYNC_CALLBACK ./eps 2 15
85 |
--------------------------------------------------------------------------------
/examples/runtime/eps/nullKernel.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | __kernel void nullKernel_impl(){} // empty kernel: takes no arguments and performs no work
8 |
--------------------------------------------------------------------------------
/examples/runtime/fibonacci/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | # BUILDROOT: the current directory path with everything from "examples" onward removed
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | # ATMI install locations; "?=" keeps any value that is already set
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | # HSA runtime install location (feeds the include and library search paths below)
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | # ROCm root, placed in CLOCOPTS after -libgcn
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | # LLVM toolchain root: placed in CLOCOPTS after -aomp and used to locate clang (CLCC)
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): CLOCOPTS uses this triple (amdgpu--amdhsa) while CLCFLAGS targets amdgcn--amdhsa; confirm the difference is intended
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | # MCPU: GPU target name, read from the output of the mygpu script unless already set
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler selector; the ifeq that would consume it in the fibonacci rule is commented out
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # cloc.sh location and options (only referenced by the commented-out kernel build lines)
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -v"
43 |
44 | # Directory of the device bitcode libraries named in CLCFLAGS. NOTE(review): ROCMLIB is not assigned in this Makefile; confirm where it comes from
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # Direct clang kernel build settings (only referenced by the commented-out kernel build lines)
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler and flags for the C++ driver program
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = fibonacci
66 | # all, clean and test name actions rather than files, so all three are declared phony.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Only the host program is built; the kernel build lines below are commented out.
71 | fibonacci: fibonacci.cpp
72 | #ifeq ($(CLC),1)
73 | # $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 fibonacci.cl
74 | #else
75 | # $(CLCC) ${CLCFLAGS} -o fibonacci.hsaco fibonacci.cl
76 | #endif
77 | $(CXX) -o $@ fibonacci.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 | # Delete object files, kernel code objects and the host binary.
79 | clean:
80 | rm -rf *.o *.hsaco $(OBJS)
81 | # Run ./fibonacci with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./fibonacci
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | # BUILDROOT: the current directory path with everything from "examples" onward removed
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | # ATMI install locations; "?=" keeps any value that is already set
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | # HSA runtime install location (feeds the include and library search paths below)
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | # ROCm root, handed to cloc.sh through -libgcn
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | # LLVM toolchain root: handed to cloc.sh through -aomp and used to locate clang (CLCC)
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): cloc.sh receives this triple (amdgpu--amdhsa) while CLCFLAGS targets amdgcn--amdhsa; confirm the difference is intended
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | # MCPU: GPU target name, read from the output of the mygpu script unless already set
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler selector: 1 builds the kernel with cloc.sh, anything else calls clang (CLCC) directly
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # cloc.sh location and the options passed to it
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # Directory of the device bitcode libraries named in CLCFLAGS. NOTE(review): ROCMLIB is not assigned in this Makefile; confirm where it comes from
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # Direct clang kernel build: OpenCL 2.0 source, amdgcn--amdhsa target, device bitcode libraries linked in
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler and flags for the C++ driver program
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 | # all, clean and test name actions rather than files, so all three are declared phony.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Build the kernel (cloc.sh when CLC is 1, otherwise clang into hw.hsaco), then the host program.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | $(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | $(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 | # Delete object files, kernel code objects and the host binary.
79 | clean:
80 | rm -rf *.o *.hsaco $(OBJS)
81 | # Run ./hello with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "hw_structs.h"
8 | __kernel void decode_gpu(__global void *args) {
9 |   decode_args_t *unpacked = (decode_args_t *)args;  // the single argument points at the packed argument struct
10 |   const char *src = unpacked->in;
11 |   char *dst = unpacked->out;
12 |   size_t len = unpacked->strlength;
13 |   int gid = get_global_id(0);  // one work-item per character
14 |   if(gid < len)  // work-items past the end of the string write nothing
15 |     dst[gid] = src[gid] + 1;
16 | }
17 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi.h"
8 | #include "atmi_runtime.h"
9 | #include
10 | #include
11 | #include
12 | using namespace std;
13 | /* "extern _CPPSTRING_" becomes extern "C" under C++ and plain extern under C. */
14 | #ifdef __cplusplus
15 | #define _CPPSTRING_ "C"
16 | #else
17 | #define _CPPSTRING_
18 | #endif
19 |
20 | #include "hw_structs.h"
21 | #define ErrorCheck(status) \
22 | if (status != ATMI_STATUS_SUCCESS) { \
23 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \
24 | exit(1); \
25 | }
26 |
27 | extern _CPPSTRING_ void decode_cpu(void **args) {
28 |   decode_args_t *unpacked = *(decode_args_t **)args;  // args[0] holds a pointer to the packed argument struct
29 |   const char *src = unpacked->in;
30 |   char *dst = unpacked->out;
31 |   const size_t len = unpacked->strlength;
32 |   int idx = get_global_id(0);  // one work-item per character
33 |   if(idx < len)  // work-items past the end of the string write nothing
34 |     dst[idx] = src[idx] + 1;
35 | }
36 |
37 | int main(int argc, char **argv) {  // decodes one string on GPU 0 and then on CPU 0, printing both results
38 |   ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL));
39 |   const char *module = "hw.hsaco";
40 |   atmi_platform_type_t module_type = AMDGCN;
41 |   ErrorCheck(atmi_module_register(&module, &module_type, 1));
42 |   // One kernel object with two implementations: host function decode_cpu and module symbol "decode_gpu".
43 |   atmi_kernel_t kernel;
44 |   const unsigned int num_args = 1;
45 |   size_t arg_sizes[num_args];
46 |   arg_sizes[0] = sizeof(void *);
47 |   ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes,
48 |                                 2,
49 |                                 ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu,
50 |                                 ATMI_DEVTYPE_GPU, "decode_gpu"));
51 |   // Each character is one below the decoded text; both kernels add 1 per character.
52 |   const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
53 |   size_t strlength = strlen(input);
54 |   char *output_gpu = (char*) malloc(strlength + 1);
55 |   char *output_cpu = (char*) malloc(strlength + 1);
56 |
57 |   decode_args_t decode_gpu_args = {.in=input, .out=output_gpu, .strlength=strlength};
58 |   decode_args_t decode_cpu_args = {.in=input, .out=output_cpu, .strlength=strlength};
59 |
60 |   void *gpu_args[num_args];
61 |   void *cpu_args[num_args];
62 |   // Each args entry is the address of the argument value; here the value is a pointer to the struct.
63 |   void *tmp_gpu = &decode_gpu_args;
64 |   gpu_args[0] = &tmp_gpu;
65 |   void *tmp_cpu = &decode_cpu_args;
66 |   cpu_args[0] = &tmp_cpu;
67 |
68 |   ATMI_LPARM_1D(lparm, strlength);
69 |   lparm->synchronous = ATMI_TRUE;
70 |
71 |   lparm->WORKITEMS = strlength;
72 |   lparm->place = ATMI_PLACE_GPU(0, 0);
73 |   atmi_task_handle_t task = atmi_task_launch(lparm, kernel, gpu_args);
74 |   if(task == ATMI_NULL_TASK_HANDLE) {
75 |     fprintf(stderr, "GPU Task Launch/Execution Error.\n");
76 |     exit(1);
77 |   }
78 |   output_gpu[strlength] = '\0';
79 |   // Same launch parameters, now placed on the CPU.
80 |   lparm->place = ATMI_PLACE_CPU(0, 0);
81 |   task = atmi_task_launch(lparm, kernel, cpu_args);
82 |   if(task == ATMI_NULL_TASK_HANDLE) {
83 |     fprintf(stderr, "CPU Task Launch/Execution Error.\n");  // was mislabelled as a GPU error
84 |     exit(1);
85 |   }
86 |   output_cpu[strlength] = '\0';
87 |
88 |   cout << "Output from the GPU: " << output_gpu << endl;
89 |   cout << "Output from the CPU: " << output_cpu << endl;
90 |   free(output_cpu);
91 |   free(output_gpu);
92 |
93 |   ErrorCheck(atmi_kernel_release(kernel));
94 |   ErrorCheck(atmi_finalize());
95 |   return 0;
96 | }
97 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld/hw_structs.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 |
8 | typedef struct decode_args_s { // argument bundle included by both hw.cpp (decode_cpu) and hw.cl (decode_gpu)
9 | const char* in; // characters to decode
10 | char* out; // receives in[i] + 1 for every i below strlength
11 | const size_t strlength; // number of characters to process
12 | } decode_args_t;
13 |
14 |
15 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | # BUILDROOT: the current directory path with everything from "examples" onward removed
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | # ATMI install locations; "?=" keeps any value that is already set
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | # HSA runtime install location (feeds the include and library search paths below)
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | # ROCm root, handed to cloc.sh through -libgcn
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | # LLVM toolchain root: handed to cloc.sh through -aomp and used to locate clang (CLCC)
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): cloc.sh receives this triple (amdgpu--amdhsa) while CLCFLAGS targets amdgcn--amdhsa; confirm the difference is intended
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | # MCPU: GPU target name, read from the output of the mygpu script unless already set
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler selector: 1 builds the kernel with cloc.sh, anything else calls clang (CLCC) directly
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # cloc.sh location and the options passed to it
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # Directory of the device bitcode libraries named in CLCFLAGS. NOTE(review): ROCMLIB is not assigned in this Makefile; confirm where it comes from
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # Direct clang kernel build: OpenCL 2.0 source, amdgcn--amdhsa target, device bitcode libraries linked in
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler and flags for the C++ driver program
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 | # all, clean and test name actions rather than files, so all three are declared phony.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Build the kernel (cloc.sh when CLC is 1, otherwise clang into hw.hsaco), then the host program.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | $(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | $(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 | # Delete object files, kernel code objects and the host binary.
79 | clean:
80 | rm -rf *.o *.hsaco $(OBJS)
81 | # Run ./hello with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | kernel void decode_gpu(global const char *in, int strlength, global char *out) {
8 |   const int gid = get_global_id(0);  // one work-item per character
9 |   if(strlength > gid)  // work-items past the end of the string write nothing
10 |     out[gid] = in[gid] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_runtime.h"
8 | #include
9 | #include
10 | #include
11 | #include
12 | using namespace std;
13 | /* "extern _CPPSTRING_" becomes extern "C" under C++ and plain extern under C. */
14 | #ifdef __cplusplus
15 | #define _CPPSTRING_ "C"
16 | #else
17 | #define _CPPSTRING_
18 | #endif
19 |
20 | #define ErrorCheck(status) \
21 | if (status != ATMI_STATUS_SUCCESS) { \
22 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \
23 | exit(1); \
24 | }
25 |
26 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, int strlength) {
27 |   const int idx = get_global_id(0);  // one work-item per character
28 |   if(strlength > idx)  // work-items past the end of the string write nothing
29 |     out[idx] = in[idx] + 1;
30 | }
31 |
32 | extern _CPPSTRING_ void decode_cpu(const char **in, int *strlength, char **out) { // CPU implementation registered in main; every argument arrives as a pointer to its value
33 | decode_cpu_fn(*in, *out, *strlength); // note the order: decode_cpu_fn takes (in, out, strlength)
34 | }
35 |
36 |
37 | int main(int argc, char **argv) {  // decodes one string on a chosen GPU and on CPU 0 via ATMI-allocated buffers; argv[1] optionally selects the GPU
38 |   ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL));
39 |
40 |   const char *module = "hw.hsaco";
41 |   atmi_platform_type_t module_type = AMDGCN;
42 |   ErrorCheck(atmi_module_register(&module, &module_type, 1));
43 |
44 |   atmi_machine_t *machine = atmi_machine_get_info();
45 |   // One kernel object with two implementations: host function decode_cpu and module symbol "decode_gpu".
46 |   atmi_kernel_t kernel;
47 |   const unsigned int num_args = 3;
48 |   size_t arg_sizes[] = {sizeof(const char *), sizeof(int), sizeof(char *)};
49 |   ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes,
50 |                                 2,
51 |                                 ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu,
52 |                                 ATMI_DEVTYPE_GPU, "decode_gpu"));
53 |   // Each character is one below the decoded text; both kernels add 1 per character.
54 |   const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
55 |   int strlength = strlen(input);
56 |   char *output_cpu = (char*) malloc(strlength + 1);
57 |   char *output_gpu = (char*) malloc(strlength + 1);
58 |
59 |   int gpu_id = 0;
60 |   int cpu_id = 0;
61 |   int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU];
62 |   if(argv[1] != NULL && gpu_count > 0) gpu_id = ((atoi(argv[1]) % gpu_count) + gpu_count) % gpu_count;  // no modulo by zero; negative input wraps into range
63 |   printf("Choosing GPU %d/%d\n", gpu_id, gpu_count);
64 |
65 |   /* Run HelloWorld on GPU */
66 |   atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0);
67 |   atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0);
68 |
69 |   void *d_input;
70 |   ErrorCheck(atmi_malloc(&d_input, strlength+1, gpu));
71 |   ErrorCheck(atmi_memcpy(d_input, input, strlength+1));
72 |
73 |   void *h_input;
74 |   ErrorCheck(atmi_malloc(&h_input, strlength+1, cpu));
75 |   ErrorCheck(atmi_memcpy(h_input, input, strlength+1));
76 |
77 |   void *d_output;
78 |   ErrorCheck(atmi_malloc(&d_output, strlength+1, gpu));
79 |
80 |   void *h_output;
81 |   ErrorCheck(atmi_malloc(&h_output, strlength+1, cpu));
82 |   // Each entry is the address of one argument value, in the order given by arg_sizes.
83 |   void *gpu_args[] = {&d_input, &strlength, &d_output};
84 |   void *cpu_args[] = {&h_input, &strlength, &h_output};
85 |
86 |   ATMI_LPARM_1D(lparm, strlength);
87 |   lparm->synchronous = ATMI_TRUE;
88 |
89 |   lparm->place = ATMI_PLACE_GPU(0, gpu_id);
90 |   volatile atmi_task_handle_t task = atmi_task_launch(lparm, kernel, gpu_args);
91 |   if(task == ATMI_NULL_TASK_HANDLE) {
92 |     fprintf(stderr, "GPU Task Launch/Execution Error.\n");
93 |     exit(1);
94 |   }
95 |   // Same launch parameters, now placed on the CPU.
96 |   lparm->place = ATMI_PLACE_CPU(0, cpu_id);
97 |   task = atmi_task_launch(lparm, kernel, cpu_args);
98 |   if(task == ATMI_NULL_TASK_HANDLE) {
99 |     fprintf(stderr, "CPU Task Launch/Execution Error.\n");  // was mislabelled as a GPU error
100 |     exit(1);
101 |   }
102 |   // Copy both results into the malloc'd buffers and terminate them before printing.
103 |   ErrorCheck(atmi_memcpy(output_gpu, d_output, strlength+1));
104 |   output_gpu[strlength] = '\0';
105 |   ErrorCheck(atmi_memcpy(output_cpu, h_output, strlength+1));
106 |   output_cpu[strlength] = '\0';
107 |
108 |   cout << "Output from the GPU: " << output_gpu << endl;
109 |   cout << "Output from the CPU: " << output_cpu << endl;
110 |
111 |   /* cleanup */
112 |   free(output_gpu);
113 |   free(output_cpu);
114 |   ErrorCheck(atmi_free(h_input));
115 |   ErrorCheck(atmi_free(h_output));
116 |   ErrorCheck(atmi_free(d_input));
117 |   ErrorCheck(atmi_free(d_output));
118 |   ErrorCheck(atmi_kernel_release(kernel));
119 |   ErrorCheck(atmi_finalize());
120 |   return 0;
121 | }
122 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_async/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | # BUILDROOT: the current directory path with everything from "examples" onward removed
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | # ATMI install locations; "?=" keeps any value that is already set
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | # HSA runtime install location (feeds the include and library search paths below)
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | # ROCm root, handed to cloc.sh through -libgcn
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | # LLVM toolchain root: handed to cloc.sh through -aomp and used to locate clang (CLCC)
26 | AMDLLVM ?= /opt/amd/llvm
27 | # NOTE(review): cloc.sh receives this triple (amdgpu--amdhsa) while CLCFLAGS targets amdgcn--amdhsa; confirm the difference is intended
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | # MCPU: GPU target name, read from the output of the mygpu script unless already set
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler selector: 1 builds the kernel with cloc.sh, anything else calls clang (CLCC) directly
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # cloc.sh location and the options passed to it
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # Directory of the device bitcode libraries named in CLCFLAGS. NOTE(review): ROCMLIB is not assigned in this Makefile; confirm where it comes from
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # Direct clang kernel build: OpenCL 2.0 source, amdgcn--amdhsa target, device bitcode libraries linked in
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler and flags for the C++ driver program
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 | # all, clean and test name actions rather than files, so all three are declared phony.
67 | .PHONY: clean all test
68 |
69 | all: $(OBJS)
70 | # Build the kernel (cloc.sh when CLC is 1, otherwise clang into hw.hsaco), then the host program.
71 | hello: hw.cpp hw.cl
72 | ifeq ($(CLC),1)
73 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
74 | else
75 | $(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
76 | endif
77 | $(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
78 | # Delete object files, kernel code objects and the host binary.
79 | clean:
80 | rm -rf *.o *.hsaco $(OBJS)
81 | # Run ./hello with the ATMI and HSA library directories prepended to LD_LIBRARY_PATH.
82 | test:
83 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_async/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | kernel void decode_gpu(global const char *in, global char *out, ulong strlength) {
8 |   int gid = get_global_id(0);  // one work-item per character
9 |   if(strlength > gid)  // work-items past the end of the string write nothing
10 |     out[gid] = in[gid] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_async/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_runtime.h"
8 | #include
9 | #include
10 | #include
11 | #include
12 | using namespace std;
13 | /* "extern _CPPSTRING_" becomes extern "C" under C++ and plain extern under C. */
14 | #ifdef __cplusplus
15 | #define _CPPSTRING_ "C"
16 | #else
17 | #define _CPPSTRING_
18 | #endif
19 |
20 | #define ErrorCheck(status) \
21 | if (status != ATMI_STATUS_SUCCESS) { \
22 | printf("Error at [%s:%d]\n", __FILE__, __LINE__); \
23 | exit(1); \
24 | }
25 |
26 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, size_t strlength) {
27 |   int idx = get_global_id(0);  // one work-item per character
28 |   if(strlength > idx)  // work-items past the end of the string write nothing
29 |     out[idx] = in[idx] + 1;
30 | }
31 |
32 | extern _CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength) { // CPU implementation registered in main; every argument arrives as a pointer to its value
33 | decode_cpu_fn(*in, *out, *strlength); // dereference each argument and forward it
34 | }
35 |
36 | int main(int argc, char **argv) {  // decodes one string on a chosen GPU, then on CPU 0, each as a copy -> kernel -> copy task chain; argv[1] optionally selects the GPU
37 |   ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL));
38 |
39 |   const char *module = "hw.hsaco";
40 |   atmi_platform_type_t module_type = AMDGCN;
41 |   ErrorCheck(atmi_module_register(&module, &module_type, 1));
42 |
43 |   atmi_machine_t *machine = atmi_machine_get_info();
44 |   // One kernel object with two implementations: host function decode_cpu and module symbol "decode_gpu".
45 |   atmi_kernel_t kernel;
46 |   const unsigned int num_args = 3;
47 |   size_t arg_sizes[] = {sizeof(const char *), sizeof(char *), sizeof(size_t)};
48 |   ErrorCheck(atmi_kernel_create(&kernel, num_args, arg_sizes,
49 |                                 2,
50 |                                 ATMI_DEVTYPE_CPU, (atmi_generic_fp)decode_cpu,
51 |                                 ATMI_DEVTYPE_GPU, "decode_gpu"));
52 |   // Each character is one below the decoded text; both kernels add 1 per character.
53 |   const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
54 |   size_t strlength = strlen(input);
55 |   char *output_cpu = (char*) malloc(strlength + 1);
56 |   char *output_gpu = (char*) malloc(strlength + 1);
57 |
58 |   int gpu_id = 0;
59 |   int cpu_id = 0;
60 |   int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU];
61 |   if(argv[1] != NULL && gpu_count > 0) gpu_id = ((atoi(argv[1]) % gpu_count) + gpu_count) % gpu_count;  // no modulo by zero; negative input wraps into range
62 |   printf("Choosing GPU %d/%d\n", gpu_id, gpu_count);
63 |
64 |   /* Run HelloWorld on GPU */
65 |   atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0);
66 |   void *d_input, *d_output;
67 |   ErrorCheck(atmi_malloc(&d_input, strlength+1, gpu));
68 |   ErrorCheck(atmi_malloc(&d_output, strlength+1, gpu));
69 |   // Chain: input copy -> kernel (depends on the copy) -> output copy (depends on the kernel).
70 |   ATMI_CPARM(cparm_gpu);
71 |   atmi_task_handle_t h2d_gpu = atmi_memcpy_async(cparm_gpu, d_input, input, strlength+1);
72 |
73 |   ATMI_LPARM_GPU_1D(lparm_gpu, gpu_id, strlength);
74 |   ATMI_PARM_SET_DEPENDENCIES(lparm_gpu, h2d_gpu);
75 |   void *gpu_args[] = {&d_input, &d_output, &strlength};
76 |   atmi_task_handle_t k_gpu = atmi_task_launch(lparm_gpu, kernel, gpu_args);
77 |
78 |   ATMI_PARM_SET_DEPENDENCIES(cparm_gpu, k_gpu);
79 |   atmi_task_handle_t d2h_gpu = atmi_memcpy_async(cparm_gpu, output_gpu, d_output, strlength+1);
80 |
81 |   // wait only for the last task in the chain
82 |   ErrorCheck(atmi_task_wait(d2h_gpu));
83 |   output_gpu[strlength] = '\0';
84 |   cout << "Output from the GPU: " << output_gpu << endl;
85 |
86 |   /* Run HelloWorld on CPU */
87 |   atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0);
88 |   void *h_input, *h_output;
89 |   ErrorCheck(atmi_malloc(&h_input, strlength+1, cpu));
90 |   ErrorCheck(atmi_malloc(&h_output, strlength+1, cpu));
91 |   // Same three-task chain, using the CPU buffers and CPU launch parameters.
92 |   ATMI_CPARM(cparm_cpu);
93 |   atmi_task_handle_t h2d_cpu = atmi_memcpy_async(cparm_cpu, h_input, input, strlength+1);
94 |
95 |   ATMI_LPARM_CPU_1D(lparm_cpu, cpu_id, strlength);
96 |   ATMI_PARM_SET_DEPENDENCIES(lparm_cpu, h2d_cpu);
97 |   void *cpu_args[] = {&h_input, &h_output, &strlength};
98 |   atmi_task_handle_t k_cpu = atmi_task_launch(lparm_cpu, kernel, cpu_args);
99 |
100 |   ATMI_PARM_SET_DEPENDENCIES(cparm_cpu, k_cpu);
101 |   atmi_task_handle_t d2h_cpu = atmi_memcpy_async(cparm_cpu, output_cpu, h_output, strlength+1);
102 |
103 |   // wait only for the last task in the chain
104 |   ErrorCheck(atmi_task_wait(d2h_cpu));
105 |   output_cpu[strlength] = '\0';
106 |   cout << "Output from the CPU: " << output_cpu << endl;
107 |
108 |   /* cleanup */
109 |   free(output_gpu);
110 |   free(output_cpu);
111 |   ErrorCheck(atmi_free(h_input));
112 |   ErrorCheck(atmi_free(h_output));
113 |   ErrorCheck(atmi_free(d_input));
114 |   ErrorCheck(atmi_free(d_output));
115 |   ErrorCheck(atmi_kernel_release(kernel));
116 |   ErrorCheck(atmi_finalize());
117 |   return 0;
118 | }
119 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_sync/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = hello
66 |
# 'test' only runs the example and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Compile the kernel (through cloc.sh when CLC=1, otherwise with clang
# directly into hw.hsaco), then compile and link the host program.
hello: hw.cpp hw.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 hw.cl
else
	$(CLCC) ${CLCFLAGS} -o hw.hsaco hw.cl
endif
	$(CXX) -o $@ hw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove build products.
clean:
	rm -rf *.o *.hsaco $(OBJS)

# Run the example with the ATMI and HSA library directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
84 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_sync/hw.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | __kernel void decode_gpu(__global const char *in, __global char *out, size_t strlength) {
8 | int num = get_global_id(0);
9 | if(num < strlength)
10 | out[num] = in[num] + 1;
11 | }
12 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_dGPU_sync/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_runtime.h"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
12 | using namespace std;
13 | #ifdef __cplusplus
14 | #define _CPPSTRING_ "C"
15 | #endif
16 | #ifndef __cplusplus
17 | #define _CPPSTRING_
18 | #endif
19 |
20 | enum {
21 | CPU_IMPL = 10565,
22 | GPU_IMPL = 42
23 | };
24 |
25 | extern _CPPSTRING_ void decode_cpu_fn(const char *in, char *out, size_t strlength) {
26 | int num = get_global_id(0);
27 | if(num < strlength)
28 | out[num] = in[num] + 1;
29 | }
30 |
// Adapter that receives a pointer to each kernel argument, dereferences them
// and forwards the values to decode_cpu_fn. main() registers this function as
// the kernel's CPU implementation.
extern _CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength) {
  decode_cpu_fn(*in, *out, *strlength);
}
34 |
35 |
36 | int main(int argc, char **argv) {
37 | const char* input = "Gdkkn\x1FGR@\x1FVnqkc";
38 | size_t strlength = strlen(input);
39 |
40 | char *output_gpu = (char*) malloc(strlength + 1);
41 | char *output_cpu = (char*) malloc(strlength + 1);
42 |
43 | // Init ATMI
44 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
45 | if(err != ATMI_STATUS_SUCCESS)
46 | return -1;
47 |
48 | // Register module
49 | const char *module = "hw.hsaco";
50 | atmi_platform_type_t module_type = AMDGCN;
51 | atmi_module_register(&module, &module_type, 1);
52 |
53 | {
54 | // Create kernel
55 | atmi_kernel_t kernel;
56 | const unsigned int num_args = 3;
57 | size_t arg_sizes[] = {sizeof(const char *), sizeof(char *), sizeof(size_t)};
58 | atmi_kernel_create_empty(&kernel, num_args, arg_sizes);
59 |
60 | atmi_kernel_add_cpu_impl(kernel, (atmi_generic_fp)decode_cpu, CPU_IMPL);
61 | atmi_kernel_add_gpu_impl(kernel, "decode_gpu", GPU_IMPL);
62 |
63 | // Select GPU
64 | int gpu_id = 0;
65 | {
66 | atmi_machine_t *machine = atmi_machine_get_info();
67 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU];
68 | if(argv[1] != NULL) {
69 | gpu_id = (atoi(argv[1]) % gpu_count);
70 | printf("Choosing GPU %d/%d\n", gpu_id, gpu_count);
71 | }
72 | }
73 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0);
74 | // Run on GPU
75 | {
76 | void *d_input;
77 | void *d_output;
78 |
79 | // Alloc
80 | atmi_malloc(&d_input, strlength+1, gpu);
81 | atmi_malloc(&d_output, strlength+1, gpu);
82 |
83 | // Copy
84 | atmi_memcpy(d_input, input, strlength+1);
85 |
86 | void *gpu_args[] = {&d_input, &d_output, &strlength};
87 |
88 | // Launch
89 | ATMI_LPARM_1D(lparm, strlength);
90 | lparm->synchronous = ATMI_TRUE;
91 | lparm->kernel_id = GPU_IMPL;
92 | lparm->place = ATMI_PLACE_GPU(0, gpu_id);
93 | atmi_task_launch(lparm, kernel, gpu_args);
94 |
95 | // Copy
96 | atmi_memcpy(output_gpu, d_output, strlength+1);
97 |
98 | output_gpu[strlength] = '\0';
99 | cout << "Output from the GPU: " << output_gpu << endl;
100 |
101 | // Free
102 | atmi_free(d_output);
103 | atmi_free(d_input);
104 | }
105 |
106 | // Select CPU
107 | int cpu_id = 0;
108 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0);
109 | // Run on CPU
110 | {
111 | void *h_input;
112 | void *h_output;
113 |
114 | // Alloc
115 | atmi_malloc(&h_input, strlength+1, cpu);
116 | atmi_malloc(&h_output, strlength+1, cpu);
117 |
118 | // Copy
119 | atmi_memcpy(h_input, input, strlength+1);
120 |
121 | void *cpu_args[] = {&h_input, &h_output, &strlength};
122 |
123 | // Launch
124 | ATMI_LPARM_1D(lparm, strlength);
125 | lparm->synchronous = ATMI_TRUE;
126 | lparm->kernel_id = CPU_IMPL;
127 | lparm->place = ATMI_PLACE_CPU(0, cpu_id);
128 | atmi_task_launch(lparm, kernel, cpu_args);
129 |
130 | // Copy
131 | atmi_memcpy(output_cpu, h_output, strlength+1);
132 |
133 | output_cpu[strlength] = '\0';
134 | cout << "Output from the CPU: " << output_cpu << endl;
135 |
136 | // Free
137 | atmi_free(h_output);
138 | atmi_free(h_input);
139 | }
140 |
141 | // Release kernel
142 | atmi_kernel_release(kernel);
143 | }
144 |
145 | atmi_finalize();
146 |
147 | /* cleanup */
148 | free(output_gpu);
149 | free(output_cpu);
150 |
151 | return 0;
152 | }
153 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_printf/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | # Temp files
38 | SAVETEMP ?= 0
39 |
40 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
41 |
42 | # CLOC
43 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
44 |
45 | # CL options
46 | CLOPTS = $(INC_FLAGS)
47 | CLOPTS += -v
48 | # Frontend optimization
49 | ifneq ($(NOOPT),1)
50 | CLOPTS += -O2
51 | endif
52 | # Temp file
53 | ifneq ($(SAVETEMP),0)
54 | CLOPTS += -save-temps
55 | endif
56 |
57 | # cloc options
58 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
59 | CLOCOPTS += -clopts "$(CLOPTS)"
60 |
61 | # ROCm-Device-lib
62 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
63 |
64 | # GPU compiler
65 | CLCC=$(AMDLLVM)/bin/clang
66 |
67 | LLK=$(AMDLLVM)/bin/llvm-link
68 | LLC=$(AMDLLVM)/bin/llc
69 | LLD=$(AMDLLVM)/bin/lld
70 |
71 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
72 | CLCFLAGS += -target amdgcn--amdhsa
73 |
74 | ifeq ($(SAVETEMP),0)
75 | CLCFLAGS += -mcpu=$(MCPU)
76 |
77 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
78 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
79 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
80 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
81 | else
82 | CLCFLAGS += -emit-llvm -c
83 |
84 | #LLFLAGS = -suppress-warnings
85 | LLFLAGS += $(BITCODE_LIB)/opencl.amdgcn.bc
86 | LLFLAGS += $(BITCODE_LIB)/ockl.amdgcn.bc
87 | LLFLAGS += $(BITCODE_LIB)/ocml.amdgcn.bc
88 | LLFLAGS += $(BITCODE_LIB)/irif.amdgcn.bc
89 |
90 | LCFLAGS = -O2
91 | LCFLAGS += -filetype=obj
92 | LCFLAGS += -mtriple amdgcn--amdhsa
93 | LCFLAGS += -mcpu=$(MCPU)
94 |
95 | LDFLAGS = -flavor gnu -shared
96 | endif
97 |
98 |
99 | # CPU compiler
100 | CC=$(AMDLLVM)/bin/clang
101 | CCFLAGS = -g
102 | #CCFLAGS += -v
103 |
104 | # Host compiler
105 | CXX = g++
106 | CXXFLAGS = -g -std=c++11
107 | #CXXFLAGS += -v
108 |
109 | LIBS=-latmi_runtime
110 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
111 |
112 | OBJS = hello
113 |
# 'test' only runs the example and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Headers that every pattern rule below lists as a prerequisite. The variable
# is not assigned anywhere else in this Makefile, so without a value edits to
# hw.h would never trigger a rebuild. '?=' keeps a value supplied by
# atmi-config.mak or the command line.
INC_FILES ?= hw.h

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Kernel code object. CLC=1 goes through cloc.sh; otherwise clang is used,
# either in one step (SAVETEMP=0) or as separate compile / llvm-link / llc /
# lld steps that leave the intermediate files on disk.
%.hsaco: %.cl $(INC_FILES)
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 -o $@ $<
else
ifeq ($(SAVETEMP),0)
	$(CLCC) ${CLCFLAGS} -o $@ $<
else
	$(CLCC) ${CLCFLAGS} -o $*.bc $<
	$(LLK) -o $*.linked.bc $*.bc ${LLFLAGS}
	$(LLC) ${LCFLAGS} -o $*.o $*.linked.bc
	$(LLD) ${LDFLAGS} -o $@ $*.o
endif
endif
	@echo

# Host C++ objects.
%.o: %.cpp $(INC_FILES)
	$(CXX) $(CXXFLAGS) $(INC_FLAGS) -o $@ -c $<
	@echo

# CPU kernel objects, compiled as C.
%.o: %.c $(INC_FILES)
	$(CC) $(CCFLAGS) $(INC_FLAGS) -o $@ -c $<
	@echo

# Link the host program. hw_gpu.hsaco is a prerequisite so the code object is
# built alongside the binary, but it is not passed to the linker.
hello: hw_gpu.hsaco hw_cpu.o hw_host.o
	$(CXX) -o $@ hw_host.o hw_cpu.o $(LIBS) $(LIB_FLAGS)

# Remove build products and saved intermediates.
clean:
	rm -rf *.o *.i *.bc *.hsaco $(OBJS)

# Run the example with the ATMI and HSA library directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./hello
149 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_printf/hw.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #ifdef __cplusplus
8 | #define _CPPSTRING_ "C"
9 | #endif
10 | #ifndef __cplusplus
11 | #define _CPPSTRING_
12 | #endif
13 |
14 | #define BUFFER_SIZE 512
15 |
16 |
17 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_printf/hw_cpu.c:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | // Have printf natively
8 |
9 | #include
10 | #include
11 | #include
12 |
13 | // But no OpenCL header
14 | #include "atmi_runtime.h"
15 |
16 | // CPU implementation using function pointer.
17 | static void decode_cpu_fn(const char *in, char *out, size_t strlength, char *extra);
18 |
// Adapter that receives a pointer to each kernel argument, dereferences them
// and forwards the values to decode_cpu_fn.
// NOTE(review): _CPPSTRING_ is used here before "hw.h" (which defines it) is
// included further down -- presumably an earlier header provides it; confirm.
_CPPSTRING_ void decode_cpu(const char **in, char **out, size_t *strlength, char **extra) {
  decode_cpu_fn(*in, *out, *strlength, *extra);
}
22 |
23 | /**********************************************************************************/
24 | //#include "atmi_device.h"
25 | #include "hw.h"
26 | /**********************************************************************************/
27 |
/*
 * CPU work-item body: the work-item whose global id is below strlength writes
 * in[id] + 1 to out[id]. The work-item with id 0 also prints a greeting.
 *
 * The id is held in a size_t so it has the same type as strlength and the
 * bound check no longer mixes a signed index with an unsigned length.
 * The 'extra' argument is accepted but not used.
 */
void decode_cpu_fn(
    const char *in,
    char *out,
    size_t strlength,
    char *extra
    ) {

  const size_t num = get_global_id(0);

  if (num < strlength)
    out[num] = in[num] + 1;

#if 1
  if (num == 0) {
    /* num is 0 here; the cast only keeps the original "%d" format valid. */
    printf("hello world from CPU, %d, %f\n", (int)num, 1.0);
  }
#endif

}
47 |
48 |
--------------------------------------------------------------------------------
/examples/runtime/helloworld_printf/hw_gpu.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | // Do not have printf
8 |
9 | // OpenCL header include by default through CL frontend
10 |
11 | /**********************************************************************************/
12 | //#include "atmi_device.h"
13 | #include "hw.h"
14 | /**********************************************************************************/
15 |
16 | kernel void decode_gpu(
17 | global const char *in,
18 | global char *out,
19 | ulong strlength,
20 | global char *extra
21 | ) {
22 |
23 | int num = get_global_id(0);
24 |
25 | if(num < strlength)
26 | out[num] = in[num] + 1;
27 |
28 | #if 1
29 | if (!num) {
30 | printf("hello world from GPU, %d, %f\n", num, 1.0);
31 | }
32 | #endif
33 |
34 | }
35 |
36 |
--------------------------------------------------------------------------------
/examples/runtime/kps/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 |
66 | OBJS = kps
67 |
# 'test' only runs the example and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Compile the kernel (through cloc.sh when CLC=1, otherwise with clang
# directly into nullKernel.hsaco), then compile and link the host program.
kps: kps.cpp nullKernel.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl
else
	$(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl
endif
	$(CXX) -o $@ kps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove build products.
clean:
	rm -rf *.o *.hsaco $(OBJS)

# Run the example with the ATMI and HSA library directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./kps
85 |
--------------------------------------------------------------------------------
/examples/runtime/kps/nullKernel.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
/* Empty kernel: takes no arguments and performs no work. */
__kernel void nullKernel_impl(){}
8 |
--------------------------------------------------------------------------------
/examples/runtime/needleman-wunsch/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = nw
66 |
# 'test' only runs the example and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Compile the kernel (through cloc.sh when CLC=1, otherwise with clang
# directly into nw.hsaco), then compile and link the host program.
nw: nw.cpp nw.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nw.cl
else
	$(CLCC) ${CLCFLAGS} -o nw.hsaco nw.cl
endif
	$(CXX) -o $@ nw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove build products.
clean:
	rm -rf *.o *.hsaco $(OBJS)

# Run the example (arguments: 512 10 10) with the ATMI and HSA library
# directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./nw 512 10 10
84 |
--------------------------------------------------------------------------------
/examples/runtime/needleman-wunsch/nw.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #define BLOCK_SIZE 32
8 | #define LIMIT -999
9 |
10 |
11 |
--------------------------------------------------------------------------------
/examples/runtime/needleman-wunsch_dGPU/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | # Kernel compiler
35 | CLC ?= 1
36 |
37 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
38 |
39 | # CLOC
40 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
41 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH}
42 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
43 |
44 | # ROCm-Device-lib
45 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
46 |
47 | # GPU compiler
48 | CLCC=$(AMDLLVM)/bin/clang
49 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
50 | CLCFLAGS += -target amdgcn--amdhsa
51 | CLCFLAGS += -mcpu=$(MCPU)
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
55 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
56 |
57 | # Host compiler
58 | CXX = g++
59 | CXXFLAGS =-O3 -g -std=c++11
60 | #CXXFLAGS += -v
61 |
62 | LIBS=-latmi_runtime
63 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
64 |
65 | OBJS = nw
66 |
# 'test' only runs the example and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Compile the kernel (through cloc.sh when CLC=1, otherwise with clang
# directly into nw.hsaco), then compile and link the host program.
nw: nw.cpp nw.cl
ifeq ($(CLC),1)
	$(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nw.cl
else
	$(CLCC) ${CLCFLAGS} -o nw.hsaco nw.cl
endif
	$(CXX) -o $@ nw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove build products.
clean:
	rm -rf *.o *.hsaco $(OBJS)

# Run the example (arguments: 512 10 10) with the ATMI and HSA library
# directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./nw 512 10 10
84 |
--------------------------------------------------------------------------------
/examples/runtime/needleman-wunsch_dGPU/nw.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #define BLOCK_SIZE 32
8 | #define LIMIT -999
9 |
10 |
11 |
--------------------------------------------------------------------------------
/examples/runtime/pcie_bw/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell ${ATMI_RUNTIME_PATH}/bin/mygpu)
32 | #$(info MCPU $(MCPU))
33 |
34 | BIBW ?= 0
35 | ifeq ($(BIBW),1)
36 | BIBW_FLAG=-DBIBW
37 | else
38 | BIBW_FLAG=
39 | endif
40 |
41 | # Host compiler
42 | CXX = g++
43 | CXXFLAGS =-O3 -g -std=c++11 $(BIBW_FLAG)
44 | #CXXFLAGS += -v
45 |
46 | INC_FLAGS=-I${ATMI_INC} -I.
47 |
48 | LIBS=-latmi_runtime
49 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
50 |
51 | OBJS = pcie_bw
52 |
# 'test' only runs the benchmark and never creates a file called "test", so it
# is declared phony together with 'all' and 'clean'. Without this, a stray
# file named "test" in this directory would turn `make test` into a no-op.
.PHONY: clean all test

# Default target: build every binary listed in OBJS.
all: $(OBJS)

# Host-only program: there is no kernel to compile for this example.
pcie_bw: pcie_bw.cpp
	$(CXX) -o $@ pcie_bw.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)

# Remove build products.
clean:
	rm -rf *.o $(OBJS)

# Run the benchmark with the ATMI and HSA library directories on the loader path.
test:
	env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./pcie_bw
65 |
--------------------------------------------------------------------------------
/examples/runtime/pcie_bw/pcie_bw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_runtime.h"
8 | #include
9 | #include
10 | #include
11 | #include
12 | using namespace std;
13 | #ifdef __cplusplus
14 | #define _CPPSTRING_ "C"
15 | #endif
16 | #ifndef __cplusplus
17 | #define _CPPSTRING_
18 | #endif
19 |
// Terminates the program with the failing source location when an ATMI call
// does not return ATMI_STATUS_SUCCESS.
//
// The body is wrapped in do { ... } while (0) so that `ErrorCheck(x);` is a
// single statement and can sit in an unbraced if/else branch, and the
// argument is parenthesised so that any expression can be passed. The
// argument is evaluated exactly once.
#define ErrorCheck(status)                                \
  do {                                                    \
    if ((status) != ATMI_STATUS_SUCCESS) {                \
      printf("Error at [%s:%d]\n", __FILE__, __LINE__);   \
      exit(1);                                            \
    }                                                     \
  } while (0)
#define NSECPERSEC 1000000000L
#define NTIMERS 13
/*
 * Returns end_time - start_time expressed in nanoseconds.
 *
 * Seconds and nanoseconds are subtracted separately; when the nanosecond
 * difference is negative, one second is borrowed from the seconds difference.
 */
long int get_nanosecs( struct timespec start_time, struct timespec end_time) {
  long int whole_secs = (long int) end_time.tv_sec - (long int) start_time.tv_sec;
  long int frac_nsecs = (long int) end_time.tv_nsec - (long int) start_time.tv_nsec;

  if (frac_nsecs < 0) {
    /* Borrow one second so the fractional part becomes non-negative. */
    whole_secs -= 1;
    frac_nsecs += NSECPERSEC;
  }

  return (whole_secs * NSECPERSEC) + frac_nsecs;
}
37 |
38 | int main(int argc, char **argv) {
39 | ErrorCheck(atmi_init(ATMI_DEVTYPE_ALL));
40 |
41 | int gpu_id = 0;
42 | int cpu_id = 0;
43 | atmi_machine_t *machine = atmi_machine_get_info();
44 | int gpu_count = machine->device_count_by_type[ATMI_DEVTYPE_GPU];
45 | if(argv[1] != NULL) gpu_id = (atoi(argv[1]) % gpu_count);
46 | printf("Choosing GPU %d/%d\n", gpu_id, gpu_count);
47 |
48 | struct timespec start_time[NTIMERS],end_time[NTIMERS];
49 | long int kcalls, nanosecs[NTIMERS];
50 | float bw[NTIMERS];
51 | kcalls = 100;
52 |
53 | /* Run HelloWorld on GPU */
54 | atmi_mem_place_t gpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu_id, 0);
55 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, cpu_id, 0);
56 | void *d_input, *d_output;
57 | atmi_taskgroup_handle_t group;
58 | ErrorCheck(atmi_taskgroup_create(&group));
59 |
60 | ATMI_CPARM(cparm);
61 | cparm->groupable = ATMI_TRUE;
62 | cparm->group = group;
63 |
64 | printf("Size (MB)\t");
65 | #ifdef BIBW
66 | printf("Bi-dir BW(MBps)\n");
67 | #else
68 | printf("H2D BW(MBps)\tD2H BW(MBps)\n");
69 | #endif
70 | const long MB = 1024 * 1024;
71 | for(long size = 1*MB; size <= 1024*MB; size *= 2) {
72 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time[0]);
73 | ErrorCheck(atmi_malloc(&d_input, size, cpu));
74 | ErrorCheck(atmi_malloc(&d_output, size, gpu));
75 | /* touch */
76 | memset(d_input, 0, size);
77 | ErrorCheck(atmi_memcpy(d_output, d_input, size));
78 | clock_gettime(CLOCK_MONOTONIC_RAW,&end_time[0]);
79 |
80 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time[1]);
81 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time[2]);
82 | for(int i=0; iplace = (atmi_place_t)ATMI_PLACE_CPU(0, 0);
22 | // default case for kernel enqueue: lparm->groupable = ATMI_TRUE;
23 | args_t args;
24 | args.arg1 = taskId;
25 |
26 | atmid_task_launch(lparm, K_ID_print_taskId_cpu, (void *)&args, sizeof(args_t));
27 | }
28 |
29 | __kernel void mainTask_gpu(long int numTasks) {
30 | int gid = get_global_id(0);
31 | if(gid % 64 == 0) {
32 | ATMI_LPARM_1D(lparm, 1);
33 | lparm->place = (atmi_place_t)ATMI_PLACE_GPU(0, 0);
34 | // default case for kernel enqueue: lparm->groupable = ATMI_TRUE;
35 | args_t args;
36 | args.arg1 = gid;
37 |
38 | atmid_task_launch(lparm, K_ID_subTask_gpu, (void *)&args, sizeof(args_t));
39 | }
40 | }
41 |
/* GPU variant of the print task: the body is empty, so taskId is ignored. */
__kernel void print_taskId_gpu(long int taskId) {
}
44 |
45 |
--------------------------------------------------------------------------------
/examples/runtime_denq/helloworld/hw.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | using namespace std;
10 | #include "atmi_runtime.h"
11 |
12 | enum {
13 | GPU_IMPL = 42,
14 | CPU_IMPL = 10565
15 | };
16 |
17 | extern "C" void print_taskId_cpu(long int *taskId)  // CPU implementation registered for print_kernel in main(); receives a pointer to the id
18 | {
19 | cout << "Leaf Sub-task ID" << ": " << *taskId << endl;
20 | }
21 |
22 | int main(int argc, char* argv[]) {  // registers hw.hsaco, creates three kernels and synchronously launches mainTask_gpu
23 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
24 | if(err != ATMI_STATUS_SUCCESS) return -1;
25 | const char *module = "hw.hsaco";
26 | atmi_platform_type_t module_type = AMDGCN;
27 | err = atmi_module_register(&module, &module_type, 1);
28 | if(err != ATMI_STATUS_SUCCESS) { atmi_finalize(); return -1; }  // the GPU kernels below come from this module; stop if it failed to load
29 | atmi_kernel_t main_kernel, sub_kernel, print_kernel;
30 | const unsigned int num_args = 1;
31 | size_t arg_sizes[] = { sizeof(long int) };
32 | atmi_kernel_create(&main_kernel, num_args, arg_sizes,
33 | 1,
34 | ATMI_DEVTYPE_GPU, "mainTask_gpu");
35 | atmi_kernel_create(&print_kernel, num_args, arg_sizes,
36 | 2,
37 | ATMI_DEVTYPE_GPU, "print_taskId_gpu",
38 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)print_taskId_cpu);
39 | atmi_kernel_create(&sub_kernel, num_args, arg_sizes,
40 | 1,
41 | ATMI_DEVTYPE_GPU, "subTask_gpu");
42 |
43 | unsigned long int numTasks = 4;
44 | ATMI_LPARM_1D(lparm, 64 * numTasks);  // 64 work-items per task: the kernel enqueues from every 64th work-item
45 | //lparm->WORKITEMS = numTasks;
46 | //lparm->groupDim[0] = numTasks;
47 | lparm->synchronous = ATMI_TRUE;
48 | lparm->place = ATMI_PLACE_GPU(0, 0);
49 | lparm->groupable = ATMI_TRUE;
50 | //lparm->kernel_id = 0;//GPU_IMPL;
51 |
52 | void *args[] = { &numTasks };
53 | atmi_task_launch(lparm, main_kernel, args);
54 |
55 | printf("Done!\n");
56 |
57 | atmi_kernel_release(main_kernel);
58 | atmi_kernel_release(print_kernel);
59 | atmi_kernel_release(sub_kernel);
60 | atmi_finalize();
61 | return 0;
62 | }
63 |
--------------------------------------------------------------------------------
/examples/runtime_denq/kps/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell mymcpu)
32 |
33 | # Kernel compiler
34 | CLC ?= 1
35 |
36 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
37 |
38 | # CLOC
39 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
40 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} -atmipath ${ATMI_RUNTIME_PATH}
41 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
42 |
43 | # ROCm-Device-lib
44 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
45 |
46 | # GPU compiler
47 | CLCC=$(AMDLLVM)/bin/clang
48 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
49 | CLCFLAGS += -target amdgcn--amdhsa
50 | CLCFLAGS += -mcpu=$(MCPU)
51 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
55 |
56 | # Host compiler
57 | CXX = g++
58 | CXXFLAGS =-O3 -g -std=c++11
59 | #CXXFLAGS += -v
60 |
61 | LIBS=-latmi_runtime
62 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
63 |
64 | OBJS = kps
65 |
66 | .PHONY: clean all test
67 |
68 | all: $(OBJS)
69 |
70 | kps: kps.cpp nullKernel.cl
71 | ifeq ($(CLC),1)
72 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 nullKernel.cl
73 | else
74 | $(CLCC) ${CLCFLAGS} -o nullKernel.hsaco nullKernel.cl
75 | endif
76 | $(CXX) -o $@ kps.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
77 |
78 | clean:
79 | rm -rf *.o *.hsaco $(OBJS)
80 |
81 | test:
82 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./kps
83 |
84 |
--------------------------------------------------------------------------------
/examples/runtime_denq/kps/kps.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "stdio.h"
8 | #include "stdlib.h"
9 | #include "string.h"
10 | #include
11 | #include
12 | #include
13 | #include "atmi_runtime.h"
14 | #define NSECPERSEC 1000000000L
15 |
16 | void print_timing(const char *title,
17 | int kcalls,
18 | struct timespec *start_time,
19 | struct timespec *end_launch_time,
20 | struct timespec *end_time);
21 |
22 | static int count = 0;  // file-scope call counter shared by every subTask_cpu invocation
23 | extern "C" void subTask_cpu() {  // CPU implementation registered for sub_kernel: prints, then increments, the counter
24 | //static int count = 0;
25 | printf("Counter: %d\n", count++);  // NOTE(review): plain int; unsynchronized if CPU tasks can run on several threads - confirm
26 | }
27 |
28 | enum {
29 | K_ID_mainTask_gpu = 0,
30 | K_ID_mainTask_recursive_gpu,
31 | K_ID_mainTask_binary_tree_gpu,
32 | K_ID_mainTask_flat_gpu
33 | };
34 |
35 | enum {
36 | K_ID_subTask_gpu = 0,
37 | K_ID_subTask_cpu
38 | };
39 |
40 | int main(int argc, char *argv[]) {
41 | struct timespec start_time;
42 | struct timespec end_time;
43 | struct timespec end_launch_time;
44 | long int nanosecs;
45 | float kps;
46 |
47 | long int kcalls = 16;
48 |
49 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
50 | if(err != ATMI_STATUS_SUCCESS) return -1;
51 | const char *module = "nullKernel.hsaco";
52 | atmi_platform_type_t module_type = AMDGCN;
53 | err = atmi_module_register(&module, &module_type, 1);
54 |
55 | atmi_kernel_t main_kernel, sub_kernel;
56 | const unsigned int main_num_args = 1;
57 | size_t main_arg_sizes[] = { sizeof(long int) };
58 | atmi_kernel_create(&main_kernel, main_num_args, main_arg_sizes,
59 | 1,
60 | ATMI_DEVTYPE_GPU, "mainTask_gpu"
61 | //ATMI_DEVTYPE_GPU, "mainTask_recursive_gpu",
62 | //ATMI_DEVTYPE_GPU, "mainTask_binary_tree_gpu",
63 | //ATMI_DEVTYPE_GPU, "mainTask_flat_gpu"
64 | );
65 | atmi_kernel_create(&sub_kernel, 0, NULL,
66 | 2,
67 | ATMI_DEVTYPE_GPU, "subTask_gpu",
68 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)subTask_cpu);
69 |
70 |
71 | ATMI_LPARM_1D(lparm, 1);
72 | //lparm->WORKITEMS = numTasks;
73 | lparm->groupDim[0] = 64;
74 | lparm->synchronous = ATMI_TRUE;
75 | lparm->place = ATMI_PLACE_GPU(0, 0);
76 | lparm->groupable = ATMI_TRUE;
77 | //lparm->kernel_id = K_ID_subTask_gpu;
78 | atmi_task_launch(lparm, sub_kernel, NULL);
79 |
80 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time);
81 | lparm->WORKITEMS = kcalls * 64;
82 | //lparm->kernel_id = K_ID_mainTask_gpu;
83 | void *args[] = { &kcalls };
84 | atmi_task_launch(lparm, main_kernel, args);
85 | clock_gettime(CLOCK_MONOTONIC_RAW,&end_launch_time);
86 | //atmi_taskgroup_wait(stream);
87 | clock_gettime(CLOCK_MONOTONIC_RAW,&end_time);
88 | print_timing("Synchronous Flat Execution (DP)",
89 | kcalls, &start_time,
90 | &end_launch_time, &end_time);
91 |
92 | lparm->WORKITEMS = 64;
93 | //lparm->kernel_id = K_ID_subTask_gpu;
94 | clock_gettime(CLOCK_MONOTONIC_RAW,&start_time);
95 | for(int i=0; iplace = (atmi_place_t)ATMI_PLACE_GPU(0, 0);
23 | // default case for kernel enqueue: lparm->groupable = ATMI_TRUE;
24 | for(long int i = 0; i < numTasks/num_wavefronts; i++)
25 | atmid_task_launch(lparm, K_ID_subTask, NULL, 0);
26 | }
27 | }
28 |
29 |
--------------------------------------------------------------------------------
/examples/runtime_denq/reduction/Makefile:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | SHELL=/bin/bash
7 |
8 | #BUILDROOT
9 | BUILDROOT = $(shell pwd | sed 's/examples.*$$//')
10 | #$(info BUILDROOT $(BUILDROOT))
11 | -include $(BUILDROOT)/atmi-config.mak
12 |
13 | #Set ATMI Environment variables
14 | ATMI_RUNTIME_PATH ?= /opt/rocm/atmi
15 |
16 | ATMI_BIN ?= ${ATMI_RUNTIME_PATH}/bin
17 | ATMI_INC ?= ${ATMI_RUNTIME_PATH}/include
18 | ATMI_LIB ?= ${ATMI_RUNTIME_PATH}/lib
19 |
20 | #Set HSA Environment variables
21 | HSA_RUNTIME_PATH ?= /opt/rocm/hsa
22 | #Set ROCM device environment variables
23 | ROCM_DEVICE_PATH ?= /opt/rocm
24 |
25 | #Set LC Environment variables
26 | AMDLLVM ?= /opt/amd/llvm
27 |
28 | AMDGPU_TARGET_TRIPLE ?= amdgpu--amdhsa
29 |
30 | #MCPU
31 | MCPU ?= $(shell mymcpu)
32 |
33 | # Kernel compiler
34 | CLC ?= 1
35 |
36 | INC_FLAGS=-I${ATMI_RUNTIME_PATH}/include -I${HSA_RUNTIME_PATH}/include -I.
37 |
38 | # CLOC
39 | CLOC_PATH ?= ${ATMI_RUNTIME_PATH}/bin
40 | CLOCOPTS = -vv -aomp ${AMDLLVM} -triple ${AMDGPU_TARGET_TRIPLE} -libgcn ${ROCM_DEVICE_PATH} -atmipath ${ATMI_RUNTIME_PATH}
41 | CLOCOPTS += -clopts "$(INC_FLAGS) -O2 -v"
42 |
43 | # ROCm-Device-lib
44 | BITCODE_LIB ?= ${ROCMLIB}/dist/lib
45 |
46 | # GPU compiler
47 | CLCC=$(AMDLLVM)/bin/clang
48 | CLCFLAGS = -x cl -Xclang -cl-std=CL2.0 -Xclang -finclude-default-header
49 | CLCFLAGS += -target amdgcn--amdhsa
50 | CLCFLAGS += -mcpu=$(MCPU)
51 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/opencl.amdgcn.bc
52 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ockl.amdgcn.bc
53 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/ocml.amdgcn.bc
54 | CLCFLAGS += -Xclang -mlink-bitcode-file -Xclang $(BITCODE_LIB)/irif.amdgcn.bc
55 |
56 | # Host compiler
57 | CXX = g++
58 | CXXFLAGS =-O3 -g -std=c++11
59 | #CXXFLAGS += -v
60 |
61 | LIBS=-latmi_runtime
62 | LIB_FLAGS=-L${ATMI_RUNTIME_PATH}/lib -L${HSA_RUNTIME_PATH}/lib
63 |
64 | OBJS = reduction
65 |
66 | .PHONY: clean all test
67 |
68 | all: $(OBJS)
69 |
70 | reduction: reduction.cpp reduction.cl
71 | ifeq ($(CLC),1)
72 | $(CLOC_PATH)/cloc.sh ${CLOCOPTS} -opt 2 reduction.cl
73 | else
74 | $(CLCC) ${CLCFLAGS} -o reduction.hsaco reduction.cl
75 | endif
76 | $(CXX) -o $@ reduction.cpp $(CXXFLAGS) $(LIBS) $(LIB_FLAGS) $(INC_FLAGS)
77 |
78 | clean:
79 | rm -rf *.o *.hsaco $(OBJS)
80 |
81 | test:
82 | env LD_LIBRARY_PATH=${ATMI_RUNTIME_PATH}/lib:${HSA_RUNTIME_PATH}/lib:${LD_LIBRARY_PATH} ./reduction
83 |
84 |
--------------------------------------------------------------------------------
/examples/runtime_denq/reduction/reduction.cl:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include "atmi_kl.h"
8 |
9 | enum {
10 | reduction_task = 0,
11 | };
12 |
13 | typedef struct args_r {
14 | int *in;
15 | unsigned long length;
16 | } args_t;
17 |
18 | kernel void reduction_gpu(__global int* in, unsigned long length) {  // one reduction step: folds in[length..2*length) into in[0..length)
19 | int num = get_global_id(0);
20 |
21 | in[num] += in[num + length];
22 |
23 | barrier(CLK_GLOBAL_MEM_FENCE);  // NOTE(review): barrier() only synchronizes one work-group; confirm the launch never spans several
24 |
25 | if(num == 0)  // a single work-item enqueues the next step
26 | {
27 | length = length >> 1;
28 | ATMI_LPARM_1D(lparm, length);
29 | if(length > 8)
30 | lparm->place = (atmi_place_t)ATMI_PLACE_GPU(0, 0);
31 | else
32 | lparm->place = (atmi_place_t)ATMI_PLACE_CPU(0, 0);  // halved length of 8 or less is placed on the CPU
33 | args_t args;
34 | args.in = in;
35 | args.length = length;
36 | atmid_task_launch(lparm, reduction_task, (void *)&args, sizeof(args_t));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/examples/runtime_denq/reduction/reduction.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 |
7 | #include
8 | #include
9 | #include
10 | using namespace std;
11 | #include "atmi_runtime.h"
12 |
13 | extern "C" void reduction_cpu(int **in_ptr, long unsigned *length_ptr) {  // folds the upper half into the lower half until the sum is in in[0]
14 | int *in = *in_ptr;
15 | long unsigned length = *length_ptr;
16 | long unsigned num;  // same type as length: avoids narrowing to int and signed/unsigned comparison
17 | for (num = length; num > 0; num >>= 1)  // halve the stride on every pass
18 | for(long unsigned j = 0; j < num; j++)
19 | in[j] += in[j + num];  // the first pass reads up to index 2*length-1
20 | }
21 |
22 | int main(int argc, char* argv[]) {  // sums 0..length-1 with a GPU reduction that hands off to the CPU for the last steps
23 | atmi_status_t err = atmi_init(ATMI_DEVTYPE_ALL);
24 | if(err != ATMI_STATUS_SUCCESS) return -1;
25 | const char *module = "reduction.hsaco";
26 | atmi_platform_type_t module_type = AMDGCN;
27 | err = atmi_module_register(&module, &module_type, 1);
28 | if(err != ATMI_STATUS_SUCCESS) { atmi_finalize(); return -1; }  // the GPU kernel comes from this module; stop if it failed to load
29 | atmi_kernel_t reduction_kernel;
30 | const unsigned int num_args = 2;
31 | size_t arg_sizes[] = { sizeof(int *), sizeof(long unsigned) };
32 | atmi_kernel_create(&reduction_kernel, num_args, arg_sizes,
33 | 2,
34 | ATMI_DEVTYPE_GPU, "reduction_gpu",
35 | ATMI_DEVTYPE_CPU, (atmi_generic_fp)reduction_cpu);
36 |
37 | long unsigned length = 1024;
38 | int *input = nullptr;  // stays null if the allocation below fails
39 | atmi_mem_place_t cpu = ATMI_MEM_PLACE(ATMI_DEVTYPE_CPU, 0, 0);
40 | err = atmi_malloc((void **)&input, sizeof(int) * length, cpu);
41 | if(err != ATMI_STATUS_SUCCESS || input == nullptr) { atmi_kernel_release(reduction_kernel); atmi_finalize(); return -1; }
42 | for(long unsigned ii = 0; ii < length; ii++)
43 | input[ii] = (int)ii;
44 |
45 | ATMI_LPARM_1D(lparm, length >> 1);  // one work-item per pair of elements
46 | lparm->synchronous = ATMI_TRUE;
47 | lparm->place = ATMI_PLACE_GPU(0, 0);
48 | lparm->groupable = ATMI_TRUE;
49 | //lparm->kernel_id = K_ID_reduction_gpu;
50 |
51 | long unsigned arg_length = length >> 1;
52 | void *args[] = { &input, &arg_length };
53 | atmi_task_launch(lparm, reduction_kernel, args);
54 |
55 | printf("Sum: %d\n", input[0]);
56 |
57 | atmi_free(input);
58 | atmi_kernel_release(reduction_kernel);
59 | atmi_finalize();
60 | return 0;
61 | }
62 |
--------------------------------------------------------------------------------
/include/atmi_c_ext.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef INCLUDE_ATMI_C_EXT_H_
7 | #define INCLUDE_ATMI_C_EXT_H_
8 |
9 | #include
10 | /** \defgroup Helper macros when using ATMI C Extension feature.
11 | * @{
12 | */
13 | #ifdef __cplusplus
14 | #define _CPPSTRING_ "C"
15 | #endif
16 | #ifndef __cplusplus
17 | #define _CPPSTRING_
18 | #endif
19 | /**
20 | * @brief \deprecated Predefined function calling a null CPU task.
21 | */
22 | extern _CPPSTRING_ atmi_task_handle_t __sync_kernel_pif(atmi_lparm_t* lparm);
23 |
24 | /**
25 | * @brief \deprecated Helper macros that call __sync_kernel_pif synchronously:
26 | * SYNC_STREAM(s) sets group = s (groupable); SYNC_TASK(t) sets t as the one required task.
27 | */
28 | #define SYNC_STREAM(s) \
29 | { \
30 | ATMI_LPARM(__lparm_sync_kernel); \
31 | __lparm_sync_kernel->synchronous = ATMI_TRUE; \
32 | __lparm_sync_kernel->groupable = ATMI_TRUE; \
33 | __lparm_sync_kernel->group = s; \
34 | __sync_kernel_pif(__lparm_sync_kernel); \
35 | }
36 |
37 | #define SYNC_TASK(t) \
38 | { \
39 | ATMI_LPARM(__lparm_sync_kernel); \
40 | __lparm_sync_kernel->synchronous = ATMI_TRUE; \
41 | __lparm_sync_kernel->num_required = 1; \
42 | __lparm_sync_kernel->requires = &t; \
43 | __sync_kernel_pif(__lparm_sync_kernel); \
44 | }
45 | /**
46 | * @}
47 | */
48 |
49 | #endif // INCLUDE_ATMI_C_EXT_H_
50 |
--------------------------------------------------------------------------------
/include/atmi_interop_hsa.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef INCLUDE_ATMI_INTEROP_HSA_H_
7 | #define INCLUDE_ATMI_INTEROP_HSA_H_
8 |
9 | #include "atmi_runtime.h"
10 | #include "hsa/hsa.h"
11 | #include "hsa/hsa_ext_amd.h"
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | /** \defgroup interop_hsa_functions ATMI-HSA Interop
18 | * @{
19 | */
20 | /**
21 | * @brief Get the HSA compute agent from the ATMI compute place.
22 | *
23 | * @details Use this function to query more details about the underlying HSA
24 | * agent.
25 | *
26 | * @param[in] proc The ATMI compute place
27 | *
28 | * @param[out] agent Pointer to a non-NULL @p hsa_agent_t structure that will
29 | * hold the HSA agent corresponding to @p proc
30 | * (the return value).
31 | *
32 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
33 | *
34 | * @retval ::ATMI_STATUS_ERROR If @p proc is an invalid location in the current
35 | * node, or
36 | * if ATMI is not initialized.
37 | *
38 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
39 | */
40 | atmi_status_t atmi_interop_hsa_get_agent(atmi_place_t proc, hsa_agent_t *agent);
41 |
42 | /**
43 | * @brief Get the HSA memory pool handle from the ATMI memory place.
44 | *
45 | * @details Use this function to query more details about the underlying HSA
46 | * memory
47 | * pool handle.
48 | *
49 | * @param[in] memory The ATMI memory place
50 | *
51 | * @param[out] pool Pointer to a non-NULL @p hsa_amd_memory_pool_t structure that
52 | * will hold the HSA memory pool handle corresponding to @p memory
53 | * (the return value).
54 | *
55 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
56 | *
57 | * @retval ::ATMI_STATUS_ERROR If @p memory is an invalid location in the
58 | * current node, or
59 | * if ATMI is not initialized.
60 | *
61 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
62 | */
63 | atmi_status_t atmi_interop_hsa_get_memory_pool(atmi_mem_place_t memory,
64 | hsa_amd_memory_pool_t *pool);
65 |
66 | /**
67 | * @brief Get the device address and size of an HSA global symbol
68 | *
69 | * @details Use this function to query the device address and size of an HSA
70 | * global symbol.
71 | * The symbol can be set by the compiler or by the application writer in a
72 | * language-specific manner. This function is meaningful only after calling one
73 | * of the @p atmi_module_register functions.
74 | *
75 | * @param[in] place The ATMI memory place
76 | *
77 | * @param[in] symbol Pointer to a non-NULL global symbol name
78 | *
79 | * @param[out] var_addr Pointer to a non-NULL @p void* variable that will
80 | * hold the device address of the global symbol object.
81 | *
82 | * @param[out] var_size Pointer to a non-NULL @p unsigned @p int variable that will
83 | * hold the size of the global symbol object.
84 | *
85 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
86 | *
87 | * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are
88 | * invalid, if @p place is an invalid
89 | * location in the current node, or if ATMI is not initialized.
90 | *
91 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
92 | */
93 | atmi_status_t atmi_interop_hsa_get_symbol_info(atmi_mem_place_t place,
94 | const char *symbol,
95 | void **var_addr,
96 | unsigned int *var_size);
97 |
98 | /**
99 | * @brief Get the HSA-specific kernel info from a kernel name
100 | *
101 | * @details Use this function to query the HSA-specific kernel info from the
102 | * kernel name.
103 | * This function is meaningful only after calling one
104 | * of the @p atmi_module_register functions.
105 | *
106 | * @param[in] place The ATMI memory place
107 | *
108 | * @param[in] kernel_name Pointer to a char array with the kernel name
109 | *
110 | * @param[in] info The kernel property to query
111 | *
112 | * @param[out] value Pointer to a non-NULL @p uint32_t variable that will
113 | * hold the return value of the kernel property.
114 | *
115 | * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully.
116 | *
117 | * @retval ::ATMI_STATUS_ERROR If @p kernel_name or @p value are
118 | * invalid, if @p place is an invalid
119 | * location in the current node, or if ATMI is not initialized.
120 | *
121 | * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors.
122 | */
123 | atmi_status_t atmi_interop_hsa_get_kernel_info(
124 | atmi_mem_place_t place, const char *kernel_name,
125 | hsa_executable_symbol_info_t info, uint32_t *value);
126 | /** @} */
127 |
128 | #ifdef __cplusplus
129 | }
130 | #endif
131 |
132 | #endif // INCLUDE_ATMI_INTEROP_HSA_H_
133 |
--------------------------------------------------------------------------------
/include/atmi_kl.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef INCLUDE_ATMI_KL_H_
7 | #define INCLUDE_ATMI_KL_H_
8 |
9 | #include
10 |
11 | extern void atmid_task_launch(atmi_lparm_t *lp, unsigned long kernel_id,  // task enqueue called from OpenCL kernels in the examples: launch params + kernel id
12 | void *args_region,  // packed argument struct (the examples pass &args or NULL)
13 | unsigned long args_region_size);  // size of args_region (the examples pass sizeof(args_t) or 0)
14 |
15 | #endif // INCLUDE_ATMI_KL_H_
16 |
--------------------------------------------------------------------------------
/src/cmake_modules/FindLibElf.cmake:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 |
7 | # Below are the variables that will be set at the end of this file
8 | # LIBELF_FOUND
9 | # LIBELF_INCLUDE_DIRS
10 | # LIBELF_LIBRARIES
11 |
12 |
13 | find_path (LIBELF_INCLUDE_DIRS
14 | NAMES
15 | libelf.h
16 | PATHS
17 | /usr/include
18 | /usr/local/include
19 | ENV CPATH)
20 |
21 | find_library (LIBELF_LIBRARIES
22 | NAMES
23 | elf
24 | PATHS
25 | /usr/lib/x86_64-linux-gnu
26 | /usr/lib
27 | /usr/local/lib
28 | ENV LIBRARY_PATH
29 | ENV LD_LIBRARY_PATH)
30 |
31 | # set LIBELF_FOUND to TRUE if the below variables are true,
32 | # i.e. header and lib files are found
33 | include (FindPackageHandleStandardArgs)
34 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibElf DEFAULT_MSG
35 | LIBELF_LIBRARIES
36 | LIBELF_INCLUDE_DIRS)
37 |
38 | mark_as_advanced(LIBELF_INCLUDE_DIRS LIBELF_LIBRARIES)
39 |
--------------------------------------------------------------------------------
/src/cmake_modules/FindROCm.cmake:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 |
7 | # Below are the variables that will be set at the end of this file
8 | # ROCM_FOUND
9 | # ROCM_LIBRARIES
10 | # ROCM_INCLUDE_DIRS
11 | # ROCM_VERSION
12 | # ROCM_VERSION_MAJOR
13 | # ROCM_VERSION_MINOR
14 | # ROCM_VERSION_PATCH
15 | # ROCM_VERSION_STRING
16 |
17 | find_path(
18 | ROCM_INCLUDE_DIRS
19 | hsa/hsa.h
20 | HINTS
21 | ${ROC_DIR}/include
22 | ${ROCR_DIR}/include
23 | /opt/rocm/include
24 | ENV CPATH
25 | )
26 |
27 | find_library(
28 | ROCR_LIBRARY
29 | hsa-runtime64
30 | HINTS
31 | ${ROC_DIR}/lib
32 | ${ROC_DIR}
33 | ${ROCR_DIR}/lib
34 | ${ROCR_DIR}
35 | /opt/rocm/lib
36 | /usr/local/lib
37 | /usr/lib/x86_64-linux-gnu
38 | /usr/lib
39 | ENV LIBRARY_PATH
40 | ENV LD_LIBRARY_PATH
41 | )
42 | find_library(
43 | ROCT_LIBRARY
44 | hsakmt
45 | HINTS
46 | ${ROC_DIR}/lib
47 | ${ROC_DIR}
48 | ${ROCT_DIR}/lib
49 | ${ROCT_DIR}
50 | /opt/rocm/lib
51 | /usr/local/lib
52 | /usr/lib/x86_64-linux-gnu
53 | /usr/lib
54 | ENV LIBRARY_PATH
55 | ENV LD_LIBRARY_PATH
56 | )
57 | get_filename_component (ROCM_LIBRARIES_DIR ${ROCR_LIBRARY} DIRECTORY)
58 | set(ROCM_LIBRARIES ${ROCR_LIBRARY} ${ROCT_LIBRARY})
59 | #message(STATUS "ROCm libraries: ${ROCM_LIBRARIES}")
60 | #message(STATUS "ROCm libraries dir: ${ROCM_LIBRARIES_DIR}")
61 |
62 | if(NOT ROCM_VERSION)
63 | # Do not use the metapackage version number because it is error-prone.
64 | # Use ROCr version number directly if there is a way to infer it.
65 | # Until then, set the ROCm version to 0.0.0 as default.
66 | # file(GLOB version_files
67 | # LIST_DIRECTORIES false
68 | # /opt/rocm/.info/version*
69 | # )
70 | # list(GET version_files 0 version_file)
71 | # # Compute the version
72 | # execute_process(
73 | # COMMAND cat ${version_file}
74 | # OUTPUT_VARIABLE _rocm_version
75 | # ERROR_VARIABLE _rocm_error
76 | # OUTPUT_STRIP_TRAILING_WHITESPACE
77 | # ERROR_STRIP_TRAILING_WHITESPACE
78 | # )
79 | # if(NOT _rocm_error)
80 | # set(ROCM_VERSION ${_rocm_version} CACHE STRING "Version of ROCm as found in /opt/rocm/.info/version*")
81 | # else()
82 | # set(ROCM_VERSION "0.0.0" CACHE STRING "Version of ROCm set to default")
83 | # endif()
84 | set(ROCM_VERSION "0.0.0" CACHE STRING "Version of ROCm set to default")
85 | mark_as_advanced(ROCM_VERSION)
86 | endif()
87 |
88 | string(REPLACE "." ";" _rocm_version_list "${ROCM_VERSION}")
89 | list(GET _rocm_version_list 0 ROCM_VERSION_MAJOR)
90 | list(GET _rocm_version_list 1 ROCM_VERSION_MINOR)
91 | list(GET _rocm_version_list 2 ROCM_VERSION_PATCH)
92 | set(ROCM_VERSION_STRING "${ROCM_VERSION}")
93 |
94 | # set ROCM_FOUND to TRUE if the below variables are true,
95 | # i.e. header and lib files are found
96 | include(FindPackageHandleStandardArgs)
97 | find_package_handle_standard_args(ROCM DEFAULT_MSG
98 | ROCM_LIBRARIES ROCR_LIBRARY ROCT_LIBRARY  # also test each library: the combined list can test true when only hsakmt was found
99 | ROCM_INCLUDE_DIRS
100 | ROCM_VERSION
101 | ROCM_VERSION_STRING
102 | )
103 |
104 | mark_as_advanced(
105 | ROCM_LIBRARIES
106 | ROCM_INCLUDE_DIRS
107 | ROCM_VERSION
108 | ROCM_VERSION_MAJOR
109 | ROCM_VERSION_MINOR
110 | ROCM_VERSION_PATCH
111 | ROCM_VERSION_STRING
112 | )
113 |
--------------------------------------------------------------------------------
/src/cmake_modules/utils.cmake:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 |
7 |
8 | ## Parses VERSION_STRING and sets in the caller's scope: VERSION_MAJOR,
9 | ## VERSION_MINOR, VERSION_PATCH (the first three digit runs found), VERSION_BUILD
10 | ## (text after the first "-", when present) and a dotted VERSION_STRING.
11 | function( parse_version VERSION_STRING )
12 |
13 | string ( FIND ${VERSION_STRING} "-" STRING_INDEX )
14 |
15 | if ( ${STRING_INDEX} GREATER -1 )
16 | math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
17 | string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD )
18 | endif ()
19 |
20 | string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} )
21 | list ( LENGTH VERSIONS VERSION_COUNT )
22 |
23 | if ( ${VERSION_COUNT} GREATER 0)
24 | list ( GET VERSIONS 0 MAJOR )
25 | set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
26 | set ( TEMP_VERSION_STRING "${MAJOR}" )
27 | endif ()
28 |
29 | if ( ${VERSION_COUNT} GREATER 1 )
30 | list ( GET VERSIONS 1 MINOR )
31 | set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
32 | set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
33 | endif ()
34 |
35 | if ( ${VERSION_COUNT} GREATER 2 )
36 | list ( GET VERSIONS 2 PATCH )
37 | set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
38 | set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
39 | endif ()
40 |
41 | if ( DEFINED VERSION_BUILD )
42 | set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
43 | endif ()
44 |
45 | set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )
46 |
47 | endfunction ()
48 |
49 | ## Gets the current version of the repository
50 | ## using versioning tags and git describe.
51 | ## Passes back a packaging version string
52 | ## and a library version string.
53 | function ( get_version DEFAULT_VERSION_STRING )
54 |
55 | parse_version ( ${DEFAULT_VERSION_STRING} )  # start from the default; replaced below if a matching git tag exists
56 |
57 | find_program ( GIT NAMES git )
58 |
59 | if ( GIT )
60 |
61 | execute_process ( COMMAND ${GIT} describe --dirty --tags --long --match atmi-[0-9]*
62 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
63 | OUTPUT_VARIABLE GIT_TAG_STRING
64 | OUTPUT_STRIP_TRAILING_WHITESPACE
65 | RESULT_VARIABLE RESULT )
66 |
67 | if ( ${RESULT} EQUAL 0 )
68 |
69 | parse_version ( ${GIT_TAG_STRING} )
70 |
71 | endif ()
72 |
73 | endif ()
74 |
75 | set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
76 | set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
77 | set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
78 | set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
79 | set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
80 |
81 | endfunction()
82 |
--------------------------------------------------------------------------------
/src/compiler/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | # Opt-in gate: the C extension is only configured when ATMI_C_EXTENSION is exactly "on" (any letter case).
7 | string( TOLOWER "${ATMI_C_EXTENSION}" ATMI_C_EXTENSION_VAR )
8 | if(NOT ATMI_C_EXTENSION_VAR MATCHES "^on$")
9 |   libatmi_runtime_say("Not building ATMI C Extension. Use -DATMI_C_EXTENSION=on in your cmake options to enable.")
10 |   return()
11 | endif()
12 |
13 | if(ROCM_FOUND)
14 |   if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
15 |     set (CMAKE_C_COMPILER g++)
16 |     set (EXECUTE_COMMAND ${CMAKE_C_COMPILER} -print-file-name=plugin)
17 |     execute_process(COMMAND ${EXECUTE_COMMAND} RESULT_VARIABLE rv OUTPUT_VARIABLE ov)
18 |     # Run `-print-file-name=plugin` to locate the GCC plugin directory and strip
19 |     # surrounding whitespace. When plugin support is not installed the compiler
20 |     # echoes the bare word "plugin"; otherwise it prints the directory path.
21 |     # ${ov} is quoted so that empty output (e.g. the compiler could not be run)
22 |     # falls through to "not found" instead of aborting with a string() argument error.
23 |     string(STRIP "${ov}" new_ov)
24 |
25 |     if(new_ov STREQUAL "plugin")
26 |       libatmi_runtime_say("GCC Plugin not found")
27 |       set(PLUGIN_FOUND 0)
28 |     else()
29 |       if(EXISTS "${new_ov}/include/gcc-plugin.h"
30 |          AND EXISTS "${new_ov}/include/print-tree.h")
31 |         libatmi_runtime_say("GCC Plugin found. Preparing to build ATMI C extensions.")
32 |         include_directories(${new_ov}/include)
33 |         set(PLUGIN_FOUND 1)
34 |       else()
35 |         libatmi_runtime_say("GCC Plugin (gcc-plugin.h or print-tree.h) not found")
36 |         set(PLUGIN_FOUND 0)
37 |       endif()
38 |     endif()
39 |
40 |     if(PLUGIN_FOUND)
41 |       # Enable support for C++11?
42 |       #add_definitions(-std=c++11)
43 |
44 |       # If building this library in debug mode, we define a macro to enable
45 |       # dumping progress messages at runtime.
46 |       string( TOLOWER "${CMAKE_BUILD_TYPE}" ATMI_CMAKE_BUILD_TYPE)
47 |       if(ATMI_CMAKE_BUILD_TYPE MATCHES debug)
48 |         add_definitions(-DDEBUG)
49 |         add_definitions(-g)
50 |         add_definitions(-O0)
51 |       else()
52 |         add_definitions(-g)
53 |         add_definitions(-O2)
54 |       endif()
55 |       add_definitions(-c)
56 |       add_definitions(-fpic)
57 |
58 |       # Must precede add_library(): a target's INSTALL_RPATH is initialised from CMAKE_INSTALL_RPATH when the target is created.
59 |       if (NOT CMAKE_INSTALL_RPATH)
60 |         set(CMAKE_INSTALL_RPATH "$ORIGIN;$ORIGIN/../../hsa/lib;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64")
61 |       endif ()
62 |
63 |       add_library(atmi_cplugin SHARED
64 |         atl_pifgen_plugin.c
65 |         atl_synckernel.c
66 |       )
67 |
68 |       include_directories(${ROCM_INCLUDE_DIRS})
69 |       include_directories(${CMAKE_CURRENT_SOURCE_DIR})
70 |       include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
71 |       include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include)
72 |
73 |       # Install plugin under the lib destination folder; reuse the lower-cased build type so "debug"/"Debug"/"DEBUG" agree with the flags above.
74 |       if(ATMI_CMAKE_BUILD_TYPE MATCHES debug)
75 |         install(TARGETS atmi_cplugin LIBRARY DESTINATION "lib-debug" COMPONENT cplugin )
76 |       else()
77 |         install(TARGETS atmi_cplugin LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT cplugin )
78 |       endif()
79 |
80 |       # NOTE(review): this path climbs one level higher than the ../../include directory added above - confirm atmi_c_ext.h really lives there.
81 |       INSTALL(FILES
82 |         ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/atmi_c_ext.h
83 |         DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
84 |         COMPONENT cplugin
85 |       )
86 |
87 |       ## CPack component info
88 |       set(CPACK_COMPONENT_CPLUGIN_DISPLAY_NAME "ATMI C Plugin")
89 |       set(CPACK_COMPONENT_CPLUGIN_DEPENDS runtime)
90 |
91 |       target_link_libraries(
92 |         atmi_cplugin
93 |         ${ROCM_LIBRARIES}
94 |         -L${ROCM_LIBRARIES_DIR}
95 |         -Wl,--enable-new-dtags
96 |       )
97 |     else()
98 |       libatmi_runtime_say("Not building ATMI C Extension: GCC Plugin not found.")
99 |     endif()
100 |   else()
101 |     libatmi_runtime_say("Not building ATMI C Extension: only support ATMI in Linux x86_64, ppc64le or aarch64 hosts.")
102 |   endif()
103 | else()
104 |   libatmi_runtime_say("Not building ATMI C Extension: libhsa-runtime64 not found")
105 | endif()
106 |
--------------------------------------------------------------------------------
/src/compiler/atl_synckernel.c:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #include
7 | #include "atl_internal.h"
8 | #include "atmi.h"
9 | /* _CPPSTRING_ expands to "C" in C++ translation units, giving the symbols below C linkage; in C it expands to nothing. */
10 | #if defined(__cplusplus)
11 | #define _CPPSTRING_ "C"
12 | #else
13 | #define _CPPSTRING_
14 | #endif
15 | /* Null kernel: an empty function, plus a wrapper that simply forwards to it. */
16 | extern _CPPSTRING_ void __sync_kernel() {}
17 | extern _CPPSTRING_ void __sync_kernel_wrapper() { __sync_kernel(); }
18 | /* Set to 1 by __sync_kernel_pif once it has called atmi_init(ATMI_DEVTYPE_CPU). */
19 | static int cpu_initalized = 0;
20 | /* One row of a PIF dispatch table: the device type of the entry plus its CPU function pointer and GPU kernel string. */
21 | struct pif_kernel_table_s {
22 |   atmi_devtype_t devtype;
23 |   atmi_generic_fp cpu_kernel;
24 |   const char *gpu_kernel;
25 | };
26 | typedef struct pif_kernel_table_s pif_kernel_table_t;
27 | /* Dispatch table for the sync kernel: a single CPU row that points at the wrapper and has no GPU kernel string. */
28 | pif_kernel_table_t __sync_kernel_pif_fn_table[] = {
29 |     {.devtype = ATMI_DEVTYPE_CPU, .cpu_kernel = (atmi_generic_fp)__sync_kernel_wrapper,
30 |      .gpu_kernel = NULL},
31 | };
32 | /* Lazily created kernel object for the null sync kernel, together with its "already created" flag. */
33 | static int __sync_kernel_CPU_FK = 0;
34 | static atmi_kernel_t __sync_kernel_obj;
35 | /* Launches the null sync kernel with the caller's launch parameters and returns the handle from atmi_task_launch.
36 |  * NOTE(review): when the selected table row is not a CPU row, control reaches the end of this non-void
37 |  * function without a return; the table's only row is a CPU row, so that path looks unreachable today - confirm. */
38 | extern _CPPSTRING_ atmi_task_handle_t __sync_kernel_pif(atmi_lparm_t *lparm) {
39 |   const int k_id = lparm->kernel_id;
40 |   assert(k_id == 0); /* the dispatch table has exactly one row */
41 |   const atmi_devtype_t devtype = __sync_kernel_pif_fn_table[k_id].devtype;
42 |   if (devtype == ATMI_DEVTYPE_GPU) {
43 |     /* nothing is done for a GPU row */
44 |   } else if (devtype == ATMI_DEVTYPE_CPU) {
45 |     /* Kernel initialization has to be done before kernel arguments are set/inspected */
46 |     if (!__sync_kernel_CPU_FK) {
47 |       /* first call only: create a zero-argument kernel and attach the CPU implementation from row 0 */
48 |       atmi_kernel_create_empty(&__sync_kernel_obj, 0, NULL);
49 |       atmi_kernel_add_cpu_impl(__sync_kernel_obj, (atmi_generic_fp)(__sync_kernel_pif_fn_table[0].cpu_kernel), 0);
50 |       __sync_kernel_CPU_FK = 1;
51 |     }
52 |     if (!cpu_initalized) {
53 |       atmi_init(ATMI_DEVTYPE_CPU);
54 |       cpu_initalized = 1;
55 |     }
56 |     return atmi_task_launch(lparm, __sync_kernel_obj, NULL);
57 |   }
58 | }
59 |
--------------------------------------------------------------------------------
/src/compiler/include/atl_pifgen.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef __ATMI_PIFGEN_PLUGIN__
7 | #define __ATMI_PIFGEN_PLUGIN__
8 | struct cl_decoded_option /* Describes one decoded command-line option: its index, argument text, canonical form, value and error flags. */
9 | { /* NOTE(review): the name and the extern declarations after this struct suggest it mirrors a GCC-internal type; if so the field layout must match the GCC version the plugin is loaded into - confirm. */
10 | /* The index of this option, or an OPT_SPECIAL_* value for
11 | * non-options and unknown options. */
12 | size_t opt_index; /* size_t must already be declared by the includer: this header includes nothing itself */
13 |
14 | /* Any warning to give for use of this option, or NULL if none. */
15 | const char *warn_message;
16 |
17 | /* The string argument, or NULL if none. For OPT_SPECIAL_* cases,
18 | * the option or non-option command-line argument. */
19 | const char *arg;
20 |
21 | /* The original text of option plus arguments, with separate argv
22 | * elements concatenated into one string with spaces separating
23 | * them. This is for such uses as diagnostics and
24 | * -frecord-gcc-switches. */
25 | const char *orig_option_with_args_text;
26 |
27 | /* The canonical form of the option and its argument, for when it is
28 | * necessary to reconstruct argv elements (in particular, for
29 | * processing specs and passing options to subprocesses from the
30 | * driver). */
31 | const char *canonical_option[4];
32 |
33 | /* The number of elements in the canonical form of the option and
34 | * arguments; always at least 1. */
35 | size_t canonical_option_num_elements;
36 |
37 | /* For a boolean option, 1 for the true case and 0 for the "no-"
38 | * case. For an unsigned integer option, the value of the
39 | * argument. 1 in all other cases. */
40 | int value;
41 |
42 | /* Any flags describing errors detected in this option. */
43 | int errors;
44 | };
45 |
46 | /* Decoded options, and number of such options. Declared extern only: the definitions are not in this header (presumably supplied by the host compiler - TODO confirm). */
47 | extern struct cl_decoded_option *save_decoded_options;
48 | extern unsigned int save_decoded_options_count;
49 |
50 | #endif // __ATMI_PIFGEN_PLUGIN__
51 |
--------------------------------------------------------------------------------
/src/device_runtime/device_rt.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef SRC_DEVICE_RUNTIME_DEVICE_RT_H_
7 | #define SRC_DEVICE_RUNTIME_DEVICE_RT_H_
8 |
9 | #include "rt.h"
10 |
11 | namespace core {
12 | // Runtime specialisation for the device runtime, exposed as a singleton: obtain it through getInstance().
13 | class DeviceRuntime : public Runtime {
14 |  public:
15 |   // Returns the one instance, held in a function-local static.
16 |   static DeviceRuntime &getInstance() {
17 |     static DeviceRuntime singleton;
18 |     return singleton;
19 |   }
20 |   // Copying is disabled so the instance above stays unique.
21 |   DeviceRuntime(const DeviceRuntime &) = delete;
22 |   DeviceRuntime &operator=(const DeviceRuntime &) = delete;
23 |
24 |   // Runtime lifecycle.
25 |   virtual atmi_status_t Initialize(atmi_devtype_t);
26 |   virtual atmi_status_t Finalize();
27 |   // Kernel creation and release.
28 |   virtual atmi_status_t CreateKernel(atmi_kernel_t *, const int, const size_t *, const int, va_list);
29 |   virtual atmi_status_t ReleaseKernel(atmi_kernel_t);
30 |
31 |  private:
32 |   // Construction and destruction are private; only getInstance() creates the object.
33 |   DeviceRuntime() = default;
34 |   ~DeviceRuntime() = default;
35 |   // Disabled for now: a `bool initialized_` member with initialized()/set_initialized(bool) accessors.
36 | };
37 |
38 | } // namespace core
39 |
40 | #endif // SRC_DEVICE_RUNTIME_DEVICE_RT_H_
41 |
--------------------------------------------------------------------------------
/src/device_runtime/include/device_amd_hsa.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ROCm Device Libraries
3 | *
4 | * This file is distributed under the University of Illinois Open Source
5 | * License. See LICENSE.TXT for details.
6 | *===------------------------------------------------------------------------*/
7 |
8 | #ifndef DEVICE_AMD_HSA_H
9 | #define DEVICE_AMD_HSA_H
10 | /* Fixed-width integer names declared by hand from built-in types (no standard integer header is included here). NOTE(review): this assumes char/short/int/long are 8/16/32/64 bits and that plain char is signed on the device compiler - confirm. */
11 | typedef char int8_t;
12 | typedef unsigned char uint8_t;
13 | typedef short int16_t;
14 | typedef unsigned short uint16_t;
15 | typedef int int32_t;
16 | typedef unsigned int uint32_t;
17 | typedef long int64_t;
18 | typedef unsigned long uint64_t;
19 | /* Force __LP64__ to be defined (with an empty value) and define DEVICE_COMPILER / LITTLEENDIAN_CPU before pulling in hsa.h; DEVICE_COMPILER is undefined again afterwards. NOTE(review): presumably hsa.h keys off these macros - confirm. */
20 | #ifdef __LP64__
21 | #undef __LP64__
22 | #endif
23 | #define __LP64__
24 | #define DEVICE_COMPILER
25 | #define LITTLEENDIAN_CPU
26 | #include "hsa.h"
27 | // below includes are unnecessary for ATMI
28 | //#include "amd_hsa_common.h"
29 | //#include "amd_hsa_elf.h"
30 | //#include "amd_hsa_kernel_code.h"
31 | //#include "amd_hsa_queue.h"
32 | //#include "amd_hsa_signal.h"
33 | //#include "device_amd_hsa.h"
34 | #undef DEVICE_COMPILER
35 |
36 | #endif // DEVICE_AMD_HSA_H
37 |
--------------------------------------------------------------------------------
/src/runtime/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # ===--------------------------------------------------------------------------
2 | # ATMI (Asynchronous Task and Memory Interface)
3 | #
4 | # This file is distributed under the MIT License. See LICENSE.txt for details.
5 | # ===--------------------------------------------------------------------------
6 | # Configure the runtime sub-projects (core, interop) only when ROCm was found and the host is Linux on x86_64, ppc64le or aarch64.
7 | if(ROCM_FOUND)
8 |   if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
9 |     libatmi_runtime_say("Preparing to build runtime/core")
10 |     add_subdirectory(core)
11 |     add_subdirectory(interop)
12 |   else()
13 |     libatmi_runtime_say("Not building ATMI Runtime: only support ATMI in Linux x86_64, ppc64le or aarch64 hosts.")
14 |   endif()
15 | else()
16 |   libatmi_runtime_say("Not building ATMI Runtime: libhsa-runtime64 not found")
17 | endif()
18 |
--------------------------------------------------------------------------------
/src/runtime/core/queue.cpp:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #include "queue.h"
7 | #include "atmi.h"
8 | #include "hsa/hsa_ext_amd.h"
9 | // Field-wise equality of two places: node id, device type, device id and CU mask must all match.
10 | // Fields are compared in that order and the comparison stops at the first mismatch.
11 | bool equalsPlace(const atmi_place_t &l, const atmi_place_t &r) {
12 |   if (l.node_id != r.node_id) return false;
13 |   if (l.type != r.type) return false;
14 |   if (l.device_id != r.device_id) return false;
15 |   return l.cu_mask == r.cu_mask;
16 | }
17 | // Re-targets the queue: when `place` differs from the cached place_, caches it and applies its CU mask to the HSA queue.
18 | // Returns HSA_STATUS_SUCCESS when nothing changed, otherwise the status reported by hsa_amd_queue_cu_set_mask.
19 | hsa_status_t ATLGPUQueue::set_place(atmi_place_t place) {
20 |   if (equalsPlace(place_, place)) return HSA_STATUS_SUCCESS;
21 |   place_ = place;
22 |   // The HSA call takes the mask length in bits (a multiple of 32) and a pointer to 32-bit mask words.
23 |   const uint32_t mask_bits = static_cast<uint32_t>(sizeof(place_.cu_mask) * 8);
24 |   return hsa_amd_queue_cu_set_mask(queue_, mask_bits,
25 |                                    reinterpret_cast<uint32_t *>(&(place_.cu_mask)));
26 | }
27 | // Caches the new place for this CPU queue when it differs from the current one; always returns HSA_STATUS_SUCCESS.
28 | hsa_status_t ATLCPUQueue::set_place(atmi_place_t place) {
29 |   if (equalsPlace(place_, place)) {
30 |     return HSA_STATUS_SUCCESS;  // same place as before, nothing to record
31 |   }
32 |   place_ = place;
33 |   // Not implemented here yet: change pthread-to-core binding based on cpu_set.
34 |   // If number of bits that are set on cpu_set is >1 then choose the first
35 |   // non-zero bit and place the thread on that core.
36 |   // TODO(ashwinma): Any other scheduling algorithms based on load, task group
37 |   // annotations, and so on...
38 |   return HSA_STATUS_SUCCESS;
39 | }
40 |
--------------------------------------------------------------------------------
/src/runtime/include/data.h:
--------------------------------------------------------------------------------
1 | /*===--------------------------------------------------------------------------
2 | * ATMI (Asynchronous Task and Memory Interface)
3 | *
4 | * This file is distributed under the MIT License. See LICENSE.txt for details.
5 | *===------------------------------------------------------------------------*/
6 | #ifndef SRC_RUNTIME_INCLUDE_DATA_H_
7 | #define SRC_RUNTIME_INCLUDE_DATA_H_
8 | #include
9 | #include
10 | #include
11 | #include