├── src ├── inc │ ├── hsa_ext_amd.h │ ├── amd_hsa_signal.h │ ├── amd_hsa_queue.h │ ├── hsa_ven_amd_loaded_code_object.h │ └── amd_hsa_common.h ├── utils │ └── sp3 │ │ ├── libsp3.a │ │ ├── LICENSE.txt │ │ ├── sp3-vm.h │ │ ├── sp3-asic.h │ │ └── sp3-type.h ├── loader │ ├── CMakeLists.txt │ └── loaders.hpp ├── libamdhsacode │ ├── CMakeLists.txt │ ├── amd_hsa_locks.cpp │ └── amd_hsa_locks.hpp ├── cmake_modules │ ├── COPYING-CMAKE-SCRIPTS │ ├── FindLibElf.cmake │ ├── utils.cmake │ └── hsa_common.cmake ├── core │ ├── inc │ │ ├── hsa_table_interface.h │ │ ├── amd_topology.h │ │ ├── hsa_api_trace_int.h │ │ ├── hsa_ext_interface.h │ │ ├── checked.h │ │ ├── amd_loader_context.hpp │ │ ├── memory_region.h │ │ ├── blit.h │ │ ├── isa.h │ │ ├── amd_cpu_agent.h │ │ ├── host_queue.h │ │ ├── default_signal.h │ │ ├── amd_blit_kernel.h │ │ ├── amd_memory_region.h │ │ └── interrupt_signal.h │ ├── common │ │ ├── shared.cpp │ │ └── shared.h │ ├── runtime │ │ ├── hsa_ven_amd_loaded_code_object.cpp │ │ ├── host_queue.cpp │ │ ├── isa.cpp │ │ ├── signal.cpp │ │ └── amd_topology.cpp │ ├── util │ │ ├── small_heap.h │ │ ├── timer.cpp │ │ ├── flag.h │ │ ├── locks.h │ │ ├── timer.h │ │ └── small_heap.cpp │ └── hsacore.so.def ├── CMakeLists.txt └── README.md ├── sample ├── vector_copy_base.brig ├── vector_copy_full.brig ├── Makefile ├── vector_copy_base.hsail └── vector_copy_full.hsail ├── LICENSE.txt └── README.md /src/inc/hsa_ext_amd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bgoglin/ROCR-Runtime/master/src/inc/hsa_ext_amd.h -------------------------------------------------------------------------------- /src/utils/sp3/libsp3.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bgoglin/ROCR-Runtime/master/src/utils/sp3/libsp3.a -------------------------------------------------------------------------------- /sample/vector_copy_base.brig: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bgoglin/ROCR-Runtime/master/sample/vector_copy_base.brig -------------------------------------------------------------------------------- /sample/vector_copy_full.brig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bgoglin/ROCR-Runtime/master/sample/vector_copy_full.brig -------------------------------------------------------------------------------- /src/loader/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # loader library 3 | # 4 | # This file is expected to be included from top-level CMakeLists.txt. 5 | # 6 | # Dependencies: 7 | # - Compiler definitions 8 | # - amdhsacode library 9 | # 10 | # Defines: 11 | # - amdhsaloader library and target include directories 12 | 13 | file(GLOB sources *.cpp *.hpp) 14 | add_library(amdhsaloader ${sources}) 15 | target_include_directories(amdhsaloader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 16 | target_link_libraries(amdhsaloader amdhsacode) 17 | -------------------------------------------------------------------------------- /src/libamdhsacode/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # amdhsacode library 3 | # 4 | # This file is expected to be included from top-level CMakeLists.txt. 
5 | # 6 | # Dependencies: 7 | # - Compiler definitions 8 | # - elf library 9 | # 10 | # Defines: 11 | # - amdhsacode library and target include directories 12 | 13 | file(GLOB sources *.cpp *.hpp) 14 | find_package(LibElf REQUIRED) 15 | add_library(amdhsacode ${sources}) 16 | target_include_directories(amdhsacode PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 17 | target_link_libraries(amdhsacode elf) 18 | -------------------------------------------------------------------------------- /src/utils/sp3/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /src/cmake_modules/COPYING-CMAKE-SCRIPTS: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without 2 | modification, are permitted provided that the following conditions 3 | are met: 4 | 5 | 1. Redistributions of source code must retain the copyright 6 | notice, this list of conditions and the following disclaimer. 7 | 2. Redistributions in binary form must reproduce the copyright 8 | notice, this list of conditions and the following disclaimer in the 9 | documentation and/or other materials provided with the distribution. 10 | 3. The name of the author may not be used to endorse or promote products 11 | derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 14 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 16 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 17 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 20 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The University of Illinois/NCSA 2 | Open Source License (NCSA) 3 | 4 | Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 
5 | 6 | Developed by: 7 | 8 | AMD Research and AMD HSA Software Development 9 | 10 | Advanced Micro Devices, Inc. 11 | 12 | www.amd.com 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to 16 | deal with the Software without restriction, including without limitation 17 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 18 | and/or sell copies of the Software, and to permit persons to whom the 19 | Software is furnished to do so, subject to the following conditions: 20 | 21 | - Redistributions of source code must retain the above copyright notice, 22 | this list of conditions and the following disclaimers. 23 | - Redistributions in binary form must reproduce the above copyright 24 | notice, this list of conditions and the following disclaimers in 25 | the documentation and/or other materials provided with the distribution. 26 | - Neither the names of Advanced Micro Devices, Inc, 27 | nor the names of its contributors may be used to endorse or promote 28 | products derived from this Software without specific prior written 29 | permission. 30 | 31 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 34 | THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 35 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 36 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 37 | DEALINGS WITH THE SOFTWARE. 
38 | -------------------------------------------------------------------------------- /src/cmake_modules/FindLibElf.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find libelf 2 | # Once done this will define 3 | # 4 | # LIBELF_FOUND - system has libelf 5 | # LIBELF_INCLUDE_DIRS - the libelf include directory 6 | # LIBELF_LIBRARIES - Link these to use libelf 7 | # LIBELF_DEFINITIONS - Compiler switches required for using libelf 8 | # 9 | # Copyright (c) 2008 Bernhard Walle 10 | # 11 | # Redistribution and use is allowed according to the terms of the New 12 | # BSD license. 13 | # For details see the accompanying COPYING-CMAKE-SCRIPTS file. 14 | # 15 | 16 | if (LIBELF_FOUND) 17 | return() 18 | endif (LIBELF_FOUND) 19 | 20 | find_path (LIBELF_INCLUDE_DIRS 21 | NAMES 22 | libelf.h 23 | PATHS 24 | /usr/include 25 | /usr/include/libelf 26 | /usr/local/include 27 | /usr/local/include/libelf 28 | /opt/local/include 29 | /opt/local/include/libelf 30 | /sw/include 31 | /sw/include/libelf 32 | ENV CPATH) 33 | 34 | find_library (LIBELF_LIBRARIES 35 | NAMES 36 | elf 37 | PATHS 38 | /usr/lib 39 | /usr/local/lib 40 | /opt/local/lib 41 | /sw/lib 42 | ENV LIBRARY_PATH 43 | ENV LD_LIBRARY_PATH) 44 | 45 | include (FindPackageHandleStandardArgs) 46 | 47 | 48 | # handle the QUIETLY and REQUIRED arguments and set LIBELF_FOUND to TRUE if all listed variables are TRUE 49 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibElf DEFAULT_MSG 50 | LIBELF_LIBRARIES 51 | LIBELF_INCLUDE_DIRS) 52 | 53 | SET(CMAKE_REQUIRED_LIBRARIES elf) 54 | INCLUDE(CheckCXXSourceCompiles) 55 | CHECK_CXX_SOURCE_COMPILES("#include <libelf.h> 56 | int main() { 57 | Elf *e = (Elf*)0; 58 | size_t sz; 59 | elf_getshdrstrndx(e, &sz); 60 | return 0; 61 | }" ELF_GETSHDRSTRNDX) 62 | 63 | mark_as_advanced(LIBELF_INCLUDE_DIRS LIBELF_LIBRARIES ELF_GETSHDRSTRNDX) 64 | 65 | if(LIBELF_FOUND) 66 | add_library(elf UNKNOWN IMPORTED) 67 | set_property(TARGET elf PROPERTY IMPORTED_LOCATION 
${LIBELF_LIBRARIES}) 68 | set_property(TARGET elf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LIBELF_INCLUDE_DIRS}) 69 | endif() 70 | -------------------------------------------------------------------------------- /src/core/inc/hsa_table_interface.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "hsa_api_trace.h" 44 | 45 | void hsa_table_interface_init(const ApiTable* table); 46 | 47 | const ApiTable* hsa_table_interface_get_table(); 48 | -------------------------------------------------------------------------------- /src/core/common/shared.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "core/common/shared.h" 44 | 45 | namespace core { 46 | std::function BaseShared::allocate_=nullptr; 47 | std::function BaseShared::free_=nullptr; 48 | } 49 | -------------------------------------------------------------------------------- /sample/Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## 3 | ## The University of Illinois/NCSA 4 | ## Open Source License (NCSA) 5 | ## 6 | ## Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | ## 8 | ## Developed by: 9 | ## 10 | ## AMD Research and AMD HSA Software Development 11 | ## 12 | ## Advanced Micro Devices, Inc. 13 | ## 14 | ## www.amd.com 15 | ## 16 | ## Permission is hereby granted, free of charge, to any person obtaining a copy 17 | ## of this software and associated documentation files (the "Software"), to 18 | ## deal with the Software without restriction, including without limitation 19 | ## the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | ## and/or sell copies of the Software, and to permit persons to whom the 21 | ## Software is furnished to do so, subject to the following conditions: 22 | ## 23 | ## - Redistributions of source code must retain the above copyright notice, 24 | ## this list of conditions and the following disclaimers. 25 | ## - Redistributions in binary form must reproduce the above copyright 26 | ## notice, this list of conditions and the following disclaimers in 27 | ## the documentation and/or other materials provided with the distribution. 28 | ## - Neither the names of Advanced Micro Devices, Inc, 29 | ## nor the names of its contributors may be used to endorse or promote 30 | ## products derived from this Software without specific prior written 31 | ## permission. 
32 | ## 33 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | ## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | ## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | ## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | ## DEALINGS WITH THE SOFTWARE. 40 | ## 41 | ################################################################################ 42 | 43 | LFLAGS= -Wl,--unresolved-symbols=ignore-in-shared-libs 44 | 45 | CC := gcc 46 | 47 | C_FILES := $(wildcard *.c) 48 | 49 | OBJ_FILES := $(notdir $(C_FILES:.c=.o)) 50 | 51 | all: vector_copy 52 | 53 | vector_copy: $(OBJ_FILES) 54 | $(CC) $(LFLAGS) $(OBJ_FILES) -L/opt/rocm/lib -lhsa-runtime64 -o vector_copy 55 | 56 | %.o: %.c 57 | $(CC) -c -I/opt/rocm/include -o $@ $< -std=c99 58 | 59 | clean: 60 | rm -rf *.o vector_copy 61 | -------------------------------------------------------------------------------- /src/core/inc/amd_topology.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_ 44 | #define HSA_RUNTIME_CORE_INC_AMD_TOPOLOGY_H_ 45 | 46 | namespace amd { 47 | /// @brief Initializes the runtime. 
48 | /// Should not be called directly, must be called only from Runtime::Acquire() 49 | bool Load(); 50 | 51 | /// @brief Shutdown/cleanup of runtime. 52 | /// Should not be called directly, must be called only from Runtime::Release() 53 | bool Unload(); 54 | } // namespace 55 | 56 | #endif // header guard 57 | -------------------------------------------------------------------------------- /src/core/inc/hsa_api_trace_int.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 
28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H 44 | #define HSA_RUNTIME_CORE_INC_HSA_API_TRACE_INT_H 45 | 46 | #include "inc/hsa_api_trace.h" 47 | #include "core/inc/hsa_internal.h" 48 | 49 | namespace core { 50 | struct ApiTable { 51 | ::ApiTable table; 52 | ExtTable extension_backup; 53 | 54 | ApiTable(); 55 | void Reset(); 56 | void LinkExts(ExtTable* ptr); 57 | }; 58 | 59 | extern ApiTable hsa_api_table_; 60 | extern ApiTable hsa_internal_api_table_; 61 | } 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /sample/vector_copy_base.hsail: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | module &m:1:0:$base:$large:$default; 44 | 45 | decl prog function &abort()(); 46 | 47 | prog kernel &__vector_copy_kernel( 48 | kernarg_u64 %in, 49 | kernarg_u64 %out) 50 | { 51 | @__vector_copy_kernel_entry: 52 | // BB#0: // %entry 53 | workitemabsid_u32 $s0, 0; 54 | cvt_s64_s32 $d0, $s0; 55 | shl_u64 $d0, $d0, 2; 56 | ld_kernarg_align(8)_width(all)_u64 $d1, [%out]; 57 | add_u64 $d1, $d1, $d0; 58 | ld_kernarg_align(8)_width(all)_u64 $d2, [%in]; 59 | add_u64 $d0, $d2, $d0; 60 | ld_global_u32 $s0, [$d0]; 61 | st_global_u32 $s0, [$d1]; 62 | ret; 63 | }; 64 | -------------------------------------------------------------------------------- /sample/vector_copy_full.hsail: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | module &m:1:0:$full:$large:$default; 44 | 45 | decl prog function &abort()(); 46 | 47 | prog kernel &__vector_copy_kernel( 48 | kernarg_u64 %in, 49 | kernarg_u64 %out) 50 | { 51 | @__vector_copy_kernel_entry: 52 | // BB#0: // %entry 53 | workitemabsid_u32 $s0, 0; 54 | cvt_s64_s32 $d0, $s0; 55 | shl_u64 $d0, $d0, 2; 56 | ld_kernarg_align(8)_width(all)_u64 $d1, [%out]; 57 | add_u64 $d1, $d1, $d0; 58 | ld_kernarg_align(8)_width(all)_u64 $d2, [%in]; 59 | add_u64 $d0, $d2, $d0; 60 | ld_global_u32 $s0, [$d0]; 61 | st_global_u32 $s0, [$d1]; 62 | ret; 63 | }; 64 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## 3 | ## The University of Illinois/NCSA 4 | ## Open Source 
License (NCSA) 5 | ## 6 | ## Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | ## 8 | ## Developed by: 9 | ## 10 | ## AMD Research and AMD HSA Software Development 11 | ## 12 | ## Advanced Micro Devices, Inc. 13 | ## 14 | ## www.amd.com 15 | ## 16 | ## Permission is hereby granted, free of charge, to any person obtaining a copy 17 | ## of this software and associated documentation files (the "Software"), to 18 | ## deal with the Software without restriction, including without limitation 19 | ## the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | ## and/or sell copies of the Software, and to permit persons to whom the 21 | ## Software is furnished to do so, subject to the following conditions: 22 | ## 23 | ## - Redistributions of source code must retain the above copyright notice, 24 | ## this list of conditions and the following disclaimers. 25 | ## - Redistributions in binary form must reproduce the above copyright 26 | ## notice, this list of conditions and the following disclaimers in 27 | ## the documentation and/or other materials provided with the distribution. 28 | ## - Neither the names of Advanced Micro Devices, Inc, 29 | ## nor the names of its contributors may be used to endorse or promote 30 | ## products derived from this Software without specific prior written 31 | ## permission. 32 | ## 33 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | ## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | ## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | ## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | ## DEALINGS WITH THE SOFTWARE. 
# Top-level build script for the HSA runtime sources. It registers the local
# CMake module directory, pulls in the shared helper modules, derives the
# version components and then descends into the component directories.

# NOTE(review): loader/CMakeLists.txt calls target_include_directories(),
# which needs CMake >= 2.8.11 -- consider raising this minimum.
cmake_minimum_required(VERSION 2.8)

project(hsa-runtime)

# Make include(utils) / include(hsa_common) resolve to ./cmake_modules.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")

include(utils)
include(hsa_common)

# Fall back to version "1" unless the caller supplied VERSION_STRING.
if(NOT DEFINED VERSION_STRING)
  set(VERSION_STRING "1")
endif()

# Sets VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH and a normalized
# VERSION_STRING in this scope.
parse_version(${VERSION_STRING})

# Header search path, in the same order as before: the source root, public
# headers, core headers, the code-object library and the thunk headers.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/inc
  ${CMAKE_CURRENT_SOURCE_DIR}/core/inc
  ${CMAKE_CURRENT_SOURCE_DIR}/libamdhsacode
  ${HSATHK_BUILD_INC_PATH})

link_directories(${HSATHK_BUILD_LIB_PATH})

# Components are added in dependency order (loader links amdhsacode).
add_subdirectory(libamdhsacode)
add_subdirectory(loader)
add_subdirectory(core)
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_ 44 | #define HSA_RUNTME_CORE_INC_AMD_EXT_INTERFACE_H_ 45 | 46 | #include 47 | #include 48 | 49 | #include "hsa_api_trace_int.h" 50 | 51 | #include "core/util/os.h" 52 | #include "core/util/utils.h" 53 | 54 | namespace core { 55 | struct ExtTableInternal : public ExtTable { 56 | decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn; 57 | decltype(::hsa_amd_image_create)* hsa_amd_image_create_fn; 58 | }; 59 | 60 | class ExtensionEntryPoints { 61 | public: 62 | ExtTableInternal table; 63 | 64 | ExtensionEntryPoints(); 65 | 66 | bool Load(std::string library_name); 67 | void Unload(); 68 | 69 | private: 70 | typedef void (*Load_t)(const ::ApiTable* table); 71 | typedef void (*Unload_t)(); 72 | 73 | std::vector libs_; 74 | 75 | void InitTable(); 76 | DISALLOW_COPY_AND_ASSIGN(ExtensionEntryPoints); 77 | }; 78 | } 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/core/inc/checked.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
#ifndef HSA_RUNTME_CORE_INC_CHECKED_H_
#define HSA_RUNTME_CORE_INC_CHECKED_H_

#include "stdint.h"

namespace core {

/// @brief Base class for all classes whose validity can be checked using
/// IsValid() method.
///
/// Every object stores a stamp equal to its own address XOR-ed with the
/// compile-time constant @p code. IsValid() recomputes the stamp from
/// @c this and compares, so a pointer that does not refer to a live object
/// of the expected Checked<code> type is very unlikely to pass.
///
/// @tparam code Constant mixed into the stamp; use a different value per
/// class family so objects of one family do not validate as another.
template <uint64_t code>
class Checked {
 public:
  typedef Checked<code> CheckedType;

  // The stamp depends on the object's own address, so copy and move
  // construction compute a fresh stamp instead of copying the source's.
  Checked() : object_(uintptr_t(this) ^ uintptr_t(code)) {}
  Checked(const Checked&) : object_(uintptr_t(this) ^ uintptr_t(code)) {}
  Checked(Checked&&) : object_(uintptr_t(this) ^ uintptr_t(code)) {}

  /// Clears the stamp so the storage no longer validates after destruction.
  /// Plain 0 is used because object_ is an integer, not a pointer.
  virtual ~Checked() { object_ = 0; }

  // Assignment deliberately leaves the stamp untouched: it identifies *this*
  // object and must not be replaced by the right-hand side's stamp.
  const Checked& operator=(Checked&&) { return *this; }
  const Checked& operator=(const Checked&) { return *this; }

  /// @return true when the stored stamp matches this object's address and
  /// the class code.
  bool IsValid() const {
    return object_ == (uintptr_t(this) ^ uintptr_t(code));
  }

 private:
  uintptr_t object_;
};

}  // namespace core
#endif  // header guard
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "hsa_ven_amd_loaded_code_object.h" 44 | 45 | #include "core/inc/amd_hsa_loader.hpp" 46 | #include "core/inc/runtime.h" 47 | 48 | using namespace core; 49 | 50 | hsa_status_t hsa_ven_amd_loaded_code_object_query_host_address( 51 | const void *device_address, 52 | const void **host_address) { 53 | if (false == core::Runtime::runtime_singleton_->IsOpen()) { 54 | return HSA_STATUS_ERROR_NOT_INITIALIZED; 55 | } 56 | if (nullptr == device_address) { 57 | return HSA_STATUS_ERROR_INVALID_ARGUMENT; 58 | } 59 | if (nullptr == host_address) { 60 | return HSA_STATUS_ERROR_INVALID_ARGUMENT; 61 | } 62 | 63 | uint64_t udaddr = reinterpret_cast(device_address); 64 | uint64_t uhaddr = Runtime::runtime_singleton_->loader()->FindHostAddress(udaddr); 65 | if (0 == uhaddr) { 66 | return HSA_STATUS_ERROR_INVALID_ARGUMENT; 67 | } 68 | 69 | *host_address = reinterpret_cast(uhaddr); 70 | return HSA_STATUS_SUCCESS; 71 | } 72 | -------------------------------------------------------------------------------- /src/cmake_modules/utils.cmake: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## 3 | ## The University of Illinois/NCSA 4 | ## Open Source License (NCSA) 5 | ## 6 | ## Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | ## 8 | ## Developed by: 9 | ## 10 | ## AMD Research and AMD HSA Software Development 11 | ## 12 | ## Advanced Micro Devices, Inc. 
13 | ## 14 | ## www.amd.com 15 | ## 16 | ## Permission is hereby granted, free of charge, to any person obtaining a copy 17 | ## of this software and associated documentation files (the "Software"), to 18 | ## deal with the Software without restriction, including without limitation 19 | ## the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | ## and#or sell copies of the Software, and to permit persons to whom the 21 | ## Software is furnished to do so, subject to the following conditions: 22 | ## 23 | ## - Redistributions of source code must retain the above copyright notice, 24 | ## this list of conditions and the following disclaimers. 25 | ## - Redistributions in binary form must reproduce the above copyright 26 | ## notice, this list of conditions and the following disclaimers in 27 | ## the documentation and#or other materials provided with the distribution. 28 | ## - Neither the names of Advanced Micro Devices, Inc, 29 | ## nor the names of its contributors may be used to endorse or promote 30 | ## products derived from this Software without specific prior written 31 | ## permission. 32 | ## 33 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | ## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | ## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | ## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | ## DEALINGS WITH THE SOFTWARE. 40 | ## 41 | ################################################################################ 42 | 43 | ## Parses the VERSION_STRING variable and places 44 | ## the first, second and third number values in 45 | ## the major, minor and patch variables. 
function(parse_version VERSION_STRING)

  # Extracts up to three runs of digits from VERSION_STRING and publishes them
  # to the caller's scope as VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH
  # (each one only when present). VERSION_STRING in the caller's scope is
  # replaced by the normalized "major[.minor[.patch]]" form.

  # The input is quoted so that an empty string or a value containing ';'
  # reaches string() as exactly one argument.
  string(REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}")
  list(LENGTH VERSIONS VERSION_COUNT)

  # Start from a known value so a TEMP_VERSION_STRING that happens to exist in
  # the caller's scope cannot leak into the result when no number is found.
  set(TEMP_VERSION_STRING "")

  if(VERSION_COUNT GREATER 0)
    list(GET VERSIONS 0 MAJOR)
    set(VERSION_MAJOR ${MAJOR} PARENT_SCOPE)
    set(TEMP_VERSION_STRING "${MAJOR}")
  endif()

  if(VERSION_COUNT GREATER 1)
    list(GET VERSIONS 1 MINOR)
    set(VERSION_MINOR ${MINOR} PARENT_SCOPE)
    set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}")
  endif()

  if(VERSION_COUNT GREATER 2)
    list(GET VERSIONS 2 PATCH)
    set(VERSION_PATCH ${PATCH} PARENT_SCOPE)
    set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}")
  endif()

  set(VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE)

endfunction()
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef AMD_HSA_SIGNAL_H 44 | #define AMD_HSA_SIGNAL_H 45 | 46 | #include "amd_hsa_common.h" 47 | #include "amd_hsa_queue.h" 48 | 49 | // AMD Signal Kind Enumeration Values. 
// 64-bit integer type used for the `kind` field of amd_signal_t below.
typedef int64_t amd_signal_kind64_t;
// Signal kinds. User signals are positive, doorbell kinds are negative and
// zero is reserved for "invalid".
enum amd_signal_kind_t {
  AMD_SIGNAL_KIND_INVALID = 0,
  AMD_SIGNAL_KIND_USER = 1,
  AMD_SIGNAL_KIND_DOORBELL = -1,
  AMD_SIGNAL_KIND_LEGACY_DOORBELL = -2
};

// AMD Signal.
// The structure is aligned to AMD_SIGNAL_ALIGN_BYTES (64). With 8-byte
// pointers its members also add up to exactly 64 bytes.
#define AMD_SIGNAL_ALIGN_BYTES 64
#define AMD_SIGNAL_ALIGN __ALIGNED__(AMD_SIGNAL_ALIGN_BYTES)
typedef struct AMD_SIGNAL_ALIGN amd_signal_s {
  // One of the amd_signal_kind_t values, stored as a 64-bit integer.
  amd_signal_kind64_t kind;
  // The same 8 bytes hold either the signal value or a doorbell pointer.
  // NOTE(review): presumably `kind` selects the active member (value for
  // USER, the pointers for the two DOORBELL kinds) -- confirm with users.
  union {
    volatile int64_t value;
    volatile uint32_t* legacy_hardware_doorbell_ptr;
    volatile uint64_t* hardware_doorbell_ptr;
  };
  // Event bookkeeping; semantics are defined by code outside this header.
  uint64_t event_mailbox_ptr;
  uint32_t event_id;
  uint32_t reserved1;
  // Start / end timestamps (units are not stated in this header).
  uint64_t start_ts;
  uint64_t end_ts;
  // Queue pointer overlaid with a 64-bit reserved word, which keeps the slot
  // 8 bytes wide regardless of pointer size.
  union {
    amd_queue_t* queue_ptr;
    uint64_t reserved2;
  };
  uint32_t reserved3[2];
} amd_signal_t;

#endif // AMD_HSA_SIGNAL_H
13 | ## 14 | ## www.amd.com 15 | ## 16 | ## Permission is hereby granted, free of charge, to any person obtaining a copy 17 | ## of this software and associated documentation files (the "Software"), to 18 | ## deal with the Software without restriction, including without limitation 19 | ## the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | ## and#or sell copies of the Software, and to permit persons to whom the 21 | ## Software is furnished to do so, subject to the following conditions: 22 | ## 23 | ## - Redistributions of source code must retain the above copyright notice, 24 | ## this list of conditions and the following disclaimers. 25 | ## - Redistributions in binary form must reproduce the above copyright 26 | ## notice, this list of conditions and the following disclaimers in 27 | ## the documentation and#or other materials provided with the distribution. 28 | ## - Neither the names of Advanced Micro Devices, Inc, 29 | ## nor the names of its contributors may be used to endorse or promote 30 | ## products derived from this Software without specific prior written 31 | ## permission. 32 | ## 33 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | ## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | ## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | ## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | ## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | ## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | ## DEALINGS WITH THE SOFTWARE. 40 | ## 41 | ################################################################################ 42 | 43 | # 44 | # HSA Build compiler definitions common between components. 
#

# Pointer width of the target: IS64BIT is 1/0 and ONLY64STR is "64"/"32".
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(IS64BIT 1)
  set(ONLY64STR "64")
else()
  set(IS64BIT 0)
  set(ONLY64STR "32")
endif()

if(NOT UNIX)
  # Path-list separator for non-UNIX hosts.
  set (PS "\;")
else()
  # Path-list separator for UNIX hosts.
  set(PS ":")

  # Base flags: warnings, C++11, caller-supplied extras, position-independent
  # code, tolerate unresolved symbols in shared libraries, no strict aliasing.
  set(CMAKE_CXX_FLAGS "-Wall -std=c++11 ${EXTRA_CFLAGS} -fpic -Wl,--unresolved-symbols=ignore-in-shared-libs -fno-strict-aliasing")

  # Word-size specific flags.
  if(IS64BIT)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
  endif()

  # Debugger-friendly symbols for Debug builds only.
  if("${CMAKE_BUILD_TYPE}" STREQUAL Debug)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
  endif()

  # Expose the C99 limit / constant / format macros to C++ translation units
  # and declare the host as little-endian.
  add_definitions(-D __STDC_LIMIT_MACROS)
  add_definitions(-D __STDC_CONSTANT_MACROS)
  add_definitions(-D __STDC_FORMAT_MACROS)
  add_definitions (-DLITTLEENDIAN_CPU=1)
endif()

# Link the static C runtime when building with MSVC.
if(MSVC)
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
endif()
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "amd_hsa_locks.hpp" 44 | 45 | namespace amd { 46 | namespace hsa { 47 | namespace common { 48 | 49 | void ReaderWriterLock::ReaderLock() 50 | { 51 | internal_lock_.lock(); 52 | while (0 < writers_count_) { 53 | readers_condition_.wait(internal_lock_); 54 | } 55 | readers_count_ += 1; 56 | internal_lock_.unlock(); 57 | } 58 | 59 | void ReaderWriterLock::ReaderUnlock() 60 | { 61 | internal_lock_.lock(); 62 | readers_count_ -= 1; 63 | if (0 == readers_count_ && 0 < writers_waiting_) { 64 | writers_condition_.notify_one(); 65 | } 66 | internal_lock_.unlock(); 67 | } 68 | 69 | void ReaderWriterLock::WriterLock() 70 | { 71 | internal_lock_.lock(); 72 | writers_waiting_ += 1; 73 | while (0 < readers_count_ || 0 < writers_count_) { 74 | writers_condition_.wait(internal_lock_); 75 | } 76 | writers_count_ += 1; 77 | writers_waiting_ -= 1; 78 | internal_lock_.unlock(); 79 | } 80 | 81 | void ReaderWriterLock::WriterUnlock() 82 | { 83 | internal_lock_.lock(); 84 | writers_count_ -= 1; 85 | if (0 < writers_waiting_) { 86 | writers_condition_.notify_one(); 87 | } 88 | readers_condition_.notify_all(); 89 | internal_lock_.unlock(); 90 | } 91 | 92 | } // namespace common 93 | } // namespace hsa 94 | } // namespace amd 95 | -------------------------------------------------------------------------------- /src/utils/sp3/sp3-vm.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. 
3 | // 4 | /// \author AMD Developer Tools Team 5 | /// \file 6 | /// 7 | //===================================================================== 8 | 9 | #ifndef SP3_VM_H 10 | #define SP3_VM_H 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #if defined (WIN_OS) && !defined(SP3_STATIC_LIB) 17 | #if defined(DLL_EXPORT_SP3) 18 | #define SP3_EXPORT __declspec(dllexport) 19 | #else 20 | #define SP3_EXPORT __declspec(dllimport) 21 | #endif 22 | #else 23 | #define SP3_EXPORT 24 | #endif 25 | 26 | #ifdef _MSC_VER 27 | typedef __int32 int32_t; 28 | typedef unsigned __int32 uint32_t; 29 | 30 | typedef __int64 int64_t; 31 | typedef unsigned __int64 uint64_t; 32 | #else 33 | #include 34 | #endif 35 | 36 | struct sp3_vma; 37 | 38 | /// @file sp3-vm.h 39 | /// @brief sp3 VM API 40 | /// 41 | /// The VM API is used to manage virtual memory maps. Those maps are 42 | /// used for binary storage for disassembly, as they can naturally 43 | /// mirror the GPU's memory map (so no register translation is needed). 44 | 45 | #define SP3_VM_PAGESIZE 64 46 | 47 | /// @brief VM addresses are 64-bit and the address unit is 32 bits 48 | /// 49 | typedef uint64_t sp3_vmaddr; 50 | 51 | /// @brief Callback function that will fill a VMA on demand 52 | /// 53 | /// The VMA to be filled will be specified through the request address. 54 | /// The callback should fill the VMA using sp3_vm_write calls. 55 | typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx); 56 | 57 | /// @brief VM area 58 | /// 59 | /// VMAs are kept in a sorted list 60 | typedef struct sp3_vma { 61 | sp3_vmaddr base, len; 62 | sp3_vmfill fill; 63 | void *fill_ctx; 64 | uint32_t *data; 65 | struct sp3_vma *prev, *next; 66 | } sp3_vma; 67 | 68 | /// @brief Create a new VM that is empty. 69 | /// 70 | SP3_EXPORT 71 | sp3_vma *sp3_vm_new(void); 72 | 73 | /// @brief Create a new VM that has a sp3_vmfill callback. 
74 | /// 75 | SP3_EXPORT 76 | sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx); 77 | 78 | /// @brief Create a new VM from an array of words. 79 | /// @param base VM address to load array at. 80 | /// @param len Number of 32-bit words in the array. 81 | /// @param data Pointer to the array. 82 | /// 83 | SP3_EXPORT 84 | sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data); 85 | 86 | /// @brief Find a VMA, optionally adding it. 87 | /// @param vm VM to search in. 88 | /// @param addr Address to search for. 89 | /// @param add Flag indicating whether a failure should result in adding a new VMA. 90 | /// 91 | SP3_EXPORT 92 | sp3_vma *sp3_vm_find(sp3_vma *vm, sp3_vmaddr addr, int add); 93 | 94 | /// @brief Write a word to a VM. 95 | /// 96 | SP3_EXPORT 97 | void sp3_vm_write(sp3_vma *vm, sp3_vmaddr addr, uint32_t val); 98 | 99 | /// @brief Read a word from a VM. 100 | /// 101 | SP3_EXPORT 102 | uint32_t sp3_vm_read(sp3_vma *vm, sp3_vmaddr addr); 103 | 104 | /// @brief Probe VM for presence. 105 | /// @return 1 if the specified address is backed in the VM, 0 otherwise. 106 | /// 107 | SP3_EXPORT 108 | int sp3_vm_present(sp3_vma *vm, sp3_vmaddr addr); 109 | 110 | /// @brief Free a VM and all its storage. 111 | /// 112 | SP3_EXPORT 113 | void sp3_vm_free(sp3_vma *vm); 114 | 115 | #ifdef __cplusplus 116 | } 117 | #endif 118 | 119 | #endif 120 | -------------------------------------------------------------------------------- /src/inc/amd_hsa_queue.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef AMD_HSA_QUEUE_H 44 | #define AMD_HSA_QUEUE_H 45 | 46 | #include "amd_hsa_common.h" 47 | #include "hsa.h" 48 | 49 | // AMD Queue Properties. 
// 32-bit storage type for the queue_properties bit field of amd_queue_t.
typedef uint32_t amd_queue_properties32_t;
// Bit-field layout of amd_queue_properties32_t. The entries below cover bits
// 0-3 with one-bit flags and leave the remaining 28 bits reserved.
// NOTE(review): AMD_HSA_BITS_CREATE_ENUM_ENTRIES comes from amd_hsa_common.h;
// its arguments are presumably (name, bit offset, bit width) -- confirm.
enum amd_queue_properties_t {
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER, 0, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_IS_PTR64, 1, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_TRAP_HANDLER_DEBUG_SGPRS, 2, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 3, 1),
  AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_QUEUE_PROPERTIES_RESERVED1, 4, 28)
};

// AMD Queue.
// The structure is aligned to AMD_QUEUE_ALIGN_BYTES (64). Field order and the
// reserved padding arrays fix the layout, so members must not be reordered.
#define AMD_QUEUE_ALIGN_BYTES 64
#define AMD_QUEUE_ALIGN __ALIGNED__(AMD_QUEUE_ALIGN_BYTES)
typedef struct AMD_QUEUE_ALIGN amd_queue_s {
  // Standard HSA queue descriptor; it is the first member, so a pointer to
  // an amd_queue_t also points at its hsa_queue_t.
  hsa_queue_t hsa_queue;
  uint32_t reserved1[4];
  // Dispatch-id counter for the write side of the queue.
  volatile uint64_t write_dispatch_id;
  // Upper 32 bits of the group / private segment aperture base addresses.
  uint32_t group_segment_aperture_base_hi;
  uint32_t private_segment_aperture_base_hi;
  uint32_t max_cu_id;
  uint32_t max_wave_id;
  // State used with the legacy doorbell mechanism.
  volatile uint64_t max_legacy_doorbell_dispatch_id_plus_1;
  volatile uint32_t legacy_doorbell_lock;
  uint32_t reserved2[9];
  // Dispatch-id counter for the read side of the queue.
  volatile uint64_t read_dispatch_id;
  uint32_t read_dispatch_id_field_base_byte_offset;
  // Scratch (private segment) configuration.
  uint32_t compute_tmpring_size;
  uint32_t scratch_resource_descriptor[4];
  uint64_t scratch_backing_memory_location;
  uint64_t scratch_backing_memory_byte_size;
  uint32_t scratch_workitem_byte_size;
  // Bit field described by amd_queue_properties_t above.
  amd_queue_properties32_t queue_properties;
  uint32_t reserved3[2];
  hsa_signal_t queue_inactive_signal;
  uint32_t reserved4[14];
} amd_queue_t;

#endif // AMD_HSA_QUEUE_H
2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | //
41 | ////////////////////////////////////////////////////////////////////////////////
42 |
43 | // HSA AMD extension for loaded code objects.
44 |
45 | #ifndef HSA_VEN_AMD_LOADED_CODE_OBJECT_H
46 | #define HSA_VEN_AMD_LOADED_CODE_OBJECT_H
47 |
48 | #include "hsa.h"
49 |
50 | #ifdef __cplusplus
51 | extern "C" {
52 | #endif // __cplusplus
53 |
54 | /**
55 |  * @brief Records loaded code object's host address in @p host_address given
56 |  * loaded code object's device address. Recorded host address points to host
57 |  * accessible memory, which is identical to memory pointed to by device address.
58 |  * If device address already points to host accessible memory, then device
59 |  * address is recorded in @p host_address.
60 |  *
61 |  * @param[in] device_address Device address.
62 |  *
63 |  * @param[out] host_address Pointer to application-allocated buffer, where to
64 |  * record host address.
65 |  *
66 |  * @retval HSA_STATUS_SUCCESS Function has been executed successfully.
67 |  *
68 |  * @retval HSA_STATUS_ERROR_NOT_INITIALIZED Runtime has not been initialized.
69 |  *
70 |  * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT @p device_address is invalid/null,
71 |  * or @p host_address is null.
72 |  */
73 | hsa_status_t HSA_API hsa_ven_amd_loaded_code_object_query_host_address(
74 |   const void *device_address,
75 |   const void **host_address);
76 |
77 | /**
78 |  * @brief Extension's version.
79 |  *
80 |  * NOTE(review): the leading zeros make 001000 an octal literal (decimal 512);
81 |  * confirm that encoding is intended before comparing against this value.
82 |  */
83 | #define hsa_ven_amd_loaded_code_object 001000
84 |
85 | /**
86 |  * @brief Extension's function table.
87 |  *
88 |  * Holds one function pointer per extension entry point; the single member
89 |  * mirrors the prototype of hsa_ven_amd_loaded_code_object_query_host_address.
90 |  *
91 |  * NOTE(review): the pointer type omits the HSA_API qualifier used by the
92 |  * prototype above -- confirm the two agree if HSA_API expands to a calling
93 |  * convention.
94 |  */
95 | typedef struct hsa_ven_amd_loaded_code_object_1_00_pfn_s {
96 |   hsa_status_t (*hsa_ven_amd_loaded_code_object_query_host_address)(
97 |     const void *device_address,
98 |     const void **host_address);
99 | } hsa_ven_amd_loaded_code_object_1_00_pfn_t;
100 |
101 | #ifdef __cplusplus
102 | } // extern "C"
103 | #endif // __cplusplus
104 |
105 | #endif // HSA_VEN_AMD_LOADED_CODE_OBJECT_H
106 |
--------------------------------------------------------------------------------
/src/core/runtime/host_queue.cpp:
--------------------------------------------------------------------------------
1 | ////////////////////////////////////////////////////////////////////////////////
2 | //
3 | // The University of Illinois/NCSA
4 | // Open Source License (NCSA)
5 | //
6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
7 | //
8 | // Developed by:
9 | //
10 | // AMD Research and AMD HSA Software Development
11 | //
12 | // Advanced Micro Devices, Inc.
13 | //
14 | // www.amd.com
15 | //
16 | // Permission is hereby granted, free of charge, to any person obtaining a copy
17 | // of this software and associated documentation files (the "Software"), to
18 | // deal with the Software without restriction, including without limitation
19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 | // and/or sell copies of the Software, and to permit persons to whom the
21 | // Software is furnished to do so, subject to the following conditions:
22 | //
23 | // - Redistributions of source code must retain the above copyright notice,
24 | // this list of conditions and the following disclaimers.
25 | // - Redistributions in binary form must reproduce the above copyright
26 | // notice, this list of conditions and the following disclaimers in
27 | // the documentation and/or other materials provided with the distribution.
28 | // - Neither the names of Advanced Micro Devices, Inc,
29 | // nor the names of its contributors may be used to endorse or promote
30 | // products derived from this Software without specific prior written
31 | // permission.
32 | //
33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 | // DEALINGS WITH THE SOFTWARE.
40 | //
41 | ////////////////////////////////////////////////////////////////////////////////
42 |
43 | #include "core/inc/host_queue.h"
44 |
45 | #include "core/inc/runtime.h"
46 | #include "core/util/utils.h"
47 |
48 | namespace core {
49 | /// @brief Creates a queue whose packet ring is allocated from @p region.
50 | ///
51 | /// On success the embedded hsa_queue_t fields are filled in and active_ becomes
52 | /// true. If the shared object or the ring buffer cannot be allocated the
53 | /// constructor returns early and active_ stays false.
54 | ///
55 | /// @param region Region passed to hsa_memory_allocate for the ring buffer.
56 | /// @param ring_size Number of AqlPacket slots in the ring.
57 | /// @param type Stored in hsa_queue.type.
58 | /// @param features Stored in hsa_queue.features.
59 | /// @param doorbell_signal Stored in hsa_queue.doorbell_signal.
60 | HostQueue::HostQueue(hsa_region_t region, uint32_t ring_size,
61 |                      hsa_queue_type_t type, uint32_t features,
62 |                      hsa_signal_t doorbell_signal)
63 |     : Queue(),
64 |       size_(ring_size),
65 |       active_(false) {
66 |   // Give ring_ a defined value on every exit path so the destructor never
67 |   // hands an indeterminate pointer to hsa_memory_free.
68 |   ring_ = NULL;
69 |
70 |   if (!Shared::IsSharedObjectAllocationValid()) {
71 |     return;
72 |   }
73 |
74 |   HSA::hsa_memory_register(this, sizeof(HostQueue));
75 |
76 |   const size_t queue_buffer_size = size_ * sizeof(AqlPacket);
77 |   if (HSA_STATUS_SUCCESS !=
78 |       HSA::hsa_memory_allocate(region, queue_buffer_size, &ring_)) {
79 |     // Do not rely on the out-parameter being untouched after a failure.
80 |     ring_ = NULL;
81 |     return;
82 |   }
83 |
84 |   // Check for a valid pointer first; the alignment test is meaningless on NULL.
85 |   assert(ring_ != NULL);
86 |   assert(IsMultipleOf(ring_, kRingAlignment));
87 |
88 |   amd_queue_.hsa_queue.base_address = ring_;
89 |   amd_queue_.hsa_queue.size = size_;
90 |   amd_queue_.hsa_queue.doorbell_signal = doorbell_signal;
91 |   amd_queue_.hsa_queue.id = Runtime::runtime_singleton_->GetQueueId();
92 |   amd_queue_.hsa_queue.type = type;
93 |   amd_queue_.hsa_queue.features = features;
94 |   // NOTE(review): AMD_HSA_BITS_SET is a read-modify-write, so the bits it
95 |   // does not touch keep whatever queue_properties held before -- confirm the
96 |   // field is initialized before this constructor runs.
97 | #ifdef HSA_LARGE_MODEL
98 |   AMD_HSA_BITS_SET(
99 |       amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 1);
100 | #else
101 |   AMD_HSA_BITS_SET(
102 |       amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_IS_PTR64, 0);
103 | #endif
104 |   amd_queue_.write_dispatch_id = amd_queue_.read_dispatch_id = 0;
105 |   AMD_HSA_BITS_SET(
106 |       amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 0);
107 |
108 |   active_ = true;
109 | }
110 |
111 | /// @brief Releases the ring buffer (if one was allocated) and deregisters the
112 | /// object's memory. Does nothing when the shared object was never allocated,
113 | /// mirroring the constructor's first early return.
114 | HostQueue::~HostQueue() {
115 |   if (!Shared::IsSharedObjectAllocationValid()) {
116 |     return;
117 |   }
118 |
119 |   // ring_ stays NULL when the constructor's allocation failed.
120 |   if (ring_ != NULL) {
121 |     HSA::hsa_memory_free(ring_);
122 |   }
123 |   HSA::hsa_memory_deregister(this, sizeof(HostQueue));
124 | }
125 |
126 | } // namespace core
127 |
--------------------------------------------------------------------------------
/src/libamdhsacode/amd_hsa_locks.hpp:
--------------------------------------------------------------------------------
1 | ////////////////////////////////////////////////////////////////////////////////
2 | //
3 | // The University of Illinois/NCSA
4 | // Open Source License (NCSA)
5 | //
6 | // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
7 | //
8 | // Developed by:
9 | //
10 | // AMD Research and AMD HSA Software Development
11 | //
12 | // Advanced Micro Devices, Inc.
13 | //
14 | // www.amd.com
15 | //
16 | // Permission is hereby granted, free of charge, to any person obtaining a copy
17 | // of this software and associated documentation files (the "Software"), to
18 | // deal with the Software without restriction, including without limitation
19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 | // and/or sell copies of the Software, and to permit persons to whom the
21 | // Software is furnished to do so, subject to the following conditions:
22 | //
23 | // - Redistributions of source code must retain the above copyright notice,
24 | // this list of conditions and the following disclaimers.
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef AMD_HSA_LOCKS_HPP 44 | #define AMD_HSA_LOCKS_HPP 45 | 46 | #include 47 | #include 48 | #include 49 | 50 | namespace amd { 51 | namespace hsa { 52 | namespace common { 53 | 54 | template 55 | class ReaderLockGuard final { 56 | public: 57 | explicit ReaderLockGuard(LockType &lock): 58 | lock_(lock) 59 | { 60 | lock_.ReaderLock(); 61 | } 62 | 63 | ~ReaderLockGuard() 64 | { 65 | lock_.ReaderUnlock(); 66 | } 67 | 68 | private: 69 | ReaderLockGuard(const ReaderLockGuard&); 70 | ReaderLockGuard& operator=(const ReaderLockGuard&); 71 | 72 | LockType &lock_; 73 | }; 74 | 75 | template 76 | class WriterLockGuard final { 77 | public: 78 | explicit WriterLockGuard(LockType &lock): 79 | lock_(lock) 80 | { 81 | lock_.WriterLock(); 82 | } 83 | 84 | ~WriterLockGuard() 85 | { 86 | lock_.WriterUnlock(); 87 | } 88 | 89 | private: 90 | WriterLockGuard(const WriterLockGuard&); 91 | WriterLockGuard& 
operator=(const WriterLockGuard&); 92 | 93 | LockType &lock_; 94 | }; 95 | 96 | class ReaderWriterLock final { 97 | public: 98 | ReaderWriterLock(): 99 | readers_count_(0), writers_count_(0), writers_waiting_(0) {} 100 | 101 | ~ReaderWriterLock() {} 102 | 103 | void ReaderLock(); 104 | 105 | void ReaderUnlock(); 106 | 107 | void WriterLock(); 108 | 109 | void WriterUnlock(); 110 | 111 | private: 112 | ReaderWriterLock(const ReaderWriterLock&); 113 | ReaderWriterLock& operator=(const ReaderWriterLock&); 114 | 115 | size_t readers_count_; 116 | size_t writers_count_; 117 | size_t writers_waiting_; 118 | std::mutex internal_lock_; 119 | std::condition_variable_any readers_condition_; 120 | std::condition_variable_any writers_condition_; 121 | }; 122 | 123 | } // namespace common 124 | } // namespace hsa 125 | } // namespace amd 126 | 127 | #endif // AMD_HSA_LOCKS_HPP 128 | -------------------------------------------------------------------------------- /src/inc/amd_hsa_common.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // The following set of header files provides definitions for AMD GPU 44 | // Architecture: 45 | // - amd_hsa_common.h 46 | // - amd_hsa_elf.h 47 | // - amd_hsa_kernel_code.h 48 | // - amd_hsa_queue.h 49 | // - amd_hsa_signal.h 50 | // 51 | // Refer to "HSA Application Binary Interface: AMD GPU Architecture" for more 52 | // information. 53 | 54 | #ifndef AMD_HSA_COMMON_H 55 | #define AMD_HSA_COMMON_H 56 | 57 | #include 58 | #include 59 | 60 | // Descriptive version of the HSA Application Binary Interface. 61 | #define AMD_HSA_ABI_VERSION "AMD GPU Architecture v0.35 (June 25, 2015)" 62 | 63 | // Alignment attribute that specifies a minimum alignment (in bytes) for 64 | // variables of the specified type. 65 | #if defined(__GNUC__) 66 | # define __ALIGNED__(x) __attribute__((aligned(x))) 67 | #elif defined(_MSC_VER) 68 | # define __ALIGNED__(x) __declspec(align(x)) 69 | #elif defined(RC_INVOKED) 70 | # define __ALIGNED__(x) 71 | #else 72 | # error 73 | #endif 74 | 75 | // Creates enumeration entries for packed types. Enumeration entries include 76 | // bit shift amount, bit width, and bit mask. 77 | #define AMD_HSA_BITS_CREATE_ENUM_ENTRIES(name, shift, width) \ 78 | name ## _SHIFT = (shift), \ 79 | name ## _WIDTH = (width), \ 80 | name = (((1 << (width)) - 1) << (shift)) \ 81 | 82 | // Gets bits for specified mask from specified src packed instance. 83 | #define AMD_HSA_BITS_GET(src, mask) \ 84 | ((src & mask) >> mask ## _SHIFT) \ 85 | 86 | // Sets val bits for specified mask in specified dst packed instance. 
87 | #define AMD_HSA_BITS_SET(dst, mask, val) \ 88 | dst &= (~(1 << mask ## _SHIFT) & ~mask); \ 89 | dst |= (((val) << mask ## _SHIFT) & mask) \ 90 | 91 | #endif // AMD_HSA_COMMON_H 92 | -------------------------------------------------------------------------------- /src/core/inc/amd_loader_context.hpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP 44 | #define HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP 45 | 46 | #include "core/inc/amd_hsa_loader.hpp" 47 | 48 | namespace amd { 49 | 50 | class LoaderContext final: public hsa::loader::Context { 51 | public: 52 | LoaderContext(): hsa::loader::Context() {} 53 | 54 | ~LoaderContext() {} 55 | 56 | hsa_isa_t IsaFromName(const char *name) override; 57 | 58 | bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa) override; 59 | 60 | void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override; 61 | 62 | bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size) override; 63 | 64 | void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override; 65 | 66 | void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; 67 | 68 | void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; 69 | 70 | bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override; 71 | 72 | bool ImageExtensionSupported(); 73 | 74 | hsa_status_t ImageCreate( 75 | hsa_agent_t agent, 76 | 
hsa_access_permission_t image_permission, 77 | const hsa_ext_image_descriptor_t *image_descriptor, 78 | const void *image_data, 79 | hsa_ext_image_t *image_handle); 80 | 81 | hsa_status_t ImageDestroy(hsa_agent_t agent, hsa_ext_image_t image_handle); 82 | 83 | hsa_status_t SamplerCreate( 84 | hsa_agent_t agent, 85 | const hsa_ext_sampler_descriptor_t *sampler_descriptor, 86 | hsa_ext_sampler_t *sampler_handle); 87 | 88 | hsa_status_t SamplerDestroy(hsa_agent_t agent, hsa_ext_sampler_t sampler_handle); 89 | 90 | private: 91 | LoaderContext(const LoaderContext&); 92 | LoaderContext& operator=(const LoaderContext&); 93 | }; 94 | 95 | } // namespace amd 96 | 97 | #endif // HSA_RUNTIME_CORE_INC_AMD_LOADER_CONTEXT_HPP 98 | -------------------------------------------------------------------------------- /src/core/common/shared.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTME_CORE_INC_SHARED_H_ 44 | #define HSA_RUNTME_CORE_INC_SHARED_H_ 45 | 46 | #include "core/util/utils.h" 47 | #include 48 | 49 | #include 50 | #include 51 | 52 | namespace core { 53 | /// @brief Base class encapsulating the allocator and deallocator for 54 | /// shared shared object. 55 | class BaseShared { 56 | public: 57 | static void SetAllocateAndFree( 58 | const std::function& allocate, 59 | const std::function& free) { 60 | allocate_ = allocate; 61 | free_ = free; 62 | } 63 | 64 | protected: 65 | static std::function allocate_; 66 | static std::function free_; 67 | }; 68 | 69 | /// @brief Base class for classes that encapsulates object shared between 70 | /// host and agents. Alignment defaults to __alignof(T) but may be increased. 
71 | template 72 | class Shared : public BaseShared { 73 | public: 74 | Shared() { 75 | assert(allocate_ != nullptr && free_ != nullptr && 76 | "Shared object allocator is not set"); 77 | static_assert((__alignof(T) <= Align) || (Align == 0), 78 | "Align is less than alignof(T)"); 79 | 80 | shared_object_ = 81 | reinterpret_cast(allocate_(sizeof(T), Max(__alignof(T), Align))); 82 | 83 | assert(shared_object_ != NULL && "Failed on allocating shared_object_"); 84 | 85 | if (shared_object_ != NULL) new (shared_object_) T; 86 | } 87 | 88 | virtual ~Shared() { 89 | assert(allocate_ != nullptr && free_ != nullptr && 90 | "Shared object allocator is not set"); 91 | 92 | if (IsSharedObjectAllocationValid()) { 93 | shared_object_->~T(); 94 | free_(shared_object_); 95 | } 96 | } 97 | 98 | T* shared_object() const { return shared_object_; } 99 | 100 | bool IsSharedObjectAllocationValid() const { 101 | return (shared_object_ != NULL); 102 | } 103 | 104 | private: 105 | T* shared_object_; 106 | }; 107 | 108 | } // namespace core 109 | #endif // header guard 110 | -------------------------------------------------------------------------------- /src/core/util/small_heap.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // A simple first fit memory allocator with eager compaction. For use with few 44 | // items (where list iteration is faster than trees). 45 | // Not thread safe! 
46 | 47 | #ifndef HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_ 48 | #define HSA_RUNTME_CORE_UTIL_SMALL_HEAP_H_ 49 | 50 | #include "utils.h" 51 | 52 | #include 53 | 54 | class SmallHeap { 55 | public: 56 | class Node { 57 | public: 58 | size_t len; 59 | void* next_free; 60 | void* prior_free; 61 | static const intptr_t END = -1; 62 | 63 | __forceinline bool isfree() const { return next_free != NULL; } 64 | __forceinline bool islastfree() const { return intptr_t(next_free) == END; } 65 | __forceinline bool isfirstfree() const { 66 | return intptr_t(prior_free) == END; 67 | } 68 | __forceinline void setlastfree() { 69 | *reinterpret_cast(&next_free) = END; 70 | } 71 | __forceinline void setfirstfree() { 72 | *reinterpret_cast(&prior_free) = END; 73 | } 74 | }; 75 | 76 | private: 77 | SmallHeap(const SmallHeap& rhs); 78 | SmallHeap& operator=(const SmallHeap& rhs); 79 | 80 | void* const pool; 81 | const size_t length; 82 | 83 | size_t total_free; 84 | void* first_free; 85 | std::map memory; 86 | 87 | typedef decltype(memory) memory_t; 88 | memory_t::iterator merge(memory_t::iterator& keep, 89 | memory_t::iterator& destroy); 90 | 91 | public: 92 | SmallHeap() : pool(NULL), length(0), total_free(0) {} 93 | SmallHeap(void* base, size_t length) 94 | : pool(base), length(length), total_free(length) { 95 | first_free = pool; 96 | 97 | Node& node = memory[first_free]; 98 | node.len = length; 99 | node.setlastfree(); 100 | node.setfirstfree(); 101 | 102 | memory[0].len = 0; 103 | memory[(void*)0xFFFFFFFFFFFFFFFFull].len = 0; 104 | } 105 | 106 | void* alloc(size_t bytes); 107 | void free(void* ptr); 108 | 109 | void* base() const { return pool; } 110 | size_t size() const { return length; } 111 | size_t remaining() const { return total_free; } 112 | }; 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /src/core/util/timer.cpp: -------------------------------------------------------------------------------- 1 | 
//////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "core/util/timer.h" 44 | 45 | namespace timer { 46 | 47 | accurate_clock::init::init() { 48 | freq = os::AccurateClockFrequency(); 49 | accurate_clock::period_ns = 1e9 / double(freq); 50 | } 51 | 52 | // Calibrates the fast clock using the accurate clock. 53 | fast_clock::init::init() { 54 | typedef accurate_clock clock; 55 | clock::duration delay(std::chrono::milliseconds(1)); 56 | 57 | // calibrate clock 58 | fast_clock::raw_rep min = 0; 59 | clock::duration elapsed = clock::duration::max(); 60 | 61 | do { 62 | for (int t = 0; t < 10; t++) { 63 | fast_clock::raw_rep r1, r2; 64 | clock::time_point t0, t1, t2, t3; 65 | 66 | t0 = clock::now(); 67 | std::atomic_signal_fence(std::memory_order_acq_rel); 68 | r1 = fast_clock::raw_now(); 69 | std::atomic_signal_fence(std::memory_order_acq_rel); 70 | t1 = clock::now(); 71 | std::atomic_signal_fence(std::memory_order_acq_rel); 72 | 73 | do { 74 | t2 = clock::now(); 75 | } while (t2 - t1 < delay); 76 | 77 | std::atomic_signal_fence(std::memory_order_acq_rel); 78 | r2 = fast_clock::raw_now(); 79 | std::atomic_signal_fence(std::memory_order_acq_rel); 80 | t3 = clock::now(); 81 | 82 | // If elapsed time is shorter than last recorded time and both the start 83 | // and end times are confirmed correlated then record the clock readings. 
84 | // This protects against inaccuracy due to thread switching 85 | if ((t3 - t1 < elapsed) && ((t1 - t0) * 10 < (t2 - t1)) && 86 | ((t3 - t2) * 10 < (t2 - t1))) { 87 | elapsed = t3 - t1; 88 | min = r2 - r1; 89 | } 90 | } 91 | delay += delay; 92 | } while (min < 1000); 93 | 94 | fast_clock::freq = double(min) / duration_in_seconds(elapsed); 95 | fast_clock::period_ps = 1e12 / fast_clock::freq; 96 | } 97 | 98 | double accurate_clock::period_ns; 99 | accurate_clock::raw_frequency accurate_clock::freq; 100 | accurate_clock::init accurate_clock::accurate_clock_init; 101 | 102 | double fast_clock::period_ps; 103 | fast_clock::raw_frequency fast_clock::freq; 104 | fast_clock::init fast_clock::fast_clock_init; 105 | } 106 | -------------------------------------------------------------------------------- /src/loader/loaders.hpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef LOADERS_HPP_ 44 | #define LOADERS_HPP_ 45 | 46 | #include "amd_hsa_loader.hpp" 47 | #include 48 | #include 49 | 50 | namespace amd { 51 | namespace hsa { 52 | namespace loader { 53 | 54 | class OfflineLoaderContext : public amd::hsa::loader::Context { 55 | private: 56 | hsa_isa_t invalid; 57 | hsa_isa_t gfx700, gfx701, gfx800, gfx801, gfx802, gfx803, gfx804, gfx810; 58 | hsa_isa_t reserved; 59 | std::ostream& out; 60 | typedef std::set PointerSet; 61 | PointerSet pointers; 62 | 63 | public: 64 | OfflineLoaderContext(); 65 | 66 | hsa_isa_t IsaFromName(const char *name) override; 67 | 68 | bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) override; 69 | 70 | void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override; 71 | 72 | bool SegmentCopy(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* dst, size_t 
offset, const void* src, size_t size) override; 73 | 74 | void SegmentFree(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size = 0) override; 75 | 76 | void* SegmentAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; 77 | 78 | void* SegmentHostAddress(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset) override; 79 | 80 | bool SegmentFreeze(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size) override; 81 | 82 | bool ImageExtensionSupported(); 83 | 84 | hsa_status_t ImageCreate( 85 | hsa_agent_t agent, 86 | hsa_access_permission_t image_permission, 87 | const hsa_ext_image_descriptor_t *image_descriptor, 88 | const void *image_data, 89 | hsa_ext_image_t *image_handle); 90 | 91 | hsa_status_t ImageDestroy( 92 | hsa_agent_t agent, hsa_ext_image_t image_handle); 93 | 94 | hsa_status_t SamplerCreate( 95 | hsa_agent_t agent, 96 | const hsa_ext_sampler_descriptor_t *sampler_descriptor, 97 | hsa_ext_sampler_t *sampler_handle); 98 | 99 | hsa_status_t SamplerDestroy( 100 | hsa_agent_t agent, hsa_ext_sampler_t sampler_handle); 101 | }; 102 | } 103 | } 104 | } 105 | 106 | #endif // LOADERS_HPP_ 107 | -------------------------------------------------------------------------------- /src/core/inc/memory_region.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // HSA runtime C++ interface file. 
44 | 45 | #ifndef HSA_RUNTME_CORE_INC_MEMORY_REGION_H_ 46 | #define HSA_RUNTME_CORE_INC_MEMORY_REGION_H_ 47 | 48 | #include 49 | 50 | #include "core/inc/runtime.h" 51 | #include "core/inc/agent.h" 52 | #include "core/inc/checked.h" 53 | 54 | namespace core { 55 | class Agent; 56 | 57 | class MemoryRegion : public Checked<0x9C961F19EE175BB3> { 58 | public: 59 | MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner) 60 | : fine_grain_(fine_grain), full_profile_(full_profile), owner_(owner) { 61 | assert(owner_ != NULL); 62 | } 63 | 64 | virtual ~MemoryRegion() {} 65 | 66 | // Convert this object into hsa_region_t. 67 | static __forceinline hsa_region_t Convert(MemoryRegion* region) { 68 | const hsa_region_t region_handle = { 69 | static_cast(reinterpret_cast(region))}; 70 | return region_handle; 71 | } 72 | 73 | static __forceinline const hsa_region_t Convert(const MemoryRegion* region) { 74 | const hsa_region_t region_handle = { 75 | static_cast(reinterpret_cast(region))}; 76 | return region_handle; 77 | } 78 | 79 | // Convert hsa_region_t into MemoryRegion *. 80 | static __forceinline MemoryRegion* Convert(hsa_region_t region) { 81 | return reinterpret_cast(region.handle); 82 | } 83 | 84 | virtual hsa_status_t Allocate(size_t size, void** address) const = 0; 85 | 86 | virtual hsa_status_t Free(void* address, size_t size) const = 0; 87 | 88 | // Translate memory properties into HSA region attribute. 
89 | virtual hsa_status_t GetInfo(hsa_region_info_t attribute, 90 | void* value) const = 0; 91 | 92 | virtual hsa_status_t AssignAgent(void* ptr, size_t size, const Agent& agent, 93 | hsa_access_permission_t access) const = 0; 94 | 95 | __forceinline bool fine_grain() const { return fine_grain_; } 96 | 97 | __forceinline bool full_profile() const { return full_profile_; } 98 | 99 | __forceinline core::Agent* owner() const { return owner_; } 100 | 101 | private: 102 | const bool fine_grain_; 103 | const bool full_profile_; 104 | 105 | core::Agent* owner_; 106 | }; 107 | } // namespace core 108 | 109 | #endif // header guard 110 | -------------------------------------------------------------------------------- /src/core/hsacore.so.def: -------------------------------------------------------------------------------- 1 | ROCR_1 2 | { 3 | global: 4 | hsa_init; 5 | hsa_shut_down; 6 | hsa_system_get_info; 7 | hsa_system_extension_supported; 8 | hsa_system_get_extension_table; 9 | hsa_iterate_agents; 10 | hsa_agent_get_info; 11 | hsa_agent_get_exception_policies; 12 | hsa_agent_extension_supported; 13 | hsa_queue_create; 14 | hsa_soft_queue_create; 15 | hsa_queue_destroy; 16 | hsa_queue_inactivate; 17 | hsa_queue_load_read_index_acquire; 18 | hsa_queue_load_read_index_relaxed; 19 | hsa_queue_load_write_index_acquire; 20 | hsa_queue_load_write_index_relaxed; 21 | hsa_queue_store_write_index_relaxed; 22 | hsa_queue_store_write_index_release; 23 | hsa_queue_cas_write_index_acq_rel; 24 | hsa_queue_cas_write_index_acquire; 25 | hsa_queue_cas_write_index_relaxed; 26 | hsa_queue_cas_write_index_release; 27 | hsa_queue_add_write_index_acq_rel; 28 | hsa_queue_add_write_index_acquire; 29 | hsa_queue_add_write_index_relaxed; 30 | hsa_queue_add_write_index_release; 31 | hsa_queue_store_read_index_relaxed; 32 | hsa_queue_store_read_index_release; 33 | hsa_agent_iterate_regions; 34 | hsa_region_get_info; 35 | hsa_memory_register; 36 | hsa_memory_deregister; 37 | hsa_memory_allocate; 38 
| hsa_memory_free; 39 | hsa_memory_copy; 40 | hsa_memory_assign_agent; 41 | hsa_signal_create; 42 | hsa_signal_destroy; 43 | hsa_signal_load_relaxed; 44 | hsa_signal_load_acquire; 45 | hsa_signal_store_relaxed; 46 | hsa_signal_store_release; 47 | hsa_signal_wait_relaxed; 48 | hsa_signal_wait_acquire; 49 | hsa_signal_and_relaxed; 50 | hsa_signal_and_acquire; 51 | hsa_signal_and_release; 52 | hsa_signal_and_acq_rel; 53 | hsa_signal_or_relaxed; 54 | hsa_signal_or_acquire; 55 | hsa_signal_or_release; 56 | hsa_signal_or_acq_rel; 57 | hsa_signal_xor_relaxed; 58 | hsa_signal_xor_acquire; 59 | hsa_signal_xor_release; 60 | hsa_signal_xor_acq_rel; 61 | hsa_signal_exchange_relaxed; 62 | hsa_signal_exchange_acquire; 63 | hsa_signal_exchange_release; 64 | hsa_signal_exchange_acq_rel; 65 | hsa_signal_add_relaxed; 66 | hsa_signal_add_acquire; 67 | hsa_signal_add_release; 68 | hsa_signal_add_acq_rel; 69 | hsa_signal_subtract_relaxed; 70 | hsa_signal_subtract_acquire; 71 | hsa_signal_subtract_release; 72 | hsa_signal_subtract_acq_rel; 73 | hsa_signal_cas_relaxed; 74 | hsa_signal_cas_acquire; 75 | hsa_signal_cas_release; 76 | hsa_signal_cas_acq_rel; 77 | hsa_isa_from_name; 78 | hsa_isa_get_info; 79 | hsa_isa_compatible; 80 | hsa_code_object_serialize; 81 | hsa_code_object_deserialize; 82 | hsa_code_object_destroy; 83 | hsa_code_object_get_info; 84 | hsa_code_object_get_symbol; 85 | hsa_code_symbol_get_info; 86 | hsa_code_object_iterate_symbols; 87 | hsa_executable_create; 88 | hsa_executable_destroy; 89 | hsa_executable_load_code_object; 90 | hsa_executable_freeze; 91 | hsa_executable_get_info; 92 | hsa_executable_global_variable_define; 93 | hsa_executable_agent_global_variable_define; 94 | hsa_executable_readonly_variable_define; 95 | hsa_executable_validate; 96 | hsa_executable_get_symbol; 97 | hsa_executable_symbol_get_info; 98 | hsa_executable_iterate_symbols; 99 | hsa_status_string; 100 | hsa_ext_program_create; 101 | hsa_ext_program_destroy; 102 | hsa_ext_program_add_module; 
103 | hsa_ext_program_iterate_modules; 104 | hsa_ext_program_get_info; 105 | hsa_ext_program_finalize; 106 | hsa_amd_coherency_get_type; 107 | hsa_amd_coherency_set_type; 108 | hsa_amd_profiling_set_profiler_enabled; 109 | hsa_amd_profiling_get_dispatch_time; 110 | hsa_amd_profiling_convert_tick_to_system_domain; 111 | hsa_amd_signal_wait_any; 112 | hsa_amd_signal_async_handler; 113 | hsa_amd_async_function; 114 | hsa_amd_image_get_info_max_dim; 115 | hsa_amd_queue_cu_set_mask; 116 | hsa_amd_memory_fill; 117 | hsa_amd_memory_async_copy; 118 | hsa_amd_memory_lock; 119 | hsa_amd_memory_unlock; 120 | hsa_amd_agent_iterate_memory_pools; 121 | hsa_amd_agent_memory_pool_get_info; 122 | hsa_amd_agents_allow_access; 123 | hsa_amd_memory_pool_get_info; 124 | hsa_amd_memory_pool_allocate; 125 | hsa_amd_memory_pool_free; 126 | hsa_amd_memory_pool_can_migrate; 127 | hsa_amd_memory_migrate; 128 | hsa_amd_interop_map_buffer; 129 | hsa_amd_interop_unmap_buffer; 130 | hsa_amd_image_create; 131 | hsa_ext_image_get_capability; 132 | hsa_ext_image_data_get_info; 133 | hsa_ext_image_create; 134 | hsa_ext_image_import; 135 | hsa_ext_image_export; 136 | hsa_ext_image_copy; 137 | hsa_ext_image_clear; 138 | hsa_ext_image_destroy; 139 | hsa_ext_sampler_create; 140 | hsa_ext_sampler_destroy; 141 | 142 | local: 143 | *; 144 | }; 145 | -------------------------------------------------------------------------------- /src/core/util/flag.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_FLAG_H_ 44 | #define HSA_RUNTIME_CORE_INC_FLAG_H_ 45 | 46 | #include 47 | 48 | #include 49 | 50 | #include "core/util/os.h" 51 | #include "core/util/utils.h" 52 | 53 | class Flag { 54 | public: 55 | explicit Flag() { Refresh(); } 56 | 57 | virtual ~Flag() {} 58 | 59 | void Refresh() { 60 | std::string var = os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH"); 61 | check_flat_scratch_ = (var == "1") ? true : false; 62 | 63 | var = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE"); 64 | enable_vm_fault_message_ = (var == "1") ? true : false; 65 | 66 | var = os::GetEnvVar("HSA_ENABLE_INTERRUPT"); 67 | enable_interrupt_ = (var == "0") ? false : true; 68 | 69 | var = os::GetEnvVar("HSA_ENABLE_SDMA"); 70 | enable_sdma_ = (var == "0") ? false : true; 71 | 72 | var = os::GetEnvVar("HSA_EMULATE_AQL"); 73 | emulate_aql_ = (var == "1") ? true : false; 74 | 75 | var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND"); 76 | running_valgrind_ = (var == "1") ? true : false; 77 | 78 | var = os::GetEnvVar("HSA_SDMA_WAIT_IDLE"); 79 | sdma_wait_idle_ = (var == "1") ? 
true : false; 80 | 81 | var = os::GetEnvVar("HSA_MAX_QUEUES"); 82 | max_queues_ = static_cast(atoi(var.c_str())); 83 | 84 | var = os::GetEnvVar("HSA_SCRATCH_MEM"); 85 | scratch_mem_size_ = atoi(var.c_str()); 86 | 87 | tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB"); 88 | } 89 | 90 | bool check_flat_scratch() const { return check_flat_scratch_; } 91 | 92 | bool enable_vm_fault_message() const { return enable_vm_fault_message_; } 93 | 94 | bool enable_interrupt() const { return enable_interrupt_; } 95 | 96 | bool enable_sdma() const { return enable_sdma_; } 97 | 98 | bool emulate_aql() const { return emulate_aql_; } 99 | 100 | bool running_valgrind() const { return running_valgrind_; } 101 | 102 | bool sdma_wait_idle() const { return sdma_wait_idle_; } 103 | 104 | uint32_t max_queues() const { return max_queues_; } 105 | 106 | size_t scratch_mem_size() const { return scratch_mem_size_; } 107 | 108 | std::string tools_lib_names() const { return tools_lib_names_; } 109 | 110 | private: 111 | bool check_flat_scratch_; 112 | bool enable_vm_fault_message_; 113 | bool enable_interrupt_; 114 | bool enable_sdma_; 115 | bool emulate_aql_; 116 | bool running_valgrind_; 117 | bool sdma_wait_idle_; 118 | 119 | uint32_t max_queues_; 120 | 121 | size_t scratch_mem_size_; 122 | 123 | std::string tools_lib_names_; 124 | 125 | DISALLOW_COPY_AND_ASSIGN(Flag); 126 | }; 127 | 128 | #endif // header guard 129 | -------------------------------------------------------------------------------- /src/core/inc/blit.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_BLIT_H_ 44 | #define HSA_RUNTIME_CORE_INC_BLIT_H_ 45 | 46 | #include 47 | 48 | #include "core/inc/agent.h" 49 | 50 | namespace core { 51 | class Blit { 52 | public: 53 | explicit Blit() {} 54 | virtual ~Blit() {} 55 | 56 | /// @brief Initialize a blit object. 57 | /// 58 | /// @param agent Reference to the agent that will execute the blit commands. 59 | /// 60 | /// @return hsa_status_t 61 | virtual hsa_status_t Initialize(const core::Agent& agent) = 0; 62 | 63 | /// @brief Marks the blit object as invalid and uncouples its link with 64 | /// the underlying compute device's control block. Use of blit object 65 | /// once it has been released is illegal and any behavior is indeterminate 66 | /// 67 | /// @note: The call will block until all commands have executed. 68 | /// 69 | /// @return hsa_status_t 70 | virtual hsa_status_t Destroy() = 0; 71 | 72 | /// @brief Submit a linear copy command to the underlying compute device's 73 | /// control block. The call is blocking until the command execution is 74 | /// finished. 75 | /// 76 | /// @param dst Memory address of the copy destination. 77 | /// @param src Memory address of the copy source. 78 | /// @param size Size of the data to be copied. 79 | virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, 80 | size_t size) = 0; 81 | 82 | /// @brief Submit a linear copy command to the underlying compute device's 83 | /// control block. The call is non-blocking. The memory transfer will start 84 | /// after all dependent signals are satisfied. After the transfer is 85 | /// completed, the out signal will be decremented. 86 | /// 87 | /// @param dst Memory address of the copy destination. 88 | /// @param src Memory address of the copy source. 89 | /// @param size Size of the data to be copied. 90 | /// @param dep_signals Array of dependent signals. 
91 | /// @param out_signal Output signal. 92 | virtual hsa_status_t SubmitLinearCopyCommand( 93 | void* dst, const void* src, size_t size, 94 | std::vector& dep_signals, core::Signal& out_signal) = 0; 95 | 96 | /// @brief Submit a linear fill command to the underlying compute device's 97 | /// control block. The call is blocking until the command execution is 98 | /// finished. 99 | /// 100 | /// @param ptr Memory address of the fill destination. 101 | /// @param value Value to be set. 102 | /// @param num Number of uint32_t elements to be set to the value. 103 | virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, 104 | size_t num) = 0; 105 | }; 106 | } // namespace core 107 | 108 | #endif // header guard 109 | -------------------------------------------------------------------------------- /src/core/util/locks.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // Library of synchronization primitives - to be added to as needed. 44 | 45 | #ifndef HSA_RUNTIME_CORE_UTIL_LOCKS_H_ 46 | #define HSA_RUNTIME_CORE_UTIL_LOCKS_H_ 47 | 48 | #include "utils.h" 49 | #include "os.h" 50 | 51 | /// @brief: A class that behaves as a lock in a scope. When trying to enter into the 52 | /// critical section, create an object of this class. After the control path goes 53 | /// out of the scope, it will release the lock automatically. 54 | template 55 | class ScopedAcquire { 56 | public: 57 | /// @brief: When constructing, acquire the lock. 58 | /// @param: lock(Input), pointer to an existing lock. 59 | explicit ScopedAcquire(LockType* lock) : lock_(lock) { lock_->Acquire(); } 60 | 61 | /// @brief: when destructing, release the lock. 62 | ~ScopedAcquire() { lock_->Release(); } 63 | 64 | private: 65 | LockType* lock_; 66 | /// @brief: Disable copying and assignment. 
67 | DISALLOW_COPY_AND_ASSIGN(ScopedAcquire); 68 | }; 69 | 70 | /// @brief: a class that represents a kernel mutex. 71 | /// Uses the kernel's scheduler to keep the waiting thread from being scheduled 72 | /// until the lock is released (Best for long waits, though anything using 73 | /// a kernel object is a long wait). 74 | class KernelMutex { 75 | public: 76 | KernelMutex() { lock_ = os::CreateMutex(); } 77 | ~KernelMutex() { os::DestroyMutex(lock_); } 78 | 79 | bool Try() { return os::TryAcquireMutex(lock_); } 80 | bool Acquire() { return os::AcquireMutex(lock_); } 81 | void Release() { os::ReleaseMutex(lock_); } 82 | 83 | private: 84 | os::Mutex lock_; 85 | 86 | /// @brief: Disable copying and assignment. 87 | DISALLOW_COPY_AND_ASSIGN(KernelMutex); 88 | }; 89 | 90 | /// @brief: represents a spin lock. 91 | /// For very short hold durations on the order of the thread scheduling 92 | /// quanta or less. 93 | class SpinMutex { 94 | public: 95 | SpinMutex() { lock_ = 0; } 96 | 97 | bool Try() { 98 | int old = 0; 99 | return lock_.compare_exchange_strong(old, 1); 100 | } 101 | bool Acquire() { 102 | int old = 0; 103 | while (!lock_.compare_exchange_strong(old, 1)) 104 | { 105 | old=0; 106 | os::YieldThread(); 107 | } 108 | return true; 109 | } 110 | void Release() { lock_ = 0; } 111 | 112 | private: 113 | std::atomic lock_; 114 | 115 | /// @brief: Disable copying and assignment. 116 | DISALLOW_COPY_AND_ASSIGN(SpinMutex); 117 | }; 118 | /// @brief: wraps an os event handle. IsSet() calls os::WaitForOsEvent with 0 and WaitForSet() with 0xFFFFFFFF (presumably a zero vs. maximum timeout - TODO confirm in os.h). 119 | class KernelEvent { 120 | public: 121 | KernelEvent() { evt_ = os::CreateOsEvent(true, true); } 122 | ~KernelEvent() { os::DestroyOsEvent(evt_); } 123 | 124 | bool IsSet() { return os::WaitForOsEvent(evt_, 0)==0; } 125 | bool WaitForSet() { return os::WaitForOsEvent(evt_, 0xFFFFFFFF)==0; } 126 | void Set() { os::SetOsEvent(evt_); } 127 | void Reset() { os::ResetOsEvent(evt_); } 128 | 129 | private: 130 | os::EventHandle evt_; 131 | 132 | /// @brief: Disable copying and assignment. 
133 | DISALLOW_COPY_AND_ASSIGN(KernelEvent); 134 | }; 135 | 136 | #endif // HSA_RUNTIME_CORE_UTIL_LOCKS_H_ 137 | -------------------------------------------------------------------------------- /src/core/runtime/isa.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "core/inc/isa.h" 44 | 45 | #include 46 | #include 47 | 48 | namespace core { 49 | 50 | const IsaRegistry::IsaMap IsaRegistry::supported_isas_ = 51 | IsaRegistry::GetSupportedIsas(); 52 | 53 | const Isa *IsaRegistry::GetIsa(const std::string &full_name) { 54 | auto isareg_iter = supported_isas_.find(full_name); 55 | return isareg_iter == supported_isas_.end() ? nullptr : &isareg_iter->second; 56 | } 57 | 58 | const Isa *IsaRegistry::GetIsa(const Isa::Version &version) { 59 | auto isareg_iter = supported_isas_.find(Isa(version).GetFullName()); 60 | return isareg_iter == supported_isas_.end() ? 
nullptr : &isareg_iter->second; 61 | } 62 | 63 | const IsaRegistry::IsaMap IsaRegistry::GetSupportedIsas() { 64 | #define ISAREG_ENTRY_GEN(maj, min, stp) \ 65 | Isa amd_amdgpu_##maj##min##stp; \ 66 | amd_amdgpu_##maj##min##stp.version_ = Isa::Version(maj, min, stp); \ 67 | supported_isas.insert( \ 68 | std::make_pair( \ 69 | amd_amdgpu_##maj##min##stp.GetFullName(), amd_amdgpu_##maj##min##stp)); \ 70 | 71 | IsaMap supported_isas; 72 | 73 | ISAREG_ENTRY_GEN(7, 0, 0) 74 | ISAREG_ENTRY_GEN(7, 0, 1) 75 | ISAREG_ENTRY_GEN(8, 0, 0) 76 | ISAREG_ENTRY_GEN(8, 0, 1) 77 | ISAREG_ENTRY_GEN(8, 0, 2) 78 | ISAREG_ENTRY_GEN(8, 0, 3) 79 | ISAREG_ENTRY_GEN(8, 1, 0) 80 | ISAREG_ENTRY_GEN(9, 0, 0) 81 | 82 | return supported_isas; 83 | } 84 | 85 | std::string Isa::GetFullName() const { 86 | std::stringstream full_name; 87 | full_name << GetVendor() << ":" << GetArchitecture() << ":" 88 | << GetMajorVersion() << ":" << GetMinorVersion() << ":" 89 | << GetStepping(); 90 | return full_name.str(); 91 | } 92 | 93 | bool Isa::GetInfo(const hsa_isa_info_t &attribute, void *value) const { 94 | if (!value) { 95 | return false; 96 | } 97 | 98 | switch (attribute) { 99 | case HSA_ISA_INFO_NAME_LENGTH: { 100 | std::string full_name = GetFullName(); 101 | *((uint32_t *)value) = static_cast(full_name.size()); 102 | return true; 103 | } 104 | case HSA_ISA_INFO_NAME: { 105 | std::string full_name = GetFullName(); 106 | memcpy(value, full_name.c_str(), full_name.size()); 107 | return true; 108 | } 109 | // @todo: following case needs to be removed 110 | case HSA_ISA_INFO_CALL_CONVENTION_COUNT: { 111 | *((uint32_t *)value) = 1; 112 | return true; 113 | } 114 | // @todo: following case needs to be removed 115 | case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONT_SIZE: { 116 | *((uint32_t *)value) = 64; 117 | return true; 118 | } 119 | // @todo: following needs to be removed 120 | case HSA_ISA_INFO_CALL_CONVENTION_INFO_WAVEFRONTS_PER_COMPUTE_UNIT: { 121 | *((uint32_t *)value) = 40; 122 | return true; 123 | } 
124 | default: { 125 | return false; 126 | } 127 | } 128 | } 129 | 130 | } // namespace core 131 | -------------------------------------------------------------------------------- /src/utils/sp3/sp3-asic.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. 3 | // 4 | /// \author AMD Developer Tools Team 5 | /// \file 6 | /// 7 | //===================================================================== 8 | 9 | #ifndef SP3_ASIC_H 10 | #define SP3_ASIC_H 11 | 12 | 13 | #include "sp3-int.h" 14 | #include "sp3-vm.h" 15 | 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | // ASIC types 23 | 24 | 25 | enum asic_backend { 26 | ASIC_BACKEND_SI, 27 | ASIC_BACKEND_CI, 28 | ASIC_BACKEND_GFX8, 29 | ASIC_BACKEND_GFX81, 30 | ASIC_MAX_BACKEND, // Must be the last entry 31 | }; 32 | 33 | 34 | enum asic_cap_id { 35 | ASIC_THREAD_SIZE = 1, 36 | ASIC_FED_INSTRUCTIONS = 2, 37 | ASIC_LEGACY_LOG = 3, 38 | ASIC_LARGE_DS_READ = 4, 39 | ASIC_32BANK_LDS = 5, 40 | }; 41 | 42 | 43 | struct asic_info { 44 | const char *name; 45 | enum asic_backend backend; // which backend to use 46 | int asic_thread_size; // number of threads in a wave 47 | int asic_fed_instructions; // FED instructions are available 48 | int asic_legacy_log; // Legacy EXP and LOG opcodes are available 49 | int asic_large_ds_read; // Large DS read opcodes (96b and 128b) are available 50 | int asic_32bank_lds; // Full 32 bank lds P1LL_F16 INTERP instruction available 51 | }; 52 | 53 | 54 | struct sp3_asic_state { 55 | struct sp3_asic_aluop { 56 | int pos; // original position in code 57 | int op, na, nc; // na = number of args, nc = number of consts in args 58 | int lds, offset; // lds = is an LDS_IDX_OP subop, offset = LDS offset 59 | unsigned dst; 60 | unsigned arg[3]; 61 | unsigned lit[3]; // float literals are no longer 
float at this point 62 | unsigned flags; 63 | int scalar; 64 | } bundle [5]; 65 | unsigned lds_lit[2], lds_mask[2]; 66 | int nbundle; 67 | int reorder; 68 | int last_reorder, last_po[5]; 69 | int nscalar; // number of nominally-scalar opcodes in bundle 70 | int barrier_after; // require barrier after this clause 71 | 72 | // sp3-r6xx 73 | int asic; 74 | struct da_reloc { 75 | unsigned addr, ref; 76 | struct da_reloc *next; 77 | } *da_relocs; 78 | struct cf_reloc **instrels; 79 | struct cf_reloc *labels; 80 | int sinstrels; 81 | int slabels; 82 | char unk_name[16]; 83 | }; 84 | #define A S->ap 85 | 86 | 87 | extern struct asic_info asics[]; 88 | #define ASICNAME asics[A->asic].name 89 | #define ASIC asics[A->asic] 90 | void set_asic(Sp, int asic); 91 | int find_asic(const char *name); 92 | 93 | 94 | // opcode tables 95 | 96 | void sp3_unbuild_tables(void); 97 | void sp3_si_unbuild_tables(void); 98 | void sp3_ci_unbuild_tables(void); 99 | void sp3_gfx8_unbuild_tables(void); 100 | 101 | void sp3_build_tables(void); 102 | void sp3_si_build_tables(void); 103 | void sp3_ci_build_tables(void); 104 | void sp3_gfx8_build_tables(void); 105 | 106 | 107 | 108 | 109 | // helper functions 110 | 111 | 112 | #define FMT_FMT 0x00000000 113 | #define FMT_COMP 0x00010000 114 | #define FMT_ENDIAN 0x00020000 115 | #define FMT_NUM 0x00030000 116 | #define FMT_SRF 0x00040000 117 | #define FMT_MASK 0xFFFF0000 118 | #define FMT_IMASK 0x0000FFFF 119 | 120 | void mark_sgpr(Sp, unsigned); 121 | void mark_vgpr(Sp, unsigned); 122 | void mark_global(Sp, unsigned); 123 | void mark_ctemp(Sp, unsigned); 124 | int is_mod_bool(Sp, pnode *, const char *); 125 | int get_mod_bool(Sp, pnode *, const char *); 126 | int get_mod_int(Sp, pnode *, int, int); 127 | int get_mod_int32(Sp, pnode *); 128 | int par_cmask(Sp, pnode *); 129 | unsigned reg_csel(Sp, unsigned , int); 130 | unsigned reg_msel(Sp, unsigned *, int); 131 | 132 | const char *spec_sel_to_name(Sp, int sel); 133 | const char *sp3_fmt_to_name(Sp, 
int cls, int val); 134 | const char *sp3_si_fmt_to_name(Sp, int cls, int val); 135 | const char *sp3_ci_fmt_to_name(Sp, int cls, int val); 136 | const char *sp3_gfx8_fmt_to_name(Sp, int cls, int val); 137 | 138 | void add_reloc_label(Sp, int li, int blame); 139 | void add_reloc_inst(Sp, int ii, int blame); 140 | void add_reloc_cf(Sp, int offs); 141 | 142 | int grouping_for_group_size(Sp, int group_size); 143 | 144 | //JENNICA - this block of name_tree will go away, replace 145 | //with backend specific. 146 | 147 | enum nametree_enum { 148 | NAMETREE_OPCODES, 149 | NAMETREE_OPCODES_0ARG, 150 | NAMETREE_OPCODES_CALL, 151 | NAMETREE_VTX_FMTS, 152 | NAMETREE_SPEC_SELS, 153 | NAMETREE_SPEC_VEC_SELS, 154 | NAMETREE_SGPR_NAME_SELS, 155 | NAMETREE_CONSTS, 156 | NAMETREE_DEPRECATED, 157 | }; 158 | 159 | struct name_tree **get_name_tree(struct sp3_state *S, enum nametree_enum whichtree); 160 | 161 | extern struct name_tree *opcodes_0arg; 162 | extern struct name_tree *opcodes_call; 163 | extern struct name_tree *vtx_fmts; 164 | extern struct name_tree *spec_sels; 165 | extern struct name_tree *spec_vec_sels; 166 | extern struct name_tree *sgpr_name_sels; 167 | extern struct name_tree *consts; 168 | extern struct name_tree *deprecated; 169 | 170 | extern struct name_tree *asic_names; 171 | struct asic_caps{const char *name; int id;}; 172 | extern struct asic_caps asiccaps[]; 173 | extern struct name_tree *asic_caps; //JENNICA - this may need to go away. 174 | 175 | void update_sgpr_names(Sp); 176 | 177 | #ifdef __cplusplus 178 | } 179 | #endif 180 | 181 | #endif 182 | -------------------------------------------------------------------------------- /src/utils/sp3/sp3-type.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // Copyright 2016 (c), Advanced Micro Devices, Inc. All rights reserved. 
#ifndef SP3_TYPE_H
#define SP3_TYPE_H

#ifdef __cplusplus
extern "C" {
#endif

/// @file sp3-type.h
/// @brief sp3 types

/// @brief Shader type identifiers.
///
/// NOTE(review): presumably these are the "SHTYPE_*" constants that
/// sp3_shader::type refers to - confirm against the sp3 library API.
enum sp3_shtype {
    SP3_SHTYPE_NONE = -1,
    SP3_SHTYPE_PS   = 0,
    SP3_SHTYPE_VS   = 1,
    SP3_SHTYPE_GS   = 2,
    SP3_SHTYPE_ES   = 3,
    SP3_SHTYPE_HS   = 4,
    SP3_SHTYPE_LS   = 5,
    SP3_SHTYPE_CS   = 6,
};

/// @brief Array bounds used by sp3_shader: SP3_NUM_MRT sizes
/// sp3_shader::cb_masks and SP3_NUM_STRM sizes sp3_shader::strm_used.
enum sp3_count {
    SP3_NUM_MRT     = 8,
    SP3_NUM_STRM    = 4,
};

/// @brief Flag values; each enumerator is a distinct single bit, so they can
/// be OR-ed together.
///
/// NOTE(review): the SP3DIS_ prefix suggests these are disassembler options;
/// the consuming function is not declared in this header - confirm.
enum sp3_flag {
    SP3DIS_NO_STATE     = 0x01,
    SP3DIS_NO_BINARY    = 0x02,
    SP3DIS_COMMENTS     = 0x04,
    SP3DIS_NO_GPR_COUNT = 0x08,
    SP3DIS_FORCEVALID   = 0x10,
    SP3DIS_NO_ASIC      = 0x20,
};

/// @brief Shader context. Contains no user-visible fields.
struct sp3_context;

/// @brief Storage entry for register streams.
struct sp3_reg {
    unsigned index;     ///< One of the mm* values from chip_enum.h.
    unsigned value;
};

/// @brief Wrapped shader metadata.
///
/// After generation, shaders are encapsulated in sp3_shader structures.
///
/// Those structures contain the shader binary, its register stream,
/// constants and constant buffers and metadata needed for SC compatibility.
///
/// Fields without a trailing comment are undocumented in the original header.
/// NOTE(review): this struct is shared with the prebuilt libsp3.a, so field
/// order and types presumably must not change - confirm before editing.
struct sp3_shader {
    int type;                       ///< One of the SHTYPE_* constants.
    int asic_int;                   ///< Internal ASIC index. Do not use.
    const char *asic;               ///< ASIC name as a string ("RV870" etc).
    unsigned size;                  ///< Size of the compiled shader, in 32-bit words.
    unsigned nsgprs;                ///< Number of scalar GPRs used.
    unsigned nvgprs;                ///< Number of vector GPRs used.
    unsigned trap_present;
    unsigned user_sgpr_count;
    unsigned scratch_en;
    unsigned dispatch_draw_en;
    unsigned so_en;
    unsigned so_base0_en;
    unsigned so_base1_en;
    unsigned so_base2_en;
    unsigned so_base3_en;
    unsigned oc_lds_en;
    unsigned tg_size_en;
    unsigned tidig_comp_cnt;        ///< Number of components(-1) enabled for thread id in group
    unsigned tgid_x_en;
    unsigned tgid_y_en;
    unsigned tgid_z_en;
    unsigned wave_cnt_en;
    unsigned sgpr_scratch;
    unsigned sgpr_psvs_state;
    unsigned sgpr_so_write_index;
    unsigned sgpr_so_base_offset0;
    unsigned sgpr_so_base_offset1;
    unsigned sgpr_so_base_offset2;
    unsigned sgpr_so_base_offset3;
    unsigned sgpr_offchip_lds;
    unsigned sgpr_is_offchip;
    unsigned sgpr_ring_offset;
    unsigned sgpr_gs_wave_id;
    unsigned sgpr_global_wave_id;
    unsigned sgpr_tg_size;
    unsigned sgpr_tgid_x;
    unsigned sgpr_tgid_y;
    unsigned sgpr_tgid_z;
    unsigned sgpr_tf_base;
    unsigned sgpr_wave_cnt;
    unsigned pc_exports;            ///< Range of parameters exported (if VS).
    unsigned pos_export;            ///< Shader executes a position export (if VS).
    unsigned cb_exports;            ///< Range of MRTs exported (if PS).
    unsigned mrtz_export_format;    ///< Export format of the mrtz export.
    unsigned z_export;              ///< Shader executes a Z export (if PS).
    unsigned pops_en;               ///< Shader is POPS (PS)
    unsigned load_collision_waveid; ///< Shader sets load collision waveid (if PS).
    unsigned stencil_test_export;   ///< Shader exports stencil (if PS).
    unsigned stencil_op_export;     ///< Shader exports stencil (if PS).
    unsigned kill_used;             ///< Shader executes ALU KILL operations.
    unsigned cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS).
    unsigned emit_used;             ///< EMIT opcodes used (if GS).
    unsigned covmask_export;        ///< Shader exports coverage mask (if PS).
    unsigned mask_export;           ///< Shader exports mask (if PS).
    unsigned strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map).
    unsigned scratch_used;          ///< Scratch SMX exports used.
    unsigned scratch_itemsize;      ///< Scratch ring item size.
    unsigned reduction_used;        ///< Reduction SMX exports used.
    unsigned ring_used;             ///< ESGS/GSVS ring SMX exports used.
    unsigned ring_itemsize;         ///< ESGS/GSVS ring item size (for ES/GS respectively).
    unsigned vertex_size[4];        ///< GSVS ring vertex size (for GS).
    unsigned mem_used;              ///< Raw memory SMX exports used.
    unsigned rats_used;             ///< Mask of RATs (UAVs) used
    unsigned group_size[3];         ///< Wavefront group size (for ELF files).
    unsigned alloc_lds;             ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS)
    unsigned *data;                 ///< Shader binary data.
    unsigned nregs;                 ///< Number of register writes in the stream.
    struct sp3_reg *regs;           ///< Register writes (index-value pairs).
};

/// @brief Comment callback.
///
/// Takes an opaque pointer and an int and returns a C string.
/// NOTE(review): the meaning of the two arguments and the ownership of the
/// returned string are not stated in this header - confirm with the sp3 API.
typedef const char *(*sp3_comment_cb)(void *, int);

#ifdef __cplusplus
}
#endif

#endif
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_UTIL_TIMER_H_ 44 | #define HSA_RUNTIME_CORE_UTIL_TIMER_H_ 45 | 46 | #include "core/util/utils.h" 47 | #include "core/util/os.h" 48 | #include 49 | 50 | #include 51 | 52 | namespace timer { 53 | 54 | // Needed to patch around a mixed arithmetic bug in MSVC's duration_cast as of 55 | // VS 2013. 56 | template 57 | struct wide_type { 58 | typedef double type; 59 | }; 60 | template <> 61 | struct wide_type { 62 | typedef uintmax_t type; 63 | }; 64 | template <> 65 | struct wide_type { 66 | typedef intmax_t type; 67 | }; 68 | 69 | template 70 | static __forceinline To 71 | duration_cast(const std::chrono::duration& d) { 72 | typedef typename wide_type::value, 73 | std::is_signed::value>::type wide; 74 | typedef std::chrono::duration unit_convert_t; 75 | 76 | unit_convert_t temp = std::chrono::duration_cast(d); 77 | return To(static_cast(temp.count())); 78 | } 79 | // End patch 80 | 81 | template 82 | static __forceinline double duration_in_seconds( 83 | std::chrono::duration delta) { 84 | typedef std::chrono::duration> seconds; 85 | return seconds(delta).count(); 86 | } 87 | 88 | template 89 | static __forceinline rep duration_from_seconds(double delta) { 90 | typedef std::chrono::duration> seconds; 91 | return std::chrono::duration_cast(seconds(delta)); 92 | } 93 | 94 | // Provices a C++11 standard clock interface to the os::AccurateClock functions 95 | class accurate_clock { 96 | public: 97 | typedef double rep; 98 | typedef std::nano period; 99 | typedef std::chrono::duration duration; 100 | typedef std::chrono::time_point time_point; 101 | 102 | static const bool is_steady = true; 103 | 104 | static __forceinline time_point now() { 105 | return time_point(duration(raw_now() * period_ns)); 106 | } 107 | 108 | // These two extra APIs and types let us use clocks without conversion to the 109 | // arbitrary period unit 110 | typedef uint64_t 
raw_rep; 111 | typedef uint64_t raw_frequency; 112 | 113 | static __forceinline raw_rep raw_now() { return os::ReadAccurateClock(); } 114 | static __forceinline raw_frequency raw_freq() { return freq; } 115 | 116 | private: 117 | static double period_ns; 118 | static raw_frequency freq; 119 | 120 | class init { 121 | public: 122 | init(); 123 | }; 124 | static init accurate_clock_init; 125 | }; 126 | 127 | // Provices a C++11 standard clock interface to the lowest latency approximate 128 | // clock 129 | class fast_clock { 130 | public: 131 | typedef double rep; 132 | typedef std::pico period; 133 | typedef std::chrono::duration duration; 134 | typedef std::chrono::time_point time_point; 135 | 136 | static const bool is_steady = true; 137 | 138 | static __forceinline time_point now() { 139 | return time_point(duration(raw_now() * period_ps)); 140 | } 141 | 142 | // These two extra APIs and types let us use clocks without conversion to the 143 | // arbitrary period unit 144 | typedef uint64_t raw_rep; 145 | typedef double raw_frequency; 146 | 147 | static __forceinline raw_rep raw_now() { return __rdtsc(); } 148 | static __forceinline raw_frequency raw_freq() { return freq; } 149 | 150 | private: 151 | static double period_ps; 152 | static raw_frequency freq; 153 | 154 | class init { 155 | public: 156 | init(); 157 | }; 158 | static init fast_clock_init; 159 | }; 160 | } 161 | 162 | #endif 163 | -------------------------------------------------------------------------------- /src/core/inc/isa.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_ISA_H_ 44 | #define HSA_RUNTIME_CORE_ISA_H_ 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include "core/inc/amd_hsa_code.hpp" 52 | 53 | namespace core { 54 | 55 | // @class Isa 56 | // @brief Instruction Set Architecture 57 | class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> { 58 | public: 59 | // @brief Isa's version type 60 | typedef std::tuple Version; 61 | 62 | // @brief Default destructor 63 | ~Isa() {} 64 | 65 | // @returns Handle equivalent of @p isa_object 66 | static hsa_isa_t Handle(const Isa *isa_object) { 67 | hsa_isa_t isa_handle = { reinterpret_cast(isa_object) }; 68 | return isa_handle; 69 | } 70 | // @returns Object equivalend of @p isa_handle 71 | static Isa *Object(const hsa_isa_t &isa_handle) { 72 | Isa *isa_object = amd::hsa::common::ObjectAt(isa_handle.handle); 73 | return isa_object; 74 | } 75 | 76 | // @returns This Isa's version 77 | const Version &version() const { 78 | return version_; 79 | } 80 | 81 | // @returns This Isa's vendor 82 | std::string GetVendor() const { 83 | return "AMD"; 84 | } 85 | // @returns This Isa's architecture 86 | std::string GetArchitecture() const { 87 | return "AMDGPU"; 88 | } 89 | // @returns This Isa's major version 90 | int32_t GetMajorVersion() const { 91 | return std::get<0>(version_); 92 | } 93 | // @returns This Isa's minor version 94 | int32_t GetMinorVersion() const { 95 | return std::get<1>(version_); 96 | } 97 | // @returns This Isa's stepping 98 | int32_t GetStepping() const { 99 | return std::get<2>(version_); 100 | } 101 | 102 | // @returns True if this Isa is compatible with @p isa_object, false otherwise 103 | bool IsCompatible(const Isa *isa_object) const { 104 | assert(isa_object); 105 | return version_ == isa_object->version_; 106 | } 107 | // @returns True if this Isa is compatible with @p isa_handle, false 
otherwise 108 | bool IsCompatible(const hsa_isa_t &isa_handle) const { 109 | assert(isa_handle.handle); 110 | return IsCompatible(Object(isa_handle)); 111 | } 112 | // @brief Isa is always in valid state 113 | bool IsValid() const { 114 | return true; 115 | } 116 | 117 | // @returns This Isa's full name 118 | std::string GetFullName() const; 119 | 120 | // @brief Query value of requested @p attribute and record it in @p value 121 | bool GetInfo(const hsa_isa_info_t &attribute, void *value) const; 122 | 123 | private: 124 | // @brief Default constructor 125 | Isa(): version_(Version(-1, -1, -1)) {} 126 | 127 | // @brief Construct from @p version 128 | Isa(const Version &version): version_(version) {} 129 | 130 | // @brief Isa's version 131 | Version version_; 132 | 133 | // @brief Isa's friends 134 | friend class IsaRegistry; 135 | }; // class Isa 136 | 137 | // @class IsaRegistry 138 | // @brief Instruction Set Architecture Registry 139 | class IsaRegistry final { 140 | public: 141 | // @returns Isa for requested @p full_name, null pointer if not supported 142 | static const Isa *GetIsa(const std::string &full_name); 143 | // @returns Isa for requested @p version, null pointer if not supported 144 | static const Isa *GetIsa(const Isa::Version &version); 145 | 146 | private: 147 | // @brief IsaRegistry's map type 148 | typedef std::unordered_map IsaMap; 149 | 150 | // @brief Supported instruction set architectures 151 | static const IsaMap supported_isas_; 152 | 153 | // @brief Default constructor - not available 154 | IsaRegistry(); 155 | // @brief Default destructor - not available 156 | ~IsaRegistry(); 157 | 158 | // @returns Supported instruction set architectures 159 | static const IsaMap GetSupportedIsas(); 160 | }; // class IsaRegistry 161 | 162 | } // namespace core 163 | 164 | #endif // HSA_RUNTIME_CORE_ISA_HPP_ 165 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | ### HSA Runtime API and runtime for Boltzmann 2 | 3 | This repository includes the user-mode API interfaces and libraries necessary for host applications to launch compute kernels to available HSA Boltzmann kernel agents. Reference source code for the core runtime is also available. 4 | 5 | Only the AMD/ATI Fiji(c) family of discrete GPUs are currently supported. 6 | 7 | #### Initial Target Platform Requirements 8 | 9 | * CPU: Intel(c) Haswell or newer, Core i5, Core i7, Xeon E3 v4 & v5; Xeon E5 v3 10 | * GPU: Fiji ASIC (AMD R9 Nano, R9 Fury and R9 Fury X) 11 | 12 | #### Source code 13 | 14 | The HSA core runtime source code for Boltzmann is located in the src subdirectory. Please consult the associated README.md file for contents and build instructions. 15 | 16 | #### Binaries for Ubuntu & Fedora and Installation Instructions 17 | 18 | Pre-built binaries are available for installation from the ROCm package repository. For ROCR, they include: 19 | 20 | Core runtime package: 21 | 22 | * HSA include files to support application development on the HSA runtime for Boltzmann 23 | * A 64-bit version of AMD's HSA core runtime for Boltzmann 24 | 25 | Runtime extension package: 26 | 27 | * A 64-bit version of AMD's finalizer extension for Boltzmann 28 | * A 64-bit version of AMD's runtime tools library 29 | 30 | The contents of these packages are installed in /opt/rocm/hsa and /opt/rocm by default. 31 | The core runtime package depends on the hsakmt-roct-dev package 32 | 33 | Installation instructions can be found in the ROCm manifest repository README.md: 34 | 35 | https://github.com/RadeonOpenCompute/ROCm 36 | 37 | #### Infrastructure 38 | 39 | The HSA runtime is a thin, user-mode API that exposes the necessary interfaces to access and interact with graphics hardware driven by the AMDGPU driver set and the Boltzmann HSA kernel driver. 
Together they enable programmers to directly harness the power of AMD discrete graphics devices by allowing host applications to launch compute kernels directly to the graphics hardware. 40 | 41 | The capabilities expressed by the HSA Runtime API are: 42 | 43 | * Error handling 44 | * Runtime initialization and shutdown 45 | * System and agent information 46 | * Signals and synchronization 47 | * Architected dispatch 48 | * Memory management 49 | * HSA runtime fits into a typical software architecture stack. 50 | 51 | The HSA runtime provides direct access to the graphics hardware to give the programmer more control of the execution. One example of low-level hardware access is the support for one or more user-mode queues, which provide programmers with a low-latency kernel dispatch interface, allowing them to develop customized dispatch algorithms specific to their application. 52 | 53 | The HSA Architected Queuing Language is an open standard, defined by the HSA Foundation, specifying the packet syntax used to control supported AMD/ATI Radeon (c) graphics devices. The AQL language supports several packet types, including packets that can command the hardware to automatically resolve inter-packet dependencies (barrier-AND and barrier-OR packets), kernel dispatch packets and agent dispatch packets. 54 | 55 | In addition to user-mode queues and AQL, the HSA runtime exposes various virtual address ranges that can be accessed by one or more of the system's graphics devices, and possibly the host. The exposed virtual address ranges support either fine-grained or coarse-grained access. Updates to memory in a fine-grained region are immediately visible to all devices that can access it, but only one device can have access to a coarse-grained allocation at a time. Ownership of a coarse-grained region can be changed using the HSA runtime memory APIs, but this transfer of ownership must be explicitly done by the host application.
56 | 57 | Programmers should consult the HSA Runtime Programmer's Reference Manual for a full description of the HSA Runtime APIs, AQL and the HSA memory policy. 58 | 59 | #### Sample 60 | 61 | The simplest way to check if the kernel, runtime and base development environment are installed correctly is to run a simple sample. A modified version of the vector_copy sample was taken from the HSA-Runtime-AMD repository and added to the ROCR repository to facilitate this. Build the sample and run it, using this series of commands: 62 | 63 | cd ROCR-Runtime/sample && make && ./vector_copy 64 | 65 | If the sample runs without generating errors, the installation is complete. 66 | 67 | #### Known Issues 68 | 69 | * The image extension is currently not supported for discrete GPUs. An image extension library is not provided in the binary package. The standard hsa_ext_image.h extension include file is provided for reference. 70 | * Each HSA process creates an internal DMA queue, but there is a system-wide limit of four DMA queues. The fifth simultaneous HSA process will fail hsa_init() with HSA_STATUS_ERROR_OUT_OF_RESOURCES. To run an unlimited number of simultaneous HSA processes, set the environment variable HSA_ENABLE_SDMA=0. 71 | 72 | #### Disclaimer 73 | 74 | The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc.
makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale. 75 | 76 | AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. 77 | 78 | Copyright (c) 2014-2016 Advanced Micro Devices, Inc. All rights reserved. 79 | -------------------------------------------------------------------------------- /src/core/inc/amd_cpu_agent.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // AMD specific HSA backend. 
44 | 45 | #ifndef HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_ 46 | #define HSA_RUNTIME_CORE_INC_AMD_CPU_AGENT_H_ 47 | 48 | #include 49 | 50 | #include "hsakmt.h" 51 | 52 | #include "core/inc/runtime.h" 53 | #include "core/inc/agent.h" 54 | #include "core/inc/queue.h" 55 | 56 | namespace amd { 57 | // @brief Class to represent a CPU device. 58 | class CpuAgent : public core::Agent { 59 | public: 60 | // @brief CpuAgent constructor. 61 | // 62 | // @param [in] node Node id. Each CPU in different socket will get distinct 63 | // id. 64 | // @param [in] node_props Node property. 65 | CpuAgent(HSAuint32 node, const HsaNodeProperties& node_props); 66 | 67 | // @brief CpuAgent destructor. 68 | ~CpuAgent(); 69 | 70 | // @brief Invoke the user provided callback for each region accessible by 71 | // this agent. 72 | // 73 | // @param [in] include_peer If true, the callback will be also invoked on each 74 | // peer memory region accessible by this agent. If false, only invoke the 75 | // callback on memory region owned by this agent. 76 | // @param [in] callback User provided callback function. 77 | // @param [in] data User provided pointer as input for @p callback. 78 | // 79 | // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed 80 | // region returns ::HSA_STATUS_SUCCESS. 81 | hsa_status_t VisitRegion(bool include_peer, 82 | hsa_status_t (*callback)(hsa_region_t region, 83 | void* data), 84 | void* data) const; 85 | 86 | // @brief Override from core::Agent. 87 | hsa_status_t IterateRegion(hsa_status_t (*callback)(hsa_region_t region, 88 | void* data), 89 | void* data) const override; 90 | 91 | // @brief Override from core::Agent. 92 | hsa_status_t GetInfo(hsa_agent_info_t attribute, void* value) const override; 93 | 94 | // @brief Override from core::Agent. 
95 | hsa_status_t QueueCreate(size_t size, hsa_queue_type_t queue_type, 96 | core::HsaEventCallback event_callback, void* data, 97 | uint32_t private_segment_size, 98 | uint32_t group_segment_size, 99 | core::Queue** queue) override; 100 | 101 | // @brief Returns number of data caches. 102 | __forceinline size_t num_cache() const { return cache_props_.size(); } 103 | 104 | // @brief Returns data cache property. 105 | // 106 | // @param [in] idx Cache level. 107 | __forceinline const HsaCacheProperties& cache_prop(int idx) const { 108 | return cache_props_[idx]; 109 | } 110 | 111 | // @brief Override from core::Agent. 112 | const std::vector& regions() const override { 113 | return regions_; 114 | } 115 | 116 | // @brief OVerride from core::Agent. 117 | const core::Isa* isa() const override { return NULL; } 118 | 119 | private: 120 | // @brief Query the driver to get the region list owned by this agent. 121 | void InitRegionList(); 122 | 123 | // @brief Query the driver to get the cache properties. 124 | void InitCacheList(); 125 | 126 | // @brief Invoke the user provided callback for every region in @p regions. 127 | // 128 | // @param [in] regions Array of region object. 129 | // @param [in] callback User provided callback function. 130 | // @param [in] data User provided pointer as input for @p callback. 131 | // 132 | // @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed 133 | // region returns ::HSA_STATUS_SUCCESS. 134 | hsa_status_t VisitRegion( 135 | const std::vector& regions, 136 | hsa_status_t (*callback)(hsa_region_t region, void* data), 137 | void* data) const; 138 | 139 | // @brief Node property. 140 | const HsaNodeProperties properties_; 141 | 142 | // @brief Array of data cache property. The array index represents the cache 143 | // level. 144 | std::vector cache_props_; 145 | 146 | // @brief Array of regions owned by this agent. 
147 | std::vector regions_; 148 | 149 | DISALLOW_COPY_AND_ASSIGN(CpuAgent); 150 | }; 151 | 152 | } // namespace amd 153 | 154 | #endif // header guard 155 | -------------------------------------------------------------------------------- /src/core/inc/host_queue.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

#ifndef HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_
#define HSA_RUNTIME_CORE_INC_HOST_QUEUE_H_

#include "core/inc/memory_region.h"
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"

namespace core {
/// @brief Queue whose read and write indices are plain memory fields
/// (amd_queue_.read_dispatch_id / amd_queue_.write_dispatch_id) accessed
/// through the atomic:: helpers with an explicit memory order per method.
///
/// NOTE(review): amd_queue_ is not declared in this header; presumably it is
/// inherited from core::Queue - confirm against core/inc/queue.h.
class HostQueue : public Queue {
 public:
  /// @brief Constructs the queue. Defined out of line.
  ///
  /// @param region Region handle (presumably where the ring is allocated -
  /// confirm against host_queue.cpp).
  /// @param ring_size Requested ring size.
  /// @param type HSA queue type.
  /// @param features Queue feature bits.
  /// @param doorbell_signal Signal used as the queue doorbell.
  HostQueue(hsa_region_t region, uint32_t ring_size, hsa_queue_type_t type,
            uint32_t features, hsa_signal_t doorbell_signal);

  ~HostQueue();

  /// @brief No-op for this queue type; always reports success.
  hsa_status_t Inactivate() { return HSA_STATUS_SUCCESS; }

  // Read index accessors: atomic loads of amd_queue_.read_dispatch_id with the
  // memory order named in the method.

  uint64_t LoadReadIndexAcquire() {
    return atomic::Load(&amd_queue_.read_dispatch_id,
                        std::memory_order_acquire);
  }

  uint64_t LoadReadIndexRelaxed() {
    return atomic::Load(&amd_queue_.read_dispatch_id,
                        std::memory_order_relaxed);
  }

  // Write index accessors: atomic loads of amd_queue_.write_dispatch_id.

  uint64_t LoadWriteIndexAcquire() {
    return atomic::Load(&amd_queue_.write_dispatch_id,
                        std::memory_order_acquire);
  }

  uint64_t LoadWriteIndexRelaxed() {
    return atomic::Load(&amd_queue_.write_dispatch_id,
                        std::memory_order_relaxed);
  }

  // Atomic stores of the read index.

  void StoreReadIndexRelaxed(uint64_t value) {
    atomic::Store(&amd_queue_.read_dispatch_id, value,
                  std::memory_order_relaxed);
  }

  void StoreReadIndexRelease(uint64_t value) {
    atomic::Store(&amd_queue_.read_dispatch_id, value,
                  std::memory_order_release);
  }

  // Atomic stores of the write index.

  void StoreWriteIndexRelaxed(uint64_t value) {
    atomic::Store(&amd_queue_.write_dispatch_id, value,
                  std::memory_order_relaxed);
  }

  void StoreWriteIndexRelease(uint64_t value) {
    atomic::Store(&amd_queue_.write_dispatch_id, value,
                  std::memory_order_release);
  }

  // Compare-and-swap on the write index. Note the argument order at the call
  // site: atomic::Cas receives the replacement (`value`) before the comparand
  // (`expected`). The result of atomic::Cas is returned unchanged; presumably
  // it is the value observed before the operation - confirm against the
  // atomic helper's declaration.

  uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
                       std::memory_order_acq_rel);
  }

  uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
                       std::memory_order_acquire);
  }

  uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
                       std::memory_order_relaxed);
  }

  uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) {
    return atomic::Cas(&amd_queue_.write_dispatch_id, value, expected,
                       std::memory_order_release);
  }

  // Atomic add on the write index. The result of atomic::Add is returned
  // unchanged (presumably the pre-increment value - confirm against the
  // atomic helper's declaration).

  uint64_t AddWriteIndexAcqRel(uint64_t value) {
    return atomic::Add(&amd_queue_.write_dispatch_id, value,
                       std::memory_order_acq_rel);
  }

  uint64_t AddWriteIndexAcquire(uint64_t value) {
    return atomic::Add(&amd_queue_.write_dispatch_id, value,
                       std::memory_order_acquire);
  }

  uint64_t AddWriteIndexRelaxed(uint64_t value) {
    return atomic::Add(&amd_queue_.write_dispatch_id, value,
                       std::memory_order_relaxed);
  }

  uint64_t AddWriteIndexRelease(uint64_t value) {
    return atomic::Add(&amd_queue_.write_dispatch_id, value,
                       std::memory_order_release);
  }

  /// @brief CU masking is not supported by this queue type; both arguments are
  /// ignored and HSA_STATUS_ERROR is always returned.
  hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count,
                            const uint32_t* cu_mask) {
    return HSA_STATUS_ERROR;
  }

  /// @brief Returns the active_ flag.
  bool active() const { return active_; }

  /// @brief Allocates storage for the object aligned to
  /// HSA_QUEUE_ALIGN_BYTES.
  void* operator new(size_t size) {
    return _aligned_malloc(size, HSA_QUEUE_ALIGN_BYTES);
  }

  /// @brief Placement form: constructs in caller-provided storage.
  void* operator new(size_t size, void* ptr) { return ptr; }

  /// @brief Releases storage obtained from the aligned operator new above.
  void operator delete(void* ptr) { _aligned_free(ptr); }

  /// @brief Matching placement delete; the storage belongs to the caller, so
  /// nothing is released.
  void operator delete(void*, void*) {}

 private:
  // Alignment constant for the ring buffer (not referenced in this header).
  static const size_t kRingAlignment = 256;
  // Ring size.
  const uint32_t size_;
  // Value reported by active().
  bool active_;
  // Ring buffer storage.
  void* ring_;

  DISALLOW_COPY_AND_ASSIGN(HostQueue);
};
}  // namespace core
#endif  // header guard
168 | -------------------------------------------------------------------------------- /src/core/inc/default_signal.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers.
25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // HSA runtime C++ interface file. 44 | 45 | #ifndef HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_ 46 | #define HSA_RUNTME_CORE_INC_DEFAULT_SIGNAL_H_ 47 | 48 | #include "core/inc/runtime.h" 49 | #include "core/inc/signal.h" 50 | #include "core/util/utils.h" 51 | 52 | namespace core { 53 | 54 | /// @brief Simple pure memory based signal. 55 | /// @brief See base class Signal. 56 | class DefaultSignal : public Signal { 57 | public: 58 | /// @brief Determines if a Signal* can be safely converted to DefaultSignal* 59 | /// via static_cast. 60 | static __forceinline bool IsType(Signal* ptr) { 61 | return ptr->IsType(&rtti_id_); 62 | } 63 | 64 | /// @brief See base class Signal. 65 | explicit DefaultSignal(hsa_signal_value_t initial_value); 66 | 67 | /// @brief See base class Signal. 
68 | ~DefaultSignal(); 69 | 70 | // Below are various methods corresponding to the APIs, which load/store the 71 | // signal value or modify the existing signal value automically and with 72 | // specified memory ordering semantics. 73 | 74 | hsa_signal_value_t LoadRelaxed(); 75 | 76 | hsa_signal_value_t LoadAcquire(); 77 | 78 | void StoreRelaxed(hsa_signal_value_t value); 79 | 80 | void StoreRelease(hsa_signal_value_t value); 81 | 82 | hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, 83 | hsa_signal_value_t compare_value, 84 | uint64_t timeout, hsa_wait_state_t wait_hint); 85 | 86 | hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, 87 | hsa_signal_value_t compare_value, 88 | uint64_t timeout, hsa_wait_state_t wait_hint); 89 | 90 | void AndRelaxed(hsa_signal_value_t value); 91 | 92 | void AndAcquire(hsa_signal_value_t value); 93 | 94 | void AndRelease(hsa_signal_value_t value); 95 | 96 | void AndAcqRel(hsa_signal_value_t value); 97 | 98 | void OrRelaxed(hsa_signal_value_t value); 99 | 100 | void OrAcquire(hsa_signal_value_t value); 101 | 102 | void OrRelease(hsa_signal_value_t value); 103 | 104 | void OrAcqRel(hsa_signal_value_t value); 105 | 106 | void XorRelaxed(hsa_signal_value_t value); 107 | 108 | void XorAcquire(hsa_signal_value_t value); 109 | 110 | void XorRelease(hsa_signal_value_t value); 111 | 112 | void XorAcqRel(hsa_signal_value_t value); 113 | 114 | void AddRelaxed(hsa_signal_value_t value); 115 | 116 | void AddAcquire(hsa_signal_value_t value); 117 | 118 | void AddRelease(hsa_signal_value_t value); 119 | 120 | void AddAcqRel(hsa_signal_value_t value); 121 | 122 | void SubRelaxed(hsa_signal_value_t value); 123 | 124 | void SubAcquire(hsa_signal_value_t value); 125 | 126 | void SubRelease(hsa_signal_value_t value); 127 | 128 | void SubAcqRel(hsa_signal_value_t value); 129 | 130 | hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value); 131 | 132 | hsa_signal_value_t ExchAcquire(hsa_signal_value_t value); 133 | 134 | 
hsa_signal_value_t ExchRelease(hsa_signal_value_t value); 135 | 136 | hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value); 137 | 138 | hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, 139 | hsa_signal_value_t value); 140 | 141 | hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, 142 | hsa_signal_value_t value); 143 | 144 | hsa_signal_value_t CasRelease(hsa_signal_value_t expected, 145 | hsa_signal_value_t value); 146 | 147 | hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, 148 | hsa_signal_value_t value); 149 | 150 | /// @brief see the base class Signal 151 | __forceinline hsa_signal_value_t* ValueLocation() const { 152 | return (hsa_signal_value_t*)&signal_.value; 153 | } 154 | 155 | /// @brief see the base class Signal 156 | __forceinline HsaEvent* EopEvent() { return NULL; } 157 | 158 | /// @brief prevent throwing exceptions 159 | void* operator new(size_t size) { return malloc(size); } 160 | 161 | /// @brief prevent throwing exceptions 162 | void operator delete(void* ptr) { free(ptr); } 163 | 164 | protected: 165 | bool _IsA(rtti_t id) const { return id == &rtti_id_; } 166 | 167 | private: 168 | static int rtti_id_; 169 | 170 | DISALLOW_COPY_AND_ASSIGN(DefaultSignal); 171 | }; 172 | 173 | } // namespace core 174 | #endif // header guard 175 | -------------------------------------------------------------------------------- /src/core/util/small_heap.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
//
////////////////////////////////////////////////////////////////////////////////

#include "small_heap.h"

// Folds the free block `destroy` into the free block `keep` and returns `keep`.
//
// Preconditions (checked with assert): `destroy` starts exactly where `keep`
// ends, and both blocks are free. `keep` absorbs the length of `destroy` and
// takes over its next_free link; the map entry for `destroy` is erased.
SmallHeap::memory_t::iterator SmallHeap::merge(
    SmallHeap::memory_t::iterator& keep,
    SmallHeap::memory_t::iterator& destroy) {
  assert((char*)keep->first + keep->second.len == (char*)destroy->first &&
         "Invalid merge");
  assert(keep->second.isfree() && "Merge with allocated block");
  assert(destroy->second.isfree() && "Merge with allocated block");

  keep->second.len += destroy->second.len;
  keep->second.next_free = destroy->second.next_free;
  // The free block that followed `destroy` must now point back at `keep`.
  if (!destroy->second.islastfree())
    memory[destroy->second.next_free].prior_free = keep->first;

  memory.erase(destroy);
  return keep;
}

// Returns the block starting at `ptr` to the heap.
//
// A NULL `ptr` is ignored. A `ptr` that is not the start of a tracked block
// triggers an assert and is otherwise ignored. The block is linked into the
// address-ordered free list and then merged with a free neighbour on either
// side.
//
// NOTE(review): there is no check that the block is currently allocated;
// freeing an already-free block would add its length to total_free again.
void SmallHeap::free(void* ptr) {
  if (ptr == NULL) return;

  auto iterator = memory.find(ptr);

  // Check for illegal free
  if (iterator == memory.end()) {
    assert(false && "Illegal free.");
    return;
  }

  // Sentinel entries keyed at the lowest and highest addresses; they bound the
  // neighbour walks below. Presumably inserted when the heap is constructed -
  // confirm against small_heap.h.
  const auto start_guard = memory.find(0);
  const auto end_guard = memory.find((void*)0xFFFFFFFFFFFFFFFFull);

  // Return memory to total and link node into free list
  total_free += iterator->second.len;
  if (first_free < iterator->first) {
    // A free block exists at a lower address: walk backwards to the nearest
    // free predecessor and splice this block in right after it.
    auto before = iterator;
    before--;
    while (before != start_guard && !before->second.isfree()) before--;
    assert(before->second.next_free > iterator->first &&
           "Inconsistency in small heap.");
    iterator->second.prior_free = before->first;
    iterator->second.next_free = before->second.next_free;
    before->second.next_free = iterator->first;
    if (!iterator->second.islastfree())
      memory[iterator->second.next_free].prior_free = iterator->first;
  } else {
    // This block precedes every free block: it becomes the new list head.
    iterator->second.setfirstfree();
    iterator->second.next_free = first_free;
    first_free = iterator->first;
    if (!iterator->second.islastfree())
      memory[iterator->second.next_free].prior_free = iterator->first;
  }

  // Attempt compaction
  auto before = iterator;
  before--;
  if (before != start_guard) {
    if (before->second.isfree()) {
      // merge() erases the entry `iterator` refers to; continue with the
      // surviving (lower) block.
      iterator = merge(before, iterator);
    }
  }

  auto after = iterator;
  after++;
  if (after != end_guard) {
    if (after->second.isfree()) {
      iterator = merge(iterator, after);
    }
  }
}

// Allocates `bytes` bytes using a first-fit walk of the free list.
//
// Returns the start address of the allocated block, or NULL when `bytes` is 0,
// when `bytes` exceeds total_free, or when no single free block is large
// enough. An exact fit unlinks the free block; otherwise the block is split,
// the front `bytes` are handed out and the remainder becomes a new free block.
void* SmallHeap::alloc(size_t bytes) {
  // Is enough memory available?
  if ((bytes > total_free) || (bytes == 0)) return NULL;

  memory_t::iterator current;
  memory_t::iterator prior;

  // Walk the free list and allocate at first fitting location
  // (`prior == current` identifies the head of the free list).
  prior = current = memory.find(first_free);
  while (true) {
    if (bytes <= current->second.len) {
      // Decrement from total
      total_free -= bytes;

      // Is allocation an exact fit?
      if (bytes == current->second.len) {
        if (prior == current) {
          // Unlinking the head: its successor (if any) becomes the head.
          first_free = current->second.next_free;
          if (!current->second.islastfree())
            memory[current->second.next_free].setfirstfree();
        } else {
          prior->second.next_free = current->second.next_free;
          if (!current->second.islastfree())
            memory[current->second.next_free].prior_free = prior->first;
        }
        // A NULL next_free marks the block as no longer free.
        current->second.next_free = NULL;
        return current->first;
      } else {
        // Split current node
        void* remaining = (char*)current->first + bytes;
        Node& node = memory[remaining];
        node.next_free = current->second.next_free;
        node.prior_free = current->second.prior_free;
        node.len = current->second.len - bytes;
        current->second.len = bytes;

        // The remainder takes over the position `current` had in the list.
        if (prior == current) {
          first_free = remaining;
          node.setfirstfree();
        } else {
          prior->second.next_free = remaining;
          node.prior_free = prior->first;
        }
        if (!node.islastfree()) memory[node.next_free].prior_free = remaining;

        // A NULL next_free marks the block as no longer free.
        current->second.next_free = NULL;
        return current->first;
      }
    }

    // End of free list?
    if (current->second.islastfree()) break;

    prior = current;
    current = memory.find(current->second.next_free);
  }

  // Can't service the request due to fragmentation
  return NULL;
}
175 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | ### Package Contents 2 | 3 | This directory contains the HSA Runtime source code for the Boltzmann release. It has been modified to support 4 | AMD/ATI discrete GPUs. 5 | 6 | #### Source & Include directories 7 | 8 | core - Contains the source code for AMD's implementation of the core HSA Runtime API's. 9 | 10 | cmake_modules - CMake support modules and files. 11 | 12 | inc - Contains the public and AMD specific header files exposing the HSA Runtimes interfaces. 13 | 14 | libamdhsacode - HSAIL/Finalizer runtime interface. 15 | 16 | loader - Used to load code objects. 17 | 18 | utils - Utilities required to build the core runtime. 19 | 20 | #### Build environment 21 | 22 | CMake build framework is used to build the HSA runtime. The minimum version is 2.8. 23 | 24 | Obtain cmake infrastructure: http://www.cmake.org/download/ 25 | 26 | Export cmake bin into your PATH 27 | HSA Runtime CMake build file CMakeLists.txt is located in runtime/core folder. 28 | 29 | #### Package Dependencies 30 | 31 | The following support packages are required to successfully build the runtime: 32 | 33 | * libelf-dev 34 | * g++ 35 | * libc6-dev-i386 (for libhsakmt 32bit) 36 | 37 | #### Building the runtime 38 | 39 | To build the runtime a compatible version of the libhsakmt library and the 40 | hsakmt.h header file must be available.
The latest version of these files 41 | can be obtained from the ROCT-Thunk-Interface repository, available here: 42 | 43 | https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface 44 | 45 | Specify the directory containing libhsakmt.so.1 and hsakmt.h using the following 46 | cmake variables: 47 | 48 | HSATHK_BUILD_INC_PATH - Set to the directory containing hsakmt.h. 49 | 50 | HSATHK_BUILD_LIB_PATH - Set to the directory containing libhsakmt.so.1 51 | 52 | For example, from the top level ROCR repository execute: 53 | 54 | mkdir build 55 | cd build 56 | cmake -D HSATHK_BUILD_INC_PATH= \ 57 | -D HSATHK_BUILD_LIB_PATH= \ 58 | ../src 59 | make 60 | 61 | The name of the core hsa runtime is libhsa-runtime64.so.1. 62 | 63 | #### External requirements 64 | 65 | The core runtime requires the sp3.a library to be able to compile 66 | on x86_64 architectures. The binaries for the sp3.a library can 67 | be found on the amd-codexl-analyzer GitHub repository: 68 | 69 | https://github.com/GPUOpen-Tools/amd-codexl-analyzer 70 | 71 | The x86_64 library and associated header files have been added to 72 | this code base for convenience, but are still subject to the 73 | AMD copyright license. 74 | 75 | #### Specs 76 | 77 | http://www.hsafoundation.com/standards/ 78 | 79 | HSA Runtime Specification 1.0 80 | 81 | HSA Programmer Reference Manual Specification 1.0 82 | 83 | HSA Platform System Architecture Specification 1.0 84 | 85 | #### Runtime Design overview 86 | 87 | The AMD HSA runtime consists of three primary layers: 88 | 89 | C interface adaptors 90 | C++ interfaces classes and common functions 91 | AMD device specific implementations 92 | Additionally the runtime is dependent on a small utility library which provides simple common functions, limited operating system and compiler abstraction, as well as atomic operation interfaces.
93 | 94 | #### C interface adaptors 95 | 96 | Files : 97 | 98 | hsa.h(cpp) 99 | 100 | hsa_ext_interface.h(cpp) 101 | 102 | The C interface layer provides C99 APIs as defined in the HSA Runtime Specification 1.0. The interfaces and default definitions for the standard extensions are also provided. The interface functions simply forward to a function pointer table defined here. The table is initialized to point to default definitions, which simply return an appropriate error code. If available the extension library is loaded as part of runtime initialization and the table is updated to point into the extension library. In this release the standard extensions (image support and finalizer) are implemented in separate libraries (not open sourced), and can be obtained from the HSA-Runtime-AMD git repository. 103 | 104 | #### C++ interfaces classes and common functions 105 | 106 | Files : 107 | 108 | runtime.h(cpp) 109 | 110 | agent.h 111 | 112 | queue.h 113 | 114 | signal.h 115 | 116 | memory_region.h(cpp) 117 | 118 | checked.h 119 | 120 | memory_database.h(cpp) 121 | 122 | default_signal.h(cpp) 123 | 124 | The C++ interface layer provides abstract interface classes encapsulating commands to HSA Signals, Agents, and Queues. This layer also contains the implementation of device independent commands, such as hsa_init and hsa_system_get_info, and a default signal and queue implementation. 125 | 126 | #### Device Specific Implementations 127 | 128 | Files: 129 | 130 | amd_cpu_agent.h(cpp) 131 | 132 | amd_gpu_agent.h(cpp) 133 | 134 | amd_hw_aql_command_processor.h(cpp) 135 | 136 | amd_memory_region.h(cpp) 137 | 138 | amd_memory_registration.h(cpp) 139 | 140 | amd_topology.h(cpp) 141 | 142 | host_queue.h(cpp) 143 | 144 | interrupt_signal.h(cpp) 145 | 146 | hsa_ext_private_amd.h(cpp) 147 | 148 | The device specific layer contains implementations of the C++ interface classes which implement HSA functionality for AMD Kaveri & Carrizo APUs.
149 | 150 | #### Implemented functionality 151 | 152 | * The following queries are not implemented: 153 | ** hsa_code_symbol_get_info: HSA_CODE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION 154 | ** hsa_executable_symbol_get_info: HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_OBJECT, HSA_EXECUTABLE_SYMBOL_INFO_INDIRECT_FUNCTION_CALL_CONVENTION 155 | 156 | #### Known Issues 157 | 158 | * Max total coarse grain region limit is 8GB. 159 | * hsa_agent_get_exception_policies is not implemented. 160 | * Image import/export/copy/fill only support image created with memory from host accessible region. 161 | * hsa_system_get_extension_table is not implemented for HSA_EXTENSION_AMD_PROFILER. 162 | 163 | #### Disclaimer 164 | 165 | The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale. 166 | 167 | AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. 
Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. 168 | 169 | Copyright (c) 2014-2015 Advanced Micro Devices, Inc. All rights reserved. 170 | -------------------------------------------------------------------------------- /src/core/inc/amd_blit_kernel.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_ 44 | #define HSA_RUNTIME_CORE_INC_AMD_BLIT_KERNEL_H_ 45 | 46 | #include 47 | 48 | #include "core/inc/blit.h" 49 | 50 | namespace amd { 51 | class BlitKernel : public core::Blit { 52 | public: 53 | explicit BlitKernel(); 54 | virtual ~BlitKernel() override; 55 | 56 | /// @brief Initialize a blit kernel object. 57 | /// 58 | /// @param agent Pointer to the agent that will execute the AQL packets. 59 | /// 60 | /// @return hsa_status_t 61 | virtual hsa_status_t Initialize(const core::Agent& agent) override; 62 | 63 | /// @brief Marks the blit kernel object as invalid and uncouples its link with 64 | /// the underlying AQL kernel queue. Use of the blit object 65 | /// once it has been release is illegal and any behavior is indeterminate 66 | /// 67 | /// @note: The call will block until all AQL packets have been executed. 68 | /// 69 | /// @return hsa_status_t 70 | virtual hsa_status_t Destroy() override; 71 | 72 | /// @brief Submit an AQL packet to perform vector copy. The call is blocking 73 | /// until the command execution is finished. 74 | /// 75 | /// @param dst Memory address of the copy destination. 76 | /// @param src Memory address of the copy source. 77 | /// @param size Size of the data to be copied. 
78 | virtual hsa_status_t SubmitLinearCopyCommand(void* dst, const void* src, 79 | size_t size) override; 80 | 81 | /// @brief Submit a linear copy command to the the underlying compute device's 82 | /// control block. The call is non blocking. The memory transfer will start 83 | /// after all dependent signals are satisfied. After the transfer is 84 | /// completed, the out signal will be decremented. 85 | /// 86 | /// @param dst Memory address of the copy destination. 87 | /// @param src Memory address of the copy source. 88 | /// @param size Size of the data to be copied. 89 | /// @param dep_signals Arrays of dependent signal. 90 | /// @param out_signal Output signal. 91 | virtual hsa_status_t SubmitLinearCopyCommand( 92 | void* dst, const void* src, size_t size, 93 | std::vector& dep_signals, 94 | core::Signal& out_signal) override; 95 | 96 | /// @brief Submit an AQL packet to perform memory fill. The call is blocking 97 | /// until the command execution is finished. 98 | /// 99 | /// @param ptr Memory address of the fill destination. 100 | /// @param value Value to be set. 101 | /// @param count Number of uint32_t element to be set to the value. 102 | virtual hsa_status_t SubmitLinearFillCommand(void* ptr, uint32_t value, 103 | size_t count) override; 104 | 105 | private: 106 | union KernelArgs { 107 | struct __ALIGNED__(16) KernelCopyArgs { 108 | const void* src; 109 | void* dst; 110 | uint64_t size; 111 | uint32_t use_vector; 112 | } copy; 113 | 114 | struct __ALIGNED__(16) KernelFillArgs { 115 | void* ptr; 116 | uint64_t num; 117 | uint32_t value; 118 | } fill; 119 | }; 120 | 121 | /// Reserve a slot in the queue buffer. The call will wait until the queue 122 | /// buffer has a room. 123 | uint64_t AcquireWriteIndex(uint32_t num_packet); 124 | 125 | /// Update the queue doorbell register with ::write_index. This 126 | /// function also serializes concurrent doorbell update to ensure that the 127 | /// packet processor doesn't get invalid packet. 
128 | void ReleaseWriteIndex(uint64_t write_index, uint32_t num_packet); 129 | 130 | /// Wait until all packets are finished. 131 | hsa_status_t FenceRelease(uint64_t write_index, uint32_t num_copy_packet, 132 | hsa_fence_scope_t fence); 133 | 134 | void PopulateQueue(uint64_t index, uint64_t code_handle, void* args, 135 | uint32_t grid_size_x, hsa_signal_t completion_signal); 136 | 137 | KernelArgs* ObtainAsyncKernelCopyArg(); 138 | 139 | /// Handles to the vector copy kernel. 140 | uint64_t copy_code_handle_; 141 | 142 | /// Handles to the vector copy aligned kernel. 143 | uint64_t copy_aligned_code_handle_; 144 | 145 | /// Handles to the fill memory kernel. 146 | uint64_t fill_code_handle_; 147 | 148 | /// AQL queue for submitting the vector copy kernel. 149 | hsa_queue_t* queue_; 150 | uint32_t queue_bitmask_; 151 | 152 | /// Index to track concurrent kernel launch. 153 | volatile uint64_t cached_index_; 154 | 155 | /// Pointer to the kernel argument buffer. 156 | KernelArgs* kernarg_async_; 157 | uint32_t kernarg_async_mask_; 158 | volatile uint32_t kernarg_async_counter_; 159 | 160 | /// Completion signal for every kernel dispatched. 161 | hsa_signal_t completion_signal_; 162 | 163 | /// Lock to synchronize access to kernarg_ and completion_signal_ 164 | std::mutex lock_; 165 | 166 | /// Pointer to memory containing the ISA and argument buffer. 
167 | void* code_arg_buffer_; 168 | 169 | static const size_t kMaxCopyCount; 170 | static const size_t kMaxFillCount; 171 | static const uint32_t kGroupSize; 172 | }; 173 | } // namespace amd 174 | 175 | #endif // header guard 176 | -------------------------------------------------------------------------------- /src/core/runtime/signal.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #ifndef HSA_RUNTME_CORE_SIGNAL_CPP_ 44 | #define HSA_RUNTME_CORE_SIGNAL_CPP_ 45 | 46 | #include "core/inc/signal.h" 47 | #include "core/util/timer.h" 48 | #include 49 | 50 | namespace core { 51 | 52 | uint32_t Signal::WaitAny(uint32_t signal_count, hsa_signal_t* hsa_signals, 53 | hsa_signal_condition_t* conds, 54 | hsa_signal_value_t* values, uint64_t timeout, 55 | hsa_wait_state_t wait_hint, 56 | hsa_signal_value_t* satisfying_value) { 57 | hsa_signal_handle* signals = 58 | reinterpret_cast(hsa_signals); 59 | uint32_t prior = 0; 60 | for (uint32_t i = 0; i < signal_count; i++) 61 | prior = Max(prior, atomic::Increment(&signals[i]->waiting_)); 62 | 63 | MAKE_SCOPE_GUARD([&]() { 64 | for (uint32_t i = 0; i < signal_count; i++) 65 | atomic::Decrement(&signals[i]->waiting_); 66 | }); 67 | 68 | // Allow only the first waiter to sleep (temporary, known to be bad). 69 | if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE; 70 | 71 | // Ensure that all signals in the list can be slept on. 
72 | if (wait_hint != HSA_WAIT_STATE_ACTIVE) { 73 | for (uint32_t i = 0; i < signal_count; i++) { 74 | if (signals[i]->EopEvent() == NULL) { 75 | wait_hint = HSA_WAIT_STATE_ACTIVE; 76 | break; 77 | } 78 | } 79 | } 80 | 81 | const uint32_t small_size = 10; 82 | HsaEvent* short_evts[small_size]; 83 | HsaEvent** evts = NULL; 84 | uint32_t unique_evts = 0; 85 | if (wait_hint != HSA_WAIT_STATE_ACTIVE) { 86 | if (signal_count > small_size) 87 | evts = new HsaEvent* [signal_count]; 88 | else 89 | evts = short_evts; 90 | for (uint32_t i = 0; i < signal_count; i++) 91 | evts[i] = signals[i]->EopEvent(); 92 | std::sort(evts, evts + signal_count); 93 | HsaEvent** end = std::unique(evts, evts + signal_count); 94 | unique_evts = uint32_t(end - evts); 95 | } 96 | MAKE_SCOPE_GUARD([&]() { 97 | if (signal_count > small_size) delete[] evts; 98 | }); 99 | 100 | int64_t value; 101 | 102 | timer::fast_clock::time_point start_time = timer::fast_clock::now(); 103 | 104 | // Set a polling timeout value 105 | // Exact time is not hugely important, it should just be a short while which 106 | // is smaller than the thread scheduling quantum (usually around 16ms) 107 | const timer::fast_clock::duration kMaxElapsed = std::chrono::milliseconds(5); 108 | 109 | // Convert timeout value into the fast_clock domain 110 | uint64_t hsa_freq; 111 | HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq); 112 | const timer::fast_clock::duration fast_timeout = 113 | timer::duration_from_seconds( 114 | double(timeout) / double(hsa_freq)); 115 | 116 | bool condition_met = false; 117 | while (true) { 118 | for (uint32_t i = 0; i < signal_count; i++) { 119 | if (signals[i]->invalid_) return uint32_t(-1); 120 | 121 | // Handling special event. 
122 | if (signals[i]->EopEvent() != NULL) { 123 | const HSA_EVENTTYPE event_type = 124 | signals[i]->EopEvent()->EventData.EventType; 125 | if (event_type == HSA_EVENTTYPE_MEMORY) { 126 | const HsaMemoryAccessFault& fault = 127 | signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault; 128 | const uint32_t* failure = 129 | reinterpret_cast(&fault.Failure); 130 | if (*failure != 0) { 131 | return i; 132 | } 133 | } 134 | } 135 | 136 | value = 137 | atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed); 138 | 139 | switch (conds[i]) { 140 | case HSA_SIGNAL_CONDITION_EQ: { 141 | condition_met = (value == values[i]); 142 | break; 143 | } 144 | case HSA_SIGNAL_CONDITION_NE: { 145 | condition_met = (value != values[i]); 146 | break; 147 | } 148 | case HSA_SIGNAL_CONDITION_GTE: { 149 | condition_met = (value >= values[i]); 150 | break; 151 | } 152 | case HSA_SIGNAL_CONDITION_LT: { 153 | condition_met = (value < values[i]); 154 | break; 155 | } 156 | default: 157 | return uint32_t(-1); 158 | } 159 | if (condition_met) { 160 | if (satisfying_value != NULL) *satisfying_value = value; 161 | return i; 162 | } 163 | } 164 | 165 | timer::fast_clock::time_point time = timer::fast_clock::now(); 166 | if (time - start_time > kMaxElapsed) { 167 | if (time - start_time > fast_timeout) { 168 | return uint32_t(-1); 169 | } 170 | if (wait_hint != HSA_WAIT_STATE_ACTIVE) { 171 | uint32_t wait_ms; 172 | auto time_remaining = fast_timeout - (time - start_time); 173 | if ((timeout == -1) || 174 | (time_remaining > std::chrono::milliseconds(uint32_t(-1)))) 175 | wait_ms = uint32_t(-1); 176 | else 177 | wait_ms = timer::duration_cast( 178 | time_remaining).count(); 179 | hsaKmtWaitOnMultipleEvents(evts, unique_evts, false, wait_ms); 180 | } 181 | } 182 | } 183 | } 184 | 185 | } // namespace core 186 | 187 | #endif // header guard 188 | -------------------------------------------------------------------------------- /src/core/inc/amd_memory_region.h: 
-------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // AMD specific HSA backend. 44 | 45 | #ifndef HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_ 46 | #define HSA_RUNTIME_CORE_INC_AMD_MEMORY_REGION_H_ 47 | 48 | #include "hsakmt.h" 49 | 50 | #include "core/inc/agent.h" 51 | #include "core/inc/memory_region.h" 52 | 53 | #include "inc/hsa_ext_amd.h" 54 | 55 | namespace amd { 56 | class MemoryRegion : public core::MemoryRegion { 57 | public: 58 | /// @brief Convert this object into hsa_region_t. 59 | static __forceinline hsa_region_t Convert(MemoryRegion* region) { 60 | const hsa_region_t region_handle = { 61 | static_cast(reinterpret_cast(region))}; 62 | return region_handle; 63 | } 64 | 65 | static __forceinline const hsa_region_t Convert(const MemoryRegion* region) { 66 | const hsa_region_t region_handle = { 67 | static_cast(reinterpret_cast(region))}; 68 | return region_handle; 69 | } 70 | 71 | /// @brief Convert hsa_region_t into amd::MemoryRegion *. 72 | static __forceinline MemoryRegion* Convert(hsa_region_t region) { 73 | return reinterpret_cast(region.handle); 74 | } 75 | 76 | /// @brief Allocate agent accessible memory (system / local memory). 77 | static void* AllocateKfdMemory(const HsaMemFlags& flag, HSAuint32 node_id, 78 | size_t size); 79 | 80 | /// @brief Free agent accessible memory (system / local memory). 81 | static void FreeKfdMemory(void* ptr, size_t size); 82 | 83 | static bool RegisterMemory(void* ptr, size_t size, size_t num_nodes, 84 | const uint32_t* nodes); 85 | 86 | static void DeregisterMemory(void* ptr); 87 | 88 | /// @brief Pin memory. 
89 | static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, 90 | void* ptr, size_t size, 91 | uint64_t* alternate_va, 92 | HsaMemMapFlags map_flag); 93 | 94 | /// @brief Unpin memory. 95 | static void MakeKfdMemoryUnresident(void* ptr); 96 | 97 | MemoryRegion(bool fine_grain, bool full_profile, core::Agent* owner, 98 | const HsaMemoryProperties& mem_props); 99 | 100 | ~MemoryRegion(); 101 | 102 | hsa_status_t Allocate(size_t size, void** address) const; 103 | 104 | hsa_status_t Allocate(bool restrict_access, size_t size, 105 | void** address) const; 106 | 107 | hsa_status_t Free(void* address, size_t size) const; 108 | 109 | hsa_status_t GetInfo(hsa_region_info_t attribute, void* value) const; 110 | 111 | hsa_status_t GetPoolInfo(hsa_amd_memory_pool_info_t attribute, 112 | void* value) const; 113 | 114 | hsa_status_t GetAgentPoolInfo(const core::Agent& agent, 115 | hsa_amd_agent_memory_pool_info_t attribute, 116 | void* value) const; 117 | 118 | hsa_status_t AllowAccess(uint32_t num_agents, const hsa_agent_t* agents, 119 | const void* ptr, size_t size) const; 120 | 121 | hsa_status_t CanMigrate(const MemoryRegion& dst, bool& result) const; 122 | 123 | hsa_status_t Migrate(uint32_t flag, const void* ptr) const; 124 | 125 | hsa_status_t Lock(uint32_t num_agents, const hsa_agent_t* agents, 126 | void* host_ptr, size_t size, void** agent_ptr) const; 127 | 128 | hsa_status_t Unlock(void* host_ptr) const; 129 | 130 | HSAuint64 GetBaseAddress() const { return mem_props_.VirtualBaseAddress; } 131 | 132 | HSAuint64 GetPhysicalSize() const { return mem_props_.SizeInBytes; } 133 | 134 | HSAuint64 GetVirtualSize() const { return virtual_size_; } 135 | 136 | hsa_status_t AssignAgent(void* ptr, size_t size, const core::Agent& agent, 137 | hsa_access_permission_t access) const; 138 | 139 | __forceinline bool IsLocalMemory() const { 140 | return ((mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE) || 141 | (mem_props_.HeapType == 
HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)); 142 | } 143 | 144 | __forceinline bool IsPublic() const { 145 | return (mem_props_.HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC); 146 | } 147 | 148 | __forceinline bool IsSystem() const { 149 | return mem_props_.HeapType == HSA_HEAPTYPE_SYSTEM; 150 | } 151 | 152 | __forceinline bool IsLDS() const { 153 | return mem_props_.HeapType == HSA_HEAPTYPE_GPU_LDS; 154 | } 155 | 156 | __forceinline bool IsGDS() const { 157 | return mem_props_.HeapType == HSA_HEAPTYPE_GPU_GDS; 158 | } 159 | 160 | __forceinline bool IsScratch() const { 161 | return mem_props_.HeapType == HSA_HEAPTYPE_GPU_SCRATCH; 162 | } 163 | 164 | __forceinline bool IsSvm() const { 165 | return mem_props_.HeapType == HSA_HEAPTYPE_DEVICE_SVM; 166 | } 167 | 168 | __forceinline uint32_t BusWidth() const { 169 | return static_cast(mem_props_.Width); 170 | } 171 | 172 | __forceinline uint32_t MaxMemCloc() const { 173 | return static_cast(mem_props_.MemoryClockMax); 174 | } 175 | 176 | private: 177 | const HsaMemoryProperties mem_props_; 178 | 179 | HsaMemFlags mem_flag_; 180 | 181 | HsaMemMapFlags map_flag_; 182 | 183 | size_t max_single_alloc_size_; 184 | 185 | HSAuint64 virtual_size_; 186 | 187 | static const size_t kPageSize_ = 4096; 188 | }; 189 | } // namespace 190 | 191 | #endif // header guard 192 | -------------------------------------------------------------------------------- /src/core/runtime/amd_topology.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 
13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 
40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | #include "core/inc/amd_topology.h" 44 | 45 | #include 46 | #include 47 | #include 48 | 49 | #include "hsakmt.h" 50 | 51 | #include "core/inc/runtime.h" 52 | #include "core/inc/amd_cpu_agent.h" 53 | #include "core/inc/amd_gpu_agent.h" 54 | #include "core/inc/amd_memory_region.h" 55 | #include "core/util/utils.h" 56 | 57 | namespace amd { 58 | // Minimum acceptable KFD version numbers 59 | static const uint kKfdVersionMajor = 0; 60 | static const uint kKfdVersionMinor = 99; 61 | 62 | CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { 63 | if (node_prop.NumCPUCores == 0) { 64 | return NULL; 65 | } 66 | 67 | CpuAgent* cpu = new CpuAgent(node_id, node_prop); 68 | core::Runtime::runtime_singleton_->RegisterAgent(cpu); 69 | 70 | return cpu; 71 | } 72 | 73 | GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { 74 | if (node_prop.NumFComputeCores == 0) { 75 | return NULL; 76 | } 77 | 78 | GpuAgent* gpu = new GpuAgent(node_id, node_prop); 79 | core::Runtime::runtime_singleton_->RegisterAgent(gpu); 80 | 81 | if (HSA_STATUS_SUCCESS != gpu->InitDma()) { 82 | assert(false && "Fail init blit"); 83 | delete gpu; 84 | gpu = NULL; 85 | } 86 | 87 | return gpu; 88 | } 89 | 90 | void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { 91 | // Register connectivity links for this agent to the runtime. 92 | if (num_link == 0) { 93 | return; 94 | } 95 | 96 | std::vector links(num_link); 97 | if (HSAKMT_STATUS_SUCCESS != 98 | hsaKmtGetNodeIoLinkProperties(node_id, num_link, &links[0])) { 99 | return; 100 | } 101 | 102 | for (HsaIoLinkProperties io_link : links) { 103 | // Populate link info with thunk property. 104 | hsa_amd_memory_pool_link_info_t link_info = {0}; 105 | 106 | if (io_link.Flags.ui32.Override == 1) { 107 | if (io_link.Flags.ui32.NoPeerToPeerDMA == 1) { 108 | // Ignore this link since peer to peer is not allowed. 
109 | continue; 110 | } 111 | link_info.atomic_support_32bit = (io_link.Flags.ui32.NoAtomics32bit == 0); 112 | link_info.atomic_support_64bit = (io_link.Flags.ui32.NoAtomics64bit == 0); 113 | link_info.coherent_support = (io_link.Flags.ui32.NonCoherent == 0); 114 | } else { 115 | // TODO: decipher HSA_IOLINKTYPE to fill out the atomic 116 | // and coherent information. 117 | } 118 | 119 | switch (io_link.IoLinkType) { 120 | case HSA_IOLINKTYPE_HYPERTRANSPORT: 121 | link_info.link_type = HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT; 122 | break; 123 | case HSA_IOLINKTYPE_PCIEXPRESS: 124 | link_info.link_type = HSA_AMD_LINK_INFO_TYPE_PCIE; 125 | break; 126 | case HSA_IOLINK_TYPE_QPI_1_1: 127 | link_info.link_type = HSA_AMD_LINK_INFO_TYPE_QPI; 128 | break; 129 | case HSA_IOLINK_TYPE_INFINIBAND: 130 | link_info.link_type = HSA_AMD_LINK_INFO_TYPE_INFINBAND; 131 | break; 132 | default: 133 | break; 134 | } 135 | 136 | link_info.max_bandwidth = io_link.MaximumBandwidth; 137 | link_info.max_latency = io_link.MaximumLatency; 138 | link_info.min_bandwidth = io_link.MinimumBandwidth; 139 | link_info.min_latency = io_link.MinimumLatency; 140 | 141 | core::Runtime::runtime_singleton_->RegisterLinkInfo( 142 | io_link.NodeFrom, io_link.NodeTo, io_link.Weight, link_info); 143 | } 144 | } 145 | 146 | /// @brief Calls Kfd thunk to get the snapshot of the topology of the system, 147 | /// which includes associations between, node, devices, memory and caches. 
148 | void BuildTopology() { 149 | HsaVersionInfo info; 150 | if (hsaKmtGetVersion(&info) != HSAKMT_STATUS_SUCCESS) { 151 | return; 152 | } 153 | 154 | if (info.KernelInterfaceMajorVersion == kKfdVersionMajor && 155 | info.KernelInterfaceMinorVersion < kKfdVersionMinor) { 156 | return; 157 | } 158 | 159 | // Disable KFD event support when using open source KFD 160 | if (info.KernelInterfaceMajorVersion == 1 && 161 | info.KernelInterfaceMinorVersion == 0) { 162 | core::g_use_interrupt_wait = false; 163 | } 164 | 165 | HsaSystemProperties props; 166 | hsaKmtReleaseSystemProperties(); 167 | 168 | if (hsaKmtAcquireSystemProperties(&props) != HSAKMT_STATUS_SUCCESS) { 169 | return; 170 | } 171 | 172 | core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); 173 | 174 | // Discover agents on every node in the platform. 175 | for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) { 176 | HsaNodeProperties node_prop = {0}; 177 | if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) { 178 | continue; 179 | } 180 | 181 | const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); 182 | const GpuAgent* gpu = DiscoverGpu(node_id, node_prop); 183 | 184 | assert(!(cpu == NULL && gpu == NULL)); 185 | 186 | RegisterLinkInfo(node_id, node_prop.NumIOLinks); 187 | } 188 | } 189 | 190 | bool Load() { 191 | // Open connection to kernel driver. 192 | if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) { 193 | return false; 194 | } 195 | 196 | // Build topology table. 197 | BuildTopology(); 198 | 199 | return true; 200 | } 201 | 202 | bool Unload() { 203 | hsaKmtReleaseSystemProperties(); 204 | 205 | // Close connection to kernel driver. 
206 | hsaKmtCloseKFD(); 207 | 208 | return true; 209 | } 210 | } // namespace amd 211 | -------------------------------------------------------------------------------- /src/core/inc/interrupt_signal.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // The University of Illinois/NCSA 4 | // Open Source License (NCSA) 5 | // 6 | // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 | // 8 | // Developed by: 9 | // 10 | // AMD Research and AMD HSA Software Development 11 | // 12 | // Advanced Micro Devices, Inc. 13 | // 14 | // www.amd.com 15 | // 16 | // Permission is hereby granted, free of charge, to any person obtaining a copy 17 | // of this software and associated documentation files (the "Software"), to 18 | // deal with the Software without restriction, including without limitation 19 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 | // and/or sell copies of the Software, and to permit persons to whom the 21 | // Software is furnished to do so, subject to the following conditions: 22 | // 23 | // - Redistributions of source code must retain the above copyright notice, 24 | // this list of conditions and the following disclaimers. 25 | // - Redistributions in binary form must reproduce the above copyright 26 | // notice, this list of conditions and the following disclaimers in 27 | // the documentation and/or other materials provided with the distribution. 28 | // - Neither the names of Advanced Micro Devices, Inc, 29 | // nor the names of its contributors may be used to endorse or promote 30 | // products derived from this Software without specific prior written 31 | // permission. 
32 | // 33 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 | // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 | // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 | // DEALINGS WITH THE SOFTWARE. 40 | // 41 | //////////////////////////////////////////////////////////////////////////////// 42 | 43 | // HSA runtime C++ interface file. 44 | 45 | #ifndef HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_ 46 | #define HSA_RUNTME_CORE_INC_INTERRUPT_SIGNAL_H_ 47 | 48 | #include "hsakmt.h" 49 | 50 | #include "core/inc/runtime.h" 51 | #include "core/inc/signal.h" 52 | #include "core/util/utils.h" 53 | 54 | namespace core { 55 | 56 | /// @brief A Signal implementation using interrupts versus plain memory based. 57 | /// Also see base class Signal. 58 | /// 59 | /// Breaks common/vendor separation - signals in general needs to be re-worked 60 | /// at the foundation level to make sense in a multi-device system. 61 | /// Supports only one waiter for now. 62 | /// KFD changes are needed to support multiple waiters and have device 63 | /// signaling. 64 | class InterruptSignal : public Signal { 65 | public: 66 | static HsaEvent* CreateEvent(HSA_EVENTTYPE type, bool manual_reset); 67 | static void DestroyEvent(HsaEvent* evt); 68 | 69 | /// @brief Determines if a Signal* can be safely converted to an 70 | /// InterruptSignal* via static_cast. 
71 | static __forceinline bool IsType(Signal* ptr) { 72 | return ptr->IsType(&rtti_id_); 73 | } 74 | 75 | explicit InterruptSignal(hsa_signal_value_t initial_value, 76 | HsaEvent* use_event = NULL); 77 | 78 | ~InterruptSignal(); 79 | 80 | // Below are various methods corresponding to the APIs, which load/store the 81 | // signal value or modify the existing signal value automically and with 82 | // specified memory ordering semantics. 83 | 84 | hsa_signal_value_t LoadRelaxed(); 85 | 86 | hsa_signal_value_t LoadAcquire(); 87 | 88 | void StoreRelaxed(hsa_signal_value_t value); 89 | 90 | void StoreRelease(hsa_signal_value_t value); 91 | 92 | hsa_signal_value_t WaitRelaxed(hsa_signal_condition_t condition, 93 | hsa_signal_value_t compare_value, 94 | uint64_t timeout, hsa_wait_state_t wait_hint); 95 | 96 | hsa_signal_value_t WaitAcquire(hsa_signal_condition_t condition, 97 | hsa_signal_value_t compare_value, 98 | uint64_t timeout, hsa_wait_state_t wait_hint); 99 | 100 | void AndRelaxed(hsa_signal_value_t value); 101 | 102 | void AndAcquire(hsa_signal_value_t value); 103 | 104 | void AndRelease(hsa_signal_value_t value); 105 | 106 | void AndAcqRel(hsa_signal_value_t value); 107 | 108 | void OrRelaxed(hsa_signal_value_t value); 109 | 110 | void OrAcquire(hsa_signal_value_t value); 111 | 112 | void OrRelease(hsa_signal_value_t value); 113 | 114 | void OrAcqRel(hsa_signal_value_t value); 115 | 116 | void XorRelaxed(hsa_signal_value_t value); 117 | 118 | void XorAcquire(hsa_signal_value_t value); 119 | 120 | void XorRelease(hsa_signal_value_t value); 121 | 122 | void XorAcqRel(hsa_signal_value_t value); 123 | 124 | void AddRelaxed(hsa_signal_value_t value); 125 | 126 | void AddAcquire(hsa_signal_value_t value); 127 | 128 | void AddRelease(hsa_signal_value_t value); 129 | 130 | void AddAcqRel(hsa_signal_value_t value); 131 | 132 | void SubRelaxed(hsa_signal_value_t value); 133 | 134 | void SubAcquire(hsa_signal_value_t value); 135 | 136 | void SubRelease(hsa_signal_value_t 
value); 137 | 138 | void SubAcqRel(hsa_signal_value_t value); 139 | 140 | hsa_signal_value_t ExchRelaxed(hsa_signal_value_t value); 141 | 142 | hsa_signal_value_t ExchAcquire(hsa_signal_value_t value); 143 | 144 | hsa_signal_value_t ExchRelease(hsa_signal_value_t value); 145 | 146 | hsa_signal_value_t ExchAcqRel(hsa_signal_value_t value); 147 | 148 | hsa_signal_value_t CasRelaxed(hsa_signal_value_t expected, 149 | hsa_signal_value_t value); 150 | 151 | hsa_signal_value_t CasAcquire(hsa_signal_value_t expected, 152 | hsa_signal_value_t value); 153 | 154 | hsa_signal_value_t CasRelease(hsa_signal_value_t expected, 155 | hsa_signal_value_t value); 156 | 157 | hsa_signal_value_t CasAcqRel(hsa_signal_value_t expected, 158 | hsa_signal_value_t value); 159 | 160 | /// @brief See base class Signal. 161 | __forceinline hsa_signal_value_t* ValueLocation() const { 162 | return (hsa_signal_value_t*)&signal_.value; 163 | } 164 | 165 | /// @brief See base class Signal. 166 | __forceinline HsaEvent* EopEvent() { return event_; } 167 | 168 | // TODO: work around for SDMA async copy. Bypass waiting on EOP 169 | // event because SDMA copy does not handle interrupt yet. 170 | __forceinline void DisableWaitEvent() { wait_on_event_ = false; } 171 | 172 | /// @brief prevent throwing exceptions 173 | void* operator new(size_t size) { return malloc(size); } 174 | 175 | /// @brief prevent throwing exceptions 176 | void operator delete(void* ptr) { free(ptr); } 177 | 178 | protected: 179 | bool _IsA(rtti_t id) const { return id == &rtti_id_; } 180 | 181 | private: 182 | /// @variable KFD event on which the interrupt signal is based on. 183 | HsaEvent* event_; 184 | 185 | /// @variable Indicates whether the signal should release the event when it 186 | /// closes or not. 187 | bool free_event_; 188 | 189 | // TODO: work around for SDMA async copy. Bypass waiting on EOP 190 | // event because SDMA copy does not handle interrupt yet. 
191 | bool wait_on_event_; 192 | 193 | /// Used to obtain a globally unique value (address) for rtti. 194 | static int rtti_id_; 195 | 196 | /// @brief Notify driver of signal value change if necessary. 197 | __forceinline void SetEvent() { 198 | std::atomic_signal_fence(std::memory_order_seq_cst); 199 | if (InWaiting()) hsaKmtSetEvent(event_); 200 | } 201 | 202 | DISALLOW_COPY_AND_ASSIGN(InterruptSignal); 203 | }; 204 | 205 | } // namespace core 206 | #endif // header guard 207 | --------------------------------------------------------------------------------