├── cxxopts ├── src │ ├── .gitignore │ └── CMakeLists.txt ├── test │ ├── .gitignore │ ├── link_b.cpp │ ├── main.cpp │ ├── link_a.cpp │ ├── find-package-test │ │ └── CMakeLists.txt │ ├── add-subdirectory-test │ │ └── CMakeLists.txt │ └── CMakeLists.txt ├── .gitignore ├── cxxopts-config.cmake.in ├── INSTALL ├── LICENSE ├── .travis.yml └── CHANGELOG.md ├── samples ├── hipInfo │ └── CMakeLists.txt ├── 10_memcpy3D │ ├── CMakeLists.txt │ └── main.cpp ├── hipmath │ └── CMakeLists.txt ├── 2_vecadd │ └── CMakeLists.txt ├── 5_2dshfl │ └── CMakeLists.txt ├── 7_streams │ └── CMakeLists.txt ├── 9_unroll │ ├── CMakeLists.txt │ └── unroll.cpp ├── hiptest │ └── CMakeLists.txt ├── 1_hipEvent │ └── CMakeLists.txt ├── hcc_dialects │ ├── CMakeLists.txt │ └── vadd_hip.cpp ├── hip-cuda │ ├── include │ │ ├── SDKFile.hpp │ │ ├── SDKBitMap.hpp │ │ └── SDKThread.hpp │ ├── DCT │ │ └── CMakeLists.txt │ ├── SimpleConvolution │ │ ├── FilterCoeff.h │ │ ├── SimpleConvolution.cpp │ │ └── CMakeLists.txt │ ├── Histogram │ │ └── CMakeLists.txt │ ├── PrefixSum │ │ └── CMakeLists.txt │ ├── dwtHaar1D │ │ └── CMakeLists.txt │ ├── BitonicSort │ │ └── CMakeLists.txt │ ├── RecursiveGaussian │ │ ├── RecursiveGaussian_Input.bmp │ │ └── CMakeLists.txt │ ├── BinomialOption │ │ └── CMakeLists.txt │ ├── FloydWarshall │ │ └── CMakeLists.txt │ ├── FastWalshTransform │ │ └── CMakeLists.txt │ └── CMakeLists.txt ├── bit_extract │ ├── CMakeLists.txt │ └── bit_extract.cpp ├── 3_shared_memory │ └── CMakeLists.txt ├── 0_MatrixMultiply │ └── CMakeLists.txt ├── 0_MatrixTranspose │ ├── CMakeLists.txt │ └── MatrixTranspose.cpp ├── hipDeviceLink │ ├── hipDeviceLink.h │ ├── hipDeviceLinkConsts.h │ ├── CMakeLists.txt │ ├── hipDeviceLinkWrite.cpp │ ├── hipDeviceLinkRead.cpp │ └── hipDeviceLink.cpp ├── hipSymbol │ └── CMakeLists.txt ├── fp16 │ ├── CMakeLists.txt │ └── fp16_conversion.hpp ├── 4_shfl │ ├── CMakeLists.txt │ ├── broadcast.cpp │ └── broadcast2.cpp ├── 6_dynamic_shared │ ├── CMakeLists.txt │ └── hipDynamicShared2.cpp 
├── hiploadmodule │ ├── CMakeLists.txt │ ├── kernel.cpp │ └── main.cpp └── CMakeLists.txt ├── include ├── hip │ ├── hip_runtime.h │ └── hip_fatbin.h └── CL │ ├── LICENSE │ ├── opencl.h │ ├── cl_gl_ext.h │ └── cl_version.h ├── spdlog ├── version.h ├── fmt │ ├── ostr.h │ ├── fmt.h │ └── bundled │ │ └── LICENSE.rst ├── formatter.h ├── details │ ├── null_mutex.h │ ├── log_msg.h │ ├── console_globals.h │ ├── circular_q.h │ ├── periodic_worker.h │ ├── async_logger_impl.h │ ├── fmt_helper.h │ └── mpmc_blocking_q.h ├── LICENSE ├── sinks │ ├── msvc_sink.h │ ├── null_sink.h │ ├── ostream_sink.h │ ├── sink.h │ ├── stdout_color_sinks.h │ ├── base_sink.h │ ├── basic_file_sink.h │ ├── dist_sink.h │ ├── syslog_sink.h │ ├── stdout_sinks.h │ └── android_sink.h ├── async_logger.h └── async.h ├── doc ├── cmake.rst ├── env_variables.rst └── notes-0.9.txt ├── bin └── CMakeLists.txt ├── lib ├── bitcode │ ├── OCML │ │ ├── nearbyintF.cl │ │ ├── nearbyintD.cl │ │ ├── scalbnF.cl │ │ ├── scalbnD.cl │ │ ├── ncdfinvF.cl │ │ ├── ncdfinvD.cl │ │ ├── tables.cl │ │ ├── tables.h │ │ ├── scalbF.cl │ │ ├── rhypotF.cl │ │ ├── scalbD.cl │ │ ├── rcbrtF.cl │ │ ├── rhypotD.cl │ │ ├── rcbrtD.cl │ │ ├── rlen3F.cl │ │ ├── i0F.cl │ │ ├── i1F.cl │ │ ├── rlen3D.cl │ │ ├── oclc.h │ │ ├── mathD.h.orig │ │ ├── rlen4F.cl │ │ ├── LICENSE │ │ ├── rlen4D.cl │ │ ├── erfcxF.cl │ │ ├── erfcinvF.cl │ │ ├── erfinvF.cl │ │ ├── i0D.cl │ │ ├── i1D.cl │ │ ├── j0F.cl │ │ ├── j1F.cl │ │ ├── j0D.cl │ │ ├── erfcxD.cl │ │ ├── j1D.cl │ │ ├── mathF.h │ │ └── mathD.h │ └── CMakeLists.txt ├── hipcl-config.cmake.in ├── common.hh ├── log.cc └── CMakeLists.txt ├── hipcl_config.h.in ├── llvm_passes └── CMakeLists.txt ├── LICENSE └── cmake └── run_make2cmake.cmake /cxxopts/src/.gitignore: -------------------------------------------------------------------------------- 1 | example 2 | -------------------------------------------------------------------------------- /cxxopts/test/.gitignore: 
-------------------------------------------------------------------------------- 1 | options_test 2 | -------------------------------------------------------------------------------- /cxxopts/test/link_b.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /samples/hipInfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_hipcl_binary(hipInfo hipInfo.cpp) 2 | -------------------------------------------------------------------------------- /cxxopts/test/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "catch.hpp" 3 | -------------------------------------------------------------------------------- /samples/10_memcpy3D/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_hipcl_test(memcpy3D memcpy3D PASSED main.cpp) 3 | -------------------------------------------------------------------------------- /samples/hipmath/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_hipcl_test(hipmath hipmath PASSED hipmath.cc) 3 | -------------------------------------------------------------------------------- /samples/2_vecadd/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # VecAdd 2 | 3 | add_hipcl_test(VecAdd VecAdd PASSED VecAdd.cpp) 4 | -------------------------------------------------------------------------------- /samples/5_2dshfl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2dshfl 2 | add_hipcl_test(2dshfl 2d_shuffle PASSED 2dshfl.cpp) 3 | -------------------------------------------------------------------------------- /samples/7_streams/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # stream 2 | 3 | add_hipcl_test(stream stream PASSED stream.cpp) 4 | -------------------------------------------------------------------------------- /samples/9_unroll/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # unroll 2 | 3 | add_hipcl_test(unroll unroll PASSED unroll.cpp) 4 | -------------------------------------------------------------------------------- /samples/hiptest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # hiptest 2 | 3 | add_hipcl_test(hiptest hiptest PASSED hiptest.cc) 4 | -------------------------------------------------------------------------------- /cxxopts/test/link_a.cpp: -------------------------------------------------------------------------------- 1 | #include "cxxopts.hpp" 2 | 3 | int main(int, char**) 4 | { 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /samples/1_hipEvent/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # hipEvent 2 | 3 | add_hipcl_test(hipEvent hipEvent PASSED hipEvent.cpp) 4 | -------------------------------------------------------------------------------- /samples/hcc_dialects/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # vadd_hip 2 | 3 | add_hipcl_test(vadd_hip vadd_hip PASSED vadd_hip.cpp) 4 | -------------------------------------------------------------------------------- /samples/hip-cuda/include/SDKFile.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKFile.hpp -------------------------------------------------------------------------------- /samples/hip-cuda/include/SDKBitMap.hpp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKBitMap.hpp -------------------------------------------------------------------------------- /samples/hip-cuda/include/SDKThread.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKThread.hpp -------------------------------------------------------------------------------- /samples/bit_extract/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # bit_extract 2 | 3 | add_hipcl_test(bit_extract bit_extract PASSED bit_extract.cpp) 4 | -------------------------------------------------------------------------------- /samples/3_shared_memory/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # sharedMemory 2 | 3 | add_hipcl_test(sharedMemory sharedMemory PASSED sharedMemory.cpp) 4 | -------------------------------------------------------------------------------- /samples/hip-cuda/DCT/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # DCT 2 | 3 | add_hipcl_test(DCT DCT Passed DCT.cpp 4 | -q -e -t -x 2048 -y 2048 -i 32) 5 | -------------------------------------------------------------------------------- /samples/0_MatrixMultiply/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # MatrixMultiply 2 | 3 | add_hipcl_test(MatrixMultiply MatrixMultiply PASSED MatrixMultiply.cpp) 4 | -------------------------------------------------------------------------------- /samples/hip-cuda/SimpleConvolution/FilterCoeff.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/SimpleConvolution/FilterCoeff.h 
-------------------------------------------------------------------------------- /cxxopts/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | build* 3 | CMakeCache.txt 4 | Makefile 5 | CMakeFiles/ 6 | Testing/ 7 | CTestTestfile.cmake 8 | cmake_install.cmake 9 | -------------------------------------------------------------------------------- /samples/0_MatrixTranspose/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # MatrixTranspose 2 | 3 | add_hipcl_test(MatrixTranspose MatrixTranspose PASSED MatrixTranspose.cpp) 4 | -------------------------------------------------------------------------------- /samples/hip-cuda/Histogram/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Histogram 2 | 3 | add_hipcl_test(Histogram Histogram Passed Histogram.cpp 4 | -q -e -t -x 1024) 5 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/hipDeviceLink.h: -------------------------------------------------------------------------------- 1 | #include "hipDeviceLinkConsts.h" 2 | #include 3 | extern __device__ int global[NUM]; 4 | -------------------------------------------------------------------------------- /samples/hipSymbol/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test symbol access 2 | 3 | add_hipcl_test(hipTestDeviceSymbol hipTestDeviceSymbol PASSED hipTestDeviceSymbol.cpp) 4 | -------------------------------------------------------------------------------- /samples/hip-cuda/PrefixSum/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # PrefixSum 2 | 3 | add_hipcl_test(PrefixSum PrefixSum Passed PrefixSum.cpp 4 | -q -e -t -x 16384 -i 32) 5 | -------------------------------------------------------------------------------- 
/samples/hip-cuda/SimpleConvolution/SimpleConvolution.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/SimpleConvolution/SimpleConvolution.cpp -------------------------------------------------------------------------------- /samples/hip-cuda/dwtHaar1D/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # dwtHaar1D 2 | 3 | add_hipcl_test(dwtHaar1D dwtHaar1D Passed dwtHaar1D.cpp 4 | -q -e -t -x 2048 -i 32) 5 | -------------------------------------------------------------------------------- /cxxopts/cxxopts-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake) 4 | check_required_components(cxxopts) 5 | -------------------------------------------------------------------------------- /samples/hip-cuda/BitonicSort/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # BitonicSort 2 | 3 | add_hipcl_test(BitonicSort BitonicSort Passed BitonicSort.cpp 4 | -q -e -t -x 2048 -i 32) 5 | -------------------------------------------------------------------------------- /samples/hip-cuda/RecursiveGaussian/RecursiveGaussian_Input.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/RecursiveGaussian/RecursiveGaussian_Input.bmp -------------------------------------------------------------------------------- /samples/hip-cuda/BinomialOption/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # BinomialOption 2 | 3 | add_hipcl_test(BinomialOption BinomialOption Passed BinomialOption.cpp 4 | -q -e -t -x 2048 -i 32) 5 | -------------------------------------------------------------------------------- 
/samples/hip-cuda/FloydWarshall/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # FloydWarshall 2 | 3 | add_hipcl_test(FloydWarshall FloydWarshall Passed FloydWarshall.cpp 4 | -q -e -t -x 256 -i 32) 5 | 6 | 7 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/hipDeviceLinkConsts.h: -------------------------------------------------------------------------------- 1 | #define NUM 256 /* element count of the `global` device array and of the host buffers */ 2 | #define SIZE (NUM * sizeof(int)) /* byte size of NUM ints; parenthesized so the expansion stays a single operand */ 3 | extern void readGlobal(int *hostOut); /* copies the device array back into hostOut (NUM ints) */ 4 | extern void writeGlobal(int *hostIn); /* stores hostIn (NUM ints) into the device array */ 5 | -------------------------------------------------------------------------------- /samples/fp16/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_hipcl_test(fp16 fp16 PASSED haxpy-base.cpp) 3 | 4 | # add_hipcl_test(fp16_math fp16_math PASSED half_math.cpp) 5 | add_hipcl_binary(fp16_math half_math.cpp) 6 | -------------------------------------------------------------------------------- /samples/hip-cuda/FastWalshTransform/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # FastWalshTransform 2 | 3 | add_hipcl_test(FastWalshTransform FastWalshTransform Passed FastWalshTransform.cpp 4 | -q -e -t -x 2048 -i 32) 5 | -------------------------------------------------------------------------------- /samples/hip-cuda/SimpleConvolution/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SimpleConvolution 2 | 3 | add_hipcl_test(SimpleConvolution SimpleConvolution Passed SimpleConvolution.cpp 4 | -q -e -t -x 2048 -y 2048 -i 32 -l 256) 5 | -------------------------------------------------------------------------------- /samples/4_shfl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # shfl 2 | 3 | add_hipcl_test(shfl shuffle PASSED shfl.cpp) 4 | 5 | 
add_hipcl_test(broadcast broadcast PASSED broadcast.cpp) 6 | add_hipcl_test(broadcast2 broadcast2 PASSED broadcast2.cpp) 7 | -------------------------------------------------------------------------------- /samples/6_dynamic_shared/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # dynamic_shared 2 | 3 | add_hipcl_test(dynamic_shared dynamic_shared PASSED dynamic_shared.cpp) 4 | 5 | add_hipcl_test(hipDynamicShared hipDynamicShared PASSED hipDynamicShared.cpp) 6 | 7 | add_hipcl_test(hipDynamicShared2 hipDynamicShared2 PASSED hipDynamicShared2.cpp) 8 | 9 | -------------------------------------------------------------------------------- /cxxopts/test/find-package-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | 3 | project(cxxopts-test) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | find_package(cxxopts REQUIRED) 9 | 10 | add_executable(library-test "../../src/example.cpp") 11 | target_link_libraries(library-test cxxopts::cxxopts) 12 | -------------------------------------------------------------------------------- /cxxopts/test/add-subdirectory-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | 3 | project(cxxopts-test) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | add_subdirectory(../.. 
cxxopts EXCLUDE_FROM_ALL) 9 | 10 | add_executable(library-test "../../src/example.cpp") 11 | target_link_libraries(library-test cxxopts) 12 | -------------------------------------------------------------------------------- /include/hip/hip_runtime.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HIPCL_HIP_RUNTIME_H 3 | #define HIPCL_HIP_RUNTIME_H 4 | 5 | #ifndef __HIP_PLATFORM_HIPCL__ 6 | #define __HIP_PLATFORM_HIPCL__ 7 | #endif 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /spdlog/version.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #define SPDLOG_VER_MAJOR 1 9 | #define SPDLOG_VER_MINOR 3 10 | #define SPDLOG_VER_PATCH 0 11 | 12 | #define SPDLOG_VERSION (SPDLOG_VER_MAJOR * 10000 + SPDLOG_VER_MINOR * 100 + SPDLOG_VER_PATCH) 13 | -------------------------------------------------------------------------------- /samples/hip-cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(SAMPLES 2 | BinomialOption 3 | BitonicSort 4 | DCT 5 | dwtHaar1D 6 | FastWalshTransform 7 | FloydWarshall 8 | Histogram 9 | # still broken: 10 | # PrefixSum 11 | RecursiveGaussian 12 | SimpleConvolution 13 | ) 14 | 15 | foreach (SAMPLE ${SAMPLES}) 16 | add_subdirectory(${SAMPLE}) 17 | endforeach() 18 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test symbol access 2 | 3 | add_hipcl_binary_device_link( 4 | hipTestDeviceLink 5 | hipDeviceLink.cpp 6 | hipDeviceLinkRead.cpp 7 | 
hipDeviceLinkWrite.cpp) 8 | 9 | add_test(NAME hipTestDeviceLink 10 | COMMAND "${CMAKE_CURRENT_BINARY_DIR}/hipTestDeviceLink" 11 | ) 12 | 13 | set_tests_properties(hipTestDeviceLink PROPERTIES 14 | PASS_REGULAR_EXPRESSION PASSED) 15 | 16 | -------------------------------------------------------------------------------- /doc/cmake.rst: -------------------------------------------------------------------------------- 1 | CMake variables 2 | --------------------------------------- 3 | 4 | - **SAVE_TEMPS** 5 | If set, the intermediate files produced while compiling the samples 6 | are kept in the build directory. 7 | 8 | - **LOGLEVEL** 9 | If set, selects the minimum logging level at compile time. 10 | Log levels below this one will not be available at runtime. 11 | Valid values are DEBUG, INFO, WARN, ERROR, CRITICAL and OFF. 12 | Defaults to DEBUG. 13 | -------------------------------------------------------------------------------- /samples/hiploadmodule/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Test module loading: a host binary plus a separately built device binary 2 | 3 | add_hipcl_binary( 4 | hipModuleLoad 5 | main.cpp) 6 | 7 | add_hipcl_device_binary( 8 | hipModuleLoadBinary 9 | kernel.cpp) 10 | 11 | add_dependencies(hipModuleLoad hipModuleLoadBinary) 12 | 13 | add_test(NAME hipModuleLoad 14 | COMMAND "${CMAKE_CURRENT_BINARY_DIR}/hipModuleLoad" 15 | ) 16 | 17 | set_tests_properties(hipModuleLoad PROPERTIES 18 | PASS_REGULAR_EXPRESSION PASSED) 19 | 20 | -------------------------------------------------------------------------------- /spdlog/fmt/ostr.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // include bundled or external copy of fmtlib's ostream support 9 | // 10 | #if !defined(SPDLOG_FMT_EXTERNAL) 11 | #ifndef FMT_HEADER_ONLY 12 | #define FMT_HEADER_ONLY 13 | #endif 14 | #include "bundled/ostream.h" 15 | #include "fmt.h" 16 | #else 17 | #include 18 | #endif 19 | -------------------------------------------------------------------------------- /samples/hiploadmodule/kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" __global__ void _occa_addVectors_0(const size_t entries, 4 | const float * a, 5 | const float * b, 6 | float * ab) { 7 | size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; 8 | if (i < entries) { 9 | ab[i] = a[i] + b[i]; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/hipDeviceLinkWrite.cpp: -------------------------------------------------------------------------------- 1 | #include "hipDeviceLink.h" 2 | 3 | __global__ void Write(const int *in) { 4 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 5 | global[tid] = in[tid]; 6 | } 7 | 8 | void writeGlobal(int *hostIn) { 9 | int *deviceIn; 10 | hipMalloc((void **)&deviceIn, SIZE); 11 | hipMemcpy(deviceIn, hostIn, SIZE, hipMemcpyHostToDevice); 12 | hipLaunchKernelGGL(Write, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, deviceIn); 13 | hipFree(deviceIn); 14 | } 15 | -------------------------------------------------------------------------------- /bin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set_source_files_properties(hipcl_config.cc PROPERTIES LANGUAGE CXX ) 3 | 4 | add_executable(hipcl_config hipcl_config.cc) 5 | 6 | set_target_properties(hipcl_config PROPERTIES CXX_STANDARD_REQUIRED ON) 7 | 8 | target_include_directories(hipcl_config PRIVATE "${CMAKE_BINARY_DIR}" 
"${CMAKE_SOURCE_DIR}/cxxopts/include") 9 | 10 | target_link_libraries(hipcl_config ${PTHREAD_LIBRARY}) 11 | 12 | install(TARGETS "hipcl_config" 13 | RUNTIME DESTINATION "${HIPCL_BIN_DIR}") 14 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/nearbyintF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(nearbyint)(float x) 12 | { 13 | return BUILTIN_RINT_F32(x); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/nearbyintD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(nearbyint)(double x) 12 | { 13 | return BUILTIN_RINT_F64(x); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/scalbnF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(scalbn)(float x, int n) 12 | { 13 | return MATH_MANGLE(ldexp)(x, n); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/scalbnD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(scalbn)(double x, int n) 12 | { 13 | return MATH_MANGLE(ldexp)(x, n); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/hipDeviceLinkRead.cpp: -------------------------------------------------------------------------------- 1 | #include "hipDeviceLink.h" 2 | 3 | __device__ int global[NUM]; 4 | 5 | __global__ void Read(int *out) { 6 | int tid = threadIdx.x + blockIdx.x * blockDim.x; 7 | out[tid] = global[tid]; 8 | } 9 | 10 | void readGlobal(int *hostOut) { 11 | int *deviceOut; 12 | hipMalloc((void **)&deviceOut, SIZE); 13 | hipLaunchKernelGGL(Read, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, deviceOut); 14 | hipMemcpy(hostOut, deviceOut, SIZE, hipMemcpyDeviceToHost); 15 | hipFree(deviceOut); 16 | } 17 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/ncdfinvF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. 
See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(ncdfinv)(float x) 12 | { 13 | return -0x1.6a09e6p+0f * MATH_MANGLE(erfcinv)(x + x); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /spdlog/formatter.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "fmt/fmt.h" 9 | #include "spdlog/details/log_msg.h" 10 | 11 | namespace spdlog { 12 | 13 | class formatter 14 | { 15 | public: 16 | virtual ~formatter() = default; 17 | virtual void format(const details::log_msg &msg, fmt::memory_buffer &dest) = 0; 18 | virtual std::unique_ptr clone() const = 0; 19 | }; 20 | } // namespace spdlog 21 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/ncdfinvD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(ncdfinv)(double x) 12 | { 13 | return -0x1.6a09e667f3bcdp+0 * MATH_MANGLE(erfcinv)(x + x); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /samples/hipDeviceLink/hipDeviceLink.cpp: -------------------------------------------------------------------------------- 1 | #include "hipDeviceLinkConsts.h" 2 | #include 3 | #include 4 | 5 | /* Sends NUM ints to the device with writeGlobal(), fetches them back with readGlobal(), */ 6 | /* and prints PASSED only when every element survived the round trip. */ 7 | int main() { 8 | int *hostIn, *hostOut; 9 | hostIn = new int[NUM]; 10 | hostOut = new int[NUM]; 11 | for (int i = 0; i < NUM; i++) { 12 | hostIn[i] = -1 * i; 13 | hostOut[i] = 0; 14 | } 15 | writeGlobal(hostIn); 16 | readGlobal(hostOut); 17 | /* Compare explicitly rather than with assert(): assert() is compiled out under NDEBUG, */ 18 | /* which would let this test print PASSED without checking anything. */ 19 | bool matched = true; 20 | for (int i = 0; i < NUM; i++) { 21 | if (hostIn[i] != hostOut[i]) { 22 | printf("FAILED: mismatch at index %d\n", i); 23 | matched = false; 24 | break; 25 | } 26 | } 27 | delete[] hostIn; 28 | delete[] hostOut; 29 | if (!matched) { 30 | return 1; 31 | } 32 | printf("PASSED!\n"); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /lib/hipcl-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | set_and_check( HIP_INCLUDE_DIR "@PACKAGE_HIPCL_INC_DIR@" ) 4 | set_and_check( HIP_INCLUDE_DIRS "@PACKAGE_HIPCL_INC_DIR@" ) 5 | set_and_check( HIP_LIB_INSTALL_DIR "@PACKAGE_HIPCL_LIB_DIR@" ) 6 | set_and_check( HIP_BIN_INSTALL_DIR "@PACKAGE_HIPCL_BIN_DIR@" ) 7 | 8 | set_and_check(HIP_HIPCC_EXECUTABLE "@HIPCL_COMPILER@") 9 | set_and_check(HIP_HIPCONFIG_EXECUTABLE "${HIP_BIN_INSTALL_DIR}/hipcl_config") 10 | 11 | include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) 12 | 13 | set( HIP_LIBRARIES hip::hipcl) 14 | set( HIP_LIBRARY ${HIP_LIBRARIES}) 15 | 16 | -------------------------------------------------------------------------------- /hipcl_config.h.in: -------------------------------------------------------------------------------- 1 | #cmakedefine CLANG_BIN_PATH "@CLANG_BIN_PATH@" 2 | 3 | #cmakedefine CLANG_ROOT_PATH 
"@CLANG_ROOT_PATH@" 4 | 5 | #cmakedefine HIPCL_CXX_OPTIONS "@HIPCL_CXX_OPTIONS@" 6 | 7 | #cmakedefine CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@" 8 | 9 | #cmakedefine HIPCL_LIB_DIR "@HIPCL_LIB_DIR@" 10 | #cmakedefine HIPCL_DATA_DIR "@HIPCL_DATA_DIR@" 11 | #cmakedefine HIPCL_INC_DIR "@HIPCL_INC_DIR@" 12 | #cmakedefine HIPCL_BIN_DIR "@HIPCL_BIN_DIR@" 13 | 14 | #cmakedefine HIPCL_VERSION_FULL "@HIPCL_VERSION_FULL@" 15 | 16 | #cmakedefine HIPCL_VERSION_FULL_PRE "@HIPCL_VERSION_FULL_PRE@" 17 | 18 | #define HIP_PLATFORM "hipcl" 19 | -------------------------------------------------------------------------------- /samples/hip-cuda/RecursiveGaussian/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # RecursiveGaussian 2 | 3 | add_hipcl_test(RecursiveGaussian RecursiveGaussian Passed RecursiveGaussian.cpp 4 | -q -e -t -i 32) 5 | 6 | set(DEST_IMAGE "${CMAKE_CURRENT_BINARY_DIR}/RecursiveGaussian_Input.bmp") 7 | 8 | add_custom_command(OUTPUT "${DEST_IMAGE}" 9 | COMMAND "${CMAKE_COMMAND}" -E copy 10 | "${CMAKE_CURRENT_SOURCE_DIR}/RecursiveGaussian_Input.bmp" 11 | "${CMAKE_CURRENT_BINARY_DIR}" 12 | VERBATIM) 13 | 14 | add_custom_target("RecursiveGaussian_image" ALL 15 | DEPENDS "${DEST_IMAGE}") 16 | 17 | add_dependencies(RecursiveGaussian RecursiveGaussian_image) 18 | -------------------------------------------------------------------------------- /spdlog/fmt/fmt.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016-2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // 9 | // Include a bundled header-only copy of fmtlib or an external one. 10 | // By default spdlog include its own copy. 
11 | // 12 | 13 | #if !defined(SPDLOG_FMT_EXTERNAL) 14 | #ifndef FMT_HEADER_ONLY 15 | #define FMT_HEADER_ONLY 16 | #endif 17 | #ifndef FMT_USE_WINDOWS_H 18 | #define FMT_USE_WINDOWS_H 0 19 | #endif 20 | #include "bundled/core.h" 21 | #include "bundled/format.h" 22 | #else // external fmtlib 23 | #include 24 | #include 25 | #endif 26 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/tables.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | // #include "ocml.h" 9 | 10 | #include "tables.h" 11 | 12 | 13 | #define DECLARE_TABLE(TYPE,NAME,LENGTH) \ 14 | __attribute__((visibility("protected"))) __constant TYPE TABLE_MANGLE(NAME) [ LENGTH ] = { 15 | 16 | #define END_TABLE() }; 17 | 18 | #include "besselF_table.h" 19 | #include "besselD_table.h" 20 | 21 | 22 | -------------------------------------------------------------------------------- /lib/common.hh: -------------------------------------------------------------------------------- 1 | 2 | enum class OCLType : unsigned { POD = 0, Pointer = 1, Image = 2, Sampler = 3 }; 3 | 4 | enum class OCLSpace : unsigned { 5 | Private = 0, 6 | Global = 1, 7 | Constant = 2, 8 | Local = 3, 9 | Unknown = 1000 10 | }; 11 | 12 | struct OCLArgTypeInfo { 13 | OCLType type; 14 | OCLSpace space; 15 | size_t size; 16 | }; 17 | 18 | struct OCLFuncInfo { 19 | std::vector ArgTypeInfo; 20 | OCLArgTypeInfo retTypeInfo; 21 | }; 22 | 23 | typedef std::map OCLFuncInfoMap; 24 | 25 | typedef std::map OpenCLFunctionInfoMap; 26 | 27 | bool parseSPIR(int32_t *stream, size_t numWords, OpenCLFunctionInfoMap &output); 28 | 
-------------------------------------------------------------------------------- /cxxopts/INSTALL: -------------------------------------------------------------------------------- 1 | == System installation == 2 | 3 | This library is header only. So you can either copy `include/cxxopts.hpp` to `/usr/include` or `/usr/local/include`, or add `include` to your search path. 4 | 5 | == Building the examples and tests == 6 | 7 | It is preferable to build out of source. Make a build directory somewhere, and then 8 | do the following, where `${CXXOPTS_DIR}` is the path that you checked out `cxxopts` 9 | to: 10 | 11 | cmake ${CXXOPTS_DIR} 12 | make 13 | 14 | You can use another build tool, such as ninja. 15 | 16 | cmake -G Ninja ${CXXOPTS_DIR} 17 | ninja 18 | 19 | 20 | To run the tests, you have to configure `cxxopts` with another flag: 21 | cmake -D CXXOPTS_BUILD_TESTS=On ${CXXOPTS_DIR} 22 | make 23 | make test 24 | -------------------------------------------------------------------------------- /spdlog/details/null_mutex.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include 9 | // null, no cost dummy "mutex" and dummy "atomic" int 10 | 11 | namespace spdlog { 12 | namespace details { 13 | struct null_mutex 14 | { 15 | void lock() {} 16 | void unlock() {} 17 | bool try_lock() 18 | { 19 | return true; 20 | } 21 | }; 22 | 23 | struct null_atomic_int 24 | { 25 | int value; 26 | null_atomic_int() = default; 27 | 28 | explicit null_atomic_int(int val) 29 | : value(val) 30 | { 31 | } 32 | 33 | int load(std::memory_order) const 34 | { 35 | return value; 36 | } 37 | 38 | void store(int val) 39 | { 40 | value = val; 41 | } 42 | }; 43 | 44 | } // namespace details 45 | } // namespace spdlog 46 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/tables.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | // Table stuff 9 | 10 | #define TABLE_MANGLE(NAME) __ocmltbl_##NAME 11 | 12 | extern __constant float TABLE_MANGLE(M32_J0)[]; 13 | extern __constant float TABLE_MANGLE(M32_J1)[]; 14 | extern __constant float TABLE_MANGLE(M32_Y0)[]; 15 | extern __constant float TABLE_MANGLE(M32_Y1)[]; 16 | extern __constant double TABLE_MANGLE(M64_J0)[]; 17 | extern __constant double TABLE_MANGLE(M64_J1)[]; 18 | extern __constant double TABLE_MANGLE(M64_Y0)[]; 19 | extern __constant double TABLE_MANGLE(M64_Y1)[]; 20 | 21 | #define USE_TABLE(TYPE,PTR,NAME) \ 22 | __constant TYPE * PTR = TABLE_MANGLE(NAME) 23 | 24 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/scalbF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(scalb)(float x, float y) 12 | { 13 | float t = BUILTIN_CLAMP_F32(y, -0x1.0p+20f, 0x1.0p+20f); 14 | float ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F32(t)); 15 | 16 | if (!FINITE_ONLY_OPT()) { 17 | ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; 18 | ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_PINF)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; 19 | ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_NINF)) ? 
AS_FLOAT(QNANBITPATT_SP32) : ret; 20 | } 21 | 22 | return ret; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rhypotF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(rhypot)(float x, float y) 12 | { 13 | float a = BUILTIN_ABS_F32(x); 14 | float b = BUILTIN_ABS_F32(y); 15 | float t = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b))); 16 | int e = BUILTIN_FREXP_EXP_F32(t); 17 | a = BUILTIN_FLDEXP_F32(a, -e); 18 | b = BUILTIN_FLDEXP_F32(b, -e); 19 | float ret = BUILTIN_FLDEXP_F32(BUILTIN_RSQRT_F32(MATH_MAD(a, a, b*b)), -e); 20 | 21 | if (!FINITE_ONLY_OPT()) { 22 | ret = (BUILTIN_ISINF_F32(x) | 23 | BUILTIN_ISINF_F32(y)) ? 24 | 0.0f : ret; 25 | } 26 | 27 | return ret; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/scalbD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(scalb)(double x, double y) 12 | { 13 | double t = BUILTIN_MIN_F64(BUILTIN_MAX_F64(y, -0x1.0p+20), 0x1.0p+20); 14 | double ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F64(t)); 15 | 16 | if (!FINITE_ONLY_OPT()) { 17 | ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; 18 | ret = (BUILTIN_CLASS_F64(x, CLASS_NZER|CLASS_PZER) & BUILTIN_CLASS_F64(y, CLASS_PINF)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; 19 | ret = (BUILTIN_ISINF_F64(x) & BUILTIN_CLASS_F64(y, CLASS_NINF)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; 20 | } 21 | 22 | return ret; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rcbrtF.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(rcbrt)(float x) 12 | { 13 | float ax = BUILTIN_ABS_F32(x); 14 | 15 | ax = BUILTIN_FLDEXP_F32(ax, BUILTIN_CLASS_F32(x, CLASS_NSUB|CLASS_PSUB) ? 24 : 0); 16 | 17 | float z = BUILTIN_EXP2_F32(-0x1.555556p-2f * BUILTIN_LOG2_F32(ax)); 18 | z = MATH_MAD(MATH_MAD(z*z, -z*ax, 1.0f), 0x1.555556p-2f*z, z); 19 | 20 | z = BUILTIN_FLDEXP_F32(z, BUILTIN_CLASS_F32(x, CLASS_NSUB|CLASS_PSUB) ? 8 : 0); 21 | 22 | float xi = MATH_FAST_RCP(x); 23 | z = BUILTIN_CLASS_F32(x, CLASS_SNAN|CLASS_QNAN|CLASS_PZER|CLASS_NZER|CLASS_PINF|CLASS_NINF) ? 
xi : z; 24 | 25 | return BUILTIN_COPYSIGN_F32(z, x); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /llvm_passes/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard to use.") 2 | set(CMAKE_CXX_EXTENSIONS OFF) 3 | 4 | execute_process(COMMAND "${LLVM_CONFIG}" "--cmakedir" 5 | OUTPUT_VARIABLE LLVM_DIR 6 | OUTPUT_STRIP_TRAILING_WHITESPACE 7 | RESULT_VARIABLE RES) 8 | 9 | if(NOT RES EQUAL 0) 10 | message(FATAL_ERROR "failed to run llvm-config (${LLVM_CONFIG})") 11 | endif() 12 | 13 | message(STATUS "LLVM CMake directory: ${LLVM_DIR}") 14 | 15 | find_package(LLVM REQUIRED CONFIG HINTS "${LLVM_DIR}" PATHS "${LLVM_DIR}" NO_DEFAULT_PATH) 16 | 17 | ###################################### 18 | 19 | add_definitions(${LLVM_DEFINITIONS}) 20 | 21 | include_directories(${LLVM_INCLUDE_DIRS}) 22 | 23 | if(NOT LLVM_ENABLE_RTTI) 24 | add_compile_options("-fno-rtti") 25 | endif() 26 | 27 | if(NOT LLVM_ENABLE_EH) 28 | add_compile_options("-fno-exceptions") 29 | endif() 30 | 31 | add_library(LLVMHipDynMem MODULE HipDynMem.cpp) 32 | 33 | install(TARGETS LLVMHipDynMem 34 | LIBRARY DESTINATION "${HIPCL_LLVM_DIR}" 35 | ARCHIVE DESTINATION "${HIPCL_LLVM_DIR}" 36 | ) 37 | -------------------------------------------------------------------------------- /cxxopts/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Jarryd Beck 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 
10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /doc/env_variables.rst: -------------------------------------------------------------------------------- 1 | ENV variables controlling behaviour 2 | --------------------------------------- 3 | 4 | The behavior of HIPCL can be controlled with multiple environment variables 5 | listed below. The variables are helpful both when using and when developing 6 | HIPCL. 7 | 8 | - **HIPCL_LOGLEVEL** 9 | String value. Changes verbosity of log messages coming from HIPCL. 10 | Possible values are: debug,info,warn,err,crit,off 11 | Defaults to "err". HIPCL will log messages of this priority and higher. 12 | 13 | - **HIPCL_PLATFORM** 14 | Numeric value. If there are multiple OpenCL platforms on the system, setting this to a number (0..platforms-1) 15 | will limit HipCL to that single platform. By default HipCL can access all OpenCL platforms. 16 | 17 | - **HIPCL_DEVICE** 18 | Numeric value. If there are multiple OpenCL devices in the selected platform, setting this to a number (0..N-1) 19 | will limit HipCL to a single device. If HIPCL_PLATFORM is not set but HIPCL_DEVICE is, 20 | HIPCL_PLATFORM defaults to 0. 21 | 22 | - **HIPCL_DEVICE_TYPE** 23 | String value. Limits OpenCL device visibility to HipCL based on device type. 
24 | Possible values are: all, cpu, gpu, default, accel 25 | 26 | -------------------------------------------------------------------------------- /spdlog/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Gabi Melman. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /lib/log.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "spdlog/spdlog.h" 4 | #include "spdlog/sinks/stdout_color_sinks.h" 5 | 6 | #ifdef __GNUC__ 7 | #pragma GCC visibility push(hidden) 8 | #endif 9 | 10 | static int SpdlogWasSetup = 0; 11 | 12 | void setupSpdlog() { 13 | if (SpdlogWasSetup) 14 | return; 15 | spdlog::set_default_logger(spdlog::stderr_color_mt("HIPCL")); 16 | spdlog::set_level(spdlog::level::debug); 17 | spdlog::set_pattern("%n %^%l%$ [TID %t] [%E.%F] : %v"); 18 | 19 | spdlog::level::level_enum spd_loglevel = spdlog::level::err; 20 | 21 | const char *loglevel = getenv("HIPCL_LOGLEVEL"); 22 | if (loglevel) { 23 | std::string level(loglevel); 24 | if (level == "debug") 25 | spd_loglevel = spdlog::level::debug; 26 | if (level == "info") 27 | spd_loglevel = spdlog::level::info; 28 | if (level == "warn") 29 | spd_loglevel = spdlog::level::warn; 30 | if (level == "err") 31 | spd_loglevel = spdlog::level::err; 32 | if (level == "crit") 33 | spd_loglevel = spdlog::level::critical; 34 | if (level == "off") 35 | spd_loglevel = spdlog::level::off; 36 | } 37 | 38 | spdlog::set_level(spd_loglevel); 39 | 40 | SpdlogWasSetup = 1; 41 | } 42 | 43 | #ifdef __GNUC__ 44 | #pragma GCC visibility pop 45 | #endif 46 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rhypotD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(rhypot)(double x, double y) 12 | { 13 | double a = BUILTIN_ABS_F64(x); 14 | double b = BUILTIN_ABS_F64(y); 15 | double t = BUILTIN_MAX_F64(a, b); 16 | int e = BUILTIN_FREXP_EXP_F64(t); 17 | a = BUILTIN_FLDEXP_F64(a, -e); 18 | b = BUILTIN_FLDEXP_F64(b, -e); 19 | double d2 = MATH_MAD(a, a, b*b); 20 | double z = BUILTIN_RSQRT_F64(d2); 21 | double u = MATH_MAD(-d2*z, z, 1.0); 22 | z = MATH_MAD(z*u, MATH_MAD(u, 0.375, 0.5), z); 23 | double ret = BUILTIN_FLDEXP_F64(z, -e); 24 | 25 | if (!FINITE_ONLY_OPT()) { 26 | ret = t == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; 27 | 28 | ret = BUILTIN_ISNAN_F64(x) | 29 | BUILTIN_ISNAN_F64(y) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; 30 | 31 | ret = BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) ? 0.0 : ret; 32 | } 33 | 34 | return ret; 35 | } 36 | 37 | -------------------------------------------------------------------------------- /spdlog/sinks/msvc_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2016 Alexander Dalshov. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #if defined(_WIN32) 13 | 14 | #include "spdlog/details/null_mutex.h" 15 | #include "spdlog/sinks/base_sink.h" 16 | 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | namespace spdlog { 23 | namespace sinks { 24 | /* 25 | * MSVC sink (logging using OutputDebugStringA) 26 | */ 27 | template 28 | class msvc_sink : public base_sink 29 | { 30 | public: 31 | explicit msvc_sink() {} 32 | 33 | protected: 34 | void sink_it_(const details::log_msg &msg) override 35 | { 36 | 37 | fmt::memory_buffer formatted; 38 | sink::formatter_->format(msg, formatted); 39 | OutputDebugStringA(fmt::to_string(formatted).c_str()); 40 | } 41 | 42 | void flush_() override {} 43 | }; 44 | 45 | using msvc_sink_mt = msvc_sink; 46 | using msvc_sink_st = msvc_sink; 47 | 48 | using windebug_sink_mt = msvc_sink_mt; 49 | using windebug_sink_st = msvc_sink_st; 50 | 51 | } // namespace sinks 52 | } // namespace spdlog 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /cxxopts/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014 Jarryd Beck 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 
12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | if(CXXOPTS_BUILD_EXAMPLES) 22 | add_executable(example example.cpp) 23 | target_link_libraries(example cxxopts) 24 | endif() 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Tampere University 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | 21 | Portions copyright: 22 | 23 | The Khronos Group Inc. (lib/spirv.hh, lib/CL/LICENSE) 24 | 25 | Gabi Melman (spdlog/LICENSE) 26 | 27 | Advanced Micro Devices, Inc. (lib/bitcode/OCML/LICENSE) 28 | -------------------------------------------------------------------------------- /include/CL/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2015 The Khronos Group Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and/or associated documentation files (the 5 | "Materials"), to deal in the Materials without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Materials, and to 8 | permit persons to whom the Materials are furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Materials. 13 | 14 | MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 15 | KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 16 | SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 17 | https://www.khronos.org/registry/ 18 | 19 | THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 | MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
26 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rcbrtD.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(rcbrt)(double x) 12 | { 13 | double a = BUILTIN_ABS_F64(x); 14 | int e3 = BUILTIN_FREXP_EXP_F64(a); 15 | int e = (int)BUILTIN_RINT_F32(0x1.555556p-2f * (float)e3); 16 | a = BUILTIN_FLDEXP_F64(a, -3*e); 17 | 18 | double c = (double)BUILTIN_EXP2_F32(-0x1.555556p-2f * BUILTIN_LOG2_F32((float)a)); 19 | 20 | // Correction is c + c*(1 - a c^3)/(1 + 2 a c^3) 21 | // = c + c*t/(3 - 2t) where t = 1 - a c^3 22 | // use t/(3 - 2t) ~ t/3 + 2 t^2 / 9 + 4 t^3 / 27 ... 23 | // compute t with extra precision for better accuracy 24 | double c3 = c * c * c; 25 | double t = MATH_MAD(-a, c3, 1.0); 26 | c = MATH_MAD(c, t*MATH_MAD(t, 0x1.c71c71c71c8b2p-3, 0x1.5555555555685p-2), c); 27 | 28 | c = BUILTIN_FLDEXP_F64(c, -e); 29 | 30 | if (!FINITE_ONLY_OPT()) { 31 | c = BUILTIN_CLASS_F64(a, CLASS_PINF) ? 0.0 : c; 32 | c = x == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : c; 33 | } 34 | 35 | return BUILTIN_COPYSIGN_F64(c, x); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /spdlog/fmt/bundled/LICENSE.rst: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 - 2016, Victor Zverovich 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. 
Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /spdlog/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #include "spdlog/details/null_mutex.h" 13 | #include "spdlog/sinks/base_sink.h" 14 | 15 | #include 16 | 17 | namespace spdlog { 18 | namespace sinks { 19 | 20 | template 21 | class null_sink : public base_sink 22 | { 23 | protected: 24 | void sink_it_(const details::log_msg &) override {} 25 | void flush_() override {} 26 | }; 27 | 28 | using null_sink_mt = null_sink; 29 | using null_sink_st = null_sink; 30 | 31 | } // namespace sinks 32 | 33 | template 34 | inline std::shared_ptr null_logger_mt(const std::string &logger_name) 35 | { 36 | auto null_logger = Factory::template create(logger_name); 37 | null_logger->set_level(level::off); 38 | return null_logger; 39 | } 40 | 41 | template 42 | inline std::shared_ptr null_logger_st(const std::string &logger_name) 43 | { 44 | auto null_logger = Factory::template create(logger_name); 45 | null_logger->set_level(level::off); 46 | return null_logger; 47 | } 48 | 49 | } // namespace spdlog 50 | -------------------------------------------------------------------------------- /cxxopts/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (CXXOPTS_BUILD_TESTS) 2 | add_executable(options_test main.cpp options.cpp) 3 | target_link_libraries(options_test cxxopts) 4 | add_test(options options_test) 5 | 6 | # test if the targets are findable from the build directory 7 | add_test(find-package-test ${CMAKE_CTEST_COMMAND} 8 | -C ${CMAKE_BUILD_TYPE} 9 | --build-and-test 10 | "${CMAKE_CURRENT_SOURCE_DIR}/find-package-test" 11 | "${CMAKE_CURRENT_BINARY_DIR}/find-package-test" 12 | --build-generator ${CMAKE_GENERATOR} 13 | --build-makeprogram ${CMAKE_MAKE_PROGRAM} 14 | --build-options 15 | "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" 16 | "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" 17 | "-Dcxxopts_DIR=${PROJECT_BINARY_DIR}" 18 | ) 19 | 20 | # test if the targets are findable when add_subdirectory is used 21 | add_test(add-subdirectory-test ${CMAKE_CTEST_COMMAND} 22 
| -C ${CMAKE_BUILD_TYPE} 23 | --build-and-test 24 | "${CMAKE_CURRENT_SOURCE_DIR}/add-subdirectory-test" 25 | "${CMAKE_CURRENT_BINARY_DIR}/add-subdirectory-test" 26 | --build-generator ${CMAKE_GENERATOR} 27 | --build-makeprogram ${CMAKE_MAKE_PROGRAM} 28 | --build-options 29 | "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" 30 | "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" 31 | ) 32 | 33 | add_executable(link_test link_a.cpp link_b.cpp) 34 | target_link_libraries(link_test cxxopts) 35 | endif() 36 | -------------------------------------------------------------------------------- /spdlog/details/log_msg.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/common.h" 9 | #include "spdlog/details/os.h" 10 | 11 | #include 12 | #include 13 | 14 | namespace spdlog { 15 | namespace details { 16 | struct log_msg 17 | { 18 | // Builds a message for the given source location; time is taken from os::now() and thread_id from os::thread_id() unless disabled by SPDLOG_NO_DATETIME / SPDLOG_NO_THREAD_ID. 19 | log_msg(source_loc loc, const std::string *loggers_name, level::level_enum lvl, string_view_t view) 20 | : logger_name(loggers_name) 21 | , level(lvl) 22 | #ifndef SPDLOG_NO_DATETIME 23 | , time(os::now()) 24 | #endif 25 | // Only thread_id is optional; source and payload must be initialized in every configuration. 26 | #ifndef SPDLOG_NO_THREAD_ID 27 | , thread_id(os::thread_id()) 28 | #endif 29 | , source(loc) 30 | , payload(view) 31 | { 32 | } 33 | 34 | log_msg(const std::string *loggers_name, level::level_enum lvl, string_view_t view) 35 | : log_msg(source_loc{}, loggers_name, lvl, view) 36 | { 37 | } 38 | 39 | log_msg(const log_msg &other) = default; 40 | 41 | const std::string *logger_name{nullptr}; 42 | level::level_enum level{level::off}; 43 | log_clock::time_point time; 44 | size_t thread_id{0}; 45 | size_t msg_id{0}; 46 | 47 | // wrapping the formatted text with color (updated by pattern_formatter). 
48 | mutable size_t color_range_start{0}; 49 | mutable size_t color_range_end{0}; 50 | 51 | source_loc source; 52 | const string_view_t payload; 53 | }; 54 | } // namespace details 55 | } // namespace spdlog 56 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rlen3F.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | CONSTATTR float 11 | MATH_MANGLE(rlen3)(float x, float y, float z) 12 | { 13 | float a = BUILTIN_ABS_F32(x); 14 | float b = BUILTIN_ABS_F32(y); 15 | float c = BUILTIN_ABS_F32(z); 16 | 17 | float a1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b))); 18 | float b1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a), AS_UINT(b))); 19 | 20 | a = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a1), AS_UINT(c))); 21 | float c1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a1), AS_UINT(c))); 22 | 23 | b = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b1), AS_UINT(c1))); 24 | c = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b1), AS_UINT(c1))); 25 | 26 | int e = BUILTIN_FREXP_EXP_F32(a); 27 | a = BUILTIN_FLDEXP_F32(a, -e); 28 | b = BUILTIN_FLDEXP_F32(b, -e); 29 | c = BUILTIN_FLDEXP_F32(c, -e); 30 | 31 | float ret = BUILTIN_RSQRT_F32(MATH_MAD(a, a, MATH_MAD(b, b, c*c))); 32 | ret = BUILTIN_FLDEXP_F32(ret, -e); 33 | 34 | if (!FINITE_ONLY_OPT()) { 35 | ret = (BUILTIN_ISINF_F32(x) | 36 | BUILTIN_ISINF_F32(y) | 37 | BUILTIN_ISINF_F32(z)) ? 
0.0f : ret; 38 | } 39 | 40 | return ret; 41 | } 42 | 43 | -------------------------------------------------------------------------------- /spdlog/details/console_globals.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // 3 | // Copyright(c) 2018 Gabi Melman. 4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 5 | // 6 | 7 | #include "spdlog/details/null_mutex.h" 8 | #include 9 | #include 10 | 11 | #ifdef _WIN32 12 | 13 | #ifndef NOMINMAX 14 | #define NOMINMAX // prevent windows redefining min/max 15 | #endif 16 | 17 | #ifndef WIN32_LEAN_AND_MEAN 18 | #define WIN32_LEAN_AND_MEAN 19 | #endif 20 | 21 | #include 22 | #endif 23 | 24 | namespace spdlog { 25 | namespace details { 26 | struct console_stdout 27 | { 28 | static std::FILE *stream() 29 | { 30 | return stdout; 31 | } 32 | #ifdef _WIN32 33 | static HANDLE handle() 34 | { 35 | return ::GetStdHandle(STD_OUTPUT_HANDLE); 36 | } 37 | #endif 38 | }; 39 | 40 | struct console_stderr 41 | { 42 | static std::FILE *stream() 43 | { 44 | return stderr; 45 | } 46 | #ifdef _WIN32 47 | static HANDLE handle() 48 | { 49 | return ::GetStdHandle(STD_ERROR_HANDLE); 50 | } 51 | #endif 52 | }; 53 | 54 | struct console_mutex 55 | { 56 | using mutex_t = std::mutex; 57 | static mutex_t &mutex() 58 | { 59 | static mutex_t s_mutex; 60 | return s_mutex; 61 | } 62 | }; 63 | 64 | struct console_nullmutex 65 | { 66 | using mutex_t = null_mutex; 67 | static mutex_t &mutex() 68 | { 69 | static mutex_t s_mutex; 70 | return s_mutex; 71 | } 72 | }; 73 | } // namespace details 74 | } // namespace spdlog 75 | -------------------------------------------------------------------------------- /spdlog/sinks/ostream_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 10 | #endif 11 | 12 | #include "spdlog/details/null_mutex.h" 13 | #include "spdlog/sinks/base_sink.h" 14 | 15 | #include 16 | #include 17 | 18 | namespace spdlog { 19 | namespace sinks { 20 | template 21 | class ostream_sink final : public base_sink 22 | { 23 | public: 24 | explicit ostream_sink(std::ostream &os, bool force_flush = false) 25 | : ostream_(os) 26 | , force_flush_(force_flush) 27 | { 28 | } 29 | ostream_sink(const ostream_sink &) = delete; 30 | ostream_sink &operator=(const ostream_sink &) = delete; 31 | 32 | protected: 33 | void sink_it_(const details::log_msg &msg) override 34 | { 35 | fmt::memory_buffer formatted; 36 | sink::formatter_->format(msg, formatted); 37 | ostream_.write(formatted.data(), static_cast(formatted.size())); 38 | if (force_flush_) 39 | { 40 | ostream_.flush(); 41 | } 42 | } 43 | 44 | void flush_() override 45 | { 46 | ostream_.flush(); 47 | } 48 | 49 | std::ostream &ostream_; 50 | bool force_flush_; 51 | }; 52 | 53 | using ostream_sink_mt = ostream_sink; 54 | using ostream_sink_st = ostream_sink; 55 | 56 | } // namespace sinks 57 | } // namespace spdlog 58 | -------------------------------------------------------------------------------- /spdlog/sinks/sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 
3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #include "spdlog/details/log_msg.h" 9 | #include "spdlog/details/pattern_formatter.h" 10 | #include "spdlog/formatter.h" 11 | 12 | namespace spdlog { 13 | namespace sinks { 14 | class sink 15 | { 16 | public: 17 | sink() 18 | : level_(level::trace) 19 | , formatter_(new pattern_formatter()) 20 | { 21 | } 22 | 23 | explicit sink(std::unique_ptr formatter) 24 | : level_(level::trace) 25 | , formatter_(std::move(formatter)) 26 | { 27 | } 28 | 29 | virtual ~sink() = default; 30 | virtual void log(const details::log_msg &msg) = 0; 31 | virtual void flush() = 0; 32 | virtual void set_pattern(const std::string &pattern) = 0; 33 | virtual void set_formatter(std::unique_ptr sink_formatter) = 0; 34 | 35 | bool should_log(level::level_enum msg_level) const 36 | { 37 | return msg_level >= level_.load(std::memory_order_relaxed); 38 | } 39 | 40 | void set_level(level::level_enum log_level) 41 | { 42 | level_.store(log_level); 43 | } 44 | 45 | level::level_enum level() const 46 | { 47 | return static_cast(level_.load(std::memory_order_relaxed)); 48 | } 49 | 50 | protected: 51 | // sink log level - default is all 52 | level_t level_; 53 | 54 | // sink formatter - default is full format 55 | std::unique_ptr formatter_; 56 | }; 57 | 58 | } // namespace sinks 59 | } // namespace spdlog 60 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/i0F.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
// Single-precision i0(x), evaluated on |x| (so i0(-x) == i0(x)).
//
// Two approximations are used, selected by magnitude:
//   |x| <  8 : polynomial in t = x*x/4
//   |x| >= 8 : polynomial in 1/x, scaled by exp(x) * rsqrt(x)
// Unless the finite-only option is enabled, +inf and NaN inputs are
// returned unchanged (after the absolute value).
CONSTATTR float
MATH_MANGLE(i0)(float x)
{
    x = BUILTIN_ABS_F32(x);

    float ret;

    if (x < 8.0f) {
        float t = 0.25f * x * x;
        // Horner evaluation (nested multiply-adds), highest-order
        // coefficient innermost.
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t,
                  0x1.38d760p-43f, 0x1.7fd5c6p-38f), 0x1.66ffc8p-31f), 0x1.4ecb6ep-25f),
                  0x1.033c70p-19f), 0x1.233bb2p-14f), 0x1.c71db2p-10f), 0x1.c71c5ep-6f),
                  0x1.000000p-2f), 0x1.000000p+0f);
        // Final step adds the constant term: result = 1 + t * poly(t).
        ret = MATH_MAD(t, ret, 1.0f);
    } else {
        // NaN also lands here because (x < 8.0f) is false for NaN.
        float t = MATH_FAST_RCP(x);
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t,
                  0x1.c49916p-2f, -0x1.110f5ep-5f), 0x1.2a130ap-5f), 0x1.c68702p-6f),
                  0x1.9890aep-5f), 0x1.988450p-2f);
        // For x > 88 the exponential is taken of (x - 88) and the result is
        // multiplied by a large constant afterwards, so the exp() call itself
        // sees a smaller argument.
        // NOTE(review): 0x1.f1056ep+126f is presumably exp(88) - confirm.
        float xs = x - 88.0f;
        float e1 = MATH_MANGLE(exp)(x > 88.0f ? xs : x);
        float e2 = x > 88.0f ? 0x1.f1056ep+126f : 1.0f;
        ret = e1 * BUILTIN_RSQRT_F32(x) * ret * e2;
    }

    if (!FINITE_ONLY_OPT()) {
        // +inf and NaN (quiet or signalling) pass straight through.
        ret = BUILTIN_CLASS_F32(x, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? x : ret;
    }

    return ret;
}
// Single-precision i1(x). Computed on a = |x|; the sign of x is copied onto
// the result at the end, so i1(-x) == -i1(x).
//
// Two approximations are used, selected by magnitude:
//   |x| <  8 : (x/2) * (1 + t * poly(t)) with t = (x/2)^2
//   |x| >= 8 : polynomial in 1/|x|, scaled by exp(|x|) * rsqrt(|x|)
// Unless the finite-only option is enabled, +inf and NaN magnitudes are
// returned as-is (before the sign copy).
CONSTATTR float
MATH_MANGLE(i1)(float x)
{
    float a = BUILTIN_ABS_F32(x);

    float ret;

    if (a < 8.0f) {
        // From here on `a` holds |x|/2.
        a *= 0.5f;
        float t = a * a;
        // Horner evaluation, highest-order coefficient innermost.
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
                  0x1.882dd2p-40f, 0x1.af97f6p-35f), 0x1.66a3eap-28f), 0x1.251b32p-22f),
                  0x1.84cbb6p-17f), 0x1.6c0d4ap-12f), 0x1.c71d3ap-8f), 0x1.555550p-4f),
                  0x1.000000p-1f);
        // result = a + t * a * poly(t)
        ret = MATH_MAD(t, a*ret, a);
    } else {
        // NaN also lands here because (a < 8.0f) is false for NaN.
        float t = MATH_FAST_RCP(a);
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t,
                  -0x1.06de32p-1f, 0x1.043b22p-5f), -0x1.925276p-5f), -0x1.7c15c8p-5f),
                  -0x1.3266ccp-3f), 0x1.988456p-2f);

        // For a > 88 the exponential is taken of (a - 88) and the result is
        // multiplied by a large constant afterwards, so the exp() call itself
        // sees a smaller argument.
        // NOTE(review): 0x1.f1056ep+126f is presumably exp(88) - confirm.
        float as = a - 88.0f;
        float e1 = MATH_MANGLE(exp)(a > 88.0f ? as : a);
        float e2 = a > 88.0f ? 0x1.f1056ep+126f : 1.0f;
        ret = e1 * BUILTIN_RSQRT_F32(a) * ret * e2;
    }

    if (!FINITE_ONLY_OPT()) {
        // +inf and NaN (quiet or signalling) pass straight through.
        ret = BUILTIN_CLASS_F32(a, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? a : ret;
    }

    return BUILTIN_COPYSIGN_F32(ret, x);
}
// Double-precision reciprocal length of a 3-vector: 1 / sqrt(x^2 + y^2 + z^2).
//
// The magnitudes are sorted and scaled by a power of two taken from the
// largest one before squaring, then the scale is undone on the result.
// Special cases (skipped when the finite-only option is enabled), in
// increasing priority:
//   all components zero -> +inf
//   any component NaN   -> quiet NaN
//   any component inf   -> 0.0   (applied last, so it wins over NaN)
CONSTATTR double
MATH_MANGLE(rlen3)(double x, double y, double z)
{
    double a = BUILTIN_ABS_F64(x);
    double b = BUILTIN_ABS_F64(y);
    double c = BUILTIN_ABS_F64(z);

    // Three compare-exchange steps leave a >= b >= c.
    double a1 = BUILTIN_MAX_F64(a, b);
    double b1 = BUILTIN_MIN_F64(a, b);

    a = BUILTIN_MAX_F64(a1, c);
    double c1 = BUILTIN_MIN_F64(a1, c);

    b = BUILTIN_MAX_F64(b1, c1);
    c = BUILTIN_MIN_F64(b1, c1);

    // Scale every component by 2^-e, where e is the exponent of the largest.
    int e = BUILTIN_FREXP_EXP_F64(a);
    a = BUILTIN_FLDEXP_F64(a, -e);
    b = BUILTIN_FLDEXP_F64(b, -e);
    c = BUILTIN_FLDEXP_F64(c, -e);

    // Sum of squares, smallest terms accumulated first.
    double d2 = MATH_MAD(a, a, MATH_MAD(b, b, c*c));
    double v = BUILTIN_RSQRT_F64(d2);
    // Correction step on the rsqrt estimate:
    //   u = 1 - d2*v^2,  v <- v + v*u*(0.5 + 0.375*u)
    double u = MATH_MAD(-d2*v, v, 1.0);
    v = MATH_MAD(v*u, MATH_MAD(u, 0.375, 0.5), v);
    // The inputs were scaled by 2^-e, so the reciprocal is scaled by 2^-e too.
    double ret = BUILTIN_FLDEXP_F64(v, -e);

    if (!FINITE_ONLY_OPT()) {
        // a is the largest magnitude; zero here means the whole vector is zero.
        ret = a == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret;

        ret = (BUILTIN_ISNAN_F64(x) |
               BUILTIN_ISNAN_F64(y) |
               BUILTIN_ISNAN_F64(z)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;

        ret = (BUILTIN_ISINF_F64(x) |
               BUILTIN_ISINF_F64(y) |
               BUILTIN_ISINF_F64(z)) ? 0.0 : ret;
    }

    return ret;
}
#include <vector>

namespace spdlog {
namespace details {
// Fixed-capacity ring buffer laid over a std::vector<T>.
//
// The backing vector has one slot more than the requested capacity; the
// spare slot lets `head_ == tail_` mean "empty" while "full" is
// `tail_ + 1 == head_` (mod size). T must be default-constructible and
// move-assignable.
template<typename T>
class circular_q
{
public:
    using item_type = T;

    // max_items - number of items the queue can hold before it overruns.
    explicit circular_q(size_t max_items)
        : max_items_(max_items + 1) // one item is reserved as marker for full q
        , v_(max_items_)
    {
    }

    // Push back. If the queue is already full the oldest item is dropped
    // and the overrun counter is incremented.
    void push_back(T &&item)
    {
        v_[tail_] = std::move(item);
        tail_ = (tail_ + 1) % max_items_;

        if (tail_ == head_) // overrun last item if full
        {
            head_ = (head_ + 1) % max_items_;
            ++overrun_counter_;
        }
    }

    // Pop item from front into `popped_item`.
    // If there are no elements in the container, the behavior is undefined.
    void pop_front(T &popped_item)
    {
        popped_item = std::move(v_[head_]);
        head_ = (head_ + 1) % max_items_;
    }

    bool empty() const
    {
        return tail_ == head_;
    }

    bool full() const
    {
        // head is ahead of the tail by 1
        return ((tail_ + 1) % max_items_) == head_;
    }

    // Number of items dropped by push_back() since construction.
    size_t overrun_counter() const
    {
        return overrun_counter_;
    }

private:
    // Declared before v_ on purpose: v_ is sized from max_items_ in the
    // constructor, and members are initialised in declaration order.
    size_t max_items_;
    typename std::vector<T>::size_type head_ = 0;
    typename std::vector<T>::size_type tail_ = 0;

    std::vector<T> v_;

    size_t overrun_counter_ = 0;
};
} // namespace details
} // namespace spdlog
#ifndef OCLC_H
#define OCLC_H

// These constants are used to control the behavior of the libraries which
// check them.
//
// The current list of controls is as follows:
//
// __constant bool __oclc_finite_only_opt
// - the application will only pass finite arguments and expects only finite results
//
// __constant bool __oclc_unsafe_math_opt
// - the application accepts optimizations that may lower the accuracy of the results
//
// __constant bool __oclc_daz_opt
// - the application allows subnormal inputs or outputs to be flushed to zero
//
// __constant bool __oclc_correctly_rounded_sqrt32
// - the application is expecting sqrt(float) to produce a correctly rounded result
//
// __constant int __oclc_ISA_version
// - the ISA version of the target device
//
// It is expected that the implementation provides these as if declared from
// the following C code:
//
// const bool __oclc_... = 0; // or 1 (an int value for __oclc_ISA_version)
//
// allowing them and any control flow associated with them to be optimized away.

extern const __constant bool __oclc_finite_only_opt;
extern const __constant bool __oclc_unsafe_math_opt;
extern const __constant bool __oclc_daz_opt;
extern const __constant bool __oclc_correctly_rounded_sqrt32;
extern const __constant int __oclc_ISA_version;

#endif // OCLC_H
// Private configuration header for the double-precision OCML sources.
// Several includes are commented out in this copy; only tables.h is pulled in.

// OCML prototypes
//#include "ocml.h"

// Tables
#include "tables.h"

// Builtins
//#include "builtins.h"

// Mangling
// MATH_MANGLE(N)  - name of the f64 entry point for function N.
// MATH_PRIVATE(N) - name of an f64 helper for N, built with the __ocmlpriv prefix.
#define MATH_MANGLE(N) OCML_MANGLE_F64(N)
#define MATH_PRIVATE(N) MANGLE3(__ocmlpriv,N,f64)

// Optimization Controls
//#include "opts.h"

// Attributes
#define ALIGNEDATTR(X) __attribute__((aligned(X)))
#define INLINEATTR __attribute__((always_inline))
#define PUREATTR __attribute__((pure))
#define CONSTATTR __attribute__((const))

// Math controls
//#include "privD.h"

// Bit patterns
// 64-bit masks and encodings for a double laid out as
// sign (bit 63) | 11-bit exponent (bits 62..52) | 52-bit mantissa (bits 51..0).
// Sign bit only.
#define SIGNBIT_DP64 0x8000000000000000L
// Everything except the sign bit.
#define EXSIGNBIT_DP64 0x7fffffffffffffffL
// Exponent field.
#define EXPBITS_DP64 0x7ff0000000000000L
// Mantissa field.
#define MANTBITS_DP64 0x000fffffffffffffL
// Encodings of 1.0, 2.0 and 0.5.
#define ONEEXPBITS_DP64 0x3ff0000000000000L
#define TWOEXPBITS_DP64 0x4000000000000000L
#define HALFEXPBITS_DP64 0x3fe0000000000000L
// Bit 52: position of the implicit leading mantissa bit.
#define IMPBIT_DP64 0x0010000000000000L
// Quiet NaN with the sign clear, and the same pattern with the sign set.
#define QNANBITPATT_DP64 0x7ff8000000000000L
#define INDEFBITPATT_DP64 0xfff8000000000000L
// +infinity and -infinity.
#define PINFBITPATT_DP64 0x7ff0000000000000L
#define NINFBITPATT_DP64 0xfff0000000000000L
// Exponent bias and the shift that brings the exponent field down to bit 0.
#define EXPBIAS_DP64 1023
#define EXPSHIFTBITS_DP64 52
// Smallest/largest biased exponent of a normal number and the matching
// unbiased values.
#define BIASEDEMIN_DP64 1
#define EMIN_DP64 -1022
#define BIASEDEMAX_DP64 2046
#define EMAX_DP64 1023
// NOTE(review): large scaling constant; its use is not visible in this file.
#define LAMBDA_DP64 1.0e300
// Mantissa length in bits including the implicit bit, and decimal digits
// of precision.
#define MANTLENGTH_DP64 53
#define BASEDIGITS_DP64 15
before this file." 10 | #endif 11 | 12 | #ifdef _WIN32 13 | #include "spdlog/sinks/wincolor_sink.h" 14 | #else 15 | #include "spdlog/sinks/ansicolor_sink.h" 16 | #endif 17 | 18 | namespace spdlog { 19 | namespace sinks { 20 | #ifdef _WIN32 21 | using stdout_color_sink_mt = wincolor_stdout_sink_mt; 22 | using stdout_color_sink_st = wincolor_stdout_sink_st; 23 | using stderr_color_sink_mt = wincolor_stderr_sink_mt; 24 | using stderr_color_sink_st = wincolor_stderr_sink_st; 25 | #else 26 | using stdout_color_sink_mt = ansicolor_stdout_sink_mt; 27 | using stdout_color_sink_st = ansicolor_stdout_sink_st; 28 | using stderr_color_sink_mt = ansicolor_stderr_sink_mt; 29 | using stderr_color_sink_st = ansicolor_stderr_sink_st; 30 | #endif 31 | } // namespace sinks 32 | 33 | template 34 | inline std::shared_ptr stdout_color_mt(const std::string &logger_name) 35 | { 36 | return Factory::template create(logger_name); 37 | } 38 | 39 | template 40 | inline std::shared_ptr stdout_color_st(const std::string &logger_name) 41 | { 42 | return Factory::template create(logger_name); 43 | } 44 | 45 | template 46 | inline std::shared_ptr stderr_color_mt(const std::string &logger_name) 47 | { 48 | return Factory::template create(logger_name); 49 | } 50 | 51 | template 52 | inline std::shared_ptr stderr_color_st(const std::string &logger_name) 53 | { 54 | return Factory::template create(logger_name); 55 | } 56 | } // namespace spdlog 57 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/rlen4F.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
// Single-precision reciprocal length of a 4-vector:
// 1 / sqrt(x^2 + y^2 + z^2 + w^2).
//
// The magnitudes are ordered and scaled by a power of two taken from the
// largest one before squaring, then the scale is undone on the result.
// Unless the finite-only option is enabled, any infinite component forces
// the result to 0.0f.
CONSTATTR float
MATH_MANGLE(rlen4)(float x, float y, float z, float w)
{
    float a = BUILTIN_ABS_F32(x);
    float b = BUILTIN_ABS_F32(y);
    float c = BUILTIN_ABS_F32(z);
    float d = BUILTIN_ABS_F32(w);

    // The magnitudes are compared through their unsigned bit patterns
    // (all four values have the sign bit cleared by the abs above).
    // Five compare-exchange steps leave a as the largest, d as the smallest
    // and b >= c in between.
    float a1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b)));
    float b1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a), AS_UINT(b)));

    float c1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(c), AS_UINT(d)));
    float d1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(c), AS_UINT(d)));

    a = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a1), AS_UINT(c1)));
    float c2 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a1), AS_UINT(c1)));

    float b2 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b1), AS_UINT(d1)));
    d = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b1), AS_UINT(d1)));

    b = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b2), AS_UINT(c2)));
    c = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b2), AS_UINT(c2)));

    // Scale every component by 2^-e, where e is the exponent of the largest.
    int e = BUILTIN_FREXP_EXP_F32(a);
    a = BUILTIN_FLDEXP_F32(a, -e);
    b = BUILTIN_FLDEXP_F32(b, -e);
    c = BUILTIN_FLDEXP_F32(c, -e);
    d = BUILTIN_FLDEXP_F32(d, -e);

    // rsqrt of the scaled sum of squares (smallest terms accumulated first);
    // the inputs were scaled by 2^-e, so the reciprocal is scaled by 2^-e too.
    float ret = BUILTIN_FLDEXP_F32(BUILTIN_RSQRT_F32(MATH_MAD(a, a, MATH_MAD(b, b, MATH_MAD(c, c, d*d)))), -e);

    if (!FINITE_ONLY_OPT()) {
        ret = (BUILTIN_ISINF_F32(x) |
               BUILTIN_ISINF_F32(y) |
               BUILTIN_ISINF_F32(z) |
               BUILTIN_ISINF_F32(w)) ? 0.0f : ret;
    }

    return ret;
}
42 | list(REMOVE_DUPLICATES dependency_list) 43 | list(SORT dependency_list) 44 | 45 | foreach(file ${dependency_list}) 46 | set(hip_hipcc_depend "${hip_hipcc_depend} \"${file}\"\n") 47 | endforeach() 48 | 49 | file(WRITE ${output_file} "# Generated by: FindHIP.cmake. Do not edit.\nSET(HIP_HIPCC_DEPEND\n ${hip_hipcc_depend})\n\n") 50 | # vim: ts=4:sw=4:expandtab:smartindent 51 | -------------------------------------------------------------------------------- /samples/4_shfl/broadcast.cpp: -------------------------------------------------------------------------------- 1 | #include "hip/hip_runtime.h" 2 | 3 | #include 4 | 5 | #define BUF_SIZE 256 6 | #define WARP_MASK 0x7 7 | #define WARP_SUM 28 8 | 9 | #define HIPCHECK(code) \ 10 | do { \ 11 | hiperr = code; \ 12 | if (hiperr != hipSuccess) { \ 13 | std::cerr << "ERROR on line " << __LINE__ << ": " << (unsigned)hiperr \ 14 | << "\n"; \ 15 | return 1; \ 16 | } \ 17 | } while (0) 18 | 19 | __global__ void bcast(int *out) { 20 | int value = (hipThreadIdx_x & WARP_MASK); 21 | 22 | for (int mask = 1; mask < WARP_MASK; mask *= 2) 23 | value += __shfl_xor(value, mask); 24 | 25 | size_t oi = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; 26 | 27 | out[oi] = value; 28 | } 29 | 30 | int main() { 31 | 32 | int *out = (int *)malloc(sizeof(int) * BUF_SIZE); 33 | int *d_out; 34 | hipError_t hiperr = hipSuccess; 35 | 36 | HIPCHECK(hipMalloc((void **)&d_out, sizeof(int) * BUF_SIZE)); 37 | 38 | hipLaunchKernelGGL(bcast, dim3(1), dim3(BUF_SIZE), 0, 0, d_out); 39 | HIPCHECK(hipGetLastError()); 40 | 41 | HIPCHECK( 42 | hipMemcpy(out, d_out, sizeof(int) * BUF_SIZE, hipMemcpyDeviceToHost)); 43 | 44 | size_t errs = 0; 45 | for (int i = 0; i < BUF_SIZE; i++) { 46 | if (out[i] != WARP_SUM) { 47 | std::cout << "ERROR @ " << i << ": " << out[i] << "\n"; 48 | ++errs; 49 | } 50 | } 51 | 52 | free(out); 53 | HIPCHECK(hipFree(d_out)); 54 | 55 | if (errs != 0) { 56 | std::cout << "FAILED: " << errs << " errors\n"; 57 | return 1; 58 | } else { 59 
| std::cout << "PASSED!\n"; 60 | return 0; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /include/CL/opencl.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2008-2015 The Khronos Group Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and/or associated documentation files (the 6 | * "Materials"), to deal in the Materials without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Materials, and to 9 | * permit persons to whom the Materials are furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included 13 | * in all copies or substantial portions of the Materials. 14 | * 15 | * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 16 | * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 17 | * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 18 | * https://www.khronos.org/registry/ 19 | * 20 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
27 | ******************************************************************************/ 28 | 29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ 30 | 31 | #ifndef __OPENCL_H 32 | #define __OPENCL_H 33 | 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | 43 | #ifdef __cplusplus 44 | } 45 | #endif 46 | 47 | #endif /* __OPENCL_H */ 48 | -------------------------------------------------------------------------------- /spdlog/sinks/base_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | // 8 | // base sink templated over a mutex (either dummy or real) 9 | // concrete implementation should override the sink_it_() and flush_() methods. 10 | // locking is taken care of in this class - no locking needed by the 11 | // implementers.. 
12 | // 13 | 14 | #include "spdlog/common.h" 15 | #include "spdlog/details/log_msg.h" 16 | #include "spdlog/formatter.h" 17 | #include "spdlog/sinks/sink.h" 18 | 19 | namespace spdlog { 20 | namespace sinks { 21 | template 22 | class base_sink : public sink 23 | { 24 | public: 25 | base_sink() = default; 26 | base_sink(const base_sink &) = delete; 27 | base_sink &operator=(const base_sink &) = delete; 28 | 29 | void log(const details::log_msg &msg) final 30 | { 31 | std::lock_guard lock(mutex_); 32 | sink_it_(msg); 33 | } 34 | 35 | void flush() final 36 | { 37 | std::lock_guard lock(mutex_); 38 | flush_(); 39 | } 40 | 41 | void set_pattern(const std::string &pattern) final 42 | { 43 | std::lock_guard lock(mutex_); 44 | set_pattern_(pattern); 45 | } 46 | 47 | void set_formatter(std::unique_ptr sink_formatter) final 48 | { 49 | std::lock_guard lock(mutex_); 50 | set_formatter_(std::move(sink_formatter)); 51 | } 52 | 53 | protected: 54 | virtual void sink_it_(const details::log_msg &msg) = 0; 55 | virtual void flush_() = 0; 56 | 57 | virtual void set_pattern_(const std::string &pattern) 58 | { 59 | set_formatter_(details::make_unique(pattern)); 60 | } 61 | 62 | virtual void set_formatter_(std::unique_ptr sink_formatter) 63 | { 64 | formatter_ = std::move(sink_formatter); 65 | } 66 | Mutex mutex_; 67 | }; 68 | } // namespace sinks 69 | } // namespace spdlog 70 | -------------------------------------------------------------------------------- /spdlog/sinks/basic_file_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015-2018 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #include "spdlog/details/file_helper.h" 13 | #include "spdlog/details/null_mutex.h" 14 | #include "spdlog/sinks/base_sink.h" 15 | 16 | #include 17 | #include 18 | 19 | namespace spdlog { 20 | namespace sinks { 21 | /* 22 | * Trivial file sink with single file as target 23 | */ 24 | template 25 | class basic_file_sink final : public base_sink 26 | { 27 | public: 28 | explicit basic_file_sink(const filename_t &filename, bool truncate = false) 29 | { 30 | file_helper_.open(filename, truncate); 31 | } 32 | 33 | protected: 34 | void sink_it_(const details::log_msg &msg) override 35 | { 36 | fmt::memory_buffer formatted; 37 | sink::formatter_->format(msg, formatted); 38 | file_helper_.write(formatted); 39 | } 40 | 41 | void flush_() override 42 | { 43 | file_helper_.flush(); 44 | } 45 | 46 | private: 47 | details::file_helper file_helper_; 48 | }; 49 | 50 | using basic_file_sink_mt = basic_file_sink; 51 | using basic_file_sink_st = basic_file_sink; 52 | 53 | } // namespace sinks 54 | 55 | // 56 | // factory functions 57 | // 58 | template 59 | inline std::shared_ptr basic_logger_mt(const std::string &logger_name, const filename_t &filename, bool truncate = false) 60 | { 61 | return Factory::template create(logger_name, filename, truncate); 62 | } 63 | 64 | template 65 | inline std::shared_ptr basic_logger_st(const std::string &logger_name, const filename_t &filename, bool truncate = false) 66 | { 67 | return Factory::template create(logger_name, filename, truncate); 68 | } 69 | 70 | } // namespace spdlog 71 | -------------------------------------------------------------------------------- /samples/4_shfl/broadcast2.cpp: -------------------------------------------------------------------------------- 1 | #include "hip/hip_runtime.h" 2 | 3 | #include 4 | 5 | #define BUF_SIZE 256 6 | #define WARP_MASK 0x7 7 | #define EXPECTED 12345 8 | 9 | #define HIPCHECK(code) \ 10 | do { \ 11 | hiperr = code; \ 12 | if (hiperr != hipSuccess) { \ 13 | std::cerr << 
"ERROR on line " << __LINE__ << ": " << (unsigned)hiperr \ 14 | << "\n"; \ 15 | return 1; \ 16 | } \ 17 | } while (0) 18 | 19 | __global__ void bcast(int arg, int *out) { 20 | int value = ((hipThreadIdx_x & WARP_MASK) == 0) ? arg : 0; 21 | 22 | int out_v = __shfl( 23 | value, 0); // Synchronize all threads in warp, and get "value" from lane 0 24 | 25 | size_t oi = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; 26 | out[oi] = out_v; 27 | } 28 | 29 | int main() { 30 | 31 | int *out = (int *)malloc(sizeof(int) * BUF_SIZE); 32 | int *d_out; 33 | hipError_t hiperr = hipSuccess; 34 | 35 | HIPCHECK(hipMalloc((void **)&d_out, sizeof(int) * BUF_SIZE)); 36 | 37 | hipLaunchKernelGGL(bcast, dim3(1), dim3(BUF_SIZE), 0, 0, EXPECTED, d_out); 38 | HIPCHECK(hipGetLastError()); 39 | 40 | HIPCHECK( 41 | hipMemcpy(out, d_out, sizeof(int) * BUF_SIZE, hipMemcpyDeviceToHost)); 42 | 43 | size_t errs = 0; 44 | for (int i = 0; i < BUF_SIZE; i++) { 45 | if (out[i] != EXPECTED) { 46 | std::cout << "ERROR @ " << i << ": " << out[i] << "\n"; 47 | ++errs; 48 | } 49 | } 50 | 51 | free(out); 52 | HIPCHECK(hipFree(d_out)); 53 | 54 | if (errs != 0) { 55 | std::cout << "FAILED: " << errs << " errors\n"; 56 | return 1; 57 | } else { 58 | std::cout << "PASSED!\n"; 59 | return 0; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/LICENSE: -------------------------------------------------------------------------------- 1 | ============================================================================== 2 | ROCm-Device-Libs Release License 3 | ============================================================================== 4 | University of Illinois/NCSA 5 | Open Source License 6 | 7 | Copyright (c) 2014-2016, Advanced Micro Devices, Inc. 8 | All rights reserved. 9 | 10 | Developed by: 11 | 12 | AMD Research and AMD HSA Software Development 13 | 14 | Advanced Micro Devices, Inc. 
15 | 16 | www.amd.com 17 | 18 | Permission is hereby granted, free of charge, to any person obtaining a copy of 19 | this software and associated documentation files (the "Software"), to deal with 20 | the Software without restriction, including without limitation the rights to 21 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 22 | of the Software, and to permit persons to whom the Software is furnished to do 23 | so, subject to the following conditions: 24 | 25 | * Redistributions of source code must retain the above copyright notice, 26 | this list of conditions and the following disclaimers. 27 | 28 | * Redistributions in binary form must reproduce the above copyright notice, 29 | this list of conditions and the following disclaimers in the 30 | documentation and/or other materials provided with the distribution. 31 | 32 | * Neither the names of the LLVM Team, University of Illinois at 33 | Urbana-Champaign, nor the names of its contributors may be used to 34 | endorse or promote products derived from this Software without specific 35 | prior written permission. 36 | 37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 39 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 43 | SOFTWARE. 
// Reciprocal length of a 4-vector: 1 / sqrt(x*x + y*y + z*z + w*w).
//
// The component magnitudes are sorted, scaled by the binary exponent of the
// largest one before being squared, and the scaling is undone on the result.
// Unless FINITE_ONLY_OPT() is set: an all-zero input gives +inf, any NaN
// component gives NaN, and any infinite component gives 0 (applied last, so
// an infinite component wins over NaN).
CONSTATTR double
MATH_MANGLE(rlen4)(double x, double y, double z, double w)
{
    double mx = BUILTIN_ABS_F64(x);
    double my = BUILTIN_ABS_F64(y);
    double mz = BUILTIN_ABS_F64(z);
    double mw = BUILTIN_ABS_F64(w);

    // Five-comparator sorting network on the magnitudes:
    // (x,y) (z,w) (hi,hi) (lo,lo) (middle pair)  ->  s0 >= s1 >= s2 >= s3.
    double hi_xy = BUILTIN_MAX_F64(mx, my);
    double lo_xy = BUILTIN_MIN_F64(mx, my);

    double hi_zw = BUILTIN_MAX_F64(mz, mw);
    double lo_zw = BUILTIN_MIN_F64(mz, mw);

    double s0     = BUILTIN_MAX_F64(hi_xy, hi_zw);
    double mid_hi = BUILTIN_MIN_F64(hi_xy, hi_zw);

    double mid_lo = BUILTIN_MAX_F64(lo_xy, lo_zw);
    double s3     = BUILTIN_MIN_F64(lo_xy, lo_zw);

    double s1 = BUILTIN_MAX_F64(mid_lo, mid_hi);
    double s2 = BUILTIN_MIN_F64(mid_lo, mid_hi);

    // Scale everything by 2^-e, where e is the exponent of the largest term.
    int e = BUILTIN_FREXP_EXP_F64(s0);
    s0 = BUILTIN_FLDEXP_F64(s0, -e);
    s1 = BUILTIN_FLDEXP_F64(s1, -e);
    s2 = BUILTIN_FLDEXP_F64(s2, -e);
    s3 = BUILTIN_FLDEXP_F64(s3, -e);

    // Sum of squares, accumulated from the smallest term to the largest.
    double sumsq = MATH_MAD(s0, s0, MATH_MAD(s1, s1, MATH_MAD(s2, s2, s3*s3)));

    // rsqrt estimate followed by one correction step driven by the residual
    // 1 - sumsq * r * r.
    double r   = BUILTIN_RSQRT_F64(sumsq);
    double res = MATH_MAD(-sumsq*r, r, 1.0);
    r = MATH_MAD(r*res, MATH_MAD(res, 0.375, 0.5), r);

    // 1/len scales inversely, so the same -e undoes the input scaling.
    double ret = BUILTIN_FLDEXP_F64(r, -e);

    if (!FINITE_ONLY_OPT()) {
        // Largest magnitude is zero => the whole vector is zero.
        ret = s0 == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret;

        ret = (BUILTIN_ISNAN_F64(x) |
               BUILTIN_ISNAN_F64(y) |
               BUILTIN_ISNAN_F64(z) |
               BUILTIN_ISNAN_F64(w)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;

        ret = (BUILTIN_ISINF_F64(x) |
               BUILTIN_ISINF_F64(y) |
               BUILTIN_ISINF_F64(z) |
               BUILTIN_ISINF_F64(w)) ? 0.0 : ret;
    }

    return ret;
}
#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
namespace spdlog {
namespace details {

/// Runs a callback repeatedly on a dedicated thread, once per `interval`.
///
/// RAII over the owned thread: the thread is created in the constructor and
/// stopped and joined in the destructor. If the callback is running when the
/// destructor is entered, the destructor waits for it to return.
class periodic_worker
{
public:
    /// @param callback_fun Function invoked from the worker thread after each
    ///                     interval elapses. It is copied into the thread.
    /// @param interval     Period between invocations. A zero or negative
    ///                     interval disables the worker: no thread is started.
    periodic_worker(const std::function<void()> &callback_fun, std::chrono::seconds interval)
    {
        active_ = (interval > std::chrono::seconds::zero());
        if (!active_)
        {
            return;
        }

        worker_thread_ = std::thread([this, callback_fun, interval]() {
            for (;;)
            {
                std::unique_lock<std::mutex> lock(this->mutex_);
                // wait_for returns the predicate's value: true means the
                // destructor cleared active_, false means the interval timed out.
                if (this->cv_.wait_for(lock, interval, [this] { return !this->active_; }))
                {
                    return; // active_ == false, so exit this thread
                }
                // Invoked with mutex_ held, so the destructor cannot clear
                // active_ until the callback has returned.
                callback_fun();
            }
        });
    }

    periodic_worker(const periodic_worker &) = delete;
    periodic_worker &operator=(const periodic_worker &) = delete;

    /// Stops the worker thread and joins it.
    ~periodic_worker()
    {
        if (worker_thread_.joinable())
        {
            {
                // Flip the flag under the mutex so the waiting thread cannot
                // miss the change between its predicate check and its wait.
                std::lock_guard<std::mutex> lock(mutex_);
                active_ = false;
            }
            cv_.notify_one();
            worker_thread_.join();
        }
    }

private:
    bool active_;               // guarded by mutex_ once the thread is running
    std::thread worker_thread_; // not joinable when the worker is disabled
    std::mutex mutex_;
    std::condition_variable cv_;
};
} // namespace details
} // namespace spdlog
// Single-precision inverse complementary error function.
//
// The argument range is split into four pieces:
//   y > 0.625          : delegates to erfinv(1 - y)
//   2^-10 < y <= 0.625 : polynomial in t = -log(y*(2-y)) - 3.125, scaled by (1 - y)
//   2^-42 < y <= 2^-10 : polynomial in t = 1/sqrt(-log(y)), scaled by sqrt(-log(y))
//   y <= 2^-42         : same form as the previous piece, different coefficients
//
// Unless FINITE_ONLY_OPT() is set, the result is then overridden for the
// special arguments: y < 0 or y > 2 gives the QNANBITPATT_SP32 pattern,
// y == 0 gives PINFBITPATT_SP32 and y == 2 gives NINFBITPATT_SP32.
CONSTATTR float
MATH_MANGLE(erfcinv)(float y)
{
    float ret;

    if (y > 0.625f) {
        ret = MATH_MANGLE(erfinv)(1.0f - y);
    } else if (y > 0x1.0p-10f) {
        float t = -MATH_MANGLE(log)(y * (2.0f - y)) - 3.125f;
        // Degree-10 polynomial in t, Horner form; the first literal listed is
        // the highest-order coefficient.
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t,
                  0x1.7ee662p-31f, -0x1.3f5a80p-28f), -0x1.b638f0p-26f), 0x1.c9ccc6p-22f),
                  -0x1.72f8aep-20f), -0x1.d21aa6p-17f), 0x1.87aebcp-13f), -0x1.8455d4p-11f),
                  -0x1.8b6ca4p-8f), 0x1.ebd80cp-3f), 0x1.a755e8p+0f);
        // ret * (1 - y), written as a single multiply-add.
        ret = MATH_MAD(-y, ret, ret);
    } else {
        float s = MATH_FAST_SQRT(-MATH_MANGLE(log)(y));
        float t = MATH_FAST_RCP(s);

        // Two degree-6 polynomials in t = 1/s cover the tail; the split
        // point is 2^-42.
        if (y > 0x1.0p-42f) {
            ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
                  MATH_MAD(t, MATH_MAD(t,
                      -0x1.57221ep+0f, 0x1.7f6144p+1f), -0x1.98dd40p+1f), 0x1.2c9066p+1f),
                      -0x1.3a07eap+0f), -0x1.ba546cp-5f), 0x1.004e66p+0f);
        } else {
            ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
                  MATH_MAD(t, MATH_MAD(t,
                      -0x1.649c6ap+4f, 0x1.8fa8fap+4f), -0x1.a112d8p+3f), 0x1.309d98p+2f),
                      -0x1.919488p+0f), -0x1.c084ecp-6f), 0x1.00143ep+0f);
        }
        ret = s * ret;
    }

    if (!FINITE_ONLY_OPT()) {
        // `|` binds tighter than `?:`, so this selects on (y < 0) OR (y > 2).
        ret = (y < 0.0f) | (y > 2.0f) ? AS_FLOAT(QNANBITPATT_SP32) : ret;
        ret = y == 0.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret;
        ret = y == 2.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret;
    }

    return ret;
}
p*ax; 49 | 50 | if (!FINITE_ONLY_OPT()) { 51 | ret = ax > 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; 52 | ret = ax == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; 53 | } 54 | 55 | return BUILTIN_COPYSIGN_F32(ret, x); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /include/CL/cl_gl_ext.h: -------------------------------------------------------------------------------- 1 | /********************************************************************************** 2 | * Copyright (c) 2008-2019 The Khronos Group Inc. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and/or associated documentation files (the 6 | * "Materials"), to deal in the Materials without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Materials, and to 9 | * permit persons to whom the Materials are furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included 13 | * in all copies or substantial portions of the Materials. 14 | * 15 | * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS 16 | * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS 17 | * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT 18 | * https://www.khronos.org/registry/ 19 | * 20 | * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 23 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 24 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 25 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 26 | * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
27 | **********************************************************************************/ 28 | 29 | #ifndef __OPENCL_CL_GL_EXT_H 30 | #define __OPENCL_CL_GL_EXT_H 31 | 32 | #ifdef __cplusplus 33 | extern "C" { 34 | #endif 35 | 36 | #include 37 | 38 | /* 39 | * cl_khr_gl_event extension 40 | */ 41 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D 42 | 43 | extern CL_API_ENTRY cl_event CL_API_CALL 44 | clCreateEventFromGLsyncKHR(cl_context context, 45 | cl_GLsync cl_GLsync, 46 | cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif /* __OPENCL_CL_GL_EXT_H */ 53 | -------------------------------------------------------------------------------- /include/hip/hip_fatbin.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef HIP_SRC_HIP_FATBIN_H
#define HIP_SRC_HIP_FATBIN_H

// Included here so the header is self-contained: the declarations below use
// uint64_t and std::string, and the project includes that may once have
// supplied them are commented out.
#include <cstdint>
#include <string>

// #include "hip/hip_runtime.h"
// #include "hip_hcc_internal.h"

// hip-clang fatbin format
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"

#define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__"
#define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"

/// Descriptor of one bundle inside a clang offload bundle.
struct __ClangOffloadBundleDesc {
  uint64_t offset;      ///< presumably the byte offset of the bundle's data -- confirm against the reader
  uint64_t size;        ///< presumably the byte size of the bundle's data
  uint64_t tripleSize;  ///< presumably the length of `triple` in bytes
  const char triple[1]; ///< first byte of the triple string; declared with
                        ///< length 1, presumably used as a variable-length tail
};

/// Header found at the start of a clang offload bundle.
struct __ClangOffloadBundleHeader {
  /// Magic bytes, sized to CLANG_OFFLOAD_BUNDLER_MAGIC without its
  /// terminating NUL.
  const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1];
  uint64_t numBundles;              ///< number of descriptors
  __ClangOffloadBundleDesc desc[1]; ///< first descriptor; presumably more follow in memory
};

/// Wrapper record that points at a bundle header.
struct __CudaFatBinaryWrapper {
  unsigned int magic;   ///< presumably compared against __hipFatMAGIC2 -- confirm
  unsigned int version;
  __ClangOffloadBundleHeader* binary;
  void* unused;
};

/// Returns a pointer to the code object for `agent_name` inside the fat
/// binary at `data` (implementation lives elsewhere).
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
                                                const char* agent_name);
/// Dumps the given code object image (implementation lives elsewhere).
void __hipDumpCodeObject(const std::string& image);

#endif // HIP_SRC_HIP_FATBIN_H
13 | // 14 | // Upon each log write the logger: 15 | // 1. Checks if its log level is enough to log the message 16 | // 2. Push a new copy of the message to a queue (or block the caller until 17 | // space is available in the queue) 18 | // 3. will throw spdlog_ex upon log exceptions 19 | // Upon destruction, logs all remaining messages in the queue before 20 | // destructing.. 21 | 22 | #include "spdlog/common.h" 23 | #include "spdlog/logger.h" 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | namespace spdlog { 30 | 31 | // Async overflow policy - block by default. 32 | enum class async_overflow_policy 33 | { 34 | block, // Block until message can be enqueued 35 | overrun_oldest // Discard oldest message in the queue if full when trying to 36 | // add new item. 37 | }; 38 | 39 | namespace details { 40 | class thread_pool; 41 | } 42 | 43 | class async_logger final : public std::enable_shared_from_this, public logger 44 | { 45 | friend class details::thread_pool; 46 | 47 | public: 48 | template 49 | async_logger(std::string logger_name, It begin, It end, std::weak_ptr tp, 50 | async_overflow_policy overflow_policy = async_overflow_policy::block); 51 | 52 | async_logger(std::string logger_name, sinks_init_list sinks_list, std::weak_ptr tp, 53 | async_overflow_policy overflow_policy = async_overflow_policy::block); 54 | 55 | async_logger(std::string logger_name, sink_ptr single_sink, std::weak_ptr tp, 56 | async_overflow_policy overflow_policy = async_overflow_policy::block); 57 | 58 | std::shared_ptr clone(std::string new_name) override; 59 | 60 | protected: 61 | void sink_it_(details::log_msg &msg) override; 62 | void flush_() override; 63 | 64 | void backend_log_(const details::log_msg &incoming_log_msg); 65 | void backend_flush_(); 66 | 67 | private: 68 | std::weak_ptr thread_pool_; 69 | async_overflow_policy overflow_policy_; 70 | }; 71 | } // namespace spdlog 72 | 73 | #include "details/async_logger_impl.h" 74 | 
-------------------------------------------------------------------------------- /spdlog/sinks/dist_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2015 David Schury, Gabi Melman 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 10 | #endif 11 | 12 | #include "base_sink.h" 13 | #include "spdlog/details/log_msg.h" 14 | #include "spdlog/details/null_mutex.h" 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // Distribution sink (mux). Stores a vector of sinks which get called when log 22 | // is called 23 | 24 | namespace spdlog { 25 | namespace sinks { 26 | 27 | template 28 | class dist_sink : public base_sink 29 | { 30 | public: 31 | dist_sink() = default; 32 | dist_sink(const dist_sink &) = delete; 33 | dist_sink &operator=(const dist_sink &) = delete; 34 | 35 | void add_sink(std::shared_ptr sink) 36 | { 37 | std::lock_guard lock(base_sink::mutex_); 38 | sinks_.push_back(sink); 39 | } 40 | 41 | void remove_sink(std::shared_ptr sink) 42 | { 43 | std::lock_guard lock(base_sink::mutex_); 44 | sinks_.erase(std::remove(sinks_.begin(), sinks_.end(), sink), sinks_.end()); 45 | } 46 | 47 | void set_sinks(std::vector> sinks) 48 | { 49 | std::lock_guard lock(base_sink::mutex_); 50 | sinks_ = std::move(sinks); 51 | } 52 | 53 | protected: 54 | void sink_it_(const details::log_msg &msg) override 55 | { 56 | 57 | for (auto &sink : sinks_) 58 | { 59 | if (sink->should_log(msg.level)) 60 | { 61 | sink->log(msg); 62 | } 63 | } 64 | } 65 | 66 | void flush_() override 67 | { 68 | for (auto &sink : sinks_) 69 | { 70 | sink->flush(); 71 | } 72 | } 73 | 74 | void set_pattern_(const std::string &pattern) override 75 | { 76 | set_formatter_(details::make_unique(pattern)); 77 | } 78 | 79 | void set_formatter_(std::unique_ptr sink_formatter) override 80 | { 81 | 
base_sink::formatter_ = std::move(sink_formatter); 82 | for (auto &sink : sinks_) 83 | { 84 | sink->set_formatter(base_sink::formatter_->clone()); 85 | } 86 | } 87 | std::vector> sinks_; 88 | }; 89 | 90 | using dist_sink_mt = dist_sink; 91 | using dist_sink_st = dist_sink; 92 | 93 | } // namespace sinks 94 | } // namespace spdlog 95 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/i0D.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | CONSTATTR double 11 | MATH_MANGLE(i0)(double x) 12 | { 13 | x = BUILTIN_ABS_F64(x); 14 | 15 | double ret; 16 | 17 | if (x < 8.0) { 18 | double t = 0.25 * x * x; 19 | ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 20 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 21 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 22 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 23 | 0x1.dd78750ff79b2p-97, 0x1.4394559531e65p-89), 0x1.6f7123f151c79p-81), 0x1.3d9e7c5528048p-73), 24 | 0x1.e736f323a0cabp-66), 0x1.4196ce3b298c5p-58), 0x1.69caac7bf9255p-51), 0x1.5601878c06ac8p-44), 25 | 0x1.0b313291f5e48p-37), 0x1.522a43f5dcb54p-31), 0x1.522a43f659634p-25), 0x1.02e85c0898945p-19), 26 | 0x1.23456789abcf3p-14), 0x1.c71c71c71c71cp-10), 0x1.c71c71c71c71cp-6), 0x1.0000000000000p-2), 27 | 0x1.0000000000000p+0), 28 | ret = MATH_MAD(t, ret, 1.0); 29 | } else { 30 | double t = MATH_RCP(x); 31 | ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 32 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 33 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 34 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 35 | 
MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 36 | MATH_MAD(t, 37 | 0x1.cc967bacb549dp+49, -0x1.5ba7722975981p+50), 0x1.df0f836763276p+49), -0x1.9042a430f3f43p+48), 38 | 0x1.c630541c4f568p+46), -0x1.7366be5a9784fp+44), 0x1.c5669a48f574ep+41), -0x1.a664cac47f0eap+38), 39 | 0x1.308250566988cp+35), -0x1.56874c2ddb061p+31), 0x1.2da58968da2aap+27), -0x1.9faaa33f0d6bcp+22), 40 | 0x1.be0a8f2bc76ddp+17), -0x1.7123c68c3cb02p+12), 0x1.d402150cc72aap+6), -0x1.7a8ae85359520p+0), 41 | 0x1.bd7e0b6a753cdp-4), 0x1.6d6ce3774506dp-5), 0x1.debdd3d2f7cf9p-6), 0x1.cb94db8d452d5p-6), 42 | 0x1.9884533daea3dp-5), 0x1.9884533d4362fp-2); 43 | double xs = x - 709.0; 44 | double e1 = MATH_MANGLE(exp)(x > 709.0 ? xs : x); 45 | double e2 = x > 709.0 ? 0x1.d422d2be5dc9bp+1022 : 1.0; 46 | ret = e1 * MATH_MANGLE(rsqrt)(x) * ret * e2; 47 | } 48 | 49 | if (!FINITE_ONLY_OPT()) { 50 | ret = BUILTIN_CLASS_F64(x, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? x : ret; 51 | } 52 | 53 | return ret; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/i1D.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
// Double-precision modified Bessel function of the first kind, order 1.
//
// I1 is odd, so all arithmetic is done on a = |x| and the sign of x is put
// back with copysign at the end.
//   |x| <  8 : polynomial in t = (|x|/2)^2
//   |x| >= 8 : exp(|x|) * rsqrt(|x|) * polynomial in 1/|x|
// Unless FINITE_ONLY_OPT() is set, +inf and NaN magnitudes are passed through.
CONSTATTR double
MATH_MANGLE(i1)(double x)
{
    double a = BUILTIN_ABS_F64(x);

    double ret;

    if (a < 8.0) {
        // From here on, a holds |x|/2.
        a *= 0.5;
        double t = a * a;
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
                  0x1.fc892c836e80ap-93, 0x1.432352d94a857p-85), 0x1.588ae4f7b7a4ap-77), 0x1.15e96e9231b49p-69),
                  0x1.8bdcb5f2184d1p-62), 0x1.e26237a1e02fep-55), 0x1.f176aca1a831fp-48), 0x1.ab81e97c83e75p-41),
                  0x1.2c9758e3649ffp-34), 0x1.522a43f5ed306p-28), 0x1.27e4fb778d591p-22), 0x1.845c8a0ce4edap-17),
                  0x1.6c16c16c16c26p-12), 0x1.c71c71c71c71cp-8), 0x1.5555555555555p-4), 0x1.0000000000000p-1);
        ret = MATH_MAD(t, a*ret, a);
    } else {
        double t = MATH_RCP(a);
        ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
              MATH_MAD(t,
                  -0x1.c9d8d43214423p+49, 0x1.5c072e12fb4bap+50), -0x1.e26cff438b6f6p+49), 0x1.952224c61a221p+48),
                  -0x1.cdc7c873cf435p+46), 0x1.7b1e32a15fb86p+44), -0x1.d07dbd6696f1cp+41), 0x1.b227934f2ced2p+38),
                  -0x1.39f23e6685444p+35), 0x1.6229383f6f890p+31), -0x1.38bf1ceeee865p+27), 0x1.b01a348b749b8p+22),
                  -0x1.d0e043ef0916ap+17), 0x1.81b06f82cfbacp+12), -0x1.ea879b2a6508bp+6), 0x1.85cffc8d54f52p+0),
                  -0x1.09f107ee0f7e2p-3), -0x1.d61631539fb0dp-5), -0x1.4f1e01d904ebap-5), -0x1.7efc0ced79c58p-5),
                  -0x1.32633e6e0f07ap-3), 0x1.9884533d43674p-2);

        // The exponential factor must be evaluated on the magnitude a, not on
        // the signed x: for x <= -8, exp(x) underflows towards zero and
        // rsqrt(x) is NaN, whereas the correct result is -I1(|x|).
        //
        // For a > 709, exp(a) is split into exp(a - 709) * e2, where e2 is
        // approximately exp(709), so the large factor enters the product last.
        double ashift = a - 709.0;
        double e1 = MATH_MANGLE(exp)(a > 709.0 ? ashift : a);
        double e2 = a > 709.0 ? 0x1.d422d2be5dc9bp+1022 : 1.0;
        ret = e1 * MATH_MANGLE(rsqrt)(a) * ret * e2;
    }

    if (!FINITE_ONLY_OPT()) {
        // a is still |x| here for these inputs: +inf and NaN both fail the
        // a < 8.0 test, so the halving above never ran.
        ret = BUILTIN_CLASS_F64(a, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? a : ret;
    }

    // Odd symmetry: give the result the sign of the original argument.
    return BUILTIN_COPYSIGN_F64(ret, x);
}
This is because Clang inserts startup 33 | hooks which register SPIR-V binaries; HIPCL at this point compiles each, and for 34 | each program built, creates all kernels. This can take a long time on some implementations. 35 | 36 | * Using the HIP_DYNAMIC_SHARED() macro outside a function scope is not yet supported. 37 | Doing so will likely result in the error: 38 | Assertion `FuncSet.size() <= 1 && "more than one function uses dynamic mem variable!"' failed. 39 | 40 | * HIPCL reports the global memory size from OpenCL as available memory, but unlike with CUDA, 41 | it's not possible to allocate all of that memory in a single block; 42 | HIPCL is limited by CL_DEVICE_MAX_MEM_ALLOC_SIZE. 43 | 44 | * There are some unresolved compiler bugs present in the HIPCL-patched Clang, so compilation 45 | may fail, especially when HIPCL is compiled with the -O0 flag. 46 | 47 | Acknowledgements 48 | ---------------- 49 | 50 | The Customized Parallel Computing research group of Tampere 51 | University, Finland would like to thank the HSA Foundation and the 52 | ECSEL JU project FitOptiVis (project number 783162) for funding 53 | most of the development work for this release. Much appreciated!

# Builds kernellib.bc: every OpenCL C source is compiled to SPIR LLVM bitcode
# under ${CMAKE_CURRENT_BINARY_DIR}/BC, then all bitcode files are linked into
# one ${CMAKE_BINARY_DIR}/kernellib.bc, which is installed to HIPCL_DATA_DIR.

file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/BC")

#Ugly fix for interactions between clang13+ and igc
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
  set(CLANG_CL_NO_STDINC_FLAG "")
else ()
  set(CLANG_CL_NO_STDINC_FLAG "-cl-no-stdinc")
endif ()

# NOTE: ${CLANG_CL_NO_STDINC_FLAG} is deliberately left unquoted in the
# commands below. Unquoted, an empty value expands to no argument at all;
# quoted, it would be handed to the compiler as an empty-string argument.

add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.cl"
        COMMAND "${CMAKE_CXX_COMPILER}"
        ${CLANG_CL_NO_STDINC_FLAG} -Xclang -finclude-default-header
        -O2 -x cl -cl-std=CL2.0
        --target=spir64-unknown-unknown -emit-llvm
        -o "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
        -c "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.cl"
        COMMENT "Building mathlib.bc"
        VERBATIM)
set(DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc")

#add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
#        DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.bc"
#        COMMAND "${CMAKE_COMMAND}" -E copy
#        "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.bc"
#        "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
#        COMMENT "Copying mathlib.bc"
#        VERBATIM)
#set(DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc")

# kernellib sources

# One OCML/<name>.cl per entry; each becomes BC/<name>.bc.
set(SOURCES erfcinvD erfcinvF erfcxD erfcxF erfinvD erfinvF i0D i0F i1D i1F j0D j0F j1D j1F ncdfD ncdfF ncdfinvD ncdfinvF nearbyintD nearbyintF rcbrtD rcbrtF rhypotF rhypotD rlen3D rlen3F rlen4D rlen4F scalbD scalbF scalbnD scalbnF tables y0D y0F y1D y1F)
foreach(SOURCE IN LISTS SOURCES)
  add_custom_command(
      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/OCML/${SOURCE}.cl"
      COMMAND "${CMAKE_CXX_COMPILER}"
      ${CLANG_CL_NO_STDINC_FLAG} -Xclang -finclude-default-header
      -O2 -pthread -x cl -cl-std=CL2.0
      --target=spir64-unknown-unknown -emit-llvm
      -o "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
      -c "${CMAKE_CURRENT_SOURCE_DIR}/OCML/${SOURCE}.cl"
      COMMENT "Building ${SOURCE}.bc"
      VERBATIM)
  list(APPEND DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc")
endforeach()

# kernellib

# Link mathlib.bc and all OCML bitcode files into a single library.
add_custom_command(
  OUTPUT "${CMAKE_BINARY_DIR}/kernellib.bc"
  DEPENDS ${DEPEND_LIST}
  COMMAND "${LLVM_LINK}"
  -o "${CMAKE_BINARY_DIR}/kernellib.bc"
  ${DEPEND_LIST}
  COMMENT "Linking kernellib.bc"
  VERBATIM)

add_custom_target("kernellib_bc" DEPENDS "${CMAKE_BINARY_DIR}/kernellib.bc")

install(FILES "${CMAKE_BINARY_DIR}/kernellib.bc" DESTINATION "${HIPCL_DATA_DIR}")

6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | extern float MATH_PRIVATE(cosb)(float, int, float); 11 | extern CONSTATTR float MATH_PRIVATE(bp0)(float); 12 | extern CONSTATTR float MATH_PRIVATE(ba0)(float); 13 | 14 | PUREATTR float 15 | MATH_MANGLE(j0)(float x) 16 | { 17 | x = BUILTIN_ABS_F32(x); // only |x| is used from here on 18 | 19 | const float b0 = 1.65625f; 20 | const float b1 = 3.125f; 21 | const float b2 = 4.6875f; 22 | const float b3 = 6.265625f; 23 | const float b4 = 7.84375f; 24 | const float b5 = 9.421875f; 25 | const float b6 = 10.984375f; 26 | const float b7 = 12.578125f; 27 | 28 | float ret; 29 | 30 | if (x <= b7) { 31 | // Try to maintain relative accuracy here: pick the sub-interval (bounded by b0..b7) that contains x, subtract that interval's ch and cl from x, and evaluate the interval's 9 table coefficients p[0..8] 32 | 33 | USE_TABLE(float, p, M32_J0); 34 | float ch, cl; 35 | 36 | if (x <= b3) { 37 | if (x <= b0) { 38 | ch = 0x0.000000p+0f; 39 | cl = 0x0.000000p+0f; 40 | } else if (x <= b1) { 41 | ch = 0x1.33d152p+1f; 42 | cl = 0x1.d2e368p-24f; 43 | p += 1*9; 44 | } else if (x <= b2) { 45 | ch = 0x1.ea7558p+1f; 46 | cl = -0x1.4a121ep-24f; 47 | p += 2*9; 48 | } else { 49 | ch = 0x1.6148f6p+2f; 50 | cl = -0x1.34f46ep-24f; 51 | p += 3*9; 52 | } 53 | } else { 54 | if (x <= b4) { 55 | ch = 0x1.c0ff60p+2f; 56 | cl = -0x1.8971b6p-23f; 57 | p += 4*9; 58 | } else if (x <= b5) { 59 | ch = 0x1.14eb56p+3f; 60 | cl = 0x1.999bdap-22f; 61 | p += 5*9; 62 | } else if (x <= b6) { 63 | ch = 0x1.458d0ep+3f; 64 | cl = -0x1.e8407ap-22f; 65 | p += 6*9; 66 | } else { 67 | ch = 0x1.795440p+3f; 68 | cl = 0x1.04e56cp-26f; 69 | p += 7*9; 70 | } 71 | } 72 | 73 | x = x - ch - cl; 74 | ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 75 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 76 | p[8], p[7]), p[6]), p[5]), p[4]), 77 | p[3]), p[2]), p[1]), p[0]); 78 | } else { 79 | float r = MATH_RCP(x); 80 | float r2 = r*r; 81 | float p = MATH_PRIVATE(bp0)(r2) * r; 82 | ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(x) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p); 83 | ret =
BUILTIN_CLASS_F32(x, CLASS_PINF) ? 0.0f : ret; 84 | } 85 | 86 | return ret; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/j1F.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathF.h" 9 | 10 | extern float MATH_PRIVATE(cosb)(float, int, float); 11 | extern CONSTATTR float MATH_PRIVATE(bp1)(float); 12 | extern CONSTATTR float MATH_PRIVATE(ba1)(float); 13 | 14 | PUREATTR float 15 | MATH_MANGLE(j1)(float x) 16 | { 17 | const float b0 = 1.09375f; 18 | const float b1 = 2.84375f; 19 | const float b2 = 4.578125f; 20 | const float b3 = 6.171875f; 21 | const float b4 = 7.78125f; 22 | const float b5 = 9.359375f; 23 | const float b6 = 10.953125f; 24 | const float b7 = 12.515625f; 25 | 26 | float ax = BUILTIN_ABS_F32(x); 27 | float ret; 28 | 29 | if (ax <= b7) { 30 | // Try to maintain relative accuracy here: pick the sub-interval (bounded by b0..b7) that contains ax, subtract that interval's ch and cl, and evaluate the interval's 9 table coefficients p[0..8] 31 | 32 | USE_TABLE(float, p, M32_J1); 33 | float ch, cl; 34 | 35 | if (ax <= b3) { 36 | if (ax <= b0) { 37 | ch = 0.0f; 38 | cl = 0.0f; 39 | } else if (ax <= b1) { 40 | ch = 0x1.d757d2p+0f; 41 | cl = -0x1.375c60p-32f; 42 | p += 1*9; 43 | } else if (ax <= b2) { 44 | ch = 0x1.ea7558p+1f; 45 | cl = -0x1.4a121ep-24f; 46 | p += 2*9; 47 | } else { 48 | ch = 0x1.55365cp+2f; 49 | cl = -0x1.fe6dccp-25f; 50 | p += 3*9; 51 | } 52 | } else { 53 | if (ax <= b4) { 54 | ch = 0x1.c0ff60p+2f; 55 | cl = -0x1.8971b6p-23f; 56 | p += 4*9; 57 | } else if (ax <= b5) { 58 | ch = 0x1.112980p+3f; 59 | cl = 0x1.e17114p-22f; 60 | p += 5*9; 61 | } else if (ax <= b6) { 62 | ch = 0x1.458d0ep+3f; 63 | cl = -0x1.e8407ap-22f; 64 | p += 6*9; 65 | } else { 66 | ch =
0x1.769798p+3f; 67 | cl = -0x1.a04694p-23f; 68 | p += 7*9; 69 | } 70 | } 71 | 72 | ax = ax - ch - cl; 73 | ret = MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, 74 | MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, 75 | p[8], p[7]), p[6]), p[5]), p[4]), 76 | p[3]), p[2]), p[1]), p[0]); 77 | } else { // evaluate on ax = |x|: a reciprocal square root of a negative x would be NaN; the sign for x < 0 is restored by the negation below 78 | float r = MATH_RCP(ax); 79 | float r2 = r*r; 80 | float p = MATH_PRIVATE(bp1)(r2) * r; 81 | ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(ax) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p); 82 | ret = BUILTIN_CLASS_F32(ax, CLASS_PINF) ? 0.0f : ret; 83 | } 84 | 85 | if (x < 0.0f) 86 | ret = -ret; 87 | 88 | return ret; 89 | } 90 | 91 | -------------------------------------------------------------------------------- /samples/hcc_dialects/vadd_hip.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include "hip/hip_runtime.h" 24 | #include 25 | #include 26 | 27 | __global__ void vadd_hip(const float* a, const float* b, float* c, int N) { 28 | int idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); 29 | 30 | if (idx < N) { 31 | c[idx] = a[idx] + b[idx]; 32 | } 33 | } 34 | 35 | 36 | int main(int argc, char* argv[]) { 37 | int sizeElements = 1000000; 38 | size_t sizeBytes = sizeElements * sizeof(float); 39 | bool pass = true; 40 | 41 | // Allocate host memory 42 | float* A_h = (float*)malloc(sizeBytes); 43 | float* B_h = (float*)malloc(sizeBytes); 44 | float* C_h = (float*)malloc(sizeBytes); 45 | 46 | // Allocate device memory: 47 | float *A_d, *B_d, *C_d; 48 | hipMalloc((void**)&A_d, sizeBytes); 49 | hipMalloc((void**)&B_d, sizeBytes); 50 | hipMalloc((void**)&C_d, sizeBytes); 51 | 52 | // Initialize host memory 53 | for (int i = 0; i < sizeElements; i++) { 54 | A_h[i] = 1.618f * i; 55 | B_h[i] = 3.142f * i; 56 | } 57 | 58 | // H2D Copy 59 | hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice); 60 | hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice); 61 | 62 | // Launch kernel onto default accelerator 63 | int blockSize = 256; // pick arbitrary block size 64 | int blocks = (sizeElements + blockSize - 1) / blockSize; // round up to launch enough blocks 65 | hipLaunchKernelGGL(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements); 66 | 67 | // D2H Copy 68 | hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost); 69 | 70 | // Verify 71 | for (int i = 0; i < sizeElements; i++) { 72 | float ref = 1.618f * i + 3.142f * i; 73 | if (C_h[i] != ref) { 74 | printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref); 75 | pass = false; 76 | } 
77 | }; 78 | if (pass) printf("PASSED!\n"); 79 | } 80 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/j0D.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | extern double MATH_PRIVATE(cosb)(double, int, double); 11 | extern CONSTATTR double MATH_PRIVATE(bp0)(double); 12 | extern CONSTATTR double MATH_PRIVATE(ba0)(double); 13 | 14 | PUREATTR double 15 | MATH_MANGLE(j0)(double x) 16 | { 17 | x = BUILTIN_ABS_F64(x); // only |x| is used from here on 18 | 19 | const double b0 = 1.65625; 20 | const double b1 = 3.125; 21 | const double b2 = 4.6875; 22 | const double b3 = 6.265625; 23 | const double b4 = 7.84375; 24 | const double b5 = 9.421875; 25 | const double b6 = 10.984375; 26 | const double b7 = 12.578125; 27 | 28 | double ret; 29 | 30 | if (x <= b7) { 31 | // Try to maintain relative accuracy here: pick the sub-interval (bounded by b0..b7) that contains x; ch, cl and the table offset (15 entries per interval) are selected together 32 | 33 | USE_TABLE(double, p, M64_J0); 34 | double ch, cl; 35 | 36 | if (x <= b3) { 37 | if (x <= b0) { 38 | ch = 0.0; 39 | cl = 0.0; 40 | } else if (x <= b1) { 41 | ch = 0x1.33d152e971b40p+1; 42 | cl = -0x1.0f539d7da258ep-53; 43 | p += 1*15; 44 | } else if (x <= b2) { 45 | ch = 0x1.ea75575af6f09p+1; 46 | cl = -0x1.60155a9d1b256p-53; 47 | p += 2*15; 48 | } else { 49 | ch = 0x1.6148f5b2c2e45p+2; 50 | cl = 0x1.75054cd60a517p-54; 51 | p += 3*15; 52 | } 53 | } else { 54 | if (x <= b4) { 55 | ch = 0x1.c0ff5f3b47250p+2; 56 | cl = -0x1.b226d9d243827p-54; 57 | p += 4*15; 58 | } else if (x <= b5) { 59 | ch = 0x1.14eb56cccdecap+3; 60 | cl = -0x1.51970714c7c25p-52; 61 | p += 5*15; 62 | } else if (x <= b6) { 63 | ch = 0x1.458d0d0bdfc29p+3; 64 | cl = 0x1.02610a51562b6p-51; 65 | p +=
6*15; 66 | } else { 67 | ch = 0x1.79544008272b6p+3; 68 | cl = 0x1.444fd5821d5b1p-52; 69 | p += 7*15; 70 | } 71 | } 72 | 73 | x = x - ch - cl; 74 | ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 75 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 76 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 77 | MATH_MAD(x, MATH_MAD(x, 78 | p[14], p[13]), p[12]), 79 | p[11]), p[10]), p[9]), p[8]), 80 | p[7]), p[6]), p[5]), p[4]), 81 | p[3]), p[2]), p[1]), p[0]); 82 | 83 | } else { 84 | double r = MATH_RCP(x); 85 | double r2 = r*r; 86 | double p = MATH_PRIVATE(bp0)(r2) * r; 87 | ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p); 88 | ret = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 0.0 : ret; 89 | } 90 | 91 | return ret; 92 | } 93 | 94 | -------------------------------------------------------------------------------- /cxxopts/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | This is the changelog for `cxxopts`, a C++11 library for parsing command line 4 | options. The project adheres to semantic versioning. 5 | 6 | ## 2.2 7 | 8 | ### Changed 9 | 10 | * Allow integers to have leading zeroes. 11 | * Build the tests by default. 12 | * Don't check for container when showing positional help. 13 | 14 | ### Added 15 | 16 | * Iterator inputs to `parse_positional`. 17 | * Throw an exception if the option in `parse_positional` doesn't exist. 18 | * Parse a delimited list in a single argument for vector options. 19 | * Add an option to disable implicit value on booleans. 20 | 21 | ### Bug Fixes 22 | 23 | * Fix a warning about possible loss of data. 24 | * Fix version numbering in CMakeLists.txt 25 | * Remove unused declaration of the undefined `ParseResult::get_option`. 26 | * Throw on invalid option syntax when beginning with a `-`. 27 | * Throw in `as` when option wasn't present. 28 | * Fix catching exceptions by reference. 
29 | * Fix out of bounds errors parsing integers. 30 | 31 | ## 2.1.1 32 | 33 | ### Bug Fixes 34 | 35 | * Revert the change adding `const` type for `argv`, because most users expect 36 | to pass a non-const `argv` from `main`. 37 | 38 | ## 2.1 39 | 40 | ### Changed 41 | 42 | * Options with implicit arguments now require the `--option=value` form if 43 | they are to be specified with an option. This is to remove the ambiguity 44 | when a positional argument could follow an option with an implicit value. 45 | For example, `--foo value`, where `foo` has an implicit value, will be 46 | parsed as `--foo=implicit` and a positional argument `value`. 47 | * Boolean values are no longer special, but are just an option with a default 48 | and implicit value. 49 | 50 | ### Added 51 | 52 | * Added support for `std::optional` as a storage type. 53 | * Allow the help string to be customised. 54 | * Use `const` for the type in the `argv` parameter, since the contents of the 55 | arguments is never modified. 56 | 57 | ### Bug Fixes 58 | 59 | * Building against GCC 4.9 was broken due to overly strict shadow warnings. 60 | * Fixed an ambiguous overload in the `parse_positional` function when an 61 | `initializer_list` was directly passed. 62 | * Fixed precedence in the Boolean value regex. 63 | 64 | ## 2.0 65 | 66 | ### Changed 67 | 68 | * `Options::parse` returns a ParseResult rather than storing the parse 69 | result internally. 70 | * Options with default values now get counted as appearing once if they 71 | were not specified by the user. 72 | 73 | ### Added 74 | 75 | * A new `ParseResult` object that is the immutable result of parsing. It 76 | responds to the same `count` and `operator[]` as `Options` of 1.x did. 77 | * The function `ParseResult::arguments` returns a vector of the parsed 78 | arguments to iterate through in the order they were provided. 79 | * The symbol `cxxopts::version` for the version of the library. 
80 | * Booleans can be specified with various strings and explicitly set false. 81 | 82 | ## 1.x 83 | 84 | The 1.x series was the first major version of the library, with release numbers 85 | starting to follow semantic versioning, after 0.x being unstable. It never had 86 | a changelog maintained for it. Releases mostly contained bug fixes, with the 87 | occasional feature added. 88 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/erfcxD.cl: -------------------------------------------------------------------------------- 1 | 2 | #include "mathD.h" 3 | 4 | CONSTATTR double 5 | MATH_MANGLE(erfcx)(double x) 6 | { 7 | double ax = BUILTIN_ABS_F64(x); 8 | double ret; 9 | 10 | if (ax < 1.0) { 11 | ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 12 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 13 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 14 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 15 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 16 | MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, 17 | 0x1.997339112da12p-29, -0x1.9a1485b7ae337p-27), 18 | 0x1.9548ab4c5bb56p-26), -0x1.2f88b47e02dc3p-24), 19 | 0x1.282114351c39ap-22), -0x1.e533a426aadd7p-21), 20 | 0x1.723131b8ef11ep-19), -0x1.188f6b08d66b9p-17), 21 | 0x1.a00995a561233p-16), -0x1.2aeb04681fed5p-14), 22 | 0x1.a01b9d82bcaa5p-13), -0x1.182d3bb1ac2c8p-11), 23 | 0x1.6c16a932f49d1p-10), -0x1.c74aef6905182p-9), 24 | 0x1.111111f403407p-7), -0x1.390379458257cp-6), 25 | 0x1.5555554b34536p-5), -0x1.6023e8de7793p-4), 26 | 0x1.5555555597342p-3), -0x1.341f6bc020c17p-2), 27 | 0x1.fffffffffe5aep-2), -0x1.812746b037cadp-1), 28 | 0x1.000000000001dp0), -0x1.20dd750429b6ap0), 29 | 0x1.0p0); 30 | } else if (ax < 5120.0) { 31 | double t = MATH_DIV(ax - 4.0, ax + 4.0); 32 | ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 33 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 34 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 35 | MATH_MAD(t, MATH_MAD(t, 
MATH_MAD(t, MATH_MAD(t, 36 | MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 37 | MATH_MAD(t, MATH_MAD(t, 38 | 0.14981549849751462e-8, -0.69954933359042387e-8), 39 | -0.15965692247743744e-7), 0.92967132363414431e-7), 40 | 0.70214215034531004e-7), -0.80204958740421079e-6), 41 | 0.29923810132862422e-6), 0.56895739871851154e-5), 42 | -0.11226090578381133e-4), -0.2438781785281914e-4), 43 | 0.00015062360829881126), -0.00019926094025574419), 44 | -0.00075777387606136804), 0.0050319709983606006), 45 | -0.016197733946788412), 0.037167515387099868), 46 | -0.066330365824435124), 0.093732835010698844), 47 | -0.10103906603561565), 0.068097054254223675), 48 | 0.015379652102604634), -0.13962111684055725), 49 | 1.2329951186255526); 50 | ret = MATH_DIV(ret, MATH_MAD(ax, 2.0, 1.0)); 51 | } else { 52 | const double one_over_sqrtpi = 0x1.20dd750429b6dp-1; 53 | double z = MATH_RCP(x * x); 54 | ret = MATH_DIV(one_over_sqrtpi, x) * MATH_MAD(z, MATH_MAD(z, 0.375, -0.5), 1.0); 55 | } 56 | 57 | if (x <= -1.0) { 58 | double x2h = ax * ax; 59 | double x2l = BUILTIN_FMA_F64(ax, ax, -x2h); 60 | ret = MATH_MANGLE(exp)(x2h) * MATH_MANGLE(exp)(x2l) * 2.0 - ret; 61 | ret = x < -27.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; 62 | } 63 | 64 | return ret; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/j1D.cl: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | #include "mathD.h" 9 | 10 | extern double MATH_PRIVATE(cosb)(double, int, double); 11 | extern CONSTATTR double MATH_PRIVATE(bp1)(double); 12 | extern CONSTATTR double MATH_PRIVATE(ba1)(double); 13 | 14 | 15 | PUREATTR double 16 | MATH_MANGLE(j1)(double x) 17 | { 18 | const double b0 = 1.09375; 19 | const double b1 = 2.84375; 20 | const double b2 = 4.578125; 21 | const double b3 = 6.171875; 22 | const double b4 = 7.78125; 23 | const double b5 = 9.359375; 24 | const double b6 = 10.953125; 25 | const double b7 = 12.515625; 26 | 27 | double ax = BUILTIN_ABS_F64(x); 28 | double ret; 29 | 30 | if (ax <= b7) { 31 | // Try to maintain relative accuracy here: pick the sub-interval (bounded by b0..b7) that contains ax, subtract that interval's ch and cl, and evaluate the interval's 15 table coefficients p[0..14] 32 | 33 | USE_TABLE(double, p, M64_J1); 34 | double ch, cl; 35 | 36 | if (ax <= b3) { 37 | if (ax <= b0) { 38 | ch = 0.0; 39 | cl = 0.0; 40 | } else if (ax <= b1) { 41 | ch = 0x1.d757d1fec8a3ap+0; 42 | cl = 0x1.616d820cfdaebp-58; 43 | p += 1*15; 44 | } else if (ax <= b2) { 45 | ch = 0x1.ea75575af6f09p+1; 46 | cl = -0x1.60155a9d1b256p-53; 47 | p += 2*15; 48 | } else { 49 | ch = 0x1.55365bc032467p+2; 50 | cl = 0x1.5c646a75d7539p-53; 51 | p += 3*15; 52 | } 53 | } else { 54 | if (ax <= b4) { 55 | ch = 0x1.c0ff5f3b47250p+2; 56 | cl = -0x1.b226d9d243827p-54; 57 | p += 4*15; 58 | } else if (ax <= b5) { 59 | ch = 0x1.112980f0b88a1p+3; 60 | cl = -0x1.63e17ec20a31dp-53; 61 | p += 5*15; 62 | } else if (ax <= b6) { 63 | ch = 0x1.458d0d0bdfc29p+3; 64 | cl = 0x1.02610a51562b6p-51; 65 | p += 6*15; 66 | } else { 67 | ch = 0x1.76979797ee5acp+3; 68 | cl = 0x1.9a84d3a5fedc2p-51; 69 | p += 7*15; 70 | } 71 | } 72 | 73 | ax = ax - ch - cl; 74 | 75 | ret = MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, 76 | MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, 77 | MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, 78 | MATH_MAD(ax, MATH_MAD(ax, 79 | p[14], p[13]), p[12]), 80 | p[11]), p[10]), p[9]), p[8]), 81 | p[7]), p[6]), p[5]), p[4]), 82 |
p[3]), p[2]), p[1]), p[0]); 83 | } else { // evaluate on ax = |x|: the square root of a negative reciprocal would be NaN; the sign for x < 0 is restored by the negation below 84 | double r = MATH_RCP(ax); 85 | double r2 = r*r; 86 | double p = MATH_PRIVATE(bp1)(r2) * r; 87 | ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p); 88 | ret = BUILTIN_CLASS_F64(ax, CLASS_PINF) ? 0.0 : ret; 89 | } 90 | 91 | if (x < 0.0) 92 | ret = -ret; 93 | 94 | return ret; 95 | } 96 | 97 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(bitcode) 2 | 3 | ################################################################################### 4 | 5 | find_package(OpenCL 2.0 REQUIRED) 6 | 7 | set(HIPCL_SOURCES hipcl.cc backend.cc log.cc spirv.cc) 8 | 9 | set_source_files_properties(${HIPCL_SOURCES} PROPERTIES LANGUAGE CXX) 10 | 11 | add_library(hipcl SHARED ${HIPCL_SOURCES}) 12 | 13 | set_target_properties(hipcl PROPERTIES 14 | CXX_STANDARD_REQUIRED ON 15 | SOVERSION "${LIB_API_VERSION}" 16 | VERSION "${LIB_BUILD_VERSION}") 17 | 18 | target_link_libraries(hipcl ${SANITIZER_LIBS} ${PTHREAD_LIBRARY} ${OpenCL_LIBRARIES}) 19 | 20 | add_dependencies("hipcl" "kernellib_bc") 21 | 22 | if(DEBUG) 23 | target_compile_definitions(hipcl PRIVATE "_GLIBCXX_DEBUG") 24 | endif() 25 | 26 | if(LOGLEVEL) 27 | set(VALID_LEVELS "DEBUG;INFO;WARN;ERROR;CRITICAL;OFF") 28 | if(LOGLEVEL IN_LIST VALID_LEVELS) 29 | target_compile_definitions(hipcl PRIVATE "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${LOGLEVEL}") 30 | else() 31 | message(WARNING "Unknown loglevel: ${LOGLEVEL}, ignoring") 32 | endif() 33 | endif() 34 | 35 | target_compile_options(hipcl PRIVATE "-Wno-unused-parameter") 36 | 37 | target_compile_options(hipcl INTERFACE "-x" "hip") 38 | 39 | if(SANITIZER_OPTIONS) 40 | target_compile_options(hipcl INTERFACE ${SANITIZER_OPTIONS}) 41 | target_compile_options(hipcl PRIVATE ${SANITIZER_OPTIONS}) 42 | endif() 43 | 44 | target_compile_options(hipcl INTERFACE
45 | "$" 46 | "$" 47 | "--hip-device-lib=kernellib.bc") 48 | 49 | # for LLVM passes 50 | target_compile_options(hipcl INTERFACE 51 | "$" 52 | "$") 53 | 54 | target_include_directories(hipcl 55 | PUBLIC 56 | "$" 57 | "$" 58 | PRIVATE 59 | "${CMAKE_SOURCE_DIR}" 60 | "${CMAKE_SOURCE_DIR}/include" 61 | ) 62 | 63 | install(TARGETS hipcl 64 | EXPORT "hip-targets" 65 | LIBRARY DESTINATION "${HIPCL_LIB_DIR}" 66 | ARCHIVE DESTINATION "${HIPCL_LIB_DIR}" 67 | ) 68 | 69 | install(EXPORT "hip-targets" 70 | NAMESPACE "hip::" 71 | DESTINATION "${HIPCL_CMAKE_DIR}") 72 | 73 | install(FILES 74 | "${CMAKE_SOURCE_DIR}/include/hip/hipcl.hh" 75 | "${CMAKE_SOURCE_DIR}/include/hip/hipcl_mathlib.hh" 76 | "${CMAKE_SOURCE_DIR}/include/hip/hip_fatbin.h" 77 | "${CMAKE_SOURCE_DIR}/include/hip/hip_fp16.h" 78 | "${CMAKE_SOURCE_DIR}/include/hip/hip_runtime.h" 79 | "${CMAKE_SOURCE_DIR}/include/hip/hip_vector_types.h" 80 | DESTINATION "${HIPCL_INC_DIR}/hip") 81 | 82 | include(CMakePackageConfigHelpers) 83 | 84 | configure_package_config_file( 85 | "${CMAKE_CURRENT_SOURCE_DIR}/hipcl-config.cmake.in" 86 | "${CMAKE_BINARY_DIR}/hipConfig.cmake" 87 | INSTALL_DESTINATION "${HIPCL_CMAKE_DIR}" 88 | PATH_VARS HIPCL_INC_DIR HIPCL_LIB_DIR HIPCL_BIN_DIR HIPCL_COMPILER 89 | ) 90 | 91 | write_basic_package_version_file( 92 | "${CMAKE_BINARY_DIR}/hipConfigVersion.cmake" 93 | COMPATIBILITY SameMajorVersion 94 | ) 95 | -------------------------------------------------------------------------------- /spdlog/sinks/syslog_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #include "spdlog/sinks/base_sink.h" 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace spdlog { 19 | namespace sinks { 20 | /** 21 | * Sink that writes to syslog using the `syslog()` library call. 22 | * 23 | * Locking is not needed, as `syslog()` itself is thread-safe. 24 | */ 25 | template 26 | class syslog_sink : public base_sink 27 | { 28 | public: 29 | // Fills the spdlog-level -> syslog-priority table and opens the syslog connection. 30 | explicit syslog_sink(std::string ident = "", int syslog_option = 0, int syslog_facility = LOG_USER) 31 | : ident_(std::move(ident)) 32 | { 33 | priorities_[static_cast(level::trace)] = LOG_DEBUG; 34 | priorities_[static_cast(level::debug)] = LOG_DEBUG; 35 | priorities_[static_cast(level::info)] = LOG_INFO; 36 | priorities_[static_cast(level::warn)] = LOG_WARNING; 37 | priorities_[static_cast(level::err)] = LOG_ERR; 38 | priorities_[static_cast(level::critical)] = LOG_CRIT; 39 | priorities_[static_cast(level::off)] = LOG_INFO; 40 | 41 | // set ident to be program name if empty (nullptr is passed to openlog in that case) 42 | ::openlog(ident_.empty() ? nullptr : ident_.c_str(), syslog_option, syslog_facility); 43 | } 44 | 45 | ~syslog_sink() override 46 | { 47 | ::closelog(); 48 | } 49 | 50 | syslog_sink(const syslog_sink &) = delete; 51 | syslog_sink &operator=(const syslog_sink &) = delete; 52 | 53 | protected: 54 | void sink_it_(const details::log_msg &msg) override 55 | { 56 | ::syslog(syslog_prio_from_level(msg), "%s", fmt::to_string(msg.payload).c_str()); 57 | } 58 | 59 | void flush_() override {} 60 | 61 | private: 62 | std::array priorities_; 63 | // must store the ident because the openlog man page says openlog might use the pointer as 64 | // is and not a string copy 65 | const std::string ident_; 66 | 67 | // 68 | // Simply maps spdlog's log level to syslog priority level.
69 | // 70 | int syslog_prio_from_level(const details::log_msg &msg) const 71 | { 72 | return priorities_[static_cast(msg.level)]; 73 | } 74 | }; 75 | 76 | using syslog_sink_mt = syslog_sink; 77 | using syslog_sink_st = syslog_sink; 78 | } // namespace sinks 79 | 80 | // Create and register a syslog logger 81 | template 82 | inline std::shared_ptr syslog_logger_mt( 83 | const std::string &logger_name, const std::string &syslog_ident = "", int syslog_option = 0, int syslog_facility = (1 << 3)) 84 | { 85 | return Factory::template create(logger_name, syslog_ident, syslog_option, syslog_facility); 86 | } 87 | 88 | template 89 | inline std::shared_ptr syslog_logger_st( 90 | const std::string &logger_name, const std::string &syslog_ident = "", int syslog_option = 0, int syslog_facility = (1 << 3)) 91 | { 92 | return Factory::template create(logger_name, syslog_ident, syslog_option, syslog_facility); 93 | } 94 | } // namespace spdlog 95 | -------------------------------------------------------------------------------- /samples/6_dynamic_shared/hipDynamicShared2.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #include "hip/hip_runtime.h" 23 | #include 24 | #include 25 | 26 | #define LEN 16 * 1024 27 | #define SIZE LEN * 4 28 | 29 | #define HIPCHECK(code) \ 30 | do { \ 31 | hiperr = code; \ 32 | if (hiperr != hipSuccess) { \ 33 | std::cerr << "ERROR on line " << __LINE__ << ": " << (unsigned)hiperr \ 34 | << "\n"; \ 35 | return 1; \ 36 | } \ 37 | } while (0) 38 | 39 | __global__ void vectorAdd(float *Ad, float *Bd) { 40 | HIP_DYNAMIC_SHARED(float, sBd); 41 | int tx = threadIdx.x; 42 | for (int i = 0; i < LEN / 64; i++) { 43 | sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f; 44 | Bd[tx + i * 64] = sBd[tx + i * 64]; 45 | } 46 | } 47 | 48 | int main() { 49 | size_t errors = 0; 50 | hipError_t hiperr = hipSuccess; 51 | float *A, *B, *Ad, *Bd; 52 | A = new float[LEN]; 53 | B = new float[LEN]; 54 | for (int i = 0; i < LEN; i++) { 55 | A[i] = 1.0f; 56 | B[i] = 1.0f; 57 | } 58 | HIPCHECK(hipMalloc((void **)&Ad, SIZE)); 59 | HIPCHECK(hipMalloc((void **)&Bd, SIZE)); 60 | HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); 61 | HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); 62 | hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd); 63 | HIPCHECK(hipGetLastError()); 64 | HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); 65 | for (int i = 0; i < LEN; i++) { 66 | if (B[i] < 1.0f || B[i] > 3.0f) 67 | ++errors; 68 | } 69 | HIPCHECK(hipFree(Ad)); 70 | HIPCHECK(hipFree(Bd)); 71 | delete[] A; 72 | delete[] B; 73 | 74 | if (errors != 0) { 75 | 
std::cout << "hipDynamicShared2 FAILED: " << errors << " errors\n"; 76 | return 1; 77 | } else { 78 | std::cout << "hipDynamicShared2 PASSED!\n"; 79 | return 0; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /spdlog/async.h: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Copyright(c) 2018 Gabi Melman. 4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 5 | // 6 | 7 | #pragma once 8 | 9 | // 10 | // Async logging using global thread pool 11 | // All loggers created here share the same global thread pool. 12 | // Each log message is pushed to a queue along with a shared pointer to the 13 | // logger. 14 | // If a logger is deleted while having pending messages in the queue, its actual 15 | // destruction is deferred 16 | // until all its messages are processed by the thread pool. 17 | // This is because each message in the queue holds a shared_ptr to the 18 | // originating logger. 19 | 20 | #include "spdlog/async_logger.h" 21 | #include "spdlog/details/registry.h" 22 | #include "spdlog/details/thread_pool.h" 23 | 24 | #include 25 | #include 26 | 27 | namespace spdlog { 28 | 29 | namespace details { 30 | static const size_t default_async_q_size = 8192; 31 | } 32 | 33 | // async logger factory - creates async loggers backed with thread pool. 34 | // if a global thread pool doesn't already exist, create it with default queue 35 | // size of 8192 items and single thread. 36 | template 37 | struct async_factory_impl 38 | { 39 | template 40 | static std::shared_ptr create(std::string logger_name, SinkArgs &&... args) 41 | { 42 | auto &registry_inst = details::registry::instance(); 43 | 44 | // create the global thread pool if it does not already exist
45 | std::lock_guard tp_lock(registry_inst.tp_mutex()); 46 | auto tp = registry_inst.get_tp(); 47 | if (tp == nullptr) 48 | { 49 | tp = std::make_shared(details::default_async_q_size, 1); 50 | registry_inst.set_tp(tp); 51 | } 52 | 53 | auto sink = std::make_shared(std::forward(args)...); 54 | auto new_logger = std::make_shared(std::move(logger_name), std::move(sink), std::move(tp), OverflowPolicy); 55 | registry_inst.initialize_logger(new_logger); 56 | return new_logger; 57 | } 58 | }; 59 | 60 | using async_factory = async_factory_impl; 61 | using async_factory_nonblock = async_factory_impl; 62 | 63 | template 64 | inline std::shared_ptr create_async(std::string logger_name, SinkArgs &&... sink_args) 65 | { 66 | return async_factory::create(std::move(logger_name), std::forward(sink_args)...); 67 | } 68 | 69 | template 70 | inline std::shared_ptr create_async_nb(std::string logger_name, SinkArgs &&... sink_args) 71 | { 72 | return async_factory_nonblock::create(std::move(logger_name), std::forward(sink_args)...); 73 | } 74 | 75 | // set global thread pool. 76 | inline void init_thread_pool(size_t q_size, size_t thread_count) 77 | { 78 | auto tp = std::make_shared(q_size, thread_count); 79 | details::registry::instance().set_tp(std::move(tp)); 80 | } 81 | 82 | // get the global thread pool. 83 | inline std::shared_ptr thread_pool() 84 | { 85 | return details::registry::instance().get_tp(); 86 | } 87 | } // namespace spdlog 88 | -------------------------------------------------------------------------------- /spdlog/sinks/stdout_sinks.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #include "spdlog/details/console_globals.h" 13 | #include "spdlog/details/null_mutex.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace spdlog { 20 | 21 | namespace sinks { 22 | 23 | template 24 | class stdout_sink final : public sink 25 | { 26 | public: 27 | using mutex_t = typename ConsoleMutex::mutex_t; 28 | stdout_sink() 29 | : mutex_(ConsoleMutex::mutex()) 30 | , file_(TargetStream::stream()) 31 | { 32 | } 33 | ~stdout_sink() override = default; 34 | 35 | stdout_sink(const stdout_sink &other) = delete; 36 | stdout_sink &operator=(const stdout_sink &other) = delete; 37 | 38 | void log(const details::log_msg &msg) override 39 | { 40 | std::lock_guard lock(mutex_); 41 | fmt::memory_buffer formatted; 42 | formatter_->format(msg, formatted); 43 | fwrite(formatted.data(), sizeof(char), formatted.size(), file_); 44 | fflush(TargetStream::stream()); 45 | } 46 | 47 | void flush() override 48 | { 49 | std::lock_guard lock(mutex_); 50 | fflush(file_); 51 | } 52 | 53 | void set_pattern(const std::string &pattern) override 54 | { 55 | std::lock_guard lock(mutex_); 56 | formatter_ = std::unique_ptr(new pattern_formatter(pattern)); 57 | } 58 | 59 | void set_formatter(std::unique_ptr sink_formatter) override 60 | { 61 | std::lock_guard lock(mutex_); 62 | formatter_ = std::move(sink_formatter); 63 | } 64 | 65 | private: 66 | mutex_t &mutex_; 67 | FILE *file_; 68 | }; 69 | 70 | using stdout_sink_mt = stdout_sink; 71 | using stdout_sink_st = stdout_sink; 72 | 73 | using stderr_sink_mt = stdout_sink; 74 | using stderr_sink_st = stdout_sink; 75 | 76 | } // namespace sinks 77 | 78 | // factory methods 79 | template 80 | inline std::shared_ptr stdout_logger_mt(const std::string &logger_name) 81 | { 82 | return Factory::template create(logger_name); 83 | } 84 | 85 | template 86 | inline std::shared_ptr stdout_logger_st(const std::string &logger_name) 87 | { 88 | return Factory::template create(logger_name); 89 | } 90 | 91 | template 92 | 
inline std::shared_ptr stderr_logger_mt(const std::string &logger_name) 93 | { 94 | return Factory::template create(logger_name); 95 | } 96 | 97 | template 98 | inline std::shared_ptr stderr_logger_st(const std::string &logger_name) 99 | { 100 | return Factory::template create(logger_name); 101 | } 102 | } // namespace spdlog 103 | -------------------------------------------------------------------------------- /spdlog/details/async_logger_impl.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | // async logger implementation 9 | // uses a thread pool to perform the actual logging 10 | 11 | #include "spdlog/details/thread_pool.h" 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | template 18 | inline spdlog::async_logger::async_logger( 19 | std::string logger_name, It begin, It end, std::weak_ptr tp, async_overflow_policy overflow_policy) 20 | : logger(std::move(logger_name), begin, end) 21 | , thread_pool_(std::move(tp)) 22 | , overflow_policy_(overflow_policy) 23 | { 24 | } 25 | 26 | inline spdlog::async_logger::async_logger( 27 | std::string logger_name, sinks_init_list sinks_list, std::weak_ptr tp, async_overflow_policy overflow_policy) 28 | : async_logger(std::move(logger_name), sinks_list.begin(), sinks_list.end(), std::move(tp), overflow_policy) 29 | { 30 | } 31 | 32 | inline spdlog::async_logger::async_logger( 33 | std::string logger_name, sink_ptr single_sink, std::weak_ptr tp, async_overflow_policy overflow_policy) 34 | : async_logger(std::move(logger_name), {std::move(single_sink)}, std::move(tp), overflow_policy) 35 | { 36 | } 37 | 38 | // send the log message to the thread pool 39 | inline void spdlog::async_logger::sink_it_(details::log_msg &msg) 40 | { 41 | #if defined(SPDLOG_ENABLE_MESSAGE_COUNTER) 42 | incr_msg_counter_(msg); 43 | #endif 44 | if (auto 
pool_ptr = thread_pool_.lock()) 45 | { 46 | pool_ptr->post_log(shared_from_this(), msg, overflow_policy_); 47 | } 48 | else 49 | { 50 | throw spdlog_ex("async log: thread pool doesn't exist anymore"); 51 | } 52 | } 53 | 54 | // send flush request to the thread pool 55 | inline void spdlog::async_logger::flush_() 56 | { 57 | if (auto pool_ptr = thread_pool_.lock()) 58 | { 59 | pool_ptr->post_flush(shared_from_this(), overflow_policy_); 60 | } 61 | else 62 | { 63 | throw spdlog_ex("async flush: thread pool doesn't exist anymore"); 64 | } 65 | } 66 | 67 | // 68 | // backend functions - called from the thread pool to do the actual job 69 | // 70 | inline void spdlog::async_logger::backend_log_(const details::log_msg &incoming_log_msg) 71 | { 72 | try 73 | { 74 | for (auto &s : sinks_) 75 | { 76 | if (s->should_log(incoming_log_msg.level)) 77 | { 78 | s->log(incoming_log_msg); 79 | } 80 | } 81 | } 82 | SPDLOG_CATCH_AND_HANDLE 83 | 84 | if (should_flush_(incoming_log_msg)) 85 | { 86 | backend_flush_(); 87 | } 88 | } 89 | 90 | inline void spdlog::async_logger::backend_flush_() 91 | { 92 | try 93 | { 94 | for (auto &sink : sinks_) 95 | { 96 | sink->flush(); 97 | } 98 | } 99 | SPDLOG_CATCH_AND_HANDLE 100 | } 101 | 102 | inline std::shared_ptr spdlog::async_logger::clone(std::string new_name) 103 | { 104 | auto cloned = std::make_shared(std::move(new_name), sinks_.begin(), sinks_.end(), thread_pool_, overflow_policy_); 105 | 106 | cloned->set_level(this->level()); 107 | cloned->flush_on(this->flush_level()); 108 | cloned->set_error_handler(this->error_handler()); 109 | return std::move(cloned); 110 | } 111 | -------------------------------------------------------------------------------- /samples/10_memcpy3D/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define BLOCKSIZE_x 16 6 | #define BLOCKSIZE_y 16 7 | 8 | #define N 128 9 | #define M 64 10 | #define W 16 11 | 12 | 13 | 14 | 
/*****************/ 15 | /* HIP MEMCHECK */ 16 | /*****************/ 17 | 18 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 19 | 20 | inline void gpuAssert(hipError_t code, std::string file, int line, bool abort=true) 21 | { 22 | 23 | if (code != hipSuccess) 24 | { 25 | fprintf(stderr,"GPUassert: %s %s %dn", hipGetErrorString(code), file.c_str(), line); 26 | 27 | if (abort) { exit(code); } 28 | } 29 | } 30 | 31 | 32 | 33 | /*******************/ 34 | /* iDivUp FUNCTION */ 35 | /*******************/ 36 | 37 | int iDivUp(int a, int b){ return ((a % b) != 0) ? (a / b + 1) : (a / b); } 38 | 39 | 40 | /******************/ 41 | /* TEST KERNEL 3D */ 42 | /******************/ 43 | 44 | __global__ void test_kernel_3D(hipPitchedPtr devPitchedPtr) 45 | { 46 | 47 | int tidx = blockIdx.x*blockDim.x+threadIdx.x; 48 | int tidy = blockIdx.y*blockDim.y+threadIdx.y; 49 | 50 | char* devPtr = (char*) devPitchedPtr.ptr; 51 | size_t pitch = devPitchedPtr.pitch; 52 | size_t slicePitch = pitch * N; 53 | 54 | for (int w = 0; w < W; w++) 55 | { 56 | char* slice = devPtr + w * slicePitch; 57 | float* row = (float*)(slice + tidy * pitch); 58 | row[tidx] = row[tidx] * row[tidx]; 59 | } 60 | } 61 | 62 | 63 | /********/ 64 | /* MAIN */ 65 | /********/ 66 | 67 | int main() 68 | { 69 | float a[N][M][W]; 70 | 71 | for (int i=0; idevice memcopy 82 | hipExtent extent{M * sizeof(float), N, W}; 83 | 84 | hipPitchedPtr devPitchedPtr; 85 | 86 | gpuErrchk(hipMalloc3D(&devPitchedPtr, extent)); 87 | 88 | hipMemcpy3DParms p = { 0 }; 89 | 90 | p.srcPtr.ptr = a; 91 | p.srcPtr.pitch = M * sizeof(float); 92 | p.srcPtr.xsize = M; 93 | p.srcPtr.ysize = N; 94 | p.dstPtr.ptr = devPitchedPtr.ptr; 95 | p.dstPtr.pitch = devPitchedPtr.pitch; 96 | p.dstPtr.xsize = M; 97 | p.dstPtr.ysize = N; 98 | p.extent.width = M * sizeof(float); 99 | p.extent.height = N; 100 | p.extent.depth = W; 101 | p.kind = hipMemcpyHostToDevice; 102 | 103 | gpuErrchk(hipMemcpy3D(&p)); 104 | 105 | dim3 
GridSize(iDivUp(M,BLOCKSIZE_x),iDivUp(N,BLOCKSIZE_y)); 106 | 107 | dim3 BlockSize(BLOCKSIZE_y,BLOCKSIZE_x); 108 | 109 | hipLaunchKernelGGL(test_kernel_3D, dim3(GridSize), dim3(BlockSize), 0, 0, devPitchedPtr); 110 | 111 | gpuErrchk(hipPeekAtLastError()); 112 | 113 | gpuErrchk(hipDeviceSynchronize()); 114 | p.srcPtr.ptr = devPitchedPtr.ptr; 115 | p.srcPtr.pitch = devPitchedPtr.pitch; 116 | p.dstPtr.ptr = a; 117 | p.dstPtr.pitch = M * sizeof(float); 118 | p.kind = hipMemcpyDeviceToHost; 119 | 120 | gpuErrchk(hipMemcpy3D(&p)); 121 | 122 | int error = 0; 123 | for (int i=0; i= 220 && !defined(CL_VERSION_2_2) 51 | #define CL_VERSION_2_2 1 52 | #endif 53 | #if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) 54 | #define CL_VERSION_2_1 1 55 | #endif 56 | #if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) 57 | #define CL_VERSION_2_0 1 58 | #endif 59 | #if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) 60 | #define CL_VERSION_1_2 1 61 | #endif 62 | #if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) 63 | #define CL_VERSION_1_1 1 64 | #endif 65 | #if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) 66 | #define CL_VERSION_1_0 1 67 | #endif 68 | 69 | /* Allow deprecated APIs for older OpenCL versions. 
*/ 70 | #if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) 71 | #define CL_USE_DEPRECATED_OPENCL_2_1_APIS 72 | #endif 73 | #if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) 74 | #define CL_USE_DEPRECATED_OPENCL_2_0_APIS 75 | #endif 76 | #if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) 77 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS 78 | #endif 79 | #if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) 80 | #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 81 | #endif 82 | #if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) 83 | #define CL_USE_DEPRECATED_OPENCL_1_0_APIS 84 | #endif 85 | 86 | #endif /* __CL_VERSION_H */ 87 | -------------------------------------------------------------------------------- /spdlog/sinks/android_sink.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright(c) 2015 Gabi Melman. 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 4 | // 5 | 6 | #pragma once 7 | 8 | #ifndef SPDLOG_H 9 | #error "spdlog.h must be included before this file." 
10 | #endif 11 | 12 | #include "spdlog/details/fmt_helper.h" 13 | #include "spdlog/details/null_mutex.h" 14 | #include "spdlog/details/os.h" 15 | #include "spdlog/sinks/base_sink.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #if !defined(SPDLOG_ANDROID_RETRIES) 24 | #define SPDLOG_ANDROID_RETRIES 2 25 | #endif 26 | 27 | namespace spdlog { 28 | namespace sinks { 29 | 30 | /* 31 | * Android sink (logging using __android_log_write) 32 | */ 33 | template 34 | class android_sink final : public base_sink 35 | { 36 | public: 37 | explicit android_sink(std::string tag = "spdlog", bool use_raw_msg = false) 38 | : tag_(std::move(tag)) 39 | , use_raw_msg_(use_raw_msg) 40 | { 41 | } 42 | 43 | protected: 44 | void sink_it_(const details::log_msg &msg) override 45 | { 46 | const android_LogPriority priority = convert_to_android_(msg.level); 47 | fmt::memory_buffer formatted; 48 | if (use_raw_msg_) 49 | { 50 | details::fmt_helper::append_string_view(msg.payload, formatted); 51 | } 52 | else 53 | { 54 | sink::formatter_->format(msg, formatted); 55 | } 56 | formatted.push_back('\0'); 57 | const char *msg_output = formatted.data(); 58 | 59 | // See system/core/liblog/logger_write.c for explanation of return value 60 | int ret = __android_log_write(priority, tag_.c_str(), msg_output); 61 | int retry_count = 0; 62 | while ((ret == -11 /*EAGAIN*/) && (retry_count < SPDLOG_ANDROID_RETRIES)) 63 | { 64 | details::os::sleep_for_millis(5); 65 | ret = __android_log_write(priority, tag_.c_str(), msg_output); 66 | retry_count++; 67 | } 68 | 69 | if (ret < 0) 70 | { 71 | throw spdlog_ex("__android_log_write() failed", ret); 72 | } 73 | } 74 | 75 | void flush_() override {} 76 | 77 | private: 78 | static android_LogPriority convert_to_android_(spdlog::level::level_enum level) 79 | { 80 | switch (level) 81 | { 82 | case spdlog::level::trace: 83 | return ANDROID_LOG_VERBOSE; 84 | case spdlog::level::debug: 85 | return ANDROID_LOG_DEBUG; 86 | case 
spdlog::level::info: 87 | return ANDROID_LOG_INFO; 88 | case spdlog::level::warn: 89 | return ANDROID_LOG_WARN; 90 | case spdlog::level::err: 91 | return ANDROID_LOG_ERROR; 92 | case spdlog::level::critical: 93 | return ANDROID_LOG_FATAL; 94 | default: 95 | return ANDROID_LOG_DEFAULT; 96 | } 97 | } 98 | 99 | std::string tag_; 100 | bool use_raw_msg_; 101 | }; 102 | 103 | using android_sink_mt = android_sink; 104 | using android_sink_st = android_sink; 105 | } // namespace sinks 106 | 107 | // Create and register android syslog logger 108 | 109 | template 110 | inline std::shared_ptr android_logger_mt(const std::string &logger_name, const std::string &tag = "spdlog") 111 | { 112 | return Factory::template create(logger_name, tag); 113 | } 114 | 115 | template 116 | inline std::shared_ptr android_logger_st(const std::string &logger_name, const std::string &tag = "spdlog") 117 | { 118 | return Factory::template create(logger_name, tag); 119 | } 120 | 121 | } // namespace spdlog 122 | -------------------------------------------------------------------------------- /spdlog/details/fmt_helper.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by gabi on 6/15/18. 
3 | // 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include "spdlog/fmt/fmt.h" 10 | 11 | // Some fmt helpers to efficiently format and pad ints and strings 12 | namespace spdlog { 13 | namespace details { 14 | namespace fmt_helper { 15 | 16 | template 17 | inline spdlog::string_view_t to_string_view(const fmt::basic_memory_buffer &buf) SPDLOG_NOEXCEPT 18 | { 19 | return spdlog::string_view_t(buf.data(), buf.size()); 20 | } 21 | 22 | template 23 | inline void append_buf(const fmt::basic_memory_buffer &buf, fmt::basic_memory_buffer &dest) 24 | { 25 | auto *buf_ptr = buf.data(); 26 | dest.append(buf_ptr, buf_ptr + buf.size()); 27 | } 28 | 29 | template 30 | inline void append_string_view(spdlog::string_view_t view, fmt::basic_memory_buffer &dest) 31 | { 32 | auto *buf_ptr = view.data(); 33 | if (buf_ptr != nullptr) 34 | { 35 | dest.append(buf_ptr, buf_ptr + view.size()); 36 | } 37 | } 38 | 39 | template 40 | inline void append_int(T n, fmt::basic_memory_buffer &dest) 41 | { 42 | fmt::format_int i(n); 43 | dest.append(i.data(), i.data() + i.size()); 44 | } 45 | 46 | template 47 | inline unsigned count_digits(T n) 48 | { 49 | using count_type = typename std::conditional<(sizeof(T) > sizeof(uint32_t)), uint64_t, uint32_t>::type; 50 | return fmt::internal::count_digits(static_cast(n)); 51 | } 52 | 53 | template 54 | inline void pad2(int n, fmt::basic_memory_buffer &dest) 55 | { 56 | if (n > 99) 57 | { 58 | append_int(n, dest); 59 | } 60 | else if (n > 9) // 10-99 61 | { 62 | dest.push_back(static_cast('0' + n / 10)); 63 | dest.push_back(static_cast('0' + n % 10)); 64 | } 65 | else if (n >= 0) // 0-9 66 | { 67 | dest.push_back('0'); 68 | dest.push_back(static_cast('0' + n)); 69 | } 70 | else // negatives (unlikely, but just in case, let fmt deal with it) 71 | { 72 | fmt::format_to(dest, "{:02}", n); 73 | } 74 | } 75 | 76 | template 77 | inline void pad_uint(T n, unsigned int width, fmt::basic_memory_buffer &dest) 78 | { 79 | 
static_assert(std::is_unsigned::value, "pad_uint must get unsigned T"); 80 | auto digits = count_digits(n); 81 | if (width > digits) 82 | { 83 | const char *zeroes = "0000000000000000000"; 84 | dest.append(zeroes, zeroes + width - digits); 85 | } 86 | append_int(n, dest); 87 | } 88 | 89 | template 90 | inline void pad3(T n, fmt::basic_memory_buffer &dest) 91 | { 92 | pad_uint(n, 3, dest); 93 | } 94 | 95 | template 96 | inline void pad6(T n, fmt::basic_memory_buffer &dest) 97 | { 98 | pad_uint(n, 6, dest); 99 | } 100 | 101 | template 102 | inline void pad9(T n, fmt::basic_memory_buffer &dest) 103 | { 104 | pad_uint(n, 9, dest); 105 | } 106 | 107 | // return fraction of a second of the given time_point. 108 | // e.g. 109 | // fraction(tp) -> will return the millis part of the second 110 | template 111 | inline ToDuration time_fraction(const log_clock::time_point &tp) 112 | { 113 | using std::chrono::duration_cast; 114 | using std::chrono::seconds; 115 | auto duration = tp.time_since_epoch(); 116 | auto secs = duration_cast(duration); 117 | return duration_cast(duration) - duration_cast(secs); 118 | } 119 | 120 | } // namespace fmt_helper 121 | } // namespace details 122 | } // namespace spdlog 123 | -------------------------------------------------------------------------------- /spdlog/details/mpmc_blocking_q.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // 4 | // Copyright(c) 2018 Gabi Melman. 5 | // Distributed under the MIT License (http://opensource.org/licenses/MIT) 6 | // 7 | 8 | // multi producer-multi consumer blocking queue. 9 | // enqueue(..) - will block until room found to put the new message. 10 | // enqueue_nowait(..) - will return immediately with false if no room left in 11 | // the queue. 12 | // dequeue_for(..) - will block until the queue is not empty or timeout have 13 | // passed. 
14 | 15 | #include "spdlog/details/circular_q.h" 16 | 17 | #include 18 | #include 19 | 20 | namespace spdlog { 21 | namespace details { 22 | 23 | template 24 | class mpmc_blocking_queue 25 | { 26 | public: 27 | using item_type = T; 28 | explicit mpmc_blocking_queue(size_t max_items) 29 | : q_(max_items) 30 | { 31 | } 32 | 33 | #ifndef __MINGW32__ 34 | // try to enqueue and block if no room left 35 | void enqueue(T &&item) 36 | { 37 | { 38 | std::unique_lock lock(queue_mutex_); 39 | pop_cv_.wait(lock, [this] { return !this->q_.full(); }); 40 | q_.push_back(std::move(item)); 41 | } 42 | push_cv_.notify_one(); 43 | } 44 | 45 | // enqueue immediately. overrun oldest message in the queue if no room left. 46 | void enqueue_nowait(T &&item) 47 | { 48 | { 49 | std::unique_lock lock(queue_mutex_); 50 | q_.push_back(std::move(item)); 51 | } 52 | push_cv_.notify_one(); 53 | } 54 | 55 | // try to dequeue item. if no item found. wait upto timeout and try again 56 | // Return true, if succeeded dequeue item, false otherwise 57 | bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) 58 | { 59 | { 60 | std::unique_lock lock(queue_mutex_); 61 | if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) 62 | { 63 | return false; 64 | } 65 | q_.pop_front(popped_item); 66 | } 67 | pop_cv_.notify_one(); 68 | return true; 69 | } 70 | 71 | #else 72 | // apparently mingw deadlocks if the mutex is released before cv.notify_one(), 73 | // so release the mutex at the very end each function. 74 | 75 | // try to enqueue and block if no room left 76 | void enqueue(T &&item) 77 | { 78 | std::unique_lock lock(queue_mutex_); 79 | pop_cv_.wait(lock, [this] { return !this->q_.full(); }); 80 | q_.push_back(std::move(item)); 81 | push_cv_.notify_one(); 82 | } 83 | 84 | // enqueue immediately. overrun oldest message in the queue if no room left. 
85 | void enqueue_nowait(T &&item) 86 | { 87 | std::unique_lock lock(queue_mutex_); 88 | q_.push_back(std::move(item)); 89 | push_cv_.notify_one(); 90 | } 91 | 92 | // try to dequeue item. if no item found. wait upto timeout and try again 93 | // Return true, if succeeded dequeue item, false otherwise 94 | bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration) 95 | { 96 | std::unique_lock lock(queue_mutex_); 97 | if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); })) 98 | { 99 | return false; 100 | } 101 | q_.pop_front(popped_item); 102 | pop_cv_.notify_one(); 103 | return true; 104 | } 105 | 106 | #endif 107 | 108 | size_t overrun_counter() 109 | { 110 | std::unique_lock lock(queue_mutex_); 111 | return q_.overrun_counter(); 112 | } 113 | 114 | private: 115 | std::mutex queue_mutex_; 116 | std::condition_variable push_cv_; 117 | std::condition_variable pop_cv_; 118 | spdlog::details::circular_q q_; 119 | }; 120 | } // namespace details 121 | } // namespace spdlog 122 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/mathF.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 
6 | *===------------------------------------------------------------------------*/ 7 | 8 | // OCML prototypes 9 | //#include "ocml.h" 10 | 11 | // Tables 12 | #include "tables.h" 13 | 14 | // Builtins 15 | //#include "builtins.h" 16 | 17 | // Mangling 18 | #define MATH_MANGLE(N) N 19 | #define MATH_PRIVATE(N) __priv##N 20 | 21 | // mine 22 | #define MATH_MAD(x, y, z) mad(x, y, z) 23 | #define FINITE_ONLY_OPT() 0 24 | #define BUILTIN_FMA_F32(x, y, z) fma(x, y, z) 25 | #define MATH_SQRT(x) sqrt(x) 26 | #define MATH_RCP(x) native_recip(x) 27 | #define AS_FLOAT(x) as_float(x) 28 | #define AS_INT(x) as_int(x) 29 | #define AS_UINT(x) as_uint(x) 30 | #define BUILTIN_ABS_F32(x) fabs(x) 31 | #define BUILTIN_COPYSIGN_F32(x, y) copysign(x, y) 32 | #define HAVE_FAST_FMA32() 1 33 | #define BUILTIN_RSQRT_F32(x) native_rsqrt(x) 34 | #define MATH_FAST_RCP(x) native_recip(x) 35 | #define MATH_FAST_DIV(x, y) ((x) / (y)) 36 | #define MATH_FAST_SQRT(x) native_sqrt(x) 37 | #define MATH_DIV(x, y) ((x) / (y)) 38 | 39 | #define BUILTIN_CLAMP_F32(x, y, z) clamp(x, y, z) 40 | #define BUILTIN_MAX_U32(x, y) max(x, y) 41 | #define BUILTIN_MIN_U32(x, y) min(x, y) 42 | #define BUILTIN_ISINF_F32(x) isinf(x) 43 | #define BUILTIN_ISNAN_F32(x) isnan(x) 44 | 45 | #define BUILTIN_LOG2_F32(x) native_log2(x) 46 | #define BUILTIN_EXP2_F32(x) native_exp2(x) 47 | 48 | #define BUILTIN_RINT_F32(x) rint(x) 49 | 50 | 51 | static inline int frexp_exp(float x) { 52 | int e; 53 | float mant = frexp(x, &e); 54 | return e; 55 | } 56 | 57 | #define BUILTIN_FREXP_EXP_F32(x) frexp_exp(x) 58 | #define BUILTIN_FLDEXP_F32(x, k) ldexp(x, k) 59 | 60 | 61 | // Optimization Controls 62 | //#include "opts.h" 63 | 64 | // Attributes 65 | #define PUREATTR __attribute__((pure)) __attribute__((overloadable)) 66 | #define CONSTATTR __attribute__((const)) __attribute__((overloadable)) 67 | 68 | // Math controls 69 | //#include "privF.h" 70 | 71 | // Floating point patterns 72 | #define SIGNBIT_SP32 (int)0x80000000 73 | #define 
EXSIGNBIT_SP32 0x7fffffff 74 | #define EXPBITS_SP32 0x7f800000 75 | #define MANTBITS_SP32 0x007fffff 76 | #define ONEEXPBITS_SP32 0x3f800000 77 | #define TWOEXPBITS_SP32 0x40000000 78 | #define HALFEXPBITS_SP32 0x3f000000 79 | #define IMPBIT_SP32 0x00800000 80 | #define QNANBITPATT_SP32 0x7fc00000 81 | #define PINFBITPATT_SP32 0x7f800000 82 | #define NINFBITPATT_SP32 (int)0xff800000 83 | #define EXPBIAS_SP32 127 84 | #define EXPSHIFTBITS_SP32 23 85 | #define BIASEDEMIN_SP32 1 86 | #define EMIN_SP32 -126 87 | #define BIASEDEMAX_SP32 254 88 | #define EMAX_SP32 127 89 | #define MANTLENGTH_SP32 24 90 | #define BASEDIGITS_SP32 7 91 | 92 | #define CLASS_PINF 2 93 | #define CLASS_NINF 4 94 | #define CLASS_QNAN 8 95 | #define CLASS_SNAN 16 96 | #define CLASS_PSUB 32 97 | #define CLASS_NSUB 64 98 | #define CLASS_PZER 128 99 | #define CLASS_NZER 256 100 | /* Returns -1 when x belongs to any class selected in klass (bitwise OR of CLASS_* flags), 0 otherwise. */ 101 | 102 | static inline int CONSTATTR BUILTIN_CLASS_F32(float x, int klass) 103 | { 104 | if ((klass & CLASS_PINF) && (as_int(x) == PINFBITPATT_SP32)) 105 | return -1; 106 | if ((klass & CLASS_NINF) && (as_int(x) == NINFBITPATT_SP32)) 107 | return -1; 108 | /* NaN: with the sign bit cleared, the bit pattern is strictly above +Inf (all-ones exponent, non-zero mantissa). Masking with QNANBITPATT_SP32 alone matched ordinary finite values such as 1.0f. */ 109 | if ((klass & (CLASS_QNAN|CLASS_SNAN)) && ((as_int(x) & EXSIGNBIT_SP32) > PINFBITPATT_SP32)) 110 | return -1; 111 | 112 | if ((klass & (CLASS_NZER|CLASS_PZER)) && ((as_int(x) & (~SIGNBIT_SP32)) == 0) ) 113 | return -1; 114 | 115 | if ( 116 | (klass & (CLASS_NSUB|CLASS_PSUB)) && 117 | ( 118 | ((as_int(x) & EXPBITS_SP32) == 0) && ((as_int(x) & MANTBITS_SP32) != 0) 119 | ) 120 | ) 121 | return -1; 122 | 123 | return 0; 124 | } 125 | 126 | // declarations 127 | 128 | PUREATTR float j1(float x); 129 | PUREATTR float j0(float x); 130 | CONSTATTR float erfinv(float x); 131 | CONSTATTR float erfcinv(float x); 132 | -------------------------------------------------------------------------------- /samples/fp16/fp16_conversion.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved. 
2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions 5 | // are met: 6 | // * Redistributions of source code must retain the above copyright 7 | // notice, this list of conditions and the following disclaimer. 8 | // * Redistributions in binary form must reproduce the above copyright 9 | // notice, this list of conditions and the following disclaimer in the 10 | // documentation and/or other materials provided with the distribution. 11 | // * Neither the name of NVIDIA CORPORATION nor the names of its 12 | // contributors may be used to endorse or promote products derived 13 | // from this software without specific prior written permission. 14 | // 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 | // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | // This code modified from the public domain code here: 28 | // https://gist.github.com/rygorous/2156668 29 | // The URL above includes more robust conversion routines 30 | // that handle Inf and NaN correctly. 31 | // 32 | // It is recommended to use the more robust versions in production code. 
33 | 34 | typedef unsigned uint; 35 | 36 | union FP32 { 37 | uint u; 38 | float f; 39 | struct { 40 | uint Mantissa : 23; 41 | uint Exponent : 8; 42 | uint Sign : 1; 43 | }; 44 | }; 45 | 46 | union FP16 { 47 | unsigned short u; 48 | struct { 49 | uint Mantissa : 10; 50 | uint Exponent : 5; 51 | uint Sign : 1; 52 | }; 53 | }; 54 | 55 | // Approximate solution. This is faster but converts some sNaNs to 56 | // infinity and doesn't round correctly. Handle with care. 57 | // Approximate solution. This is faster but converts some sNaNs to 58 | // infinity and doesn't round correctly. Handle with care. 59 | static const half approx_float_to_half(float fl) { 60 | FP32 f32infty = {255 << 23}; 61 | FP32 f16max = {(127 + 16) << 23}; 62 | FP32 magic = {15 << 23}; 63 | FP32 expinf = {(255 ^ 31) << 23}; 64 | uint sign_mask = 0x80000000u; 65 | FP16 o = {0}; 66 | 67 | FP32 f = *((FP32 *)&fl); 68 | 69 | uint sign = f.u & sign_mask; 70 | f.u ^= sign; 71 | 72 | if (!(f.f < f32infty.u)) // Inf or NaN 73 | o.u = f.u ^ expinf.u; 74 | else { 75 | if (f.f > f16max.f) 76 | f.f = f16max.f; 77 | f.f *= magic.f; 78 | } 79 | 80 | o.u = f.u >> 13; // Take the mantissa bits 81 | o.u |= sign >> 16; 82 | return *((half *)&o); 83 | } 84 | 85 | // from half->float code - just for verification. 86 | static float half_to_float(half hf) { 87 | FP16 h = *((FP16 *)&hf); 88 | 89 | static const FP32 magic = {113 << 23}; 90 | static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift 91 | FP32 o; 92 | 93 | o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits 94 | uint exp = shifted_exp & o.u; // just the exponent 95 | o.u += (127 - 15) << 23; // exponent adjust 96 | 97 | // handle exponent special cases 98 | if (exp == shifted_exp) // Inf/NaN? 99 | o.u += (128 - 16) << 23; // extra exp adjust 100 | else if (exp == 0) // Zero/Denormal? 
101 | { 102 | o.u += 1 << 23; // extra exp adjust 103 | o.f -= magic.f; // renormalize 104 | } 105 | 106 | o.u |= (h.u & 0x8000) << 16; // sign bit 107 | return o.f; 108 | } 109 | -------------------------------------------------------------------------------- /lib/bitcode/OCML/mathD.h: -------------------------------------------------------------------------------- 1 | /*===-------------------------------------------------------------------------- 2 | * ROCm Device Libraries 3 | * 4 | * This file is distributed under the University of Illinois Open Source 5 | * License. See LICENSE.TXT for details. 6 | *===------------------------------------------------------------------------*/ 7 | 8 | // OCML prototypes 9 | //#include "ocml.h" 10 | 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 12 | 13 | // Tables 14 | #include "tables.h" 15 | 16 | // Builtins 17 | //#include "builtins.h" 18 | 19 | // Mangling 20 | #define MATH_MANGLE(N) N 21 | #define MATH_PRIVATE(N) __priv##N 22 | 23 | // mine 24 | #define MATH_MAD(x, y, z) fma(x, y, z) 25 | #define FINITE_ONLY_OPT() 0 26 | #define BUILTIN_FMA_F64(x, y, z) fma(x, y, z) 27 | #define MATH_SQRT(x) sqrt(x) 28 | #define MATH_RCP(x) native_recip(x) 29 | #define AS_DOUBLE(x) as_double(x) 30 | #define AS_LONG(x) as_long(x) 31 | #define BUILTIN_ABS_F64(x) fabs(x) 32 | #define BUILTIN_COPYSIGN_F64(x, y) copysign(x, y) 33 | #define MATH_FAST_SQRT(x) native_sqrt(x) 34 | #define MATH_DIV(x, y) ((x) / (y)) 35 | #define BUILTIN_ISNAN_F64(x) isnan(x) 36 | #define BUILTIN_MAX_F64(x, y) fmax(x, y) 37 | #define BUILTIN_MIN_F64(x, y) fmin(x, y) 38 | 39 | #define BUILTIN_RSQRT_F64(x) native_rsqrt(x) 40 | #define BUILTIN_ISINF_F64(x) isinf(x) 41 | 42 | #define BUILTIN_LOG2_F32(x) native_log2(x) 43 | #define BUILTIN_EXP2_F32(x) native_exp2(x) 44 | 45 | #define BUILTIN_RINT_F32(x) rint(x) 46 | #define BUILTIN_RINT_F64(x) rint(x) 47 | 48 | static inline int frexp_exp(double x) { 49 | int e; 50 | double mant = frexp(x, &e); 51 | return e; 52 | } 
53 | 54 | #define BUILTIN_FREXP_EXP_F64(x) frexp_exp(x) 55 | #define BUILTIN_FLDEXP_F64(x, k) ldexp(x, k) 56 | 57 | // Optimization Controls 58 | //#include "opts.h" 59 | 60 | // Attributes 61 | #define PUREATTR __attribute__((pure)) __attribute__((overloadable)) 62 | #define CONSTATTR __attribute__((const)) __attribute__((overloadable)) 63 | 64 | // Math controls 65 | //#include "privD.h" 66 | 67 | // Bit patterns 68 | #define SIGNBIT_DP64 0x8000000000000000L 69 | #define EXSIGNBIT_DP64 0x7fffffffffffffffL 70 | #define EXPBITS_DP64 0x7ff0000000000000L 71 | #define MANTBITS_DP64 0x000fffffffffffffL 72 | #define ONEEXPBITS_DP64 0x3ff0000000000000L 73 | #define TWOEXPBITS_DP64 0x4000000000000000L 74 | #define HALFEXPBITS_DP64 0x3fe0000000000000L 75 | #define IMPBIT_DP64 0x0010000000000000L 76 | #define QNANBITPATT_DP64 0x7ff8000000000000L 77 | #define INDEFBITPATT_DP64 0xfff8000000000000L 78 | #define PINFBITPATT_DP64 0x7ff0000000000000L 79 | #define NINFBITPATT_DP64 0xfff0000000000000L 80 | #define EXPBIAS_DP64 1023 81 | #define EXPSHIFTBITS_DP64 52 82 | #define BIASEDEMIN_DP64 1 83 | #define EMIN_DP64 -1022 84 | #define BIASEDEMAX_DP64 2046 85 | #define EMAX_DP64 1023 86 | #define LAMBDA_DP64 1.0e300 87 | #define MANTLENGTH_DP64 53 88 | #define BASEDIGITS_DP64 15 89 | 90 | #define CLASS_PINF 2 91 | #define CLASS_NINF 4 92 | #define CLASS_QNAN 8 93 | #define CLASS_SNAN 16 94 | #define CLASS_PSUB 32 95 | #define CLASS_NSUB 64 96 | #define CLASS_PZER 128 97 | #define CLASS_NZER 256 98 | /* Returns -1 when x belongs to any class selected in klass (bitwise OR of CLASS_* flags), 0 otherwise. */ 99 | 100 | static inline long CONSTATTR BUILTIN_CLASS_F64(double x, int klass) 101 | { 102 | if ((klass & CLASS_PINF) && (as_long(x) == PINFBITPATT_DP64)) 103 | return -1; 104 | if ((klass & CLASS_NINF) && (as_long(x) == NINFBITPATT_DP64)) 105 | return -1; 106 | /* NaN: with the sign bit cleared, the bit pattern is strictly above +Inf (all-ones exponent, non-zero mantissa). Masking with QNANBITPATT_DP64 alone matched ordinary finite values such as 1.0. */ 107 | if ((klass & (CLASS_QNAN|CLASS_SNAN)) && ((as_long(x) & EXSIGNBIT_DP64) > PINFBITPATT_DP64)) 108 | return -1; 109 | 110 | if ((klass & (CLASS_NZER|CLASS_PZER)) && ((as_long(x) & (~SIGNBIT_DP64)) == 0) ) 111 | return -1; 112 | 113 | if ( 114 
| (klass & (CLASS_NSUB|CLASS_PSUB)) && 115 | ( 116 | ((as_long(x) & EXPBITS_DP64) == 0) && ((as_long(x) & MANTBITS_DP64) != 0) 117 | ) 118 | ) 119 | return -1; 120 | 121 | return 0; 122 | } 123 | 124 | // declarations 125 | 126 | PUREATTR double j1(double x); 127 | PUREATTR double j0(double x); 128 | CONSTATTR double erfinv(double x); 129 | CONSTATTR double erfcinv(double x); 130 | -------------------------------------------------------------------------------- /samples/hiploadmodule/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define NUM 100 11 | 12 | #define CHECK(cmd) \ 13 | do { \ 14 | hipError_t error = (cmd); \ 15 | if (error != hipSuccess) { \ 16 | fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \ 17 | __FILE__, __LINE__); \ 18 | exit(1); \ 19 | } \ 20 | } while(0) 21 | 22 | 23 | using namespace std; 24 | 25 | int main(int argc, char* argv[]) 26 | { 27 | // set up arrays for vector add 28 | int i=0; 29 | float* hostA; 30 | float* hostB; 31 | float* hostC; 32 | 33 | float* deviceA; 34 | float* deviceB; 35 | float* deviceC; 36 | 37 | struct { 38 | size_t _n; 39 | void* _Ad; 40 | void* _Bd; 41 | void* _Cd; 42 | } args1; 43 | 44 | hostA = (float*)malloc(NUM * sizeof(float)); 45 | hostB = (float*)malloc(NUM * sizeof(float)); 46 | hostC = (float*)malloc(NUM * sizeof(float)); 47 | 48 | // initialize the input data 49 | for (i = 0; i < NUM; i++) { 50 | hostA[i] = (float)i; 51 | hostB[i] = (float)i; 52 | } 53 | 54 | CHECK(hipInit(0)); 55 | CHECK(hipMalloc((void**)&deviceA, NUM * sizeof(float))); 56 | CHECK(hipMalloc((void**)&deviceB, NUM * sizeof(float))); 57 | CHECK(hipMalloc((void**)&deviceC, NUM * sizeof(float))); 58 | 59 | CHECK(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice)); 60 | CHECK(hipMemcpy(deviceA, hostA, NUM*sizeof(float), 
hipMemcpyHostToDevice)); 61 | 62 | hipModule_t hipModule = NULL; 63 | hipError_t error; 64 | 65 | char result[ PATH_MAX ]; 66 | ssize_t count = readlink( "/proc/self/exe", result, PATH_MAX ); 67 | std::string executablePath( result, (count > 0) ? count : 0 ); 68 | size_t last_pos = executablePath.find_last_of("/"); 69 | if (last_pos == std::string::npos) 70 | executablePath.assign("./"); 71 | else 72 | executablePath.resize(last_pos+1); 73 | const std::string binaryFilename(executablePath + "hipModuleLoadBinary"); 74 | 75 | error = hipModuleLoad(&hipModule, binaryFilename.c_str()); 76 | if (error) { 77 | printf("%s\n", binaryFilename.c_str()); 78 | cout << "Loading Module ("+binaryFilename+")" << endl; 79 | exit(1); 80 | } 81 | 82 | // get the function from the module 83 | hipFunction_t hipFunction = NULL; 84 | error = hipModuleGetFunction(&hipFunction, hipModule, "_occa_addVectors_0"); 85 | if (error) { 86 | cout << "Getting Function (_occa_addVectors_0)" << endl; 87 | exit(1); 88 | } 89 | 90 | args1._n = NUM; 91 | args1._Ad = deviceA; 92 | args1._Bd = deviceB; 93 | args1._Cd = deviceC; 94 | 95 | size_t size = sizeof(args1); 96 | 97 | void *config[] = { 98 | HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1, 99 | HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, 100 | HIP_LAUNCH_PARAM_END 101 | }; 102 | 103 | // launch the function 104 | error = hipModuleLaunchKernel( hipFunction, 1, 1, 1, NUM, 1, 1, 0, NULL, NULL, 105 | reinterpret_cast(&config) ); 106 | if (error) { 107 | cout << "hipmodulelaunch error" << endl; 108 | exit(1); 109 | } 110 | 111 | CHECK(hipMemcpy(hostC, deviceC, NUM*sizeof(float), hipMemcpyDeviceToHost)); 112 | 113 | // verify the results 114 | int errors = 0; 115 | for (i = 0; i < NUM; i++) { 116 | if (hostC[i] != (hostB[i] + hostA[i])) { 117 | printf( "%f\n", hostC[i]); 118 | errors++; 119 | } 120 | } 121 | if (errors!=0) { 122 | printf("FAILED: %d errors\n",errors); 123 | } else { 124 | printf("PASSED!\n"); 125 | } 126 | 127 | CHECK(hipFree(deviceA)); 128 | 
CHECK(hipFree(deviceB)); 129 | CHECK(hipFree(deviceC)); 130 | 131 | return 0; 132 | } 133 | -------------------------------------------------------------------------------- /samples/bit_extract/bit_extract.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #include 24 | #include 25 | #include "hip/hip_runtime.h" 26 | 27 | #define CHECK(cmd) \ 28 | { \ 29 | hipError_t error = cmd; \ 30 | if (error != hipSuccess) { \ 31 | fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \ 32 | __FILE__, __LINE__); \ 33 | exit(EXIT_FAILURE); \ 34 | } \ 35 | } 36 | 37 | __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) { 38 | size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); 39 | size_t stride = hipBlockDim_x * hipGridDim_x; 40 | 41 | for (size_t i = offset; i < N; i += stride) { 42 | C_d[i] = ((A_d[i] & 0xf00) >> 8); 43 | } 44 | } 45 | 46 | 47 | int main(int argc, char* argv[]) { 48 | uint32_t *A_d, *C_d; 49 | uint32_t *A_h, *C_h; 50 | size_t N = 1000000; 51 | size_t Nbytes = N * sizeof(uint32_t); 52 | 53 | int deviceId = 0; 54 | CHECK(hipSetDevice(deviceId)); 55 | printf ("deviceId: %i\n", deviceId); 56 | hipDeviceProp_t props; 57 | CHECK(hipGetDeviceProperties(&props, deviceId)); 58 | printf("info: running on device #%d %s\n", deviceId, props.name); 59 | 60 | 61 | printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); 62 | A_h = (uint32_t*)malloc(Nbytes); 63 | CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess); 64 | C_h = (uint32_t*)malloc(Nbytes); 65 | CHECK(C_h == 0 ? 
hipErrorMemoryAllocation : hipSuccess); 66 | 67 | for (size_t i = 0; i < N; i++) { 68 | A_h[i] = i; 69 | } 70 | 71 | printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0); 72 | CHECK(hipMalloc((void**)&A_d, Nbytes)); 73 | CHECK(hipMalloc((void**)&C_d, Nbytes)); 74 | 75 | printf("info: copy Host2Device\n"); 76 | CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); 77 | 78 | printf("info: launch 'bit_extract_kernel' \n"); 79 | const unsigned blocks = 512; 80 | const unsigned threadsPerBlock = 256; 81 | hipLaunchKernelGGL(bit_extract_kernel, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); 82 | 83 | printf("info: copy Device2Host\n"); 84 | CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); 85 | 86 | printf("info: check result\n"); 87 | for (size_t i = 0; i < N; i++) { 88 | unsigned Agold = ((A_h[i] & 0xf00) >> 8); 89 | if (C_h[i] != Agold) { 90 | fprintf(stderr, "mismatch detected.\n"); 91 | printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]); 92 | CHECK(hipErrorUnknown); 93 | } 94 | } 95 | printf("PASSED!\n"); 96 | } 97 | -------------------------------------------------------------------------------- /samples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | option(SAVE_TEMPS "Save temporary compilation products" OFF) 3 | option(VERBOSE "Verbose compilation" OFF) 4 | 5 | if(SAVE_TEMPS) 6 | add_compile_options("--save-temps") 7 | endif() 8 | 9 | if(VERBOSE) 10 | add_compile_options("-v") 11 | endif() 12 | 13 | # ARGN = test args 14 | function(add_hipcl_test EXEC_NAME TEST_NAME TEST_PASS SOURCE) 15 | 16 | set(TEST_EXEC_ARGS ${ARGN}) 17 | set_source_files_properties(${SOURCE} PROPERTIES LANGUAGE CXX) 18 | 19 | add_executable("${EXEC_NAME}" ${SOURCE}) 20 | 21 | set_target_properties("${EXEC_NAME}" PROPERTIES CXX_STANDARD_REQUIRED ON) 22 | 23 | target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl") 24 | 25 | install(TARGETS "${EXEC_NAME}" 26 | 
# add_hipcl_binary(<EXEC_NAME> <source>...)
#
# Builds a sample executable without registering a test for it.
# ARGN = sources. Every source is forced to be compiled as C++ (LANGUAGE CXX),
# the target is linked against ${SANITIZER_LIBS} and the "hipcl" target, and
# the resulting binary is installed into ${HIPCL_SAMPLE_BINDIR}.
function(add_hipcl_binary EXEC_NAME)

  set(SOURCES ${ARGN})
  set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX)

  add_executable("${EXEC_NAME}" ${SOURCES})

  set_target_properties("${EXEC_NAME}" PROPERTIES CXX_STANDARD_REQUIRED ON)

  target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl")

  install(TARGETS "${EXEC_NAME}"
          RUNTIME DESTINATION "${HIPCL_SAMPLE_BINDIR}")

endfunction()

# add_hipcl_device_binary(<BIN_NAME> <source>...)
#
# Compiles the sources as an OBJECT library named "<BIN_NAME>_o" with
# "--cuda-device-only", then copies the result to
# ${CMAKE_CURRENT_BINARY_DIR}/<BIN_NAME>. A custom target called <BIN_NAME>
# depends on that copy, and the copied file is installed into
# ${HIPCL_SAMPLE_BINDIR}.
# ARGN = sources
function(add_hipcl_device_binary BIN_NAME)
  set(SOURCES ${ARGN})

  set(BIN_NAME_OBJ "${BIN_NAME}_o")

  add_library("${BIN_NAME_OBJ}" OBJECT ${SOURCES})

  set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX)

  target_link_libraries("${BIN_NAME_OBJ}" "${SANITIZER_LIBS}" "hipcl")

  target_compile_options("${BIN_NAME_OBJ}" PRIVATE "--cuda-device-only")

  # NOTE(review): the copy source below appears as a bare "$" in this view;
  # presumably a generator expression naming the object file of
  # ${BIN_NAME_OBJ} - confirm against the repository.
  add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${BIN_NAME}"
                     COMMAND ${CMAKE_COMMAND} -E copy
                     $
                     "${CMAKE_CURRENT_BINARY_DIR}/${BIN_NAME}"
                     DEPENDS "${BIN_NAME_OBJ}")

  add_custom_target("${BIN_NAME}" DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${BIN_NAME}")

  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${BIN_NAME}"
          DESTINATION "${HIPCL_SAMPLE_BINDIR}")

endfunction()

# add_hipcl_binary_device_link(<EXEC_NAME> <source>...)
#
# Same as add_hipcl_binary, but compiles and links with "-fgpu-rdc" and adds
# "--hip-link" plus "--hip-device-lib=kernellib.bc" to the link options.
# ARGN = sources
function(add_hipcl_binary_device_link EXEC_NAME)
  set(SOURCES ${ARGN})
  set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX)

  add_executable("${EXEC_NAME}" ${SOURCES})

  set_target_properties("${EXEC_NAME}" PROPERTIES CXX_STANDARD_REQUIRED ON)

  target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl")

  target_compile_options("${EXEC_NAME}" PRIVATE "-fgpu-rdc")

  # NOTE(review): the four "$" entries are truncated in this view; they look
  # like generator expressions whose contents cannot be recovered from here.
  target_link_options("${EXEC_NAME}" PRIVATE
    "-fgpu-rdc"
    "--hip-link"
    "$"
    "$"
    "$"
    "$"
    "--hip-device-lib=kernellib.bc")

  install(TARGETS "${EXEC_NAME}"
          RUNTIME DESTINATION "${HIPCL_SAMPLE_BINDIR}")

endfunction()


# Sample subdirectories that are part of the build. 7_streams and 9_unroll
# are commented out and therefore not built.
set(SAMPLES
    hipmath
    hiptest
    bit_extract
    hcc_dialects
    fp16
    0_MatrixTranspose
    0_MatrixMultiply
    1_hipEvent
    2_vecadd
    3_shared_memory
    4_shfl
    5_2dshfl
    6_dynamic_shared
    hipInfo
#    7_streams
#    9_unroll
    10_memcpy3D
    hipSymbol
    hipDeviceLink
    hiploadmodule
)

# Descend into every enabled sample directory.
foreach (SAMPLE ${SAMPLES})
  add_subdirectory(${SAMPLE})
endforeach()

# The hip-cuda samples have their own CMakeLists.txt and are added separately.
add_subdirectory(hip-cuda)
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include 24 | #include 25 | 26 | // hip header file 27 | #include "hip/hip_runtime.h" 28 | 29 | #define WIDTH 1024 30 | 31 | #define NUM (WIDTH * WIDTH) 32 | 33 | #define THREADS_PER_BLOCK_X 4 34 | #define THREADS_PER_BLOCK_Y 4 35 | #define THREADS_PER_BLOCK_Z 1 36 | 37 | // Device (Kernel) function, it must be void 38 | __global__ void matrixTranspose(float* out, float* in, const int width) { 39 | int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; 40 | int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; 41 | 42 | out[y * width + x] = in[x * width + y]; 43 | } 44 | 45 | // CPU implementation of matrix transpose 46 | void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { 47 | for (unsigned int j = 0; j < width; j++) { 48 | for (unsigned int i = 0; i < width; i++) { 49 | output[i * width + j] = input[j * width + i]; 50 | } 51 | } 52 | } 53 | 54 | int main() { 55 | float* Matrix; 56 | float* TransposeMatrix; 57 | float* cpuTransposeMatrix; 58 | 59 | float* gpuMatrix; 60 | float* gpuTransposeMatrix; 61 | 62 | hipDeviceProp_t devProp; 63 | hipGetDeviceProperties(&devProp, 0); 64 | 65 | std::cout << "Device name " << devProp.name << std::endl; 66 | 67 | int i; 68 | int errors; 69 | 70 | Matrix = (float*)malloc(NUM * sizeof(float)); 71 | TransposeMatrix = (float*)malloc(NUM * sizeof(float)); 72 | cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); 73 | 74 | // initialize the input data 75 | for (i = 0; i < NUM; 
i++) { 76 | Matrix[i] = (float)i * 10.0f; 77 | } 78 | 79 | // allocate the memory on the device side 80 | hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); 81 | hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); 82 | 83 | // Memory transfer from host to device 84 | hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); 85 | 86 | // Lauching kernel from host 87 | hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), 88 | dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, 89 | gpuMatrix, WIDTH); 90 | 91 | // Memory transfer from device to host 92 | hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); 93 | 94 | // CPU MatrixTranspose computation 95 | matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); 96 | 97 | // verify the results 98 | errors = 0; 99 | float eps = 1.0E-6; 100 | for (i = 0; i < NUM; i++) { 101 | if (std::fabs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { 102 | errors++; 103 | } 104 | } 105 | if (errors != 0) { 106 | printf("FAILED: %d errors\n", errors); 107 | } else { 108 | printf("PASSED!\n"); 109 | } 110 | 111 | // free the resources on device side 112 | hipFree(gpuMatrix); 113 | hipFree(gpuTransposeMatrix); 114 | 115 | // free the resources on host side 116 | free(Matrix); 117 | free(TransposeMatrix); 118 | free(cpuTransposeMatrix); 119 | 120 | return errors; 121 | } 122 | -------------------------------------------------------------------------------- /samples/9_unroll/unroll.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #include 24 | 25 | // hip header file 26 | #include "hip/hip_runtime.h" 27 | 28 | 29 | #define WIDTH 4 30 | 31 | #define NUM (WIDTH * WIDTH) 32 | 33 | #define THREADS_PER_BLOCK_X 4 34 | #define THREADS_PER_BLOCK_Y 4 35 | #define THREADS_PER_BLOCK_Z 1 36 | 37 | // Device (Kernel) function, it must be void 38 | __global__ void matrixTranspose(float* out, float* in, const int width) { 39 | int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; 40 | float val = in[x]; 41 | 42 | #pragma unroll 43 | for (int i = 0; i < width; i++) { 44 | for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); 45 | } 46 | } 47 | 48 | // CPU implementation of matrix transpose 49 | void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { 50 | for (unsigned int j = 0; j < width; j++) { 51 | for (unsigned int i = 0; i < width; i++) { 52 | output[i * width + j] = input[j * width + i]; 53 | } 54 | } 55 | } 56 | 57 | int main() { 58 | float* Matrix; 59 | float* TransposeMatrix; 60 | float* cpuTransposeMatrix; 61 | 62 | float* gpuMatrix; 63 | float* gpuTransposeMatrix; 64 | 65 | hipDeviceProp_t devProp; 66 | hipGetDeviceProperties(&devProp, 0); 67 | 68 | std::cout << "Device name " << devProp.name << std::endl; 69 | 70 | int i; 71 | int errors; 72 | 73 | Matrix = (float*)malloc(NUM * sizeof(float)); 74 | TransposeMatrix = (float*)malloc(NUM * sizeof(float)); 75 | cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); 76 | 77 | // initialize the input data 78 | for (i = 0; i < NUM; i++) { 79 | Matrix[i] = (float)i * 10.0f; 80 | } 81 | 82 | // allocate the memory on the device side 83 | hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); 84 | hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); 85 | 86 | // Memory transfer from host to device 87 | hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); 88 | 89 | // Lauching kernel from host 90 | hipLaunchKernelGGL(matrixTranspose, dim3(1), 
dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, 91 | gpuTransposeMatrix, gpuMatrix, WIDTH); 92 | 93 | // Memory transfer from device to host 94 | hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); 95 | 96 | // CPU MatrixTranspose computation 97 | matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); 98 | 99 | // verify the results 100 | errors = 0; 101 | double eps = 1.0E-6; 102 | for (i = 0; i < NUM; i++) { 103 | if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { 104 | printf("%d cpu: %f gpu %f\n", i, cpuTransposeMatrix[i], TransposeMatrix[i]); 105 | errors++; 106 | } 107 | } 108 | if (errors != 0) { 109 | printf("FAILED: %d errors\n", errors); 110 | } else { 111 | printf("PASSED!\n"); 112 | } 113 | 114 | // free the resources on device side 115 | hipFree(gpuMatrix); 116 | hipFree(gpuTransposeMatrix); 117 | 118 | // free the resources on host side 119 | free(Matrix); 120 | free(TransposeMatrix); 121 | free(cpuTransposeMatrix); 122 | 123 | return errors; 124 | } 125 | --------------------------------------------------------------------------------