├── cxxopts
    ├── src
    │   ├── .gitignore
    │   └── CMakeLists.txt
    ├── test
    │   ├── .gitignore
    │   ├── link_b.cpp
    │   ├── main.cpp
    │   ├── link_a.cpp
    │   ├── find-package-test
    │   │   └── CMakeLists.txt
    │   ├── add-subdirectory-test
    │   │   └── CMakeLists.txt
    │   └── CMakeLists.txt
    ├── .gitignore
    ├── cxxopts-config.cmake.in
    ├── INSTALL
    ├── LICENSE
    ├── .travis.yml
    └── CHANGELOG.md
├── samples
    ├── hipInfo
    │   └── CMakeLists.txt
    ├── 10_memcpy3D
    │   ├── CMakeLists.txt
    │   └── main.cpp
    ├── hipmath
    │   └── CMakeLists.txt
    ├── 2_vecadd
    │   └── CMakeLists.txt
    ├── 5_2dshfl
    │   └── CMakeLists.txt
    ├── 7_streams
    │   └── CMakeLists.txt
    ├── 9_unroll
    │   ├── CMakeLists.txt
    │   └── unroll.cpp
    ├── hiptest
    │   └── CMakeLists.txt
    ├── 1_hipEvent
    │   └── CMakeLists.txt
    ├── hcc_dialects
    │   ├── CMakeLists.txt
    │   └── vadd_hip.cpp
    ├── hip-cuda
    │   ├── include
    │   │   ├── SDKFile.hpp
    │   │   ├── SDKBitMap.hpp
    │   │   └── SDKThread.hpp
    │   ├── DCT
    │   │   └── CMakeLists.txt
    │   ├── SimpleConvolution
    │   │   ├── FilterCoeff.h
    │   │   ├── SimpleConvolution.cpp
    │   │   └── CMakeLists.txt
    │   ├── Histogram
    │   │   └── CMakeLists.txt
    │   ├── PrefixSum
    │   │   └── CMakeLists.txt
    │   ├── dwtHaar1D
    │   │   └── CMakeLists.txt
    │   ├── BitonicSort
    │   │   └── CMakeLists.txt
    │   ├── RecursiveGaussian
    │   │   ├── RecursiveGaussian_Input.bmp
    │   │   └── CMakeLists.txt
    │   ├── BinomialOption
    │   │   └── CMakeLists.txt
    │   ├── FloydWarshall
    │   │   └── CMakeLists.txt
    │   ├── FastWalshTransform
    │   │   └── CMakeLists.txt
    │   └── CMakeLists.txt
    ├── bit_extract
    │   ├── CMakeLists.txt
    │   └── bit_extract.cpp
    ├── 3_shared_memory
    │   └── CMakeLists.txt
    ├── 0_MatrixMultiply
    │   └── CMakeLists.txt
    ├── 0_MatrixTranspose
    │   ├── CMakeLists.txt
    │   └── MatrixTranspose.cpp
    ├── hipDeviceLink
    │   ├── hipDeviceLink.h
    │   ├── hipDeviceLinkConsts.h
    │   ├── CMakeLists.txt
    │   ├── hipDeviceLinkWrite.cpp
    │   ├── hipDeviceLinkRead.cpp
    │   └── hipDeviceLink.cpp
    ├── hipSymbol
    │   └── CMakeLists.txt
    ├── fp16
    │   ├── CMakeLists.txt
    │   └── fp16_conversion.hpp
    ├── 4_shfl
    │   ├── CMakeLists.txt
    │   ├── broadcast.cpp
    │   └── broadcast2.cpp
    ├── 6_dynamic_shared
    │   ├── CMakeLists.txt
    │   └── hipDynamicShared2.cpp
    ├── hiploadmodule
    │   ├── CMakeLists.txt
    │   ├── kernel.cpp
    │   └── main.cpp
    └── CMakeLists.txt
├── include
    ├── hip
    │   ├── hip_runtime.h
    │   └── hip_fatbin.h
    └── CL
    │   ├── LICENSE
    │   ├── opencl.h
    │   ├── cl_gl_ext.h
    │   └── cl_version.h
├── spdlog
    ├── version.h
    ├── fmt
    │   ├── ostr.h
    │   ├── fmt.h
    │   └── bundled
    │   │   └── LICENSE.rst
    ├── formatter.h
    ├── details
    │   ├── null_mutex.h
    │   ├── log_msg.h
    │   ├── console_globals.h
    │   ├── circular_q.h
    │   ├── periodic_worker.h
    │   ├── async_logger_impl.h
    │   ├── fmt_helper.h
    │   └── mpmc_blocking_q.h
    ├── LICENSE
    ├── sinks
    │   ├── msvc_sink.h
    │   ├── null_sink.h
    │   ├── ostream_sink.h
    │   ├── sink.h
    │   ├── stdout_color_sinks.h
    │   ├── base_sink.h
    │   ├── basic_file_sink.h
    │   ├── dist_sink.h
    │   ├── syslog_sink.h
    │   ├── stdout_sinks.h
    │   └── android_sink.h
    ├── async_logger.h
    └── async.h
├── doc
    ├── cmake.rst
    ├── env_variables.rst
    └── notes-0.9.txt
├── bin
    └── CMakeLists.txt
├── lib
    ├── bitcode
    │   ├── OCML
    │   │   ├── nearbyintF.cl
    │   │   ├── nearbyintD.cl
    │   │   ├── scalbnF.cl
    │   │   ├── scalbnD.cl
    │   │   ├── ncdfinvF.cl
    │   │   ├── ncdfinvD.cl
    │   │   ├── tables.cl
    │   │   ├── tables.h
    │   │   ├── scalbF.cl
    │   │   ├── rhypotF.cl
    │   │   ├── scalbD.cl
    │   │   ├── rcbrtF.cl
    │   │   ├── rhypotD.cl
    │   │   ├── rcbrtD.cl
    │   │   ├── rlen3F.cl
    │   │   ├── i0F.cl
    │   │   ├── i1F.cl
    │   │   ├── rlen3D.cl
    │   │   ├── oclc.h
    │   │   ├── mathD.h.orig
    │   │   ├── rlen4F.cl
    │   │   ├── LICENSE
    │   │   ├── rlen4D.cl
    │   │   ├── erfcxF.cl
    │   │   ├── erfcinvF.cl
    │   │   ├── erfinvF.cl
    │   │   ├── i0D.cl
    │   │   ├── i1D.cl
    │   │   ├── j0F.cl
    │   │   ├── j1F.cl
    │   │   ├── j0D.cl
    │   │   ├── erfcxD.cl
    │   │   ├── j1D.cl
    │   │   ├── mathF.h
    │   │   └── mathD.h
    │   └── CMakeLists.txt
    ├── hipcl-config.cmake.in
    ├── common.hh
    ├── log.cc
    └── CMakeLists.txt
├── hipcl_config.h.in
├── llvm_passes
    └── CMakeLists.txt
├── LICENSE
└── cmake
    └── run_make2cmake.cmake


/cxxopts/src/.gitignore:
--------------------------------------------------------------------------------
1 | example
2 | 


--------------------------------------------------------------------------------
/cxxopts/test/.gitignore:
--------------------------------------------------------------------------------
1 | options_test
2 | 


--------------------------------------------------------------------------------
/cxxopts/test/link_b.cpp:
--------------------------------------------------------------------------------
1 | #include <cxxopts.hpp>
2 | 


--------------------------------------------------------------------------------
/samples/hipInfo/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_hipcl_binary(hipInfo hipInfo.cpp)
2 | 


--------------------------------------------------------------------------------
/cxxopts/test/main.cpp:
--------------------------------------------------------------------------------
1 | #define CATCH_CONFIG_MAIN
2 | #include "catch.hpp"
3 | 


--------------------------------------------------------------------------------
/samples/10_memcpy3D/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | add_hipcl_test(memcpy3D memcpy3D PASSED main.cpp)
3 | 


--------------------------------------------------------------------------------
/samples/hipmath/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | add_hipcl_test(hipmath hipmath PASSED hipmath.cc)
3 | 


--------------------------------------------------------------------------------
/samples/2_vecadd/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # VecAdd
2 | 
3 | add_hipcl_test(VecAdd VecAdd PASSED VecAdd.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/5_2dshfl/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # 2dshfl
2 | add_hipcl_test(2dshfl 2d_shuffle PASSED 2dshfl.cpp)
3 | 


--------------------------------------------------------------------------------
/samples/7_streams/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # stream
2 | 
3 | add_hipcl_test(stream stream PASSED stream.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/9_unroll/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # unroll
2 | 
3 | add_hipcl_test(unroll unroll PASSED unroll.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hiptest/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # hiptest
2 | 
3 | add_hipcl_test(hiptest hiptest PASSED hiptest.cc)
4 | 


--------------------------------------------------------------------------------
/cxxopts/test/link_a.cpp:
--------------------------------------------------------------------------------
1 | #include "cxxopts.hpp"
2 | 
3 | int main(int, char**)
4 | {
5 |   return 0;
6 | }
7 | 


--------------------------------------------------------------------------------
/samples/1_hipEvent/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # hipEvent
2 | 
3 | add_hipcl_test(hipEvent hipEvent PASSED hipEvent.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hcc_dialects/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # vadd_hip
2 | 
3 | add_hipcl_test(vadd_hip vadd_hip PASSED vadd_hip.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/include/SDKFile.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKFile.hpp


--------------------------------------------------------------------------------
/samples/hip-cuda/include/SDKBitMap.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKBitMap.hpp


--------------------------------------------------------------------------------
/samples/hip-cuda/include/SDKThread.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/include/SDKThread.hpp


--------------------------------------------------------------------------------
/samples/bit_extract/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # bit_extract
2 | 
3 | add_hipcl_test(bit_extract bit_extract PASSED bit_extract.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/3_shared_memory/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # sharedMemory
2 | 
3 | add_hipcl_test(sharedMemory sharedMemory PASSED sharedMemory.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/DCT/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # DCT
2 | 
3 | add_hipcl_test(DCT DCT Passed DCT.cpp
4 |     -q -e -t -x 2048 -y 2048 -i 32)
5 | 


--------------------------------------------------------------------------------
/samples/0_MatrixMultiply/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # MatrixMultiply
2 | 
3 | add_hipcl_test(MatrixMultiply MatrixMultiply PASSED MatrixMultiply.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/SimpleConvolution/FilterCoeff.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/SimpleConvolution/FilterCoeff.h


--------------------------------------------------------------------------------
/cxxopts/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | build*
3 | CMakeCache.txt
4 | Makefile
5 | CMakeFiles/
6 | Testing/
7 | CTestTestfile.cmake
8 | cmake_install.cmake
9 | 


--------------------------------------------------------------------------------
/samples/0_MatrixTranspose/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # MatrixTranspose
2 | 
3 | add_hipcl_test(MatrixTranspose MatrixTranspose PASSED MatrixTranspose.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/Histogram/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Histogram
2 | 
3 | add_hipcl_test(Histogram Histogram Passed Histogram.cpp
4 |     -q -e -t -x 1024)
5 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/hipDeviceLink.h:
--------------------------------------------------------------------------------
1 | #include "hipDeviceLinkConsts.h"
2 | #include <hip/hip_runtime.h>
3 | extern __device__ int global[NUM];
4 | 


--------------------------------------------------------------------------------
/samples/hipSymbol/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Test symbol access
2 | 
3 | add_hipcl_test(hipTestDeviceSymbol hipTestDeviceSymbol PASSED hipTestDeviceSymbol.cpp)
4 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/PrefixSum/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # PrefixSum
2 | 
3 | add_hipcl_test(PrefixSum PrefixSum Passed PrefixSum.cpp
4 |     -q -e -t -x 16384 -i 32)
5 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/SimpleConvolution/SimpleConvolution.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/SimpleConvolution/SimpleConvolution.cpp


--------------------------------------------------------------------------------
/samples/hip-cuda/dwtHaar1D/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # dwtHaar1D
2 | 
3 | add_hipcl_test(dwtHaar1D dwtHaar1D Passed dwtHaar1D.cpp
4 |     -q -e -t -x 2048 -i 32)
5 | 


--------------------------------------------------------------------------------
/cxxopts/cxxopts-config.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 | 
3 | include(${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake)
4 | check_required_components(cxxopts)
5 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/BitonicSort/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # BitonicSort
2 | 
3 | add_hipcl_test(BitonicSort BitonicSort Passed BitonicSort.cpp
4 |     -q -e -t -x 2048 -i 32)
5 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/RecursiveGaussian/RecursiveGaussian_Input.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cpc/hipcl/HEAD/samples/hip-cuda/RecursiveGaussian/RecursiveGaussian_Input.bmp


--------------------------------------------------------------------------------
/samples/hip-cuda/BinomialOption/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # BinomialOption
2 | 
3 | add_hipcl_test(BinomialOption BinomialOption Passed BinomialOption.cpp
4 |     -q -e -t -x 2048 -i 32)
5 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/FloydWarshall/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # FloydWarshall
2 | 
3 | add_hipcl_test(FloydWarshall FloydWarshall Passed FloydWarshall.cpp
4 |     -q -e -t -x 256 -i 32)
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/hipDeviceLinkConsts.h:
--------------------------------------------------------------------------------
1 | // Constants and host entry points shared by the hipDeviceLink sample sources.
2 | #define NUM 256
3 | // Byte size of NUM ints; parenthesized so it expands safely inside expressions.
4 | #define SIZE (NUM * sizeof(int))
5 | // readGlobal is defined in hipDeviceLinkRead.cpp, writeGlobal in hipDeviceLinkWrite.cpp.
6 | extern void readGlobal(int *hostOut);
7 | extern void writeGlobal(int *hostIn);
8 | 


--------------------------------------------------------------------------------
/samples/fp16/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 
2 | add_hipcl_test(fp16 fp16 PASSED haxpy-base.cpp)
3 | 
4 | # add_hipcl_test(fp16_math fp16_math PASSED half_math.cpp)
5 | add_hipcl_binary(fp16_math half_math.cpp)
6 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/FastWalshTransform/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # FastWalshTransform
2 | 
3 | add_hipcl_test(FastWalshTransform FastWalshTransform Passed FastWalshTransform.cpp
4 |     -q -e -t -x 2048 -i 32)
5 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/SimpleConvolution/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # SimpleConvolution
2 | 
3 | add_hipcl_test(SimpleConvolution SimpleConvolution Passed SimpleConvolution.cpp
4 |     -q -e -t -x 2048 -y 2048 -i 32 -l 256)
5 | 


--------------------------------------------------------------------------------
/samples/4_shfl/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # shfl
2 | 
3 | add_hipcl_test(shfl shuffle PASSED shfl.cpp)
4 | 
5 | add_hipcl_test(broadcast broadcast PASSED broadcast.cpp)
6 | add_hipcl_test(broadcast2 broadcast2 PASSED broadcast2.cpp)
7 | 


--------------------------------------------------------------------------------
/samples/6_dynamic_shared/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # dynamic_shared
2 | 
3 | add_hipcl_test(dynamic_shared dynamic_shared PASSED dynamic_shared.cpp)
4 | 
5 | add_hipcl_test(hipDynamicShared hipDynamicShared PASSED hipDynamicShared.cpp)
6 | 
7 | add_hipcl_test(hipDynamicShared2 hipDynamicShared2 PASSED hipDynamicShared2.cpp)
8 | 
9 | 


--------------------------------------------------------------------------------
/cxxopts/test/find-package-test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.1)
 2 | 
 3 | project(cxxopts-test)
 4 | 
 5 | set(CMAKE_CXX_STANDARD   11)
 6 | set(CMAKE_CXX_EXTENSIONS OFF)
 7 | 
 8 | find_package(cxxopts REQUIRED)
 9 | 
10 | add_executable(library-test "../../src/example.cpp")
11 | target_link_libraries(library-test cxxopts::cxxopts)
12 | 


--------------------------------------------------------------------------------
/cxxopts/test/add-subdirectory-test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.1)
 2 | 
 3 | project(cxxopts-test)
 4 | 
 5 | set(CMAKE_CXX_STANDARD   11)
 6 | set(CMAKE_CXX_EXTENSIONS OFF)
 7 | 
 8 | add_subdirectory(../.. cxxopts EXCLUDE_FROM_ALL)
 9 | 
10 | add_executable(library-test "../../src/example.cpp")
11 | target_link_libraries(library-test cxxopts)
12 | 


--------------------------------------------------------------------------------
/include/hip/hip_runtime.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef HIPCL_HIP_RUNTIME_H
 3 | #define HIPCL_HIP_RUNTIME_H
 4 | 
 5 | #ifndef __HIP_PLATFORM_HIPCL__
 6 | #define __HIP_PLATFORM_HIPCL__
 7 | #endif
 8 | 
 9 | #include <cmath>
10 | #include <cstdint>
11 | 
12 | #include <hip/hipcl.hh>
13 | 
14 | #include <hip/hip_vector_types.h>
15 | 
16 | #include <hip/hip_fp16.h>
17 | 
18 | #endif
19 | 
20 | 


--------------------------------------------------------------------------------
/spdlog/version.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #define SPDLOG_VER_MAJOR 1
 9 | #define SPDLOG_VER_MINOR 3
10 | #define SPDLOG_VER_PATCH 0
11 | 
12 | #define SPDLOG_VERSION (SPDLOG_VER_MAJOR * 10000 + SPDLOG_VER_MINOR * 100 + SPDLOG_VER_PATCH)
13 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(SAMPLES
 2 |     BinomialOption
 3 |     BitonicSort
 4 |     DCT
 5 |     dwtHaar1D
 6 |     FastWalshTransform
 7 |     FloydWarshall
 8 |     Histogram
 9 | #   still broken:
10 | #    PrefixSum
11 |     RecursiveGaussian
12 |     SimpleConvolution
13 | )
14 | 
15 | foreach (SAMPLE ${SAMPLES})
16 |   add_subdirectory(${SAMPLE})
17 | endforeach()
18 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Test device linking across multiple source files
 2 | 
 3 | add_hipcl_binary_device_link(
 4 | 	hipTestDeviceLink
 5 | 	hipDeviceLink.cpp
 6 | 	hipDeviceLinkRead.cpp
 7 | 	hipDeviceLinkWrite.cpp)
 8 | 
 9 | add_test(NAME hipTestDeviceLink
10 | 	 COMMAND "${CMAKE_CURRENT_BINARY_DIR}/hipTestDeviceLink"
11 | 	 )
12 | 
13 | set_tests_properties(hipTestDeviceLink PROPERTIES
14 | 	PASS_REGULAR_EXPRESSION PASSED)
15 | 
16 | 


--------------------------------------------------------------------------------
/doc/cmake.rst:
--------------------------------------------------------------------------------
 1 | CMake variables
 2 | ---------------------------------------
 3 | 
 4 | - **SAVE_TEMPS**
 5 |   If set, temporary compilation products of compiling samples
 6 |   will be stored in the build directory.
 7 | 
 8 | - **LOGLEVEL**
 9 |   If set, sets the minimum logging level at compile time.
10 |   Log levels below this will not be available at runtime.
11 |   Valid values are DEBUG;INFO;WARN;ERROR;CRITICAL;OFF
12 |   Defaults to DEBUG.
13 | 


--------------------------------------------------------------------------------
/samples/hiploadmodule/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Test module loading: a host binary plus a separately built device binary
 2 | 
 3 | add_hipcl_binary(
 4 | 	hipModuleLoad
 5 | 	main.cpp)
 6 | 
 7 | add_hipcl_device_binary(
 8 | 	hipModuleLoadBinary
 9 | 	kernel.cpp)
10 | 
11 | add_dependencies(hipModuleLoad hipModuleLoadBinary)
12 | 
13 | add_test(NAME hipModuleLoad
14 | 	 COMMAND "${CMAKE_CURRENT_BINARY_DIR}/hipModuleLoad"
15 | 	 )
16 | 
17 | set_tests_properties(hipModuleLoad PROPERTIES
18 | 	PASS_REGULAR_EXPRESSION PASSED)
19 | 
20 | 


--------------------------------------------------------------------------------
/spdlog/fmt/ostr.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | //
 8 | // include bundled or external copy of fmtlib's ostream support
 9 | //
10 | #if !defined(SPDLOG_FMT_EXTERNAL)
11 | #ifndef FMT_HEADER_ONLY
12 | #define FMT_HEADER_ONLY
13 | #endif
14 | #include "bundled/ostream.h"
15 | #include "fmt.h"
16 | #else
17 | #include <fmt/ostream.h>
18 | #endif
19 | 


--------------------------------------------------------------------------------
/samples/hiploadmodule/kernel.cpp:
--------------------------------------------------------------------------------
 1 | #include <hip/hip_runtime.h>
 2 | 
 3 | extern "C" __global__ void _occa_addVectors_0(const size_t entries,
 4 |                                               const float * a,
 5 |                                               const float * b,
 6 |                                               float * ab) {
 7 |   size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 8 |   if (i < entries) {
 9 |     ab[i] = a[i] + b[i];
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/hipDeviceLinkWrite.cpp:
--------------------------------------------------------------------------------
 1 | #include "hipDeviceLink.h"
 2 | 
 3 | // Kernel: each thread copies one element of `in` into the device-side
 4 | // `global` array; threads indexed past the end of the array do nothing.
 5 | __global__ void Write(const int *in) {
 6 |   int tid = threadIdx.x + blockIdx.x * blockDim.x;
 7 |   if (tid < NUM)
 8 |     global[tid] = in[tid];
 9 | }
10 | 
11 | // Uploads SIZE bytes from hostIn and launches Write to store them in `global`.
12 | // Returns without launching the kernel if the allocation or the upload fails,
13 | // so a failed hipMalloc never hands an uninitialized pointer to later calls.
14 | void writeGlobal(int *hostIn) {
15 |   int *deviceIn = nullptr;
16 |   if (hipMalloc((void **)&deviceIn, SIZE) != hipSuccess)
17 |     return;
18 |   if (hipMemcpy(deviceIn, hostIn, SIZE, hipMemcpyHostToDevice) == hipSuccess)
19 |     hipLaunchKernelGGL(Write, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, deviceIn);
20 |   hipFree(deviceIn);
21 | }
22 | 


--------------------------------------------------------------------------------
/bin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | set_source_files_properties(hipcl_config.cc PROPERTIES LANGUAGE CXX )
 3 | 
 4 | add_executable(hipcl_config hipcl_config.cc)
 5 | 
 6 | set_target_properties(hipcl_config PROPERTIES CXX_STANDARD_REQUIRED ON)
 7 | 
 8 | target_include_directories(hipcl_config PRIVATE "${CMAKE_BINARY_DIR}" "${CMAKE_SOURCE_DIR}/cxxopts/include")
 9 | 
10 | target_link_libraries(hipcl_config ${PTHREAD_LIBRARY})
11 | 
12 | install(TARGETS "hipcl_config"
13 |         RUNTIME DESTINATION "${HIPCL_BIN_DIR}")
14 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/nearbyintF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(nearbyint)(float x)
12 | {
13 |     return BUILTIN_RINT_F32(x);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/nearbyintD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(nearbyint)(double x)
12 | {
13 |     return BUILTIN_RINT_F64(x);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/scalbnF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(scalbn)(float x, int n)
12 | {
13 |     return MATH_MANGLE(ldexp)(x, n);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/scalbnD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(scalbn)(double x, int n)
12 | {
13 |     return MATH_MANGLE(ldexp)(x, n);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/hipDeviceLinkRead.cpp:
--------------------------------------------------------------------------------
 1 | #include "hipDeviceLink.h"
 2 | 
 3 | __device__ int global[NUM];
 4 | 
 5 | // Kernel: each thread copies one element of the device-side `global` array
 6 | // into `out`; threads indexed past the end of the array do nothing.
 7 | __global__ void Read(int *out) {
 8 |   int tid = threadIdx.x + blockIdx.x * blockDim.x;
 9 |   if (tid < NUM)
10 |     out[tid] = global[tid];
11 | }
12 | 
13 | // Launches Read into a temporary device buffer and copies SIZE bytes back
14 | // into hostOut. Returns with hostOut untouched if the allocation fails, so a
15 | // failed hipMalloc never hands an uninitialized pointer to later calls.
16 | void readGlobal(int *hostOut) {
17 |   int *deviceOut = nullptr;
18 |   if (hipMalloc((void **)&deviceOut, SIZE) != hipSuccess)
19 |     return;
20 |   hipLaunchKernelGGL(Read, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, deviceOut);
21 |   hipMemcpy(hostOut, deviceOut, SIZE, hipMemcpyDeviceToHost);
22 |   hipFree(deviceOut);
23 | }
24 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/ncdfinvF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(ncdfinv)(float x)
12 | {
13 |     return -0x1.6a09e6p+0f * MATH_MANGLE(erfcinv)(x + x);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/spdlog/formatter.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "fmt/fmt.h"
 9 | #include "spdlog/details/log_msg.h"
10 | 
11 | namespace spdlog {
12 | 
13 | class formatter
14 | {
15 | public:
16 |     virtual ~formatter() = default;
17 |     virtual void format(const details::log_msg &msg, fmt::memory_buffer &dest) = 0;
18 |     virtual std::unique_ptr<formatter> clone() const = 0;
19 | };
20 | } // namespace spdlog
21 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/ncdfinvD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(ncdfinv)(double x)
12 | {
13 |     return -0x1.6a09e667f3bcdp+0 * MATH_MANGLE(erfcinv)(x + x);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/samples/hipDeviceLink/hipDeviceLink.cpp:
--------------------------------------------------------------------------------
 1 | #include "hipDeviceLinkConsts.h"
 2 | #include <assert.h>
 3 | #include <stdio.h>
 4 | 
 5 | // Sends NUM ints to the device through writeGlobal, fetches them back through
 6 | // readGlobal, and prints PASSED only if every element survived the round trip.
 7 | // Returns 0 on success and 1 on any mismatch.
 8 | int main() {
 9 |   int *hostIn = new int[NUM];
10 |   int *hostOut = new int[NUM];
11 |   for (int i = 0; i < NUM; i++) {
12 |     hostIn[i] = -1 * i;
13 |     hostOut[i] = 0;
14 |   }
15 |   writeGlobal(hostIn);
16 |   readGlobal(hostOut);
17 |   // Compare explicitly rather than with assert(): assert is compiled out when
18 |   // NDEBUG is defined, which would let a broken run still print PASSED.
19 |   int mismatches = 0;
20 |   for (int i = 0; i < NUM; i++) {
21 |     if (hostIn[i] != hostOut[i]) {
22 |       // Report only the first bad element to keep the output short.
23 |       if (mismatches == 0)
24 |         printf("FAILED: index %d expected %d got %d\n", i, hostIn[i], hostOut[i]);
25 |       mismatches++;
26 |     }
27 |   }
28 |   delete[] hostIn;
29 |   delete[] hostOut;
30 |   if (mismatches != 0)
31 |     return 1;
32 |   printf("PASSED!\n");
33 |   return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/lib/hipcl-config.cmake.in:
--------------------------------------------------------------------------------
 1 | @PACKAGE_INIT@
 2 | # Install locations exported to consumers; set_and_check also verifies that each path exists.
 3 | set_and_check( HIP_INCLUDE_DIR "@PACKAGE_HIPCL_INC_DIR@" )
 4 | set_and_check( HIP_INCLUDE_DIRS "@PACKAGE_HIPCL_INC_DIR@" )
 5 | set_and_check( HIP_LIB_INSTALL_DIR "@PACKAGE_HIPCL_LIB_DIR@" )
 6 | set_and_check( HIP_BIN_INSTALL_DIR "@PACKAGE_HIPCL_BIN_DIR@" )
 7 | # Compiler executable and the hipcl_config helper expected in the bin directory.
 8 | set_and_check(HIP_HIPCC_EXECUTABLE "@HIPCL_COMPILER@")
 9 | set_and_check(HIP_HIPCONFIG_EXECUTABLE "${HIP_BIN_INSTALL_DIR}/hipcl_config")
10 | # Exported targets file located next to this config file; presumably defines hip::hipcl (confirm).
11 | include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
12 | # HIP_LIBRARIES and HIP_LIBRARY both name the hip::hipcl target.
13 | set( HIP_LIBRARIES hip::hipcl)
14 | set( HIP_LIBRARY ${HIP_LIBRARIES})
15 | 
16 | 


--------------------------------------------------------------------------------
/hipcl_config.h.in:
--------------------------------------------------------------------------------
 1 | #cmakedefine CLANG_BIN_PATH "@CLANG_BIN_PATH@"
 2 | // Each #cmakedefine entry is filled in by CMake's configure_file from the variable of the same name.
 3 | #cmakedefine CLANG_ROOT_PATH "@CLANG_ROOT_PATH@"
 4 | 
 5 | #cmakedefine HIPCL_CXX_OPTIONS "@HIPCL_CXX_OPTIONS@"
 6 | 
 7 | #cmakedefine CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@"
 8 | // Library, data, include and binary directories.
 9 | #cmakedefine HIPCL_LIB_DIR "@HIPCL_LIB_DIR@"
10 | #cmakedefine HIPCL_DATA_DIR "@HIPCL_DATA_DIR@"
11 | #cmakedefine HIPCL_INC_DIR "@HIPCL_INC_DIR@"
12 | #cmakedefine HIPCL_BIN_DIR "@HIPCL_BIN_DIR@"
13 | // Version strings.
14 | #cmakedefine HIPCL_VERSION_FULL "@HIPCL_VERSION_FULL@"
15 | 
16 | #cmakedefine HIPCL_VERSION_FULL_PRE "@HIPCL_VERSION_FULL_PRE@"
17 | // Plain #define: this one does not depend on any CMake variable.
18 | #define HIP_PLATFORM "hipcl"
19 | 


--------------------------------------------------------------------------------
/samples/hip-cuda/RecursiveGaussian/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # RecursiveGaussian
 2 | # Registers the sample through the project's add_hipcl_test helper (argument meaning is defined by that helper).
 3 | add_hipcl_test(RecursiveGaussian RecursiveGaussian Passed RecursiveGaussian.cpp
 4 |     -q -e -t -i 32)
 5 | # Path of the input bitmap once it has been copied into the build directory.
 6 | set(DEST_IMAGE "${CMAKE_CURRENT_BINARY_DIR}/RecursiveGaussian_Input.bmp")
 7 | # Copy rule: the source-tree bitmap is copied into the build directory, producing DEST_IMAGE.
 8 | add_custom_command(OUTPUT "${DEST_IMAGE}"
 9 |   COMMAND "${CMAKE_COMMAND}" -E copy
10 |     "${CMAKE_CURRENT_SOURCE_DIR}/RecursiveGaussian_Input.bmp"
11 |     "${CMAKE_CURRENT_BINARY_DIR}"
12 |   VERBATIM)
13 | # ALL makes the copy part of the default build.
14 | add_custom_target("RecursiveGaussian_image" ALL
15 |     DEPENDS "${DEST_IMAGE}")
16 | # The RecursiveGaussian target is built only after the image target.
17 | add_dependencies(RecursiveGaussian RecursiveGaussian_image)
18 | 


--------------------------------------------------------------------------------
/spdlog/fmt/fmt.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016-2018 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | //
 9 | // Include a bundled header-only copy of fmtlib or an external one.
10 | // By default spdlog includes its own copy.
11 | //
12 | 
13 | #if !defined(SPDLOG_FMT_EXTERNAL)
14 | #ifndef FMT_HEADER_ONLY
15 | #define FMT_HEADER_ONLY
16 | #endif
17 | #ifndef FMT_USE_WINDOWS_H
18 | #define FMT_USE_WINDOWS_H 0
19 | #endif
20 | #include "bundled/core.h"
21 | #include "bundled/format.h"
22 | #else // external fmtlib
23 | #include <fmt/core.h>
24 | #include <fmt/format.h>
25 | #endif
26 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/tables.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | // #include "ocml.h"
 9 | 
10 | #include "tables.h"
11 | 
12 | // DECLARE_TABLE opens the definition of a protected-visibility __constant array; the initializer values follow the macro use.
13 | #define DECLARE_TABLE(TYPE,NAME,LENGTH) \
14 | __attribute__((visibility("protected"))) __constant TYPE TABLE_MANGLE(NAME) [ LENGTH ] = {
15 | // END_TABLE closes the initializer list opened by DECLARE_TABLE.
16 | #define END_TABLE() };
17 | 
18 | #include "besselF_table.h"
19 | #include "besselD_table.h"
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/lib/common.hh:
--------------------------------------------------------------------------------
 1 | // Category of a type as stored in OCLArgTypeInfo::type.
 2 | enum class OCLType : unsigned { POD = 0, Pointer = 1, Image = 2, Sampler = 3 };
 3 | // Space tag stored in OCLArgTypeInfo::space; the names presumably mirror OpenCL address spaces (confirm).
 4 | enum class OCLSpace : unsigned {
 5 |   Private = 0,
 6 |   Global = 1,
 7 |   Constant = 2,
 8 |   Local = 3,
 9 |   Unknown = 1000
10 | };
11 | // Description of one type: its category, its space and a size (units are not stated here).
12 | struct OCLArgTypeInfo {
13 |   OCLType type;
14 |   OCLSpace space;
15 |   size_t size;
16 | };
17 | // Description of one function: type info for each argument plus type info for the return value.
18 | struct OCLFuncInfo {
19 |   std::vector<OCLArgTypeInfo> ArgTypeInfo;
20 |   OCLArgTypeInfo retTypeInfo;
21 | };
22 | // Function info keyed by a 32-bit integer; the maps hold raw pointers (ownership is not shown here).
23 | typedef std::map<int32_t, OCLFuncInfo *> OCLFuncInfoMap;
24 | // Function info keyed by a string, presumably the function name (confirm).
25 | typedef std::map<std::string, OCLFuncInfo *> OpenCLFunctionInfoMap;
26 | // Declared here, defined elsewhere. NOTE(review): presumably reads numWords 32-bit words from stream and fills output; confirm.
27 | bool parseSPIR(int32_t *stream, size_t numWords, OpenCLFunctionInfoMap &output);
28 | 


--------------------------------------------------------------------------------
/cxxopts/INSTALL:
--------------------------------------------------------------------------------
 1 | == System installation ==
 2 | 
 3 | This library is header only. So you can either copy `include/cxxopts.hpp` to `/usr/include` or `/usr/local/include`, or add `include` to your search path.
 4 | 
 5 | == Building the examples and tests ==
 6 | 
 7 | It is preferable to build out of source. Make a build directory somewhere, and then
 8 | do the following, where `${CXXOPTS_DIR}` is the path that you checked out `cxxopts`
 9 | to:
10 | 
11 |   cmake ${CXXOPTS_DIR}
12 |   make
13 | 
14 | You can use another build tool, such as ninja.
15 | 
16 |   cmake -G Ninja ${CXXOPTS_DIR}
17 |   ninja
18 | 
19 | 
20 | To run the tests, you have to configure `cxxopts` with another flag:
21 |    cmake -D CXXOPTS_BUILD_TESTS=On ${CXXOPTS_DIR}
22 |    make
23 |    make test
24 | 


--------------------------------------------------------------------------------
/spdlog/details/null_mutex.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include <atomic>
 9 | // null, no cost dummy "mutex" and dummy "atomic" int
10 | 
11 | namespace spdlog {
12 | namespace details {
13 | struct null_mutex
14 | {
15 |     // Nothing is acquired, so locking does nothing.
16 |     void lock() {}
17 |     // Always reports success.
18 |     bool try_lock() { return true; }
19 |     // Nothing was acquired, so there is nothing to release.
20 |     void unlock() {}
21 | };
22 | 
23 | struct null_atomic_int
24 | {
25 |     // Plain int storage; none of the operations below synchronize.
26 |     int value;
27 | 
28 |     null_atomic_int() = default;
29 |     explicit null_atomic_int(int val) : value(val) {}
30 | 
31 |     // Returns the stored value; the memory-order argument is accepted and ignored.
32 |     int load(std::memory_order) const
33 |     {
34 |         return value;
35 |     }
36 | 
37 |     // Overwrites the stored value.
38 |     void store(int val)
39 |     {
40 |         value = val;
41 |     }
42 | };
43 | 
44 | } // namespace details
45 | } // namespace spdlog
46 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/tables.h:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | // Naming and access helpers for the OCML lookup tables.
 9 | // TABLE_MANGLE prefixes a table name with __ocmltbl_.
10 | #define TABLE_MANGLE(NAME) __ocmltbl_##NAME
11 | // Tables are declared extern with unspecified length; NOTE(review): definitions presumably come from DECLARE_TABLE uses elsewhere (confirm).
12 | extern __constant float TABLE_MANGLE(M32_J0)[];
13 | extern __constant float TABLE_MANGLE(M32_J1)[];
14 | extern __constant float TABLE_MANGLE(M32_Y0)[];
15 | extern __constant float TABLE_MANGLE(M32_Y1)[];
16 | extern __constant double TABLE_MANGLE(M64_J0)[];
17 | extern __constant double TABLE_MANGLE(M64_J1)[];
18 | extern __constant double TABLE_MANGLE(M64_Y0)[];
19 | extern __constant double TABLE_MANGLE(M64_Y1)[];
20 | // USE_TABLE declares a __constant TYPE pointer named PTR that points at table NAME.
21 | #define USE_TABLE(TYPE,PTR,NAME) \
22 |     __constant TYPE * PTR = TABLE_MANGLE(NAME)
23 | 
24 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/scalbF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | // scalb(x, y) in single precision: ldexp of x by y, after y is clamped to [-2**20, 2**20] and passed through BUILTIN_RINT_F32.
10 | CONSTATTR float
11 | MATH_MANGLE(scalb)(float x, float y)
12 | {
13 |     float t = BUILTIN_CLAMP_F32(y, -0x1.0p+20f, 0x1.0p+20f);
14 |     float ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F32(t));
15 |     // Cases the clamped ldexp cannot produce; each overrides ret with a quiet NaN unless finite-only mode is enabled.
16 |     if (!FINITE_ONLY_OPT()) {
17 |         ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ?  AS_FLOAT(QNANBITPATT_SP32) : ret; // NaN in, NaN out
18 |         ret = (BUILTIN_CLASS_F32(x, CLASS_NZER|CLASS_PZER) & BUILTIN_CLASS_F32(y, CLASS_PINF)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; // zero x with y = +inf is invalid; infinite x with y = +inf stays infinite
19 |         ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_NINF)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; // infinite x with y = -inf is invalid
20 |     }
21 | 
22 |     return ret;
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rhypotF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | // rhypot(x, y) in single precision: reciprocal square root of x*x + y*y, computed on operands rescaled by a power of two.
10 | CONSTATTR float
11 | MATH_MANGLE(rhypot)(float x, float y)
12 | {
13 |     float a = BUILTIN_ABS_F32(x);
14 |     float b = BUILTIN_ABS_F32(y);
15 |     float t = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b))); // larger of a and b, selected by comparing their bit patterns as unsigned integers
16 |     int e = BUILTIN_FREXP_EXP_F32(t);
17 |     a = BUILTIN_FLDEXP_F32(a, -e);
18 |     b = BUILTIN_FLDEXP_F32(b, -e);
19 |     float ret = BUILTIN_FLDEXP_F32(BUILTIN_RSQRT_F32(MATH_MAD(a, a, b*b)), -e); // scaling inputs by 2**-e scales rsqrt by 2**e, so apply 2**-e again
20 |     // Unless finite-only mode is enabled, an infinite x or y forces the result to zero.
21 |     if (!FINITE_ONLY_OPT()) {
22 |         ret = (BUILTIN_ISINF_F32(x) |
23 |                BUILTIN_ISINF_F32(y)) ?
24 |               0.0f : ret;
25 |     }
26 | 
27 |     return ret;
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/scalbD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | // scalb(x, y) in double precision: ldexp of x by y, after y is clamped to [-2**20, 2**20] and passed through BUILTIN_RINT_F64.
10 | CONSTATTR double
11 | MATH_MANGLE(scalb)(double x, double y)
12 | {
13 |     double t = BUILTIN_MIN_F64(BUILTIN_MAX_F64(y, -0x1.0p+20), 0x1.0p+20);
14 |     double ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F64(t));
15 |     // Unless finite-only mode is enabled, these cases override ret with a quiet NaN: NaN input, zero x with y = +inf, infinite x with y = -inf.
16 |     if (!FINITE_ONLY_OPT()) {
17 |         ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ?  AS_DOUBLE(QNANBITPATT_DP64) : ret;
18 |         ret = (BUILTIN_CLASS_F64(x, CLASS_NZER|CLASS_PZER) & BUILTIN_CLASS_F64(y, CLASS_PINF)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;
19 |         ret = (BUILTIN_ISINF_F64(x) & BUILTIN_CLASS_F64(y, CLASS_NINF)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;
20 |     }
21 | 
22 |     return ret;
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rcbrtF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | // rcbrt(x) in single precision: reciprocal cube root, with the sign of x applied at the end.
10 | CONSTATTR float
11 | MATH_MANGLE(rcbrt)(float x)
12 | {
13 |     float ax = BUILTIN_ABS_F32(x);
14 |     // Subnormal inputs are scaled up by 2**24 before the estimate is formed.
15 |     ax = BUILTIN_FLDEXP_F32(ax, BUILTIN_CLASS_F32(x, CLASS_NSUB|CLASS_PSUB) ? 24 : 0);
16 |     // Estimate ax**(-1/3) as exp2(-(1/3) * log2(ax)), then apply one correction step z + (z/3) * (1 - ax*z**3).
17 |     float z = BUILTIN_EXP2_F32(-0x1.555556p-2f * BUILTIN_LOG2_F32(ax));
18 |     z = MATH_MAD(MATH_MAD(z*z, -z*ax, 1.0f), 0x1.555556p-2f*z, z);
19 |     // Undo the subnormal scaling: the cube root of 2**24 is 2**8.
20 |     z = BUILTIN_FLDEXP_F32(z, BUILTIN_CLASS_F32(x, CLASS_NSUB|CLASS_PSUB) ? 8 : 0);
21 |     // NaN, zero and infinite inputs use MATH_FAST_RCP(x) in place of the estimate.
22 |     float xi = MATH_FAST_RCP(x);
23 |     z = BUILTIN_CLASS_F32(x, CLASS_SNAN|CLASS_QNAN|CLASS_PZER|CLASS_NZER|CLASS_PINF|CLASS_NINF) ? xi : z;
24 | 
25 |     return BUILTIN_COPYSIGN_F32(z, x);
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/llvm_passes/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard to use.")
 2 | set(CMAKE_CXX_EXTENSIONS OFF)
 3 | # Ask llvm-config for the directory that holds LLVM's CMake package files.
 4 | execute_process(COMMAND "${LLVM_CONFIG}" "--cmakedir"
 5 | 		OUTPUT_VARIABLE LLVM_DIR
 6 | 		OUTPUT_STRIP_TRAILING_WHITESPACE
 7 | 		RESULT_VARIABLE RES)
 8 | 
 9 | if(NOT RES EQUAL 0)
10 |   message(FATAL_ERROR "failed to run llvm-config (${LLVM_CONFIG})")
11 | endif()
12 | 
13 | message(STATUS "LLVM CMake directory: ${LLVM_DIR}")
14 | # Only the directory reported by llvm-config is searched (NO_DEFAULT_PATH).
15 | find_package(LLVM REQUIRED CONFIG HINTS "${LLVM_DIR}" PATHS "${LLVM_DIR}" NO_DEFAULT_PATH)
16 | 
17 | ######################################
18 | # Compile definitions and include paths come from the LLVM package found above.
19 | add_definitions(${LLVM_DEFINITIONS})
20 | 
21 | include_directories(${LLVM_INCLUDE_DIRS})
22 | # Disable RTTI and exceptions when the LLVM package reports them as disabled.
23 | if(NOT LLVM_ENABLE_RTTI)
24 |   add_compile_options("-fno-rtti")
25 | endif()
26 | 
27 | if(NOT LLVM_ENABLE_EH)
28 |   add_compile_options("-fno-exceptions")
29 | endif()
30 | # The pass is built as a MODULE library from HipDynMem.cpp and installed into HIPCL_LLVM_DIR.
31 | add_library(LLVMHipDynMem MODULE HipDynMem.cpp)
32 | 
33 | install(TARGETS LLVMHipDynMem
34 |         LIBRARY DESTINATION "${HIPCL_LLVM_DIR}"
35 |         ARCHIVE DESTINATION "${HIPCL_LLVM_DIR}"
36 |         )
37 | 


--------------------------------------------------------------------------------
/cxxopts/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014 Jarryd Beck
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/doc/env_variables.rst:
--------------------------------------------------------------------------------
 1 | ENV variables controlling behaviour
 2 | ---------------------------------------
 3 | 
 4 | The behavior of HIPCL can be controlled with multiple environment variables
 5 | listed below. The variables are helpful both when using and when developing
 6 | HIPCL.
 7 | 
 8 | - **HIPCL_LOGLEVEL**
 9 |   String value. Changes verbosity of log messages coming from HIPCL.
10 |   Possible values are: debug,info,warn,err,crit,off
11 |   Defaults to "err". HIPCL will log messages of this priority and higher.
12 | 
13 | - **HIPCL_PLATFORM**
14 |   Numeric value. If there are multiple OpenCL platforms on the system, setting this to a number (0..platforms-1)
15 |   will limit HipCL to that single platform. By default HipCL can access all OpenCL platforms.
16 | 
17 | - **HIPCL_DEVICE**
18 |   Numeric value. If there are multiple OpenCL devices in the selected platform, setting this to a number (0..N-1)
19 |   will limit HipCL to a single device. If HIPCL_PLATFORM is not set but HIPCL_DEVICE is,
20 |   HIPCL_PLATFORM defaults to 0.
21 | 
22 | - **HIPCL_DEVICE_TYPE**
23 |   String value. Limits OpenCL device visibility to HipCL based on device type.
24 |   Possible values are: all, cpu, gpu, default, accel
25 | 
26 | 


--------------------------------------------------------------------------------
/spdlog/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Gabi Melman.                                       
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/lib/log.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | 
 3 | #include "spdlog/spdlog.h"
 4 | #include "spdlog/sinks/stdout_color_sinks.h"
 5 | 
 6 | #ifdef __GNUC__
 7 | #pragma GCC visibility push(hidden)
 8 | #endif
 9 | 
10 | static int SpdlogWasSetup = 0;
11 | 
12 | void setupSpdlog() {
13 |   // Only the first call configures logging; later calls return immediately.
14 |   if (SpdlogWasSetup != 0)
15 |     return;
16 | 
17 |   spdlog::set_default_logger(spdlog::stderr_color_mt("HIPCL"));
18 |   spdlog::set_level(spdlog::level::debug);
19 |   spdlog::set_pattern("%n %^%l%$ [TID %t] [%E.%F] : %v");
20 | 
21 |   // Start from "err"; the HIPCL_LOGLEVEL environment variable may override it.
22 |   spdlog::level::level_enum chosen = spdlog::level::err;
23 |   if (const char *env = getenv("HIPCL_LOGLEVEL")) {
24 |     const std::string requested(env);
25 |     if (requested == "debug")
26 |       chosen = spdlog::level::debug;
27 |     else if (requested == "info")
28 |       chosen = spdlog::level::info;
29 |     else if (requested == "warn")
30 |       chosen = spdlog::level::warn;
31 |     else if (requested == "err")
32 |       chosen = spdlog::level::err;
33 |     else if (requested == "crit")
34 |       chosen = spdlog::level::critical;
35 |     else if (requested == "off")
36 |       chosen = spdlog::level::off;
37 |     // Any other value leaves the "err" default in place.
38 |   }
39 |   spdlog::set_level(chosen);
40 |   SpdlogWasSetup = 1;
41 | }
42 | 
43 | #ifdef __GNUC__
44 | #pragma GCC visibility pop
45 | #endif
46 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rhypotD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | // rhypot(x, y) in double precision: reciprocal square root of x*x + y*y, computed on operands rescaled by a power of two.
10 | CONSTATTR double
11 | MATH_MANGLE(rhypot)(double x, double y)
12 | {
13 |     double a = BUILTIN_ABS_F64(x);
14 |     double b = BUILTIN_ABS_F64(y);
15 |     double t = BUILTIN_MAX_F64(a, b);
16 |     int e = BUILTIN_FREXP_EXP_F64(t);
17 |     a = BUILTIN_FLDEXP_F64(a, -e);
18 |     b = BUILTIN_FLDEXP_F64(b, -e);
19 |     double d2 = MATH_MAD(a, a, b*b); // scaled sum of squares
20 |     double z = BUILTIN_RSQRT_F64(d2); // initial reciprocal square root
21 |     double u = MATH_MAD(-d2*z, z, 1.0); // residual 1 - d2*z*z
22 |     z = MATH_MAD(z*u, MATH_MAD(u, 0.375, 0.5), z); // refinement: z + z*u*(0.5 + 0.375*u)
23 |     double ret = BUILTIN_FLDEXP_F64(z, -e);
24 |     // Special cases, skipped in finite-only mode; later assignments override earlier ones.
25 |     if (!FINITE_ONLY_OPT()) {
26 |         ret = t == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret;
27 |         // A NaN in either input gives a quiet NaN ...
28 |         ret = BUILTIN_ISNAN_F64(x) |
29 |               BUILTIN_ISNAN_F64(y) ?  AS_DOUBLE(QNANBITPATT_DP64) : ret;
30 |         // ... unless either input is infinite, in which case the result is zero.
31 |         ret = BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) ?  0.0 : ret;
32 |     }
33 | 
34 |     return ret;
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/spdlog/sinks/msvc_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2016 Alexander Dalshov.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #if defined(_WIN32)
13 | 
14 | #include "spdlog/details/null_mutex.h"
15 | #include "spdlog/sinks/base_sink.h"
16 | 
17 | #include <winbase.h>
18 | 
19 | #include <mutex>
20 | #include <string>
21 | 
22 | namespace spdlog {
23 | namespace sinks {
24 | /*
25 |  * MSVC sink (logging using OutputDebugStringA)
26 |  */
27 | template<typename Mutex>
28 | class msvc_sink : public base_sink<Mutex>
29 | {
30 | public:
31 |     explicit msvc_sink() {}
32 | 
33 | protected:
34 |     // Formats the record, then passes the resulting text to OutputDebugStringA.
35 |     void sink_it_(const details::log_msg &msg) override
36 |     {
37 |         fmt::memory_buffer buf;
38 |         sink::formatter_->format(msg, buf);
39 |         const std::string text = fmt::to_string(buf);
40 |         OutputDebugStringA(text.c_str());
41 |     }
42 |     void flush_() override {} // flushing does nothing in this sink
43 | };
44 | // Instantiations of msvc_sink with std::mutex (_mt) and with the no-op null_mutex (_st).
45 | using msvc_sink_mt = msvc_sink<std::mutex>;
46 | using msvc_sink_st = msvc_sink<details::null_mutex>;
47 | // Alternative names for the same two types.
48 | using windebug_sink_mt = msvc_sink_mt;
49 | using windebug_sink_st = msvc_sink_st;
50 | 
51 | } // namespace sinks
52 | } // namespace spdlog
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/cxxopts/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2014 Jarryd Beck
 2 | # 
 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | # of this software and associated documentation files (the "Software"), to deal
 5 | # in the Software without restriction, including without limitation the rights
 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | # copies of the Software, and to permit persons to whom the Software is
 8 | # furnished to do so, subject to the following conditions:
 9 | # 
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | # 
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE.
20 | # The example executable is built only when CXXOPTS_BUILD_EXAMPLES is enabled; it links against the cxxopts target.
21 | if(CXXOPTS_BUILD_EXAMPLES)
22 |     add_executable(example example.cpp)
23 |     target_link_libraries(example cxxopts)
24 | endif()
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019 Tampere University
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 
21 | Portions copyright:
22 | 
23 | The Khronos Group Inc. (lib/spirv.hh, lib/CL/LICENSE)
24 | 
25 | Gabi Melman (spdlog/LICENSE)
26 | 
27 | Advanced Micro Devices, Inc. (lib/bitcode/OCML/LICENSE)
28 | 


--------------------------------------------------------------------------------
/include/CL/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2008-2015 The Khronos Group Inc.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a
 4 | copy of this software and/or associated documentation files (the
 5 | "Materials"), to deal in the Materials without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Materials, and to
 8 | permit persons to whom the Materials are furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included
12 | in all copies or substantial portions of the Materials.
13 | 
14 | MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
15 | KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
16 | SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
17 |    https://www.khronos.org/registry/
18 | 
19 | THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 | MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
26 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rcbrtD.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(rcbrt)(double x)
12 | {
13 |     double a = BUILTIN_ABS_F64(x);
14 |     int e3 = BUILTIN_FREXP_EXP_F64(a);
15 |     int e = (int)BUILTIN_RINT_F32(0x1.555556p-2f * (float)e3);
16 |     a = BUILTIN_FLDEXP_F64(a, -3*e);
17 | 
18 |     double c = (double)BUILTIN_EXP2_F32(-0x1.555556p-2f * BUILTIN_LOG2_F32((float)a));
19 | 
20 |     // Correction is c + c*(1 - a c^3)/(1 + 2 a c^3)
21 |     //  = c + c*t/(3 - 2t) where t = 1 - a c^3
22 |     // use t/(3 - 2t) ~ t/3 + 2 t^2 / 9 + 4 t^3 / 27 ...
23 |     // compute t with extra precision for better accuracy
24 |     double c3 = c * c * c;
25 |     double t = MATH_MAD(-a, c3, 1.0);
26 |     c = MATH_MAD(c, t*MATH_MAD(t, 0x1.c71c71c71c8b2p-3, 0x1.5555555555685p-2), c);
27 | 
28 |     c = BUILTIN_FLDEXP_F64(c, -e);
29 | 
30 |     if (!FINITE_ONLY_OPT()) {
31 |         c = BUILTIN_CLASS_F64(a, CLASS_PINF) ? 0.0 : c;
32 |         c = x == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : c;
33 |     }
34 | 
35 |     return BUILTIN_COPYSIGN_F64(c, x);
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/spdlog/fmt/bundled/LICENSE.rst:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012 - 2016, Victor Zverovich
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 |    this list of conditions and the following disclaimer in the documentation
12 |    and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 


--------------------------------------------------------------------------------
/spdlog/sinks/null_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #include "spdlog/details/null_mutex.h"
13 | #include "spdlog/sinks/base_sink.h"
14 | 
15 | #include <mutex>
16 | 
17 | namespace spdlog {
18 | namespace sinks {
19 | // Sink whose two overrides are empty: messages handed to it are dropped.
20 | template<typename Mutex>
21 | class null_sink : public base_sink<Mutex>
22 | {
23 | protected:
24 |     void sink_it_(const details::log_msg &) override {} // the message is ignored
25 |     void flush_() override {}                           // nothing to flush
26 | };
27 | // Instantiations of null_sink with std::mutex (_mt) and with the no-op null_mutex (_st).
28 | using null_sink_mt = null_sink<std::mutex>;
29 | using null_sink_st = null_sink<details::null_mutex>;
30 | 
31 | } // namespace sinks
32 | 
33 | template<typename Factory = default_factory>
34 | inline std::shared_ptr<logger> null_logger_mt(const std::string &logger_name)
35 | {
36 |     auto muted = Factory::template create<sinks::null_sink_mt>(logger_name); // logger over the std::mutex null sink
37 |     muted->set_level(level::off); // level is set to off before the logger is returned
38 |     return muted;
39 | }
40 | 
41 | template<typename Factory = default_factory>
42 | inline std::shared_ptr<logger> null_logger_st(const std::string &logger_name)
43 | {
44 |     auto created = Factory::template create<sinks::null_sink_st>(logger_name); // null_sink over details::null_mutex
45 |     created->set_level(level::off); // the level is switched off before the logger is handed out
46 |     return created;
47 | }
48 | 
49 | } // namespace spdlog
50 | 


--------------------------------------------------------------------------------
/cxxopts/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if (CXXOPTS_BUILD_TESTS) # everything below is skipped unless CXXOPTS_BUILD_TESTS is true
 2 |     add_executable(options_test main.cpp options.cpp)
 3 |     target_link_libraries(options_test cxxopts)
 4 |     add_test(options options_test)
 5 | 
 6 |     # Configure and build find-package-test via "ctest --build-and-test", with cxxopts_DIR set to PROJECT_BINARY_DIR.
 7 |     add_test(find-package-test ${CMAKE_CTEST_COMMAND}
 8 |         -C ${CMAKE_BUILD_TYPE}
 9 |         --build-and-test
10 |         "${CMAKE_CURRENT_SOURCE_DIR}/find-package-test"
11 |         "${CMAKE_CURRENT_BINARY_DIR}/find-package-test"
12 |         --build-generator ${CMAKE_GENERATOR}
13 |         --build-makeprogram ${CMAKE_MAKE_PROGRAM}
14 |         --build-options
15 |         "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
16 |         "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
17 |         "-Dcxxopts_DIR=${PROJECT_BINARY_DIR}"
18 |     )
19 | 
20 |     # Same --build-and-test procedure for add-subdirectory-test; no cxxopts_DIR is passed to that project.
21 |     add_test(add-subdirectory-test ${CMAKE_CTEST_COMMAND}
22 |         -C ${CMAKE_BUILD_TYPE}
23 |         --build-and-test
24 |         "${CMAKE_CURRENT_SOURCE_DIR}/add-subdirectory-test"
25 |         "${CMAKE_CURRENT_BINARY_DIR}/add-subdirectory-test"
26 |         --build-generator ${CMAKE_GENERATOR}
27 |         --build-makeprogram ${CMAKE_MAKE_PROGRAM}
28 |         --build-options
29 |         "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
30 |         "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
31 |     )
32 |     # link_test combines link_a.cpp and link_b.cpp with cxxopts; it is only built, not registered with add_test.
33 |     add_executable(link_test link_a.cpp link_b.cpp)
34 |     target_link_libraries(link_test cxxopts)
35 | endif()
36 | 


--------------------------------------------------------------------------------
/spdlog/details/log_msg.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/common.h"
 9 | #include "spdlog/details/os.h"
10 | 
11 | #include <string>
12 | #include <utility>
13 | 
14 | namespace spdlog {
15 | namespace details {
16 | struct log_msg
17 | {
18 |     // Builds a message record; source and payload are always stored, only time/thread_id are compile-time optional.
19 |     log_msg(source_loc loc, const std::string *loggers_name, level::level_enum lvl, string_view_t view)
20 |         : logger_name(loggers_name)
21 |         , level(lvl)
22 | #ifndef SPDLOG_NO_DATETIME
23 |         , time(os::now())
24 | #endif
25 |         // with SPDLOG_NO_THREAD_ID defined, thread_id keeps its in-class default of 0
26 | #ifndef SPDLOG_NO_THREAD_ID
27 |         , thread_id(os::thread_id())
28 | #endif
29 |         , source(loc)
30 |         , payload(view)
31 |     {
32 |     }
33 |     // Convenience overload: same as above with a default-constructed source_loc.
34 |     log_msg(const std::string *loggers_name, level::level_enum lvl, string_view_t view)
35 |         : log_msg(source_loc{}, loggers_name, lvl, view)
36 |     {
37 |     }
38 | 
39 |     log_msg(const log_msg &other) = default;
40 | 
41 |     const std::string *logger_name{nullptr};
42 |     level::level_enum level{level::off};
43 |     log_clock::time_point time;
44 |     size_t thread_id{0};
45 |     size_t msg_id{0};
46 | 
47 |     // wrapping the formatted text with color (updated by pattern_formatter).
48 |     mutable size_t color_range_start{0};
49 |     mutable size_t color_range_end{0};
50 | 
51 |     source_loc source;
52 |     const string_view_t payload;
53 | };
54 | } // namespace details
55 | } // namespace spdlog
56 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rlen3F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(rlen3)(float x, float y, float z)
12 | { // Returns the reciprocal square root of x*x + y*y + z*z, computed on rescaled magnitudes.
13 |     float a = BUILTIN_ABS_F32(x);
14 |     float b = BUILTIN_ABS_F32(y);
15 |     float c = BUILTIN_ABS_F32(z);
16 |     // Compare-exchange steps on the raw bit patterns (viewed as unsigned ints) of |x|, |y|, |z|.
17 |     float a1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b)));
18 |     float b1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a), AS_UINT(b)));
19 | 
20 |     a        = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a1), AS_UINT(c)));
21 |     float c1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a1), AS_UINT(c)));
22 | 
23 |     b        = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b1), AS_UINT(c1)));
24 |     c        = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b1), AS_UINT(c1)));
25 |     // a now holds the largest bit pattern and c the smallest; all three are scaled by 2^-e below.
26 |     int e = BUILTIN_FREXP_EXP_F32(a);
27 |     a = BUILTIN_FLDEXP_F32(a, -e);
28 |     b = BUILTIN_FLDEXP_F32(b, -e);
29 |     c = BUILTIN_FLDEXP_F32(c, -e);
30 |     // rsqrt of the scaled sum of squares (taking MATH_MAD(p, q, r) as p*q + r).
31 |     float ret = BUILTIN_RSQRT_F32(MATH_MAD(a, a, MATH_MAD(b, b, c*c)));
32 |     ret = BUILTIN_FLDEXP_F32(ret, -e); // inputs scaled by 2^-e make rsqrt 2^e too large; this undoes it
33 | 
34 |     if (!FINITE_ONLY_OPT()) {
35 |         ret = (BUILTIN_ISINF_F32(x) |
36 |                BUILTIN_ISINF_F32(y) |
37 |                BUILTIN_ISINF_F32(z)) ? 0.0f : ret;
38 |     }
39 |     // In the non-finite-only build, any infinite input forces a result of 0.0f (see the select above).
40 |     return ret;
41 | }
42 | 
43 | 


--------------------------------------------------------------------------------
/spdlog/details/console_globals.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | //
 3 | // Copyright(c) 2018 Gabi Melman.
 4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 5 | //
 6 | 
 7 | #include "spdlog/details/null_mutex.h"
 8 | #include <cstdio>
 9 | #include <mutex>
10 | 
11 | #ifdef _WIN32
12 | 
13 | #ifndef NOMINMAX
14 | #define NOMINMAX // prevent windows redefining min/max
15 | #endif
16 | 
17 | #ifndef WIN32_LEAN_AND_MEAN
18 | #define WIN32_LEAN_AND_MEAN
19 | #endif
20 | 
21 | #include <windows.h>
22 | #endif
23 | 
24 | namespace spdlog {
25 | namespace details {
26 | struct console_stdout // static accessors for the standard output stream
27 | {
28 |     static std::FILE *stream()
29 |     {
30 |         return stdout;
31 |     }
32 | #ifdef _WIN32 // handle() exists only on Windows builds
33 |     static HANDLE handle()
34 |     {
35 |         return ::GetStdHandle(STD_OUTPUT_HANDLE);
36 |     }
37 | #endif
38 | };
39 | 
40 | struct console_stderr // static accessors for the standard error stream
41 | {
42 |     static std::FILE *stream()
43 |     {
44 |         return stderr;
45 |     }
46 | #ifdef _WIN32 // handle() exists only on Windows builds
47 |     static HANDLE handle()
48 |     {
49 |         return ::GetStdHandle(STD_ERROR_HANDLE);
50 |     }
51 | #endif
52 | };
53 | 
54 | struct console_mutex // hands out one shared std::mutex
55 | {
56 |     using mutex_t = std::mutex;
57 |     static mutex_t &mutex()
58 |     {
59 |         static mutex_t shared_instance; // function-local static: every call returns this same object
60 |         return shared_instance;
61 |     }
62 | };
63 | 
64 | struct console_nullmutex // same shape as console_mutex, but the mutex type is null_mutex
65 | {
66 |     using mutex_t = null_mutex;
67 |     static mutex_t &mutex()
68 |     {
69 |         static mutex_t shared_instance; // function-local static: every call returns this same object
70 |         return shared_instance;
71 |     }
72 | };
73 | } // namespace details
74 | } // namespace spdlog
75 | 


--------------------------------------------------------------------------------
/spdlog/sinks/ostream_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #include "spdlog/details/null_mutex.h"
13 | #include "spdlog/sinks/base_sink.h"
14 | 
15 | #include <mutex>
16 | #include <ostream>
17 | 
18 | namespace spdlog {
19 | namespace sinks {
20 | template<typename Mutex>
21 | class ostream_sink final : public base_sink<Mutex> // writes formatted messages to a caller-owned std::ostream
22 | {
23 | public:
24 |     explicit ostream_sink(std::ostream &os, bool force_flush = false)
25 |         : ostream_(os)
26 |         , force_flush_(force_flush)
27 |     {
28 |     }
29 |     ostream_sink(const ostream_sink &) = delete;
30 |     ostream_sink &operator=(const ostream_sink &) = delete;
31 | 
32 | protected:
33 |     void sink_it_(const details::log_msg &msg) override
34 |     {
35 |         fmt::memory_buffer text;
36 |         sink::formatter_->format(msg, text);
37 |         const auto byte_count = static_cast<std::streamsize>(text.size());
38 |         ostream_.write(text.data(), byte_count);
39 |         if (force_flush_)
40 |             ostream_.flush();
41 |         // when force_flush_ is false, explicit flushing is left to flush_()
42 |     }
43 | 
44 |     void flush_() override
45 |     {
46 |         ostream_.flush();
47 |     }
48 | 
49 |     std::ostream &ostream_; // reference only; the stream is not owned by the sink
50 |     bool force_flush_;
51 | };
52 | 
53 | using ostream_sink_mt = ostream_sink<std::mutex>;
54 | using ostream_sink_st = ostream_sink<details::null_mutex>;
55 | 
56 | } // namespace sinks
57 | } // namespace spdlog
58 | 


--------------------------------------------------------------------------------
/spdlog/sinks/sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #include "spdlog/details/log_msg.h"
 9 | #include "spdlog/details/pattern_formatter.h"
10 | #include "spdlog/formatter.h"
11 | 
12 | namespace spdlog {
13 | namespace sinks {
14 | class sink // abstract base: log, flush, set_pattern and set_formatter are pure virtual
15 | {
16 | public:
17 |     sink()
18 |         : level_(level::trace)
19 |         , formatter_(new pattern_formatter())
20 |     {
21 |     }
22 |     // Takes ownership of a caller-supplied pattern_formatter instead of creating a default one.
23 |     explicit sink(std::unique_ptr<spdlog::pattern_formatter> formatter)
24 |         : level_(level::trace)
25 |         , formatter_(std::move(formatter))
26 |     {
27 |     }
28 |     // Virtual destructor plus the pure-virtual interface that derived sinks must implement.
29 |     virtual ~sink() = default;
30 |     virtual void log(const details::log_msg &msg) = 0;
31 |     virtual void flush() = 0;
32 |     virtual void set_pattern(const std::string &pattern) = 0;
33 |     virtual void set_formatter(std::unique_ptr<spdlog::formatter> sink_formatter) = 0;
34 |     // True when msg_level is at or above this sink's threshold (relaxed load of level_).
35 |     bool should_log(level::level_enum msg_level) const
36 |     {
37 |         return msg_level >= level_.load(std::memory_order_relaxed);
38 |     }
39 |     // Replaces the threshold; store() is called with its default memory order.
40 |     void set_level(level::level_enum log_level)
41 |     {
42 |         level_.store(log_level);
43 |     }
44 |     // Current threshold, converted back to level_enum.
45 |     level::level_enum level() const
46 |     {
47 |         return static_cast<spdlog::level::level_enum>(level_.load(std::memory_order_relaxed));
48 |     }
49 | 
50 | protected:
51 |     // sink log level - both constructors initialize it to level::trace
52 |     level_t level_;
53 | 
54 |     // sink formatter - the default constructor installs a default-constructed pattern_formatter
55 |     std::unique_ptr<spdlog::formatter> formatter_;
56 | };
57 | 
58 | } // namespace sinks
59 | } // namespace spdlog
60 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/i0F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(i0)(float x)
12 | { // Float i0 (presumably the order-0 modified Bessel function of the first kind; confirm against the OCML docs).
13 |     x = BUILTIN_ABS_F32(x);
14 |     // Only |x| is used from here on; the sign of the argument is discarded.
15 |     float ret;
16 |     // Two evaluation paths, split at |x| = 8.
17 |     if (x < 8.0f) {
18 |         float t = 0.25f * x * x; // polynomial variable: x*x/4
19 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
20 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
21 |               MATH_MAD(t, 
22 |                   0x1.38d760p-43f, 0x1.7fd5c6p-38f), 0x1.66ffc8p-31f), 0x1.4ecb6ep-25f),
23 |                   0x1.033c70p-19f), 0x1.233bb2p-14f), 0x1.c71db2p-10f), 0x1.c71c5ep-6f),
24 |                   0x1.000000p-2f), 0x1.000000p+0f);
25 |         ret = MATH_MAD(t, ret, 1.0f); // 1 + t * (nested polynomial in t), taking MATH_MAD(p, q, r) as p*q + r
26 |     } else {
27 |         float t = MATH_FAST_RCP(x); // polynomial variable: fast reciprocal of x
28 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
29 |               MATH_MAD(t, 
30 |                   0x1.c49916p-2f, -0x1.110f5ep-5f), 0x1.2a130ap-5f), 0x1.c68702p-6f),
31 |                   0x1.9890aep-5f), 0x1.988450p-2f);
32 |         float xs = x - 88.0f;
33 |         float e1 = MATH_MANGLE(exp)(x > 88.0f ? xs : x); // above 88 the exp argument is reduced by 88 ...
34 |         float e2 = x > 88.0f ? 0x1.f1056ep+126f : 1.0f; // ... and this factor is multiplied back in (presumably ~exp(88); confirm)
35 |         ret = e1 * BUILTIN_RSQRT_F32(x) * ret * e2;
36 |     }
37 |     // Unless the finite-only option is set, an input classified as +inf or NaN is returned as-is.
38 |     if  (!FINITE_ONLY_OPT()) {
39 |         ret = BUILTIN_CLASS_F32(x, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? x : ret;
40 |     }
41 | 
42 |     return ret;
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/i1F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(i1)(float x)
12 | { // Float i1 (presumably the order-1 modified Bessel function of the first kind; confirm against the OCML docs).
13 |     float a = BUILTIN_ABS_F32(x);
14 |     // The value is computed for |x|; the sign of x is re-applied on return.
15 |     float ret;
16 |     // Two evaluation paths, split at |x| = 8.
17 |     if (a < 8.0f) {
18 |         a *= 0.5f; // a is now |x|/2 for the rest of this branch
19 |         float t = a * a;
20 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
21 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
22 |                   0x1.882dd2p-40f, 0x1.af97f6p-35f), 0x1.66a3eap-28f), 0x1.251b32p-22f),
23 |                   0x1.84cbb6p-17f), 0x1.6c0d4ap-12f), 0x1.c71d3ap-8f), 0x1.555550p-4f),
24 |                   0x1.000000p-1f);
25 |         ret = MATH_MAD(t, a*ret, a); // a + t * a * (nested polynomial in t), taking MATH_MAD(p, q, r) as p*q + r
26 |     } else {
27 |         float t = MATH_FAST_RCP(a); // polynomial variable: fast reciprocal of |x|
28 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
29 |               MATH_MAD(t, 
30 |                   -0x1.06de32p-1f, 0x1.043b22p-5f), -0x1.925276p-5f), -0x1.7c15c8p-5f),
31 |                   -0x1.3266ccp-3f), 0x1.988456p-2f);
32 |         // Above 88 the exp argument is reduced by 88 and the factor e2 is multiplied back in (presumably ~exp(88); confirm).
33 |         float as = a - 88.0f;
34 |         float e1 = MATH_MANGLE(exp)(a > 88.0f ? as : a);
35 |         float e2 = a > 88.0f ? 0x1.f1056ep+126f : 1.0f;
36 |         ret = e1 * BUILTIN_RSQRT_F32(a) * ret * e2;
37 |     }
38 |     // Unless the finite-only option is set, a magnitude classified as +inf or NaN replaces the computed value.
39 |     if  (!FINITE_ONLY_OPT()) {
40 |         ret = BUILTIN_CLASS_F32(a, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? a : ret;
41 |     }
42 |     // Give the result the sign of the original argument.
43 |     return BUILTIN_COPYSIGN_F32(ret, x);
44 | }
45 | 
46 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rlen3D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(rlen3)(double x, double y, double z)
12 | { // Returns the reciprocal square root of x*x + y*y + z*z, computed on rescaled magnitudes.
13 |     double a = BUILTIN_ABS_F64(x);
14 |     double b = BUILTIN_ABS_F64(y);
15 |     double c = BUILTIN_ABS_F64(z);
16 |     // Max/min compare-exchange steps: afterwards a is the largest magnitude and c the smallest.
17 |     double a1 = BUILTIN_MAX_F64(a, b);
18 |     double b1 = BUILTIN_MIN_F64(a, b);
19 | 
20 |     a         = BUILTIN_MAX_F64(a1, c);
21 |     double c1 = BUILTIN_MIN_F64(a1, c);
22 | 
23 |     b         = BUILTIN_MAX_F64(b1, c1);
24 |     c         = BUILTIN_MIN_F64(b1, c1);
25 |     // Scale all three by 2^-e, where e is the frexp exponent of the largest magnitude.
26 |     int e = BUILTIN_FREXP_EXP_F64(a);
27 |     a = BUILTIN_FLDEXP_F64(a, -e);
28 |     b = BUILTIN_FLDEXP_F64(b, -e);
29 |     c = BUILTIN_FLDEXP_F64(c, -e);
30 |     // v starts as the builtin rsqrt of the scaled sum of squares d2 and is then corrected once.
31 |     double d2 = MATH_MAD(a, a, MATH_MAD(b, b, c*c));
32 |     double v = BUILTIN_RSQRT_F64(d2);
33 |     double u = MATH_MAD(-d2*v, v, 1.0); // u = 1 - d2*v*v, taking MATH_MAD(p, q, r) as p*q + r
34 |     v = MATH_MAD(v*u, MATH_MAD(u, 0.375, 0.5), v); // v += v*u*(0.5 + 0.375*u)
35 |     double ret = BUILTIN_FLDEXP_F64(v, -e); // undo the input scaling on the result
36 |     // Special cases are applied in order, so a later select overrides an earlier one.
37 |     if (!FINITE_ONLY_OPT()) {
38 |         ret = a == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; // largest magnitude is zero -> +inf bit pattern
39 |         // any NaN input -> quiet-NaN bit pattern
40 |         ret = (BUILTIN_ISNAN_F64(x) |
41 |                BUILTIN_ISNAN_F64(y) |
42 |                BUILTIN_ISNAN_F64(z)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;
43 |         // any infinite input -> 0.0 (applied last, so it also wins over the NaN case)
44 |         ret = (BUILTIN_ISINF_F64(x) |
45 |                BUILTIN_ISINF_F64(y) |
46 |                BUILTIN_ISINF_F64(z)) ? 0.0 : ret;
47 |     }
48 | 
49 |     return ret;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/spdlog/details/circular_q.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2018 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | // circular queue view of std::vector.
 7 | #pragma once
 8 | 
 9 | #include <vector>
10 | 
11 | namespace spdlog {
12 | namespace details {
13 | template<typename T>
14 | class circular_q // fixed-size ring of T stored in a std::vector<T>
15 | {
16 | public:
17 |     using item_type = T;
18 |     // Allocates max_items + 1 slots up front; the extra slot lets "full" be told apart from "empty".
19 |     explicit circular_q(size_t max_items)
20 |         : max_items_(max_items + 1) // one item is reserved as marker for full q
21 |         , v_(max_items_)
22 |     {
23 |     }
24 | 
25 |     // push back, overrun (oldest) item if no room left
26 |     void push_back(T &&item)
27 |     {
28 |         v_[tail_] = std::move(item);
29 |         tail_ = (tail_ + 1) % max_items_;
30 | 
31 |         if (tail_ == head_) // overrun last item if full
32 |         {
33 |             head_ = (head_ + 1) % max_items_;
34 |             ++overrun_counter_;
35 |         }
36 |     }
37 | 
38 |     // Pop item from front.
39 |     // If there are no elements in the container, the behavior is undefined.
40 |     void pop_front(T &popped_item)
41 |     {
42 |         popped_item = std::move(v_[head_]);
43 |         head_ = (head_ + 1) % max_items_;
44 |     }
45 |     // The queries below do not modify the queue, so they are const like overrun_counter().
46 |     bool empty() const
47 |     {
48 |         return tail_ == head_;
49 |     }
50 | 
51 |     bool full() const
52 |     {
53 |         // head is ahead of the tail by 1
54 |         return ((tail_ + 1) % max_items_) == head_;
55 |     }
56 |     // Number of times push_back() advanced head_ because the queue was full.
57 |     size_t overrun_counter() const
58 |     {
59 |         return overrun_counter_;
60 |     }
61 | 
62 | private:
63 |     size_t max_items_; // declared before v_ so that v_(max_items_) sees the initialized value
64 |     typename std::vector<T>::size_type head_ = 0;
65 |     typename std::vector<T>::size_type tail_ = 0;
66 | 
67 |     std::vector<T> v_;
68 | 
69 |     size_t overrun_counter_ = 0;
70 | };
71 | } // namespace details
72 | } // namespace spdlog
73 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/oclc.h:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries (orig. repo location: oclc/inc)
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #ifndef OCLC_H
 9 | #define OCLC_H
10 | 
11 | // These constants are used to control behavior of the libraries which
12 | // check them.
13 | //
14 | // The current list of controls is as follows:
15 | //
16 | //    __constant bool __oclc_finite_only_opt
17 | //        - the application will only pass finite arguments and expects only finite results
18 | //
19 | //    __constant bool __oclc_unsafe_math_opt
20 | //        - the application accepts optimizations that may lower the accuracy of the results
21 | //
22 | //    __constant bool __oclc_daz_opt
23 | //        - the application allows subnormal inputs or outputs to be flushed to zero
24 | //
25 | //    __constant bool __oclc_correctly_rounded_sqrt32
26 | //        - the application is expecting sqrt(float) to produce a correctly rounded result
27 | //
28 | //    __constant int __oclc_ISA_version
29 | //        - the ISA version of the target device
30 | //
31 | // it is expected that the implementation provides these as if declared from the following
32 | // C code:
33 | //
34 | //     const bool int __oclc_... = 0; // Or 1
35 | //
36 | // allowing them and any control flow associated with them to be optimized away
37 | 
38 | extern const __constant bool __oclc_finite_only_opt;
39 | extern const __constant bool __oclc_unsafe_math_opt;
40 | extern const __constant bool __oclc_daz_opt;
41 | extern const __constant bool __oclc_correctly_rounded_sqrt32;
42 | extern const __constant int __oclc_ISA_version;
43 | 
44 | #endif // OCLC_H
45 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/mathD.h.orig:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | // OCML prototypes
 9 | //#include "ocml.h"
10 | 
11 | // Tables
12 | #include "tables.h"
13 | 
14 | // Builtins
15 | //#include "builtins.h"
16 | 
17 | // Mangling
18 | #define MATH_MANGLE(N) OCML_MANGLE_F64(N)
19 | #define MATH_PRIVATE(N) MANGLE3(__ocmlpriv,N,f64)
20 | 
21 | // Optimization Controls
22 | //#include "opts.h"
23 | 
24 | // Attributes
25 | #define ALIGNEDATTR(X) __attribute__((aligned(X)))
26 | #define INLINEATTR __attribute__((always_inline))
27 | #define PUREATTR __attribute__((pure))
28 | #define CONSTATTR __attribute__((const))
29 | 
30 | // Math controls
31 | //#include "privD.h"
32 | 
33 | // Bit patterns
34 | #define SIGNBIT_DP64      0x8000000000000000L
35 | #define EXSIGNBIT_DP64    0x7fffffffffffffffL
36 | #define EXPBITS_DP64      0x7ff0000000000000L
37 | #define MANTBITS_DP64     0x000fffffffffffffL
38 | #define ONEEXPBITS_DP64   0x3ff0000000000000L
39 | #define TWOEXPBITS_DP64   0x4000000000000000L
40 | #define HALFEXPBITS_DP64  0x3fe0000000000000L
41 | #define IMPBIT_DP64       0x0010000000000000L
42 | #define QNANBITPATT_DP64  0x7ff8000000000000L
43 | #define INDEFBITPATT_DP64 0xfff8000000000000L
44 | #define PINFBITPATT_DP64  0x7ff0000000000000L
45 | #define NINFBITPATT_DP64  0xfff0000000000000L
46 | #define EXPBIAS_DP64      1023
47 | #define EXPSHIFTBITS_DP64 52
48 | #define BIASEDEMIN_DP64   1
49 | #define EMIN_DP64         -1022
50 | #define BIASEDEMAX_DP64   2046
51 | #define EMAX_DP64         1023
52 | #define LAMBDA_DP64       1.0e300
53 | #define MANTLENGTH_DP64   53
54 | #define BASEDIGITS_DP64   15
55 | 
56 | 


--------------------------------------------------------------------------------
/spdlog/sinks/stdout_color_sinks.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2018 spdlog
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #ifdef _WIN32
13 | #include "spdlog/sinks/wincolor_sink.h"
14 | #else
15 | #include "spdlog/sinks/ansicolor_sink.h"
16 | #endif
17 | 
18 | namespace spdlog {
19 | namespace sinks {
20 | #ifdef _WIN32
21 | using stdout_color_sink_mt = wincolor_stdout_sink_mt;
22 | using stdout_color_sink_st = wincolor_stdout_sink_st;
23 | using stderr_color_sink_mt = wincolor_stderr_sink_mt;
24 | using stderr_color_sink_st = wincolor_stderr_sink_st;
25 | #else
26 | using stdout_color_sink_mt = ansicolor_stdout_sink_mt;
27 | using stdout_color_sink_st = ansicolor_stdout_sink_st;
28 | using stderr_color_sink_mt = ansicolor_stderr_sink_mt;
29 | using stderr_color_sink_st = ansicolor_stderr_sink_st;
30 | #endif
31 | } // namespace sinks
32 | 
33 | template<typename Factory = default_factory>
34 | inline std::shared_ptr<logger> stdout_color_mt(const std::string &logger_name)
35 | { // logger over the _mt stdout colour sink (wincolor alias on _WIN32, ansicolor alias otherwise)
36 |     return Factory::template create<sinks::stdout_color_sink_mt>(logger_name);
37 | }
38 | 
39 | template<typename Factory = default_factory>
40 | inline std::shared_ptr<logger> stdout_color_st(const std::string &logger_name)
41 | { // logger over the _st stdout colour sink (wincolor alias on _WIN32, ansicolor alias otherwise)
42 |     return Factory::template create<sinks::stdout_color_sink_st>(logger_name);
43 | }
44 | 
45 | template<typename Factory = default_factory>
46 | inline std::shared_ptr<logger> stderr_color_mt(const std::string &logger_name)
47 | { // logger over the _mt stderr colour sink (wincolor alias on _WIN32, ansicolor alias otherwise)
48 |     return Factory::template create<sinks::stderr_color_sink_mt>(logger_name);
49 | }
50 | 
51 | template<typename Factory = default_factory>
52 | inline std::shared_ptr<logger> stderr_color_st(const std::string &logger_name)
53 | { // logger over the _st stderr colour sink (wincolor alias on _WIN32, ansicolor alias otherwise)
54 |     return Factory::template create<sinks::stderr_color_sink_st>(logger_name); // _st sink, matching the function name
55 | }
56 | } // namespace spdlog
57 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rlen4F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(rlen4)(float x, float y, float z, float w)
12 | { // Returns the reciprocal square root of x*x + y*y + z*z + w*w, computed on rescaled magnitudes.
13 |     float a = BUILTIN_ABS_F32(x);
14 |     float b = BUILTIN_ABS_F32(y);
15 |     float c = BUILTIN_ABS_F32(z);
16 |     float d = BUILTIN_ABS_F32(w);
17 |     // Compare-exchange steps on the raw bit patterns (viewed as unsigned ints) of the four magnitudes.
18 |     float a1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a), AS_UINT(b)));
19 |     float b1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a), AS_UINT(b)));
20 | 
21 |     float c1 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(c), AS_UINT(d)));
22 |     float d1 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(c), AS_UINT(d)));
23 | 
24 |     a        = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(a1), AS_UINT(c1)));
25 |     float c2 = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(a1), AS_UINT(c1)));
26 | 
27 |     float b2 = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b1), AS_UINT(d1)));
28 |     d        = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b1), AS_UINT(d1)));
29 | 
30 |     b        = AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(b2), AS_UINT(c2)));
31 |     c        = AS_FLOAT(BUILTIN_MIN_U32(AS_UINT(b2), AS_UINT(c2)));
32 |     // a now holds the largest bit pattern and d the smallest; all four are scaled by 2^-e below.
33 |     int e = BUILTIN_FREXP_EXP_F32(a);
34 |     a = BUILTIN_FLDEXP_F32(a, -e);
35 |     b = BUILTIN_FLDEXP_F32(b, -e);
36 |     c = BUILTIN_FLDEXP_F32(c, -e);
37 |     d = BUILTIN_FLDEXP_F32(d, -e);
38 |     // rsqrt of the scaled sum of squares, then a second 2^-e scaling to undo the input scaling.
39 |     float ret = BUILTIN_FLDEXP_F32(BUILTIN_RSQRT_F32(MATH_MAD(a, a, MATH_MAD(b, b, MATH_MAD(c, c, d*d)))), -e);
40 | 
41 |     if (!FINITE_ONLY_OPT()) {
42 |         ret = (BUILTIN_ISINF_F32(x) |
43 |                BUILTIN_ISINF_F32(y) |
44 |                BUILTIN_ISINF_F32(z) |
45 |                BUILTIN_ISINF_F32(w)) ? 0.0f : ret;
46 |     }
47 |     // In the non-finite-only build, any infinite input forces a result of 0.0f (see the select above).
48 |     return ret;
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/cmake/run_make2cmake.cmake:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Computes dependencies using HIPCC
 3 | ###############################################################################
 4 | 
 5 | ###############################################################################
 6 | # This file converts dependency files generated using hipcc to a format that
 7 | # cmake can understand.
 8 | 
 9 | # Input variables:
10 | #
11 | # input_file:STRING=<> Dependency file to parse. Required argument
12 | # output_file:STRING=<> Output file to generate. Required argument
13 | 
14 | if(NOT input_file OR NOT output_file)
15 |     message(FATAL_ERROR "You must specify input_file and output_file on the command line")
16 | endif()
17 | # Define the list up front so the list() calls further down are valid even when the input file is empty.
18 | file(READ ${input_file} depend_text)
19 | set(dependency_list "")
20 | if (NOT "${depend_text}" STREQUAL "")
21 |     string(REPLACE " /" "\n/" depend_text ${depend_text})
22 |     string(REGEX REPLACE "^.*:" "" depend_text ${depend_text})
23 |     string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text})
24 | 
25 |     # depend_text is now a ;-list of paths: keep the entries that exist and are not directories.
26 | 
27 |     foreach(file ${depend_text})
28 |         string(REGEX REPLACE "^ +" "" file ${file})
29 |         if(NOT EXISTS "${file}")
30 |             message(WARNING " Removing non-existent dependency file: ${file}")
31 |             set(file "")
32 |         endif()
33 |         # if(NOT IS_DIRECTORY "") is true, so a cleared entry must be rejected before the directory test.
34 |         if(file AND NOT IS_DIRECTORY "${file}")
35 |             get_filename_component(file_absolute "${file}" ABSOLUTE)
36 |             list(APPEND dependency_list "${file_absolute}")
37 |         endif()
38 |     endforeach()
39 | endif()
40 | 
41 | # Remove the duplicate entries and sort them.
42 | list(REMOVE_DUPLICATES dependency_list)
43 | list(SORT dependency_list)
44 | 
45 | foreach(file ${dependency_list})
46 |     set(hip_hipcc_depend "${hip_hipcc_depend} \"${file}\"\n")
47 | endforeach()
48 | # Emit a CMake fragment that sets HIP_HIPCC_DEPEND to the quoted, newline-separated paths.
49 | file(WRITE ${output_file} "# Generated by: FindHIP.cmake. Do not edit.\nSET(HIP_HIPCC_DEPEND\n ${hip_hipcc_depend})\n\n")
50 | # vim: ts=4:sw=4:expandtab:smartindent
51 | 


--------------------------------------------------------------------------------
/samples/4_shfl/broadcast.cpp:
--------------------------------------------------------------------------------
 1 | #include "hip/hip_runtime.h"
 2 | 
 3 | #include <iostream>
 4 | 
 5 | #define BUF_SIZE 256
 6 | #define WARP_MASK 0x7
 7 | #define WARP_SUM 28
 8 | 
 9 | #define HIPCHECK(code)                                                         \
10 |   do {                                                                         \
11 |     hiperr = code;                                                             \
12 |     if (hiperr != hipSuccess) {                                                \
13 |       std::cerr << "ERROR on line " << __LINE__ << ": " << (unsigned)hiperr    \
14 |                 << "\n";                                                       \
15 |       return 1;                                                                \
16 |     }                                                                          \
17 |   } while (0)
18 | 
19 | __global__ void bcast(int *out) { // each thread writes one int to out at its global thread index
20 |   int value = (hipThreadIdx_x & WARP_MASK); // low three bits of the thread index: 0..7
21 |   // XOR-shuffle exchange with masks 1, 2, 4: every step adds the value held by the lane whose id differs in that bit.
22 |   for (int mask = 1; mask < WARP_MASK; mask *= 2)
23 |     value += __shfl_xor(value, mask);
24 |   // The host check expects every element to equal WARP_SUM (28 = 0 + 1 + ... + 7).
25 |   size_t oi = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
26 | 
27 |   out[oi] = value;
28 | }
29 | 
30 | int main() {
31 |   // Host driver: launches bcast on one block of BUF_SIZE threads and compares every output element with WARP_SUM.
32 |   int *out = (int *)malloc(sizeof(int) * BUF_SIZE);
33 |   int *d_out;
34 |   hipError_t hiperr = hipSuccess; // assigned inside every HIPCHECK expansion
35 |   // NOTE(review): on failure HIPCHECK returns 1 from main without freeing out or d_out.
36 |   HIPCHECK(hipMalloc((void **)&d_out, sizeof(int) * BUF_SIZE));
37 | 
38 |   hipLaunchKernelGGL(bcast, dim3(1), dim3(BUF_SIZE), 0, 0, d_out);
39 |   HIPCHECK(hipGetLastError());
40 |   // Copy the device results back into the host buffer.
41 |   HIPCHECK(
42 |       hipMemcpy(out, d_out, sizeof(int) * BUF_SIZE, hipMemcpyDeviceToHost));
43 |   // Count and print every element that differs from WARP_SUM.
44 |   size_t errs = 0;
45 |   for (int i = 0; i < BUF_SIZE; i++) {
46 |     if (out[i] != WARP_SUM) {
47 |       std::cout << "ERROR @ " << i << ":  " << out[i] << "\n";
48 |       ++errs;
49 |     }
50 |   }
51 | 
52 |   free(out);
53 |   HIPCHECK(hipFree(d_out));
54 |   // Exit status: 1 if any element mismatched, 0 otherwise.
55 |   if (errs != 0) {
56 |     std::cout << "FAILED: " << errs << " errors\n";
57 |     return 1;
58 |   } else {
59 |     std::cout << "PASSED!\n";
60 |     return 0;
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/include/CL/opencl.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2008-2015 The Khronos Group Inc.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and/or associated documentation files (the
 6 |  * "Materials"), to deal in the Materials without restriction, including
 7 |  * without limitation the rights to use, copy, modify, merge, publish,
 8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
 9 |  * permit persons to whom the Materials are furnished to do so, subject to
10 |  * the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included
13 |  * in all copies or substantial portions of the Materials.
14 |  *
15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
18 |  *    https://www.khronos.org/registry/
19 |  *
20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
27 |  ******************************************************************************/
28 | 
29 | /* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
30 | 
31 | #ifndef __OPENCL_H
32 | #define __OPENCL_H
33 | 
34 | #ifdef __cplusplus
35 | extern "C" {
36 | #endif
37 | 
38 | #include <CL/cl.h>
39 | #include <CL/cl_gl.h>
40 | #include <CL/cl_gl_ext.h>
41 | #include <CL/cl_ext.h>
42 | 
43 | #ifdef __cplusplus
44 | }
45 | #endif
46 | 
47 | #endif  /* __OPENCL_H   */
48 | 


--------------------------------------------------------------------------------
/spdlog/sinks/base_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | //
 8 | // base sink templated over a mutex (either dummy or real)
 9 | // concrete implementations should override the sink_it_() and flush_() methods.
10 | // locking is taken care of in this class - no locking is needed by the
11 | // implementers.
12 | //
13 | 
14 | #include "spdlog/common.h"
15 | #include "spdlog/details/log_msg.h"
16 | #include "spdlog/formatter.h"
17 | #include "spdlog/sinks/sink.h"
18 | 
19 | namespace spdlog {
20 | namespace sinks {
21 | template<typename Mutex>
22 | class base_sink : public sink
23 | {
24 | public:
25 |     base_sink() = default;
26 |     base_sink(const base_sink &) = delete;
27 |     base_sink &operator=(const base_sink &) = delete;
28 |     // Public entry points: each one locks mutex_ and then forwards to the matching protected hook.
29 |     void log(const details::log_msg &msg) final
30 |     {
31 |         std::lock_guard<Mutex> lock(mutex_);
32 |         sink_it_(msg);
33 |     }
34 | 
35 |     void flush() final
36 |     {
37 |         std::lock_guard<Mutex> lock(mutex_);
38 |         flush_();
39 |     }
40 | 
41 |     void set_pattern(const std::string &pattern) final
42 |     {
43 |         std::lock_guard<Mutex> lock(mutex_);
44 |         set_pattern_(pattern);
45 |     }
46 | 
47 |     void set_formatter(std::unique_ptr<spdlog::formatter> sink_formatter) final
48 |     {
49 |         std::lock_guard<Mutex> lock(mutex_);
50 |         set_formatter_(std::move(sink_formatter)); // ownership of the formatter moves into the sink
51 |     }
52 | 
53 | protected: // hooks below are called with mutex_ already held by the public wrappers
54 |     virtual void sink_it_(const details::log_msg &msg) = 0; // write one message; must be provided by the concrete sink
55 |     virtual void flush_() = 0;                              // flush pending output; must be provided by the concrete sink
56 |     // Default pattern handling: wrap the pattern in a pattern_formatter and install it through set_formatter_().
57 |     virtual void set_pattern_(const std::string &pattern)
58 |     {
59 |         set_formatter_(details::make_unique<spdlog::pattern_formatter>(pattern));
60 |     }
61 |     // Default formatter handling: store it in formatter_ (not declared in this class; inherited).
62 |     virtual void set_formatter_(std::unique_ptr<spdlog::formatter> sink_formatter)
63 |     {
64 |         formatter_ = std::move(sink_formatter);
65 |     }
66 |     Mutex mutex_; // locked by every public entry point; the header comment allows a dummy or a real mutex type
67 | };
68 | } // namespace sinks
69 | } // namespace spdlog
70 | 


--------------------------------------------------------------------------------
/spdlog/sinks/basic_file_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015-2018 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #include "spdlog/details/file_helper.h"
13 | #include "spdlog/details/null_mutex.h"
14 | #include "spdlog/sinks/base_sink.h"
15 | 
16 | #include <mutex>
17 | #include <string>
18 | 
19 | namespace spdlog {
20 | namespace sinks {
21 | /*
22 |  * Trivial file sink with a single file as target: the file is opened in the constructor and each message is formatted, then written to it.
23 |  */
24 | template<typename Mutex>
25 | class basic_file_sink final : public base_sink<Mutex>
26 | {
27 | public:
28 |     explicit basic_file_sink(const filename_t &filename, bool truncate = false) // truncate is passed straight to file_helper::open
29 |     {
30 |         file_helper_.open(filename, truncate);
31 |     }
32 | 
33 | protected:
34 |     void sink_it_(const details::log_msg &msg) override
35 |     {
36 |         fmt::memory_buffer formatted;
37 |         sink::formatter_->format(msg, formatted); // render the message with the sink's current formatter
38 |         file_helper_.write(formatted);
39 |     }
40 | 
41 |     void flush_() override
42 |     {
43 |         file_helper_.flush();
44 |     }
45 | 
46 | private:
47 |     details::file_helper file_helper_; // owns the target file
48 | };
49 | 
50 | using basic_file_sink_mt = basic_file_sink<std::mutex>;          // locks a real std::mutex
51 | using basic_file_sink_st = basic_file_sink<details::null_mutex>; // uses details::null_mutex instead
52 | 
53 | } // namespace sinks
54 | 
55 | //
56 | // factory functions: ask Factory to create a logger named logger_name backed by a basic_file_sink on filename
57 | //
58 | template<typename Factory = default_factory>
59 | inline std::shared_ptr<logger> basic_logger_mt(const std::string &logger_name, const filename_t &filename, bool truncate = false)
60 | {
61 |     return Factory::template create<sinks::basic_file_sink_mt>(logger_name, filename, truncate);
62 | }
63 | // Same as basic_logger_mt, but the sink is the details::null_mutex variant (basic_file_sink_st).
64 | template<typename Factory = default_factory>
65 | inline std::shared_ptr<logger> basic_logger_st(const std::string &logger_name, const filename_t &filename, bool truncate = false)
66 | {
67 |     return Factory::template create<sinks::basic_file_sink_st>(logger_name, filename, truncate);
68 | }
69 | 
70 | } // namespace spdlog
71 | 


--------------------------------------------------------------------------------
/samples/4_shfl/broadcast2.cpp:
--------------------------------------------------------------------------------
 1 | #include "hip/hip_runtime.h"
 2 | 
 3 | #include <iostream>
 4 | 
 5 | #define BUF_SIZE 256
 6 | #define WARP_MASK 0x7
 7 | #define EXPECTED 12345
 8 | // HIPCHECK(code): evaluate a HIP call, store its status in the caller's `hiperr`; on failure print the line and numeric code and return 1.
 9 | #define HIPCHECK(code)                                                         \
10 |   do {                                                                         \
11 |     hiperr = code;                                                             \
12 |     if (hiperr != hipSuccess) {                                                \
13 |       std::cerr << "ERROR on line " << __LINE__ << ": " << (unsigned)hiperr    \
14 |                 << "\n";                                                       \
15 |       return 1;                                                                \
16 |     }                                                                          \
17 |   } while (0)
18 | 
19 | __global__ void bcast(int arg, int *out) { // every thread stores the value that lane 0 of its warp holds
20 |   int value = ((hipThreadIdx_x & WARP_MASK) == 0) ? arg : 0; // only threads whose index is a multiple of 8 start with arg
21 |   // Lane 0 of a warp has an index that is a multiple of 8 (assuming the warp size is a multiple of 8), so it holds arg.
22 |   int out_v = __shfl(
23 |       value, 0); // Synchronize all threads in warp, and get "value" from lane 0
24 | 
25 |   size_t oi = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
26 |   out[oi] = out_v;
27 | }
28 | 
29 | int main() {
30 |   // Host driver: run bcast on one block of BUF_SIZE threads with arg = EXPECTED and check every output equals EXPECTED.
31 |   int *out = (int *)malloc(sizeof(int) * BUF_SIZE); // NOTE(review): the malloc result is not checked
32 |   int *d_out;
33 |   hipError_t hiperr = hipSuccess; // status variable that HIPCHECK assigns to
34 | 
35 |   HIPCHECK(hipMalloc((void **)&d_out, sizeof(int) * BUF_SIZE));
36 | 
37 |   hipLaunchKernelGGL(bcast, dim3(1), dim3(BUF_SIZE), 0, 0, EXPECTED, d_out);
38 |   HIPCHECK(hipGetLastError()); // the launch itself returns no status, so query it here
39 | 
40 |   HIPCHECK(
41 |       hipMemcpy(out, d_out, sizeof(int) * BUF_SIZE, hipMemcpyDeviceToHost));
42 |   // Count, and print, every element that differs from the broadcast value.
43 |   size_t errs = 0;
44 |   for (int i = 0; i < BUF_SIZE; i++) {
45 |     if (out[i] != EXPECTED) {
46 |       std::cout << "ERROR @ " << i << ":  " << out[i] << "\n";
47 |       ++errs;
48 |     }
49 |   }
50 | 
51 |   free(out);
52 |   HIPCHECK(hipFree(d_out));
53 |   // Exit status: 0 when every element matched, 1 otherwise (HIPCHECK failures above also return 1).
54 |   if (errs != 0) {
55 |     std::cout << "FAILED: " << errs << " errors\n";
56 |     return 1;
57 |   } else {
58 |     std::cout << "PASSED!\n";
59 |     return 0;
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/LICENSE:
--------------------------------------------------------------------------------
 1 | ==============================================================================
 2 | ROCm-Device-Libs Release License
 3 | ==============================================================================
 4 | University of Illinois/NCSA
 5 | Open Source License
 6 | 
 7 | Copyright (c) 2014-2016, Advanced Micro Devices, Inc.
 8 | All rights reserved.
 9 | 
10 | Developed by:
11 | 
12 |     AMD Research and AMD HSA Software Development
13 | 
14 |     Advanced Micro Devices, Inc.
15 | 
16 |     www.amd.com
17 | 
18 | Permission is hereby granted, free of charge, to any person obtaining a copy of
19 | this software and associated documentation files (the "Software"), to deal with
20 | the Software without restriction, including without limitation the rights to
21 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
22 | of the Software, and to permit persons to whom the Software is furnished to do
23 | so, subject to the following conditions:
24 | 
25 |     * Redistributions of source code must retain the above copyright notice,
26 |       this list of conditions and the following disclaimers.
27 | 
28 |     * Redistributions in binary form must reproduce the above copyright notice,
29 |       this list of conditions and the following disclaimers in the
30 |       documentation and/or other materials provided with the distribution.
31 | 
32 |     * Neither the names of the LLVM Team, University of Illinois at
33 |       Urbana-Champaign, nor the names of its contributors may be used to
34 |       endorse or promote products derived from this Software without specific
35 |       prior written permission.
36 | 
37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
39 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
40 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
43 | SOFTWARE.
44 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/rlen4D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | CONSTATTR double
11 | MATH_MANGLE(rlen4)(double x, double y, double z, double w) // reciprocal length: 1/sqrt(x^2 + y^2 + z^2 + w^2)
12 | {
13 |     double a = BUILTIN_ABS_F64(x);
14 |     double b = BUILTIN_ABS_F64(y);
15 |     double c = BUILTIN_ABS_F64(z);
16 |     double d = BUILTIN_ABS_F64(w);
17 |     // Min/max network on the magnitudes: afterwards a >= b >= c >= d (NaN inputs are dealt with at the end).
18 |     double a1 = BUILTIN_MAX_F64(a, b);
19 |     double b1 = BUILTIN_MIN_F64(a, b);
20 | 
21 |     double c1 = BUILTIN_MAX_F64(c, d);
22 |     double d1 = BUILTIN_MIN_F64(c, d);
23 | 
24 |     a         = BUILTIN_MAX_F64(a1, c1);
25 |     double c2 = BUILTIN_MIN_F64(a1, c1);
26 | 
27 |     double b2 = BUILTIN_MAX_F64(b1, d1);
28 |     d         = BUILTIN_MIN_F64(b1, d1);
29 | 
30 |     b         = BUILTIN_MAX_F64(b2, c2);
31 |     c         = BUILTIN_MIN_F64(b2, c2);
32 |     // Scale all four by 2^-e, with e taken from the largest magnitude (presumably to keep the squares in range).
33 |     int e = BUILTIN_FREXP_EXP_F64(a);
34 |     a = BUILTIN_FLDEXP_F64(a, -e);
35 |     b = BUILTIN_FLDEXP_F64(b, -e);
36 |     c = BUILTIN_FLDEXP_F64(c, -e);
37 |     d = BUILTIN_FLDEXP_F64(d, -e);
38 |     // Sum of squares of the scaled values, smallest term first (reading MATH_MAD(p, q, r) as p*q + r).
39 |     double l2 = MATH_MAD(a, a, MATH_MAD(b, b, MATH_MAD(c, c, d*d)));
40 |     double v = BUILTIN_RSQRT_F64(l2); // starting estimate of 1/sqrt(l2)
41 |     double u = MATH_MAD(-l2*v, v, 1.0); // residual u = 1 - l2*v^2
42 |     v = MATH_MAD(v*u, MATH_MAD(u, 0.375, 0.5), v); // correction step: v += v*u*(0.5 + 0.375*u)
43 |     double ret = BUILTIN_FLDEXP_F64(v, -e); // fold the 2^-e input scaling back into the result
44 | 
45 |     if (!FINITE_ONLY_OPT()) {
46 |         ret = a == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; // largest magnitude is zero => return the PINFBITPATT_DP64 pattern
47 |         // Any NaN input => return the QNANBITPATT_DP64 pattern.
48 |         ret = (BUILTIN_ISNAN_F64(x) |
49 |                BUILTIN_ISNAN_F64(y) |
50 |                BUILTIN_ISNAN_F64(z) |
51 |                BUILTIN_ISNAN_F64(w)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret;
52 |         // Applied last, so an infinite input yields 0.0 even when another input is NaN.
53 |         ret = (BUILTIN_ISINF_F64(x) |
54 |                BUILTIN_ISINF_F64(y) |
55 |                BUILTIN_ISINF_F64(z) |
56 |                BUILTIN_ISINF_F64(w)) ? 0.0 : ret;
57 |     }
58 | 
59 |     return ret;
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/cxxopts/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | dist: trusty
 3 | language: cpp
 4 | os:
 5 |   - linux
 6 | matrix:
 7 |   include:
 8 |     - os: linux
 9 |       env: COMPILER=g++-4.9
10 |       addons:
11 |         apt:
12 |           packages:
13 |             - g++-4.9
14 |           sources: &sources
15 |             - llvm-toolchain-trusty-3.8
16 |             - llvm-toolchain-trusty-5.0
17 |             - ubuntu-toolchain-r-test
18 |     - os: linux
19 |       env: COMPILER=g++-4.9 UNICODE_OPTIONS=-DCXXOPTS_USE_UNICODE_HELP=Yes
20 |       addons:
21 |         apt:
22 |           packages:
23 |             - g++-4.9
24 |           sources: *sources
25 |     - os: linux
26 |       env: COMPILER=g++-5
27 |       addons:
28 |         apt:
29 |           packages:
30 |             - g++-5
31 |           sources: *sources
32 |     - os: linux
33 |       env: COMPILER=g++-5 UNICODE_OPTIONS=-DCXXOPTS_USE_UNICODE_HELP=Yes
34 |       addons:
35 |         apt:
36 |           packages:
37 |             - g++-5
38 |           sources: *sources
39 |     - os: linux
40 |       env: COMPILER=clang++-3.8 CXXFLAGS=-stdlib=libc++
41 |       addons:
42 |         apt:
43 |           packages:
44 |             - clang-3.8
45 |             - libc++-dev
46 |           sources: *sources
47 |     - os: linux
48 |       env: COMPILER=clang++-3.8 CXXFLAGS=-stdlib=libc++ UNICODE_OPTIONS=-DCXXOPTS_USE_UNICODE_HELP=Yes
49 |       addons:
50 |         apt:
51 |           packages:
52 |             - clang-3.8
53 |             - libc++-dev
54 |           sources: *sources
55 |     - os: linux
56 |       env: COMPILER=clang++-5.0 CMAKE_OPTIONS=-DCXXOPTS_CXX_STANDARD=17
57 |       addons:
58 |         apt:
59 |           packages:
60 |             - clang-5.0
61 |             - g++-5
62 |           sources: *sources
63 | script: >
64 |     cmake -DCXXOPTS_BUILD_TESTS=ON -DCMAKE_CXX_COMPILER=$COMPILER
65 |     -DCMAKE_CXX_FLAGS=$CXXFLAGS $UNICODE_OPTIONS $CMAKE_OPTIONS .
66 |     && make && make ARGS=--output-on-failure test
67 | 
68 | before_install:  # both steps are guarded on TRAVIS_OS_NAME == "osx"; every job listed above uses os: linux
69 |   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update          ; fi
70 |   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install icu4c; fi
71 | 


--------------------------------------------------------------------------------
/spdlog/details/periodic_worker.h:
--------------------------------------------------------------------------------
 1 | 
 2 | //
 3 | // Copyright(c) 2018 Gabi Melman.
 4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 5 | //
 6 | 
 7 | #pragma once
 8 | 
 9 | // periodic worker thread - periodically executes the given callback function.
10 | //
11 | // RAII over the owned thread:
12 | //    creates the thread on construction.
13 | //    stops and joins the thread on destruction (if the thread is executing a callback, wait for it to finish first).
14 | 
15 | #include <chrono>
16 | #include <condition_variable>
17 | #include <functional>
18 | #include <mutex>
19 | #include <thread>
20 | namespace spdlog {
21 | namespace details {
22 | 
23 | class periodic_worker
24 | {
25 | public:
26 |     // Starts a thread that invokes callback_fun once per interval; an interval <= 0 starts no thread at all.
27 |     periodic_worker(const std::function<void()> &callback_fun, std::chrono::seconds interval)
28 |     {
29 |         active_ = (interval > std::chrono::seconds::zero());
30 |         if (active_)
31 |         {
32 |             worker_thread_ = std::thread([this, callback_fun, interval]() {
33 |                 while (true)
34 |                 {
35 |                     std::unique_lock<std::mutex> guard(mutex_);
36 |                     const bool stop_requested = cv_.wait_for(guard, interval, [this] { return !active_; });
37 |                     if (stop_requested)
38 |                     {
39 |                         return; // the destructor cleared active_, so leave the thread
40 |                     }
41 |                     callback_fun(); // interval elapsed; runs while mutex_ is still held
42 |                 }
43 |             });
44 |         }
45 |     }
46 | 
47 |     periodic_worker(const periodic_worker &) = delete;
48 |     periodic_worker &operator=(const periodic_worker &) = delete;
49 | 
50 |     // ask the worker thread (if one was started) to stop, then wait for it to exit
51 |     ~periodic_worker()
52 |     {
53 |         if (worker_thread_.joinable())
54 |         {
55 |             {
56 |                 std::lock_guard<std::mutex> guard(mutex_);
57 |                 active_ = false;
58 |             }
59 |             cv_.notify_one();
60 |             worker_thread_.join();
61 |         }
62 |     }
63 | 
64 | private:
65 |     bool active_;
66 |     std::thread worker_thread_;
67 |     std::mutex mutex_;
68 |     std::condition_variable cv_;
69 | };
70 | } // namespace details
71 | } // namespace spdlog
72 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/erfcxF.cl:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "mathF.h"
 3 | // erfcx(x): by its name the scaled complementary error function exp(x^2)*erfc(x) - NOTE(review): confirm against the OCML documentation.
 4 | CONSTATTR float
 5 | MATH_MANGLE(erfcx)(float x)
 6 | {
 7 |     float ax = BUILTIN_ABS_F32(x);
 8 |     float ret;
 9 |     // Three ranges of |x|: a polynomial in x, a polynomial in t = (|x| - 4)/(|x| + 4), and a short series in 1/x^2.
10 |     if (ax < 1.0f) {
11 |         ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
12 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
13 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
14 |               MATH_MAD(x,
15 |                   -0x1.77d64p-11f, 0x1.269372p-9f),
16 |                   -0x1.c27dd4p-9f), 0x1.d3d3c4p-8f),
17 |                   -0x1.35d6cap-6f), 0x1.5bb082p-5f),
18 |                   -0x1.60e46ep-4f), 0x1.54d3e4p-3f),
19 |                   -0x1.340edap-2f), 0x1.00049ap-1f),
20 |                   -0x1.81286p-1f), 0x1.ffffcap-1f),
21 |                   -0x1.20dd7p+0f), 0x1.0p+0f);
22 |     } else if (ax < 32.0f) {
23 |         float t = MATH_DIV(ax - 4.0f, ax + 4.0f);
24 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
25 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
26 |               MATH_MAD(t,
27 |                   0.00416076401f, -0.0167250745f),
28 |                   0.0378070959f), -0.0661972834f),
29 |                   0.0935599947f), -0.101052745f),
30 |                   0.0681148962f), 0.0153801711f),
31 |                   -0.139621619f), 1.23299511f);
32 | 
33 |         ret = MATH_DIV(ret, MATH_MAD(ax, 2.0f, 1.0f)); // divide the polynomial by (2*|x| + 1)
34 |     } else {
35 |         const float one_over_sqrtpi = 0x1.20dd76p-1f;
36 |         float z = MATH_RCP(x * x);
37 |         ret =  MATH_DIV(one_over_sqrtpi, x) * MATH_MAD(z, MATH_MAD(z, 0.375f, -0.5f), 1.0f); // (1/sqrt(pi))/x * (1 - z/2 + 3*z^2/8)
38 |     }
39 |     // x <= -1: replace ret by 2*exp(x^2) - ret, with x^2 carried as a high part x2h plus a low part x2l.
40 |     if (x <= -1.0f) {
41 |         float x2h, x2l;
42 |         if (HAVE_FAST_FMA32()) {
43 |             x2h = ax * ax;
44 |             x2l = BUILTIN_FMA_F32(ax, ax, -x2h); // what the rounded product x2h left out of ax*ax
45 |         } else {
46 |             float xh = AS_FLOAT(AS_UINT(ax) & 0xfffff000U); // ax with its low 12 bits cleared
47 |             float xl = ax - xh;
48 |             x2h = xh*xh;
49 |             x2l = (ax + xh)*xl; // equals ax^2 - xh^2 algebraically
50 |         }
51 | 
52 |         ret = MATH_MANGLE(exp)(x2h) * MATH_MANGLE(exp)(x2l) * 2.0f - ret;
53 |         ret = x < -10.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; // below -10 the result is forced to the PINFBITPATT_SP32 pattern
54 |     }
55 | 
56 |     return ret;
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/erfcinvF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(erfcinv)(float y) // by its name, the inverse of erfc; the special-case block at the end treats [0, 2] as the valid domain
12 | {
13 |     float ret;
14 |     // Three ranges of y: defer to erfinv(1 - y), a polynomial in t = -log(y*(2 - y)) - 3.125, or a tail form in 1/sqrt(-log(y)).
15 |     if (y > 0.625f) {
16 |         ret = MATH_MANGLE(erfinv)(1.0f - y);
17 |     } else if (y > 0x1.0p-10f) {
18 |         float t = -MATH_MANGLE(log)(y * (2.0f - y)) - 3.125f;
19 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
20 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
21 |               MATH_MAD(t, MATH_MAD(t,
22 |                   0x1.7ee662p-31f, -0x1.3f5a80p-28f), -0x1.b638f0p-26f), 0x1.c9ccc6p-22f),
23 |                   -0x1.72f8aep-20f), -0x1.d21aa6p-17f), 0x1.87aebcp-13f), -0x1.8455d4p-11f),
24 |                   -0x1.8b6ca4p-8f), 0x1.ebd80cp-3f), 0x1.a755e8p+0f);
25 |         ret = MATH_MAD(-y, ret, ret); // i.e. ret * (1 - y)
26 |     } else {
27 |         float s = MATH_FAST_SQRT(-MATH_MANGLE(log)(y));
28 |         float t = MATH_FAST_RCP(s);
29 |         // Two coefficient sets for the tail, switched at y = 2^-42; both are polynomials in t = 1/s.
30 |         if (y > 0x1.0p-42f) {
31 |             ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
32 |                   MATH_MAD(t, MATH_MAD(t,
33 |                       -0x1.57221ep+0f, 0x1.7f6144p+1f), -0x1.98dd40p+1f), 0x1.2c9066p+1f),
34 |                       -0x1.3a07eap+0f), -0x1.ba546cp-5f), 0x1.004e66p+0f);
35 |         } else {
36 |             ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
37 |                   MATH_MAD(t, MATH_MAD(t,
38 |                       -0x1.649c6ap+4f, 0x1.8fa8fap+4f), -0x1.a112d8p+3f), 0x1.309d98p+2f),
39 |                       -0x1.919488p+0f), -0x1.c084ecp-6f), 0x1.00143ep+0f);
40 |         }
41 |         ret = s * ret; // the polynomial value is multiplied by s = sqrt(-log(y))
42 |     }
43 | 
44 |     if (!FINITE_ONLY_OPT()) {
45 |         ret = (y < 0.0f) | (y > 2.0f) ? AS_FLOAT(QNANBITPATT_SP32) : ret; // outside [0, 2] => QNANBITPATT_SP32 pattern
46 |         ret = y == 0.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; // y == 0 => PINFBITPATT_SP32 pattern
47 |         ret = y == 2.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret; // y == 2 => NINFBITPATT_SP32 pattern
48 |     }
49 | 
50 |     return ret;
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/erfinvF.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | CONSTATTR float
11 | MATH_MANGLE(erfinv)(float x) // by its name, the inverse of erf; evaluated on |x| and given the sign of x at the end
12 | {
13 |     float ax = BUILTIN_ABS_F32(x);
14 |     float p;
15 |     // p is the factor multiplied by |x| below; each branch supplies a different polynomial approximation for it.
16 |     if (ax < 0.375f) {
17 |         float t = ax*ax; // polynomial variable is x^2
18 |         p = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
19 |             MATH_MAD(t, MATH_MAD(t,
20 |                 0x1.48b6cap-3f, -0x1.a2930ap-6f), 0x1.65b0b4p-4f), 0x1.5581aep-4f),
21 |                 0x1.05aa56p-3f), 0x1.db2748p-3f), 0x1.c5bf8ap-1f);
22 |     } else {
23 |         float w;
24 |         if (HAVE_FAST_FMA32()) {
25 |             w = BUILTIN_FMA_F32(-ax, ax, 1.0f); // 1 - x^2 in one fused operation
26 |         } else {
27 |             w = (1.0f - ax) * (1.0f + ax); // 1 - x^2 in factored form
28 |         }
29 |         w = -MATH_MANGLE(log)(w); // w = -log(1 - x^2)
30 |         // Two polynomial fits: in (w - 2.5) when w < 5, otherwise in (sqrt(w) - 3).
31 |         if (w < 5.0f) {
32 |             w = w - 2.5f;
33 |             p = MATH_MAD(w, MATH_MAD(w, MATH_MAD(w, MATH_MAD(w,
34 |                 MATH_MAD(w, MATH_MAD(w, MATH_MAD(w, MATH_MAD(w,
35 |                     0x1.e2cb10p-26f, 0x1.70966cp-22f), -0x1.d8e6aep-19f), -0x1.26b582p-18f),
36 |                     0x1.ca65b6p-13f), -0x1.48a810p-10f), -0x1.11c9dep-8f), 0x1.f91ec6p-3f),
37 |                     0x1.805c5ep+0f);
38 |         } else {
39 |             w = MATH_SQRT(w) - 3.0f;
40 |             p = MATH_MAD(w, MATH_MAD(w, MATH_MAD(w, MATH_MAD(w,
41 |                 MATH_MAD(w, MATH_MAD(w, MATH_MAD(w, MATH_MAD(w,
42 |                     -0x1.a3e136p-13f, 0x1.a76ad6p-14f), 0x1.61b8e4p-10f), -0x1.e17bcep-9f),
43 |                     0x1.7824f6p-8f), -0x1.f38baep-8f), 0x1.354afcp-7f), 0x1.006db6p+0f),
44 |                     0x1.6a9efcp+1f);
45 |         }
46 |     }
47 | 
48 |     float ret = p*ax; // magnitude of the result; the sign is applied in the return statement
49 | 
50 |     if (!FINITE_ONLY_OPT()) {
51 |         ret = ax > 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; // |x| > 1 => QNANBITPATT_SP32 pattern
52 |         ret = ax == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; // |x| == 1 => PINFBITPATT_SP32 pattern, signed below
53 |     }
54 | 
55 |     return BUILTIN_COPYSIGN_F32(ret, x);
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/include/CL/cl_gl_ext.h:
--------------------------------------------------------------------------------
 1 | /**********************************************************************************
 2 |  * Copyright (c) 2008-2019 The Khronos Group Inc.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and/or associated documentation files (the
 6 |  * "Materials"), to deal in the Materials without restriction, including
 7 |  * without limitation the rights to use, copy, modify, merge, publish,
 8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
 9 |  * permit persons to whom the Materials are furnished to do so, subject to
10 |  * the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included
13 |  * in all copies or substantial portions of the Materials.
14 |  *
15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
18 |  *    https://www.khronos.org/registry/
19 |  *
20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
27 |  **********************************************************************************/
28 | 
29 | #ifndef __OPENCL_CL_GL_EXT_H
30 | #define __OPENCL_CL_GL_EXT_H
31 | 
32 | #ifdef __cplusplus
33 | extern "C" {
34 | #endif
35 | 
36 | #include <CL/cl_gl.h>
37 | 
38 | /* 
39 |  *  cl_khr_gl_event extension
40 |  */
41 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
42 | 
43 | extern CL_API_ENTRY cl_event CL_API_CALL
44 | clCreateEventFromGLsyncKHR(cl_context context,
45 |                            cl_GLsync  cl_GLsync,
46 |                            cl_int *   errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
47 | 
48 | #ifdef __cplusplus
49 | }
50 | #endif
51 | 
52 | #endif	/* __OPENCL_CL_GL_EXT_H  */
53 | 


--------------------------------------------------------------------------------
/include/hip/hip_fatbin.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | #ifndef HIP_SRC_HIP_FATBIN_H
23 | #define HIP_SRC_HIP_FATBIN_H
24 | 
25 | #include <cstdint> // uint64_t, used by the bundle structs below
26 | #include <string>  // std::string, used by __hipDumpCodeObject
27 | // #include "hip/hip_runtime.h"
28 | // #include "hip_hcc_internal.h"
29 | 
30 | // hip-clang fatbin format
31 | constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
32 | #define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__"
33 | #define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
34 | 
35 | struct __ClangOffloadBundleDesc { // describes one bundled entry: where it is, how big, and its target triple
36 |   uint64_t offset;
37 |   uint64_t size;
38 |   uint64_t tripleSize;
39 |   const char triple[1]; // first byte only; presumably tripleSize bytes follow in the blob (TODO confirm)
40 | };
41 | 
42 | struct __ClangOffloadBundleHeader {
43 |   const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1]; // magic text without its terminating NUL
44 |   uint64_t numBundles;
45 |   __ClangOffloadBundleDesc desc[1]; // first descriptor; presumably numBundles of them follow (TODO confirm)
46 | };
47 | 
48 | struct __CudaFatBinaryWrapper {
49 |   unsigned int                magic;
50 |   unsigned int                version;
51 |   __ClangOffloadBundleHeader* binary;
52 |   void*                       unused;
53 | };
54 | 
55 | const void* __hipExtractCodeObjectFromFatBinary(const void* data, const char* agent_name);
56 | void __hipDumpCodeObject(const std::string& image);
57 | 
58 | #endif // HIP_SRC_HIP_FATBIN_H
59 | 


--------------------------------------------------------------------------------
/spdlog/async_logger.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | // Very fast asynchronous logger (millions of logs per second on an average
 9 | // desktop)
10 | // Uses pre allocated lockfree queue for maximum throughput even under large
11 | // number of threads.
12 | // Creates a single back thread to pop messages from the queue and log them.
13 | //
14 | // Upon each log write the logger:
15 | //    1. Checks whether its log level is high enough to log the message
16 | //    2. Pushes a new copy of the message to a queue (or blocks the caller until
17 | //    space is available in the queue)
18 | //    3. Throws spdlog_ex upon log exceptions
19 | // Upon destruction, logs all remaining messages in the queue before
20 | // destructing.
21 | 
22 | #include "spdlog/common.h"
23 | #include "spdlog/logger.h"
24 | 
25 | #include <chrono>
26 | #include <memory>
27 | #include <string>
28 | 
29 | namespace spdlog {
30 | 
31 | // What to do when the async queue is full; the async_logger constructors default to block.
32 | enum class async_overflow_policy
33 | {
34 |     block,         // Block the caller until the message can be enqueued
35 |     overrun_oldest // Discard the oldest message in the queue if it is full when
36 |                    // trying to add a new item.
37 | };
38 | 
39 | namespace details {
40 | class thread_pool;
41 | }
42 | // Asynchronous logger: holds a weak reference to a details::thread_pool plus an overflow policy.
43 | class async_logger final : public std::enable_shared_from_this<async_logger>, public logger
44 | {
45 |     friend class details::thread_pool; // lets the pool reach the protected backend_* members
46 | 
47 | public: // the three constructors differ only in how sinks are supplied: iterator range, initializer list, single sink
48 |     template<typename It>
49 |     async_logger(std::string logger_name, It begin, It end, std::weak_ptr<details::thread_pool> tp,
50 |         async_overflow_policy overflow_policy = async_overflow_policy::block);
51 | 
52 |     async_logger(std::string logger_name, sinks_init_list sinks_list, std::weak_ptr<details::thread_pool> tp,
53 |         async_overflow_policy overflow_policy = async_overflow_policy::block);
54 | 
55 |     async_logger(std::string logger_name, sink_ptr single_sink, std::weak_ptr<details::thread_pool> tp,
56 |         async_overflow_policy overflow_policy = async_overflow_policy::block);
57 | 
58 |     std::shared_ptr<logger> clone(std::string new_name) override;
59 | 
60 | protected:
61 |     void sink_it_(details::log_msg &msg) override; // NOTE(review): presumably hands msg to the pool - body is in async_logger_impl.h
62 |     void flush_() override;
63 | 
64 |     void backend_log_(const details::log_msg &incoming_log_msg); // presumably invoked by the thread pool - TODO confirm
65 |     void backend_flush_();
66 | 
67 | private:
68 |     std::weak_ptr<details::thread_pool> thread_pool_; // weak reference: this logger does not keep the pool alive
69 |     async_overflow_policy overflow_policy_;          // presumably the constructor's overflow_policy argument
70 | };
71 | } // namespace spdlog
72 | 
73 | #include "details/async_logger_impl.h"
74 | 


--------------------------------------------------------------------------------
/spdlog/sinks/dist_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright (c) 2015 David Schury, Gabi Melman
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #include "base_sink.h"
13 | #include "spdlog/details/log_msg.h"
14 | #include "spdlog/details/null_mutex.h"
15 | 
16 | #include <algorithm>
17 | #include <memory>
18 | #include <mutex>
19 | #include <vector>
20 | 
21 | // Distribution sink (mux). Stores a vector of sinks which get called when log
22 | // is called
23 | 
24 | namespace spdlog {
25 | namespace sinks {
26 | 
27 | template<typename Mutex>
28 | class dist_sink : public base_sink<Mutex>
29 | {
30 | public:
31 |     dist_sink() = default;
32 |     dist_sink(const dist_sink &) = delete;
33 |     dist_sink &operator=(const dist_sink &) = delete;
34 | 
35 |     // Appends a child sink while holding the base sink's mutex.
36 |     void add_sink(std::shared_ptr<sink> sink)
37 |     {
38 |         std::lock_guard<Mutex> guard(this->mutex_);
39 |         sinks_.push_back(sink);
40 |     }
41 | 
42 |     // Removes every stored pointer that compares equal to `sink`.
43 |     void remove_sink(std::shared_ptr<sink> sink)
44 |     {
45 |         std::lock_guard<Mutex> guard(this->mutex_);
46 |         auto new_end = std::remove(sinks_.begin(), sinks_.end(), sink);
47 |         sinks_.erase(new_end, sinks_.end());
48 |     }
49 | 
50 |     // Replaces the whole child list.
51 |     void set_sinks(std::vector<std::shared_ptr<sink>> sinks)
52 |     {
53 |         std::lock_guard<Mutex> guard(this->mutex_);
54 |         sinks_ = std::move(sinks);
55 |     }
56 | 
57 | protected:
58 |     // Forwards msg to each child for which should_log(msg.level) holds.
59 |     void sink_it_(const details::log_msg &msg) override
60 |     {
61 |         for (const auto &child : sinks_)
62 |         {
63 |             if (!child->should_log(msg.level))
64 |                 continue;
65 |             child->log(msg);
66 |         }
67 |     }
68 | 
69 |     void flush_() override
70 |     {
71 |         for (const auto &child : sinks_)
72 |             child->flush();
73 |     }
74 | 
75 |     void set_pattern_(const std::string &pattern) override
76 |     {
77 |         set_formatter_(details::make_unique<spdlog::pattern_formatter>(pattern));
78 |     }
79 | 
80 |     // Stores the formatter and gives each child its own clone.
81 |     void set_formatter_(std::unique_ptr<spdlog::formatter> sink_formatter) override
82 |     {
83 |         this->formatter_ = std::move(sink_formatter);
84 |         for (const auto &child : sinks_)
85 |             child->set_formatter(this->formatter_->clone());
86 |     }
87 |     std::vector<std::shared_ptr<sink>> sinks_;
88 | };
89 | 
90 | using dist_sink_mt = dist_sink<std::mutex>;
91 | using dist_sink_st = dist_sink<details::null_mutex>;
92 | 
93 | } // namespace sinks
94 | } // namespace spdlog
95 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/i0D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | // i0(x): presumably the modified Bessel function I0 - the small-|x| series is 1 + t + t^2/4 + t^3/36 + ... with t = (x/2)^2.
10 | CONSTATTR double
11 | MATH_MANGLE(i0)(double x)
12 | {
13 |     x = BUILTIN_ABS_F64(x); // only |x| is used from here on
14 | 
15 |     double ret;
16 | 
17 |     if (x < 8.0) {
18 |         double t = 0.25 * x * x; // t = (x/2)^2
19 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
20 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
21 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
22 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
23 |                   0x1.dd78750ff79b2p-97, 0x1.4394559531e65p-89), 0x1.6f7123f151c79p-81), 0x1.3d9e7c5528048p-73),
24 |                   0x1.e736f323a0cabp-66), 0x1.4196ce3b298c5p-58), 0x1.69caac7bf9255p-51), 0x1.5601878c06ac8p-44),
25 |                   0x1.0b313291f5e48p-37), 0x1.522a43f5dcb54p-31), 0x1.522a43f659634p-25), 0x1.02e85c0898945p-19),
26 |                   0x1.23456789abcf3p-14), 0x1.c71c71c71c71cp-10), 0x1.c71c71c71c71cp-6), 0x1.0000000000000p-2),
27 |                   0x1.0000000000000p+0);
28 |         ret = MATH_MAD(t, ret, 1.0); // result = 1 + t * poly(t)
29 |     } else {
30 |         double t = MATH_RCP(x); // x >= 8: polynomial in 1/x, scaled by exp(x) * rsqrt(x) below
31 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
32 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
33 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
34 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
35 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
36 |               MATH_MAD(t, 
37 |                   0x1.cc967bacb549dp+49, -0x1.5ba7722975981p+50), 0x1.df0f836763276p+49), -0x1.9042a430f3f43p+48),
38 |                   0x1.c630541c4f568p+46), -0x1.7366be5a9784fp+44), 0x1.c5669a48f574ep+41), -0x1.a664cac47f0eap+38),
39 |                   0x1.308250566988cp+35), -0x1.56874c2ddb061p+31), 0x1.2da58968da2aap+27), -0x1.9faaa33f0d6bcp+22),
40 |                   0x1.be0a8f2bc76ddp+17), -0x1.7123c68c3cb02p+12), 0x1.d402150cc72aap+6), -0x1.7a8ae85359520p+0),
41 |                   0x1.bd7e0b6a753cdp-4), 0x1.6d6ce3774506dp-5), 0x1.debdd3d2f7cf9p-6), 0x1.cb94db8d452d5p-6),
42 |                   0x1.9884533daea3dp-5), 0x1.9884533d4362fp-2);
43 |         double xs = x - 709.0; // shifted exponent argument, used only when x > 709
44 |         double e1 = MATH_MANGLE(exp)(x > 709.0 ? xs : x);
45 |         double e2 = x > 709.0 ? 0x1.d422d2be5dc9bp+1022 : 1.0; // NOTE(review): presumably ~exp(709), undoing the shift - confirm
46 |         ret = e1 * MATH_MANGLE(rsqrt)(x) * ret * e2;
47 |     }
48 |     // Unless finite-only mode is enabled, +inf and NaN inputs are passed straight through.
49 |     if  (!FINITE_ONLY_OPT()) {
50 |         ret = BUILTIN_CLASS_F64(x, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? x : ret;
51 |     }
52 | 
53 |     return ret;
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/i1D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | // i1(x): presumably the modified Bessel function I1; evaluated on a = |x| and given the sign of x at return (odd in x).
10 | CONSTATTR double
11 | MATH_MANGLE(i1)(double x)
12 | {
13 |     double a = BUILTIN_ABS_F64(x);
14 | 
15 |     double ret;
16 | 
17 |     if (a < 8.0) {
18 |         a *= 0.5; // a = |x|/2
19 |         double t = a * a; // t = (x/2)^2
20 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
21 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
22 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
23 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
24 |                   0x1.fc892c836e80ap-93, 0x1.432352d94a857p-85), 0x1.588ae4f7b7a4ap-77), 0x1.15e96e9231b49p-69),
25 |                   0x1.8bdcb5f2184d1p-62), 0x1.e26237a1e02fep-55), 0x1.f176aca1a831fp-48), 0x1.ab81e97c83e75p-41),
26 |                   0x1.2c9758e3649ffp-34), 0x1.522a43f5ed306p-28), 0x1.27e4fb778d591p-22), 0x1.845c8a0ce4edap-17),
27 |                   0x1.6c16c16c16c26p-12), 0x1.c71c71c71c71cp-8), 0x1.5555555555555p-4), 0x1.0000000000000p-1);
28 |         ret = MATH_MAD(t, a*ret, a); // result = a * (1 + t * poly(t))
29 |     } else {
30 |         double t = MATH_RCP(a); // |x| >= 8: polynomial in 1/|x|, scaled by exp(|x|) * rsqrt(|x|) below
31 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
32 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
33 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
34 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
35 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, 
36 |               MATH_MAD(t, 
37 |                   -0x1.c9d8d43214423p+49, 0x1.5c072e12fb4bap+50), -0x1.e26cff438b6f6p+49), 0x1.952224c61a221p+48),
38 |                   -0x1.cdc7c873cf435p+46), 0x1.7b1e32a15fb86p+44), -0x1.d07dbd6696f1cp+41), 0x1.b227934f2ced2p+38),
39 |                   -0x1.39f23e6685444p+35), 0x1.6229383f6f890p+31), -0x1.38bf1ceeee865p+27), 0x1.b01a348b749b8p+22),
40 |                   -0x1.d0e043ef0916ap+17), 0x1.81b06f82cfbacp+12), -0x1.ea879b2a6508bp+6), 0x1.85cffc8d54f52p+0),
41 |                   -0x1.09f107ee0f7e2p-3), -0x1.d61631539fb0dp-5), -0x1.4f1e01d904ebap-5), -0x1.7efc0ced79c58p-5),
42 |                   -0x1.32633e6e0f07ap-3), 0x1.9884533d43674p-2);
43 |         // Use a = |x| here, not x: rsqrt of a negative x is NaN and exp(x) would underflow; the sign is restored at return.
44 |         double shifted = a - 709.0; // shifted exponent argument, used only when a > 709
45 |         double e1 = MATH_MANGLE(exp)(a > 709.0 ? shifted : a);
46 |         double e2 = a > 709.0 ? 0x1.d422d2be5dc9bp+1022 : 1.0; // NOTE(review): presumably ~exp(709), undoing the shift - confirm
47 |         ret = e1 * MATH_MANGLE(rsqrt)(a) * ret * e2;
48 |     }
49 |     // Unless finite-only mode is enabled, an infinite or NaN magnitude is passed straight through.
50 |     if  (!FINITE_ONLY_OPT()) {
51 |         ret = BUILTIN_CLASS_F64(a, CLASS_PINF|CLASS_QNAN|CLASS_SNAN) ? a : ret;
52 |     }
53 | 
54 |     return BUILTIN_COPYSIGN_F64(ret, x);
55 | }
56 | 
57 | 


--------------------------------------------------------------------------------
/doc/notes-0.9.txt:
--------------------------------------------------------------------------------
 1 | HIPCL v0.9 Released
 2 | -------------------
 3 | 
 4 | [Heterogeneous-compute Interface for Portability](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_faq.md), or HIP,
 5 | is a C++ runtime API and kernel language that allows developers to write code that runs on both AMD and NVidia GPUs.
 6 | CUDA applications can be converted to HIP in a largely automated fashion.
 7 | 
 8 | HIPCL is a library that allows applications using the HIP API to be run on devices
 9 | which support OpenCL and SPIR-V, thus providing a portability path from CUDA to
10 | advanced OpenCL platforms.
11 | 
12 | The detailed and up to date documentation is available in README.md.
13 | 
14 | Release Status
15 | --------------
16 | 
17 | * Most of the HIP API and the HIP kernel language is implemented.
18 | 
19 | * The most tested implementation is Intel's NEO OpenCL for Intel GPUs.
20 |   Intel's CPU OpenCL implementations also work, but they are less mature and lack some features.
21 | 
22 | * There are extra 3rd party samples located [here](https://github.com/cpc/hipcl-samples),
23 |   and AMD's rocRAND ported to HIPCL located [here](https://github.com/cpc/hipcl-rocRAND).
24 | 
25 | Known Issues
26 | ------------
27 | 
28 | * `clEnqueueSVMMemCopy() failed with error -5` - this appears to be a driver bug
29 |    on Intel GPUs; occurs when one tries to memcpy from read-only data stored in ELF
30 |    to SVM memory. SVMMemCopy from other sources (stack / heap) works without issues.
31 | 
32 | * Programs may take a long time to start. This is because Clang inserts startup
33 |   hooks which register SPIR-V binaries; HIPCL at this point compiles each, and for
34 |   each program built, creates all kernels. This can take a long time on some implementations.
35 | 
36 | * Using the HIP_DYNAMIC_SHARED() macro outside a function scope is not yet supported.
37 |   Doing so will likely result in the error:
38 |   ``Assertion `FuncSet.size() <= 1 && "more than one function uses dynamic mem variable!"' failed.``
39 | 
40 | * HIPCL reports the global memory size from OpenCL as available memory, but unlike with CUDA,
41 |   it's not possible to allocate all of that memory in a single block;
42 |   HIPCL is limited by CL_DEVICE_MAX_MEM_ALLOC_SIZE.
43 | 
44 | * There are some unresolved compiler bugs present in the HIPCL-patched Clang, so compilation
45 |   may fail, especially when HIPCL is compiled with -O0 flag.
46 | 
47 | Acknowledgements
48 | ----------------
49 | 
50 | The Customized Parallel Computing research group of Tampere
51 | University, Finland, would like to thank the HSA Foundation and the
52 | ECSEL JU project FitOptiVis (project number 783162) for funding
53 | most of the development work for this release. Much appreciated!
54 | 
55 | Links
56 | -----
57 | Home page:         http://github.com/cpc/hipcl
58 | 3rd party samples: http://github.com/cpc/hipcl-samples
59 | hipcl-rocRAND:     https://github.com/cpc/hipcl-rocRAND
60 | 


--------------------------------------------------------------------------------
/lib/bitcode/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/BC")
 3 | 
 4 | #Ugly fix for interactions between clang13+ and igc
 5 | if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
 6 |   set(CLANG_CL_NO_STDINC_FLAG "")
 7 | else ()
 8 |   set(CLANG_CL_NO_STDINC_FLAG "-cl-no-stdinc")
 9 | endif ()
10 | # The flag is expanded unquoted below so that an empty value adds no argument at all (quoted, it would add an empty one).
11 | add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
12 |                     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.cl"
13 |         COMMAND "${CMAKE_CXX_COMPILER}"
14 |         ${CLANG_CL_NO_STDINC_FLAG} -Xclang -finclude-default-header
15 |         -O2 -x cl -cl-std=CL2.0
16 |         --target=spir64-unknown-unknown -emit-llvm
17 |         -o "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
18 |         -c "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.cl"
19 |         COMMENT "Building mathlib.bc"
20 |         VERBATIM)
21 | set(DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc")
22 | 
23 | #add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
24 | #                    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.bc"
25 | #                    COMMAND "${CMAKE_COMMAND}" -E copy
26 | #                    "${CMAKE_CURRENT_SOURCE_DIR}/mathlib.bc"
27 | #                    "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc"
28 | #                    COMMENT "Copying mathlib.bc"
29 | #                    VERBATIM)
30 | #set(DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/mathlib.bc")
31 | 
32 | # kernellib sources
33 | # Each OCML/<name>.cl is compiled to BC/<name>.bc and appended to DEPEND_LIST for the final link.
34 | set(SOURCES erfcinvD erfcinvF erfcxD erfcxF erfinvD erfinvF i0D i0F i1D i1F j0D j0F j1D j1F ncdfD ncdfF ncdfinvD ncdfinvF nearbyintD nearbyintF rcbrtD rcbrtF rhypotF rhypotD rlen3D rlen3F rlen4D rlen4F scalbD scalbF scalbnD scalbnF tables y0D y0F y1D y1F)
35 | foreach(SOURCE IN LISTS SOURCES)
36 |   add_custom_command(
37 |         OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
38 |         DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/OCML/${SOURCE}.cl"
39 |         COMMAND "${CMAKE_CXX_COMPILER}"
40 |         ${CLANG_CL_NO_STDINC_FLAG} -Xclang -finclude-default-header
41 |         -O2 -pthread -x cl -cl-std=CL2.0
42 |         --target=spir64-unknown-unknown -emit-llvm
43 |         -o "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc"
44 |         -c "${CMAKE_CURRENT_SOURCE_DIR}/OCML/${SOURCE}.cl"
45 |         COMMENT "Building ${SOURCE}.bc"
46 |         VERBATIM)
47 |   list(APPEND DEPEND_LIST "${CMAKE_CURRENT_BINARY_DIR}/BC/${SOURCE}.bc")
48 | endforeach()
49 | 
50 | # kernellib
51 | # Links mathlib.bc and every per-source .bc file into one kernellib.bc using ${LLVM_LINK}.
52 | add_custom_command(
53 |         OUTPUT "${CMAKE_BINARY_DIR}/kernellib.bc"
54 |         DEPENDS ${DEPEND_LIST}
55 |         COMMAND "${LLVM_LINK}"
56 |         -o "${CMAKE_BINARY_DIR}/kernellib.bc"
57 |         ${DEPEND_LIST}
58 |         COMMENT "Linking kernellib.bc"
59 |         VERBATIM)
60 | 
61 | add_custom_target("kernellib_bc" DEPENDS "${CMAKE_BINARY_DIR}/kernellib.bc")
62 | 
63 | install(FILES "${CMAKE_BINARY_DIR}/kernellib.bc" DESTINATION "${HIPCL_DATA_DIR}")
64 | 
65 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/j0F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | extern float MATH_PRIVATE(cosb)(float, int, float);
11 | extern CONSTATTR float MATH_PRIVATE(bp0)(float);
12 | extern CONSTATTR float MATH_PRIVATE(ba0)(float);
13 | // j0(x): evaluated on |x|; presumably the Bessel function J0 - TODO confirm against the OCML documentation.
14 | PUREATTR float
15 | MATH_MANGLE(j0)(float x)
16 | {
17 |     x = BUILTIN_ABS_F32(x);
18 |     // Interval boundaries for the table-driven branch below.
19 |     const float b0 = 1.65625f;
20 |     const float b1 = 3.125f;
21 |     const float b2 = 4.6875f;
22 |     const float b3 = 6.265625f;
23 |     const float b4 = 7.84375f;
24 |     const float b5 = 9.421875f;
25 |     const float b6 = 10.984375f;
26 |     const float b7 = 12.578125f;
27 | 
28 |     float ret;
29 | 
30 |     if (x <= b7) {
31 |         // Try to maintain relative accuracy here
32 |         // b0..b7 split [0, b7] into 8 intervals; interval k reads the 9 coefficients at p + k*9.
33 |         USE_TABLE(float, p, M32_J0);
34 |         float ch, cl;
35 | 
36 |         if (x <= b3) {
37 |             if (x <= b0) {
38 |                 ch = 0x0.000000p+0f;
39 |                 cl = 0x0.000000p+0f;
40 |             } else if (x <= b1) {
41 |                 ch = 0x1.33d152p+1f;
42 |                 cl = 0x1.d2e368p-24f;
43 |                 p += 1*9;
44 |             } else if (x <= b2) {
45 |                 ch = 0x1.ea7558p+1f;
46 |                 cl = -0x1.4a121ep-24f;
47 |                 p += 2*9;
48 |             } else {
49 |                 ch = 0x1.6148f6p+2f;
50 |                 cl = -0x1.34f46ep-24f;
51 |                 p += 3*9;
52 |             }
53 |         } else {
54 |             if (x <= b4) {
55 |                 ch = 0x1.c0ff60p+2f;
56 |                 cl = -0x1.8971b6p-23f;
57 |                 p += 4*9;
58 |             } else if (x <= b5) {
59 |                 ch = 0x1.14eb56p+3f;
60 |                 cl = 0x1.999bdap-22f;
61 |                 p += 5*9;
62 |             } else if (x <= b6) {
63 |                 ch = 0x1.458d0ep+3f;
64 |                 cl = -0x1.e8407ap-22f;
65 |                 p += 6*9;
66 |             } else {
67 |                 ch = 0x1.795440p+3f;
68 |                 cl = 0x1.04e56cp-26f;
69 |                 p += 7*9;
70 |             }
71 |         }
72 |         // Shift by the interval's centre, presumably split into a high part ch and a small correction cl; then degree-8 Horner.
73 |         x = x - ch - cl;
74 |         ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
75 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
76 |               p[8],  p[7]), p[6]), p[5]), p[4]),
77 |               p[3]), p[2]), p[1]), p[0]);
78 |     } else {
79 |         float r = MATH_RCP(x); // x > b7: work with the reciprocal; bp0/ba0/cosb are external helpers not visible here
80 |         float r2 = r*r;
81 |         float p = MATH_PRIVATE(bp0)(r2) * r;
82 |         ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(x) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p);
83 |         ret = BUILTIN_CLASS_F32(x, CLASS_PINF) ? 0.0f : ret; // +inf input yields 0
84 |     }
85 | 
86 |     return ret;
87 | }
88 | 
89 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/j1F.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathF.h"
 9 | 
10 | extern float MATH_PRIVATE(cosb)(float, int, float);
11 | extern CONSTATTR float MATH_PRIVATE(bp1)(float);
12 | extern CONSTATTR float MATH_PRIVATE(ba1)(float);
13 | // j1(x): presumably the Bessel function J1; evaluated on ax = |x| and negated at the end for negative x (odd in x).
14 | PUREATTR float
15 | MATH_MANGLE(j1)(float x)
16 | {
17 |     const float b0 =  1.09375f;
18 |     const float b1 =  2.84375f;
19 |     const float b2 =  4.578125f;
20 |     const float b3 =  6.171875f;
21 |     const float b4 =  7.78125f;
22 |     const float b5 =  9.359375f;
23 |     const float b6 = 10.953125f;
24 |     const float b7 = 12.515625f;
25 | 
26 |     float ax = BUILTIN_ABS_F32(x);
27 |     float ret;
28 | 
29 |     if (ax <= b7) {
30 |         // Try to maintain relative accuracy here
31 |         // b0..b7 split [0, b7] into 8 intervals; interval k reads the 9 coefficients at p + k*9.
32 |         USE_TABLE(float, p, M32_J1);
33 |         float ch, cl;
34 | 
35 |         if (ax <= b3) {
36 |             if (ax <= b0) {
37 |                 ch = 0.0f;
38 |                 cl = 0.0f;
39 |             } else if (ax <= b1) {
40 |                 ch = 0x1.d757d2p+0f;
41 |                 cl = -0x1.375c60p-32f;
42 |                 p += 1*9;
43 |             } else if (ax <= b2) {
44 |                 ch = 0x1.ea7558p+1f;
45 |                 cl = -0x1.4a121ep-24f;
46 |                 p += 2*9;
47 |             } else {
48 |                 ch = 0x1.55365cp+2f;
49 |                 cl = -0x1.fe6dccp-25f;
50 |                 p += 3*9;
51 |             }
52 |         } else {
53 |             if (ax <= b4) {
54 |                 ch = 0x1.c0ff60p+2f;
55 |                 cl = -0x1.8971b6p-23f;
56 |                 p += 4*9;
57 |             } else if (ax <= b5) {
58 |                 ch = 0x1.112980p+3f;
59 |                 cl = 0x1.e17114p-22f;
60 |                 p += 5*9;
61 |             } else if (ax <= b6) {
62 |                 ch = 0x1.458d0ep+3f;
63 |                 cl = -0x1.e8407ap-22f;
64 |                 p += 6*9;
65 |             } else {
66 |                 ch = 0x1.769798p+3f;
67 |                 cl = -0x1.a04694p-23f;
68 |                 p += 7*9;
69 |             }
70 |         }
71 |         // Shift by the interval's centre, presumably split into a high part ch and a small correction cl; then degree-8 Horner.
72 |         ax = ax - ch - cl;
73 |         ret = MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax,
74 |               MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax,
75 |               p[8],  p[7]), p[6]), p[5]), p[4]),
76 |               p[3]), p[2]), p[1]), p[0]);
77 |     } else {
78 |         float r = MATH_RCP(ax); // use |x|, not x: rsqrt of a negative x is NaN, and the sign is applied after this branch
79 |         float r2 = r*r;
80 |         float p = MATH_PRIVATE(bp1)(r2) * r;
81 |         ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(ax) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p);
82 |         ret = BUILTIN_CLASS_F32(ax, CLASS_PINF) ? 0.0f : ret; // infinite magnitude yields 0
83 |     }
84 |     // Both branches computed the value for |x|; flip the sign for negative inputs.
85 |     if (x < 0.0f)
86 |         ret = -ret;
87 | 
88 |     return ret;
89 | }
90 | 
91 | 


--------------------------------------------------------------------------------
/samples/hcc_dialects/vadd_hip.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include "hip/hip_runtime.h"
24 | #include <cstdio>
25 | #include <cstdlib>
26 | 
27 | __global__ void vadd_hip(const float* a, const float* b, float* c, int N) {
28 |     // One element per thread: index = block index * block size + thread index.
29 |     const int gid = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
30 |     // Threads whose index is past the end have nothing to do.
31 |     if (gid >= N) return;
32 |     c[gid] = a[gid] + b[gid];
33 | }
34 | 
35 | 
36 | // Adds two vectors on the device with vadd_hip and checks the result; prints "PASSED!" and returns 0 only on a full match.
37 | int main(int argc, char* argv[]) {
38 |     int sizeElements = 1000000;
39 |     size_t sizeBytes = sizeElements * sizeof(float);
40 |     bool pass = true;
41 | 
42 |     // Allocate host memory
43 |     float* A_h = (float*)malloc(sizeBytes);
44 |     float* B_h = (float*)malloc(sizeBytes);
45 |     float* C_h = (float*)malloc(sizeBytes);
46 |     if (!A_h || !B_h || !C_h) {
47 |         printf("error: host allocation failed\n");
48 |         free(A_h);
49 |         free(B_h);
50 |         free(C_h);
51 |         return EXIT_FAILURE;
52 |     }
53 | 
54 |     // Allocate device memory (pointers start out null so the cleanup below stays valid if an allocation fails):
55 |     float *A_d = nullptr, *B_d = nullptr, *C_d = nullptr;
56 |     hipMalloc((void**)&A_d, sizeBytes);
57 |     hipMalloc((void**)&B_d, sizeBytes);
58 |     hipMalloc((void**)&C_d, sizeBytes);
59 | 
60 |     // Initialize host memory
61 |     for (int i = 0; i < sizeElements; i++) {
62 |         A_h[i] = 1.618f * i;
63 |         B_h[i] = 3.142f * i;
64 |     }
65 | 
66 |     // H2D Copy
67 |     hipMemcpy(A_d, A_h, sizeBytes, hipMemcpyHostToDevice);
68 |     hipMemcpy(B_d, B_h, sizeBytes, hipMemcpyHostToDevice);
69 | 
70 |     // Launch kernel onto default accelerator
71 |     int blockSize = 256;                                      // pick arbitrary block size
72 |     int blocks = (sizeElements + blockSize - 1) / blockSize;  // round up to launch enough blocks
73 |     hipLaunchKernelGGL(vadd_hip, dim3(blocks), dim3(blockSize), 0, 0, A_d, B_d, C_d, sizeElements);
74 | 
75 |     // D2H Copy
76 |     hipMemcpy(C_h, C_d, sizeBytes, hipMemcpyDeviceToHost);
77 | 
78 |     // Verify
79 |     for (int i = 0; i < sizeElements; i++) {
80 |         float ref = 1.618f * i + 3.142f * i;
81 |         if (C_h[i] != ref) {
82 |             printf("error:%d computed=%6.2f, reference=%6.2f\n", i, C_h[i], ref);
83 |             pass = false;
84 |         }
85 |     }
86 |     if (pass) printf("PASSED!\n");
87 | 
88 |     // Release device and host buffers
89 |     hipFree(A_d);
90 |     hipFree(B_d);
91 |     hipFree(C_d);
92 |     free(A_h);
93 |     free(B_h);
94 |     free(C_h);
95 | 
96 |     // Let scripts detect a failed run through the exit status
97 |     return pass ? EXIT_SUCCESS : EXIT_FAILURE;
98 | }
99 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/j0D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | extern double MATH_PRIVATE(cosb)(double, int, double);
11 | extern CONSTATTR double MATH_PRIVATE(bp0)(double);
12 | extern CONSTATTR double MATH_PRIVATE(ba0)(double);
13 | // j0(x): evaluated on |x|; presumably the Bessel function J0 - TODO confirm against the OCML documentation.
14 | PUREATTR double
15 | MATH_MANGLE(j0)(double x)
16 | {
17 |     x = BUILTIN_ABS_F64(x);
18 |     // Interval boundaries for the table-driven branch below.
19 |     const double b0 = 1.65625;
20 |     const double b1 = 3.125;
21 |     const double b2 = 4.6875;
22 |     const double b3 = 6.265625;
23 |     const double b4 = 7.84375;
24 |     const double b5 = 9.421875;
25 |     const double b6 = 10.984375;
26 |     const double b7 = 12.578125;
27 | 
28 |     double ret;
29 | 
30 |     if (x <= b7) {
31 |         // Try to maintain relative accuracy here
32 |         // b0..b7 split [0, b7] into 8 intervals; interval k reads the 15 coefficients at p + k*15.
33 |         USE_TABLE(double, p, M64_J0);
34 |         double ch, cl;
35 | 
36 |         if (x <= b3) {
37 |             if (x <= b0) {
38 |                 ch = 0.0;
39 |                 cl = 0.0;
40 |             } else if (x <= b1) {
41 |                 ch = 0x1.33d152e971b40p+1;
42 |                 cl = -0x1.0f539d7da258ep-53;
43 |                 p += 1*15;
44 |             } else if (x <= b2) {
45 |                 ch = 0x1.ea75575af6f09p+1;
46 |                 cl = -0x1.60155a9d1b256p-53;
47 |                 p += 2*15;
48 |             } else {
49 |                 ch = 0x1.6148f5b2c2e45p+2;
50 |                 cl = 0x1.75054cd60a517p-54;
51 |                 p += 3*15;
52 |             }
53 |         } else {
54 |             if (x <= b4) {
55 |                 ch = 0x1.c0ff5f3b47250p+2;
56 |                 cl = -0x1.b226d9d243827p-54;
57 |                 p += 4*15;
58 |             } else if (x <= b5) {
59 |                 ch = 0x1.14eb56cccdecap+3;
60 |                 cl = -0x1.51970714c7c25p-52;
61 |                 p += 5*15;
62 |             } else if (x <= b6) {
63 |                 ch = 0x1.458d0d0bdfc29p+3;
64 |                 cl = 0x1.02610a51562b6p-51;
65 |                 p += 6*15;
66 |             } else {
67 |                 ch = 0x1.79544008272b6p+3;
68 |                 cl = 0x1.444fd5821d5b1p-52;
69 |                 p += 7*15;
70 |             }
71 |         }
72 |         // Shift by the interval's centre, presumably split into a high part ch and a small correction cl; then degree-14 Horner.
73 |         x = x - ch - cl;
74 |         ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
75 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
76 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
77 |               MATH_MAD(x, MATH_MAD(x,
78 |               p[14], p[13]), p[12]),
79 |               p[11]), p[10]), p[9]), p[8]),
80 |               p[7]), p[6]), p[5]), p[4]),
81 |               p[3]), p[2]), p[1]), p[0]);
82 |               
83 |     } else {
84 |         double r = MATH_RCP(x); // x > b7: work with the reciprocal; bp0/ba0/cosb are external helpers not visible here
85 |         double r2 = r*r;
86 |         double p = MATH_PRIVATE(bp0)(r2) * r;
87 |         ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p);
88 |         ret = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 0.0 : ret; // +inf input yields 0
89 |     }
90 | 
91 |     return ret;
92 | }
93 | 
94 | 


--------------------------------------------------------------------------------
/cxxopts/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | This is the changelog for `cxxopts`, a C++11 library for parsing command line
 4 | options. The project adheres to semantic versioning.
 5 | 
 6 | ## 2.2
 7 | 
 8 | ### Changed
 9 | 
10 | * Allow integers to have leading zeroes.
11 | * Build the tests by default.
12 | * Don't check for container when showing positional help.
13 | 
14 | ### Added
15 | 
16 | * Iterator inputs to `parse_positional`.
17 | * Throw an exception if the option in `parse_positional` doesn't exist.
18 | * Parse a delimited list in a single argument for vector options.
19 | * Add an option to disable implicit value on booleans.
20 | 
21 | ### Bug Fixes
22 | 
23 | * Fix a warning about possible loss of data.
24 | * Fix version numbering in CMakeLists.txt
25 | * Remove unused declaration of the undefined `ParseResult::get_option`.
26 | * Throw on invalid option syntax when beginning with a `-`.
27 | * Throw in `as` when option wasn't present.
28 | * Fix catching exceptions by reference.
29 | * Fix out of bounds errors parsing integers.
30 | 
31 | ## 2.1.1
32 | 
33 | ### Bug Fixes
34 | 
35 | * Revert the change adding `const` type for `argv`, because most users expect
36 |   to pass a non-const `argv` from `main`.
37 | 
38 | ## 2.1
39 | 
40 | ### Changed
41 | 
42 | * Options with implicit arguments now require the `--option=value` form if
43 |   they are to be specified with an explicit value. This is to remove the ambiguity
44 |   when a positional argument could follow an option with an implicit value.
45 |   For example, `--foo value`, where `foo` has an implicit value, will be
46 |   parsed as `--foo=implicit` and a positional argument `value`.
47 | * Boolean values are no longer special, but are just an option with a default
48 |   and implicit value.
49 | 
50 | ### Added
51 | 
52 | * Added support for `std::optional` as a storage type.
53 | * Allow the help string to be customised.
54 | * Use `const` for the type in the `argv` parameter, since the contents of the
55 |   arguments are never modified.
56 | 
57 | ### Bug Fixes
58 | 
59 | * Building against GCC 4.9 was broken due to overly strict shadow warnings.
60 | * Fixed an ambiguous overload in the `parse_positional` function when an
61 |   `initializer_list` was directly passed.
62 | * Fixed precedence in the Boolean value regex.
63 | 
64 | ## 2.0
65 | 
66 | ### Changed
67 | 
68 | * `Options::parse` returns a ParseResult rather than storing the parse
69 |   result internally.
70 | * Options with default values now get counted as appearing once if they
71 |   were not specified by the user.
72 | 
73 | ### Added
74 | 
75 | * A new `ParseResult` object that is the immutable result of parsing. It
76 |   responds to the same `count` and `operator[]` as `Options` of 1.x did.
77 | * The function `ParseResult::arguments` returns a vector of the parsed
78 |   arguments to iterate through in the order they were provided.
79 | * The symbol `cxxopts::version` for the version of the library.
80 | * Booleans can be specified with various strings and explicitly set false.
81 | 
82 | ## 1.x
83 | 
84 | The 1.x series was the first major version of the library, with release numbers
85 | starting to follow semantic versioning, after 0.x being unstable.  It never had
86 | a changelog maintained for it. Releases mostly contained bug fixes, with the
87 | occasional feature added.
88 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/erfcxD.cl:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "mathD.h"
 3 | 
 4 | CONSTATTR double
 5 | MATH_MANGLE(erfcx)(double x) // scaled complementary error function: erfcx(x) = exp(x*x) * erfc(x)
 6 | {
 7 |     double ax = BUILTIN_ABS_F64(x);
 8 |     double ret;
 9 | 
10 |     if (ax < 1.0) { // |x| < 1: degree-24 polynomial evaluated in x itself, so both signs are covered
11 |         ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
12 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
13 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
14 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
15 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
16 |               MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, MATH_MAD(x,
17 |                   0x1.997339112da12p-29, -0x1.9a1485b7ae337p-27),
18 |                   0x1.9548ab4c5bb56p-26), -0x1.2f88b47e02dc3p-24),
19 |                   0x1.282114351c39ap-22), -0x1.e533a426aadd7p-21),
20 |                   0x1.723131b8ef11ep-19), -0x1.188f6b08d66b9p-17),
21 |                   0x1.a00995a561233p-16), -0x1.2aeb04681fed5p-14),
22 |                   0x1.a01b9d82bcaa5p-13), -0x1.182d3bb1ac2c8p-11),
23 |                   0x1.6c16a932f49d1p-10), -0x1.c74aef6905182p-9),
24 |                   0x1.111111f403407p-7), -0x1.390379458257cp-6),
25 |                   0x1.5555554b34536p-5), -0x1.6023e8de7793p-4),
26 |                   0x1.5555555597342p-3), -0x1.341f6bc020c17p-2),
27 |                   0x1.fffffffffe5aep-2), -0x1.812746b037cadp-1),
28 |                   0x1.000000000001dp0), -0x1.20dd750429b6ap0),
29 |                   0x1.0p0);
30 |     } else if (ax < 5120.0) { // 1 <= |x| < 5120: computes the value at |x|; negative x is fixed up below
31 |         double t = MATH_DIV(ax - 4.0, ax + 4.0); // maps |x| in [1, 5120) to t in [-0.6, 1)
32 |         ret = MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
33 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
34 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
35 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
36 |               MATH_MAD(t, MATH_MAD(t, MATH_MAD(t, MATH_MAD(t,
37 |               MATH_MAD(t, MATH_MAD(t,
38 |                   0.14981549849751462e-8, -0.69954933359042387e-8),
39 |                   -0.15965692247743744e-7), 0.92967132363414431e-7),
40 |                   0.70214215034531004e-7), -0.80204958740421079e-6),
41 |                   0.29923810132862422e-6), 0.56895739871851154e-5),
42 |                   -0.11226090578381133e-4), -0.2438781785281914e-4),
43 |                   0.00015062360829881126), -0.00019926094025574419),
44 |                   -0.00075777387606136804), 0.0050319709983606006),
45 |                   -0.016197733946788412), 0.037167515387099868),
46 |                   -0.066330365824435124), 0.093732835010698844),
47 |                   -0.10103906603561565), 0.068097054254223675),
48 |                   0.015379652102604634), -0.13962111684055725),
49 |                   1.2329951186255526);
50 |         ret = MATH_DIV(ret, MATH_MAD(ax, 2.0, 1.0)); // polynomial in t is divided by (2|x| + 1)
51 |     } else { // |x| >= 5120 (NaN also lands here): leading terms of the asymptotic series
52 |         const double one_over_sqrtpi = 0x1.20dd750429b6dp-1;
53 |         double z = MATH_RCP(x * x); // series: 1/(sqrt(pi)*x) * (1 - z/2 + 3*z*z/4 - ...), z = 1/x^2
54 |         ret =  MATH_DIV(one_over_sqrtpi, x) * MATH_MAD(z, MATH_MAD(z, 0.75, -0.5), 1.0);
55 |     }
56 | 
57 |     if (x <= -1.0) { // reflection for negative arguments: erfcx(x) = 2*exp(x*x) - erfcx(-x)
58 |         double x2h = ax * ax; // rounded square ...
59 |         double x2l = BUILTIN_FMA_F64(ax, ax, -x2h); // ... and its rounding error, so x*x = x2h + x2l
60 |         ret = MATH_MANGLE(exp)(x2h) * MATH_MANGLE(exp)(x2l) * 2.0 - ret;
61 |         ret = x < -27.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; // forced to +infinity below -27
62 |     }
63 | 
64 |     return ret;
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/j1D.cl:
--------------------------------------------------------------------------------
 1 | /*===--------------------------------------------------------------------------
 2 |  *                   ROCm Device Libraries
 3 |  *
 4 |  * This file is distributed under the University of Illinois Open Source
 5 |  * License. See LICENSE.TXT for details.
 6 |  *===------------------------------------------------------------------------*/
 7 | 
 8 | #include "mathD.h"
 9 | 
10 | extern double MATH_PRIVATE(cosb)(double, int, double);
11 | extern CONSTATTR double MATH_PRIVATE(bp1)(double);
12 | extern CONSTATTR double MATH_PRIVATE(ba1)(double);
13 | 
14 | 
15 | PUREATTR double
16 | MATH_MANGLE(j1)(double x) // Bessel function of the first kind, order one; odd, so computed on |x| and sign-flipped
17 | {
18 |     const double b0 =  1.09375; // b0..b7: upper bounds of the eight polynomial intervals on |x|
19 |     const double b1 =  2.84375;
20 |     const double b2 =  4.578125;
21 |     const double b3 =  6.171875;
22 |     const double b4 =  7.78125;
23 |     const double b5 =  9.359375;
24 |     const double b6 = 10.953125;
25 |     const double b7 = 12.515625;
26 | 
27 |     double ax = BUILTIN_ABS_F64(x);
28 |     double ret;
29 | 
30 |     if (ax <= b7) {
31 |         // Try to maintain relative accuracy here
32 | 
33 |         USE_TABLE(double, p, M64_J1); // p: table with 15 coefficients per interval
34 |         double ch, cl; // expansion point of the selected interval as ch + cl (cl is a tiny correction to ch)
35 | 
36 |         if (ax <= b3) {
37 |             if (ax <= b0) {
38 |                 ch = 0.0;
39 |                 cl = 0.0;
40 |             } else if (ax <= b1) {
41 |                 ch = 0x1.d757d1fec8a3ap+0;
42 |                 cl = 0x1.616d820cfdaebp-58;
43 |                 p += 1*15;
44 |             } else if (ax <= b2) {
45 |                 ch = 0x1.ea75575af6f09p+1;
46 |                 cl = -0x1.60155a9d1b256p-53;
47 |                 p += 2*15;
48 |             } else {
49 |                 ch = 0x1.55365bc032467p+2;
50 |                 cl = 0x1.5c646a75d7539p-53;
51 |                 p += 3*15;
52 |             }
53 |         } else {
54 |             if (ax <= b4) {
55 |                 ch = 0x1.c0ff5f3b47250p+2;
56 |                 cl = -0x1.b226d9d243827p-54;
57 |                 p += 4*15;
58 |             } else if (ax <= b5) {
59 |                 ch = 0x1.112980f0b88a1p+3;
60 |                 cl = -0x1.63e17ec20a31dp-53;
61 |                 p += 5*15;
62 |             } else if (ax <= b6) {
63 |                 ch = 0x1.458d0d0bdfc29p+3;
64 |                 cl = 0x1.02610a51562b6p-51;
65 |                 p += 6*15;
66 |             } else {
67 |                 ch = 0x1.76979797ee5acp+3;
68 |                 cl = 0x1.9a84d3a5fedc2p-51;
69 |                 p += 7*15;
70 |             }
71 |         }
72 | 
73 |         ax = ax - ch - cl; // offset from the expansion point, subtracting the large part first
74 | 
75 |         ret = MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax,
76 |               MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax,
77 |               MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax, MATH_MAD(ax,
78 |               MATH_MAD(ax, MATH_MAD(ax,
79 |               p[14], p[13]), p[12]),
80 |               p[11]), p[10]), p[9]), p[8]),
81 |               p[7]), p[6]), p[5]), p[4]),
82 |               p[3]), p[2]), p[1]), p[0]);
83 |     } else { // |x| > b7: large-argument form; must use |x|, the sign is restored after this branch
84 |         double r = MATH_RCP(ax); // positive, so the square root below stays real for negative x
85 |         double r2 = r*r;
86 |         double p = MATH_PRIVATE(bp1)(r2) * r;
87 |         ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p);
88 |         ret = BUILTIN_CLASS_F64(ax, CLASS_PINF) ? 0.0 : ret; // both infinities give a zero magnitude
89 |     }
90 | 
91 |     if (x < 0.0) // odd symmetry: j1(-x) = -j1(x)
92 |         ret = -ret;
93 | 
94 |     return ret;
95 | }
96 | 
97 | 


--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_subdirectory(bitcode)
 2 | 
 3 | ###################################################################################
 4 | 
 5 | find_package(OpenCL 2.0 REQUIRED)
 6 | 
 7 | set(HIPCL_SOURCES hipcl.cc backend.cc log.cc spirv.cc)
 8 | 
 9 | set_source_files_properties(${HIPCL_SOURCES} PROPERTIES LANGUAGE CXX)
10 | 
11 | add_library(hipcl SHARED ${HIPCL_SOURCES})
12 | 
13 | set_target_properties(hipcl PROPERTIES
14 |                       CXX_STANDARD_REQUIRED ON
15 |                       SOVERSION "${LIB_API_VERSION}"
16 |                       VERSION "${LIB_BUILD_VERSION}")
17 | 
18 | target_link_libraries(hipcl ${SANITIZER_LIBS} ${PTHREAD_LIBRARY} ${OpenCL_LIBRARIES})
19 | 
20 | add_dependencies("hipcl" "kernellib_bc")
21 | 
22 | if(DEBUG)
23 |     target_compile_definitions(hipcl PRIVATE "_GLIBCXX_DEBUG")
24 | endif()
25 | 
26 | if(LOGLEVEL)
27 |   set(VALID_LEVELS "DEBUG;INFO;WARN;ERROR;CRITICAL;OFF")
28 |   if(LOGLEVEL IN_LIST VALID_LEVELS)
29 |     target_compile_definitions(hipcl PRIVATE "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${LOGLEVEL}")
30 |   else()
31 |     message(WARNING "Unknown loglevel: ${LOGLEVEL}, ignoring")
32 |   endif()
33 | endif()
34 | 
35 | target_compile_options(hipcl PRIVATE "-Wno-unused-parameter")  # applies only when building hipcl itself
36 | 
37 | target_compile_options(hipcl INTERFACE "-x" "hip")  # INTERFACE: passed on to targets that link hipcl
38 | 
39 | if(SANITIZER_OPTIONS)
40 |   target_compile_options(hipcl INTERFACE ${SANITIZER_OPTIONS})  # for targets linking hipcl
41 |   target_compile_options(hipcl PRIVATE ${SANITIZER_OPTIONS})  # for hipcl's own sources
42 | endif()
43 | 
44 | target_compile_options(hipcl INTERFACE
45 |         "$<INSTALL_INTERFACE:--hip-device-lib-path=${HIPCL_DATA_DIR}>"  # used by an installed hipcl
46 |         "$<BUILD_INTERFACE:--hip-device-lib-path=${CMAKE_BINARY_DIR}>"  # used inside this build tree
47 |         "--hip-device-lib=kernellib.bc")
48 | 
49 | # Path to the LLVM passes: install location for an installed hipcl, build tree otherwise
50 | target_compile_options(hipcl INTERFACE
51 |         "$<INSTALL_INTERFACE:--hip-llvm-pass-path=${HIPCL_LLVM_DIR}>"
52 |         "$<BUILD_INTERFACE:--hip-llvm-pass-path=${CMAKE_BINARY_DIR}/llvm_passes>")
53 | 
54 | target_include_directories(hipcl
55 |     PUBLIC
56 |         "$<INSTALL_INTERFACE:${HIPCL_INC_DIR}>"
57 |         "$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include>"
58 |     PRIVATE
59 |         "${CMAKE_SOURCE_DIR}"
60 |         "${CMAKE_SOURCE_DIR}/include"
61 | )
62 | 
63 | install(TARGETS hipcl
64 |         EXPORT "hip-targets"
65 |         LIBRARY DESTINATION "${HIPCL_LIB_DIR}"
66 |         ARCHIVE DESTINATION "${HIPCL_LIB_DIR}"
67 |         )
68 | 
69 | install(EXPORT "hip-targets"
70 |         NAMESPACE "hip::"
71 |         DESTINATION "${HIPCL_CMAKE_DIR}")
72 | 
73 | install(FILES
74 |         "${CMAKE_SOURCE_DIR}/include/hip/hipcl.hh"
75 |         "${CMAKE_SOURCE_DIR}/include/hip/hipcl_mathlib.hh"
76 |         "${CMAKE_SOURCE_DIR}/include/hip/hip_fatbin.h"
77 |         "${CMAKE_SOURCE_DIR}/include/hip/hip_fp16.h"
78 |         "${CMAKE_SOURCE_DIR}/include/hip/hip_runtime.h"
79 |         "${CMAKE_SOURCE_DIR}/include/hip/hip_vector_types.h"
80 |   DESTINATION "${HIPCL_INC_DIR}/hip")
81 | 
82 | include(CMakePackageConfigHelpers)
83 | 
84 | configure_package_config_file(
85 |   "${CMAKE_CURRENT_SOURCE_DIR}/hipcl-config.cmake.in"
86 |   "${CMAKE_BINARY_DIR}/hipConfig.cmake"
87 |   INSTALL_DESTINATION "${HIPCL_CMAKE_DIR}"
88 |   PATH_VARS HIPCL_INC_DIR HIPCL_LIB_DIR HIPCL_BIN_DIR HIPCL_COMPILER
89 |   )
90 | 
91 | write_basic_package_version_file(
92 |   "${CMAKE_BINARY_DIR}/hipConfigVersion.cmake"
93 |   COMPATIBILITY SameMajorVersion
94 |   )
95 | 


--------------------------------------------------------------------------------
/spdlog/sinks/syslog_sink.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Copyright(c) 2015 Gabi Melman.
 3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 4 | //
 5 | 
 6 | #pragma once
 7 | 
 8 | #ifndef SPDLOG_H
 9 | #error "spdlog.h must be included before this file."
10 | #endif
11 | 
12 | #include "spdlog/sinks/base_sink.h"
13 | 
14 | #include <array>
15 | #include <string>
16 | #include <syslog.h>
17 | 
18 | namespace spdlog {
19 | namespace sinks {
20 | /**
21 |  * Sink that writes to syslog using the `syslog()` library call.
22 |  *
23 |  * Locking is not needed, as `syslog()` itself is thread-safe.
24 |  */
25 | template<typename Mutex>
26 | class syslog_sink : public base_sink<Mutex>
27 | {
28 | public:
29 |     // Fills the spdlog-level -> syslog-priority table and opens the syslog connection.
30 |     explicit syslog_sink(std::string ident = "", int syslog_option = 0, int syslog_facility = LOG_USER)
31 |         : ident_(std::move(ident))
32 |     {
33 |         priorities_[static_cast<size_t>(level::trace)] = LOG_DEBUG; // trace and debug share LOG_DEBUG
34 |         priorities_[static_cast<size_t>(level::debug)] = LOG_DEBUG;
35 |         priorities_[static_cast<size_t>(level::info)] = LOG_INFO;
36 |         priorities_[static_cast<size_t>(level::warn)] = LOG_WARNING;
37 |         priorities_[static_cast<size_t>(level::err)] = LOG_ERR;
38 |         priorities_[static_cast<size_t>(level::critical)] = LOG_CRIT;
39 |         priorities_[static_cast<size_t>(level::off)] = LOG_INFO;
40 | 
41 |         // pass nullptr when ident is empty so that syslog uses the program name
42 |         ::openlog(ident_.empty() ? nullptr : ident_.c_str(), syslog_option, syslog_facility);
43 |     }
44 | 
45 |     ~syslog_sink() override
46 |     {
47 |         ::closelog();
48 |     }
49 | 
50 |     syslog_sink(const syslog_sink &) = delete;
51 |     syslog_sink &operator=(const syslog_sink &) = delete;
52 | 
53 | protected:
54 |     void sink_it_(const details::log_msg &msg) override
55 |     {
56 |         ::syslog(syslog_prio_from_level(msg), "%s", fmt::to_string(msg.payload).c_str()); // fixed "%s": payload is data, never a format
57 |     }
58 | 
59 |     void flush_() override {} // intentionally empty: this sink keeps no buffer of its own
60 | 
61 | private:
62 |     std::array<int, 7> priorities_; // indexed by static_cast<size_t>(level)
63 |     // must store the ident because the openlog man page says it might keep the
64 |     // pointer as is instead of copying the string
65 |     const std::string ident_;
66 | 
67 |     //
68 |     // Maps the message's spdlog level to a syslog priority via the priorities_ table.
69 |     //
70 |     int syslog_prio_from_level(const details::log_msg &msg) const
71 |     {
72 |         return priorities_[static_cast<size_t>(msg.level)];
73 |     }
74 | };
75 | 
76 | using syslog_sink_mt = syslog_sink<std::mutex>;
77 | using syslog_sink_st = syslog_sink<details::null_mutex>;
78 | } // namespace sinks
79 | 
80 | // Create and register a syslog logger (_mt: std::mutex sink, _st: null_mutex sink); facility defaults to LOG_USER like the sink itself
81 | template<typename Factory = default_factory>
82 | inline std::shared_ptr<logger> syslog_logger_mt(
83 |     const std::string &logger_name, const std::string &syslog_ident = "", int syslog_option = 0, int syslog_facility = LOG_USER)
84 | {
85 |     return Factory::template create<sinks::syslog_sink_mt>(logger_name, syslog_ident, syslog_option, syslog_facility);
86 | }
87 | 
88 | template<typename Factory = default_factory>
89 | inline std::shared_ptr<logger> syslog_logger_st(
90 |     const std::string &logger_name, const std::string &syslog_ident = "", int syslog_option = 0, int syslog_facility = LOG_USER)
91 | {
92 |     return Factory::template create<sinks::syslog_sink_st>(logger_name, syslog_ident, syslog_option, syslog_facility);
93 | }
94 | } // namespace spdlog
95 | 


--------------------------------------------------------------------------------
/samples/6_dynamic_shared/hipDynamicShared2.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | #include "hip/hip_runtime.h"
23 | #include <cassert>
24 | #include <iostream>
25 | 
26 | #define LEN 16 * 1024
27 | #define SIZE LEN * 4
28 | 
29 | #define HIPCHECK(code)                                                         \
30 |   do {                                                                         \
31 |     hiperr = code;                                                             \
32 |     if (hiperr != hipSuccess) {                                                \
33 |       std::cerr << "ERROR on line " << __LINE__ << ": " << (unsigned)hiperr    \
34 |                 << "\n";                                                       \
35 |       return 1;                                                                \
36 |     }                                                                          \
37 |   } while (0)
38 | 
39 | __global__ void vectorAdd(float *Ad, float *Bd) { // Bd[k] = Ad[k] + 1.0f, staged through dynamic shared memory
40 |   HIP_DYNAMIC_SHARED(float, sBd); // shared buffer whose byte size is passed at launch (SIZE in main)
41 |   int tx = threadIdx.x;
42 |   for (int i = 0; i < LEN / 64; i++) { // 64 = threads per block used by main; each thread covers LEN / 64 elements
43 |     sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
44 |     Bd[tx + i * 64] = sBd[tx + i * 64]; // a thread reads back only the slot it just wrote
45 |   }
46 | }
47 | 
48 | int main() { // runs vectorAdd once and checks that every element of B was updated
49 |   size_t errors = 0;
50 |   hipError_t hiperr = hipSuccess; // written by HIPCHECK
51 |   float *A, *B, *Ad, *Bd;
52 |   A = new float[LEN];
53 |   B = new float[LEN];
54 |   for (int i = 0; i < LEN; i++) {
55 |     A[i] = 1.0f;
56 |     B[i] = 1.0f; // B starts at 1.0f, so an unchanged B means the kernel wrote nothing
57 |   }
58 |   HIPCHECK(hipMalloc((void **)&Ad, SIZE));
59 |   HIPCHECK(hipMalloc((void **)&Bd, SIZE));
60 |   HIPCHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
61 |   HIPCHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
62 |   hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd); // one block of 64 threads, SIZE bytes of dynamic shared memory
63 |   HIPCHECK(hipGetLastError());
64 |   HIPCHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
65 |   for (int i = 0; i < LEN; i++) {
66 |     if (B[i] <= 1.0f || B[i] >= 3.0f) // expected 2.0f; strict bounds reject the untouched initial 1.0f
67 |       ++errors;
68 |   }
69 |   HIPCHECK(hipFree(Ad));
70 |   HIPCHECK(hipFree(Bd));
71 |   delete[] A;
72 |   delete[] B;
73 | 
74 |   if (errors != 0) {
75 |     std::cout << "hipDynamicShared2 FAILED: " << errors << " errors\n";
76 |     return 1;
77 |   } else {
78 |     std::cout << "hipDynamicShared2 PASSED!\n";
79 |     return 0;
80 |   }
81 | }
82 | 


--------------------------------------------------------------------------------
/spdlog/async.h:
--------------------------------------------------------------------------------
 1 | 
 2 | //
 3 | // Copyright(c) 2018 Gabi Melman.
 4 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
 5 | //
 6 | 
 7 | #pragma once
 8 | 
 9 | //
10 | // Async logging using global thread pool
11 | // All loggers created here share same global thread pool.
12 | // Each log message is pushed to a queue along with a shared pointer to the
13 | // logger.
14 | // If a logger is deleted while having pending messages in the queue, its actual
15 | // destruction will be deferred
16 | // until all its messages are processed by the thread pool.
17 | // This is because each message in the queue holds a shared_ptr to the
18 | // originating logger.
19 | 
20 | #include "spdlog/async_logger.h"
21 | #include "spdlog/details/registry.h"
22 | #include "spdlog/details/thread_pool.h"
23 | 
24 | #include <memory>
25 | #include <mutex>
26 | 
27 | namespace spdlog {
28 | 
29 | namespace details {
30 | static const size_t default_async_q_size = 8192;
31 | }
32 | 
33 | // Async logger factory: creates async loggers backed by the global thread pool.
34 | // If no global thread pool exists yet, one is created with the default queue
35 | // size (details::default_async_q_size) and a single thread.
36 | template<async_overflow_policy OverflowPolicy = async_overflow_policy::block>
37 | struct async_factory_impl
38 | {
39 |     template<typename Sink, typename... SinkArgs>
40 |     static std::shared_ptr<async_logger> create(std::string logger_name, SinkArgs &&... args)
41 |     {
42 |         auto &registry_inst = details::registry::instance();
43 | 
44 |         // create the global thread pool if it does not exist yet; tp_lock is held until create() returns
45 |         std::lock_guard<std::recursive_mutex> tp_lock(registry_inst.tp_mutex());
46 |         auto tp = registry_inst.get_tp();
47 |         if (tp == nullptr)
48 |         {
49 |             tp = std::make_shared<details::thread_pool>(details::default_async_q_size, 1);
50 |             registry_inst.set_tp(tp);
51 |         }
52 | 
53 |         auto sink = std::make_shared<Sink>(std::forward<SinkArgs>(args)...); // sink is built from the forwarded arguments
54 |         auto new_logger = std::make_shared<async_logger>(std::move(logger_name), std::move(sink), std::move(tp), OverflowPolicy);
55 |         registry_inst.initialize_logger(new_logger);
56 |         return new_logger;
57 |     }
58 | };
59 | 
60 | using async_factory = async_factory_impl<async_overflow_policy::block>;
61 | using async_factory_nonblock = async_factory_impl<async_overflow_policy::overrun_oldest>;
62 | 
63 | template<typename Sink, typename... SinkArgs>
64 | inline std::shared_ptr<spdlog::logger> create_async(std::string logger_name, SinkArgs &&... sink_args)
65 | {
66 |     return async_factory::create<Sink>(std::move(logger_name), std::forward<SinkArgs>(sink_args)...); // async_factory uses the `block` overflow policy
67 | }
68 | 
69 | template<typename Sink, typename... SinkArgs>
70 | inline std::shared_ptr<spdlog::logger> create_async_nb(std::string logger_name, SinkArgs &&... sink_args)
71 | {
72 |     return async_factory_nonblock::create<Sink>(std::move(logger_name), std::forward<SinkArgs>(sink_args)...); // async_factory_nonblock uses `overrun_oldest`
73 | }
74 | 
75 | // Create a new thread pool (q_size queue slots, thread_count threads) and store it in the registry as the global pool.
76 | inline void init_thread_pool(size_t q_size, size_t thread_count)
77 | {
78 |     auto tp = std::make_shared<details::thread_pool>(q_size, thread_count);
79 |     details::registry::instance().set_tp(std::move(tp));
80 | }
81 | 
82 | // Get the global thread pool stored in the registry; null if none has been set (create() above checks for that).
83 | inline std::shared_ptr<spdlog::details::thread_pool> thread_pool()
84 | {
85 |     return details::registry::instance().get_tp();
86 | }
87 | } // namespace spdlog
88 | 


--------------------------------------------------------------------------------
/spdlog/sinks/stdout_sinks.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright(c) 2015 Gabi Melman.
  3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
  4 | //
  5 | 
  6 | #pragma once
  7 | 
  8 | #ifndef SPDLOG_H
  9 | #error "spdlog.h must be included before this file."
 10 | #endif
 11 | 
 12 | #include "spdlog/details/console_globals.h"
 13 | #include "spdlog/details/null_mutex.h"
 14 | 
 15 | #include <cstdio>
 16 | #include <memory>
 17 | #include <mutex>
 18 | 
 19 | namespace spdlog {
 20 | 
 21 | namespace sinks {
 22 | 
 23 | template<typename TargetStream, typename ConsoleMutex>
 24 | class stdout_sink final : public sink // writes formatted messages to the FILE* returned by TargetStream::stream()
 25 | {
 26 | public:
 27 |     using mutex_t = typename ConsoleMutex::mutex_t;
 28 |     stdout_sink()
 29 |         : mutex_(ConsoleMutex::mutex()) // mutex comes from ConsoleMutex and is held by reference, not owned
 30 |         , file_(TargetStream::stream())
 31 |     {
 32 |     }
 33 |     ~stdout_sink() override = default;
 34 | 
 35 |     stdout_sink(const stdout_sink &other) = delete;
 36 |     stdout_sink &operator=(const stdout_sink &other) = delete;
 37 | 
 38 |     void log(const details::log_msg &msg) override
 39 |     {
 40 |         std::lock_guard<mutex_t> lock(mutex_);
 41 |         fmt::memory_buffer formatted;
 42 |         formatter_->format(msg, formatted); // formatter_ is not declared in this class; presumably inherited from sink
 43 |         fwrite(formatted.data(), sizeof(char), formatted.size(), file_);
 44 |         fflush(TargetStream::stream()); // same source as file_ (see constructor); flushed after every message
 45 |     }
 46 | 
 47 |     void flush() override
 48 |     {
 49 |         std::lock_guard<mutex_t> lock(mutex_);
 50 |         fflush(file_);
 51 |     }
 52 | 
 53 |     void set_pattern(const std::string &pattern) override
 54 |     {
 55 |         std::lock_guard<mutex_t> lock(mutex_);
 56 |         formatter_ = std::unique_ptr<spdlog::formatter>(new pattern_formatter(pattern)); // replaces the current formatter
 57 |     }
 58 | 
 59 |     void set_formatter(std::unique_ptr<spdlog::formatter> sink_formatter) override
 60 |     {
 61 |         std::lock_guard<mutex_t> lock(mutex_);
 62 |         formatter_ = std::move(sink_formatter); // takes ownership of the caller's formatter
 63 |     }
 64 | 
 65 | private:
 66 |     mutex_t &mutex_;
 67 |     FILE *file_;
 68 | };
 69 | 
 70 | using stdout_sink_mt = stdout_sink<details::console_stdout, details::console_mutex>;
 71 | using stdout_sink_st = stdout_sink<details::console_stdout, details::console_nullmutex>;
 72 | 
 73 | using stderr_sink_mt = stdout_sink<details::console_stderr, details::console_mutex>;
 74 | using stderr_sink_st = stdout_sink<details::console_stderr, details::console_nullmutex>;
 75 | 
 76 | } // namespace sinks
 77 | 
 78 | // Factory functions: each asks Factory to create a logger named logger_name with one console sink (stdout/stderr, _mt/_st)
 79 | template<typename Factory = default_factory>
 80 | inline std::shared_ptr<logger> stdout_logger_mt(const std::string &logger_name)
 81 | {
 82 |     return Factory::template create<sinks::stdout_sink_mt>(logger_name); // stdout, console_mutex
 83 | }
 84 | 
 85 | template<typename Factory = default_factory>
 86 | inline std::shared_ptr<logger> stdout_logger_st(const std::string &logger_name)
 87 | {
 88 |     return Factory::template create<sinks::stdout_sink_st>(logger_name); // stdout, console_nullmutex
 89 | }
 90 | 
 91 | template<typename Factory = default_factory>
 92 | inline std::shared_ptr<logger> stderr_logger_mt(const std::string &logger_name)
 93 | {
 94 |     return Factory::template create<sinks::stderr_sink_mt>(logger_name); // stderr, console_mutex
 95 | }
 96 | 
 97 | template<typename Factory = default_factory>
 98 | inline std::shared_ptr<logger> stderr_logger_st(const std::string &logger_name)
 99 | {
100 |     return Factory::template create<sinks::stderr_sink_st>(logger_name); // stderr, console_nullmutex
101 | }
102 | } // namespace spdlog
103 | 


--------------------------------------------------------------------------------
/spdlog/details/async_logger_impl.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright(c) 2015 Gabi Melman.
  3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
  4 | //
  5 | 
  6 | #pragma once
  7 | 
  8 | // async logger implementation
  9 | // uses a thread pool to perform the actual logging
 10 | 
 11 | #include "spdlog/details/thread_pool.h"
 12 | 
 13 | #include <chrono>
 14 | #include <memory>
 15 | #include <string>
 16 | 
 17 | template<typename It>
 18 | inline spdlog::async_logger::async_logger( // sinks given as the iterator range [begin, end)
 19 |     std::string logger_name, It begin, It end, std::weak_ptr<details::thread_pool> tp, async_overflow_policy overflow_policy)
 20 |     : logger(std::move(logger_name), begin, end)
 21 |     , thread_pool_(std::move(tp)) // kept as a weak_ptr; locked on each use in sink_it_ / flush_
 22 |     , overflow_policy_(overflow_policy)
 23 | {
 24 | }
 25 | 
 26 | inline spdlog::async_logger::async_logger( // sinks given as an initializer list
 27 |     std::string logger_name, sinks_init_list sinks_list, std::weak_ptr<details::thread_pool> tp, async_overflow_policy overflow_policy)
 28 |     : async_logger(std::move(logger_name), sinks_list.begin(), sinks_list.end(), std::move(tp), overflow_policy) // delegates to the iterator-range constructor
 29 | {
 30 | }
 31 | 
 32 | inline spdlog::async_logger::async_logger( // a single sink
 33 |     std::string logger_name, sink_ptr single_sink, std::weak_ptr<details::thread_pool> tp, async_overflow_policy overflow_policy)
 34 |     : async_logger(std::move(logger_name), {std::move(single_sink)}, std::move(tp), overflow_policy) // delegates via a one-element list
 35 | {
 36 | }
 37 | 
 38 | // Send the log message to the thread pool; throws spdlog_ex if the pool no longer exists.
 39 | inline void spdlog::async_logger::sink_it_(details::log_msg &msg)
 40 | {
 41 | #if defined(SPDLOG_ENABLE_MESSAGE_COUNTER)
 42 |     incr_msg_counter_(msg);
 43 | #endif
 44 |     if (auto pool_ptr = thread_pool_.lock()) // weak_ptr -> shared_ptr; empty when the pool is gone
 45 |     {
 46 |         pool_ptr->post_log(shared_from_this(), msg, overflow_policy_); // the pool receives a shared_ptr to this logger
 47 |     }
 48 |     else
 49 |     {
 50 |         throw spdlog_ex("async log: thread pool doesn't exist anymore");
 51 |     }
 52 | }
 53 | 
 54 | // Send a flush request to the thread pool; throws spdlog_ex if the pool no longer exists.
 55 | inline void spdlog::async_logger::flush_()
 56 | {
 57 |     if (auto pool_ptr = thread_pool_.lock()) // weak_ptr -> shared_ptr; empty when the pool is gone
 58 |     {
 59 |         pool_ptr->post_flush(shared_from_this(), overflow_policy_);
 60 |     }
 61 |     else
 62 |     {
 63 |         throw spdlog_ex("async flush: thread pool doesn't exist anymore");
 64 |     }
 65 | }
 66 | 
 67 | //
 68 | // backend functions - called from the thread pool to do the actual job
 69 | //
 70 | inline void spdlog::async_logger::backend_log_(const details::log_msg &incoming_log_msg)
 71 | {
 72 |     try
 73 |     {
 74 |         for (auto &s : sinks_)
 75 |         {
 76 |             if (s->should_log(incoming_log_msg.level)) // each sink applies its own level filter
 77 |             {
 78 |                 s->log(incoming_log_msg);
 79 |             }
 80 |         }
 81 |     }
 82 |     SPDLOG_CATCH_AND_HANDLE // macro defined elsewhere; presumably supplies the catch clauses for the try above
 83 | 
 84 |     if (should_flush_(incoming_log_msg)) // evaluated after the try block, whether or not a sink logged
 85 |     {
 86 |         backend_flush_();
 87 |     }
 88 | }
 89 | 
 90 | inline void spdlog::async_logger::backend_flush_() // flushes every sink of this logger
 91 | {
 92 |     try
 93 |     {
 94 |         for (auto &sink : sinks_)
 95 |         {
 96 |             sink->flush();
 97 |         }
 98 |     }
 99 |     SPDLOG_CATCH_AND_HANDLE // macro defined elsewhere; presumably supplies the catch clauses for the try above
100 | }
101 | 
102 | inline std::shared_ptr<spdlog::logger> spdlog::async_logger::clone(std::string new_name) // new async_logger with the same sinks, pool and policy
103 | {
104 |     auto cloned = std::make_shared<spdlog::async_logger>(std::move(new_name), sinks_.begin(), sinks_.end(), thread_pool_, overflow_policy_);
105 | 
106 |     cloned->set_level(this->level()); // copy level, flush level and error handler from this logger
107 |     cloned->flush_on(this->flush_level());
108 |     cloned->set_error_handler(this->error_handler());
109 |     return std::move(cloned); // cloned is shared_ptr<async_logger>, the return type is shared_ptr<logger>
110 | }
111 | 


--------------------------------------------------------------------------------
/samples/10_memcpy3D/main.cpp:
--------------------------------------------------------------------------------
  1 | #include<stdio.h>
  2 | #include<string>
  3 | #include<hip/hip_runtime.h>
  4 | 
  5 | #define BLOCKSIZE_x 16
  6 | #define BLOCKSIZE_y 16
  7 | 
  8 | #define N 128
  9 | #define M 64
 10 | #define W 16
 11 | 
 12 | 
 13 | 
 14 | /*****************/
 15 | /* HIP MEMCHECK */
 16 | /*****************/
 17 | 
 18 | #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
 19 | 
 20 | inline void gpuAssert(hipError_t code, std::string file, int line, bool abort=true)
 21 | {
 22 | 
 23 |    if (code != hipSuccess)
 24 |    {
 25 |       fprintf(stderr,"GPUassert: %s %s %dn", hipGetErrorString(code), file.c_str(), line);
 26 | 
 27 |        if (abort) { exit(code); }
 28 |     }
 29 | }
 30 | 
 31 | 
 32 | 
 33 | /*******************/
 34 | /* iDivUp FUNCTION */
 35 | /*******************/
 36 | // a / b rounded up for positive operands: the quotient, plus one if there is a remainder.
 37 | int iDivUp(int a, int b){ const int q = a / b; return ((a % b) == 0) ? q : (q + 1); }
 38 | 
 39 | 
 40 | /******************/
 41 | /* TEST KERNEL 3D */
 42 | /******************/
 43 | 
 44 | __global__ void test_kernel_3D(hipPitchedPtr devPitchedPtr)
 45 | {
 46 |    // One thread per (column, row) position; each thread squares that element in all W slices.
 47 |    int tidx = blockIdx.x*blockDim.x+threadIdx.x;
 48 |    int tidy = blockIdx.y*blockDim.y+threadIdx.y;
 49 |    if (tidx >= M || tidy >= N) return; // guard the surplus threads of a rounded-up grid
 50 |    char* devPtr = (char*) devPitchedPtr.ptr;
 51 |    size_t pitch = devPitchedPtr.pitch;
 52 |    size_t slicePitch = pitch * N;
 53 |    // rows are `pitch` bytes apart, slices are N rows apart
 54 |    for (int w = 0; w < W; w++)
 55 |    {
 56 |       char* slice = devPtr + w * slicePitch;
 57 |       float* row = (float*)(slice + tidy * pitch);
 58 |       row[tidx] = row[tidx] * row[tidx];
 59 |    }
 60 | }
 61 | 
 62 | 
 63 | /********/
 64 | /* MAIN */
 65 | /********/
 66 | 
 67 | int main()
 68 | {
 69 |    float a[N][M][W];
 70 |    // Fill the host buffer with 3; after the kernel every element must read 9.
 71 |    for (int i=0; i<N; i++)
 72 |       for (int j=0; j<M; j++)
 73 |          for (int w=0; w<W; w++)
 74 |          {
 75 |             a[i][j][w] = 3.f;
 76 |             //printf("row %i column %i depth %i value %f \n",i,j,w,a[i][j][w]);
 77 |          }
 78 | 
 79 |    // NOTE(review): the copies below treat `a` as W slices of N rows of M floats, which is
 80 |    // not the [N][M][W] indexing order; harmless here because all elements are equal.
 81 |    // --- 3D pitched allocation and host->device memcopy
 82 |    hipExtent extent{M * sizeof(float), N, W};
 83 | 
 84 |    hipPitchedPtr devPitchedPtr;
 85 | 
 86 |    gpuErrchk(hipMalloc3D(&devPitchedPtr, extent));
 87 | 
 88 |    hipMemcpy3DParms p = { 0 };
 89 | 
 90 |    p.srcPtr.ptr = a;
 91 |    p.srcPtr.pitch = M * sizeof(float);
 92 |    p.srcPtr.xsize = M;
 93 |    p.srcPtr.ysize = N;
 94 |    p.dstPtr.ptr = devPitchedPtr.ptr;
 95 |    p.dstPtr.pitch = devPitchedPtr.pitch;
 96 |    p.dstPtr.xsize = M;
 97 |    p.dstPtr.ysize = N;
 98 |    p.extent.width = M * sizeof(float);
 99 |    p.extent.height = N;
100 |    p.extent.depth = W;
101 |    p.kind = hipMemcpyHostToDevice;
102 | 
103 |    gpuErrchk(hipMemcpy3D(&p));
104 | 
105 |    dim3 GridSize(iDivUp(M,BLOCKSIZE_x),iDivUp(N,BLOCKSIZE_y));
106 |    // grid x spans the M columns and grid y the N rows, so the block is (x, y) as well
107 |    dim3 BlockSize(BLOCKSIZE_x,BLOCKSIZE_y);
108 | 
109 |    hipLaunchKernelGGL(test_kernel_3D, dim3(GridSize), dim3(BlockSize), 0, 0, devPitchedPtr);
110 | 
111 |    gpuErrchk(hipPeekAtLastError());
112 | 
113 |    gpuErrchk(hipDeviceSynchronize());
114 |    p.srcPtr.ptr = devPitchedPtr.ptr;
115 |    p.srcPtr.pitch = devPitchedPtr.pitch;
116 |    p.dstPtr.ptr = a;
117 |    p.dstPtr.pitch = M * sizeof(float);
118 |    p.kind = hipMemcpyDeviceToHost;
119 | 
120 |    gpuErrchk(hipMemcpy3D(&p));
121 |    gpuErrchk(hipFree(devPitchedPtr.ptr)); // release the pitched device allocation
122 |    int error = 0;
123 |    for (int i=0; i<N; i++)
124 |       for (int j=0; j<M; j++)
125 |          for (int w=0; w<W; w++)
126 |             if (a[i][j][w] != 9.0f) error++;
127 |             //printf("row %i column %i depth %i value %f\n",i,j,w,a[i][j][w]);
128 | 
129 |    if (error) { 
130 |      printf("FAILED\n");
131 |      return 1;
132 |    }
133 |    else {
134 |      printf("PASSED\n");
135 |      return 0;
136 |    }
137 | }
138 | 
139 | 


--------------------------------------------------------------------------------
/include/CL/cl_version.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2018 The Khronos Group Inc.
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a
 5 |  * copy of this software and/or associated documentation files (the
 6 |  * "Materials"), to deal in the Materials without restriction, including
 7 |  * without limitation the rights to use, copy, modify, merge, publish,
 8 |  * distribute, sublicense, and/or sell copies of the Materials, and to
 9 |  * permit persons to whom the Materials are furnished to do so, subject to
10 |  * the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included
13 |  * in all copies or substantial portions of the Materials.
14 |  *
15 |  * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
16 |  * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
17 |  * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
18 |  *    https://www.khronos.org/registry/
19 |  *
20 |  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 |  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 |  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 |  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
27 |  ******************************************************************************/
28 | 
29 | #ifndef __CL_VERSION_H
30 | #define __CL_VERSION_H
31 | 
32 | /* Detect which version to target; a missing or unlisted value falls back to 220 with a message. */
33 | #if !defined(CL_TARGET_OPENCL_VERSION)
34 | #pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
35 | #define CL_TARGET_OPENCL_VERSION 220
36 | #endif
37 | #if CL_TARGET_OPENCL_VERSION != 100 && \
38 |     CL_TARGET_OPENCL_VERSION != 110 && \
39 |     CL_TARGET_OPENCL_VERSION != 120 && \
40 |     CL_TARGET_OPENCL_VERSION != 200 && \
41 |     CL_TARGET_OPENCL_VERSION != 210 && \
42 |     CL_TARGET_OPENCL_VERSION != 220
43 | #pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
44 | #undef CL_TARGET_OPENCL_VERSION
45 | #define CL_TARGET_OPENCL_VERSION 220
46 | #endif
47 | 
48 | 
49 | /* OpenCL Version: define CL_VERSION_x_y for every version up to and including the target. */
50 | #if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
51 | #define CL_VERSION_2_2  1
52 | #endif
53 | #if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
54 | #define CL_VERSION_2_1  1
55 | #endif
56 | #if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
57 | #define CL_VERSION_2_0  1
58 | #endif
59 | #if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
60 | #define CL_VERSION_1_2  1
61 | #endif
62 | #if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
63 | #define CL_VERSION_1_1  1
64 | #endif
65 | #if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
66 | #define CL_VERSION_1_0  1
67 | #endif
68 | 
69 | /* Allow deprecated APIs for older OpenCL versions: a target of X defines the macro for X and for each later version up to 2.1. */
70 | #if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
71 | #define CL_USE_DEPRECATED_OPENCL_2_1_APIS
72 | #endif
73 | #if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
74 | #define CL_USE_DEPRECATED_OPENCL_2_0_APIS
75 | #endif
76 | #if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
77 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
78 | #endif
79 | #if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
80 | #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
81 | #endif
82 | #if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
83 | #define CL_USE_DEPRECATED_OPENCL_1_0_APIS
84 | #endif
85 | 
86 | #endif  /* __CL_VERSION_H */
87 | 


--------------------------------------------------------------------------------
/spdlog/sinks/android_sink.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright(c) 2015 Gabi Melman.
  3 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
  4 | //
  5 | 
  6 | #pragma once
  7 | 
  8 | #ifndef SPDLOG_H
  9 | #error "spdlog.h must be included before this file."
 10 | #endif
 11 | 
 12 | #include "spdlog/details/fmt_helper.h"
 13 | #include "spdlog/details/null_mutex.h"
 14 | #include "spdlog/details/os.h"
 15 | #include "spdlog/sinks/base_sink.h"
 16 | 
 17 | #include <android/log.h>
 18 | #include <chrono>
 19 | #include <mutex>
 20 | #include <string>
 21 | #include <thread>
 22 | 
 23 | #if !defined(SPDLOG_ANDROID_RETRIES)
 24 | #define SPDLOG_ANDROID_RETRIES 2
 25 | #endif
 26 | 
 27 | namespace spdlog {
 28 | namespace sinks {
 29 | 
 30 | /*
 31 |  * Android sink (logging using __android_log_write)
 32 |  */
 33 | template<typename Mutex>
 34 | class android_sink final : public base_sink<Mutex>
 35 | {
 36 | public:
 37 |     explicit android_sink(std::string tag = "spdlog", bool use_raw_msg = false)
 38 |         : tag_(std::move(tag))
 39 |         , use_raw_msg_(use_raw_msg)
 40 |     {
 41 |     }
 42 | 
 43 | protected:
 44 |     void sink_it_(const details::log_msg &msg) override
 45 |     {
 46 |         // Build the text to log: the formatter's output, or just the payload in raw mode.
 47 |         fmt::memory_buffer text;
 48 |         if (!use_raw_msg_)
 49 |         {
 50 |             sink::formatter_->format(msg, text);
 51 |         }
 52 |         else
 53 |         {
 54 |             details::fmt_helper::append_string_view(msg.payload, text);
 55 |         }
 56 |         text.push_back('\0');
 57 |         const char *c_text = text.data();
 58 |         const android_LogPriority priority = convert_to_android_(msg.level);
 59 |         // See system/core/liblog/logger_write.c for explanation of return value
 60 |         int ret = __android_log_write(priority, tag_.c_str(), c_text);
 61 |         // On EAGAIN, sleep 5 ms and retry, up to SPDLOG_ANDROID_RETRIES times.
 62 |         for (int attempt = 0; (ret == -11 /*EAGAIN*/) && (attempt < SPDLOG_ANDROID_RETRIES); ++attempt)
 63 |         {
 64 |             details::os::sleep_for_millis(5);
 65 |             ret = __android_log_write(priority, tag_.c_str(), c_text);
 66 |         }
 67 | 
 68 |         // Any error still pending at this point is reported to the caller.
 69 |         if (ret < 0)
 70 |         {
 71 |             throw spdlog_ex("__android_log_write() failed", ret);
 72 |         }
 73 |     }
 74 | 
 75 |     void flush_() override {} // nothing to flush in this sink
 76 | 
 77 | private:
 78 |     static android_LogPriority convert_to_android_(spdlog::level::level_enum level)
 79 |     {
 80 |         switch (level) // spdlog level -> Android priority; unlisted values map to DEFAULT
 81 |         {
 82 |         case spdlog::level::trace:
 83 |             return ANDROID_LOG_VERBOSE;
 84 |         case spdlog::level::debug:
 85 |             return ANDROID_LOG_DEBUG;
 86 |         case spdlog::level::info:
 87 |             return ANDROID_LOG_INFO;
 88 |         case spdlog::level::warn:
 89 |             return ANDROID_LOG_WARN;
 90 |         case spdlog::level::err:
 91 |             return ANDROID_LOG_ERROR;
 92 |         case spdlog::level::critical:
 93 |             return ANDROID_LOG_FATAL;
 94 |         default:
 95 |             return ANDROID_LOG_DEFAULT;
 96 |         }
 97 |     }
 98 | 
 99 |     std::string tag_;  // tag passed to every __android_log_write call
100 |     bool use_raw_msg_; // true: log msg.payload only, bypassing the formatter
101 | };
102 | 
103 | using android_sink_mt = android_sink<std::mutex>;
104 | using android_sink_st = android_sink<details::null_mutex>;
105 | } // namespace sinks
106 | 
107 | // Create and register android syslog logger
108 | // (android_sink_mt variant, i.e. android_sink<std::mutex>); forwards logger_name and tag to Factory.
109 | template<typename Factory = default_factory>
110 | inline std::shared_ptr<logger> android_logger_mt(const std::string &logger_name, const std::string &tag = "spdlog")
111 | {
112 |     return Factory::template create<sinks::android_sink_mt>(logger_name, tag);
113 | }
114 | // Same as android_logger_mt, but with android_sink_st (android_sink<details::null_mutex>).
115 | template<typename Factory = default_factory>
116 | inline std::shared_ptr<logger> android_logger_st(const std::string &logger_name, const std::string &tag = "spdlog")
117 | {
118 |     return Factory::template create<sinks::android_sink_st>(logger_name, tag);
119 | }
120 | 
121 | } // namespace spdlog
122 | 


--------------------------------------------------------------------------------
/spdlog/details/fmt_helper.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Created by gabi on 6/15/18.
  3 | //
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <chrono>
  8 | #include <type_traits>
  9 | #include "spdlog/fmt/fmt.h"
 10 | 
 11 | // Some fmt helpers to efficiently format and pad ints and strings
 12 | namespace spdlog {
 13 | namespace details {
 14 | namespace fmt_helper {
 15 | // Wrap the buffer's data()/size() pair in a string_view_t.
 16 | template<size_t Buffer_Size>
 17 | inline spdlog::string_view_t to_string_view(const fmt::basic_memory_buffer<char, Buffer_Size> &buf) SPDLOG_NOEXCEPT
 18 | {
 19 |     return spdlog::string_view_t(buf.data(), buf.size());
 20 | }
 21 | // Append the full contents of buf to the end of dest.
 22 | template<size_t Buffer_Size1, size_t Buffer_Size2>
 23 | inline void append_buf(const fmt::basic_memory_buffer<char, Buffer_Size1> &buf, fmt::basic_memory_buffer<char, Buffer_Size2> &dest)
 24 | {
 25 |     const auto *first = buf.data();
 26 |     dest.append(first, first + buf.size());
 27 | }
 28 | // Append the characters of view to dest.
 29 | template<size_t Buffer_Size>
 30 | inline void append_string_view(spdlog::string_view_t view, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 31 | {
 32 |     const auto *first = view.data();
 33 |     // A view whose data pointer is null contributes nothing.
 34 |     if (first == nullptr) { return; }
 35 | 
 36 |     dest.append(first, first + view.size());
 37 | }
 38 | // Format the integer n with fmt::format_int and append the resulting characters to dest.
 39 | template<typename T, size_t Buffer_Size>
 40 | inline void append_int(T n, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 41 | {
 42 |     fmt::format_int digits(n);
 43 |     dest.append(digits.data(), digits.data() + digits.size());
 44 | }
 45 | // Digit count of n as computed by fmt, after casting n to uint32_t or uint64_t by its size.
 46 | template<typename T>
 47 | inline unsigned count_digits(T n)
 48 | {
 49 |     using widened = typename std::conditional<(sizeof(T) > sizeof(uint32_t)), uint64_t, uint32_t>::type;
 50 |     return fmt::internal::count_digits(static_cast<widened>(n));
 51 | }
 52 | // Append n to dest using at least two characters (0-9 get a leading '0').
 53 | template<size_t Buffer_Size>
 54 | inline void pad2(int n, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 55 | {
 56 |     if (n < 0) // negatives (unlikely, but just in case, let fmt deal with it)
 57 |     {
 58 |         fmt::format_to(dest, "{:02}", n);
 59 |         return;
 60 |     }
 61 | 
 62 |     if (n > 99) // three or more digits: no padding needed
 63 |     {
 64 |         append_int(n, dest);
 65 |         return;
 66 |     }
 67 | 
 68 |     // 0-99: tens digit (which is '0' for 0-9) followed by the units digit
 69 |     const int tens = n / 10;
 70 |     const int units = n % 10;
 71 | 
 72 |     dest.push_back(static_cast<char>('0' + tens));
 73 |     dest.push_back(static_cast<char>('0' + units));
 74 | }
 75 | // Append unsigned n to dest, left-padded with '0' to at least `width` characters (any width).
 76 | template<typename T, size_t Buffer_Size>
 77 | inline void pad_uint(T n, unsigned int width, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 78 | {
 79 |     static_assert(std::is_unsigned<T>::value, "pad_uint must get unsigned T");
 80 |     // Emit the padding one character at a time: copying it out of a fixed-length string
 81 |     // of zeros reads past the literal's end once the padding exceeds that length.
 82 |     for (auto digits = count_digits(n); digits < width; ++digits)
 83 |     {
 84 |         dest.push_back('0');
 85 |     }
 86 |     append_int(n, dest);
 87 | }
 88 | // pad_uint with a fixed width of 3.
 89 | template<typename T, size_t Buffer_Size>
 90 | inline void pad3(T n, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 91 | {
 92 |     pad_uint(n, 3, dest);
 93 | }
 94 | // pad_uint with a fixed width of 6.
 95 | template<typename T, size_t Buffer_Size>
 96 | inline void pad6(T n, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
 97 | {
 98 |     pad_uint(n, 6, dest);
 99 | }
100 | // pad_uint with a fixed width of 9.
101 | template<typename T, size_t Buffer_Size>
102 | inline void pad9(T n, fmt::basic_memory_buffer<char, Buffer_Size> &dest)
103 | {
104 |     pad_uint(n, 9, dest);
105 | }
106 | 
107 | // return fraction of a second of the given time_point.
108 | // e.g.
109 | // time_fraction<std::chrono::milliseconds>(tp) -> will return the millis part of the second
110 | template<typename ToDuration>
111 | inline ToDuration time_fraction(const log_clock::time_point &tp)
112 | {
113 |     namespace chr = std::chrono;
114 |     auto since_epoch = tp.time_since_epoch();
115 |     // truncate to whole seconds, then subtract those from the full duration
116 |     auto whole_secs = chr::duration_cast<chr::seconds>(since_epoch);
117 |     return chr::duration_cast<ToDuration>(since_epoch) - chr::duration_cast<ToDuration>(whole_secs);
118 | }
119 | 
120 | } // namespace fmt_helper
121 | } // namespace details
122 | } // namespace spdlog
123 | 


--------------------------------------------------------------------------------
/spdlog/details/mpmc_blocking_q.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | //
  4 | // Copyright(c) 2018 Gabi Melman.
  5 | // Distributed under the MIT License (http://opensource.org/licenses/MIT)
  6 | //
  7 | 
  8 | // multi producer-multi consumer blocking queue.
  9 | // enqueue(..) - will block until room found to put the new message.
 10 | // enqueue_nowait(..) - will enqueue immediately, overrunning the oldest
 11 | // message in the queue if no room is left.
 12 | // dequeue_for(..) - will block until the queue is not empty or the timeout
 13 | // has passed.
 14 | 
 15 | #include "spdlog/details/circular_q.h"
 16 | 
 17 | #include <condition_variable>
 18 | #include <mutex>
 19 | 
 20 | namespace spdlog {
 21 | namespace details {
 22 | 
 23 | template<typename T>
 24 | class mpmc_blocking_queue
 25 | {
 26 | public:
 27 |     using item_type = T;
 28 |     explicit mpmc_blocking_queue(size_t max_items) // max_items is forwarded to the underlying circular_q
 29 |         : q_(max_items)
 30 |     {
 31 |     }
 32 | 
 33 | #ifndef __MINGW32__
 34 |     // try to enqueue and block if no room left
 35 |     void enqueue(T &&item)
 36 |     {
 37 |         {
 38 |             std::unique_lock<std::mutex> lock(queue_mutex_);
 39 |             pop_cv_.wait(lock, [this] { return !this->q_.full(); }); // wait until the queue is not full
 40 |             q_.push_back(std::move(item));
 41 |         } // lock released here, before the consumer is notified
 42 |         push_cv_.notify_one();
 43 |     }
 44 | 
 45 |     // enqueue immediately. overrun oldest message in the queue if no room left.
 46 |     void enqueue_nowait(T &&item)
 47 |     {
 48 |         {
 49 |             std::unique_lock<std::mutex> lock(queue_mutex_);
 50 |             q_.push_back(std::move(item)); // no wait on pop_cv_: pushes even when the queue is full
 51 |         }
 52 |         push_cv_.notify_one();
 53 |     }
 54 | 
 55 |     // try to dequeue item. if no item found, wait up to wait_duration for one to arrive.
 56 |     // Return true, if succeeded dequeue item, false otherwise
 57 |     bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration)
 58 |     {
 59 |         {
 60 |             std::unique_lock<std::mutex> lock(queue_mutex_);
 61 |             if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); }))
 62 |             {
 63 |                 return false; // timed out with the queue still empty
 64 |             }
 65 |             q_.pop_front(popped_item);
 66 |         }
 67 |         pop_cv_.notify_one(); // wake one producer blocked in enqueue()
 68 |         return true;
 69 |     }
 70 | 
 71 | #else
 72 |     // apparently mingw deadlocks if the mutex is released before cv.notify_one(),
 73 |     // so release the mutex at the very end each function.
 74 | 
 75 |     // try to enqueue and block if no room left
 76 |     void enqueue(T &&item)
 77 |     {
 78 |         std::unique_lock<std::mutex> lock(queue_mutex_);
 79 |         pop_cv_.wait(lock, [this] { return !this->q_.full(); }); // wait until the queue is not full
 80 |         q_.push_back(std::move(item));
 81 |         push_cv_.notify_one(); // notified while the lock is still held
 82 |     }
 83 | 
 84 |     // enqueue immediately. overrun oldest message in the queue if no room left.
 85 |     void enqueue_nowait(T &&item)
 86 |     {
 87 |         std::unique_lock<std::mutex> lock(queue_mutex_);
 88 |         q_.push_back(std::move(item)); // no wait on pop_cv_: pushes even when the queue is full
 89 |         push_cv_.notify_one();
 90 |     }
 91 | 
 92 |     // try to dequeue item. if no item found, wait up to wait_duration for one to arrive.
 93 |     // Return true, if succeeded dequeue item, false otherwise
 94 |     bool dequeue_for(T &popped_item, std::chrono::milliseconds wait_duration)
 95 |     {
 96 |         std::unique_lock<std::mutex> lock(queue_mutex_);
 97 |         if (!push_cv_.wait_for(lock, wait_duration, [this] { return !this->q_.empty(); }))
 98 |         {
 99 |             return false; // timed out with the queue still empty
100 |         }
101 |         q_.pop_front(popped_item);
102 |         pop_cv_.notify_one(); // wake one producer blocked in enqueue()
103 |         return true;
104 |     }
105 | 
106 | #endif
107 | 
108 |     size_t overrun_counter() // reads the circular_q's overrun counter under the queue mutex
109 |     {
110 |         std::unique_lock<std::mutex> lock(queue_mutex_);
111 |         return q_.overrun_counter();
112 |     }
113 | 
114 | private:
115 |     std::mutex queue_mutex_;           // guards every access to q_
116 |     std::condition_variable push_cv_;  // signalled after an item has been pushed
117 |     std::condition_variable pop_cv_;   // signalled after an item has been popped
118 |     spdlog::details::circular_q<T> q_;
119 | };
120 | } // namespace details
121 | } // namespace spdlog
122 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/mathF.h:
--------------------------------------------------------------------------------
  1 | /*===--------------------------------------------------------------------------
  2 |  *                   ROCm Device Libraries
  3 |  *
  4 |  * This file is distributed under the University of Illinois Open Source
  5 |  * License. See LICENSE.TXT for details.
  6 |  *===------------------------------------------------------------------------*/
  7 | 
  8 | // OCML prototypes
  9 | //#include "ocml.h"
 10 | 
 11 | // Tables
 12 | #include "tables.h"
 13 | 
 14 | // Builtins
 15 | //#include "builtins.h"
 16 | 
 17 | // Mangling
 18 | #define MATH_MANGLE(N) N
 19 | #define MATH_PRIVATE(N) __priv##N
 20 | 
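 21 | // Local definitions mapping the OCML helper macros onto OpenCL C builtins (presumably stand-ins for the commented-out includes)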
 22 | #define MATH_MAD(x, y, z) mad(x, y, z)
 23 | #define FINITE_ONLY_OPT() 0
 24 | #define BUILTIN_FMA_F32(x, y, z) fma(x, y, z)
 25 | #define MATH_SQRT(x) sqrt(x)
 26 | #define MATH_RCP(x) native_recip(x)
 27 | #define AS_FLOAT(x) as_float(x)
 28 | #define AS_INT(x) as_int(x)
 29 | #define AS_UINT(x) as_uint(x)
 30 | #define BUILTIN_ABS_F32(x) fabs(x)
 31 | #define BUILTIN_COPYSIGN_F32(x, y) copysign(x, y)
 32 | #define HAVE_FAST_FMA32() 1
 33 | #define BUILTIN_RSQRT_F32(x) native_rsqrt(x)
 34 | #define MATH_FAST_RCP(x) native_recip(x)
 35 | #define MATH_FAST_DIV(x, y) ((x) / (y))
 36 | #define MATH_FAST_SQRT(x) native_sqrt(x)
 37 | #define MATH_DIV(x, y) ((x) / (y))
 38 | 
 39 | #define BUILTIN_CLAMP_F32(x, y, z) clamp(x, y, z)
 40 | #define BUILTIN_MAX_U32(x, y) max(x, y)
 41 | #define BUILTIN_MIN_U32(x, y) min(x, y)
 42 | #define BUILTIN_ISINF_F32(x) isinf(x)
 43 | #define BUILTIN_ISNAN_F32(x) isnan(x)
 44 | 
 45 | #define BUILTIN_LOG2_F32(x) native_log2(x)
 46 | #define BUILTIN_EXP2_F32(x) native_exp2(x)
 47 | 
 48 | #define BUILTIN_RINT_F32(x) rint(x)
 49 | 
 50 | // Exponent that frexp() reports for x; the mantissa it returns is not needed.
 51 | static inline int frexp_exp(float x) {
 52 |   int e;
 53 |   frexp(x, &e); // return value (mantissa) intentionally discarded
 54 |   return e;
 55 | }
 56 | 
 57 | #define BUILTIN_FREXP_EXP_F32(x) frexp_exp(x)
 58 | #define BUILTIN_FLDEXP_F32(x, k) ldexp(x, k)
 59 | 
 60 | 
 61 | // Optimization Controls
 62 | //#include "opts.h"
 63 | 
 64 | // Attributes
 65 | #define PUREATTR __attribute__((pure)) __attribute__((overloadable))
 66 | #define CONSTATTR __attribute__((const)) __attribute__((overloadable))
 67 | 
 68 | // Math controls
 69 | //#include "privF.h"
 70 | 
 71 | // Floating point patterns
 72 | #define SIGNBIT_SP32      (int)0x80000000
 73 | #define EXSIGNBIT_SP32    0x7fffffff
 74 | #define EXPBITS_SP32      0x7f800000
 75 | #define MANTBITS_SP32     0x007fffff
 76 | #define ONEEXPBITS_SP32   0x3f800000
 77 | #define TWOEXPBITS_SP32   0x40000000
 78 | #define HALFEXPBITS_SP32  0x3f000000
 79 | #define IMPBIT_SP32       0x00800000
 80 | #define QNANBITPATT_SP32  0x7fc00000
 81 | #define PINFBITPATT_SP32  0x7f800000
 82 | #define NINFBITPATT_SP32  (int)0xff800000
 83 | #define EXPBIAS_SP32      127
 84 | #define EXPSHIFTBITS_SP32 23
 85 | #define BIASEDEMIN_SP32   1
 86 | #define EMIN_SP32         -126
 87 | #define BIASEDEMAX_SP32   254
 88 | #define EMAX_SP32         127
 89 | #define MANTLENGTH_SP32   24
 90 | #define BASEDIGITS_SP32   7
 91 | 
 92 | #define CLASS_PINF 2
 93 | #define CLASS_NINF 4
 94 | #define CLASS_QNAN 8
 95 | #define CLASS_SNAN 16
 96 | #define CLASS_PSUB 32
 97 | #define CLASS_NSUB 64
 98 | #define CLASS_PZER 128
 99 | #define CLASS_NZER 256
100 | 
101 | 
102 | static inline int CONSTATTR BUILTIN_CLASS_F32(float x, int klass)
103 | {
104 |   // Returns -1 when x falls in a class selected by klass, 0 otherwise.
105 |   const int bits = as_int(x);
106 |   const int mag = bits & EXSIGNBIT_SP32; // bit pattern with the sign bit cleared
107 |   if ((klass & CLASS_PINF) && (bits == PINFBITPATT_SP32))
108 |     return -1;
109 |   if ((klass & CLASS_NINF) && (bits == NINFBITPATT_SP32))
110 |     return -1;
111 | 
112 |   // NaN: exponent all ones with a non-zero mantissa, i.e. magnitude above +inf.
113 |   // (Masking with QNANBITPATT_SP32 alone also matches ordinary finite values such as 1.0f.)
114 |   if ((klass & (CLASS_QNAN|CLASS_SNAN)) && (mag > PINFBITPATT_SP32))
115 |     return -1;
116 | 
117 |   if ((klass & (CLASS_NZER|CLASS_PZER)) && (mag == 0)) // either zero; the sign is not examined
118 |     return -1;
119 | 
120 |   // subnormal of either sign: zero exponent field with a non-zero mantissa
121 |   if ((klass & (CLASS_NSUB|CLASS_PSUB)) && ((bits & EXPBITS_SP32) == 0) && ((bits & MANTBITS_SP32) != 0))
122 |     return -1;
123 |   return 0;
124 | }
125 | 
126 | // declarations
127 | 
128 | PUREATTR float j1(float x);
129 | PUREATTR float j0(float x);
130 | CONSTATTR float erfinv(float x);
131 | CONSTATTR float erfcinv(float x);
132 | 


--------------------------------------------------------------------------------
/samples/fp16/fp16_conversion.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 1993-2016, NVIDIA CORPORATION. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions
  5 | // are met:
  6 | //  * Redistributions of source code must retain the above copyright
  7 | //    notice, this list of conditions and the following disclaimer.
  8 | //  * Redistributions in binary form must reproduce the above copyright
  9 | //    notice, this list of conditions and the following disclaimer in the
 10 | //    documentation and/or other materials provided with the distribution.
 11 | //  * Neither the name of NVIDIA CORPORATION nor the names of its
 12 | //    contributors may be used to endorse or promote products derived
 13 | //    from this software without specific prior written permission.
 14 | //
 15 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 16 | // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18 | // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 19 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 20 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 21 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 22 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 23 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 24 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 | 
 27 | // This code modified from the public domain code here:
 28 | // https://gist.github.com/rygorous/2156668
 29 | // The URL above includes more robust conversion routines
 30 | // that handle Inf and NaN correctly.
 31 | //
 32 | // It is recommended to use the more robust versions in production code.
 33 | 
 34 | typedef unsigned uint;
 35 | // Three overlapping views of one 32-bit float: raw word, float value, and bit-fields.
 36 | union FP32 {
 37 |   uint u;   // raw bit pattern
 38 |   float f;  // the same storage read as a float
 39 |   struct {  // NOTE(review): bit-field placement is implementation-defined; assumes low-to-high allocation
 40 |     uint Mantissa : 23;
 41 |     uint Exponent : 8;
 42 |     uint Sign : 1;
 43 |   };
 44 | };
 45 | // Raw 16-bit word of a half value, overlaid with sign/exponent/mantissa bit-fields.
 46 | union FP16 {
 47 |   unsigned short u; // raw bit pattern
 48 |   struct {          // NOTE(review): bit-field placement is implementation-defined; assumes low-to-high allocation
 49 |     uint Mantissa : 10;
 50 |     uint Exponent : 5;
 51 |     uint Sign : 1;
 52 |   };
 53 | };
 54 | 
 55 | // Approximate solution. This is faster but converts some sNaNs to
 56 | // infinity and doesn't round correctly. Handle with care.
 57 | // Works on the magnitude: Inf/NaN get the all-ones half exponent, finite
 58 | // values are clamped to 2^16 and rescaled by 2^-112; the sign is re-attached last.
 59 | static const half approx_float_to_half(float fl) {
 60 |   FP32 f32infty = {255 << 23};         // +Inf bit pattern
 61 |   FP32 f16max = {(127 + 16) << 23};    // 2^16: clamp limit for finite inputs
 62 |   FP32 magic = {15 << 23};             // 2^-112: rebiases the exponent from 127 to 15
 63 |   FP32 expinf = {(255 ^ 31) << 23};    // XOR mask turning exponent 255 into 31
 64 |   uint sign_mask = 0x80000000u;
 65 |   FP16 o = {0};
 66 | 
 67 |   FP32 f = *((FP32 *)&fl);
 68 | 
 69 |   uint sign = f.u & sign_mask;
 70 |   f.u ^= sign; // strip the sign; it is merged back in below
 71 | 
 72 |   if (!(f.f < f32infty.f)) // Inf or NaN (the negated float compare is also true for NaN)
 73 |     f.u ^= expinf.u;       // must modify f: the shift below reads f.u, not o.u
 74 |   else {
 75 |     if (f.f > f16max.f)
 76 |       f.f = f16max.f;
 77 |     f.f *= magic.f;
 78 |   }
 79 | 
 80 |   o.u = f.u >> 13; // Take the mantissa bits
 81 |   o.u |= sign >> 16;
 82 |   return *((half *)&o);
 83 | }
 84 | 
 85 | // from half->float code - just for verification. Expands the bits of a half into a float.
 86 | static float half_to_float(half hf) {
 87 |   FP16 h = *((FP16 *)&hf);
 88 | 
 89 |   static const FP32 magic = {113 << 23}; // the float 2^-14 (biased exponent 113)
 90 |   static const uint shifted_exp = 0x7c00 << 13; // exponent mask after shift
 91 |   FP32 o;
 92 | 
 93 |   o.u = (h.u & 0x7fff) << 13;   // exponent/mantissa bits, sign dropped, moved to float position
 94 |   uint exp = shifted_exp & o.u; // just the exponent
 95 |   o.u += (127 - 15) << 23;      // exponent adjust: half bias 15 -> float bias 127
 96 | 
 97 |   // handle exponent special cases
 98 |   if (exp == shifted_exp)    // Inf/NaN?
 99 |     o.u += (128 - 16) << 23; // extra exp adjust: brings the exponent field to 255
100 |   else if (exp == 0)         // Zero/Denormal?
101 |   {
102 |     o.u += 1 << 23; // extra exp adjust: exponent field becomes 113, matching magic
103 |     o.f -= magic.f; // renormalize: subtract 2^-14 so only the mantissa's contribution remains
104 |   }
105 | 
106 |   o.u |= (h.u & 0x8000) << 16; // sign bit, moved from bit 15 to bit 31
107 |   return o.f;
108 | }
109 | 


--------------------------------------------------------------------------------
/lib/bitcode/OCML/mathD.h:
--------------------------------------------------------------------------------
  1 | /*===--------------------------------------------------------------------------
  2 |  *                   ROCm Device Libraries
  3 |  *
  4 |  * This file is distributed under the University of Illinois Open Source
  5 |  * License. See LICENSE.TXT for details.
  6 |  *===------------------------------------------------------------------------*/
  7 | 
  8 | // OCML prototypes
  9 | //#include "ocml.h"
 10 | 
 11 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 12 | 
 13 | // Tables
 14 | #include "tables.h"
 15 | 
 16 | // Builtins
 17 | //#include "builtins.h"
 18 | 
 19 | // Mangling
 20 | #define MATH_MANGLE(N) N
 21 | #define MATH_PRIVATE(N) __priv##N
 22 | 
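 23 | // Local definitions mapping the OCML helper macros onto OpenCL C builtins (presumably stand-ins for the commented-out includes)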
 24 | #define MATH_MAD(x, y, z) fma(x, y, z)
 25 | #define FINITE_ONLY_OPT() 0
 26 | #define BUILTIN_FMA_F64(x, y, z) fma(x, y, z)
 27 | #define MATH_SQRT(x) sqrt(x)
 28 | #define MATH_RCP(x) native_recip(x)
 29 | #define AS_DOUBLE(x) as_double(x)
 30 | #define AS_LONG(x) as_long(x)
 31 | #define BUILTIN_ABS_F64(x) fabs(x)
 32 | #define BUILTIN_COPYSIGN_F64(x, y) copysign(x, y)
 33 | #define MATH_FAST_SQRT(x) native_sqrt(x)
 34 | #define MATH_DIV(x, y) ((x) / (y))
 35 | #define BUILTIN_ISNAN_F64(x) isnan(x)
 36 | #define BUILTIN_MAX_F64(x, y) fmax(x, y)
 37 | #define BUILTIN_MIN_F64(x, y) fmin(x, y)
 38 | 
 39 | #define BUILTIN_RSQRT_F64(x) native_rsqrt(x)
 40 | #define BUILTIN_ISINF_F64(x) isinf(x)
 41 | 
 42 | #define BUILTIN_LOG2_F32(x) native_log2(x)
 43 | #define BUILTIN_EXP2_F32(x) native_exp2(x)
 44 | 
 45 | #define BUILTIN_RINT_F32(x) rint(x)
 46 | #define BUILTIN_RINT_F64(x) rint(x)
 47 | // Exponent that frexp() reports for x; the mantissa it returns is not needed.
 48 | static inline int frexp_exp(double x) {
 49 |   int e;
 50 |   frexp(x, &e); // return value (mantissa) intentionally discarded
 51 |   return e;
 52 | }
 53 | 
 54 | #define BUILTIN_FREXP_EXP_F64(x) frexp_exp(x)
 55 | #define BUILTIN_FLDEXP_F64(x, k) ldexp(x, k)
 56 | 
 57 | // Optimization Controls
 58 | //#include "opts.h"
 59 | 
 60 | // Attributes
 61 | #define PUREATTR __attribute__((pure)) __attribute__((overloadable))
 62 | #define CONSTATTR __attribute__((const)) __attribute__((overloadable))
 63 | 
 64 | // Math controls
 65 | //#include "privD.h"
 66 | 
 67 | // Bit patterns: masks, special-value encodings and exponent parameters of the 64-bit double format
 68 | #define SIGNBIT_DP64      0x8000000000000000L
 69 | #define EXSIGNBIT_DP64    0x7fffffffffffffffL
 70 | #define EXPBITS_DP64      0x7ff0000000000000L
 71 | #define MANTBITS_DP64     0x000fffffffffffffL
 72 | #define ONEEXPBITS_DP64   0x3ff0000000000000L
 73 | #define TWOEXPBITS_DP64   0x4000000000000000L
 74 | #define HALFEXPBITS_DP64  0x3fe0000000000000L
 75 | #define IMPBIT_DP64       0x0010000000000000L
 76 | #define QNANBITPATT_DP64  0x7ff8000000000000L
 77 | #define INDEFBITPATT_DP64 0xfff8000000000000L
 78 | #define PINFBITPATT_DP64  0x7ff0000000000000L
 79 | #define NINFBITPATT_DP64  0xfff0000000000000L
 80 | #define EXPBIAS_DP64      1023
 81 | #define EXPSHIFTBITS_DP64 52
 82 | #define BIASEDEMIN_DP64   1
 83 | #define EMIN_DP64         -1022
 84 | #define BIASEDEMAX_DP64   2046
 85 | #define EMAX_DP64         1023
 86 | #define LAMBDA_DP64       1.0e300
 87 | #define MANTLENGTH_DP64   53
 88 | #define BASEDIGITS_DP64   15
 89 | // Class-selection flag bits; BUILTIN_CLASS_F64 tests them in its klass mask argument
 90 | #define CLASS_PINF 2
 91 | #define CLASS_NINF 4
 92 | #define CLASS_QNAN 8
 93 | #define CLASS_SNAN 16
 94 | #define CLASS_PSUB 32
 95 | #define CLASS_NSUB 64
 96 | #define CLASS_PZER 128
 97 | #define CLASS_NZER 256
 98 | // Software class test: returns -1 when x belongs to any category selected in klass, else 0.
 99 | // Signs of zeros/subnormals and quiet vs. signaling NaNs are not distinguished.
100 | static inline long CONSTATTR BUILTIN_CLASS_F64(double x, int klass)
101 | {
102 |   const long bits = as_long(x);
103 |   const long expo = bits & EXPBITS_DP64;   // biased exponent field
104 |   const long mant = bits & MANTBITS_DP64;  // fraction field
105 | 
106 |   if ((klass & CLASS_PINF) && (bits == PINFBITPATT_DP64))
107 |     return -1;
108 |   if ((klass & CLASS_NINF) && (bits == NINFBITPATT_DP64))
109 |     return -1;
110 | 
111 |   // NaN: exponent all ones AND non-zero fraction. Masking with QNANBITPATT_DP64
112 |   // alone is not enough: that is non-zero for every normal number as well.
113 |   if ((klass & (CLASS_QNAN|CLASS_SNAN)) && (expo == EXPBITS_DP64) && (mant != 0))
114 |     return -1;
115 | 
116 |   if ((klass & (CLASS_NZER|CLASS_PZER)) && ((bits & (~SIGNBIT_DP64)) == 0))
117 |     return -1;
118 | 
119 |   if ((klass & (CLASS_NSUB|CLASS_PSUB)) && (expo == 0) && (mant != 0))
120 |     return -1;
121 |   return 0;
122 | }
123 | 
124 | // declarations
125 | 
126 | PUREATTR double j1(double x);
127 | PUREATTR double j0(double x);
128 | CONSTATTR double erfinv(double x);
129 | CONSTATTR double erfcinv(double x);
130 | 


--------------------------------------------------------------------------------
/samples/hiploadmodule/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <hip/hip_runtime.h>
  2 | #include <string.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <iostream>
  6 | #include <string>
  7 | #include <limits.h>
  8 | #include <unistd.h>
  9 | 
 10 | #define NUM 100
 11 | // Evaluates a HIP call once; if the result is not hipSuccess, prints the error string, code and source location to stderr and exits with status 1.
 12 | #define CHECK(cmd)                                                                   \
 13 |   do {                                                                               \
 14 |     hipError_t error = (cmd);                                                        \
 15 |     if (error != hipSuccess) {                                                       \
 16 |       fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error, \
 17 |               __FILE__, __LINE__);                                                   \
 18 |       exit(1);                                                                       \
 19 |     }                                                                                \
 20 |   } while(0)
 21 | 
 22 | 
 23 | using namespace std;
 24 | 
 25 | int main(int argc, char* argv[])
 26 | {
 27 |   // Vector-add sample: loads the code object "hipModuleLoadBinary" located next to this
 28 |   // executable, launches its "_occa_addVectors_0" kernel and verifies C = A + B on the host.
 29 |   int i=0;
 30 |   float* hostA;
 31 |   float* hostB;
 32 |   float* hostC;
 33 |   float* deviceA;
 34 |   float* deviceB;
 35 |   float* deviceC;
 36 | 
 37 |   // kernel arguments, handed over as one buffer via HIP_LAUNCH_PARAM_BUFFER_POINTER
 38 |   struct {
 39 |     size_t _n;
 40 |     void* _Ad;
 41 |     void* _Bd;
 42 |     void* _Cd;
 43 |   } args1;
 44 | 
 45 |   hostA = (float*)malloc(NUM * sizeof(float));
 46 |   hostB = (float*)malloc(NUM * sizeof(float));
 47 |   hostC = (float*)malloc(NUM * sizeof(float));
 48 |   if (hostA == NULL || hostB == NULL || hostC == NULL) {
 49 |     fprintf(stderr, "error: host allocation failed\n");
 50 |     exit(1);
 51 |   }
 52 | 
 53 |   // initialize the input data
 54 |   for (i = 0; i < NUM; i++) {
 55 |     hostA[i] = (float)i;
 56 |     hostB[i] = (float)i;
 57 |   }
 58 | 
 59 |   CHECK(hipInit(0));
 60 |   CHECK(hipMalloc((void**)&deviceA, NUM * sizeof(float)));
 61 |   CHECK(hipMalloc((void**)&deviceB, NUM * sizeof(float)));
 62 |   CHECK(hipMalloc((void**)&deviceC, NUM * sizeof(float)));
 63 |   CHECK(hipMemcpy(deviceB, hostB, NUM*sizeof(float), hipMemcpyHostToDevice));
 64 |   CHECK(hipMemcpy(deviceA, hostA, NUM*sizeof(float), hipMemcpyHostToDevice));
 65 | 
 66 |   hipModule_t hipModule = NULL;
 67 |   hipError_t error;
 68 |   // directory of this executable; falls back to "./" when /proc/self/exe cannot be read
 69 |   char result[ PATH_MAX ];
 70 |   ssize_t count = readlink( "/proc/self/exe", result, PATH_MAX );
 71 |   std::string executablePath( result, (count > 0) ? count : 0 );
 72 |   size_t last_pos = executablePath.find_last_of("/");
 73 |   if (last_pos == std::string::npos)
 74 |     executablePath.assign("./");
 75 |   else
 76 |     executablePath.resize(last_pos+1);
 77 |   const std::string  binaryFilename(executablePath + "hipModuleLoadBinary");
 78 | 
 79 |   error = hipModuleLoad(&hipModule, binaryFilename.c_str());
 80 |   if (error) {
 81 |     printf("%s\n",  binaryFilename.c_str());
 82 |     cout << "Loading Module ("+binaryFilename+")" << endl;
 83 |     exit(1);
 84 |   }
 85 | 
 86 |   // get the function from the module
 87 |   hipFunction_t hipFunction = NULL;
 88 |   error = hipModuleGetFunction(&hipFunction, hipModule, "_occa_addVectors_0");
 89 |   if (error) {
 90 |     cout << "Getting Function (_occa_addVectors_0)" << endl;
 91 |     exit(1);
 92 |   }
 93 | 
 94 |   args1._n = NUM;
 95 |   args1._Ad = deviceA;
 96 |   args1._Bd = deviceB;
 97 |   args1._Cd = deviceC;
 98 |   size_t size = sizeof(args1);
 99 |   void *config[] = {
100 |     HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
101 |     HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
102 |     HIP_LAUNCH_PARAM_END
103 |   };
104 | 
105 |   // launch the function: one block of NUM threads; the array decays to the void** "extra" argument
106 |   error = hipModuleLaunchKernel( hipFunction, 1, 1, 1, NUM, 1, 1, 0, NULL, NULL, config );
107 |   if (error) {
108 |     cout << "hipmodulelaunch error" << endl;
109 |     exit(1);
110 |   }
111 |   CHECK(hipMemcpy(hostC, deviceC, NUM*sizeof(float), hipMemcpyDeviceToHost));
112 | 
113 |   // verify the results
114 |   int errors = 0;
115 |   for (i = 0; i < NUM; i++) {
116 |     if (hostC[i] != (hostB[i] + hostA[i])) {
117 |       printf( "%f\n", hostC[i]);
118 |       errors++;
119 |     }
120 |   }
121 |   if (errors!=0) {
122 |     printf("FAILED: %d errors\n",errors);
123 |   } else {
124 |     printf("PASSED!\n");
125 |   }
126 | 
127 |   CHECK(hipFree(deviceA));
128 |   CHECK(hipFree(deviceB));
129 |   CHECK(hipFree(deviceC));
130 |   free(hostA); free(hostB); free(hostC);
131 |   return (errors != 0) ? 1 : 0;  // non-zero exit status when verification failed
132 | }
133 | 


--------------------------------------------------------------------------------
/samples/bit_extract/bit_extract.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the "Software"), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 | 
23 | #include <stdio.h>
24 | #include <iostream>
25 | #include "hip/hip_runtime.h"
26 | 
27 | #define CHECK(cmd)                                                                                 \
28 |     {                                                                                              \
29 |         hipError_t error = cmd;                                                                    \
30 |         if (error != hipSuccess) {                                                                 \
31 |             fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,         \
32 |                     __FILE__, __LINE__);                                                           \
33 |             exit(EXIT_FAILURE);                                                                    \
34 |         }                                                                                          \
35 |     }
36 | // Stores bits 8..11 of every A_d[i] (i < N) in C_d[i]; a thread starts at its global index and advances by the total number of launched threads.
37 | __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) {
38 |     const size_t step = hipBlockDim_x * hipGridDim_x;
39 |     size_t idx = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
40 |     while (idx < N) {
41 |         C_d[idx] = ((A_d[idx] & 0xf00) >> 8);
42 |         idx += step;
43 |     }
44 | }
45 | 
46 | 
47 | int main(int argc, char* argv[]) {
48 |     // Fills A_h with 0..N-1, has the device extract bits 8..11 of every word and checks the result on the host.
49 |     uint32_t *A_d, *C_d;
50 |     uint32_t *A_h, *C_h;
51 |     size_t N = 1000000;
52 |     size_t Nbytes = N * sizeof(uint32_t);
53 |     int deviceId = 0;
54 |     CHECK(hipSetDevice(deviceId));
55 |     printf ("deviceId: %i\n", deviceId);
56 |     hipDeviceProp_t props;
57 |     CHECK(hipGetDeviceProperties(&props, deviceId));
58 |     printf("info: running on device #%d %s\n", deviceId, props.name);
59 | 
60 |     printf("info: allocate host mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
61 |     A_h = (uint32_t*)malloc(Nbytes);
62 |     CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess);
63 |     C_h = (uint32_t*)malloc(Nbytes);
64 |     CHECK(C_h == 0 ? hipErrorMemoryAllocation : hipSuccess);
65 |     for (size_t i = 0; i < N; i++) {
66 |         A_h[i] = (uint32_t)i;  // explicit narrowing; N is far below 2^32
67 |     }
68 | 
69 |     printf("info: allocate device mem (%6.2f MB)\n", 2 * Nbytes / 1024.0 / 1024.0);
70 |     CHECK(hipMalloc((void**)&A_d, Nbytes));
71 |     CHECK(hipMalloc((void**)&C_d, Nbytes));
72 |     printf("info: copy Host2Device\n");
73 |     CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
74 | 
75 |     printf("info: launch 'bit_extract_kernel' \n");
76 |     const unsigned blocks = 512;
77 |     const unsigned threadsPerBlock = 256;
78 |     hipLaunchKernelGGL(bit_extract_kernel, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
79 |     CHECK(hipGetLastError());  // a rejected launch is reported here instead of being ignored
80 | 
81 |     printf("info: copy Device2Host\n");
82 |     CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
83 |     printf("info: check result\n");
84 |     for (size_t i = 0; i < N; i++) {
85 |         unsigned Agold = ((A_h[i] & 0xf00) >> 8);
86 |         if (C_h[i] != Agold) {
87 |             fprintf(stderr, "mismatch detected.\n");
88 |             printf("%zu: %08x =? %08x (Ain=%08x)\n", i, C_h[i], Agold, A_h[i]);
89 |             CHECK(hipErrorUnknown);
90 |         }
91 |     }
92 |     CHECK(hipFree(A_d));  // release device buffers ...
93 |     CHECK(hipFree(C_d));
94 |     free(A_h); free(C_h);  // ... and host buffers before reporting success
95 |     printf("PASSED!\n");
96 | }
97 | 


--------------------------------------------------------------------------------
/samples/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | option(SAVE_TEMPS "Save temporary compilation products" OFF)
  3 | option(VERBOSE "Verbose compilation" OFF)
  4 | 
  5 | if(SAVE_TEMPS)
  6 |   add_compile_options("--save-temps")
  7 | endif()
  8 | 
  9 | if(VERBOSE)
 10 |   add_compile_options("-v")
 11 | endif()
 12 | 
 13 | # add_hipcl_test(<exec> <test> <pass-regex> <source> [test args...])
 14 | function(add_hipcl_test EXEC_NAME TEST_NAME TEST_PASS SOURCE)
 15 |     # Build one sample executable from SOURCE (forced to compile as C++) and link it against hipcl.
 16 |     set_property(SOURCE ${SOURCE} PROPERTY LANGUAGE CXX)
 17 |     add_executable("${EXEC_NAME}" ${SOURCE})
 18 |     set_property(TARGET "${EXEC_NAME}" PROPERTY CXX_STANDARD_REQUIRED ON)
 19 |     target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl")
 20 | 
 21 |     install(
 22 |         TARGETS "${EXEC_NAME}"
 23 |         RUNTIME DESTINATION "${HIPCL_SAMPLE_BINDIR}"
 24 |     )
 25 | 
 26 |     # Register the test: run the built binary with the extra arguments (ARGN);
 27 |     # it passes when its output matches the regular expression TEST_PASS.
 28 |     add_test(
 29 |         NAME "${TEST_NAME}"
 30 |         COMMAND "${CMAKE_CURRENT_BINARY_DIR}/${EXEC_NAME}" ${ARGN}
 31 |     )
 32 |     set_property(
 33 |         TEST "${TEST_NAME}"
 34 |         PROPERTY PASS_REGULAR_EXPRESSION "${TEST_PASS}"
 35 |     )
 36 | endfunction()
 37 | 
 38 | 
 39 | # add_hipcl_binary(<exec> <sources...>)
 40 | function(add_hipcl_binary EXEC_NAME)
 41 |     # Sample executable built from the listed sources (forced to compile as C++) and linked against hipcl.
 42 |     set_property(SOURCE ${ARGN} PROPERTY LANGUAGE CXX)
 43 | 
 44 |     add_executable("${EXEC_NAME}" ${ARGN})
 45 |     set_property(TARGET "${EXEC_NAME}" PROPERTY CXX_STANDARD_REQUIRED ON)
 46 |     target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl")
 47 | 
 48 |     # Installed into the samples' binary directory.
 49 |     install(
 50 |         TARGETS "${EXEC_NAME}"
 51 |         RUNTIME DESTINATION "${HIPCL_SAMPLE_BINDIR}"
 52 |     )
 53 | 
 54 | endfunction()
 55 | 
 56 | # add_hipcl_device_binary(<name> <sources...>)
 57 | function(add_hipcl_device_binary BIN_NAME)
 58 |     # Compiles the sources with --cuda-device-only into an object library and copies
 59 |     # the resulting object to a file named BIN_NAME in the current binary directory.
 60 |     set(obj_target "${BIN_NAME}_o")
 61 |     set(device_blob "${CMAKE_CURRENT_BINARY_DIR}/${BIN_NAME}")
 62 | 
 63 |     add_library("${obj_target}" OBJECT ${ARGN})
 64 |     set_property(SOURCE ${ARGN} PROPERTY LANGUAGE CXX)
 65 |     target_link_libraries("${obj_target}" "${SANITIZER_LIBS}" "hipcl")
 66 |     target_compile_options("${obj_target}" PRIVATE "--cuda-device-only")
 67 | 
 68 |     # Copy step: object file(s) of the library -> ${device_blob}
 69 |     add_custom_command(
 70 |         OUTPUT "${device_blob}"
 71 |         COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_OBJECTS:${obj_target}> "${device_blob}"
 72 |         DEPENDS "${obj_target}"
 73 |     )
 74 |     add_custom_target("${BIN_NAME}" DEPENDS "${device_blob}")
 75 | 
 76 |     install(
 77 |         FILES "${device_blob}"
 78 |         DESTINATION "${HIPCL_SAMPLE_BINDIR}"
 79 |     )
 80 | 
 81 | endfunction()
 82 | 
 83 | # add_hipcl_binary_device_link(<exec> <sources...>)
 84 | function(add_hipcl_binary_device_link EXEC_NAME)
 85 |     # Executable whose sources are compiled and linked with -fgpu-rdc; the link
 86 |     # step additionally receives --hip-link plus the pass and device-library paths.
 87 |     set_property(SOURCE ${ARGN} PROPERTY LANGUAGE CXX)
 88 |     add_executable("${EXEC_NAME}" ${ARGN})
 89 |     set_property(TARGET "${EXEC_NAME}" PROPERTY CXX_STANDARD_REQUIRED ON)
 90 |     target_link_libraries("${EXEC_NAME}" "${SANITIZER_LIBS}" "hipcl")
 91 | 
 92 |     target_compile_options("${EXEC_NAME}" PRIVATE "-fgpu-rdc")
 93 | 
 94 |     # Each path option is given twice: once as INSTALL_INTERFACE, once as BUILD_INTERFACE.
 95 |     target_link_options("${EXEC_NAME}" PRIVATE
 96 |         "-fgpu-rdc"
 97 |         "--hip-link"
 98 |         "$<INSTALL_INTERFACE:--hip-llvm-pass-path=${HIPCL_LLVM_DIR}>"
 99 |         "$<BUILD_INTERFACE:--hip-llvm-pass-path=${CMAKE_BINARY_DIR}/llvm_passes>"
100 |         "$<INSTALL_INTERFACE:--hip-device-lib-path=${HIPCL_DATA_DIR}>"
101 |         "$<BUILD_INTERFACE:--hip-device-lib-path=${CMAKE_BINARY_DIR}>"
102 |         "--hip-device-lib=kernellib.bc")
103 | 
104 |     install(
105 |         TARGETS "${EXEC_NAME}"
106 |         RUNTIME DESTINATION "${HIPCL_SAMPLE_BINDIR}"
107 |     )
108 | endfunction()
109 | 
110 | 
111 | set(SAMPLES
112 |     hipmath
113 |     hiptest
114 |     bit_extract
115 |     hcc_dialects
116 |     fp16
117 |     0_MatrixTranspose
118 |     0_MatrixMultiply
119 |     1_hipEvent
120 |     2_vecadd
121 |     3_shared_memory
122 |     4_shfl
123 |     5_2dshfl
124 |     6_dynamic_shared
125 |     hipInfo
126 | #    7_streams
127 | #    9_unroll
128 |     10_memcpy3D
129 |     hipSymbol
130 |     hipDeviceLink
131 |     hiploadmodule
132 | )
133 | 
134 | foreach (SAMPLE ${SAMPLES})
135 |   add_subdirectory(${SAMPLE})
136 | endforeach()
137 | 
138 | add_subdirectory(hip-cuda)
139 | 


--------------------------------------------------------------------------------
/samples/0_MatrixTranspose/MatrixTranspose.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | */
 22 | 
 23 | #include <iostream>
 24 | #include <cmath>
 25 | 
 26 | // hip header file
 27 | #include "hip/hip_runtime.h"
 28 | 
 29 | #define WIDTH 1024
 30 | 
 31 | #define NUM (WIDTH * WIDTH)
 32 | 
 33 | #define THREADS_PER_BLOCK_X 4
 34 | #define THREADS_PER_BLOCK_Y 4
 35 | #define THREADS_PER_BLOCK_Z 1
 36 | 
 37 | // Device (Kernel) function, it must be void: each thread copies one element, out[y][x] = in[x][y] (row-major, width x width)
 38 | __global__ void matrixTranspose(float* out, float* in, const int width) {
 39 |     int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 40 |     int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
 41 |     // Threads that fall outside the matrix (grid larger than width x width) must not touch memory.
 42 |     if (x < width && y < width) out[y * width + x] = in[x * width + y];
 43 | }
 44 | 
 45 | // CPU implementation of matrix transpose (host-side reference):
 46 | // output[col][row] = input[row][col] for a row-major width x width matrix
 47 | void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
 48 |     for (unsigned int row = 0; row != width; ++row) {
 49 |         const float* src = input + row * width;  // start of the current source row
 50 |         for (unsigned int col = 0; col != width; ++col) output[col * width + row] = src[col];
 51 |     }
 52 | }
 53 | 
 54 | int main() {
 55 |     // Transposes a WIDTH x WIDTH matrix on the device and compares it with the CPU reference; returns 0 only if everything matched.
 56 |     float* Matrix;
 57 |     float* TransposeMatrix;
 58 |     float* cpuTransposeMatrix;
 59 |     float* gpuMatrix;
 60 |     float* gpuTransposeMatrix;
 61 | 
 62 |     // Reports a failed HIP call on stderr; yields true when the call succeeded.
 63 |     auto hipOk = [](hipError_t status, const char* what) {
 64 |         if (status != hipSuccess)
 65 |             std::cerr << "error: " << what << ": " << hipGetErrorString(status) << std::endl;
 66 |         return status == hipSuccess;
 67 |     };
 68 | 
 69 |     hipDeviceProp_t devProp;
 70 |     if (!hipOk(hipGetDeviceProperties(&devProp, 0), "hipGetDeviceProperties")) return 1;
 71 |     std::cout << "Device name " << devProp.name << std::endl;
 72 | 
 73 |     Matrix = (float*)malloc(NUM * sizeof(float));
 74 |     TransposeMatrix = (float*)malloc(NUM * sizeof(float));
 75 |     cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
 76 |     if (!Matrix || !TransposeMatrix || !cpuTransposeMatrix) {
 77 |         std::cerr << "error: host allocation failed" << std::endl;
 78 |         return 1;
 79 |     }
 80 | 
 81 |     // initialize the input data
 82 |     for (int i = 0; i < NUM; i++) {
 83 |         Matrix[i] = (float)i * 10.0f;
 84 |     }
 85 | 
 86 |     // allocate the memory on the device side
 87 |     if (!hipOk(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)), "hipMalloc")) return 1;
 88 |     if (!hipOk(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)), "hipMalloc")) return 1;
 89 | 
 90 |     // Memory transfer from host to device
 91 |     if (!hipOk(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice), "hipMemcpy")) return 1;
 92 | 
 93 |     // Launching kernel from host
 94 |     hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
 95 |                     dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
 96 |                     gpuMatrix, WIDTH);
 97 | 
 98 |     // Memory transfer from device to host
 99 |     if (!hipOk(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost), "hipMemcpy")) return 1;
100 | 
101 |     // CPU MatrixTranspose computation
102 |     matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
103 | 
104 |     // verify the results
105 |     int errors = 0;
106 |     float eps = 1.0E-6;
107 |     for (int i = 0; i < NUM; i++) {
108 |         if (std::fabs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) errors++;
109 |     }
110 |     if (errors != 0) {
111 |         printf("FAILED: %d errors\n", errors);
112 |     } else {
113 |         printf("PASSED!\n");
114 |     }
115 | 
116 |     // free the resources on device and host side
117 |     hipOk(hipFree(gpuMatrix), "hipFree");
118 |     hipOk(hipFree(gpuTransposeMatrix), "hipFree");
119 |     free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix);
120 |     return errors != 0 ? 1 : 0;  // the raw count would be truncated to 8 bits as an exit status
121 | }
122 | 


--------------------------------------------------------------------------------
/samples/9_unroll/unroll.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
  3 | 
  4 | Permission is hereby granted, free of charge, to any person obtaining a copy
  5 | of this software and associated documentation files (the "Software"), to deal
  6 | in the Software without restriction, including without limitation the rights
  7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 | copies of the Software, and to permit persons to whom the Software is
  9 | furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in
 12 | all copies or substantial portions of the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 | THE SOFTWARE.
 21 | */
 22 | 
 23 | #include <iostream>
 24 | #include <cmath>
 25 | 
 26 | // hip header file
 27 | #include "hip/hip_runtime.h"
 28 | 
 29 | #define WIDTH 4
 30 | 
 31 | #define NUM (WIDTH * WIDTH)
 32 | 
 33 | #define THREADS_PER_BLOCK_X 4
 34 | #define THREADS_PER_BLOCK_Y 4
 35 | #define THREADS_PER_BLOCK_Z 1
 36 | 
 37 | // Device (Kernel) function, it must be void: a thread loads in[tid], then every out[row][col] is written with what __shfl returns for index col * width + row.
 38 | __global__ void matrixTranspose(float* out, float* in, const int width) {
 39 |     const int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
 40 |     const float mine = in[tid];
 41 | #pragma unroll
 42 |     for (int row = 0; row < width; ++row) {
 43 |         for (int col = 0; col < width; ++col)
 44 |             out[row * width + col] = __shfl(mine, col * width + row);
 45 |     }
 46 | }
 47 | 
 48 | // CPU implementation of matrix transpose (host-side reference):
 49 | // output[col][row] = input[row][col] for a row-major width x width matrix
 50 | void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
 51 |     for (unsigned int row = 0; row != width; ++row) {
 52 |         const float* src = input + row * width;  // start of the current source row
 53 |         for (unsigned int col = 0; col != width; ++col) output[col * width + row] = src[col];
 54 |     }
 55 | }
 56 | 
 57 | int main() {
 58 |     // Transposes a WIDTH x WIDTH matrix with the shuffle kernel and compares it with the CPU reference; returns 0 only if everything matched.
 59 |     float* Matrix;
 60 |     float* TransposeMatrix;
 61 |     float* cpuTransposeMatrix;
 62 |     float* gpuMatrix;
 63 |     float* gpuTransposeMatrix;
 64 | 
 65 |     // Reports a failed HIP call on stderr; yields true when the call succeeded.
 66 |     auto hipOk = [](hipError_t status, const char* what) {
 67 |         if (status != hipSuccess)
 68 |             std::cerr << "error: " << what << ": " << hipGetErrorString(status) << std::endl;
 69 |         return status == hipSuccess;
 70 |     };
 71 | 
 72 |     hipDeviceProp_t devProp;
 73 |     if (!hipOk(hipGetDeviceProperties(&devProp, 0), "hipGetDeviceProperties")) return 1;
 74 |     std::cout << "Device name " << devProp.name << std::endl;
 75 | 
 76 |     Matrix = (float*)malloc(NUM * sizeof(float));
 77 |     TransposeMatrix = (float*)malloc(NUM * sizeof(float));
 78 |     cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
 79 |     if (!Matrix || !TransposeMatrix || !cpuTransposeMatrix) {
 80 |         std::cerr << "error: host allocation failed" << std::endl;
 81 |         return 1;
 82 |     }
 83 | 
 84 |     // initialize the input data
 85 |     for (int i = 0; i < NUM; i++) {
 86 |         Matrix[i] = (float)i * 10.0f;
 87 |     }
 88 | 
 89 |     // allocate the memory on the device side
 90 |     if (!hipOk(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)), "hipMalloc")) return 1;
 91 |     if (!hipOk(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)), "hipMalloc")) return 1;
 92 | 
 93 |     // Memory transfer from host to device
 94 |     if (!hipOk(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice), "hipMemcpy")) return 1;
 95 | 
 96 |     // Launching kernel from host
 97 |     hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
 98 |                     gpuTransposeMatrix, gpuMatrix, WIDTH);
 99 | 
100 |     // Memory transfer from device to host
101 |     if (!hipOk(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost), "hipMemcpy")) return 1;
102 |     // CPU MatrixTranspose computation
103 |     matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
104 | 
105 |     // verify the results (std::fabs from <cmath>: always the floating-point overload)
106 |     int errors = 0;
107 |     double eps = 1.0E-6;
108 |     for (int i = 0; i < NUM; i++) {
109 |         if (std::fabs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
110 |             printf("%d cpu: %f gpu  %f\n", i, cpuTransposeMatrix[i], TransposeMatrix[i]);
111 |             errors++;
112 |         }
113 |     }
114 |     if (errors != 0) {
115 |         printf("FAILED: %d errors\n", errors);
116 |     } else {
117 |         printf("PASSED!\n");
118 |     }
119 |     // free the resources on device and host side
120 |     hipOk(hipFree(gpuMatrix), "hipFree");
121 |     hipOk(hipFree(gpuTransposeMatrix), "hipFree");
122 |     free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix);
123 |     return errors != 0 ? 1 : 0;  // the raw count would be truncated to 8 bits as an exit status
124 | }
125 | 


--------------------------------------------------------------------------------