├── CMakeLists.txt ├── README.md ├── cmake ├── FindClang.cmake └── FindLLVM.cmake ├── include ├── CMakeLists.txt └── cl.h ├── restrict.patch ├── sources ├── CBackend │ ├── CBackend.cpp │ ├── CMakeLists.txt │ ├── CTargetMachine.h │ └── TargetInfo │ │ ├── CBackendTargetInfo.cpp │ │ └── CMakeLists.txt ├── CMakeLists.txt ├── Main.cpp ├── compiler │ ├── BitcodeDisassembler.cpp │ ├── BitcodeDisassembler.h │ ├── Compiler.cpp │ ├── Compiler.h │ ├── MainEntry.cpp │ ├── MainEntry.h │ ├── Rewriter.cpp │ └── Rewriter.h └── compute │ └── ParallelForEach.h └── tests ├── CMakeLists.txt ├── catch.h ├── kernel.cpp ├── test_kernel.cpp └── test_rewriter.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(cpp-opencl CXX) 3 | 4 | set(CPACK_PACKAGE_NAME "cpp-opencl") 5 | set(CPACK_PACKAGE_CONTACT "Dimitios Christodoulou ") 6 | set(CPACK_PACKAGE_VENDOR ${CPACK_PACKAGE_CONTACT}) 7 | set(CPACK_PACKAGE_VERSION "1.0") 8 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "C++ to OpenCl kernel translation") 9 | set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") 10 | set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) 11 | set(CPACK_GENERATOR "TGZ;RPM;DEB") 12 | set(CPACK_RPM_PACKAGE_RELEASE "1%{?dist}") 13 | set(CPACK_RPM_PACKAGE_LICENSE "MIT") 14 | set(CPACK_RPM_PACKAGE_GROUP "Development/Tools") 15 | set(CPACK_RPM_PACKAGE_URL "http://github.com/dimitrs/cpp-opencl") 16 | set(CPACK_RPM_PACKAGE_DESCRIPTION "C++ to OpenCl kernel translation") 17 | include(CPack) 18 | include(GNUInstallDirs) 19 | install(FILES COPYING README.md 20 | DESTINATION ${CMAKE_INSTALL_DOCDIR}) 21 | 22 | set(GCC_COMMON_WARNING_FLAGS "-Wall -Wextra -O -g -Wformat=2 -Wfloat-equal -Wno-unused-parameter") 23 | set(C_WARNINGS "${GCC_COMMON_WARNING_FLAGS}") 24 | set(CXX_WARNINGS "${GCC_COMMON_WARNING_FLAGS}") 25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_WARNINGS} -g -O0 -std=c++11 ") 26 | 27 | 
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/) 28 | Find_Package(LLVM REQUIRED) 29 | Find_Package(Clang REQUIRED) 30 | 31 | add_subdirectory(include) 32 | add_subdirectory(sources) 33 | add_subdirectory(tests) 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cpp-opencl 2 | ========== 3 | 4 | Please see http://dimitri-christodoulou.blogspot.com.es/2014/02/implement-data-parallelism-on-gpu.html 5 | 6 | 7 | The cpp-opencl project provides a way to make programming GPUs easy for the developer. It allows you to implement data parallelism on a GPU directly in C++ instead of using OpenCL. See the example below. The code in the parallel_for_each lambda function is executed on the GPU, and all the rest is executed on the CPU. More specifically, the “square” function is executed both on the CPU (via a call to std::transform) and the GPU (via a call to compute::parallel_for_each). Conceptually, compute::parallel_for_each is similar to std::transform except that one executes code on the GPU and the other on the CPU. 8 | 9 | ``` 10 | #include 11 | #include 12 | #include "ParallelForEach.h" 13 | 14 | template 15 | T square(T x) 16 | { 17 | return x * x; 18 | } 19 | 20 | void func() { 21 | std::vector In {1,2,3,4,5,6}; 22 | std::vector OutGpu(6); 23 | std::vector OutCpu(6); 24 | 25 | compute::parallel_for_each(In.begin(), In.end(), OutGpu.begin(), [](int x){ 26 | return square(x); 27 | }); 28 | 29 | 30 | std::transform(In.begin(), In.end(), OutCpu.begin(), [](int x) { 31 | return square(x); 32 | }); 33 | 34 | // 35 | // Do something with OutCpu and OutGpu ….......... 36 | 37 | // 38 | 39 | } 40 | 41 | int main() { 42 | func(); 43 | return 0; 44 | } 45 | ``` 46 | 47 | Function Overloading 48 | -------------------- 49 | 50 | Additionally, it is possible to overload functions. The “A::GetIt” member function below is overloaded. 
The function marked as “gpu” will be executed on the GPU and the other on the CPU. 51 | 52 | ``` 53 | struct A { 54 | int GetIt() const __attribute__((amp_restrict("cpu"))) { 55 | return 2; 56 | } 57 | int GetIt() const __attribute__((amp_restrict("gpu"))) { 58 | return 4; 59 | } 60 | }; 61 | 62 | compute::parallel_for_each(In.begin(), In.end(), OutGpu.begin(), [](int x){ 63 | A a; 64 | return a.GetIt(); // returns 4 65 | }); 66 | ``` 67 | 68 | If you want to use function overloading using the amp_restrict attribute, you will need to patch your Clang compiler: 69 | 70 | ``` 71 | git clone https://github.com/llvm-mirror/clang.git 72 | cd clang 73 | git checkout 5806bb59d2d19a9b32b739589865d8bb1e2627c5 74 | git apply PATH-TO-cpp_opencl/restrict.patch 75 | ``` 76 | I used this LLVM version: 77 | ``` 78 | git clone https://github.com/llvm-mirror/llvm.git 79 | cd llvm 80 | git checkout 47042bcc266285676f8ff284e5d46a2c196c367b 81 | ``` 82 | 83 | You can use any recent Clang version already installed on your machine (without the patch), if you do not intend to use the amp_restrict attribute. 84 | 85 | 86 | Build the Executable 87 | -------------------- 88 | 89 | The tool uses a special compiler based on Clang/LLVM. 90 | 91 | cpp_opencl -x c++ -std=c++11 -O3 -o Input.cc.o -c Input.cc 92 | 93 | The above command generates four files: 94 | 1. Input.cc.o 95 | 2. Input.cc.cl 96 | 3. Input.cc_cpu.cpp 97 | 4. 
Input.cc_gpu.cpp 98 | 99 | Use the Clang C++ compiler directly to link: 100 | 101 | clang++ ./Input.cc.o -o test -lOpenCL 102 | 103 | 104 | Then just execute: 105 | 106 | ./test 107 | -------------------------------------------------------------------------------- /cmake/FindClang.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 Laszlo Nagy 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

# output:
#   CLANG_FOUND
#   CLANG_INCLUDE_DIRS
#   CLANG_DEFINITIONS
#   CLANG_EXECUTABLE

# set_clang_definitions(<config_cmd>)
#
# Runs `<config_cmd> --cppflags`, keeps only the -D... and -U... tokens from
# its output, appends -fno-rtti, and publishes the resulting list to the
# caller as CLANG_DEFINITIONS.
function(set_clang_definitions config_cmd)
  execute_process(
    COMMAND "${config_cmd}" --cppflags
    OUTPUT_VARIABLE llvm_cppflags
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Function scopes start with a copy of the caller's variables, so reset the
  # accumulator instead of appending to whatever `cxxflags` the caller had.
  set(cxxflags "")

  # The input is quoted so that empty llvm-config output is passed as one
  # (empty) argument rather than vanishing and tripping the argument check.
  string(REGEX MATCHALL "(-D[^ ]*)" dflags "${llvm_cppflags}")
  string(REGEX MATCHALL "(-U[^ ]*)" uflags "${llvm_cppflags}")
  list(APPEND cxxflags ${dflags})
  list(APPEND cxxflags ${uflags})
  list(APPEND cxxflags -fno-rtti)
  # list(APPEND cxxflags -fno-exceptions)

  set(CLANG_DEFINITIONS ${cxxflags} PARENT_SCOPE)
endfunction()

# is_clang_installed(<config_cmd>)
#
# Sets CLANG_INSTALLED in the caller's scope:
#   FALSE - the directory printed by `--includedir` starts with the directory
#           printed by `--src-root` (headers are served from the source tree)
#   TRUE  - otherwise, including the case where `--src-root` prints nothing
function(is_clang_installed config_cmd)
  execute_process(
    COMMAND "${config_cmd}" --includedir
    OUTPUT_VARIABLE include_dirs
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  execute_process(
    COMMAND "${config_cmd}" --src-root
    OUTPUT_VARIABLE llvm_src_dir
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  if(NOT llvm_src_dir)
    # No source root reported: there is no source tree to fall back on, and an
    # empty needle would match at index 0 and be misread as "in source tree".
    set(installed TRUE)
  else()
    string(FIND "${include_dirs}" "${llvm_src_dir}" result)
    # Only a match at index 0 (prefix) means the headers live in the source
    # tree. This keeps the truth table of the previous raw-index test, where
    # -1 and positive indices evaluated as true and 0 as false.
    if(result EQUAL 0)
      set(installed FALSE)
    else()
      set(installed TRUE)
    endif()
  endif()

  set(CLANG_INSTALLED ${installed} PARENT_SCOPE)
endfunction()

# set_clang_include_dirs(<config_cmd>)
#
# Publishes CLANG_INCLUDE_DIRS to the caller. When is_clang_installed()
# reports TRUE this is the `--includedir` output; otherwise it is the four
# include directories under `--src-root` and `--obj-root` (LLVM's own and
# tools/clang's).
function(set_clang_include_dirs config_cmd)
  is_clang_installed("${config_cmd}")

  # Reset so the else() branch below does not append to a caller's variable
  # that happens to share this name.
  set(include_dirs "")

  if(CLANG_INSTALLED)
    execute_process(
      COMMAND "${config_cmd}" --includedir
      OUTPUT_VARIABLE include_dirs
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  else()
    execute_process(
      COMMAND "${config_cmd}" --src-root
      OUTPUT_VARIABLE llvm_src_dir
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    execute_process(
      COMMAND "${config_cmd}" --obj-root
      OUTPUT_VARIABLE llvm_obj_dir
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    list(APPEND include_dirs "${llvm_src_dir}/include")
    list(APPEND include_dirs "${llvm_obj_dir}/include")
    list(APPEND include_dirs "${llvm_src_dir}/tools/clang/include")
    list(APPEND include_dirs "${llvm_obj_dir}/tools/clang/include")
  endif()

  set(CLANG_INCLUDE_DIRS ${include_dirs} PARENT_SCOPE)
endfunction()


# Locate llvm-config (the 3.2-suffixed name is tried first); $LLVM_PATH is
# searched in addition to the default locations. Configuration stops if the
# tool cannot be found.
find_program(LLVM_CONFIG
  NAMES llvm-config-3.2 llvm-config
  PATHS ENV LLVM_PATH)
if(NOT LLVM_CONFIG)
  message(FATAL_ERROR "Can't found program: llvm-config")
endif()
message(STATUS "llvm-config found : ${LLVM_CONFIG}")

# Same lookup for the clang driver itself.
find_program(CLANG_EXECUTABLE
  NAMES clang-3.2 clang
  PATHS ENV LLVM_PATH)
if(NOT CLANG_EXECUTABLE)
  message(FATAL_ERROR "Can't found program: clang")
endif()
message(STATUS "clang found : ${CLANG_EXECUTABLE}")

# Derive CLANG_DEFINITIONS and CLANG_INCLUDE_DIRS from llvm-config.
set_clang_definitions(${LLVM_CONFIG})
set_clang_include_dirs(${LLVM_CONFIG})

message(STATUS "llvm-config filtered cpp flags : ${CLANG_DEFINITIONS}")
message(STATUS "llvm-config filtered include dirs : ${CLANG_INCLUDE_DIRS}")

set(CLANG_FOUND 1)

# FIND_AND_ADD_CLANG_LIB(<libname>)
#
# Searches LLVM_LIB_DIR and CLANG_LIB_DIR for <libname>. The result is stored
# in CLANG_<libname>_LIB and, when the library was found, appended to
# CLANG_LIBS. Libraries that are not found are skipped.
macro(FIND_AND_ADD_CLANG_LIB _libname_)
  find_library(CLANG_${_libname_}_LIB ${_libname_} ${LLVM_LIB_DIR} ${CLANG_LIB_DIR})
  if(CLANG_${_libname_}_LIB)
    list(APPEND CLANG_LIBS ${CLANG_${_libname_}_LIB})
  endif()
endmacro()

# Extend the include path with the LLVM and Clang include directories, in
# that order.
list(APPEND CLANG_INCLUDE_DIRS ${LLVM_INCLUDE_DIR})
list(APPEND CLANG_INCLUDE_DIRS ${CLANG_INCLUDE_DIR})

# Probe the Clang component libraries. The order, and the second occurrence
# of clangRewrite, are kept exactly as in the original call sequence.
foreach(clang_component
    clangFrontend
    clangDriver
    clangCodeGen
    clangSema
    clangChecker
    clangAnalysis
    clangRewrite
    clangAST
    clangParse
    clangLex
    clangBasic
    clangARCMigrate
    clangEdit
    clangFrontendTool
    clangRewrite
    clangSerialization)
  FIND_AND_ADD_CLANG_LIB(${clang_component})
endforeach()
FIND_AND_ADD_CLANG_LIB(clangTooling) 137 | FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCheckers) 138 | FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCore) 139 | FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerFrontend) 140 | FIND_AND_ADD_CLANG_LIB(clangSema) 141 | FIND_AND_ADD_CLANG_LIB(clangRewriteCore) 142 | 143 | -------------------------------------------------------------------------------- /cmake/FindLLVM.cmake: -------------------------------------------------------------------------------- 1 | # Detect LLVM and set various variable to link against the different component of LLVM 2 | # 3 | # NOTE: This is a modified version of the module originally found in the OpenGTL project 4 | # at www.opengtl.org 5 | # 6 | # LLVM_BIN_DIR : directory with LLVM binaries 7 | # LLVM_LIB_DIR : directory with LLVM library 8 | # LLVM_INCLUDE_DIR : directory with LLVM include 9 | # 10 | # LLVM_COMPILE_FLAGS : compile flags needed to build a program using LLVM headers 11 | # LLVM_LDFLAGS : ldflags needed to link 12 | # LLVM_LIBS_CORE : ldflags needed to link against a LLVM core library 13 | # LLVM_LIBS_JIT : ldflags needed to link against a LLVM JIT 14 | # LLVM_LIBS_JIT_OBJECTS : objects you need to add to your source when using LLVM JIT 15 | 16 | if (LLVM_INCLUDE_DIR) 17 | set(LLVM_FOUND TRUE) 18 | else (LLVM_INCLUDE_DIR) 19 | 20 | find_program(LLVM_CONFIG_EXECUTABLE 21 | NAMES llvm-config 22 | PATHS 23 | /opt/local/bin 24 | ) 25 | 26 | MACRO(FIND_LLVM_LIBS LLVM_CONFIG_EXECUTABLE _libname_ LIB_VAR OBJECT_VAR) 27 | exec_program( ${LLVM_CONFIG_EXECUTABLE} ARGS --libs ${_libname_} OUTPUT_VARIABLE ${LIB_VAR} ) 28 | STRING(REGEX MATCHALL "[^ ]*[.]o[ $]" ${OBJECT_VAR} ${${LIB_VAR}}) 29 | SEPARATE_ARGUMENTS(${OBJECT_VAR}) 30 | STRING(REGEX REPLACE "[^ ]*[.]o[ $]" "" ${LIB_VAR} ${${LIB_VAR}}) 31 | ENDMACRO(FIND_LLVM_LIBS) 32 | 33 | 34 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --bindir OUTPUT_VARIABLE LLVM_BIN_DIR ) 35 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --libdir OUTPUT_VARIABLE 
LLVM_LIB_DIR ) 36 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --includedir OUTPUT_VARIABLE LLVM_INCLUDE_DIR ) 37 | 38 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --cxxflags OUTPUT_VARIABLE LLVM_COMPILE_FLAGS ) 39 | MESSAGE(STATUS "LLVM CXX flags: " ${LLVM_COMPILE_FLAGS}) 40 | 41 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --ldflags OUTPUT_VARIABLE LLVM_LDFLAGS ) 42 | MESSAGE(STATUS "LLVM LD flags: " ${LLVM_LDFLAGS}) 43 | 44 | exec_program(${LLVM_CONFIG_EXECUTABLE} ARGS --libs OUTPUT_VARIABLE LLVM_LIBS_CORE ) 45 | MESSAGE(STATUS "LLVM core libs: " ${LLVM_LIBS_CORE}) 46 | FIND_LLVM_LIBS( ${LLVM_CONFIG_EXECUTABLE} "jit native" LLVM_LIBS_JIT LLVM_LIBS_JIT_OBJECTS ) 47 | 48 | MESSAGE(STATUS "LLVM JIT libs: " ${LLVM_LIBS_JIT}) 49 | MESSAGE(STATUS "LLVM JIT objs: " ${LLVM_LIBS_JIT_OBJECTS}) 50 | 51 | if(LLVM_INCLUDE_DIR) 52 | set(LLVM_FOUND TRUE) 53 | endif(LLVM_INCLUDE_DIR) 54 | 55 | if(LLVM_FOUND) 56 | message(STATUS "Found LLVM: ${LLVM_INCLUDE_DIR}") 57 | else(LLVM_FOUND) 58 | if(LLVM_FIND_REQUIRED) 59 | message(FATAL_ERROR "Could NOT find LLVM") 60 | endif(LLVM_FIND_REQUIRED) 61 | endif(LLVM_FOUND) 62 | 63 | endif (LLVM_INCLUDE_DIR) 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimitrs/cpp-opencl/77e51885a7651b15318839edcbbc11a67759e7a7/include/CMakeLists.txt -------------------------------------------------------------------------------- /restrict.patch: -------------------------------------------------------------------------------- 1 | diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td 2 | index 8c820a0..0da4e02 100644 3 | --- a/include/clang/Basic/Attr.td 4 | +++ b/include/clang/Basic/Attr.td 5 | @@ -144,6 +144,12 @@ class IgnoredAttr : Attr { 6 | // Attributes begin here 7 | // 8 | 9 | +def AMPrestrict : 
InheritableAttr { 10 | + let Spellings = [GNU<"amp_restrict">]; 11 | + let Args = [VariadicExprArgument<"Args">]; 12 | + let Subjects = [Function]; 13 | +} 14 | + 15 | def AddressSpace : Attr { 16 | let Spellings = [GNU<"address_space">]; 17 | let Args = [IntArgument<"AddressSpace">]; 18 | @@ -169,13 +175,13 @@ def Aligned : InheritableAttr { 19 | 20 | def AlignMac68k : InheritableAttr { 21 | let Spellings = []; 22 | - let SemaHandler = 0; 23 | -} 24 | - 25 | -def AllocSize : InheritableAttr { 26 | - let Spellings = [GNU<"alloc_size">, CXX11<"gnu", "alloc_size">]; 27 | - let Args = [VariadicUnsignedArgument<"Args">]; 28 | -} 29 | + let SemaHandler = 0; 30 | +} 31 | + 32 | +def AllocSize : InheritableAttr { 33 | + let Spellings = [GNU<"alloc_size">, CXX11<"gnu", "alloc_size">]; 34 | + let Args = [VariadicUnsignedArgument<"Args">]; 35 | +} 36 | 37 | def AlwaysInline : InheritableAttr { 38 | let Spellings = [GNU<"always_inline">, CXX11<"gnu", "always_inline">]; 39 | @@ -323,17 +329,17 @@ def C11NoReturn : InheritableAttr { 40 | 41 | def CXX11NoReturn : InheritableAttr { 42 | let Spellings = [CXX11<"","noreturn">, CXX11<"std","noreturn">]; 43 | - let Subjects = [Function]; 44 | -} 45 | - 46 | -def OpenCLKernel : InheritableAttr { 47 | - let Spellings = [Keyword<"__kernel">, Keyword<"kernel">]; 48 | -} 49 | - 50 | -def OpenCLImageAccess : Attr { 51 | - let Spellings = [GNU<"opencl_image_access">]; 52 | - let Args = [IntArgument<"Access">]; 53 | -} 54 | + let Subjects = [Function]; 55 | +} 56 | + 57 | +def OpenCLKernel : InheritableAttr { 58 | + let Spellings = [Keyword<"__kernel">, Keyword<"kernel">]; 59 | +} 60 | + 61 | +def OpenCLImageAccess : Attr { 62 | + let Spellings = [GNU<"opencl_image_access">]; 63 | + let Args = [IntArgument<"Access">]; 64 | +} 65 | 66 | def Deprecated : InheritableAttr { 67 | let Spellings = [GNU<"deprecated">, CXX11<"gnu", "deprecated">]; 68 | @@ -342,13 +348,13 @@ def Deprecated : InheritableAttr { 69 | 70 | def Destructor : InheritableAttr 
{ 71 | let Spellings = [GNU<"destructor">, CXX11<"gnu", "destructor">]; 72 | - let Args = [IntArgument<"Priority">]; 73 | -} 74 | - 75 | -def ExtVectorType : Attr { 76 | - let Spellings = [GNU<"ext_vector_type">]; 77 | - let Args = [ExprArgument<"NumElements">]; 78 | - let ASTNode = 0; 79 | + let Args = [IntArgument<"Priority">]; 80 | +} 81 | + 82 | +def ExtVectorType : Attr { 83 | + let Spellings = [GNU<"ext_vector_type">]; 84 | + let Args = [ExprArgument<"NumElements">]; 85 | + let ASTNode = 0; 86 | } 87 | 88 | def FallThrough : Attr { 89 | @@ -435,13 +441,13 @@ def MBlazeSaveVolatiles : InheritableAttr { 90 | 91 | def Mips16 : InheritableAttr { 92 | let Spellings = [GNU<"mips16">, CXX11<"gnu", "mips16">]; 93 | - let Subjects = [Function]; 94 | -} 95 | - 96 | -def Mode : Attr { 97 | - let Spellings = [GNU<"mode">, CXX11<"gnu", "mode">]; 98 | - let Args = [IdentifierArgument<"Mode">]; 99 | -} 100 | + let Subjects = [Function]; 101 | +} 102 | + 103 | +def Mode : Attr { 104 | + let Spellings = [GNU<"mode">, CXX11<"gnu", "mode">]; 105 | + let Args = [IdentifierArgument<"Mode">]; 106 | +} 107 | 108 | def Naked : InheritableAttr { 109 | let Spellings = [GNU<"naked">, CXX11<"gnu", "naked">]; 110 | @@ -554,28 +560,28 @@ def ObjCMethodFamily : InheritableAttr { 111 | } 112 | 113 | def ObjCNSObject : InheritableAttr { 114 | - let Spellings = [GNU<"NSObject">]; 115 | -} 116 | - 117 | -def ObjCPreciseLifetime : InheritableAttr { 118 | - let Spellings = [GNU<"objc_precise_lifetime">]; 119 | - let Subjects = [Var]; 120 | -} 121 | - 122 | -def ObjCReturnsInnerPointer : InheritableAttr { 123 | - let Spellings = [GNU<"objc_returns_inner_pointer">]; 124 | - let Subjects = [ObjCMethod]; 125 | -} 126 | + let Spellings = [GNU<"NSObject">]; 127 | +} 128 | + 129 | +def ObjCPreciseLifetime : InheritableAttr { 130 | + let Spellings = [GNU<"objc_precise_lifetime">]; 131 | + let Subjects = [Var]; 132 | +} 133 | + 134 | +def ObjCReturnsInnerPointer : InheritableAttr { 135 | + let Spellings 
= [GNU<"objc_returns_inner_pointer">]; 136 | + let Subjects = [ObjCMethod]; 137 | +} 138 | 139 | def ObjCRequiresSuper : InheritableAttr { 140 | let Spellings = [GNU<"objc_requires_super">]; 141 | - let Subjects = [ObjCMethod]; 142 | -} 143 | - 144 | -def ObjCRootClass : InheritableAttr { 145 | - let Spellings = [GNU<"objc_root_class">]; 146 | - let Subjects = [ObjCInterface]; 147 | -} 148 | + let Subjects = [ObjCMethod]; 149 | +} 150 | + 151 | +def ObjCRootClass : InheritableAttr { 152 | + let Spellings = [GNU<"objc_root_class">]; 153 | + let Subjects = [ObjCInterface]; 154 | +} 155 | 156 | def Overloadable : Attr { 157 | let Spellings = [GNU<"overloadable">]; 158 | @@ -689,13 +695,13 @@ def ArcWeakrefUnavailable : InheritableAttr { 159 | def ObjCGC : Attr { 160 | let Spellings = [GNU<"objc_gc">]; 161 | let Args = [IdentifierArgument<"Kind">]; 162 | - let ASTNode = 0; 163 | -} 164 | - 165 | -def ObjCOwnership : InheritableAttr { 166 | - let Spellings = [GNU<"objc_ownership">]; 167 | - let Args = [IdentifierArgument<"Kind">]; 168 | - let ASTNode = 0; 169 | + let ASTNode = 0; 170 | +} 171 | + 172 | +def ObjCOwnership : InheritableAttr { 173 | + let Spellings = [GNU<"objc_ownership">]; 174 | + let Args = [IdentifierArgument<"Kind">]; 175 | + let ASTNode = 0; 176 | } 177 | 178 | def ObjCRequiresPropertyDefs : InheritableAttr { 179 | diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def 180 | index 3f156a8..1dde81b 100644 181 | --- a/include/clang/Basic/TokenKinds.def 182 | +++ b/include/clang/Basic/TokenKinds.def 183 | @@ -537,6 +537,7 @@ ALIAS("_thiscall" , __thiscall , KEYMS) 184 | ALIAS("_uuidof" , __uuidof , KEYMS | KEYBORLAND) 185 | ALIAS("_inline" , inline , KEYMS) 186 | ALIAS("_declspec" , __declspec , KEYMS) 187 | +ALIAS("restrict" , restrict , KEYALL) 188 | 189 | // Borland Extensions which should be disabled in strict conformance mode. 
190 | ALIAS("_pascal" , __pascal , KEYBORLAND) 191 | diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h 192 | index d4c867d..9793bd0 100644 193 | --- a/include/clang/Parse/Parser.h 194 | +++ b/include/clang/Parse/Parser.h 195 | @@ -469,7 +469,7 @@ private: 196 | public: 197 | /// NextToken - This peeks ahead one token and returns it without 198 | /// consuming it. 199 | - const Token &NextToken() { 200 | + const Token &NextToken() const { 201 | return PP.LookAhead(0); 202 | } 203 | 204 | diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp 205 | index 0b276d0..60d6930 100644 206 | --- a/lib/Parse/ParseDecl.cpp 207 | +++ b/lib/Parse/ParseDecl.cpp 208 | @@ -139,7 +139,6 @@ void Parser::ParseGNUAttributes(ParsedAttributes &attrs, 209 | // we have an identifier or declaration specifier (const, int, etc.) 210 | IdentifierInfo *AttrName = Tok.getIdentifierInfo(); 211 | SourceLocation AttrNameLoc = ConsumeToken(); 212 | - 213 | if (Tok.is(tok::l_paren)) { 214 | // handle "parameterized" attributes 215 | if (LateAttrs && isAttributeLateParsed(*AttrName)) { 216 | @@ -2688,6 +2687,14 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, 217 | continue; 218 | } 219 | 220 | + case tok::kw_restrict: 221 | + Diag(Loc, diag::warn_objc_protocol_qualifier_missing_id); 222 | + if (NextToken().is(tok::l_paren)) { 223 | + ConsumeToken(); 224 | + continue; 225 | + } 226 | + break; 227 | + 228 | // GNU attributes support. 
229 | case tok::kw___attribute: 230 | ParseGNUAttributes(DS.getAttributes(), 0, LateAttrs); 231 | @@ -3016,11 +3023,6 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, 232 | isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, DiagID, 233 | getLangOpts()); 234 | break; 235 | - case tok::kw_restrict: 236 | - isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, DiagID, 237 | - getLangOpts()); 238 | - break; 239 | - 240 | // C++ typename-specifier: 241 | case tok::kw_typename: 242 | if (TryAnnotateTypeOrScopeToken()) { 243 | @@ -3792,7 +3794,6 @@ bool Parser::isTypeQualifier() const { 244 | // type-qualifier 245 | case tok::kw_const: 246 | case tok::kw_volatile: 247 | - case tok::kw_restrict: 248 | case tok::kw___private: 249 | case tok::kw___local: 250 | case tok::kw___global: 251 | @@ -3801,6 +3802,9 @@ bool Parser::isTypeQualifier() const { 252 | case tok::kw___read_write: 253 | case tok::kw___write_only: 254 | return true; 255 | + case tok::kw_restrict: 256 | + if (NextToken().is(tok::l_paren)) return false; 257 | + return false; 258 | } 259 | } 260 | 261 | @@ -3938,7 +3942,6 @@ bool Parser::isTypeSpecifierQualifier() { 262 | // type-qualifier 263 | case tok::kw_const: 264 | case tok::kw_volatile: 265 | - case tok::kw_restrict: 266 | 267 | // Debugger support. 
268 | case tok::kw___unknown_anytype: 269 | @@ -3977,6 +3980,10 @@ bool Parser::isTypeSpecifierQualifier() { 270 | // C11 _Atomic 271 | case tok::kw__Atomic: 272 | return true; 273 | + 274 | + case tok::kw_restrict: 275 | + if (NextToken().is(tok::l_paren)) return false; 276 | + return false; 277 | } 278 | } 279 | 280 | @@ -4094,7 +4101,6 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { 281 | // type-qualifier 282 | case tok::kw_const: 283 | case tok::kw_volatile: 284 | - case tok::kw_restrict: 285 | 286 | // function-specifier 287 | case tok::kw_inline: 288 | @@ -4157,6 +4163,10 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { 289 | case tok::kw___write_only: 290 | 291 | return true; 292 | + 293 | + case tok::kw_restrict: 294 | + if (NextToken().is(tok::l_paren)) return false; 295 | + return false; 296 | } 297 | } 298 | 299 | @@ -4294,8 +4304,17 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, 300 | getLangOpts()); 301 | break; 302 | case tok::kw_restrict: 303 | - isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, DiagID, 304 | - getLangOpts()); 305 | + if (NextToken().is(tok::l_paren)) { 306 | + ConsumeToken(); 307 | + if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, "restrict")) { 308 | + SkipUntil(tok::r_paren, true); // skip until ) or ; 309 | + break; 310 | + } 311 | + //ConsumeToken(); 312 | + } 313 | + else 314 | + isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, DiagID, 315 | + getLangOpts()); 316 | break; 317 | case tok::kw__Atomic: 318 | if (!AtomicAllowed) 319 | diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp 320 | index 9704b98..b90aaa0 100644 321 | --- a/lib/Parse/ParseExprCXX.cpp 322 | +++ b/lib/Parse/ParseExprCXX.cpp 323 | @@ -1049,12 +1049,14 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( 324 | 325 | Actions.ActOnStartOfLambdaDefinition(Intro, D, getCurScope()); 326 | 327 | + /* 328 | // Parse 
compound-statement. 329 | if (!Tok.is(tok::l_brace)) { 330 | Diag(Tok, diag::err_expected_lambda_body); 331 | Actions.ActOnLambdaError(LambdaBeginLoc, getCurScope()); 332 | return ExprError(); 333 | } 334 | + */ 335 | 336 | StmtResult Stmt(ParseCompoundStatementBody()); 337 | BodyScope.Exit(); 338 | diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp 339 | index 2c6d6b3..3a1e461 100644 340 | --- a/lib/Parse/Parser.cpp 341 | +++ b/lib/Parse/Parser.cpp 342 | @@ -974,7 +974,8 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, 343 | AttributeList *DtorAttrs = D.getAttributes(); 344 | while (DtorAttrs) { 345 | if (!IsThreadSafetyAttribute(DtorAttrs->getName()->getName()) && 346 | - !DtorAttrs->isCXX11Attribute()) { 347 | + !DtorAttrs->isCXX11Attribute() && 348 | + !llvm::StringSwitch(DtorAttrs->getName()->getName()).Case("amp_restrict", true)) { 349 | Diag(DtorAttrs->getLoc(), diag::warn_attribute_on_function_definition) 350 | << DtorAttrs->getName()->getName(); 351 | } 352 | diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp 353 | index ef401c3..9086688 100644 354 | --- a/lib/Sema/SemaDeclAttr.cpp 355 | +++ b/lib/Sema/SemaDeclAttr.cpp 356 | @@ -775,6 +775,28 @@ static void handleSharedLockFunctionAttr(Sema &S, Decl *D, 357 | Attr.getAttributeSpellingListIndex())); 358 | } 359 | 360 | +static void handleAMPrestrictAttr(Sema &S, Decl *D, 361 | + const AttributeList &Attr) { 362 | + assert(!Attr.isInvalid()); 363 | + 364 | + // check that the attribute is applied to a function 365 | + if (isa(D) || isa(D)) { 366 | + SmallVector Args; 367 | + for(unsigned Idx = 0; Idx < Attr.getNumArgs(); ++Idx) { 368 | + Expr *ArgExp = Attr.getArg(Idx); 369 | + Args.push_back(ArgExp); 370 | + } 371 | + 372 | + unsigned Size = Args.size(); 373 | + Expr **StartArg = Size == 0 ? 
0 : &Args[0]; 374 | + D->addAttr(::new (S.Context) 375 | + AMPrestrictAttr(Attr.getRange(), S.Context, 376 | + StartArg, Size, 377 | + Attr.getAttributeSpellingListIndex())); 378 | + 379 | + } 380 | +} 381 | + 382 | static void handleExclusiveLockFunctionAttr(Sema &S, Decl *D, 383 | const AttributeList &Attr) { 384 | SmallVector Args; 385 | @@ -5001,6 +5023,9 @@ static void ProcessInheritableDeclAttr(Sema &S, Scope *scope, Decl *D, 386 | case AttributeList::AT_ExclusiveLockFunction: 387 | handleExclusiveLockFunctionAttr(S, D, Attr); 388 | break; 389 | + case AttributeList::AT_AMPrestrict: 390 | + handleAMPrestrictAttr(S, D, Attr); 391 | + break; 392 | case AttributeList::AT_ExclusiveLocksRequired: 393 | handleExclusiveLocksRequiredAttr(S, D, Attr); 394 | break; 395 | -------------------------------------------------------------------------------- /sources/CBackend/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories( 2 | "${CMAKE_CURRENT_SOURCE_DIR}" 3 | "${PROJECT_SOURCE_DIR}/include" 4 | "${CLANG_INCLUDE_DIRS}") 5 | 6 | SET(HEADERS 7 | ./CTargetMachine.h 8 | ) 9 | 10 | add_definitions(${CLANG_DEFINITIONS}) 11 | 12 | link_directories( 13 | "/usr/local/lib/" 14 | "${LLVM_LIB_DIR}" 15 | ) 16 | 17 | add_library(CBackendCodeGen 18 | CBackend.cpp 19 | ) 20 | 21 | add_subdirectory(TargetInfo) 22 | -------------------------------------------------------------------------------- /sources/CBackend/CTargetMachine.h: -------------------------------------------------------------------------------- 1 | //===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | // 10 | // This file declares the TargetMachine that is used by the C backend. 11 | // 12 | //===----------------------------------------------------------------------===// 13 | 14 | #ifndef CTARGETMACHINE_H 15 | #define CTARGETMACHINE_H 16 | 17 | #include "llvm/Target/TargetMachine.h" 18 | #include "llvm/IR/DataLayout.h" 19 | 20 | namespace llvm { 21 | 22 | struct CTargetMachine : public TargetMachine { 23 | CTargetMachine(const Target &T, StringRef TT, 24 | StringRef CPU, StringRef FS, const TargetOptions &Options, 25 | Reloc::Model RM, CodeModel::Model CM, 26 | CodeGenOpt::Level OL) 27 | : TargetMachine(T, TT, CPU, FS, Options) { } 28 | 29 | virtual bool addPassesToEmitFile(PassManagerBase &PM, 30 | formatted_raw_ostream &Out, 31 | CodeGenFileType FileType, 32 | bool DisableVerify, 33 | AnalysisID StartAfter, 34 | AnalysisID StartBefore 35 | ); 36 | 37 | virtual const DataLayout *getDataLayout() const { return 0; } 38 | }; 39 | 40 | extern Target TheCBackendTarget; 41 | 42 | } // End llvm namespace 43 | 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /sources/CBackend/TargetInfo/CBackendTargetInfo.cpp: -------------------------------------------------------------------------------- 1 | //===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #include "CTargetMachine.h" 11 | #include "llvm/IR/Module.h" 12 | #include "llvm/Support/TargetRegistry.h" 13 | 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | Target llvm::TheCBackendTarget; 19 | 20 | extern "C" void LLVMInitializeCBackendTargetInfo() { 21 | RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); 22 | } 23 | 24 | extern "C" void LLVMInitializeCBackendTargetMC() {} 25 | -------------------------------------------------------------------------------- /sources/CBackend/TargetInfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(GNUInstallDirs) 2 | 3 | include_directories( 4 | ${CMAKE_CURRENT_SOURCE_DIR}/.. 5 | "${CMAKE_CURRENT_SOURCE_DIR}" 6 | "${PROJECT_SOURCE_DIR}/include" 7 | "${CLANG_INCLUDE_DIRS}") 8 | 9 | add_definitions(${CLANG_DEFINITIONS}) 10 | 11 | link_directories( 12 | "/usr/local/lib/" 13 | "${LLVM_LIB_DIR}" 14 | ) 15 | 16 | add_library(LLVMCBackendInfo 17 | CBackendTargetInfo.cpp 18 | ) 19 | 20 | 21 | -------------------------------------------------------------------------------- /sources/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories( 2 | "${CMAKE_CURRENT_SOURCE_DIR}" 3 | "${PROJECT_SOURCE_DIR}/include" 4 | "${CLANG_INCLUDE_DIRS}") 5 | 6 | add_definitions(${CLANG_DEFINITIONS}) 7 | 8 | add_subdirectory(CBackend) 9 | 10 | 11 | set(HEADERS 12 | ../include/cl.h 13 | compiler/MainEntry.h 14 | compiler/BitcodeDisassembler.h 15 | compiler/Compiler.h 16 | compiler/Rewriter.h 17 | compute/ParallelForEach.h 18 | ) 19 | 20 | set(SOURCES 21 | compiler/MainEntry.cpp 22 | compiler/BitcodeDisassembler.cpp 23 | compiler/Compiler.cpp 24 | compiler/Rewriter.cpp 25 | ) 26 | 27 | set(CLANG_LIBS 28 | clangFrontend clangDriver clangCodeGen 29 | clangSema clangAnalysis clangAST 30 | clangParse clangLex clangBasic 31 | 
clangARCMigrate clangEdit clangFrontendTool 32 | clangSerialization 33 | clangTooling clangSema clangRewriteCore) 34 | 35 | set(LIBS 36 | pthread 37 | dl 38 | CBackendCodeGen 39 | LLVMCBackendInfo) 40 | 41 | set(OPENCL_LIB OpenCL) 42 | 43 | configure_file(compute/ParallelForEach.h ParallelForEach.h COPYONLY) 44 | configure_file(../include/cl.h cl.h COPYONLY) 45 | 46 | add_executable(cpp_opencl ${HEADERS} ${SOURCES} Main.cpp) 47 | target_link_libraries(cpp_opencl ${OPENCL_LIB} ${LIBS} ${LLVM_LIBS_CORE} ${CLANG_LIBS} ) 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /sources/Main.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #include "compiler/MainEntry.h" 5 | #include "compiler/Compiler.h" 6 | 7 | 8 | int main(int Argc, const char **Argv) 9 | { 10 | compiler::MainEntry(Argc, Argv, compiler::BuildClCode); 11 | 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /sources/compiler/BitcodeDisassembler.cpp: -------------------------------------------------------------------------------- 1 | #include "BitcodeDisassembler.h" 2 | 3 | #include "../sources/CBackend/CTargetMachine.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | using namespace llvm; 40 | 41 | Target llvm::TheCBackendTarget; 42 | RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); 43 | RegisterTargetMachine XX(TheCBackendTarget); 44 | 45 | 46 | namespace compiler 47 | { 48 | 49 | 50 | class BitcodeDisassemblerImpl 51 | { 52 | public: 53 | explicit 
BitcodeDisassemblerImpl(llvm::Module* M); 54 | virtual ~BitcodeDisassemblerImpl() {} 55 | 56 | /// Return the disassembled bitcode as source-code e.g. CL source 57 | std::string DisassembleModule(); 58 | 59 | protected: 60 | virtual void InitializeTargets(); 61 | virtual void InitializePasses(); 62 | virtual void CreateTargetOptions(); 63 | virtual void CreateTriple(); 64 | virtual void CreateTargetMachine(); 65 | virtual void CreatePassManager(); 66 | virtual std::string RunPass(); 67 | 68 | 69 | llvm::Module* TheModule; 70 | llvm::Triple TheTriple; 71 | TargetMachine* TheTargetMachine; 72 | llvm::TargetOptions TheTargetOptions; 73 | llvm::PassManager ThePassMgr; 74 | }; 75 | 76 | BitcodeDisassemblerImpl::BitcodeDisassemblerImpl(llvm::Module* M) : 77 | TheModule{M}, 78 | TheTriple{Triple{TheModule->getTargetTriple()}}, 79 | TheTargetMachine{nullptr} 80 | { 81 | } 82 | 83 | void BitcodeDisassemblerImpl::InitializeTargets() 84 | { 85 | InitializeAllTargets(); 86 | InitializeAllTargetMCs(); 87 | InitializeAllAsmPrinters(); 88 | InitializeAllAsmParsers(); 89 | } 90 | 91 | void BitcodeDisassemblerImpl::InitializePasses() 92 | { 93 | PassRegistry* Registry = PassRegistry::getPassRegistry(); 94 | initializeCore(*Registry); 95 | initializeCodeGen(*Registry); 96 | initializeLoopStrengthReducePass(*Registry); 97 | initializeLowerIntrinsicsPass(*Registry); 98 | initializeUnreachableBlockElimPass(*Registry); 99 | } 100 | 101 | void BitcodeDisassemblerImpl::CreateTargetOptions() 102 | { 103 | TheTargetOptions.LessPreciseFPMADOption = EnableFPMAD; 104 | TheTargetOptions.NoFramePointerElim = DisableFPElim; 105 | TheTargetOptions.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf; 106 | TheTargetOptions.AllowFPOpFusion = FuseFPOps; 107 | TheTargetOptions.UnsafeFPMath = EnableUnsafeFPMath; 108 | TheTargetOptions.NoInfsFPMath = EnableNoInfsFPMath; 109 | TheTargetOptions.NoNaNsFPMath = EnableNoNaNsFPMath; 110 | TheTargetOptions.HonorSignDependentRoundingFPMathOption = 
EnableHonorSignDependentRoundingFPMath; 111 | TheTargetOptions.UseSoftFloat = GenerateSoftFloatCalls; 112 | if (FloatABIForCalls != FloatABI::Default) 113 | TheTargetOptions.FloatABIType = FloatABIForCalls; 114 | TheTargetOptions.NoZerosInBSS = DontPlaceZerosInBSS; 115 | TheTargetOptions.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; 116 | TheTargetOptions.DisableTailCalls = DisableTailCalls; 117 | TheTargetOptions.StackAlignmentOverride = OverrideStackAlignment; 118 | TheTargetOptions.RealignStack = EnableRealignStack; 119 | TheTargetOptions.TrapFuncName = TrapFuncName; 120 | TheTargetOptions.PositionIndependentExecutable = EnablePIE; 121 | TheTargetOptions.EnableSegmentedStacks = SegmentedStacks; 122 | TheTargetOptions.UseInitArray = UseInitArray; 123 | TheTargetOptions.SSPBufferSize = SSPBufferSize; 124 | } 125 | 126 | void BitcodeDisassemblerImpl::CreateTriple() 127 | { 128 | if (TheTriple.getTriple().empty()) 129 | TheTriple.setTriple(sys::getDefaultTargetTriple()); 130 | } 131 | 132 | void BitcodeDisassemblerImpl::CreateTargetMachine() 133 | { 134 | std::string Error{}; 135 | const llvm::Target* TheTarget = TargetRegistry::lookupTarget("c", TheTriple, Error); 136 | if (!TheTarget) { 137 | throw std::runtime_error(Error); 138 | } 139 | 140 | // Package up features to be passed to target/subtarget 141 | std::string FeaturesStr; 142 | if (MAttrs.size()) { 143 | SubtargetFeatures Features; 144 | for (unsigned i = 0; i != MAttrs.size(); ++i) 145 | Features.AddFeature(MAttrs[i]); 146 | FeaturesStr = Features.getString(); 147 | } 148 | 149 | CodeGenOpt::Level OLvl = CodeGenOpt::Default; // Determine optimization level 150 | 151 | TheTargetMachine = TheTarget->createTargetMachine( 152 | TheTriple.getTriple(), MCPU, FeaturesStr, 153 | TheTargetOptions, RelocModel, CMModel, OLvl); 154 | assert(TheTargetMachine && "Could not allocate target machine!"); 155 | 156 | if (DisableDotLoc) 157 | TheTargetMachine->setMCUseLoc(false); 158 | 159 | if (DisableCFI) 160 | 
TheTargetMachine->setMCUseCFI(false); 161 | 162 | if (EnableDwarfDirectory) 163 | TheTargetMachine->setMCUseDwarfDirectory(true); 164 | 165 | if (GenerateSoftFloatCalls) 166 | FloatABIForCalls = FloatABI::Soft; 167 | 168 | // Disable .loc support for older OS X versions. 169 | if (TheTriple.isMacOSX() && 170 | TheTriple.isMacOSXVersionLT(10, 6)) 171 | TheTargetMachine->setMCUseLoc(false); 172 | 173 | // Override default to generate verbose assembly. 174 | TheTargetMachine->setAsmVerbosityDefault(true); 175 | 176 | if (RelaxAll) { 177 | if (FileType != TargetMachine::CGFT_ObjectFile) 178 | errs() << "warning: ignoring -mc-relax-all because filetype != obj"; 179 | else 180 | TheTargetMachine->setMCRelaxAll(true); 181 | } 182 | } 183 | 184 | void BitcodeDisassemblerImpl::CreatePassManager() 185 | { 186 | // Add an appropriate TargetLibraryInfo pass for the module's triple. 187 | TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple); 188 | ThePassMgr.add(TLI); 189 | 190 | // Add internal analysis passes from the target machine. 191 | TheTargetMachine->addAnalysisPasses(ThePassMgr); 192 | 193 | // Add the target data from the target machine, if it exists, or the module.
194 | if (const DataLayout *TD = TheTargetMachine->getDataLayout()) 195 | ThePassMgr.add(new DataLayout(*TD)); 196 | else 197 | ThePassMgr.add(new DataLayout(TheModule)); 198 | } 199 | 200 | std::string BitcodeDisassemblerImpl::RunPass() 201 | { 202 | std::string output; 203 | 204 | raw_string_ostream B{output}; 205 | formatted_raw_ostream FOS{B}; 206 | AnalysisID StartAfterID = 0; 207 | AnalysisID StopAfterID = 0; 208 | cl::opt NoVerify("disable-verify", cl::Hidden, cl::desc("Do not verify input module")); 209 | if (TheTargetMachine->addPassesToEmitFile(ThePassMgr, FOS, FileType, NoVerify, StartAfterID, StopAfterID)) { 210 | errs() << ": target does not support generation of this" 211 | << " file type!\n"; 212 | return ""; 213 | } 214 | 215 | //cl::PrintOptionValues(); 216 | ThePassMgr.run(*TheModule); 217 | FOS.flush(); 218 | 219 | return output; 220 | } 221 | /// Disassemble TheModule to source text via RunPass(), creating this instance's target machine on first use. 222 | std::string BitcodeDisassemblerImpl::DisassembleModule() 223 | { 224 | assert(TheModule!=nullptr); 225 | // Set up once per instance (TheTargetMachine is null until CreateTargetMachine runs); a function-local static flag would skip setup for every instance after the first. 226 | if (TheTargetMachine == nullptr) { 227 | InitializeTargets(); 228 | InitializePasses(); 229 | CreateTriple(); 230 | CreateTargetOptions(); 231 | CreateTargetMachine(); 232 | } 233 | 234 | return RunPass(); 235 | } 236 | 237 | BitcodeDisassembler::BitcodeDisassembler(llvm::Module* M) : 238 | TheDisassembler{new BitcodeDisassemblerImpl(M)} 239 | { 240 | } 241 | 242 | BitcodeDisassembler::~BitcodeDisassembler() 243 | { 244 | } 245 | 246 | std::string BitcodeDisassembler::DisassembleModule() 247 | { 248 | std::string Output = TheDisassembler->DisassembleModule(); 249 | return Output; 250 | } 251 | 252 | } 253 | 254 | 255 | 256 | -------------------------------------------------------------------------------- /sources/compiler/BitcodeDisassembler.h: -------------------------------------------------------------------------------- 1 | #ifndef BitcodeDisassembler_H 2 | #define BitcodeDisassembler_H 3 | 4 | #include 5 | #include 6 | 7 | namespace llvm 8 | { 9 | class Module; 10 | }
11 | 12 | namespace compiler 13 | { 14 | 15 | 16 | class BitcodeDisassemblerImpl; 17 | 18 | // Convert LLVM byte code to platform-specific 'CL' code 19 | class BitcodeDisassembler 20 | { 21 | public: 22 | BitcodeDisassembler(const BitcodeDisassembler& that) = delete; 23 | BitcodeDisassembler& operator=(BitcodeDisassembler&) = delete; 24 | 25 | explicit BitcodeDisassembler(llvm::Module* M); 26 | virtual ~BitcodeDisassembler(); 27 | 28 | /// Return the disassembled bitcode as source-code e.g. CL source 29 | std::string DisassembleModule(); 30 | 31 | protected: 32 | std::shared_ptr TheDisassembler; 33 | }; 34 | 35 | 36 | } // namespace compiler 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /sources/compiler/Compiler.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "Compiler.h" 3 | #include "BitcodeDisassembler.h" 4 | #include "Rewriter.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | 36 | using namespace llvm; 37 | using namespace std; 38 | using namespace clang; 39 | using namespace llvm::opt; 40 | 41 | 42 | namespace { 43 | 44 | 45 | 46 | static void LLVMErrorHandler(void *UserData, const std::string &Message, 47 | bool GenCrashDiag) 48 | { 49 | DiagnosticsEngine& Diags = *static_cast(UserData); 50 | 51 | Diags.Report(diag::err_fe_error_backend) << Message; 52 | 53 | // Run the interrupt handlers to make sure any special cleanups get done, in 54 | // particular that we remove files registered with RemoveFileOnSignal. 55 | llvm::sys::RunInterruptHandlers(); 56 | 57 | // We cannot recover from llvm errors. 
When reporting a fatal error, exit 58 | // with status 70 to generate crash diagnostics. For BSD systems this is 59 | // defined as an internal software error. Otherwise, exit with status 1. 60 | exit(GenCrashDiag ? 70 : 1); 61 | } 62 | 63 | void InitializeTargets() 64 | { 65 | llvm::InitializeAllTargets(); 66 | llvm::InitializeAllTargetMCs(); 67 | llvm::InitializeAllAsmPrinters(); 68 | llvm::InitializeAllAsmParsers(); 69 | } 70 | 71 | std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { 72 | if (!CanonicalPrefixes) 73 | return Argv0; 74 | 75 | // This just needs to be some symbol in the binary; C++ doesn't 76 | // allow taking the address of ::main however. 77 | void *P = (void*) (intptr_t) GetExecutablePath; 78 | return llvm::sys::fs::getMainExecutable(Argv0, P); 79 | } 80 | 81 | 82 | OwningPtr CreateCompilerInvocation(SmallVector& Args, DiagnosticConsumer* DiagsBuffer) 83 | { 84 | const char** ArgBegin = Args.data()+2; 85 | const char** ArgEnd = Args.data()+Args.size(); 86 | const char* ExecutablePath = Args[0]; 87 | 88 | OwningPtr Clang { new CompilerInstance() }; 89 | 90 | IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); 91 | IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); 92 | TextDiagnosticBuffer *DiagsB = new TextDiagnosticBuffer; 93 | //IgnoringDiagConsumer *DiagsB = new IgnoringDiagConsumer; 94 | DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsB); 95 | bool Success; 96 | Success = CompilerInvocation::CreateFromArgs(Clang->getInvocation(), 97 | ArgBegin, ArgEnd, Diags); 98 | 99 | void *MainAddr = (void*) (intptr_t) GetExecutablePath; 100 | 101 | // Infer the builtin include path if unspecified. 102 | if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && 103 | Clang->getHeaderSearchOpts().ResourceDir.empty()) 104 | Clang->getHeaderSearchOpts().ResourceDir = 105 | CompilerInvocation::GetResourcesPath(ExecutablePath, MainAddr); 106 | 107 | // Create the actual diagnostics engine. 
108 | if (DiagsBuffer) 109 | Clang->createDiagnostics(); 110 | else 111 | Clang->createDiagnostics(new IgnoringDiagConsumer); 112 | if (!Clang->hasDiagnostics()) 113 | return OwningPtr(); 114 | 115 | DiagsB->FlushDiagnostics(Clang->getDiagnostics()); 116 | if (!Success) { 117 | return OwningPtr(); 118 | } 119 | return Clang; 120 | } 121 | 122 | void CreateTarget(OwningPtr& Clang) 123 | { 124 | clang::TargetOptions* TO = new clang::TargetOptions; 125 | TO->Triple = llvm::sys::getDefaultTargetTriple(); 126 | Clang->setTarget(clang::TargetInfo::CreateTargetInfo(Clang->getDiagnostics(), TO)); 127 | } 128 | 129 | void CreateFileManager(OwningPtr& Clang) 130 | { 131 | Clang->createFileManager(); 132 | FileManager& FileMgr = Clang->getFileManager(); 133 | Clang->createSourceManager(FileMgr); 134 | } 135 | 136 | void CreatePreprocessor(OwningPtr& Clang) 137 | { 138 | Clang->createPreprocessor(); 139 | Preprocessor &PP = Clang->getPreprocessor(); 140 | PP.getBuiltinInfo().InitializeBuiltins(PP.getIdentifierTable(), PP.getLangOpts()); 141 | } 142 | 143 | void CreateAST(OwningPtr& Clang) 144 | { 145 | Clang->createASTContext(); 146 | } 147 | 148 | void CreateMainFile(OwningPtr& Clang, const char* SourceFileName) 149 | { 150 | Clang->getSourceManager().createMainFileID(Clang->getFileManager().getFile(SourceFileName)); 151 | Clang->getDiagnosticClient().BeginSourceFile(Clang->getLangOpts(), &Clang->getPreprocessor()); 152 | } 153 | 154 | void InstallFatalErrorHandler(OwningPtr& Clang) 155 | { 156 | llvm::remove_fatal_error_handler(); 157 | // Set an error handler, so that any LLVM backend diagnostics go through our 158 | // error handler. 
159 | llvm::install_fatal_error_handler(LLVMErrorHandler, 160 | static_cast(&Clang->getDiagnostics())); 161 | } 162 | /// Return the argument that follows "-c"; otherwise the last argument (empty string when Args is empty). 163 | std::string GetSourceFileName(SmallVector& Args) 164 | { 165 | SmallVector::const_iterator it = 166 | find_if(begin(Args), end(Args), [](const char* s) { return string(s) == string("-c"); } ); 167 | if (it!=end(Args) && (it+1)!=end(Args)) return *(it+1); // never step past the end when "-c" is the final argument 168 | else return Args.empty() ? std::string() : std::string(Args[Args.size()-1]); 169 | } 170 | 171 | OwningPtr CreateCompilerInstance(SmallVector& Args, DiagnosticConsumer* DiagsBuffer) 172 | { 173 | OwningPtr Clang { CreateCompilerInvocation(Args, DiagsBuffer) }; 174 | InstallFatalErrorHandler(Clang); 175 | CreateTarget(Clang); 176 | CreateFileManager(Clang); 177 | CreatePreprocessor(Clang); 178 | CreateAST(Clang); 179 | CreateMainFile(Clang, GetSourceFileName(Args).c_str()); 180 | return Clang; 181 | } 182 | 183 | void CompileCpuSourceFile(SmallVector& Args, std::string SourceCode) 184 | { 185 | std::string CpuFileName { GetSourceFileName(Args) + "_cpu.cpp" }; 186 | std::ofstream cpu_file{ CpuFileName }; 187 | cpu_file << SourceCode; 188 | cpu_file.close(); 189 | 190 | SmallVector ArgsCpu {Args}; 191 | SmallVector::iterator it = 192 | find_if(begin(ArgsCpu), end(ArgsCpu), [](const char* s) { return string(s) == string("-c"); } ); 193 | if (it!=end(ArgsCpu)) *++it = CpuFileName.c_str(); 194 | else ArgsCpu[ArgsCpu.size()-1] = CpuFileName.c_str(); 195 | 196 | OwningPtr Clang { CreateCompilerInstance(ArgsCpu, new TextDiagnosticBuffer) }; 197 | 198 | ExecuteCompilerInvocation(Clang.get()); 199 | 200 | // If any timers were active but haven't been destroyed yet, print their 201 | // results now. This happens in -disable-free mode.
202 | llvm::TimerGroup::printAll(llvm::errs()); 203 | } 204 | 205 | std::string CompileGpuSourceFile(SmallVector& Args, std::string SourceCode) 206 | { 207 | std::string GpuFileName { GetSourceFileName(Args) + "_gpu.cpp" }; 208 | std::ofstream gpu_file{ GpuFileName }; 209 | gpu_file << SourceCode; 210 | gpu_file.close(); 211 | 212 | SmallVector ArgsGpu {Args}; 213 | SmallVector::iterator it2 = 214 | find_if(begin(ArgsGpu), end(ArgsGpu), [](const char* s) { return string(s) == string("-c"); } ); 215 | if (it2!=end(ArgsGpu)) *++it2 = GpuFileName.c_str(); 216 | else ArgsGpu[ArgsGpu.size()-1] = GpuFileName.c_str(); 217 | 218 | OwningPtr Clang { CreateCompilerInstance(ArgsGpu, new TextDiagnosticBuffer) }; 219 | 220 | OwningPtr Act(new clang::EmitLLVMOnlyAction()); 221 | if (!Clang->ExecuteAction(*Act)) { 222 | Act.reset(); 223 | llvm::errs() << "Could not generate source\n"; 224 | return ""; 225 | } 226 | 227 | compiler::BitcodeDisassembler cm{Act->takeModule()}; 228 | std::string OpenCLSource = cm.DisassembleModule(); 229 | 230 | std::string OpenClFileName { GetSourceFileName(Args) + ".cl" }; 231 | std::ofstream a_file{ OpenClFileName }; 232 | a_file << OpenCLSource; 233 | a_file.close(); 234 | 235 | #ifdef Put 236 | llvm::errs() << "_________________ OpenCL _____________________________\n"; 237 | llvm::errs() << OpenCLSource; 238 | llvm::errs() << "\n\n"; 239 | #endif 240 | return OpenCLSource; 241 | } 242 | 243 | 244 | } // namespace 245 | 246 | namespace compiler { 247 | 248 | 249 | std::vector RewriteSourceFile(SmallVector& Args) 250 | { 251 | OwningPtr Clang { CreateCompilerInstance(Args, nullptr) }; 252 | 253 | compiler::RewriterASTConsumer* TheConsumer = new compiler::RewriterASTConsumer {Clang}; 254 | Preprocessor &PP = Clang->getPreprocessor(); 255 | PP.addPPCallbacks(TheConsumer); // Takes ownership of TheConsumer 256 | ParseAST(Clang, *TheConsumer); 257 | 258 | return {TheConsumer->GetRewritenCpuSource(), TheConsumer->GetRewritenGpuSource()}; 259 | } 260 | 
261 | std::vector BuildClCode(SmallVector& Args) 262 | { 263 | InitializeTargets(); 264 | 265 | auto Sources = RewriteSourceFile(Args); 266 | assert(Sources.size() == 2); 267 | CompileCpuSourceFile(Args, Sources[0]); 268 | CompileGpuSourceFile(Args, Sources[1]); 269 | 270 | llvm::llvm_shutdown(); 271 | 272 | return {}; 273 | } 274 | 275 | } // namespace compiler 276 | 277 | -------------------------------------------------------------------------------- /sources/compiler/Compiler.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_H 2 | #define COMPILER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace compiler { 9 | 10 | std::vector RewriteSourceFile(clang::SmallVector& Args); 11 | std::vector BuildClCode(clang::SmallVector& Args); 12 | 13 | 14 | } 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /sources/compiler/MainEntry.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | 31 | #include "Compiler.h" 32 | 33 | 34 | ////////////////////////// 35 | /// Copied from Clang 36 | ////////////////////////// 37 | 38 | 39 | using namespace llvm; 40 | using namespace clang; 41 | using namespace llvm::opt; 42 | using namespace clang::driver; 43 | 44 | namespace { 45 | 46 | static const char *SaveStringInSet(std::set &SavedStrings, StringRef S) 47 | { 48 | return SavedStrings.insert(S).first->c_str(); 49 | } 50 | 51 | static void ApplyOneQAOverride(raw_ostream &OS, 52 | SmallVectorImpl &Args, 53 | StringRef Edit, 54 | std::set &SavedStrings) 55 | { 56 | // This 
does not need to be efficient. 57 | 58 | if (Edit[0] == '^') { 59 | const char *Str = SaveStringInSet(SavedStrings, Edit.substr(1)); 60 | OS << "### Adding argument " << Str << " at beginning\n"; 61 | Args.insert(Args.begin() + 1, Str); 62 | } else if (Edit[0] == '+') { 63 | const char *Str = 64 | SaveStringInSet(SavedStrings, Edit.substr(1)); 65 | OS << "### Adding argument " << Str << " at end\n"; 66 | Args.push_back(Str); 67 | } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && 68 | Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { 69 | StringRef MatchPattern = Edit.substr(2).split('/').first; 70 | StringRef ReplPattern = Edit.substr(2).split('/').second; 71 | ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); 72 | 73 | for (unsigned i = 1, e = Args.size(); i != e; ++i) { 74 | std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); 75 | 76 | if (Repl != Args[i]) { 77 | OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; 78 | Args[i] = SaveStringInSet(SavedStrings, Repl); 79 | } 80 | } 81 | } else if (Edit[0] == 'x' || Edit[0] == 'X') { 82 | std::string Option = Edit.substr(1, std::string::npos); 83 | for (unsigned i = 1; i < Args.size();) { 84 | if (Option == Args[i]) { 85 | OS << "### Deleting argument " << Args[i] << '\n'; 86 | Args.erase(Args.begin() + i); 87 | if (Edit[0] == 'X') { 88 | if (i < Args.size()) { 89 | OS << "### Deleting argument " << Args[i] << '\n'; 90 | Args.erase(Args.begin() + i); 91 | } else 92 | OS << "### Invalid X edit, end of command line!\n"; 93 | } 94 | } else 95 | ++i; 96 | } 97 | } else if (Edit[0] == 'O') { 98 | for (unsigned i = 1; i < Args.size();) { 99 | const char *A = Args[i]; 100 | if (A[0] == '-' && A[1] == 'O' && 101 | (A[2] == '\0' || 102 | (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || 103 | ('0' <= A[2] && A[2] <= '9'))))) { 104 | OS << "### Deleting argument " << Args[i] << '\n'; 105 | Args.erase(Args.begin() + i); 106 | } else 107 | ++i; 108 | } 109 | 
OS << "### Adding argument " << Edit << " at end\n"; 110 | Args.push_back(SaveStringInSet(SavedStrings, '-' + Edit.str())); 111 | } else { 112 | OS << "### Unrecognized edit: " << Edit << "\n"; 113 | } 114 | } 115 | 116 | /// ApplyQAOverride - Apply a space-separated list of edits to the 117 | /// input argument lists. See ApplyOneQAOverride. 118 | static void ApplyQAOverride(SmallVectorImpl &Args, 119 | const char *OverrideStr, 120 | std::set &SavedStrings) 121 | { 122 | raw_ostream *OS = &llvm::errs(); 123 | 124 | if (OverrideStr[0] == '#') { 125 | ++OverrideStr; 126 | OS = &llvm::nulls(); 127 | } 128 | 129 | *OS << "### QA_OVERRIDE_GCC3_OPTIONS: " << OverrideStr << "\n"; 130 | 131 | // This does not need to be efficient. 132 | 133 | const char *S = OverrideStr; 134 | while (*S) { 135 | const char *End = ::strchr(S, ' '); 136 | if (!End) 137 | End = S + strlen(S); 138 | if (End != S) 139 | ApplyOneQAOverride(*OS, Args, std::string(S, End), SavedStrings); 140 | S = End; 141 | if (*S != '\0') 142 | ++S; 143 | } 144 | } 145 | 146 | 147 | 148 | class StringSetSaver : public llvm::cl::StringSaver { 149 | public: 150 | StringSetSaver(std::set &Storage) : Storage(Storage) {} 151 | 152 | const char *SaveString(const char *Str) LLVM_OVERRIDE { 153 | return SaveStringInSet(Storage, Str); 154 | } 155 | private: 156 | std::set &Storage; 157 | }; 158 | 159 | 160 | std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { 161 | if (!CanonicalPrefixes) 162 | return Argv0; 163 | 164 | // This just needs to be some symbol in the binary; C++ doesn't 165 | // allow taking the address of ::main however. 166 | void *P = (void*) (intptr_t) GetExecutablePath; 167 | return llvm::sys::fs::getMainExecutable(Argv0, P); 168 | } 169 | 170 | static void ParseProgName(SmallVectorImpl &ArgVector, 171 | std::set &SavedStrings, 172 | Driver &TheDriver) 173 | { 174 | // Try to infer frontend type and default target from the program name.
175 | 176 | // suffixes[] contains the list of known driver suffixes. 177 | // Suffixes are compared against the program name in order. 178 | // If there is a match, the frontend type is updated as necessary (CPP/C++). 179 | // If there is no match, a second round is done after stripping the last 180 | // hyphen and everything following it. This allows using something like 181 | // "clang++-2.9". 182 | 183 | // If there is a match in either the first or second round, 184 | // the function tries to identify a target as prefix. E.g. 185 | // "x86_64-linux-clang" is interpreted as suffix "clang" with 186 | // target prefix "x86_64-linux". If such a target prefix is found, 187 | // it gets added via -target as implicit first argument. 188 | static const struct { 189 | const char *Suffix; 190 | const char *ModeFlag; 191 | } suffixes [] = { 192 | { "clang", 0 }, 193 | { "clang++", "--driver-mode=g++" }, 194 | { "clang-c++", "--driver-mode=g++" }, 195 | { "clang-cc", 0 }, 196 | { "clang-cpp", "--driver-mode=cpp" }, 197 | { "clang-g++", "--driver-mode=g++" }, 198 | { "clang-gcc", 0 }, 199 | { "cc", 0 }, 200 | { "cpp", "--driver-mode=cpp" }, 201 | { "++", "--driver-mode=g++" }, 202 | }; 203 | 204 | std::string ProgName(llvm::sys::path::stem(ArgVector[0])); 205 | StringRef ProgNameRef(ProgName); 206 | StringRef Prefix; 207 | 208 | for (int Components = 2; Components; --Components) { 209 | bool FoundMatch = false; 210 | size_t i; 211 | 212 | for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); ++i) { 213 | if (ProgNameRef.endswith(suffixes[i].Suffix)) { 214 | FoundMatch = true; 215 | SmallVectorImpl::iterator it = ArgVector.begin(); 216 | if (it != ArgVector.end()) 217 | ++it; 218 | if (suffixes[i].ModeFlag) 219 | ArgVector.insert(it, suffixes[i].ModeFlag); 220 | break; 221 | } 222 | } 223 | 224 | if (FoundMatch) { 225 | StringRef::size_type LastComponent = ProgNameRef.rfind('-', 226 | ProgNameRef.size() - strlen(suffixes[i].Suffix)); 227 | if (LastComponent !=
StringRef::npos) 228 | Prefix = ProgNameRef.slice(0, LastComponent); 229 | break; 230 | } 231 | 232 | StringRef::size_type LastComponent = ProgNameRef.rfind('-'); 233 | if (LastComponent == StringRef::npos) 234 | break; 235 | ProgNameRef = ProgNameRef.slice(0, LastComponent); 236 | } 237 | 238 | if (Prefix.empty()) 239 | return; 240 | 241 | std::string IgnoredError; 242 | if (llvm::TargetRegistry::lookupTarget(Prefix, IgnoredError)) { 243 | SmallVectorImpl::iterator it = ArgVector.begin(); 244 | if (it != ArgVector.end()) 245 | ++it; 246 | ArgVector.insert(it, SaveStringInSet(SavedStrings, Prefix)); 247 | ArgVector.insert(it, 248 | SaveStringInSet(SavedStrings, std::string("-target"))); 249 | } 250 | } 251 | 252 | 253 | 254 | 255 | } // namespace 256 | 257 | 258 | namespace compiler { 259 | 260 | std::vector MainEntry(int Argc, 261 | const char **Argv, 262 | std::function(clang::SmallVector&)> Func) 263 | { 264 | llvm::sys::PrintStackTraceOnErrorSignal(); 265 | llvm::PrettyStackTraceProgram X(Argc, Argv); 266 | 267 | std::set SavedStrings; 268 | SmallVector argv(Argv, Argv + Argc); 269 | StringSetSaver Saver(SavedStrings); 270 | llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, argv); 271 | 272 | // Handle -cc1 integrated tools. 273 | if (argv.size() > 1 && StringRef(argv[1]).startswith("-cc1")) { 274 | StringRef Tool = argv[1] + 4; 275 | if (Tool == "") 276 | return Func(argv); 277 | llvm::errs() << "error: unknown integrated tool '" << Tool << "'\n"; 278 | return {"",""}; 279 | } 280 | 281 | bool CanonicalPrefixes = true; 282 | for (int i = 1, size = argv.size(); i < size; ++i) { 283 | if (StringRef(argv[i]) == "-no-canonical-prefixes") { 284 | CanonicalPrefixes = false; 285 | break; 286 | } 287 | } 288 | 289 | // Handle QA_OVERRIDE_GCC3_OPTIONS and CCC_ADD_ARGS, used for editing a 290 | // command line behind the scenes. 
291 | if (const char *OverrideStr = ::getenv("QA_OVERRIDE_GCC3_OPTIONS")) { 292 | // FIXME: Driver shouldn't take extra initial argument. 293 | ApplyQAOverride(argv, OverrideStr, SavedStrings); 294 | } else if (const char *Cur = ::getenv("CCC_ADD_ARGS")) { 295 | // FIXME: Driver shouldn't take extra initial argument. 296 | std::vector ExtraArgs; 297 | for (;;) { 298 | const char *Next = strchr(Cur, ','); 299 | if (Next) { 300 | ExtraArgs.push_back(SaveStringInSet(SavedStrings, 301 | std::string(Cur, Next))); 302 | Cur = Next + 1; 303 | } 304 | else { 305 | if (*Cur != '\0') { 306 | ExtraArgs.push_back(SaveStringInSet(SavedStrings, Cur)); 307 | } 308 | break; 309 | } 310 | } 311 | argv.insert(&argv[1], ExtraArgs.begin(), ExtraArgs.end()); 312 | } 313 | 314 | std::string Path = GetExecutablePath(argv[0], CanonicalPrefixes); 315 | IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions; 316 | { 317 | // Note that ParseDiagnosticArgs() uses the cc1 option table. 318 | OwningPtr CC1Opts(createDriverOptTable()); 319 | unsigned MissingArgIndex, MissingArgCount; 320 | OwningPtr Aargs(CC1Opts->ParseArgs( 321 | argv.begin()+1, 322 | argv.end(), 323 | MissingArgIndex, 324 | MissingArgCount)); 325 | 326 | // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. 327 | // Any errors that would be diagnosed here will also be diagnosed later, 328 | // when the DiagnosticsEngine actually exists. 329 | (void) ParseDiagnosticArgs(*DiagOpts, *Aargs); 330 | } 331 | // Now we can create the DiagnosticsEngine with a properly-filled-out 332 | // DiagnosticOptions instance. 
333 | TextDiagnosticPrinter *DiagClient 334 | = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); 335 | DiagClient->setPrefix(llvm::sys::path::filename(Path)); 336 | IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); 337 | 338 | DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); 339 | ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); 340 | 341 | Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), "a.out", Diags); 342 | 343 | // Attempt to find the original path used to invoke the driver, to determine 344 | // the installed path. We do this manually, because we want to support that 345 | // path being a symlink. 346 | { 347 | SmallString<128> InstalledPath(argv[0]); 348 | 349 | // Do a PATH lookup, if there are no directory components. 350 | if (llvm::sys::path::filename(InstalledPath) == InstalledPath) { 351 | std::string Tmp = llvm::sys::FindProgramByName( 352 | llvm::sys::path::filename(InstalledPath.str())); 353 | if (!Tmp.empty()) 354 | InstalledPath = Tmp; 355 | } 356 | llvm::sys::fs::make_absolute(InstalledPath); 357 | InstalledPath = llvm::sys::path::parent_path(InstalledPath); 358 | bool exists; 359 | if (!llvm::sys::fs::exists(InstalledPath.str(), exists) && exists) { 360 | TheDriver.setInstalledDir(InstalledPath); 361 | } 362 | } 363 | 364 | llvm::InitializeAllTargets(); 365 | ParseProgName(argv, SavedStrings, TheDriver); 366 | 367 | // Handle CC_PRINT_OPTIONS and CC_PRINT_OPTIONS_FILE. 368 | TheDriver.CCPrintOptions = !!::getenv("CC_PRINT_OPTIONS"); 369 | if (TheDriver.CCPrintOptions) { 370 | TheDriver.CCPrintOptionsFilename = ::getenv("CC_PRINT_OPTIONS_FILE"); 371 | } 372 | 373 | // Handle CC_PRINT_HEADERS and CC_PRINT_HEADERS_FILE. 374 | TheDriver.CCPrintHeaders = !!::getenv("CC_PRINT_HEADERS"); 375 | if (TheDriver.CCPrintHeaders) { 376 | TheDriver.CCPrintHeadersFilename = ::getenv("CC_PRINT_HEADERS_FILE"); 377 | } 378 | 379 | // Handle CC_LOG_DIAGNOSTICS and CC_LOG_DIAGNOSTICS_FILE. 
380 | TheDriver.CCLogDiagnostics = !!::getenv("CC_LOG_DIAGNOSTICS"); 381 | if (TheDriver.CCLogDiagnostics) { 382 | TheDriver.CCLogDiagnosticsFilename = ::getenv("CC_LOG_DIAGNOSTICS_FILE"); 383 | } 384 | 385 | OwningPtr C(TheDriver.BuildCompilation(argv)); 386 | int Res = 0; 387 | SmallVector, 4> FailingCommands; 388 | #ifdef Out 389 | C->PrintJob(llvm::errs(), C->getJobs(), "\n", true); 390 | #endif 391 | 392 | const JobList *Jobs = cast(&C->getJobs()); 393 | for (JobList::const_iterator it = Jobs->begin(), ie = Jobs->end(); it != ie; ++it) { 394 | if (const Command *Cmd = dyn_cast((*it))) { 395 | const char **Argv = new const char*[Cmd->getArguments().size() + 1]; 396 | Argv[0] = Cmd->getExecutable(); 397 | std::copy(Cmd->getArguments().begin(), Cmd->getArguments().end(), Argv+1); 398 | SmallVector argv2(Argv, Argv + Cmd->getArguments().size() + 1); 399 | if (argv2.size() > 1 && StringRef(argv2[1]).startswith("-cc1")) { 400 | StringRef Tool = argv2[1] + 4; 401 | if (Tool == "") 402 | return Func(argv2); 403 | } 404 | } 405 | } 406 | 407 | // Force a crash to test the diagnostics. 408 | if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) { 409 | Diags.Report(diag::err_drv_force_crash) << "FORCE_CLANG_DIAGNOSTICS_CRASH"; 410 | const Command *FailingCommand = 0; 411 | FailingCommands.push_back(std::make_pair(-1, FailingCommand)); 412 | } 413 | 414 | for (SmallVectorImpl< std::pair >::iterator it = 415 | FailingCommands.begin(), ie = FailingCommands.end(); it != ie; ++it) { 416 | int CommandRes = it->first; 417 | const Command *FailingCommand = it->second; 418 | if (!Res) 419 | Res = CommandRes; 420 | 421 | // If result status is < 0, then the driver command signalled an error. 422 | // If result status is 70, then the driver command reported a fatal error. 423 | // In these cases, generate additional diagnostic information if possible. 
424 | if (CommandRes < 0 || CommandRes == 70) { 425 | TheDriver.generateCompilationDiagnostics(*C, FailingCommand); 426 | break; 427 | } 428 | } 429 | 430 | // If any timers were active but haven't been destroyed yet, print their 431 | // results now. This happens in -disable-free mode. 432 | llvm::TimerGroup::printAll(llvm::errs()); 433 | 434 | llvm::llvm_shutdown(); 435 | 436 | #ifdef _WIN32 437 | // Exit status should not be negative on Win32, unless abnormal termination. 438 | // Once abnormal termination was caught, negative status should not be 439 | // propagated. 440 | if (Res < 0) 441 | Res = 1; 442 | #endif 443 | 444 | /* Fallback result: two empty strings. NOTE(review): Res is computed above but is not part of this return value. */ return {"",""}; 445 | } 446 | 447 | 448 | } // namespace compiler 449 | -------------------------------------------------------------------------------- /sources/compiler/MainEntry.h: -------------------------------------------------------------------------------- 1 | #ifndef MAINENTRY_H 2 | #define MAINENTRY_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace compiler { 11 | 12 | /* Driver entry point. In the implementation, the argument list of the first job whose first argument is exactly -cc1 is passed to the callback and the callback result is returned; when no such job exists, two empty strings are returned. */ std::vector MainEntry(int Argc, 13 | const char** Argv, 14 | std::function(clang::SmallVector&)> F); 15 | 16 | } 17 | 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /sources/compiler/Rewriter.cpp: -------------------------------------------------------------------------------- 1 | #include "Rewriter.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace clang; 10 | 11 | namespace compiler { 12 | 13 | 14 | /* Thin wrapper: runs clang::ParseAST with the preprocessor and AST context of TheCompInst, delivering the AST to TheConsumer. */ void ParseAST(OwningPtr& TheCompInst, RewriterASTConsumer& TheConsumer) 15 | { 16 | clang::ParseAST(TheCompInst->getPreprocessor(), &TheConsumer, TheCompInst->getASTContext()); 17 | } 18 | 19 | /* Starts with empty rewritten-source strings and a zero parallel_for_each call count, then binds the CPU and GPU rewriters to the source manager and language options of CI. NOTE(review): the Func member is not initialized by this constructor. */ RewriterASTConsumer::RewriterASTConsumer(const OwningPtr& CI) : 20 | RewritenCpuSource{}, RewritenGpuSource{}, ParallelForEachCallCount{0} 21 | { 22 | TheCpuRewriter.setSourceMgr(CI->getSourceManager(), CI->getLangOpts()); 23 |
TheGpuRewriter.setSourceMgr(CI->getSourceManager(), CI->getLangOpts()); 24 | } 25 | 26 | RewriterASTConsumer::~RewriterASTConsumer() 27 | {} 28 | 29 | /* Returns a copy of the CPU-side source text stored by HandleTranslationUnit. */ std::string RewriterASTConsumer::GetRewritenCpuSource() const 30 | { 31 | return RewritenCpuSource; 32 | } 33 | 34 | /* Returns a copy of the GPU-side source text stored by HandleTranslationUnit. */ std::string RewriterASTConsumer::GetRewritenGpuSource() const 35 | { 36 | return RewritenGpuSource; 37 | } 38 | 39 | /* Preprocessor callback. When the included file name is exactly ParallelForEach.h, the directive text from HashLoc up to the spelling location of the end of the file-name range is removed from the GPU rewriter only; the CPU source keeps the include. All other parameters are unused. */ void RewriterASTConsumer::InclusionDirective(clang::SourceLocation HashLoc, 40 | const clang::Token& IncludeTok, 41 | clang::StringRef FileName, 42 | bool IsAngled, 43 | clang::CharSourceRange FilenameRange, 44 | const clang::FileEntry* File, 45 | clang::StringRef SearchPath, 46 | clang::StringRef RelativePath, 47 | const clang::Module* Imported) 48 | { 49 | 50 | if ("ParallelForEach.h" == FileName) { 51 | SourceManager& SM = TheGpuRewriter.getSourceMgr(); 52 | SourceRange Range; 53 | Range.setBegin(HashLoc); 54 | Range.setEnd(SM.getSpellingLoc(FilenameRange.getEnd())); 55 | TheGpuRewriter.RemoveText(Range); 56 | } 57 | } 58 | 59 | /* Traverses the whole translation unit (which drives the Visit* callbacks), then copies the main-file edit buffers of the GPU and CPU rewriters into RewritenGpuSource and RewritenCpuSource. When the Out macro is defined, both texts are also written to llvm::errs(). */ void RewriterASTConsumer::HandleTranslationUnit(ASTContext& Context) 60 | { 61 | TranslationUnitDecl* D = Context.getTranslationUnitDecl(); 62 | TraverseDecl(D); 63 | 64 | const RewriteBuffer& RewriteBufG = 65 | TheGpuRewriter.getEditBuffer(TheGpuRewriter.getSourceMgr().getMainFileID()); 66 | RewritenGpuSource = std::string(RewriteBufG.begin(), RewriteBufG.end()); 67 | 68 | const RewriteBuffer& RewriteBufC = 69 | TheCpuRewriter.getEditBuffer(TheCpuRewriter.getSourceMgr().getMainFileID()); 70 | RewritenCpuSource = std::string(RewriteBufC.begin(), RewriteBufC.end()); 71 | 72 | #ifdef Out 73 | llvm::errs() << "______________________________ CPU _____________________ \n"; 74 | llvm::errs() << std::string(RewriteBufC.begin(), RewriteBufC.end()); 75 | llvm::errs() << "\n\n______________________________ GPU _____________________ \n"; 76 | llvm::errs() << std::string(RewriteBufG.begin(), RewriteBufG.end()); 77 | #endif 78 | } 79 | 80 | void 
RemoveFunction(clang::Rewriter& TheRewriter, const FunctionDecl* F) 81 | { /* Removes the source range of F from TheRewriter, extended over a directly following semicolon when there is one; when F has a described function template, that template range is removed as well. */ 82 | ExpandSourceRange SourceRange{TheRewriter}; 83 | TheRewriter.RemoveText(SourceRange(F->getSourceRange())); 84 | if (FunctionTemplateDecl* D = F->getDescribedFunctionTemplate()) { 85 | TheRewriter.RemoveText(SourceRange(D->getSourceRange())); 86 | } 87 | } 88 | 89 | /* Visitor callback for every function declaration. Remembers F in Func, removes main() from the GPU source, and for functions carrying a restrict attribute either reports an invalid attribute on llvm::errs() or removes cpu-only functions from the GPU source and gpu-only functions from the CPU source. Always returns true so traversal continues. */ bool RewriterASTConsumer::VisitFunctionDecl(clang::FunctionDecl const * const F) 90 | { 91 | Func = F; 92 | 93 | if (F->isMain()) { 94 | TheGpuRewriter.RemoveText(F->getSourceRange()); 95 | return true; 96 | } 97 | 98 | HasRestrictAttribute Result{F}; 99 | if (!Result.IsRestrict()) return true; 100 | if (!Result.IsValid()) { 101 | SourceManager& TheSourceMgr = F->getASTContext().getSourceManager(); 102 | llvm::errs() << "Not Valid amp attribute: " << Result.getLocation().printToString(TheSourceMgr) << "\n"; 103 | return true; 104 | } 105 | 106 | if (Result.HasCPU() && !Result.HasGPU()) { 107 | RemoveFunction(TheGpuRewriter, F); 108 | } 109 | 110 | if (!Result.HasCPU() && Result.HasGPU()) { 111 | RemoveFunction(TheCpuRewriter, F); 112 | } 113 | 114 | return true; 115 | } 116 | 117 | /* Removes the range of a call expression (plus a directly following semicolon, if any) from TheRewriter. The only call to it visible in this file is commented out. */ void RemoveStatement(clang::Rewriter& TheRewriter, clang::CallExpr const * const Statement) 118 | { 119 | ExpandSourceRange SourceRange{TheRewriter}; 120 | TheRewriter.RemoveText(SourceRange(Statement->getSourceRange())); 121 | } 122 | 123 | /* Visitor callback for every call expression. Only direct calls whose qualified callee name equals compute::parallel_for_each are handled: the function stored in Func by VisitFunctionDecl is removed from the GPU source and the call is rewritten. NOTE(review): Func is passed on without a null check. Always returns true. */ bool RewriterASTConsumer::VisitCallExpr(clang::CallExpr const * const Statement) 124 | { 125 | if (clang::FunctionDecl const * const F = Statement->getDirectCallee()) { 126 | if ("compute::parallel_for_each" != F->getQualifiedNameAsString()) 127 | return true; 128 | //RemoveStatement(TheGpuRewriter, Statement); 129 | RemoveFunction(TheGpuRewriter, Func); 130 | OnParallelForEachCall(Statement); 131 | } 132 | return true; 133 | } 134 | 135 | /* Writes the GPU-side declarations (first call only) and then lets a LambdaRewiter rewrite the lambda argument of the call. */ void RewriterASTConsumer::OnParallelForEachCall(clang::CallExpr const * const Statement) 136 | { 137 | WriteGpuDeclarators(Statement); 138 | 139 | LambdaRewiter 
Lambda(TheCpuRewriter, TheGpuRewriter); 140 | Lambda.Rewrite(Statement); 141 | } 142 | 143 | /* On the first parallel_for_each call only (the counter is incremented on every call), appends extern-C declarations of get_global_id and get_global_size at the end of the file that contains the call, in the GPU rewriter. */ void RewriterASTConsumer::WriteGpuDeclarators(clang::CallExpr const * const Statement) 144 | { 145 | if (++ParallelForEachCallCount == 1) { 146 | SourceManager& SM = TheGpuRewriter.getSourceMgr(); 147 | std::pair locInfo = SM.getDecomposedLoc(Statement->getLocStart()); 148 | SourceLocation Eof = SM.getLocForEndOfFile(locInfo.first); 149 | std::string Decls1 { "extern \"C\" long long get_global_id(int);" }; 150 | std::string Decls2 { "extern \"C\" int get_global_size(int);" }; 151 | TheGpuRewriter.InsertTextAfter(Eof, Decls1 + "\n" + Decls2 + "\n\n\n"); 152 | } 153 | } 154 | 155 | 156 | } // namespace compiler 157 | 158 | 159 | namespace { 160 | 161 | /* Stores references to the two rewriters; nothing is rewritten until Rewrite() is called. */ LambdaRewiter::LambdaRewiter(Rewriter& CpuRewriter, Rewriter& GpuRewriter) 162 | : RecursiveASTVisitor(), 163 | TheCpuRewriter(CpuRewriter), TheGpuRewriter(GpuRewriter) 164 | { 165 | } 166 | 167 | /* Pipeline for one parallel_for_each call: collect lambda information, choose a name postfix, emit the GPU code, then rewrite the CPU code. The GPU step reads the lambda body text through the CPU rewriter and the CPU step replaces that same body, so the GPU step has to run first. */ void LambdaRewiter::Rewrite(CallExpr const * const Statement) 168 | { 169 | ExtractLambdaFunctionInfo(Statement); 170 | GenerateKernelNamePostfix(); 171 | RewriteGpuCode(); 172 | RewriteCpuCode(); 173 | } 174 | 175 | /* Asserts that the smaller of the argument count and the callee parameter count is 4, then traverses the last argument (index 3) so that the Visit* callbacks of this class record the lambda. The count is checked by assert only. */ void LambdaRewiter::ExtractLambdaFunctionInfo(CallExpr const * const Statement) 176 | { 177 | FunctionDecl const * const F = Statement->getDirectCallee(); 178 | static const unsigned int NR_ARGUMENTS = 4; 179 | assert(NR_ARGUMENTS == std::min(Statement->getNumArgs(), F->getNumParams())); 180 | Stmt const * const S = Statement->getArg(NR_ARGUMENTS-1); 181 | this->TraverseStmt(const_cast(S)); 182 | } 183 | 184 | /* Records the type and name of each captured variable (by-reference and by-copy captures go to separate lists; any other capture kind stops traversal by returning false), stores the introducer range and the body range, and traverses the lambda body. */ bool LambdaRewiter::VisitLambdaExpr(LambdaExpr *LE) 185 | { 186 | LambdaExpr::capture_iterator I = LE->capture_begin(); 187 | LambdaExpr::capture_iterator E = LE->capture_end(); 188 | for (; I != E; ++I) { 189 | if (VarDecl* D = I->getCapturedVar()) { 190 | std::string Type {QualType::getAsString(D->getType().split())}; 191 | std::string Variable {D->getName().str()}; 192 | 193 | if (I->getCaptureKind() == 
LCK_ByRef) { 194 | TheCapturesByRef.push_back({"",Type,Variable}); 195 | } 196 | else if (I->getCaptureKind() == LCK_ByCopy) { 197 | TheCapturesByValue.push_back({"",Type,Variable}); 198 | } 199 | else { 200 | return false; 201 | } 202 | } 203 | } 204 | 205 | /* Remember where the capture list and the body are located; BodyRange is what the CPU and GPU rewrite steps operate on. */ CaptureListRange.setBegin(LE->getIntroducerRange().getBegin()); 206 | CaptureListRange.setEnd(LE->getIntroducerRange().getEnd()); 207 | BodyRange.setBegin(LE->getBody()->getLocStart()); 208 | BodyRange.setEnd(LE->getBody()->getLocEnd()); 209 | 210 | TraverseLambdaBody(LE); 211 | return true; 212 | } 213 | 214 | /* Forwards every VarDecl of the declaration statement to VisitVarDecl and stores the extent of the statement in ParamRange (each visited statement overwrites the previous value). */ bool LambdaRewiter::VisitDeclStmt(DeclStmt *S) 215 | { 216 | for (DeclStmt::decl_iterator I = S->decl_begin(), 217 | E = S->decl_end(); I!=E; ++I) { 218 | if (VarDecl *VD = dyn_cast(*I)) { 219 | VisitVarDecl(VD); 220 | } 221 | } 222 | ParamRange.setBegin(S->getLocStart()); 223 | ParamRange.setEnd(S->getLocEnd()); 224 | 225 | return true; 226 | } 227 | 228 | /* Appends type and name of every variable declaration that is not a local variable declaration to TheParams; the ValueType field is left empty. */ bool LambdaRewiter::VisitVarDecl(VarDecl *VD) 229 | { 230 | if (! VD->isLocalVarDecl()) { 231 | std::string VarTypeName {QualType::getAsString(VD->getType().split())}; 232 | std::string VarName {VD->getName().str()}; 233 | TheParams.push_back({"",VarTypeName,VarName}); 234 | } 235 | return true; 236 | } 237 | 238 | /* The postfix is an underscore followed by the next std::rand() value. NOTE(review): no srand call is visible in this file and nothing here prevents two lambdas from receiving the same postfix. */ void LambdaRewiter::GenerateKernelNamePostfix() 239 | { 240 | PostfixName = std::string("_") + std::to_string(std::rand()); 241 | } 242 | 243 | /* Replaces the lambda body in the CPU source (including a directly following semicolon, if any) with a body that returns a pair of two string literals: the name of the file containing the lambda with .cl appended, and the generated kernel name _Kernel plus postfix. */ void LambdaRewiter::RewriteCpuCode() 244 | { 245 | SourceManager& SM = TheCpuRewriter.getSourceMgr(); 246 | std::string FileName { " \"" + std::string {SM.getFilename(BodyRange.getBegin())} + ".cl\" " }; 247 | std::string KernelName {" \"_Kernel" + PostfixName + "\" "}; 248 | std::string NewLambdaBody { " { return std::pair ( " + FileName + "," + KernelName + "); }" }; 249 | ExpandSourceRange Range{TheCpuRewriter}; 250 | TheCpuRewriter.ReplaceText(Range(BodyRange), NewLambdaBody.c_str()); 251 | //TheCpuRewriter.ReplaceText(ParamRange, ""); 252 | } 253 | 254 | void LambdaRewiter::RewriteGpuCode() 
255 | { /* Appends two functions at the end of the GPU file: _Lambda plus postfix, whose parameter and return type are those of TheParams[0] and whose body is the lambda body text read through the CPU rewriter, and an extern-C _Kernel plus postfix taking in and out pointers that applies the lambda to the element at index get_global_id(0). Only TheParams[0] is used; the single-parameter requirement is enforced by assert only. */ 256 | assert(1 == TheParams.size()); 257 | 258 | std::string SignatureLambda { TheParams[0].Type + " _Lambda" + PostfixName + 259 | "(" + TheParams[0].Type + " " + TheParams[0].VariableName + ") " }; 260 | std::string BodyLambda { TheCpuRewriter.getRewrittenText(BodyRange) }; 261 | 262 | std::string SignatureKernel { std::string {"extern \"C\" void _Kernel"} + PostfixName + 263 | "(" + TheParams[0].Type + "* in, " + TheParams[0].Type + "* out) " } ; 264 | std::string BodyKernel { "{ unsigned idx = get_global_id(0); out[idx] = _Lambda" + PostfixName + "(in[idx]); }" }; 265 | 266 | SourceManager& SM = TheGpuRewriter.getSourceMgr(); 267 | std::pair locInfo = SM.getDecomposedLoc(BodyRange.getEnd()); 268 | SourceLocation Eof = SM.getLocForEndOfFile(locInfo.first); 269 | TheGpuRewriter.InsertTextAfter(Eof, SignatureLambda + BodyLambda + "\n\n" + SignatureKernel + BodyKernel); 270 | } 271 | 272 | /* Classifies the restrict attribute of F. Restrict is set when the attribute is present. Each attribute argument must be the string cpu or gpu; any other string, or seeing neither of the two, makes the attribute invalid. An attribute located before the function name is also marked invalid. Without the attribute all flags stay false and Attr stays null. */ HasRestrictAttribute::HasRestrictAttribute(FunctionDecl const * const F) : 273 | Restrict{false}, Valid{false}, CPU{false}, GPU{false}, 274 | Attr{nullptr}, Func{F} 275 | { 276 | Attr = F->getAttr(); 277 | if (Attr) { 278 | Restrict = true; 279 | Valid = true; 280 | for (AMPrestrictAttr::args_iterator it = Attr->args_begin(); 281 | it != Attr->args_end(); 282 | ++it) { 283 | StringLiteral* string = cast(*it); 284 | if (string->getString() == "cpu") CPU = true; 285 | else if (string->getString() == "gpu") GPU = true; 286 | else Valid = false; 287 | } 288 | if (!CPU && !GPU) Valid = false; 289 | if (Valid) { 290 | if (IsRestrictKeywordBeforeFunctionName() /*|| 291 | IsDeclarationADefinitionRestrictKeywordDifferent()*/) { 292 | Valid = false; 293 | } 294 | } 295 | } 296 | } 297 | 298 | /* Plain accessors for the flags computed in the constructor. */ bool HasRestrictAttribute::IsRestrict() const { return Restrict; } 299 | bool HasRestrictAttribute::IsValid() const { return Valid; } 300 | bool HasRestrictAttribute::HasCPU() const { return CPU; } 301 | bool HasRestrictAttribute::HasGPU() const { return GPU; } 302 | 303 | SourceLocation 
HasRestrictAttribute::getLocation() const 304 | { /* Expansion location of the attribute. NOTE(review): Attr is dereferenced unconditionally, so this is only safe when IsRestrict() is true. */ 305 | SourceManager& TheSourceMgr = Func->getASTContext().getSourceManager(); 306 | return TheSourceMgr.getExpansionLoc(Attr->getLocation()); 307 | } 308 | 309 | /* True when the attribute location precedes the begin location of the function name. */ bool HasRestrictAttribute::IsRestrictKeywordBeforeFunctionName() const { 310 | if (getLocation() < Func->getNameInfo().getBeginLoc()) { 311 | return true; 312 | } 313 | return false; 314 | } 315 | 316 | /* For a definition whose canonical declaration is a different declaration: returns true when that declaration has no restrict attribute, has an invalid one, or disagrees on the cpu or gpu flag. Returns false in every other case. The only call to this function in the constructor is commented out. */ bool HasRestrictAttribute::IsDeclarationADefinitionRestrictKeywordDifferent() 317 | { 318 | if (Func->isThisDeclarationADefinition()) { 319 | if (const FunctionDecl* FD = Func->getCanonicalDecl()) { 320 | if (FD == Func) return false; 321 | 322 | HasRestrictAttribute Result{FD}; 323 | if (!Result.IsRestrict()) return true; 324 | if (!Result.IsValid()) return true; 325 | if (CPU != Result.HasCPU()) return true; 326 | if (GPU != Result.HasGPU()) return true; 327 | } 328 | } 329 | return false; 330 | } 331 | 332 | /* Keeps a reference to the rewriter whose source manager and language options are used for lexing. */ ExpandSourceRange::ExpandSourceRange(Rewriter& Rewrite) : 333 | TheRewriter(Rewrite) 334 | {} 335 | 336 | /* Returns loc with its end moved onto a directly following semicolon; when no semicolon is found the end is left unchanged. */ SourceRange ExpandSourceRange::operator() (SourceRange loc) 337 | { 338 | // If the range is a full statement, and is followed by a 339 | // semi-colon then expand the range to include the semicolon. 340 | //return loc; 341 | 342 | SourceLocation b = loc.getBegin(); 343 | SourceLocation e = FindSemiColonAfterLocation(loc.getEnd()); 344 | if (e.isInvalid()) e = loc.getEnd(); 345 | return SourceRange(b,e); 346 | } 347 | 348 | /* Returns the location of the token that starts right after the token at loc when that token is a semicolon; returns an invalid SourceLocation otherwise (also when loc is a macro location that is not at the end of its expansion, or the file buffer cannot be loaded). */ SourceLocation ExpandSourceRange::FindSemiColonAfterLocation(SourceLocation loc) 349 | { 350 | SourceManager &SM = TheRewriter.getSourceMgr(); 351 | if (loc.isMacroID()) { 352 | if (!Lexer::isAtEndOfMacroExpansion(loc, SM, 353 | TheRewriter.getLangOpts(), &loc)) 354 | return SourceLocation(); 355 | } 356 | loc = Lexer::getLocForEndOfToken(loc, /*Offset=*/0, SM, 357 | TheRewriter.getLangOpts()); 358 | 359 | // Break down the source location. 
360 | std::pair locInfo = SM.getDecomposedLoc(loc); 361 | 362 | // Try to load the file buffer. 363 | bool invalidTemp = false; 364 | StringRef file = SM.getBufferData(locInfo.first, &invalidTemp); 365 | if (invalidTemp) 366 | return SourceLocation(); 367 | 368 | const char *tokenBegin = file.data() + locInfo.second; 369 | 370 | // Lex from the start of the given location. 371 | Lexer lexer(SM.getLocForStartOfFile(locInfo.first), 372 | TheRewriter.getLangOpts(), 373 | file.begin(), tokenBegin, file.end()); 374 | Token tok; 375 | /* Only this single raw-lexed token is inspected. */ lexer.LexFromRawLexer(tok); 376 | if (tok.isNot(tok::semi)) 377 | return SourceLocation(); 378 | 379 | return tok.getLocation(); 380 | } 381 | 382 | 383 | } // namespace 384 | 385 | 386 | -------------------------------------------------------------------------------- /sources/compiler/Rewriter.h: -------------------------------------------------------------------------------- 1 | #ifndef REWRITER_H 2 | #define REWRITER_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | namespace clang 14 | { 15 | class CompilerInstance; 16 | class ASTContext; 17 | class AMPrestrictAttr; 18 | } 19 | 20 | namespace compiler { 21 | 22 | 23 | /// Rewrite the source code: The result of rewriting the source code 24 | /// should be two source code files (or strings holding the source); 25 | /// one that will be compiled on the CPU and the other on the GPU. 26 | /// The class is at once an AST consumer, an AST visitor and a preprocessor callback receiver. class RewriterASTConsumer : 27 | public clang::ASTConsumer, 28 | public clang::RecursiveASTVisitor, 29 | public clang::PPCallbacks 30 | { 31 | public: 32 | RewriterASTConsumer(const RewriterASTConsumer& that) = delete; 33 | RewriterASTConsumer& operator=(RewriterASTConsumer&) = delete; 34 | 35 | /* Binds both internal rewriters to the source manager and language options of CI. */ explicit RewriterASTConsumer(const llvm::OwningPtr& CI); 36 | 37 | ~RewriterASTConsumer(); 38 | 39 | /* Both getters return copies of the text stored by HandleTranslationUnit; they are empty strings before it has run. */ std::string GetRewritenCpuSource() const; 40 | std::string GetRewritenGpuSource() const; 41 | 42 | /* Runs the traversal and captures the rewritten CPU and GPU texts. */ virtual void HandleTranslationUnit(clang::ASTContext& Context); 
43 | /* Visitor callbacks; both always return true in the implementation. */ virtual bool VisitFunctionDecl(clang::FunctionDecl const * const F); 44 | virtual bool VisitCallExpr(clang::CallExpr const * const Stmt); 45 | /* Preprocessor callback; strips the include of ParallelForEach.h from the GPU source. */ virtual void InclusionDirective(clang::SourceLocation HashLoc, 46 | const clang::Token &IncludeTok, 47 | clang::StringRef FileName, 48 | bool IsAngled, 49 | clang::CharSourceRange FilenameRange, 50 | const clang::FileEntry *File, 51 | clang::StringRef SearchPath, 52 | clang::StringRef RelativePath, 53 | const clang::Module *Imported); 54 | 55 | protected: 56 | /* Called by VisitCallExpr for each direct call to compute::parallel_for_each; virtual, so a derived class can replace the handling. */ virtual void OnParallelForEachCall(clang::CallExpr const * const Stmt); 57 | private: 58 | void WriteGpuDeclarators(clang::CallExpr const * const Statement); 59 | 60 | private: 61 | /* Final texts, filled in by HandleTranslationUnit. */ std::string RewritenCpuSource; 62 | std::string RewritenGpuSource; 63 | 64 | /* Two independent edit buffers over the same source manager: one per output. */ clang::Rewriter TheCpuRewriter; 65 | clang::Rewriter TheGpuRewriter; 66 | 67 | /* Number of parallel_for_each calls handled so far; the GPU declarations are written when it becomes 1. */ int ParallelForEachCallCount; 68 | 69 | /* Function most recently passed to VisitFunctionDecl. NOTE(review): has no initializer here and is not set by the constructor. */ clang::FunctionDecl const* Func; 70 | }; 71 | 72 | void ParseAST(llvm::OwningPtr& TheCompInst, RewriterASTConsumer& TheConsumer); 73 | 74 | } // namespace compiler 75 | 76 | 77 | /* NOTE(review): an unnamed namespace in a header gives every including translation unit its own copy of the classes below. */ namespace { 78 | 79 | /* Inspects the restrict attribute of one function declaration and exposes whether it is present, valid, and which of cpu and gpu it names. Non-copyable. */ class HasRestrictAttribute { 80 | public: 81 | HasRestrictAttribute(const HasRestrictAttribute& that) = delete; 82 | HasRestrictAttribute& operator=(HasRestrictAttribute&) = delete; 83 | 84 | explicit HasRestrictAttribute(clang::FunctionDecl const * const F); 85 | 86 | bool IsRestrict() const; 87 | bool IsValid() const; 88 | bool HasCPU() const; 89 | bool HasGPU() const; 90 | 91 | /* Location of the attribute; the implementation dereferences Attr, so call it only when IsRestrict() is true. */ clang::SourceLocation getLocation() const; 92 | 93 | private: 94 | bool IsRestrictKeywordBeforeFunctionName() const; 95 | bool IsDeclarationADefinitionRestrictKeywordDifferent(); 96 | 97 | private: 98 | bool Restrict; 99 | bool Valid; 100 | bool CPU; 101 | bool GPU; 102 | 103 | /* Null when the function has no restrict attribute. */ clang::AMPrestrictAttr* Attr; 104 | clang::FunctionDecl const* Func; 105 | }; 106 | 107 | 108 | /* Function object that widens a source range over a directly following semicolon. Non-copyable. */ class ExpandSourceRange { 109 | public: 110 | ExpandSourceRange(const ExpandSourceRange& that) = delete; 111 | ExpandSourceRange& 
operator=(ExpandSourceRange&) = delete; 112 | 113 | ExpandSourceRange(clang::Rewriter& Rewrite); 114 | 115 | /* Returns loc, with its end moved onto the semicolon when one directly follows. */ clang::SourceRange operator() (clang::SourceRange loc); 116 | private: 117 | clang::SourceLocation FindSemiColonAfterLocation(clang::SourceLocation loc); 118 | private: 119 | clang::Rewriter& TheRewriter; 120 | }; 121 | 122 | 123 | /// What arguments (names and types) are captured by the lambda? Some are captured by reference 124 | /// and others by value. The visitor records that information for the lambda passed to a parallel_for_each call and then rewrites the call for the CPU and GPU outputs. 125 | class LambdaRewiter : 126 | public clang::RecursiveASTVisitor 127 | { 128 | public: 129 | LambdaRewiter(clang::Rewriter& CpuRewriter, clang::Rewriter& GpuRewriter); 130 | 131 | /* Entry point: analyses the call and applies the GPU and CPU rewrites. */ void Rewrite(clang::CallExpr const * const Statement); 132 | 133 | public: 134 | /* Visitor callbacks invoked during the traversal started by Rewrite(). */ bool VisitLambdaExpr(clang::LambdaExpr *LE); 135 | bool VisitDeclStmt(clang::DeclStmt *S); 136 | bool VisitVarDecl(clang::VarDecl *VD); 137 | 138 | private: 139 | void ExtractLambdaFunctionInfo(clang::CallExpr const * const Statement); 140 | void GenerateKernelNamePostfix(); 141 | void RewriteCpuCode(); 142 | void RewriteGpuCode(); 143 | 144 | private: 145 | /* Type and name of one captured variable or parameter. In the implementation file ValueType is always stored as an empty string. */ struct DeclarationInfo { 146 | std::string ValueType; 147 | std::string Type; 148 | std::string VariableName; 149 | 150 | // e.g. 
std::vector& A; 151 | // ValueType would be 'int' 152 | // Type would be 'std::vector' 153 | // VariableName would be 'A' 154 | }; 155 | 156 | using DeclarationInfoList = std::vector; 157 | 158 | clang::Rewriter& TheCpuRewriter; 159 | clang::Rewriter& TheGpuRewriter; 160 | 161 | /* Filled by VisitLambdaExpr. */ DeclarationInfoList TheCapturesByRef; 162 | DeclarationInfoList TheCapturesByValue; 163 | /* Filled by VisitVarDecl; RewriteGpuCode expects exactly one entry. */ DeclarationInfoList TheParams; 164 | clang::SourceRange CaptureListRange; 165 | /* Range of the lambda body; replaced in the CPU source and copied into the GPU source. */ clang::SourceRange BodyRange; 166 | clang::SourceRange ParamRange; 167 | 168 | /* Suffix shared by the generated _Lambda and _Kernel names. */ std::string PostfixName; 169 | }; 170 | 171 | } 172 | 173 | #endif 174 | -------------------------------------------------------------------------------- /sources/compute/ParallelForEach.h: -------------------------------------------------------------------------------- 1 | #ifndef ParallelForEach_H 2 | #define ParallelForEach_H 3 | 4 | #define __CL_ENABLE_EXCEPTIONS 5 | #include "cl.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace compute { 12 | 13 | 14 | /* Holds one OpenCL platform, device, context and command queue together with the most recently built program and kernel. Non-copyable; shared through Instance(). */ class Accelerator 15 | { 16 | public: 17 | /* Selects the first reported platform and runs Setup(). cl::Error and std::exception are printed to std::cerr and swallowed, so a failed setup leaves a constructed but unconfigured object. When no platform is reported, Setup() is not run at all. */ Accelerator() 18 | { 19 | try { 20 | VECTOR_CLASS Platforms; 21 | cl::Platform::get(&Platforms); 22 | if (Platforms.size() > 0) { 23 | Platform = Platforms[0]; 24 | Setup(); 25 | } 26 | } catch(cl::Error& e) { 27 | std::cerr << e.what() << ": " << e.err() << "\n"; 28 | } catch(std::exception& e) { 29 | std::cerr << e.what() << "\n"; 30 | } 31 | } 32 | 33 | Accelerator(const Accelerator& that) = delete; 34 | Accelerator& operator=(Accelerator&) = delete; 35 | 36 | /* Returns the single function-local static instance. */ static Accelerator& Instance() 37 | { 38 | static Accelerator I; 39 | return I; 40 | } 41 | 42 | /* Builds KernelCode for Device and stores the kernel named KernelName in the Kernel member, replacing any previous program and kernel. Failures are printed to std::cerr (with build status, options and log for cl::Error) and are not propagated to the caller. */ void BuildKernel(const std::string& KernelName, const std::string& KernelCode) 43 | { 44 | try { 45 | cl::Program::Sources Sources; 46 | Sources.push_back({KernelCode.c_str(),KernelCode.length()}); 47 | Program = cl::Program(Context,Sources); 48 | Program.build({Device}); 49 | Kernel = cl::Kernel(Program, KernelName.c_str()); 50 | } catch(cl::Error& e) { 51 | std::cerr << e.what() 
<< ": " << e.err() << "\n"; 52 | std::cerr << "Build Status: " << Program.getBuildInfo(Device) << std::endl; 53 | std::cerr << "Build Options:\t" << Program.getBuildInfo(Device) << std::endl; 54 | std::cerr << "Build Log:\t " << Program.getBuildInfo(Device) << std::endl; 55 | } catch(std::exception& e) { 56 | std::cerr << e.what() << "\n"; 57 | } 58 | } 59 | 60 | /* Runs the current kernel over [begin, end): the input is copied into a device buffer with a blocking write, the kernel gets the input buffer as argument 0 and an equally sized output buffer as argument 1 and is enqueued with one work-item per element, and after finish() the output buffer is read back with a blocking read to the address of *output. Both host pointers are taken as the address of the first element, so the ranges are presumably expected to be contiguous and output to have room for the same element count -- TODO confirm against callers. Nothing is caught here, so OpenCL errors propagate. */ template 61 | void Run(InputIterator begin, InputIterator end, OutputIterator output) 62 | { 63 | typedef typename std::iterator_traits::value_type value_type; 64 | int Extent = std::distance(begin, end); 65 | ::size_t ByteLength = sizeof(value_type) * (Extent); 66 | 67 | cl::Buffer BufferIn(Context,CL_MEM_READ_WRITE, ByteLength); 68 | Queue.enqueueWriteBuffer(BufferIn,CL_TRUE,0,ByteLength, static_cast(&*begin)); 69 | 70 | cl::Buffer BufferOut(Context,CL_MEM_READ_WRITE,ByteLength); 71 | 72 | Kernel.setArg(0,BufferIn); 73 | Kernel.setArg(1,BufferOut); 74 | Queue.enqueueNDRangeKernel(Kernel, cl::NullRange, cl::NDRange(Extent), cl::NullRange); 75 | Queue.finish(); 76 | 77 | Queue.enqueueReadBuffer(BufferOut,CL_TRUE,0,ByteLength,static_cast(&*output)); 78 | } 79 | 80 | 81 | private: 82 | /* Queries all device types of Platform, selects the first device, prints platform and device information to std::cout and creates the context and command queue. Throws std::runtime_error when the query fails or returns no device. NOTE(review): Devices is allocated with new and no matching delete is visible in this class. */ void Setup() 83 | { 84 | Devices = new VECTOR_CLASS; 85 | if (CL_SUCCESS != Platform.getDevices(CL_DEVICE_TYPE_ALL, Devices) || Devices->size() == 0) 86 | throw std::runtime_error("Failed to create Accelerator."); 87 | Device = (*Devices)[0]; 88 | 89 | std::cout << "Using platform: " << Platform.getInfo()<<"\n"; 90 | std::cout << "Using device: " << Device.getInfo()<<"\n"; 91 | 92 | Context = cl::Context(VECTOR_CLASS{Device}); 93 | 94 | Queue = cl::CommandQueue(Context,Device); 95 | } 96 | 97 | private: 98 | /* Only assigned inside Setup(); stays uninitialized when Setup() is never reached. */ VECTOR_CLASS* Devices; 99 | cl::Platform Platform; 100 | cl::Device Device; 101 | cl::Context Context; 102 | cl::CommandQueue Queue; 103 | cl::Program Program; 104 | cl::Kernel Kernel; 105 | /* Not referenced by any member function shown here; BuildKernel uses a local of the same name. */ cl::Program::Sources Sources; 106 | }; 107 | 108 | 109 | template 110 | void parallel_for_each(InputIterator begin, InputIterator 
end, OutputIterator output, const KernelType& F) 111 | { /* F is called with 0 only to obtain a pair: first the path of the OpenCL source file, second the kernel name (this is what the rewritten CPU lambda body returns). The file is read completely, built on the shared Accelerator and run over [begin, end) into output. Throws std::runtime_error when the file cannot be opened; build failures are only logged by BuildKernel, so Run is still attempted afterwards. */ 112 | std::pair Names = F(0); 113 | std::ifstream sourceFile(Names.first); 114 | if(sourceFile.fail()) 115 | throw std::runtime_error("Failed to open OpenCL source file."); 116 | std::string KernelCode( 117 | std::istreambuf_iterator(sourceFile), 118 | (std::istreambuf_iterator())); 119 | 120 | Accelerator& K = Accelerator::Instance(); 121 | K.BuildKernel(Names.second, KernelCode); 122 | K.Run(begin, end, output); 123 | } 124 | 125 | 126 | } // namespace compute 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories( 2 | "${CMAKE_CURRENT_SOURCE_DIR}" 3 | "${PROJECT_SOURCE_DIR}/include" 4 | "${CLANG_INCLUDE_DIRS}") 5 | 6 | add_definitions(${CLANG_DEFINITIONS}) 7 | 8 | set(KERNEL ./kernel.cpp) 9 | 10 | set(HEADERS 11 | ../include/cl.h 12 | ../sources/compiler/MainEntry.h 13 | ../sources/compiler/BitcodeDisassembler.h 14 | ../sources/compiler/Compiler.h 15 | ../sources/compiler/Rewriter.h 16 | ../sources/compute/ParallelForEach.h 17 | ) 18 | 19 | set(SOURCES 20 | ../sources/compiler/MainEntry.cpp 21 | ../sources/compiler/BitcodeDisassembler.cpp 22 | ../sources/compiler/Compiler.cpp 23 | ../sources/compiler/Rewriter.cpp 24 | ) 25 | 26 | 27 | set(CLANG_LIBS 28 | clangFrontend clangDriver clangCodeGen 29 | clangSema clangAnalysis clangAST 30 | clangParse clangLex clangBasic 31 | clangARCMigrate clangEdit clangFrontendTool 32 | clangSerialization 33 | clangTooling clangSema clangRewriteCore) 34 | 35 | set(LIBS 36 | pthread 37 | dl 38 | CBackendCodeGen 39 | LLVMCBackendInfo) 40 | 41 | set(OPENCL_LIB OpenCL) 42 | 43 | configure_file(kernel.cpp kernel.cpp COPYONLY) 44 | configure_file(../sources/compute/ParallelForEach.h ParallelForEach.h COPYONLY) 45 | configure_file(../include/cl.h cl.h COPYONLY) 46 | 47 | add_executable(test_kernel 
${HEADERS} ${SOURCES} test_kernel.cpp) 48 | target_link_libraries(test_kernel ${OPENCL_LIB} ${LIBS} ${LLVM_LIBS_CORE} ${CLANG_LIBS} ) 49 | 50 | add_executable(test_rewriter ${HEADERS} ${SOURCES} test_rewriter.cpp) 51 | target_link_libraries(test_rewriter ${OPENCL_LIB} ${LIBS} ${LLVM_LIBS_CORE} ${CLANG_LIBS} ) 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /tests/kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | /* Declarations of the two work-item query functions the kernels below call; no definition is given in this file. */ extern "C" long long get_global_id(int); 7 | extern "C" int get_global_size(int); 8 | 9 | extern "C" { 10 | 11 | /* Each work-item writes its own global id (dimension 0) into out at that index. */ void _Kernel_global_id(int* out) 12 | { 13 | unsigned id = get_global_id(0); 14 | out[id] = id; 15 | } 16 | 17 | /* Two-dimensional variant: writes the linear index y * size + x into out at that index. NOTE(review): the row stride is get_global_size(1) although x comes from dimension 0; the two sizes are both 10 in the test that uses this kernel, so the difference is not exercised there. */ void _Kernel_global_id2d(int* out) 18 | { 19 | unsigned x = get_global_id(0); 20 | unsigned y = get_global_id(1); 21 | unsigned id = (y * get_global_size(1)) + x; 22 | out[id] = id; 23 | } 24 | 25 | /* out[0] = arg0[0] + arg1[0]; only element 0 is touched. */ void _Kernel_add(int* arg0, int* arg1, int* out) 26 | { 27 | out[0] = arg0[0] + arg1[0]; 28 | } 29 | 30 | /* out[0] = arg0[0] / arg1[0] (integer division, no check for a zero divisor). */ void _Kernel_div(int* arg0, int* arg1, int * out) 31 | { 32 | out[0] = arg0[0] / arg1[0]; 33 | } 34 | 35 | /* out[0] is 1 when the first elements are equal, 0 otherwise. */ void _Kernel_if_eq(int* arg0, int* arg1, int* out) 36 | { 37 | out[0] = 0; 38 | if (arg0[0] == arg1[0]) { 39 | out[0] = 1; 40 | } 41 | } 42 | 43 | } 44 | 45 | 46 | // foo1 overloads are enabled via the return type; both overloads return their argument unchanged 47 | template 48 | typename std::enable_if::value, T>::type 49 | foo1(T t) 50 | { 51 | return t; 52 | } 53 | 54 | template 55 | typename std::enable_if::value, T>::type 56 | foo1(T t) 57 | { 58 | return t; 59 | } 60 | 61 | /* foo2 overloads are enabled via a defaulted enable_if pointer parameter; this overload returns 1 and the next one returns 0. */ template 62 | T foo2(T t, typename std::enable_if::value >::type* = 0) 63 | { 64 | return 1; 65 | } 66 | 67 | template 68 | T foo2(T t, typename std::enable_if::value >::type* = 0) 69 | { 70 | return 0; 71 | } 72 | 73 | extern "C" { 74 | 75 | /* Exercises the return-type enable_if overloads: out[0] = foo1(arg0[0]). */ void _Kernel_enable_if_return_type(int* arg0, int* out) 76 | { 77 | out[0] = foo1(arg0[0]); 78 | } 79 | 
80 | /* Exercises the parameter enable_if overloads with an int argument. */ void _Kernel_enable_if_int_argument(int* arg0, int* out) 81 | { 82 | out[0] = foo2(arg0[0]); 83 | } 84 | 85 | /* Same as above with a float argument. */ void _Kernel_enable_if_float_argument(float* arg0, float* out) 86 | { 87 | out[0] = foo2(arg0[0]); 88 | } 89 | 90 | } 91 | 92 | 93 | /* Writes the first odd value among arg[0..2] to out[0]. NOTE(review): when none of the three values is odd, find_if returns arg+3 and the dereference reads one element past the searched range. */ extern "C" void _Kernel_find_if(int* arg, int* out) { 94 | int* it = std::find_if (arg, arg+3, [] (int i) { return ((i%2)==1); } ); 95 | out[0] = *it; 96 | } 97 | 98 | /* Sorts arg[0..2] in place and writes the smallest value to out[0]. */ extern "C" void _Kernel_sort(int* arg, int* out) { 99 | std::sort (arg, arg+3); 100 | out[0] = arg[0]; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /tests/test_kernel.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() 2 | #include "../tests/catch.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define __CL_ENABLE_EXCEPTIONS 12 | #include "cl.h" 13 | 14 | #include "../sources/compiler/MainEntry.h" 15 | #include "../sources/compiler/Compiler.h" 16 | 17 | /* Test fixture: sets up the first OpenCL platform and device, translates kernel.cpp to OpenCL source once, and offers helpers to build a named kernel and run it over host arrays. */ class KernelFixture 18 | { 19 | public: 20 | /* Selects the first platform, then runs Setup() and TransformSource(). cl::Error and std::exception are printed to std::cerr and swallowed. */ KernelFixture() 21 | { 22 | try { 23 | VECTOR_CLASS Platforms; 24 | cl::Platform::get(&Platforms); 25 | if (Platforms.size() > 0) { 26 | Platform = Platforms[0]; 27 | Setup(); 28 | TransformSource(); 29 | } 30 | } catch(cl::Error& e) { 31 | std::cerr << e.what() << ": " << e.err() << "\n"; 32 | } catch(std::exception& e) { 33 | std::cerr << e.what() << "\n"; 34 | } 35 | } 36 | 37 | /* Function-local static instance shared by all test sections. */ static KernelFixture& Instance() 38 | { 39 | static KernelFixture I; 40 | return I; 41 | } 42 | 43 | /* Copy of the generated OpenCL source; empty when the translation did not run or failed. */ std::string GetKernelCode() const { return KernelCode; } 44 | 45 | /* Builds the stored KernelCode for Device and selects the kernel named Kernelname; errors are printed to std::cerr and not propagated. */ void BuildKernel(const std::string& Kernelname) 46 | { 47 | try { 48 | cl::Program::Sources Sources; 49 | Sources.push_back({KernelCode.c_str(),KernelCode.length()}); 50 | Program = cl::Program(Context,Sources); 51 | Program.build({Device}); 52 | Kernel = cl::Kernel(Program, Kernelname.c_str()); 53 | } catch(cl::Error& 
e) { 54 | std::cerr << e.what() << ": " << e.err() << "\n"; 55 | std::cerr << "Build Status: " << Program.getBuildInfo(Device) << std::endl; 56 | std::cerr << "Build Options:\t" << Program.getBuildInfo(Device) << std::endl; 57 | std::cerr << "Build Log:\t " << Program.getBuildInfo(Device) << std::endl; 58 | } catch(std::exception& e) { 59 | std::cerr << e.what() << "\n"; 60 | } 61 | } 62 | 63 | /* Two inputs, one output: In1 and In2 are copied to device buffers (kernel arguments 0 and 1), a third buffer is argument 2, the kernel runs with one work-item per element of In1, and the third buffer is read back into Out. All three buffers use the byte length derived from In1. */ template 64 | void Run(T& In1, T& In2, T& Out) 65 | { 66 | int Extent = std::end(In1)-std::begin(In1); 67 | ::size_t ByteLength = sizeof(Val) * (Extent); 68 | 69 | cl::Buffer B1(Context,CL_MEM_READ_WRITE, ByteLength); 70 | Queue.enqueueWriteBuffer(B1,CL_TRUE,0,ByteLength, static_cast(&*std::begin(In1))); 71 | 72 | cl::Buffer B2(Context,CL_MEM_READ_WRITE, ByteLength); 73 | Queue.enqueueWriteBuffer(B2,CL_TRUE,0,ByteLength, static_cast(&*std::begin(In2))); 74 | 75 | cl::Buffer O(Context,CL_MEM_READ_WRITE,ByteLength); 76 | 77 | Kernel.setArg(0,B1); 78 | Kernel.setArg(1,B2); 79 | Kernel.setArg(2,O); 80 | Queue.enqueueNDRangeKernel(Kernel, cl::NullRange, cl::NDRange(Extent), cl::NullRange); 81 | Queue.finish(); 82 | 83 | Queue.enqueueReadBuffer(O,CL_TRUE,0,ByteLength,static_cast(&*std::begin(Out))); 84 | } 85 | 86 | /* Single in-out buffer: Buf is copied to the device as kernel argument 0, the kernel runs, and the same buffer is read back into Buf. When global has zero dimensions it defaults to one work-item per element; local is passed through unchanged. */ template 87 | void Run(T& Buf, cl::NDRange global=cl::NullRange, cl::NDRange local=cl::NullRange) 88 | { 89 | size_t Extent = std::end(Buf)-std::begin(Buf); 90 | if (global.dimensions() == 0) 91 | global = {Extent}; 92 | ::size_t ByteLength = sizeof(Val) * Extent; 93 | 94 | cl::Buffer Buffer(Context,CL_MEM_READ_WRITE,ByteLength); 95 | Queue.enqueueWriteBuffer(Buffer,CL_TRUE,0,ByteLength, static_cast(&*std::begin(Buf))); 96 | Kernel.setArg(0,Buffer); 97 | Queue.enqueueNDRangeKernel(Kernel, cl::NullRange, global, local); 98 | Queue.finish(); 99 | Queue.enqueueReadBuffer(Buffer,CL_TRUE,0,ByteLength,static_cast(&*std::begin(Buf))); 100 | } 101 | 102 | /* Two in-out buffers: like the single-buffer overload, with Buf1 and Buf2 as kernel arguments 0 and 1; both are read back after the kernel finishes. */ template 103 | void Run(T& Buf1, T& Buf2, cl::NDRange global=cl::NullRange, cl::NDRange local=cl::NullRange) 104 | { 105 | 
size_t Extent = std::end(Buf1)-std::begin(Buf1); 106 | if (global.dimensions() == 0) 107 | global = {Extent}; 108 | /* Both buffers are sized from Buf1. */ ::size_t ByteLength = sizeof(Val) * Extent; 109 | 110 | cl::Buffer Buffer1(Context,CL_MEM_READ_WRITE,ByteLength); 111 | Queue.enqueueWriteBuffer(Buffer1,CL_TRUE,0,ByteLength, static_cast(&*std::begin(Buf1))); 112 | cl::Buffer Buffer2(Context,CL_MEM_READ_WRITE,ByteLength); 113 | Queue.enqueueWriteBuffer(Buffer2,CL_TRUE,0,ByteLength, static_cast(&*std::begin(Buf2))); 114 | 115 | Kernel.setArg(0,Buffer1); 116 | Kernel.setArg(1,Buffer2); 117 | Queue.enqueueNDRangeKernel(Kernel, cl::NullRange, global, local); 118 | Queue.finish(); 119 | Queue.enqueueReadBuffer(Buffer1,CL_TRUE,0,ByteLength,static_cast(&*std::begin(Buf1))); 120 | Queue.enqueueReadBuffer(Buffer2,CL_TRUE,0,ByteLength,static_cast(&*std::begin(Buf2))); 121 | } 122 | 123 | private: 124 | /* Queries all device types of Platform, selects the first device, prints platform and device information and creates the context and command queue. Throws std::runtime_error when no device is available. NOTE(review): Devices is allocated with new and no delete is visible for it in this class. */ void Setup() 125 | { 126 | Devices = new VECTOR_CLASS; 127 | if (CL_SUCCESS != Platform.getDevices(CL_DEVICE_TYPE_ALL, Devices) || Devices->size() == 0) 128 | throw std::runtime_error("Failed to create AcceleratorImpl."); 129 | Device = (*Devices)[0]; 130 | 131 | std::cout << "Using platform: "<()<<"\n"; 132 | std::cout << "Using device: "<()<<"\n"; 133 | 134 | Context = cl::Context(VECTOR_CLASS{Device}); 135 | 136 | Queue = cl::CommandQueue(Context,Device); 137 | } 138 | 139 | /* Translates kernel.cpp into OpenCL source by running compiler::MainEntry with a fixed clang command line. NOTE(review): several include paths below are absolute and specific to one machine and toolchain version. */ void TransformSource() 140 | { 141 | static const char FileName[] = "kernel.cpp"; 142 | 143 | const char* CmdLine[] = { 144 | "clang", 145 | "-x", "c++", "-std=c++11", "-O3", 146 | "-o", "/tmp/test.cc.o", 147 | "-I/home/dimitri/projects/Clang/amp/install/lib/clang/3.4/include", 148 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7", 149 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/x86_64-linux-gnu", 150 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/backward", 151 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/bits", 152 | "-I/usr/local/include", 153 | 
"-I/usr/include/x86_64-linux-gnu", 154 | "-I/usr/include", 155 | "-c", FileName 156 | }; 157 | KernelCode = compiler::MainEntry(17, CmdLine, compiler::BuildClCode)[0]; 158 | 159 | #ifdef HACK 160 | std::ifstream sourceFile("/tmp/opencl_temp.cl"); 161 | if(sourceFile.fail()) 162 | throw cl::Error(1, "Failed to open OpenCL source file"); 163 | std::string sourceCode( 164 | std::istreambuf_iterator(sourceFile), 165 | (std::istreambuf_iterator())); 166 | KernelCode = sourceCode; 167 | #endif 168 | 169 | 170 | } 171 | 172 | private: 173 | VECTOR_CLASS* Devices; 174 | cl::Platform Platform; 175 | cl::Device Device; 176 | cl::Context Context; 177 | cl::CommandQueue Queue; 178 | cl::Program Program; 179 | cl::Kernel Kernel; 180 | cl::Program::Sources Sources; 181 | std::string KernelCode; 182 | }; 183 | 184 | 185 | TEST_CASE( "some cl operations", "[opencl]" ) { 186 | 187 | KernelFixture& K = KernelFixture::Instance(); 188 | 189 | SECTION( "test C++ to OpenCL transformations" ) { 190 | REQUIRE( K.GetKernelCode() != "" ); 191 | 192 | SECTION( "test opencl function get_global_id" ) { 193 | K.BuildKernel("_Kernel_global_id"); 194 | 195 | int Out[3]; 196 | K.Run(Out, 3); 197 | 198 | REQUIRE( 0 == Out[0] ); 199 | REQUIRE( 1 == Out[1] ); 200 | REQUIRE( 2 == Out[2] ); 201 | } 202 | 203 | SECTION( "test opencl function get_global_id for 2D data" ) { 204 | K.BuildKernel("_Kernel_global_id2d"); 205 | 206 | static const int GLOBAL_DIM_X = 10; 207 | static const int GLOBAL_DIM_Y = 10; 208 | int out_data[GLOBAL_DIM_X * GLOBAL_DIM_Y]; 209 | K.Run(out_data, {GLOBAL_DIM_X, GLOBAL_DIM_Y}, {5, 5}); 210 | 211 | REQUIRE( 0 == out_data[0] ); 212 | REQUIRE( 1 == out_data[1] ); 213 | REQUIRE( 2 == out_data[2] ); 214 | REQUIRE( 10 == out_data[10] ); 215 | REQUIRE( 20 == out_data[20] ); 216 | REQUIRE( 90 == out_data[90] ); 217 | REQUIRE( 95 == out_data[95] ); 218 | REQUIRE( 99 == out_data[99] ); 219 | 220 | for (int i = 0; i < GLOBAL_DIM_X; i++) { 221 | for (int j = 0; j < GLOBAL_DIM_Y; j++) { 222 
| /* out_data holds int, so print with the signed conversion. */ fprintf(stderr, "%2d ", out_data[i * GLOBAL_DIM_Y + j]); 223 | } 224 | fprintf(stderr, "\n"); 225 | } 226 | } 227 | 228 | SECTION( "add two integers" ) { 229 | K.BuildKernel("_Kernel_add"); 230 | 231 | int Arg1[1] = { 1 }; 232 | int Arg2[1] = { 2 }; 233 | int Out[1]; 234 | K.Run(Arg1, Arg2, Out); 235 | 236 | REQUIRE( 3 == Out[0] ); 237 | } 238 | 239 | SECTION( "test divide" ) { 240 | K.BuildKernel("_Kernel_div"); 241 | 242 | int Arg1[1] = { 4 }; 243 | int Arg2[1] = { 2 }; 244 | int Out[1]; 245 | K.Run(Arg1, Arg2, Out); 246 | 247 | REQUIRE( 2 == Out[0] ); 248 | } 249 | 250 | /* Equal inputs are expected to yield 1, unequal inputs 0. */ SECTION( "test if_eq statement" ) { 251 | K.BuildKernel("_Kernel_if_eq"); 252 | 253 | int Arg1[1] = { 4 }; 254 | int Arg2[1] = { 4 }; 255 | int Out[1] = { 0 }; 256 | K.Run(Arg1, Arg2, Out); 257 | REQUIRE( 1 == Out[0] ); 258 | 259 | int Arg3[1] = { 3 }; 260 | int Arg4[1] = { 4 }; 261 | /* Out has a single element: reset index 0 before the second run (index 1 is past the end of the array). */ Out[0] = 0; 262 | K.Run(Arg3, Arg4, Out); 263 | REQUIRE( 0 == Out[0] ); 264 | } 265 | 266 | SECTION( "test std::enable_if return type" ) { 267 | K.BuildKernel("_Kernel_enable_if_return_type"); 268 | 269 | int Arg1[1] = { 4 }; 270 | int Out[1] = { 0 }; 271 | K.Run(Arg1, Out); 272 | 273 | REQUIRE( 4 == Out[0] ); 274 | } 275 | 276 | SECTION( "test std::enable_if int argument " ) { 277 | K.BuildKernel("_Kernel_enable_if_int_argument"); 278 | 279 | int Arg[1] = { 1 }; 280 | int Out[1] = { 0 }; 281 | K.Run(Arg, Out); 282 | 283 | REQUIRE( 1 == Out[0] ); 284 | } 285 | 286 | SECTION( "tst std::enable_if float argument" ) { 287 | K.BuildKernel("_Kernel_enable_if_float_argument"); 288 | 289 | cl_float Arg[1] = { 1.0 }; 290 | /* Out starts at 1 and is uploaded by the two-buffer Run, so the expected 0 has to be written by the kernel. */ cl_float Out[1] = { 1 }; 291 | K.Run(Arg, Out); 292 | 293 | REQUIRE( Approx(0.0) == Out[0] ); 294 | } 295 | 296 | SECTION( "test std::find_if find an odd number" ) { 297 | K.BuildKernel("_Kernel_find_if"); 298 | 299 | int Arg[3] = { 4, 3, 8 }; 300 | int Out[3] = { 0 }; 301 | K.Run(Arg, Out); 302 | 303 | REQUIRE( 3 == Out[0] ); 304 | } 305 | 306 | SECTION( "test std::sort" ) { 307 | 
K.BuildKernel("_Kernel_sort"); 308 | 309 | int Arg[8] = { 4, 3, 8, 10, 7, 8, 20, 10 }; 310 | int Out[8] = { 0 }; 311 | K.Run(Arg, Out); 312 | 313 | /* Only the first output element (the smallest input) is checked. */ REQUIRE( 3 == Out[0] ); 314 | } 315 | } 316 | } 317 | 318 | 319 | -------------------------------------------------------------------------------- /tests/test_rewriter.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() 2 | #include "../tests/catch.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define __CL_ENABLE_EXCEPTIONS 12 | #include "cl.h" 13 | 14 | #include "../sources/compiler/MainEntry.h" 15 | #include "../sources/compiler/Compiler.h" 16 | 17 | 18 | /* Writes Source to Input.cpp in the current directory, runs compiler::MainEntry with compiler::RewriteSourceFile over it and returns the resulting strings. Exactly two are expected; the test cases read them as the CPU source followed by the GPU source. */ std::vector TransformSource(std::string Source) 19 | { 20 | static const char FileName[] = "Input.cpp"; 21 | 22 | std::ofstream Afile {FileName}; 23 | Afile << Source; 24 | Afile.close(); 25 | 26 | /* The -I paths are absolute and specific to one development machine. */ const char* CmdLine[] = { 27 | "clang", 28 | "-x", "c++", "-std=c++11", "-O3", 29 | "-o", "Input.cc.o", 30 | "-I/home/dimitri/projects/Clang/amp/install/lib/clang/3.4/include", 31 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7", 32 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/x86_64-linux-gnu", 33 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/backward", 34 | "-I/usr/lib/gcc/x86_64-linux-gnu/4.7/../../../../include/c++/4.7/bits", 35 | "-I/usr/local/include", 36 | "-I/usr/include/x86_64-linux-gnu", 37 | "-I/usr/include", 38 | "-c", FileName 39 | }; 40 | /* The argument count is derived from the array so it cannot drift from the list above; it evaluates to the previously hard-coded 17. */ auto Code = compiler::MainEntry(static_cast<int>(sizeof(CmdLine) / sizeof(CmdLine[0])), CmdLine, compiler::RewriteSourceFile); 41 | assert(2 == Code.size()); 42 | return Code; 43 | } 44 | 45 | /* Removes every whitespace character from s in place and returns s. */ static inline std::string& remove_whitespace(std::string &s) { 46 | /* isspace is only defined for unsigned char values and EOF, so each char is converted to unsigned char before the call. */ s.erase(std::remove_if(s.begin(), s.end(), [](unsigned char c) { return isspace(c) != 0; }), s.end()); 47 | return s; 48 | } 49 | 50 | /* Requires Cpu1 == Cpu2 and Gpu1 == Gpu2 after all whitespace has been removed from each. The arguments are taken by value, so the callers' strings are not modified. */ void CheckRewritenSource(std::string Cpu1, std::string Cpu2, std::string Gpu1, std::string Gpu2) { 51 | 
remove_whitespace(Cpu1); 52 | remove_whitespace(Cpu2); 53 | remove_whitespace(Gpu1); 54 | remove_whitespace(Gpu2); 55 | REQUIRE( Cpu1 == Cpu2 ); 56 | REQUIRE( Gpu1 == Gpu2 ); 57 | } 58 | 59 | /* Each SECTION passes a small C++ snippet to TransformSource and compares the two returned strings (read as CPU source, then GPU source) with expected text, ignoring whitespace. */ TEST_CASE( "opencl rewriter", "[rewriter]" ) { 60 | 61 | /* The CPU output is expected to equal the input; the GPU output is expected to be an empty FooClass. */ SECTION( "Add modifier to const member function" ) { 62 | const char* CpuCode = R"( 63 | struct FooClass { 64 | int func(int x) const volatile __attribute__((amp_restrict("cpu"))); 65 | }; 66 | int FooClass::func(int x) const volatile __attribute__((amp_restrict("cpu"))) { 67 | return x; 68 | } 69 | int main() { 70 | FooClass f; 71 | int ( FooClass::* pointer)( int x) const volatile __attribute__((amp_restrict("cpu"))) = &FooClass::func; 72 | return ((f).*(pointer))( 5) == 12 ? 0 : 1; 73 | } 74 | )"; 75 | 76 | std::string Gpucode = "struct FooClass {};"; 77 | 78 | auto Code = TransformSource(CpuCode); 79 | auto CpuSource = Code[0]; 80 | auto GpuSource = Code[1]; 81 | 82 | CheckRewritenSource(CpuSource, CpuCode, GpuSource, Gpucode); 83 | } 84 | 85 | /* On the CPU side the lambda body is expected to be replaced by a pair naming the .cl file and the kernel; on the GPU side by generated _Lambda_ and _Kernel_ functions. NOTE(review): the expected strings hard-code the suffix 1804289383, which looks like a generated id - confirm it is stable across runs and platforms. */ SECTION( "parallel_for_each" ) { 86 | const char* InputCode = R"( 87 | #include 88 | #include "ParallelForEach.h" 89 | 90 | template T square(T x) __attribute__((amp_restrict("gpu","cpu"))) 91 | { return x * x; } 92 | 93 | void func() { 94 | std::vector MyArray {1,2,3,4,5,6}; 95 | std::vector Output(6); 96 | 97 | compute::parallel_for_each(MyArray.begin(), MyArray.end(), Output.begin(), [](int x) { 98 | return square(x); 99 | }); 100 | } 101 | 102 | int main() { 103 | func(); 104 | return 0; 105 | } 106 | )"; 107 | 108 | const char* CpuCode = R"( 109 | #include 110 | #include "ParallelForEach.h" 111 | 112 | template T square(T x) __attribute__((amp_restrict("gpu","cpu"))) 113 | { return x * x; } 114 | 115 | void func() { 116 | std::vector MyArray {1,2,3,4,5,6}; 117 | std::vector Output(6); 118 | 119 | compute::parallel_for_each(MyArray.begin(), MyArray.end(), Output.begin(), [](int x) { 120 | return std::pair ( "Input.cpp.cl" , "_Kernel_1804289383" ); 121 | }); 122 | } 123 | 
124 | int main() { 125 | func(); 126 | return 0; 127 | } 128 | )"; 129 | 130 | /* Expected GPU text: the parallel_for_each call and main() are absent, and the lambda appears as a free function called from an extern "C" kernel. */ const char* GpuCode = R"( 131 | #include 132 | 133 | template T square(T x) __attribute__((amp_restrict("gpu","cpu"))) 134 | { return x * x; } 135 | 136 | void func() { 137 | std::vector MyArray {1,2,3,4,5,6}; 138 | std::vector Output(6); 139 | } 140 | 141 | extern "C" long long get_global_id(int); 142 | extern "C" int get_global_size(int); 143 | 144 | int _Lambda_1804289383(int x) { return square(x); } 145 | extern "C" void _Kernel_1804289383(int* in, int* out) { unsigned idx = get_global_id(0); out[idx] = _Lambda_1804289383(in[idx]); } 146 | )"; 147 | 148 | auto Code = TransformSource(InputCode); 149 | auto CpuSource = Code[0]; 150 | auto GpuSource = Code[1]; 151 | 152 | CheckRewritenSource(CpuSource, CpuCode, GpuSource, GpuCode); 153 | } 154 | 155 | /* Each output is expected to keep only the doubleIt overload whose amp_restrict argument ("cpu" or "gpu") matches that output. */ SECTION( "Overload member function" ) { 156 | const char* InputCode = R"( 157 | struct A { 158 | int doubleIt() const __attribute__((amp_restrict("cpu"))) { 159 | return 2; 160 | } 161 | int doubleIt() const __attribute__((amp_restrict("gpu"))) { 162 | return 4; 163 | } 164 | }; 165 | )"; 166 | 167 | const char* CpuCode = R"( 168 | struct A { 169 | int doubleIt() const __attribute__((amp_restrict("cpu"))) { 170 | return 2; 171 | } 172 | }; 173 | )"; 174 | 175 | const char* GpuCode = R"( 176 | struct A { 177 | int doubleIt() const __attribute__((amp_restrict("gpu"))) { 178 | return 4; 179 | } 180 | }; 181 | )"; 182 | 183 | auto Code = TransformSource(InputCode); 184 | auto CpuSource = Code[0]; 185 | auto GpuSource = Code[1]; 186 | 187 | CheckRewritenSource(CpuSource, CpuCode, GpuSource, GpuCode); 188 | } 189 | } 190 | 191 | 192 | --------------------------------------------------------------------------------