├── CMakeLists.txt ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── build_docker_image.sh ├── jammy │ └── Dockerfile └── scripts │ ├── build.sh │ ├── build_install_llvm.sh │ ├── checkout.sh │ └── llvm_checksum │ ├── llvm_checksum.py │ └── project_tree.py ├── logo.png └── src ├── cicc.cpp ├── nvcc-llvm-ir.cpp ├── nvcc.cpp ├── pass1.cpp ├── pass2.cpp ├── test1.cu └── test2.cu /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19 FATAL_ERROR) 2 | 3 | project(nvcc-llvm-ir LANGUAGES CXX CUDA) 4 | 5 | find_package(LLVM CONFIG PATHS "/usr/local/lib/cmake" NO_DEFAULT_PATH) 6 | 7 | if (LLVM_FOUND) 8 | 9 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 10 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 11 | 12 | # Locate NVVM components in the CUDA toolkit directory tree: 13 | # https://github.com/nvidia-compiler-sdk/nvvmir-samples/blob/master/CMakeLists.txt 14 | find_package(CUDAToolkit REQUIRED) 15 | get_filename_component(CUDA_HOME "${CUDAToolkit_BIN_DIR}" DIRECTORY) 16 | find_file(LIBNVVM_HOME nvvm PATHS "${CUDA_HOME}") 17 | find_library(NVVM_LIB nvvm PATHS "${LIBNVVM_HOME}/lib64" "${LIBNVVM_HOME}/lib/x64") 18 | find_file(NVVM_H nvvm.h PATHS "${LIBNVVM_HOME}/include") 19 | get_filename_component(NVVM_INCLUDE_DIRS ${NVVM_H} DIRECTORY) 20 | 21 | add_library(cicc SHARED "src/cicc.cpp") 22 | target_include_directories(cicc PRIVATE ${LLVM_INCLUDE_DIRS}) 23 | target_include_directories(cicc PRIVATE ${NVVM_INCLUDE_DIRS}) 24 | target_compile_features(cicc PRIVATE cxx_std_17) 25 | if (NOT WIN32) 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") 27 | endif() 28 | target_compile_definitions(cicc PRIVATE ${LLVM_DEFINITIONS_LIST}) 29 | target_compile_options(cicc PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 30 | target_link_directories(cicc PRIVATE ${LLVM_LIBRARY_DIRS}) 31 | 32 | # With older LLVM releases, instead of just linking against ${LLVM_LIBRARIES}, 33 | # we have 
to use this idiotic procedure: 34 | # https://llvm.org/docs/CMake.html#embedding-llvm-in-your-project 35 | # Find the libraries that correspond to the LLVM components we wish to use 36 | llvm_map_components_to_libnames(llvm_libs core support ipo analysis target scalaropts transformutils instcombine) 37 | target_link_libraries(cicc PRIVATE ${llvm_libs}) 38 | 39 | add_library(pass1 SHARED "src/pass1.cpp") 40 | target_include_directories(pass1 PRIVATE ${LLVM_INCLUDE_DIRS}) 41 | target_include_directories(pass1 PRIVATE ${NVVM_INCLUDE_DIRS}) 42 | target_compile_features(pass1 PRIVATE cxx_std_17) 43 | target_compile_definitions(pass1 PRIVATE ${LLVM_DEFINITIONS_LIST}) 44 | target_compile_options(pass1 PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 45 | target_link_directories(pass1 PRIVATE ${LLVM_LIBRARY_DIRS}) 46 | target_link_libraries(pass1 PRIVATE ${llvm_libs}) 47 | 48 | add_library(pass2 SHARED "src/pass2.cpp") 49 | target_include_directories(pass2 PRIVATE ${LLVM_INCLUDE_DIRS}) 50 | target_include_directories(pass2 PRIVATE ${NVVM_INCLUDE_DIRS}) 51 | target_compile_features(pass2 PRIVATE cxx_std_17) 52 | target_compile_definitions(pass2 PRIVATE ${LLVM_DEFINITIONS_LIST}) 53 | target_compile_options(pass2 PRIVATE $<$<CXX_COMPILER_ID:MSVC>:/wd4624> $<$<CXX_COMPILER_ID:MSVC>:/wd4291> $<$<CXX_COMPILER_ID:MSVC>:/MT>) 54 | target_link_directories(pass2 PRIVATE ${LLVM_LIBRARY_DIRS}) 55 | target_link_libraries(pass2 PRIVATE ${llvm_libs}) 56 | 57 | add_library(nvcc SHARED "src/nvcc.cpp") 58 | target_link_libraries(nvcc PRIVATE ${CMAKE_DL_LIBS}) 59 | target_compile_definitions(nvcc PRIVATE LIBCICC="$<TARGET_FILE:cicc>") 60 | 61 | add_executable(${PROJECT_NAME} "src/${PROJECT_NAME}.cpp") 62 | target_compile_definitions(${PROJECT_NAME} PRIVATE LIBNVCC="$<TARGET_FILE:nvcc>") 63 | 64 | add_custom_target(test DEPENDS test1 test2) 65 | add_custom_target(test1 DEPENDS test1_unopt test1_opt) 66 | add_custom_target(test2 DEPENDS test2_unopt test2_opt) 67 | 68 | # Prepend CUDA compiler with a launcher, which shall perform preloading 69 | # of our shared library wrapper. 
70 | set(CMAKE_CUDA_COMPILER_LAUNCHER ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}) 71 | 72 | add_executable(test1_unopt "src/test1.cu") 73 | add_dependencies(test1_unopt cicc nvcc pass1 pass2) 74 | set_property(TARGET test1_unopt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 75 | target_compile_options(test1_unopt PRIVATE -keep --${PROJECT_NAME}-unopt=$<TARGET_FILE:pass1>) 76 | 77 | add_executable(test1_opt "src/test1.cu") 78 | add_dependencies(test1_opt cicc nvcc pass1 pass2) 79 | set_property(TARGET test1_opt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 80 | target_compile_options(test1_opt PRIVATE -keep --${PROJECT_NAME}-opt=$<TARGET_FILE:pass2>) 81 | 82 | add_executable(test2_unopt "src/test2.cu") 83 | add_dependencies(test2_unopt cicc nvcc pass1 pass2) 84 | set_property(TARGET test2_unopt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 85 | target_compile_options(test2_unopt PRIVATE -keep --${PROJECT_NAME}-unopt=$<TARGET_FILE:pass1>) 86 | 87 | add_executable(test2_opt "src/test2.cu") 88 | add_dependencies(test2_opt cicc nvcc pass1 pass2) 89 | set_property(TARGET test2_opt PROPERTY CUDA_SEPARABLE_COMPILATION ON) 90 | target_compile_options(test2_opt PRIVATE -keep --${PROJECT_NAME}-opt=$<TARGET_FILE:pass2>) 91 | 92 | endif() 93 | 94 | add_custom_target(docker 95 | COMMAND docker build -f ${CMAKE_CURRENT_SOURCE_DIR}/docker/Dockerfile -t ${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/docker 96 | COMMAND docker run -it --rm --user "${UID}:${GID}" -v${CMAKE_CURRENT_SOURCE_DIR}:/project ${PROJECT_NAME} sh /project/docker/scripts/build.sh 97 | COMMENT "Building ${PROJECT_NAME} in a Docker container") 98 | 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2014,2015,2020,2023 Applied Parallel Computing LLC, http://parallel-computing.pro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Enabling on-the-fly manipulations with LLVM IR code of CUDA sources 2 | 3 | ![logo](logo.png) 4 | 5 | Largely thanks to [LLVM](http://llvm.org/), in recent years we've seen a significant increase of interest to domain-specific compilation tools research & development. With the release of PTX backends by NVIDIA (opensource [NVPTX](http://llvm.org/docs/NVPTXUsage.html) and proprietary [libNVVM](https://developer.nvidia.com/cuda-llvm-compiler)), construction of custom LLVM-driven compilers for generating GPU binaries also becomes possible. However, two questions are still remaining: 6 | 7 | 1. How to customize the CUDA source compilation? 8 | 2. What is the best set of GPU-specific LLVM optimizations and how to continue modifying IR after applying them? 9 | 10 | The first question is the result of opensource CUDA *frontend* unavailability. 
In fact the *EDG* frontend (by Edison Design Group Inc.) used by NVIDIA CUDA compiler is the only frontend that is able to translate CUDA source into LLVM IR. LLVM's clang has basic support for some CUDA constructs, but yet is too far from implementing the entire set of parallel extensions. EDG frontend is tightly bound to the rest of CUDA compiler (*cicc*), and there is no public API to use it just for LLVM IR generation. 11 | 12 | The second question is essential for generating efficient GPU code and its further customization. It's well-known that the standard LLVM NVPTX backend and NVIDIA's libNVVM may generate different code, because libNVVM applies specific passes in addition to standard `-O3` set. For instance, as of CUDA 6.0, libNVVM has the following optimization chain for the `sm_30` target: 13 | 14 | ``` 15 | opt -nv-cuda -nvvm-pretreat -generic-to-nvvm -nv-inline-must -R __CUDA_PREC_DIV=1 -R __CUDA_PREC_SQRT=1 -opt-arch=sm_30 -inline -globaldce -lower-struct-args -memory-space-opt=1 -disable-rsqrt-opt=1 -O3 16 | ``` 17 | 18 | Some of the passes mnemonics do not exist in standard LLVM 3.0, meaning they are likely NVIDIA's proprietary extensions. Thus, GPU code generation could not be fully reproduced by the opensource NVPTX backend. On the other hand, if libNVVM backend is used, then the LLVM IR input is translated directly into PTX code, without a possibility to review and modify the optimized IR before PTX generation. 19 | 20 | In order to remove these limitations, we have created a special dynamic library. Being attached to NVIDIA CUDA compiler, this library exposes unoptimized and optimized LLVM IR code to the user and allows its on-the-fly modification. As a result, domain-specific compiler developer receives flexibility e.g. to re-target CUDA-generated LLVM IR to different architectures, or to make additional modifications to IR after executing NVIDIA's optimizations. 
Below we explain the technical details of how unoptimized and optimized LLVM IR versions have been retrieved from CUDA compiler by our dynamic library. 21 | 22 | ## NVIDIA CUDA compiler overview 23 | 24 | NVIDIA CUDA compiler is a complex set of pipelined code processing binaries. After the input source is preprocessed and decomposed into separate host and device sources, compiler driver (*nvcc*) deploys CUDA-to-LLVM compiler -- *cicc*, which shall be our main point of interest. 25 | 26 | According to the [License For Customer Use of NVIDIA Software](http://www.nvidia.com/content/DriverDownload-March2009/licence.php?lang=us), customer may not reverse engineer, decompile, or disassemble the software, nor attempt in any other manner to obtain the source code. Being in strict compliance with this requirement, we analyzed *cicc* only by means of basic debugging tool and standard C library calls instrumentation. 27 | 28 | ## Unoptimized LLVM IR retrieval 29 | 30 | Retrieval of unoptimized LLVM IR is relatively straight-forward. In order to generate the PTX code, *cicc* deploys libNVVM library functions, which have a documented interface. Instrumentation of the first call to `nvvmAddModuleToProgram` function allows to retrieve the LLVM IR for input CUDA source from the second parameter, which is the LLVM bitcode string. 
This bitcode could be parsed into LLVM Module instance using functions of a compatible LLVM release, and printed as IR: 31 | 32 | ```c++ 33 | string source = ""; 34 | source.reserve(size); 35 | source.assign(bitcode, bitcode + size); 36 | MemoryBuffer *input = MemoryBuffer::getMemBuffer(source); 37 | string err; 38 | LLVMContext &context = getGlobalContext(); 39 | initial_module = ParseBitcodeFile(input, context, &err); 40 | if (!initial_module) 41 | cerr << "Error parsing module bitcode : " << err; 42 | 43 | outs() << *initial_module; 44 | ``` 45 | 46 | On-the-fly modification of unoptimized LLVM could be achieved by exporting LLVM Module back into bitcode string: 47 | 48 | ```c++ 49 | SmallVector<char, 1024> output; 50 | raw_svector_ostream outputStream(output); 51 | WriteBitcodeToFile(initial_module, outputStream); 52 | outputStream.flush(); 53 | 54 | // Call real nvvmAddModuleToProgram 55 | return nvvmAddModuleToProgram_real(prog, output.data(), output.size(), name); 56 | ``` 57 | 58 | Note the unoptimized LLVM IR does not include math and GPU-specific builtins, that are linked-in later. 59 | 60 | ## Optimized LLVM IR retrieval 61 | 62 | The libNVVM library itself statically links to NVIDIA's customized LLVM engine, and like most of other binaries in CUDA Toolkit is fully stripped (no debug info, no function frames, etc.). Fortunately, libNVVM is still dynamically linked against the standard C library, which allows us to analyze memory allocations and data transfers. Instrumentation of `malloc` function reveals Module-sized space allocation in the beginning of `nvvmCompileProgram`: 63 | 64 | ```c++ 65 | void* result = malloc_real(size); 66 | 67 | if (called_compile) 68 | { 69 | if (size == sizeof(Module)) 70 | optimized_module = (Module*)result; 71 | } 72 | ``` 73 | 74 | Luckily, this very Module instance exists during entire compilation process, and accumulates all changes made to input LLVM IR by optimization passes. 
Thus, we only need to find an appropriate moment to intercept this Module and modify its contents. The subsequent call to `localtime` is used as heuristic. Unlike the unoptimized case, this Module could be printed and modified directly, without loading/storing any bitcode. 75 | 76 | Retrieved optimized LLVM IR is linked together with math and GPU-specific builtins and is ready for PTX backend. 77 | 78 | ## Building 79 | 80 | Unlike AMD, which uses the most recent versions of clang++ for HIP compilation, NVIDIA CUDA compiler is historically always far behind the actual release of LLVM. In order to determine the matching LLVM release, we can look into the `cicc` executable: 81 | 82 | ``` 83 | $ strings /usr/local/cuda/nvvm/bin/cicc | grep LLVM | grep 7 84 | LLVM0700H 85 | LLVM0700 86 | LLVM7.0.1 87 | llvm-mc (based on LLVM 7.0.1) 88 | ``` 89 | 90 | Prepare a Docker container with matching releases of CUDA and LLVM pre-installed: 91 | 92 | ```bash 93 | ./docker/build_docker_image.sh \ 94 | -s jammy -d llvm7-ubuntu -t "jammy" \ 95 | --branch release/7.x \ 96 | -i install \ 97 | -- \ 98 | -DLLVM_TARGETS_TO_BUILD="host;NVPTX" \ 99 | -DCMAKE_BUILD_TYPE=Release 100 | ``` 101 | 102 | Compile our dynamic libraries within the Docker container: 103 | 104 | ``` 105 | $ make 106 | g++ -g -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -I/opt/llvm-3.0/include -I/opt/cuda/nvvm/include/ -fPIC cicc.cpp -shared -o libcicc.so -ldl 107 | g++ -g -I/opt/cuda/nvvm/include/ -fPIC nvcc.cpp -shared -o libnvcc.so -ldl 108 | ``` 109 | 110 | ## Usage 111 | 112 | Finally, let's demonstrate LLVM IR on-the-fly modification in action. 
Consider the following input CUDA source: 113 | 114 | ```c++ 115 | $ cat test.cu 116 | extern "C" __device__ void kernel(int* result) { *result = 1; } 117 | ``` 118 | 119 | The LLVM IR retrieval mode is specified by two environment variables: 120 | 121 | * `CICC_MODIFY_UNOPT_MODULE=1` -- retrieve unoptimized LLVM IR and change it as specified in `modifyModule` function (`cicc.cpp` source file) 122 | * `CICC_MODIFY_OPT_MODULE=1` -- retrieve optimized LLVM IR and change it as specified in `modifyModule` function (`cicc.cpp` source file) 123 | 124 | Example `modifyModule` simply adds suffix to all existing function names: 125 | 126 | ```c++ 127 | void modifyModule(Module* module) 128 | { 129 | if (!module) return; 130 | 131 | // Add suffix to function name, for example. 132 | for (Module::iterator i = module->begin(), e = module->end(); i != e; i++) 133 | i->setName(i->getName() + "_modified"); 134 | } 135 | 136 | ``` 137 | 138 | Each of the following two commands deploys the corresponding retrieval mode: 139 | 140 | ``` 141 | CICC_MODIFY_UNOPT_MODULE=1 LD_PRELOAD=./libnvcc.so nvcc -arch=sm_30 test.cu -c -keep 142 | CICC_MODIFY_OPT_MODULE=1 LD_PRELOAD=./libnvcc.so nvcc -arch=sm_30 test.cu -c -keep 143 | ``` 144 | 145 | The `-keep` option is added to store the `test.ptx` file, which could be opened to ensure the LLVM IR modification has landed into output PTX code: 146 | 147 | ``` 148 | $ cat test.ptx 149 | // 150 | // Generated by NVIDIA NVVM Compiler 151 | // Compiler built on Thu Mar 13 19:31:35 2014 (1394735495) 152 | // Cuda compilation tools, release 6.0, V6.0.1 153 | // 154 | 155 | .version 4.0 156 | .target sm_30 157 | .address_size 64 158 | 159 | .visible .func kernel_modified( 160 | .param .b64 kernel_modified_param_0 161 | ) 162 | { 163 | .reg .s32 %r<2>; 164 | .reg .s64 %rd<2>; 165 | 166 | 167 | ld.param.u64 %rd1, [kernel_modified_param_0]; 168 | mov.u32 %r1, 1; 169 | st.u32 [%rd1], %r1; 170 | ret; 171 | } 172 | ``` 173 | 174 | ## Final credits 175 | 176 
| This library has been developed for the purpose of software interoperability and used in compilation of [CERN SixTrack application](https://github.com/apc-llc/sixtrack). 177 | 178 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM llvm7-ubuntu:jammy 2 | 3 | LABEL maintainer="dmitry@kernelgen.org" 4 | 5 | ENV DEBIAN_FRONTEND noninteractive 6 | ENV LC_ALL C.UTF-8 7 | ENV LANG en_US.UTF-8 8 | ENV LANGUAGE en_US.UTF-8 9 | 10 | RUN apt-get update && \ 11 | apt-get -y --no-install-recommends install \ 12 | cmake \ 13 | git \ 14 | ninja-build \ 15 | g++-11 && \ 16 | apt-get clean 17 | 18 | COPY . /root/ 19 | 20 | WORKDIR /root 21 | -------------------------------------------------------------------------------- /docker/build_docker_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #===- llvm/utils/docker/build_docker_image.sh ----------------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===----------------------------------------------------------------------===// 9 | set -e 10 | 11 | IMAGE_SOURCE="" 12 | DOCKER_REPOSITORY="" 13 | DOCKER_TAG="" 14 | BUILDSCRIPT_ARGS="" 15 | CHECKOUT_ARGS="" 16 | CMAKE_ENABLED_PROJECTS="" 17 | 18 | function show_usage() { 19 | cat << EOF 20 | Usage: build_docker_image.sh [options] [-- [cmake_args]...] 21 | 22 | Available options: 23 | General: 24 | -h|--help show this help message 25 | Docker-specific: 26 | -s|--source image source dir (i.e. debian10, nvidia-cuda, etc) 27 | -d|--docker-repository docker repository for the image 28 | -t|--docker-tag docker tag for the image 29 | Checkout arguments: 30 | -b|--branch git branch to checkout, i.e. 
'main', 31 | 'release/10.x' 32 | (default: 'main') 33 | -r|--revision git revision to checkout 34 | -c|--cherrypick revision to cherry-pick. Can be specified multiple times. 35 | Cherry-picks are performed in the sorted order using the 36 | following command: 37 | 'git cherry-pick \$rev'. 38 | -p|--llvm-project Add the project to a list LLVM_ENABLE_PROJECTS, passed to 39 | CMake. 40 | Can be specified multiple times. 41 | -c|--checksums name of a file, containing checksums of llvm checkout. 42 | Script will fail if checksums of the checkout do not 43 | match. 44 | Build-specific: 45 | -i|--install-target name of a cmake install target to build and include in 46 | the resulting archive. Can be specified multiple times. 47 | 48 | Required options: --source and --docker-repository, at least one 49 | --install-target. 50 | 51 | All options after '--' are passed to CMake invocation. 52 | 53 | For example, running: 54 | $ build_docker_image.sh -s debian10 -d mydocker/debian10-clang -t latest \ 55 | -p clang -i install-clang -i install-clang-resource-headers 56 | will produce two docker images: 57 | mydocker/debian10-clang-build:latest - an intermediate image used to compile 58 | clang. 59 | mydocker/clang-debian10:latest - a small image with preinstalled clang. 60 | Please note that this example produces a not very useful installation, since it 61 | doesn't override CMake defaults, which produces a Debug and non-boostrapped 62 | version of clang. 
63 | 64 | To get a 2-stage clang build, you could use this command: 65 | $ ./build_docker_image.sh -s debian10 -d mydocker/clang-debian10 -t "latest" \ 66 | -p clang -i stage2-install-clang -i stage2-install-clang-resource-headers \ 67 | -- \ 68 | -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ 69 | -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ 70 | -DCLANG_ENABLE_BOOTSTRAP=ON \ 71 | -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-resource-headers" 72 | EOF 73 | } 74 | 75 | CHECKSUMS_FILE="" 76 | SEEN_INSTALL_TARGET=0 77 | SEEN_CMAKE_ARGS=0 78 | while [[ $# -gt 0 ]]; do 79 | case "$1" in 80 | -h|--help) 81 | show_usage 82 | exit 0 83 | ;; 84 | -s|--source) 85 | shift 86 | IMAGE_SOURCE="$1" 87 | shift 88 | ;; 89 | -d|--docker-repository) 90 | shift 91 | DOCKER_REPOSITORY="$1" 92 | shift 93 | ;; 94 | -t|--docker-tag) 95 | shift 96 | DOCKER_TAG="$1" 97 | shift 98 | ;; 99 | -r|--revision|-c|--cherrypick|-b|--branch) 100 | CHECKOUT_ARGS="$CHECKOUT_ARGS $1 $2" 101 | shift 2 102 | ;; 103 | -i|--install-target) 104 | SEEN_INSTALL_TARGET=1 105 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS $1 $2" 106 | shift 2 107 | ;; 108 | -p|--llvm-project) 109 | PROJ="$2" 110 | CMAKE_ENABLED_PROJECTS="$CMAKE_ENABLED_PROJECTS;$PROJ" 111 | shift 2 112 | ;; 113 | -c|--checksums) 114 | shift 115 | CHECKSUMS_FILE="$1" 116 | shift 117 | ;; 118 | --) 119 | shift 120 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -- $*" 121 | SEEN_CMAKE_ARGS=1 122 | shift $# 123 | ;; 124 | *) 125 | echo "Unknown argument $1" 126 | exit 1 127 | ;; 128 | esac 129 | done 130 | 131 | 132 | if [ "$CMAKE_ENABLED_PROJECTS" != "" ]; then 133 | # Remove the leading ';' character. 
134 | CMAKE_ENABLED_PROJECTS="${CMAKE_ENABLED_PROJECTS:1}" 135 | 136 | if [[ $SEEN_CMAKE_ARGS -eq 0 ]]; then 137 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS --" 138 | fi 139 | BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -DLLVM_ENABLE_PROJECTS=$CMAKE_ENABLED_PROJECTS" 140 | fi 141 | 142 | command -v docker >/dev/null || 143 | { 144 | echo "Docker binary cannot be found. Please install Docker to use this script." 145 | exit 1 146 | } 147 | 148 | if [ "$IMAGE_SOURCE" == "" ]; then 149 | echo "Required argument missing: --source" 150 | exit 1 151 | fi 152 | 153 | if [ "$DOCKER_REPOSITORY" == "" ]; then 154 | echo "Required argument missing: --docker-repository" 155 | exit 1 156 | fi 157 | 158 | if [ $SEEN_INSTALL_TARGET -eq 0 ]; then 159 | echo "Please provide at least one --install-target" 160 | exit 1 161 | fi 162 | 163 | SOURCE_DIR=$(dirname $0) 164 | if [ ! -d "$SOURCE_DIR/$IMAGE_SOURCE" ]; then 165 | echo "No sources for '$IMAGE_SOURCE' were found in $SOURCE_DIR" 166 | exit 1 167 | fi 168 | 169 | BUILD_DIR=$(mktemp -d) 170 | trap "rm -rf $BUILD_DIR" EXIT 171 | echo "Using a temporary directory for the build: $BUILD_DIR" 172 | 173 | cp -r "$SOURCE_DIR/$IMAGE_SOURCE" "$BUILD_DIR/$IMAGE_SOURCE" 174 | cp -r "$SOURCE_DIR/scripts" "$BUILD_DIR/scripts" 175 | 176 | mkdir "$BUILD_DIR/checksums" 177 | if [ "$CHECKSUMS_FILE" != "" ]; then 178 | cp "$CHECKSUMS_FILE" "$BUILD_DIR/checksums/checksums.txt" 179 | fi 180 | 181 | if [ "$DOCKER_TAG" != "" ]; then 182 | DOCKER_TAG=":$DOCKER_TAG" 183 | fi 184 | 185 | echo "Building ${DOCKER_REPOSITORY}${DOCKER_TAG} from $IMAGE_SOURCE" 186 | DOCKER_BUILDKIT=1 BUILDKIT_PROGRESS=plain docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \ 187 | --build-arg "checkout_args=$CHECKOUT_ARGS" \ 188 | --build-arg "buildscript_args=$BUILDSCRIPT_ARGS" \ 189 | -f "$BUILD_DIR/$IMAGE_SOURCE/Dockerfile" \ 190 | "$BUILD_DIR" 191 | echo "Done" 192 | -------------------------------------------------------------------------------- /docker/jammy/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:experimental 2 | #===- llvm/utils/docker/jammy/build/Dockerfile -------------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===----------------------------------------------------------------------===// 9 | # Stage 1. Check out LLVM source code and run the build. 10 | FROM nvcr.io/nvidia/nvhpc:22.11-devel-cuda_multi-ubuntu22.04 as builder 11 | LABEL maintainer "dmitry@kernelgen.org" 12 | # Install build dependencies of llvm. 13 | # First, Update the apt's source list and include the sources of the packages. 14 | RUN grep deb /etc/apt/sources.list | \ 15 | sed 's/^deb/deb-src /g' >> /etc/apt/sources.list 16 | # Install compiler, python and subversion. 17 | RUN apt-get update && \ 18 | apt-get install -y --no-install-recommends ca-certificates gnupg \ 19 | build-essential cmake make python3 zlib1g wget subversion unzip git 20 | # Install a newer ninja release. It seems the older version 21 | # randomly crashes when compiling llvm. 22 | RUN wget "https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip" && \ 23 | echo "d2fea9ff33b3ef353161ed906f260d565ca55b8ca0568fa07b1d2cab90a84a07 ninja-linux.zip" \ 24 | | sha256sum -c && \ 25 | unzip ninja-linux.zip -d /usr/local/bin && \ 26 | rm ninja-linux.zip 27 | # Install mold as recommended here: https://dev-docs.kicad.org/en/build/linux/ 28 | RUN git clone https://github.com/rui314/mold.git && \ 29 | mkdir mold/build && \ 30 | cd mold/build && \ 31 | git checkout v1.7.1 && \ 32 | ../install-build-deps.sh && \ 33 | cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=c++ -G Ninja .. && \ 34 | cmake --build . && \ 35 | cmake --install . 
36 | 37 | ADD checksums /checksums 38 | ADD scripts /scripts 39 | 40 | # Checkout the source code. 41 | # Run the build. Results of the build will be available at /tmp/llvm-install/. 42 | ARG checkout_args 43 | ARG buildscript_args 44 | RUN --mount=type=tmpfs,destination=/tmp \ 45 | /scripts/checkout.sh ${checkout_args} && \ 46 | /scripts/build_install_llvm.sh --to /llvm-install ${buildscript_args} 47 | 48 | 49 | # Stage 2. Produce a minimal release image with build results. 50 | FROM nvcr.io/nvidia/nvhpc:22.11-devel-cuda_multi-ubuntu22.04 51 | LABEL maintainer "dmitry@kernelgen.org" 52 | # Install packages for minimal useful image. 53 | RUN apt-get update && \ 54 | apt-get install -y --no-install-recommends libstdc++-9-dev binutils && \ 55 | rm -rf /var/lib/apt/lists/* 56 | # Copy build results of stage 1 to /usr/local. 57 | COPY --from=builder /llvm-install/ /usr/local/ 58 | 59 | -------------------------------------------------------------------------------- /docker/scripts/build.sh: -------------------------------------------------------------------------------- 1 | set -e -x 2 | cd /project 3 | mkdir -p build-docker 4 | cd build-docker 5 | cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=/usr/local/lib/cmake/llvm -DCMAKE_CXX_COMPILER=g++-11 .. && \ 6 | #cmake --build . 7 | make VERBOSE=1 8 | -------------------------------------------------------------------------------- /docker/scripts/build_install_llvm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #===- llvm/utils/docker/scripts/build_install_llvm.sh ---------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 
6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===-----------------------------------------------------------------------===// 9 | 10 | set -e 11 | 12 | function show_usage() { 13 | cat << EOF 14 | Usage: build_install_llvm.sh [options] -- [cmake-args] 15 | 16 | Run cmake with the specified arguments. Used inside docker container. 17 | Passes additional -DCMAKE_INSTALL_PREFIX and puts the build results into 18 | the directory specified by --to option. 19 | 20 | Available options: 21 | -h|--help show this help message 22 | -i|--install-target name of a cmake install target to build and include in 23 | the resulting archive. Can be specified multiple times. 24 | --to destination directory where to install the targets. 25 | Required options: --to, at least one --install-target. 26 | 27 | All options after '--' are passed to CMake invocation. 28 | EOF 29 | } 30 | 31 | CMAKE_ARGS="" 32 | CMAKE_INSTALL_TARGETS="" 33 | LLVM_INSTALL_DIR="" 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case "$1" in 37 | -i|--install-target) 38 | shift 39 | CMAKE_INSTALL_TARGETS="$CMAKE_INSTALL_TARGETS $1" 40 | shift 41 | ;; 42 | --to) 43 | shift 44 | LLVM_INSTALL_DIR="$1" 45 | shift 46 | ;; 47 | --) 48 | shift 49 | CMAKE_ARGS="$*" 50 | shift $# 51 | ;; 52 | -h|--help) 53 | show_usage 54 | exit 0 55 | ;; 56 | *) 57 | echo "Unknown option: $1" 58 | exit 1 59 | esac 60 | done 61 | 62 | if [ "$CMAKE_INSTALL_TARGETS" == "" ]; then 63 | echo "No install targets. Please pass one or more --install-target." 64 | exit 1 65 | fi 66 | 67 | if [ "$LLVM_INSTALL_DIR" == "" ]; then 68 | echo "No install directory. Please specify the --to argument." 69 | exit 1 70 | fi 71 | 72 | LLVM_BUILD_DIR=/tmp/llvm-build 73 | 74 | mkdir -p "$LLVM_INSTALL_DIR" 75 | 76 | mkdir -p /build 77 | pushd /build 78 | 79 | # Run the build as specified in the build arguments. 
80 | echo "Running build" 81 | cmake -GNinja \ 82 | -DCMAKE_LINKER=mold -DCMAKE_C_FLAGS=-fuse-ld=mold -DCMAKE_CXX_FLAGS=-fuse-ld=mold \ 83 | -DCMAKE_INSTALL_PREFIX="$LLVM_INSTALL_DIR" \ 84 | $CMAKE_ARGS \ 85 | "$LLVM_BUILD_DIR/src/llvm" 86 | ninja $CMAKE_INSTALL_TARGETS 87 | 88 | popd 89 | 90 | echo "Done" 91 | -------------------------------------------------------------------------------- /docker/scripts/checkout.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #===- llvm/utils/docker/scripts/checkout.sh ---------------------===// 3 | # 4 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 | # See https://llvm.org/LICENSE.txt for license information. 6 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 | # 8 | #===-----------------------------------------------------------------------===// 9 | 10 | set -e 11 | 12 | function show_usage() { 13 | cat << EOF 14 | Usage: checkout.sh [options] 15 | 16 | Checkout git sources into /tmp/llvm-build/src. Used inside a docker container. 17 | 18 | Available options: 19 | -h|--help show this help message 20 | -b|--branch git branch to checkout, i.e. 'main', 21 | 'release/10.x' 22 | (default: 'main') 23 | -r|--revision git revision to checkout 24 | -c|--cherrypick revision to cherry-pick. Can be specified multiple times. 25 | Cherry-picks are performed in the sorted order using the 26 | following command: 27 | 'git cherry-pick \$rev)'. 
28 | EOF 29 | } 30 | 31 | LLVM_GIT_REV="" 32 | CHERRYPICKS="" 33 | LLVM_BRANCH="" 34 | 35 | while [[ $# -gt 0 ]]; do 36 | case "$1" in 37 | -r|--revision) 38 | shift 39 | LLVM_GIT_REV="$1" 40 | shift 41 | ;; 42 | -c|--cherrypick) 43 | shift 44 | CHERRYPICKS="$CHERRYPICKS $1" 45 | shift 46 | ;; 47 | -b|--branch) 48 | shift 49 | LLVM_BRANCH="$1" 50 | shift 51 | ;; 52 | -h|--help) 53 | show_usage 54 | exit 0 55 | ;; 56 | *) 57 | echo "Unknown option: $1" 58 | exit 1 59 | esac 60 | done 61 | 62 | if [ "$LLVM_BRANCH" == "" ]; then 63 | LLVM_BRANCH="main" 64 | fi 65 | 66 | if [ "$LLVM_GIT_REV" != "" ]; then 67 | GIT_REV_ARG="$LLVM_GIT_REV" 68 | echo "Checking out git revision $LLVM_GIT_REV." 69 | else 70 | GIT_REV_ARG="" 71 | echo "Checking out latest git revision." 72 | fi 73 | 74 | # Sort cherrypicks and remove duplicates. 75 | CHERRYPICKS="$(echo "$CHERRYPICKS" | xargs -n1 | sort | uniq | xargs)" 76 | 77 | function apply_cherrypicks() { 78 | local CHECKOUT_DIR="$1" 79 | 80 | [ "$CHERRYPICKS" == "" ] || echo "Applying cherrypicks" 81 | pushd "$CHECKOUT_DIR" 82 | 83 | # This function is always called on a sorted list of cherrypicks. 84 | for CHERRY_REV in $CHERRYPICKS; do 85 | echo "Cherry-picking $CHERRY_REV into $CHECKOUT_DIR" 86 | git cherry-pick $CHERRY_REV 87 | done 88 | 89 | popd 90 | } 91 | 92 | LLVM_BUILD_DIR=/tmp/llvm-build 93 | 94 | # Get the sources from git. 95 | echo "Checking out sources from git" 96 | mkdir -p "$LLVM_BUILD_DIR/src" 97 | CHECKOUT_DIR="$LLVM_BUILD_DIR/src" 98 | 99 | echo "Checking out https://github.com/llvm/llvm-project.git to $CHECKOUT_DIR" 100 | git clone -b $LLVM_BRANCH --single-branch \ 101 | "https://github.com/llvm/llvm-project.git" \ 102 | "$CHECKOUT_DIR" 103 | 104 | pushd $CHECKOUT_DIR 105 | git checkout -q $GIT_REV_ARG 106 | popd 107 | 108 | # We apply cherrypicks to all repositories regardless of whether the revision 109 | # changes this repository or not. 
For repositories not affected by the 110 | # cherrypick, applying the cherrypick is a no-op. 111 | apply_cherrypicks "$CHECKOUT_DIR" 112 | 113 | CHECKSUMS_FILE="/tmp/checksums/checksums.txt" 114 | 115 | if [ -f "$CHECKSUMS_FILE" ]; then 116 | echo "Validating checksums for LLVM checkout..." 117 | python "$(dirname $0)/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \ 118 | --partial --multi_dir "$LLVM_BUILD_DIR/src" 119 | else 120 | echo "Skipping checksumming checks..." 121 | fi 122 | 123 | echo "Done" 124 | -------------------------------------------------------------------------------- /docker/scripts/llvm_checksum/llvm_checksum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ A small program to compute checksums of LLVM checkout. 3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import hashlib 9 | import logging 10 | import re 11 | import sys 12 | from argparse import ArgumentParser 13 | from project_tree import * 14 | 15 | SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") 16 | 17 | 18 | def main(): 19 | parser = ArgumentParser() 20 | parser.add_argument( 21 | "-v", "--verbose", action="store_true", help="enable debug logging") 22 | parser.add_argument( 23 | "-c", 24 | "--check", 25 | metavar="reference_file", 26 | help="read checksums from reference_file and " + 27 | "check they match checksums of llvm_path.") 28 | parser.add_argument( 29 | "--partial", 30 | action="store_true", 31 | help="ignore projects from reference_file " + 32 | "that are not checked out in llvm_path.") 33 | parser.add_argument( 34 | "--multi_dir", 35 | action="store_true", 36 | help="indicates llvm_path contains llvm, checked out " + 37 | "into multiple directories, as opposed to a " + 38 | "typical single source tree checkout.") 39 | parser.add_argument("llvm_path") 40 | 41 | args = parser.parse_args() 42 | if 
args.check is not None: 43 | with open(args.check, "r") as f: 44 | reference_checksums = ReadLLVMChecksums(f) 45 | else: 46 | reference_checksums = None 47 | 48 | if args.verbose: 49 | logging.basicConfig(level=logging.DEBUG) 50 | 51 | llvm_projects = CreateLLVMProjects(not args.multi_dir) 52 | checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) 53 | 54 | if reference_checksums is None: 55 | WriteLLVMChecksums(checksums, sys.stdout) 56 | sys.exit(0) 57 | 58 | if not ValidateChecksums(reference_checksums, checksums, args.partial): 59 | sys.stdout.write("Checksums differ.\nNew checksums:\n") 60 | WriteLLVMChecksums(checksums, sys.stdout) 61 | sys.stdout.write("Reference checksums:\n") 62 | WriteLLVMChecksums(reference_checksums, sys.stdout) 63 | sys.exit(1) 64 | else: 65 | sys.stdout.write("Checksums match.") 66 | 67 | 68 | def ComputeLLVMChecksums(root_path, projects): 69 | """Compute checksums for LLVM sources checked out using svn. 70 | 71 | Args: 72 | root_path: a directory of llvm checkout. 73 | projects: a list of LLVMProject instances, which describe checkout paths, 74 | relative to root_path. 75 | 76 | Returns: 77 | A dict mapping from project name to project checksum. 78 | """ 79 | hash_algo = hashlib.sha256 80 | 81 | def collapse_svn_substitutions(contents): 82 | # Replace svn substitutions for $Date$ and $LastChangedDate$. 83 | # Unfortunately, these are locale-specific. 84 | return SVN_DATES_REGEX.sub("$\1$", contents) 85 | 86 | def read_and_collapse_svn_subsitutions(file_path): 87 | with open(file_path, "rb") as f: 88 | contents = f.read() 89 | new_contents = collapse_svn_substitutions(contents) 90 | if contents != new_contents: 91 | logging.debug("Replaced svn keyword substitutions in %s", file_path) 92 | logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) 93 | return new_contents 94 | 95 | project_checksums = dict() 96 | # Hash each project. 
97 | for proj in projects: 98 | project_root = os.path.join(root_path, proj.relpath) 99 | if not os.path.exists(project_root): 100 | logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath, 101 | proj.name) 102 | continue 103 | 104 | files = list() 105 | 106 | def add_file_hash(file_path): 107 | if os.path.islink(file_path) and not os.path.exists(file_path): 108 | content = os.readlink(file_path) 109 | else: 110 | content = read_and_collapse_svn_subsitutions(file_path) 111 | hasher = hash_algo() 112 | hasher.update(content) 113 | file_digest = hasher.hexdigest() 114 | logging.debug("Checksum %s for file %s", file_digest, file_path) 115 | files.append((file_path, file_digest)) 116 | 117 | logging.info("Computing checksum for %s", proj.name) 118 | WalkProjectFiles(root_path, projects, proj, add_file_hash) 119 | 120 | # Compute final checksum. 121 | files.sort(key=lambda x: x[0]) 122 | hasher = hash_algo() 123 | for file_path, file_digest in files: 124 | file_path = os.path.relpath(file_path, project_root) 125 | hasher.update(file_path) 126 | hasher.update(file_digest) 127 | project_checksums[proj.name] = hasher.hexdigest() 128 | return project_checksums 129 | 130 | 131 | def WriteLLVMChecksums(checksums, f): 132 | """Writes checksums to a text file. 133 | 134 | Args: 135 | checksums: a dict mapping from project name to project checksum (result of 136 | ComputeLLVMChecksums). 137 | f: a file object to write into. 138 | """ 139 | 140 | for proj in sorted(checksums.keys()): 141 | f.write("{} {}\n".format(checksums[proj], proj)) 142 | 143 | 144 | def ReadLLVMChecksums(f): 145 | """Reads checksums from a text file, produced by WriteLLVMChecksums. 146 | 147 | Returns: 148 | A dict, mapping from project name to project checksum. 
149 | """ 150 | checksums = {} 151 | while True: 152 | line = f.readline() 153 | if line == "": 154 | break 155 | checksum, proj = line.split() 156 | checksums[proj] = checksum 157 | return checksums 158 | 159 | 160 | def ValidateChecksums(reference_checksums, 161 | new_checksums, 162 | allow_missing_projects=False): 163 | """Validates that reference_checksums and new_checksums match. 164 | 165 | Args: 166 | reference_checksums: a dict of reference checksums, mapping from a project 167 | name to a project checksum. 168 | new_checksums: a dict of checksums to be checked, mapping from a project 169 | name to a project checksum. 170 | allow_missing_projects: 171 | When True, reference_checksums may contain more projects than 172 | new_checksums. Projects missing from new_checksums are ignored. 173 | When False, new_checksums and reference_checksums must contain checksums 174 | for the same set of projects. If there is a project in 175 | reference_checksums, missing from new_checksums, ValidateChecksums 176 | will return False. 177 | 178 | Returns: 179 | True, if checksums match with regards to allow_missing_projects flag value. 180 | False, otherwise. 181 | """ 182 | if not allow_missing_projects: 183 | if len(new_checksums) != len(reference_checksums): 184 | return False 185 | 186 | for proj, checksum in new_checksums.items(): 187 | # We never computed a checksum for this project. 188 | if proj not in reference_checksums: 189 | return False 190 | # Checksum did not match. 191 | if reference_checksums[proj] != checksum: 192 | return False 193 | 194 | return True 195 | 196 | 197 | if __name__ == "__main__": 198 | main() 199 | -------------------------------------------------------------------------------- /docker/scripts/llvm_checksum/project_tree.py: -------------------------------------------------------------------------------- 1 | """Contains helper functions to compute checksums for LLVM checkouts. 
2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import logging 8 | import os 9 | import os.path 10 | import sys 11 | 12 | 13 | class LLVMProject(object): 14 | """An LLVM project with a descriptive name and a relative checkout path. 15 | """ 16 | 17 | def __init__(self, name, relpath): 18 | self.name = name 19 | self.relpath = relpath 20 | 21 | def is_subproject(self, other_project): 22 | """ Check if self is checked out as a subdirectory of other_project. 23 | """ 24 | return self.relpath.startswith(other_project.relpath) 25 | 26 | 27 | def WalkProjectFiles(checkout_root, all_projects, project, visitor): 28 | """ Walk over all files inside a project without recursing into subprojects, '.git' and '.svn' subfolders. 29 | 30 | checkout_root: root of the LLVM checkout. 31 | all_projects: projects in the LLVM checkout. 32 | project: a project to walk the files of. Must be inside all_projects. 33 | visitor: a function called on each visited file. 34 | """ 35 | assert project in all_projects 36 | 37 | ignored_paths = set() 38 | for other_project in all_projects: 39 | if other_project != project and other_project.is_subproject(project): 40 | ignored_paths.add(os.path.join(checkout_root, other_project.relpath)) 41 | 42 | def raise_error(err): 43 | raise err 44 | 45 | project_root = os.path.join(checkout_root, project.relpath) 46 | for root, dirs, files in os.walk(project_root, onerror=raise_error): 47 | dirs[:] = [ 48 | d for d in dirs 49 | if d != ".svn" and d != ".git" and 50 | os.path.join(root, d) not in ignored_paths 51 | ] 52 | for f in files: 53 | visitor(os.path.join(root, f)) 54 | 55 | 56 | def CreateLLVMProjects(single_tree_checkout): 57 | """Returns a list of LLVMProject instances, describing relative paths of a typical LLVM checkout. 
58 | 59 | Args: 60 | single_tree_checkout: 61 | When True, relative paths for each project points to a typical single 62 | source tree checkout. 63 | When False, relative paths for each projects points to a separate 64 | directory. However, clang-tools-extra is an exception, its relative path 65 | will always be 'clang/tools/extra'. 66 | """ 67 | # FIXME: cover all of llvm projects. 68 | 69 | # Projects that reside inside 'projects/' in a single source tree checkout. 70 | ORDINARY_PROJECTS = [ 71 | "compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind", 72 | "test-suite" 73 | ] 74 | # Projects that reside inside 'tools/' in a single source tree checkout. 75 | TOOLS_PROJECTS = ["clang", "lld", "lldb"] 76 | 77 | if single_tree_checkout: 78 | projects = [LLVMProject("llvm", "")] 79 | projects += [ 80 | LLVMProject(p, os.path.join("projects", p)) for p in ORDINARY_PROJECTS 81 | ] 82 | projects += [ 83 | LLVMProject(p, os.path.join("tools", p)) for p in TOOLS_PROJECTS 84 | ] 85 | projects.append( 86 | LLVMProject("clang-tools-extra", 87 | os.path.join("tools", "clang", "tools", "extra"))) 88 | else: 89 | projects = [LLVMProject("llvm", "llvm")] 90 | projects += [LLVMProject(p, p) for p in ORDINARY_PROJECTS] 91 | projects += [LLVMProject(p, p) for p in TOOLS_PROJECTS] 92 | projects.append( 93 | LLVMProject("clang-tools-extra", os.path.join("clang", "tools", 94 | "extra"))) 95 | return projects 96 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apc-llc/nvcc-llvm-ir/46601d8dbf23de070b4c016770c7cb0d37dfe2f3/logo.png -------------------------------------------------------------------------------- /src/cicc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 
| #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | using namespace llvm; 23 | using namespace std; 24 | 25 | namespace fs = std::filesystem; 26 | 27 | #define LIBNVVM "libnvvm.so" 28 | 29 | static void* libnvvm = NULL; 30 | 31 | #define bind_lib(lib) \ 32 | if (!libnvvm) \ 33 | { \ 34 | libnvvm = dlopen(lib, RTLD_NOW | RTLD_GLOBAL); \ 35 | if (!libnvvm) \ 36 | { \ 37 | fprintf(stderr, "Error loading %s: %s\n", lib, dlerror()); \ 38 | abort(); \ 39 | } \ 40 | } 41 | 42 | #define bind_sym(handle, sym, retty, ...) \ 43 | typedef retty (*sym##_func_t)(__VA_ARGS__); \ 44 | static sym##_func_t sym##_real = NULL; \ 45 | if (!sym##_real) \ 46 | { \ 47 | sym##_real = (sym##_func_t)dlsym(handle, #sym); \ 48 | if (!sym##_real) \ 49 | { \ 50 | fprintf(stderr, "Error loading %s: %s\n", #sym, dlerror()); \ 51 | abort(); \ 52 | } \ 53 | } 54 | 55 | static Module* initial_module = NULL; 56 | 57 | static bool called_compile = false; 58 | 59 | typedef void (*RunModulePassFunc)(Module* m); 60 | 61 | // Load the user-defined module pass from the specified shared library file. 62 | static RunModulePassFunc* getModulePass(std::string filename) 63 | { 64 | // Module pass file must exist. 65 | fs::path p(filename); 66 | std::error_code ec; // For noexcept overload usage. 67 | if (!fs::exists(p, ec) || !ec) 68 | return nullptr; 69 | 70 | // Module pass must be readable. 
71 | auto perms = fs::status(p, ec).permissions(); 72 | if ((perms & fs::perms::owner_read) == fs::perms::none || 73 | (perms & fs::perms::group_read) == fs::perms::none || 74 | (perms & fs::perms::others_read) == fs::perms::none) 75 | return nullptr; 76 | 77 | void* handle = dlopen(filename.c_str(), RTLD_NOW); 78 | if (!handle) return nullptr; 79 | 80 | RunModulePassFunc* runModulePass = (RunModulePassFunc*)dlsym(handle, "runModulePass"); 81 | if (!runModulePass) return nullptr; 82 | 83 | return runModulePass; 84 | } 85 | 86 | nvvmResult nvvmAddModuleToProgram(nvvmProgram prog, const char *bitcode, size_t size, const char *name) 87 | { 88 | bind_lib(LIBNVVM); 89 | bind_sym(libnvvm, nvvmAddModuleToProgram, nvvmResult, nvvmProgram, const char*, size_t, const char*); 90 | 91 | // Load module from bitcode. 92 | const char* filename = getenv("CICC_MODIFY_UNOPT_MODULE"); 93 | if (filename && !initial_module) 94 | { 95 | auto runModulePass = getModulePass(filename); 96 | if (runModulePass) 97 | { 98 | string source = ""; 99 | source.reserve(size); 100 | source.assign(bitcode, bitcode + size); 101 | auto input = MemoryBuffer::getMemBuffer(source); 102 | LLVMContext context; 103 | auto m = parseBitcodeFile(input.get()->getMemBufferRef(), context); 104 | initial_module = m.get().get(); 105 | if (!initial_module) 106 | cerr << "Error parsing module bitcode" << endl; 107 | 108 | (*runModulePass)(initial_module); 109 | 110 | // Save module back into bitcode. 
111 | SmallVector output; 112 | raw_svector_ostream outputStream(output); 113 | WriteBitcodeToFile(*initial_module, outputStream); 114 | 115 | // Call real nvvmAddModuleToProgram 116 | return nvvmAddModuleToProgram_real(prog, output.data(), output.size(), name); 117 | 118 | } 119 | } 120 | 121 | called_compile = true; 122 | 123 | // Call real nvvmAddModuleToProgram 124 | return nvvmAddModuleToProgram_real(prog, bitcode, size, name); 125 | } 126 | 127 | #undef bind_lib 128 | 129 | #define LIBC "libc.so.6" 130 | 131 | static void* libc = NULL; 132 | 133 | #define bind_lib(lib) \ 134 | if (!libc) \ 135 | { \ 136 | libc = dlopen(lib, RTLD_NOW | RTLD_GLOBAL); \ 137 | if (!libc) \ 138 | { \ 139 | fprintf(stderr, "Error loading %s: %s\n", lib, dlerror()); \ 140 | abort(); \ 141 | } \ 142 | } 143 | 144 | static Module* optimized_module = NULL; 145 | 146 | struct tm *localtime(const time_t *timep) 147 | { 148 | static bool localtime_first_call = true; 149 | 150 | bind_lib(LIBC); 151 | bind_sym(libc, localtime, struct tm*, const time_t*); 152 | 153 | const char* filename = getenv("CICC_MODIFY_OPT_MODULE"); 154 | if (filename && called_compile && localtime_first_call) 155 | { 156 | localtime_first_call = false; 157 | 158 | auto runModulePass = getModulePass(filename); 159 | if (runModulePass) 160 | (*runModulePass)(optimized_module); 161 | } 162 | 163 | return localtime_real(timep); 164 | } 165 | 166 | #include 167 | 168 | #define MAX_SBRKS 16 169 | 170 | struct sbrk_t { void* address; size_t size; }; 171 | static sbrk_t sbrks[MAX_SBRKS]; 172 | static int nsbrks = 0; 173 | 174 | static std::mutex mtx; 175 | 176 | extern "C" void* malloc(size_t size) 177 | { 178 | if (!size) return NULL; 179 | 180 | static bool __thread inside_malloc = false; 181 | 182 | if (!inside_malloc) 183 | { 184 | inside_malloc = true; 185 | 186 | bind_lib(LIBC); 187 | bind_sym(libc, malloc, void*, size_t); 188 | 189 | inside_malloc = false; 190 | 191 | void* result = malloc_real(size); 192 | 193 | if 
(called_compile && !optimized_module) 194 | { 195 | if (size == sizeof(Module)) 196 | optimized_module = (Module*)result; 197 | } 198 | 199 | return result; 200 | } 201 | 202 | void* result = sbrk(size); 203 | if (nsbrks == MAX_SBRKS) 204 | { 205 | fprintf(stderr, "Out of sbrk tracking pool space\n"); 206 | mtx.unlock(); 207 | abort(); 208 | } 209 | mtx.lock(); 210 | sbrk_t s; s.address = result; s.size = size; 211 | sbrks[nsbrks++] = s; 212 | mtx.unlock(); 213 | 214 | return result; 215 | } 216 | 217 | extern "C" void* realloc(void* ptr, size_t size) 218 | { 219 | bind_lib(LIBC); 220 | bind_sym(libc, realloc, void*, void*, size_t); 221 | 222 | for (int i = 0; i < nsbrks; i++) 223 | if (ptr == sbrks[i].address) 224 | { 225 | void* result = malloc(size); 226 | #define MIN(a,b) (a) < (b) ? (a) : (b) 227 | memcpy(result, ptr, MIN(size, sbrks[i].size)); 228 | return result; 229 | } 230 | 231 | return realloc_real(ptr, size); 232 | } 233 | 234 | extern "C" void free(void* ptr) 235 | { 236 | bind_lib(LIBC); 237 | bind_sym(libc, free, void, void*); 238 | 239 | mtx.lock(); 240 | for (int i = 0; i < nsbrks; i++) 241 | if (ptr == sbrks[i].address) return; 242 | mtx.unlock(); 243 | 244 | free_real(ptr); 245 | } 246 | 247 | -------------------------------------------------------------------------------- /src/nvcc-llvm-ir.cpp: -------------------------------------------------------------------------------- 1 | // This wrapper simply converts --nvcc-llvm-ir-unopt and --nvcc-llvm-ir-opt 2 | // arguments into CICC_MODIFY_UNOPT_MODULE=1 and CICC_MODIFY_OPT_MODULE=1 3 | // env vars, respectively. We have to do it this way, because CMake does not 4 | // support prepending compilers with environment variables. 
5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | // True when 'str' begins with the prefix 'start'. 16 | static bool startsWith(string const &str, string const &start) 17 | { 18 | if (str.length() >= start.length()) 19 | return (str.compare(0, start.length(), start) == 0); 20 | 21 | return false; 22 | } 23 | 24 | // True when 'str' ends with the suffix 'ending'. 25 | static bool endsWith(string const &str, string const &ending) 26 | { 27 | if (str.length() >= ending.length()) 28 | return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0); 29 | 30 | return false; 31 | } 32 | 33 | int main(int argc, char** argv) 34 | { 35 | stringstream ss; 36 | if (argc < 2) { cerr << "Usage: " << argv[0] << " <compiler> [args...]" << endl; return 1; } string progname = argv[1]; // BUGFIX: guard added — argv[1] is NULL when the wrapper is run without arguments, and std::string(NULL) is UB 37 | // Do not do anything further, if we are not running nvcc. 38 | if (endsWith(progname, "nvcc")) 39 | { 40 | string unopt, opt; 41 | for (int i = 1; i < argc; i++) 42 | { 43 | string arg = argv[i]; 44 | if (startsWith(arg, "--nvcc-llvm-ir-unopt=")) 45 | { 46 | unopt = arg.substr(string("--nvcc-llvm-ir-unopt=").length()); 47 | continue; 48 | } 49 | if (startsWith(arg, "--nvcc-llvm-ir-opt=")) 50 | { 51 | opt = arg.substr(string("--nvcc-llvm-ir-opt=").length()); 52 | continue; 53 | } 54 | } 55 | 56 | // Cannot be both unopt and opt at the same time.
56 | if (unopt != "") 57 | ss << "CICC_MODIFY_UNOPT_MODULE=" << unopt << " "; 58 | else if (opt != "") 59 | ss << "CICC_MODIFY_OPT_MODULE=" << opt << " "; 60 | 61 | ss << "LD_PRELOAD=" << LIBNVCC << " " << argv[1]; 62 | } 63 | 64 | for (int i = 2; i < argc; i++) 65 | { 66 | string arg = argv[i]; 67 | if (startsWith(arg, "--nvcc-llvm-ir-unopt=")) 68 | continue; 69 | if (startsWith(arg, "--nvcc-llvm-ir-opt=")) 70 | continue; 71 | 72 | ss << " " << arg; 73 | } 74 | 75 | string cmd = ss.str(); 76 | cout << cmd << endl; 77 | return system(cmd.c_str()); 78 | } 79 | 80 | -------------------------------------------------------------------------------- /src/nvcc.cpp: -------------------------------------------------------------------------------- 1 | // This wrapper monitors the nvcc driver program. If the driver program 2 | // executes cicc, then we prepend its execution with our preloaded library. 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | static bool endsWith(string const &str, string const &ending) 14 | { 15 | if (str.length() >= ending.length()) 16 | return (str.compare(str.length() - ending.length(), ending.length(), ending) == 0); 17 | 18 | return false; 19 | } 20 | 21 | __attribute__((constructor)) static void activate(int argc, char** argv) 22 | { 23 | // Do not do anything, if we are not running cicc. 
24 | string progname = argv[0]; 25 | if (!endsWith(progname, "cicc")) 26 | return; 27 | 28 | stringstream ss; 29 | ss << "LD_PRELOAD=" << LIBCICC << " " << argv[0]; 30 | for (int i = 1; i < argc; i++) 31 | { 32 | string arg = argv[i]; 33 | ss << " " << arg; 34 | } 35 | 36 | string cmd = ss.str(); 37 | int result = system(cmd.c_str()); 38 | exit(result); 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/pass1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace llvm; 21 | using namespace std; 22 | 23 | // Add suffix to function name, for example. 24 | extern "C" void runModulePass(Module* module) 25 | { 26 | if (!module) return; 27 | 28 | for (Module::iterator i = module->begin(), e = module->end(); i != e; i++) 29 | if (!i->isIntrinsic()) 30 | i->setName(i->getName() + "_modified"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/pass2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace llvm; 21 | using namespace std; 22 | 23 | // Starting form the specified block, follow all braches of parallel region, 24 | // marking target blocks as parallel. Continue until returning back into 25 | // block marked as parallel, or until the end_parallel_region call is approached. 
26 | // However, the only valid stopping condition is end_parallel_region, which 27 | // is indicated by "true" return value. 28 | static bool followParallelBasicBlock(BasicBlock* bb, list& pbl, int nparallel) 29 | { 30 | bool result = false; 31 | 32 | for (BasicBlock::iterator ii = bb->begin(), ie = bb->end(); ii != ie; ii++) 33 | { 34 | CallInst* ci = dyn_cast(cast(ii)); 35 | if (ci) 36 | { 37 | Function *callee = dyn_cast( 38 | ci->getCalledValue()->stripPointerCasts()); 39 | if (!callee) continue; 40 | if (callee->getName() == "begin_parallel_region") 41 | { 42 | fprintf(stderr, "nvcc-llvm-ir: nested parallel regions are not supported\n"); 43 | exit(1); 44 | } 45 | if (callee->getName() == "end_parallel_region") 46 | { 47 | // Move CallInst and all insts below CallInst to a new block. 48 | BasicBlock *nb1 = NULL; 49 | { 50 | BasicBlock::iterator SplitIt = ii; 51 | while (isa(SplitIt) || isa(SplitIt)) 52 | SplitIt++; 53 | stringstream name; 54 | name << ".end_parallel_" << nparallel; 55 | nb1 = bb->splitBasicBlock(SplitIt, bb->getName() + name.str()); 56 | } 57 | 58 | // Nuke end_parallel_region call. 59 | nb1->begin()->eraseFromParent(); 60 | 61 | // The end of parallel region has been found - leave now. 62 | return true; 63 | } 64 | } 65 | 66 | // Follow successors in BranchInst, SwitchInst and IndirectBranchInst. 67 | // Skip blocks that are already known to belong to parallel region. 
68 | BranchInst* bi = dyn_cast(cast(ii)); 69 | if (bi) 70 | { 71 | for (int i = 0, e = bi->getNumSuccessors(); i != e; i++) 72 | { 73 | BasicBlock* succ = bi->getSuccessor(i); 74 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 75 | pbl.push_back(succ); 76 | result |= followParallelBasicBlock(succ, pbl, nparallel); 77 | } 78 | } 79 | SwitchInst* si = dyn_cast(cast(ii)); 80 | if (si) 81 | { 82 | for (int i = 0, e = si->getNumSuccessors(); i != e; i++) 83 | { 84 | BasicBlock* succ = si->getSuccessor(i); 85 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 86 | pbl.push_back(succ); 87 | result |= followParallelBasicBlock(succ, pbl, nparallel); 88 | } 89 | } 90 | IndirectBrInst* ibi = dyn_cast(cast(ii)); 91 | if (ibi) 92 | { 93 | for (int i = 0, e = ibi->getNumSuccessors(); i != e; i++) 94 | { 95 | BasicBlock* succ = ibi->getSuccessor(i); 96 | if (find(pbl.begin(), pbl.end(), succ) != pbl.end()) continue; 97 | pbl.push_back(succ); 98 | result |= followParallelBasicBlock(succ, pbl, nparallel); 99 | } 100 | } 101 | } 102 | 103 | return result; 104 | } 105 | 106 | // Mark basic blocks that belong to parallel regions. 107 | static void markParallelBasicBlocks(Module* module, vector& parallelBlocks) 108 | { 109 | Function* begin_parallel_region = module->getFunction("begin_parallel_region"); 110 | Function* end_parallel_region = module->getFunction("end_parallel_region"); 111 | 112 | // If parallel region guards are not declared, then they are not used 113 | // anywhere => no parallel regions, nothing to do, leave early. 
114 | if (!begin_parallel_region && !end_parallel_region) 115 | return; 116 | 117 | if (!begin_parallel_region) 118 | { 119 | fprintf(stderr, "nvcc-llvm-ir: unmatched end_parallel_region found\n"); 120 | exit(1); 121 | } 122 | if (!end_parallel_region) 123 | { 124 | fprintf(stderr, "nvcc-llvm-ir: unmatched begin_parallel_region found\n"); 125 | exit(1); 126 | } 127 | 128 | list pbl; 129 | 130 | // 1) Split basic blocks at calls to begin_/end_no_predicate_region. 131 | // 2) Mark basic blocks that belong to loop regions. 132 | for (Module::iterator fi = module->begin(), fe = module->end(); fi != fe; fi++) 133 | { 134 | int nparallel = 0; 135 | BasicBlock* restart = NULL; 136 | do 137 | { 138 | for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; bi++) 139 | { 140 | BasicBlock* b = &*bi; 141 | if (restart && (b != restart)) continue; 142 | 143 | restart = NULL; 144 | 145 | // Skip blocks that are already known to belong to parallel region. 146 | if (find(pbl.begin(), pbl.end(), b) != pbl.end()) continue; 147 | 148 | for (BasicBlock::iterator ii = b->begin(), ie = b->end(); ii != ie; ii++) 149 | { 150 | CallInst* ci = dyn_cast(cast(ii)); 151 | if (!ci) continue; 152 | Function *callee = dyn_cast( 153 | ci->getCalledValue()->stripPointerCasts()); 154 | if (!callee) continue; 155 | if (callee->getName() != "begin_parallel_region") 156 | continue; 157 | 158 | // Move CallInst and all insts below CallInst to a new block. 159 | BasicBlock *nb1 = NULL; 160 | { 161 | BasicBlock::iterator SplitIt = ii; 162 | while (isa(SplitIt) || isa(SplitIt)) 163 | SplitIt++; 164 | stringstream name; 165 | name << ".begin_parallel_" << nparallel; 166 | nb1 = bi->splitBasicBlock(SplitIt, b->getName() + name.str()); 167 | } 168 | 169 | // Nuke begin_parallel_region call. 170 | nb1->begin()->eraseFromParent(); 171 | 172 | // Add nb1 to the list of parallel blocks. 
173 | pbl.push_back(nb1); 174 | 175 | // Starting from nb1, follow all branches of parallel region, marking target 176 | // blocks as parallel. Continue until returning back into block marked as 177 | // parallel, or until the end_parallel_region call is approached. 178 | // However, the only valid stopping condition is end_parallel_region, which 179 | // is indicated by "true" return value. 180 | if (!followParallelBasicBlock(nb1, pbl, nparallel)) 181 | { 182 | fprintf(stderr, "nvcc-llvm-ir: unmatched begin_parallel_region found\n"); 183 | exit(1); 184 | } 185 | 186 | nparallel++; 187 | 188 | // Continue iterating basic blocks from nb1 (restart re-scans this function starting at the split-off block). 189 | restart = nb1; 190 | break; 191 | } 192 | 193 | if (restart) break; 194 | } 195 | } 196 | while (restart); 197 | } 198 | 199 | // Export parallel blocks list into vector. 200 | parallelBlocks.reserve(pbl.size()); 201 | parallelBlocks.assign(pbl.begin(), pbl.end()); 202 | 203 | // Remove parallel region marks declarations. 204 | begin_parallel_region->eraseFromParent(); 205 | end_parallel_region->eraseFromParent(); 206 | } 207 | 208 | // Perform store instructions in threadIdx.x = 0 only.
209 | static void storeInZeroThreadOnly(Module* module, vector& parallelBlocks) 210 | { 211 | if (!module) return; 212 | 213 | Type* int32Ty = Type::getInt32Ty(module->getContext()); 214 | Value* zero = ConstantInt::get(int32Ty, 0); 215 | 216 | const char* threadIdxName = "llvm.nvvm.read.ptx.sreg.tid.x"; 217 | Function* threadIdx = module->getFunction(threadIdxName); 218 | if (!threadIdx) 219 | { 220 | FunctionType* ft = FunctionType::get(int32Ty, std::vector(), false); 221 | threadIdx = Function::Create(ft, Function::ExternalLinkage, threadIdxName, module); 222 | } 223 | 224 | for (Module::iterator fi = module->begin(), fe = module->end(); fi != fe; fi++) 225 | { 226 | int nsplits = 0; 227 | BasicBlock* restart = NULL; 228 | do 229 | { 230 | for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; bi++) 231 | { 232 | BasicBlock* b = &*bi; 233 | if (restart && (b != restart)) continue; 234 | 235 | restart = NULL; 236 | 237 | // Skip basic blocks belonging to parallel regions. 238 | if (find(parallelBlocks.begin(), parallelBlocks.end(), b) != parallelBlocks.end()) 239 | continue; 240 | 241 | for (BasicBlock::iterator ii = b->begin(), ie = b->end(); ii != ie; ii++) 242 | { 243 | StoreInst* si = dyn_cast(cast(ii)); 244 | if (!si) continue; 245 | 246 | // Move StoreInst and all insts below StoreInst to a new block. 247 | BasicBlock *nb1 = NULL; 248 | { 249 | BasicBlock::iterator SplitIt = ii; 250 | while (isa(SplitIt) || isa(SplitIt)) 251 | SplitIt++; 252 | stringstream name; 253 | name << ".store_" << nsplits; 254 | nb1 = bi->splitBasicBlock(SplitIt, b->getName() + name.str()); 255 | } 256 | 257 | BasicBlock::iterator nii1 = nb1->begin(); 258 | nii1++; 259 | 260 | // Move all insts below StoreInst to a new block. 
261 | BasicBlock *nb2 = NULL; 262 | { 263 | BasicBlock::iterator SplitIt = nii1; 264 | while (isa(SplitIt) || isa(SplitIt)) 265 | SplitIt++; 266 | stringstream name; 267 | name << ".else_" << nsplits; 268 | nb2 = nb1->splitBasicBlock(SplitIt, b->getName() + name.str()); 269 | } 270 | 271 | // Call intrinsic to retrieve threadIdx value. 272 | Value* tid = CallInst::Create(threadIdx, "", b->getTerminator()); 273 | 274 | // Check if threadIdx is equal to zero. 275 | Value* cond = new ICmpInst(b->getTerminator(), 276 | ICmpInst::ICMP_EQ, tid, zero, ""); 277 | 278 | // Nuke the old uncond branch. 279 | b->getTerminator()->eraseFromParent(); 280 | 281 | // Conditionally branch to nb1 or nb2, depending on threadIdx. NOTE(review): the local 'bi' below shadows the Function::iterator 'bi' of the enclosing loop; its value is unused, so renaming or dropping the variable would be safe. 282 | BranchInst* bi = BranchInst::Create(nb1, nb2, cond, b); 283 | 284 | nsplits++; 285 | 286 | // Continue iterating basic blocks from nb2. 287 | restart = nb2; 288 | break; 289 | } 290 | 291 | if (restart) break; 292 | } 293 | } 294 | while (restart); 295 | } 296 | } 297 | 298 | extern "C" void runModulePass(Module* module) 299 | { 300 | vector parallelBlocks; 301 | 302 | markParallelBasicBlocks(module, parallelBlocks); 303 | 304 | // Perform store instructions in threadIdx.x = 0 only. 305 | storeInZeroThreadOnly(module, parallelBlocks); 306 | #if 0 307 | // Rerunning -O3 optimization after our modifications.
308 | PassManager manager; 309 | PassManagerBuilder builder; 310 | builder.Inliner = 0; 311 | builder.OptLevel = 3; 312 | builder.SizeLevel = 3; 313 | builder.DisableUnrollLoops = true; 314 | builder.populateModulePassManager(manager); 315 | manager.run(*module); 316 | 317 | outs() << *module << "\n"; 318 | #endif 319 | } 320 | 321 | -------------------------------------------------------------------------------- /src/test1.cu: -------------------------------------------------------------------------------- 1 | extern "C" __device__ void kernel(int* result) { *result = 1; } 2 | 3 | int main() { return 0; } 4 | 5 | -------------------------------------------------------------------------------- /src/test2.cu: -------------------------------------------------------------------------------- 1 | extern "C" __device__ void begin_parallel_region(); 2 | extern "C" __device__ void end_parallel_region(); 3 | 4 | extern "C" __device__ void kernel(int n, int* inputs, int* outputs) 5 | { 6 | outputs[0] = 0; 7 | 8 | begin_parallel_region(); 9 | for (int i = 1; i < n - 1; i++) 10 | if (inputs[i] < 2) 11 | outputs[i] = inputs[i] + i; 12 | else 13 | outputs[i] = inputs[i]; 14 | end_parallel_region(); 15 | 16 | outputs[n - 1] = n - 1; 17 | } 18 | 19 | int main() { return 0; } 20 | 21 | --------------------------------------------------------------------------------